Loading
Create a JavaScript code formatter that parses source into an AST, applies formatting rules for indentation, semicolons, and line width, then outputs clean code.
Code formatters like Prettier transform messy source code into consistently styled output. Under the hood they parse source into an Abstract Syntax Tree (AST), walk the tree to build an intermediate representation (IR) of formatted "documents," then print those documents within a given line width. In this tutorial you will build a simplified JavaScript formatter that handles indentation, semicolons, quote style, and line-width wrapping.
You will use acorn to parse JavaScript into an ESTree-compatible AST, then write your own printer that converts AST nodes back into formatted source. This is the same architecture Prettier uses.
Prerequisites: Node.js 18+, TypeScript, understanding of tree data structures.
Add the following scripts to package.json:
Create src/types.ts with the configuration interface and document IR types.
Create src/parser/parse.ts. This wraps acorn and returns the ESTree AST.
Create src/printer/doc.ts with factory functions for building the document IR.
Create src/printer/ast-printer.ts. This walks AST nodes and converts them to document IR. We handle a subset of JavaScript: variable declarations, functions, expressions, if/else, return, and object/array literals.
Create src/printer/render.ts. This takes document IR and renders it to a string, respecting print width and indent settings.
Create src/formatter.ts that ties parsing, printing, and rendering together.
Create src/cli.ts with flag parsing for all formatting options.
Create a test file test-input.js:
Run the formatter:
Expected output:
Extend the CLI to support --check which exits with code 1 if any file would change. This is what teams run in CI to enforce formatting.
Add this to src/cli.ts before the main() call:
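Then, in main(), run the check pass below instead of the normal formatting loop when --check is passed. Remember to strip --check from the arguments before they reach parseArgs (just as --write is stripped); otherwise it is collected as a file name: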
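This gives you a formatter that parses real JavaScript, applies configurable style rules, and integrates into CI pipelines. Because the output is printed from the AST rather than patched with regular expressions, layout decisions are made per node type and do not depend on how the input happened to be written. Two caveats apply to the code as shown: any node type that printNode does not handle is replaced by an `/* unsupported: ... */` placeholder, and comments are not preserved because the parser does not attach them to the AST — so do not run --write on code you have not committed. Extend the printNode function to handle more AST node types (classes, for-loops, switch statements) and add comment handling, and you will be well on the way to a production-grade tool.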
mkdir code-formatter && cd code-formatter
npm init -y
npm install acorn --save
npm install typescript tsx @types/node --save-dev
npx tsc --init --strict --target ES2022 --module NodeNext --moduleResolution NodeNext --outDir dist --rootDir src
mkdir -p src/{parser,printer,rules}{
"scripts": {
"format": "tsx src/cli.ts",
"build": "tsc"
}
}// src/types.ts
/**
 * Style settings that control how the formatter prints code.
 * Callers may supply any subset; missing fields come from DEFAULT_OPTIONS.
 */
export interface FormatOptions {
/** Number of spaces that make up one indentation level when `useTabs` is false. */
indentSize: number;
/** When true, each indentation level is a single tab character instead of spaces. */
useTabs: boolean;
/** When true, statements are terminated with `;`. */
semicolons: boolean;
/** When true, string literals are wrapped in single quotes; otherwise double quotes. */
singleQuote: boolean;
/** Target maximum line width, in columns. */
printWidth: number;
/** When true, a comma is printed after the last property of an object literal. */
trailingComma: boolean;
}
/**
 * Baseline style used for every option the caller does not override:
 * two-space indentation, semicolons, single quotes, 80 columns, trailing commas.
 */
export const DEFAULT_OPTIONS: FormatOptions = {
indentSize: 2,
useTabs: false,
semicolons: true,
singleQuote: true,
printWidth: 80,
trailingComma: true,
};
// Intermediate Representation for the printer
/**
 * One node of the printer's intermediate representation: either literal text
 * (a plain string) or one of the structural nodes declared below.
 */
export type Doc = string | DocGroup | DocIndent | DocLine | DocConcat;
/** Marks `contents` as a unit for layout purposes. */
export interface DocGroup {
type: "group";
contents: Doc;
}
/** Marks `contents` as being one indentation level deeper than its surroundings. */
export interface DocIndent {
type: "indent";
contents: Doc;
}
/** A line-break position; `hard` is set to true on breaks that are mandatory. */
export interface DocLine {
type: "line";
hard?: boolean;
}
/** A sequence of documents printed one after another, in order. */
export interface DocConcat {
type: "concat";
parts: Doc[];
}// src/parser/parse.ts
import * as acorn from "acorn";
/**
 * Parses JavaScript source text with acorn and returns the resulting AST.
 *
 * The source is parsed as an ES module using the newest ECMAScript version
 * acorn supports, with line/column locations recorded on every node.
 *
 * @param source - JavaScript source text.
 * @returns The root node produced by acorn.
 * @throws Error whose message is the parser's message prefixed with "Parse error: ".
 */
export function parseJS(source: string): acorn.Node {
  const parserOptions: acorn.Options = {
    ecmaVersion: "latest",
    sourceType: "module",
    locations: true,
  };

  try {
    return acorn.parse(source, parserOptions);
  } catch (error) {
    // Normalise whatever was thrown into a single, prefixed Error.
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`Parse error: ${detail}`);
  }
}
// src/printer/doc.ts
import { Doc, DocGroup, DocIndent, DocLine, DocConcat } from "../types.js";
/**
 * Builds a group node around `contents`.
 *
 * @param contents - The document to wrap.
 * @returns A new `DocGroup` holding `contents`.
 */
export function group(contents: Doc): DocGroup {
  const node: DocGroup = { type: "group", contents };
  return node;
}
/**
 * Builds an indent node around `contents`.
 *
 * @param contents - The document to place one indentation level deeper.
 * @returns A new `DocIndent` holding `contents`.
 */
export function indent(contents: Doc): DocIndent {
  const node: DocIndent = { type: "indent", contents };
  return node;
}
/**
 * Builds a line node without the `hard` flag.
 *
 * @returns A fresh `DocLine` on every call.
 */
export function line(): DocLine {
  const node: DocLine = { type: "line" };
  return node;
}
/**
 * Builds a line node with `hard` set to true.
 *
 * @returns A fresh `DocLine` on every call.
 */
export function hardline(): DocLine {
  const node: DocLine = { type: "line", hard: true };
  return node;
}
/**
 * Builds a concat node from the given documents, keeping their order.
 *
 * @param parts - Documents to print one after another.
 * @returns A new `DocConcat` whose `parts` is the array of arguments.
 */
export function concat(...parts: Doc[]): DocConcat {
  const node: DocConcat = { type: "concat", parts };
  return node;
}
/**
 * Interleaves `separator` between consecutive entries of `docs`.
 *
 * The same `separator` value is reused at every position; no separator is
 * placed before the first entry or after the last.
 *
 * @param separator - Document inserted between neighbouring entries.
 * @param docs - Documents to join, in order.
 * @returns A concat node containing the interleaved sequence.
 */
export function join(separator: Doc, docs: Doc[]): Doc {
  const interleaved: Doc[] = [];
  for (const [position, entry] of docs.entries()) {
    if (position !== 0) {
      interleaved.push(separator);
    }
    interleaved.push(entry);
  }
  return { type: "concat", parts: interleaved };
}
// src/printer/ast-printer.ts
import { Doc, FormatOptions } from "../types.js";
import { concat, group, indent, hardline, line, join } from "./doc.js";
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- AST nodes are loosely typed
type ASTNode = any;

/** Binding strength of binary and logical operators; a larger number binds tighter. */
const BINARY_PRECEDENCE: Record<string, number> = {
  "??": 1,
  "||": 2,
  "&&": 3,
  "|": 4,
  "^": 5,
  "&": 6,
  "==": 7,
  "!=": 7,
  "===": 7,
  "!==": 7,
  "<": 8,
  "<=": 8,
  ">": 8,
  ">=": 8,
  in: 8,
  instanceof: 8,
  "<<": 9,
  ">>": 9,
  ">>>": 9,
  "+": 10,
  "-": 10,
  "*": 11,
  "/": 11,
  "%": 11,
  "**": 12,
};

/** Expression kinds that bind more loosely than every binary operator. */
const LOOSE_EXPRESSION_TYPES = new Set<string>([
  "AssignmentExpression",
  "ArrowFunctionExpression",
  "ConditionalExpression",
  "SequenceExpression",
  "YieldExpression",
]);

/** Wraps `doc` in parentheses when `condition` holds. */
function wrapIf(condition: boolean, doc: Doc): Doc {
  return condition ? concat("(", doc, ")") : doc;
}

/**
 * Decides whether `child` must be parenthesised when printed as an operand of
 * a binary/logical expression using `parentOperator`. The AST does not keep
 * the source's parentheses, so they have to be re-derived from precedence.
 */
function needsParensAsOperand(
  child: ASTNode,
  parentOperator: string,
  side: "left" | "right"
): boolean {
  if (!child) return false;
  if (LOOSE_EXPRESSION_TYPES.has(child.type)) return true;
  if (child.type !== "BinaryExpression" && child.type !== "LogicalExpression") return false;

  // `??` may not be mixed with `||` / `&&` without explicit parentheses.
  const mixesNullish = (child.operator === "??") !== (parentOperator === "??");
  if (child.type === "LogicalExpression" && mixesNullish) return true;

  const childPrecedence = BINARY_PRECEDENCE[child.operator] ?? 0;
  const parentPrecedence = BINARY_PRECEDENCE[parentOperator] ?? 0;
  if (childPrecedence !== parentPrecedence) return childPrecedence < parentPrecedence;

  // Same precedence: the operand on the non-associating side keeps its parens
  // (`a - (b - c)`, `(a ** b) ** c`).
  const rightAssociative = parentOperator === "**";
  return side === (rightAssociative ? "left" : "right");
}

/** True when `node` must be parenthesised before `.prop`, `[prop]` or `(args)` is appended. */
function needsParensBeforeAccess(node: ASTNode): boolean {
  if (!node) return false;
  return (
    node.type === "BinaryExpression" ||
    node.type === "LogicalExpression" ||
    LOOSE_EXPRESSION_TYPES.has(node.type)
  );
}

/**
 * Prints a string literal with the preferred quote character, keeping the
 * author's escape sequences and escaping/unescaping quotes as required so the
 * result is always a valid literal with the same value.
 */
function printStringLiteral(node: ASTNode, singleQuote: boolean): string {
  const quote = singleQuote ? "'" : '"';
  const otherQuote = singleQuote ? '"' : "'";
  // Prefer the original spelling; synthesised nodes without `raw` are escaped via JSON.
  const inner: string =
    typeof node.raw === "string" && node.raw.length >= 2
      ? node.raw.slice(1, -1)
      : JSON.stringify(node.value).slice(1, -1);
  const body = inner.replace(
    /\\([\s\S])|(['"])/g,
    (match: string, escaped?: string, bareQuote?: string) => {
      if (escaped === otherQuote) return otherQuote; // escape is no longer needed
      if (bareQuote === quote) return "\\" + quote; // now collides with the delimiter
      return match;
    }
  );
  return quote + body + quote;
}

/**
 * Converts an ESTree node into the printer's document IR.
 *
 * Handles a subset of JavaScript: programs, variable declarations, function
 * declarations, arrow functions, blocks, return/if/expression statements,
 * calls, member access, object/array literals, identifiers, literals,
 * binary/logical/assignment expressions and template literals.
 *
 * NOTE: any other node type is replaced by an `unsupported` comment
 * placeholder, which discards the original code for that node.
 *
 * @param node - ESTree node to print; a missing node prints as the empty string.
 * @param options - Style options (semicolons, quote style, trailing commas).
 * @returns The document describing the formatted node.
 */
export function printNode(node: ASTNode, options: FormatOptions): Doc {
  if (!node) return "";
  switch (node.type) {
    case "Program":
      return join(
        hardline(),
        node.body.map((n: ASTNode) => printNode(n, options))
      );
    case "VariableDeclaration": {
      const keyword = node.kind;
      const decls = node.declarations.map((d: ASTNode) => printNode(d, options));
      const semi = options.semicolons ? ";" : "";
      return concat(keyword, " ", join(", ", decls), semi);
    }
    case "VariableDeclarator": {
      if (!node.init) return printNode(node.id, options);
      return concat(printNode(node.id, options), " = ", printNode(node.init, options));
    }
    case "FunctionDeclaration": {
      const name = printNode(node.id, options);
      const params = join(
        ", ",
        node.params.map((p: ASTNode) => printNode(p, options))
      );
      const body = printNode(node.body, options);
      const async_ = node.async ? "async " : "";
      // Keep the generator marker; dropping it would change the function's kind.
      const star = node.generator ? "*" : "";
      return concat(async_, "function", star, " ", name, "(", params, ") ", body);
    }
    case "ArrowFunctionExpression": {
      const params =
        node.params.length === 1 && node.params[0].type === "Identifier"
          ? printNode(node.params[0], options)
          : concat(
              "(",
              join(
                ", ",
                node.params.map((p: ASTNode) => printNode(p, options))
              ),
              ")"
            );
      // An object-literal body must stay parenthesised or it parses as a block.
      const body = wrapIf(
        node.body.type === "ObjectExpression",
        printNode(node.body, options)
      );
      const async_ = node.async ? "async " : "";
      return group(concat(async_, params, " => ", body));
    }
    case "BlockStatement": {
      if (node.body.length === 0) return "{}";
      const stmts = node.body.map((s: ASTNode) => printNode(s, options));
      return concat("{", indent(concat(hardline(), join(hardline(), stmts))), hardline(), "}");
    }
    case "ReturnStatement": {
      const semi = options.semicolons ? ";" : "";
      if (!node.argument) return concat("return", semi);
      return concat("return ", printNode(node.argument, options), semi);
    }
    case "ExpressionStatement": {
      const semi = options.semicolons ? ";" : "";
      return concat(printNode(node.expression, options), semi);
    }
    case "CallExpression": {
      const callee = wrapIf(
        needsParensBeforeAccess(node.callee),
        printNode(node.callee, options)
      );
      const args = join(
        ", ",
        node.arguments.map((a: ASTNode) => printNode(a, options))
      );
      return concat(callee, "(", args, ")");
    }
    case "MemberExpression": {
      // `1.toFixed()` is a syntax error, so a numeric literal before `.` needs parens.
      const bareNumber =
        !node.computed &&
        node.object.type === "Literal" &&
        typeof node.object.value === "number";
      const obj = wrapIf(
        bareNumber || needsParensBeforeAccess(node.object),
        printNode(node.object, options)
      );
      const prop = printNode(node.property, options);
      return node.computed ? concat(obj, "[", prop, "]") : concat(obj, ".", prop);
    }
    case "IfStatement": {
      const test = printNode(node.test, options);
      const consequent = printNode(node.consequent, options);
      const base = concat("if (", test, ") ", consequent);
      if (!node.alternate) return base;
      return concat(base, " else ", printNode(node.alternate, options));
    }
    case "ObjectExpression": {
      if (node.properties.length === 0) return "{}";
      const props = node.properties.map((p: ASTNode) => printNode(p, options));
      const trailing = options.trailingComma ? "," : "";
      return group(
        concat(
          "{",
          indent(concat(hardline(), join(concat(",", hardline()), props), trailing)),
          hardline(),
          "}"
        )
      );
    }
    case "Property": {
      const key = printNode(node.key, options);
      if (node.shorthand) return key;
      const value = printNode(node.value, options);
      // Computed keys keep their brackets: `{ [k]: v }` is not `{ k: v }`.
      return concat(node.computed ? concat("[", key, "]") : key, ": ", value);
    }
    case "ArrayExpression": {
      if (node.elements.length === 0) return "[]";
      const elems = node.elements.map((e: ASTNode) => printNode(e, options));
      return group(concat("[", join(", ", elems), "]"));
    }
    case "Identifier":
      return node.name;
    case "Literal": {
      if (typeof node.value === "string") {
        return printStringLiteral(node, options.singleQuote);
      }
      // Use the source spelling so BigInt suffixes, regex literals and number
      // notation survive; fall back to the value for synthesised nodes.
      return typeof node.raw === "string" ? node.raw : String(node.value);
    }
    case "BinaryExpression":
    case "LogicalExpression":
      return concat(
        wrapIf(
          needsParensAsOperand(node.left, node.operator, "left"),
          printNode(node.left, options)
        ),
        ` ${node.operator} `,
        wrapIf(
          needsParensAsOperand(node.right, node.operator, "right"),
          printNode(node.right, options)
        )
      );
    case "AssignmentExpression":
      // Print the actual operator so `+=`, `??=` etc. are not turned into `=`.
      return concat(
        printNode(node.left, options),
        ` ${node.operator} `,
        printNode(node.right, options)
      );
    case "TemplateLiteral": {
      const parts: Doc[] = ["`"];
      for (let i = 0; i < node.quasis.length; i++) {
        parts.push(node.quasis[i].value.raw);
        if (i < node.expressions.length) {
          parts.push("${", printNode(node.expressions[i], options), "}");
        }
      }
      parts.push("`");
      return concat(...parts);
    }
    default:
      // Fallback: return raw source placeholder
      return `/* unsupported: ${node.type} */`;
  }
}
// src/printer/render.ts
import { Doc, FormatOptions } from "../types.js";
/** Mutable accumulator threaded through the recursive render pass. */
interface RenderState {
/** Text emitted so far. */
output: string;
/** Width of the line currently being written, reset after each line break. */
currentLineWidth: number;
/** Current indentation depth, incremented while inside an indent node. */
indentLevel: number;
}
/**
 * Renders a document tree to text.
 *
 * A group is printed on one line (its soft lines become single spaces) when
 * its flat form fits between the current column and `options.printWidth`;
 * otherwise its soft lines become line breaks. Hard lines always break, and a
 * soft line that is not inside any fitting group also breaks.
 *
 * @param doc - Document to render.
 * @param options - Supplies `printWidth`, `useTabs` and `indentSize`.
 * @returns The rendered text, without any trailing newline being added.
 */
export function renderDoc(doc: Doc, options: FormatOptions): string {
  const state: RenderState = { output: "", currentLineWidth: 0, indentLevel: 0 };
  render(doc, state, options);
  return state.output;
}

/** Returns the whitespace prefix for `level` indentation levels. */
function getIndent(level: number, options: FormatOptions): string {
  const char = options.useTabs ? "\t" : " ".repeat(options.indentSize);
  return char.repeat(level);
}

/**
 * Width of `doc` if it were printed on a single line (soft lines as one
 * space). Returns Infinity when the document contains a hard line, because
 * such a document can never be printed flat.
 */
function flatWidth(doc: Doc): number {
  if (typeof doc === "string") return doc.length;
  switch (doc.type) {
    case "concat": {
      let total = 0;
      for (const part of doc.parts) total += flatWidth(part);
      return total;
    }
    case "group":
    case "indent":
      return flatWidth(doc.contents);
    case "line":
      return doc.hard ? Infinity : 1;
  }
}

/**
 * Appends `doc` to `state.output`.
 *
 * @param flat - True while inside a group that was found to fit on the
 *   current line; soft lines are then printed as a single space.
 */
function render(doc: Doc, state: RenderState, options: FormatOptions, flat = false): void {
  if (typeof doc === "string") {
    state.output += doc;
    state.currentLineWidth += doc.length;
    return;
  }
  switch (doc.type) {
    case "concat":
      for (const part of doc.parts) {
        render(part, state, options, flat);
      }
      break;
    case "group": {
      // Simplification: only the group's own width is measured, not the text
      // that follows it on the same line.
      const fits =
        flat || state.currentLineWidth + flatWidth(doc.contents) <= options.printWidth;
      render(doc.contents, state, options, fits);
      break;
    }
    case "indent":
      state.indentLevel++;
      render(doc.contents, state, options, flat);
      state.indentLevel--;
      break;
    case "line": {
      if (flat && !doc.hard) {
        state.output += " ";
        state.currentLineWidth += 1;
        break;
      }
      const indentStr = getIndent(state.indentLevel, options);
      state.output += "\n" + indentStr;
      // A tab is counted as `indentSize` columns so width checks stay meaningful.
      state.currentLineWidth = options.useTabs
        ? state.indentLevel * options.indentSize
        : indentStr.length;
      break;
    }
  }
}
// src/formatter.ts
import { FormatOptions, DEFAULT_OPTIONS } from "./types.js";
import { parseJS } from "./parser/parse.js";
import { printNode } from "./printer/ast-printer.js";
import { renderDoc } from "./printer/render.js";
/**
 * Formats JavaScript source text: parse, convert the AST to a document, then
 * render the document to a string.
 *
 * @param source - JavaScript source text.
 * @param userOptions - Overrides applied on top of DEFAULT_OPTIONS.
 * @returns The formatted code, always ending with a newline.
 */
export function format(source: string, userOptions: Partial<FormatOptions> = {}): string {
  const options: FormatOptions = { ...DEFAULT_OPTIONS, ...userOptions };
  const rendered = renderDoc(printNode(parseJS(source), options), options);
  // Guarantee a trailing newline without doubling an existing one.
  if (rendered.endsWith("\n")) {
    return rendered;
  }
  return rendered + "\n";
}
// src/cli.ts
import * as fs from "node:fs";
import { format } from "./formatter.js";
import { FormatOptions, DEFAULT_OPTIONS } from "./types.js";
/**
 * Splits command-line arguments into file paths and formatting options.
 *
 * The mode flags `--write` and `--check` are recognised and skipped here; the
 * caller reads them directly from the argument list.
 *
 * @param args - Arguments after the script name.
 * @returns The file paths in the order given, and only the options that were set explicitly.
 * @throws Error when a numeric flag has a missing or non-positive-integer
 *   value, or when an unknown `--option` is given.
 */
function parseArgs(args: string[]): { files: string[]; options: Partial<FormatOptions> } {
  const files: string[] = [];
  const options: Partial<FormatOptions> = {};

  // Reads the value that follows a numeric flag and rejects anything that is
  // not a positive integer, instead of letting NaN flow into the formatter.
  const readPositiveInt = (flag: string, index: number): number => {
    const raw = args[index];
    if (raw === undefined || !/^\d+$/.test(raw) || Number(raw) === 0) {
      const got = raw === undefined ? "nothing" : JSON.stringify(raw);
      throw new Error(`${flag} expects a positive integer, got ${got}`);
    }
    return Number.parseInt(raw, 10);
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) continue;
    switch (arg) {
      case "--tabs":
        options.useTabs = true;
        break;
      case "--indent-size":
        options.indentSize = readPositiveInt(arg, ++i);
        break;
      case "--no-semicolons":
        options.semicolons = false;
        break;
      case "--double-quote":
        options.singleQuote = false;
        break;
      case "--print-width":
        options.printWidth = readPositiveInt(arg, ++i);
        break;
      case "--no-trailing-comma":
        options.trailingComma = false;
        break;
      case "--write":
      case "--check":
        // Mode flags are handled by the caller; they must not become file names.
        break;
      default:
        if (arg.startsWith("--")) {
          throw new Error(`Unknown option: ${arg}`);
        }
        files.push(arg);
    }
  }
  return { files, options };
}
/**
 * CLI entry point.
 *
 * With no arguments, prints usage and exits with code 0. Otherwise formats
 * each listed file in order: with `--write` the file is overwritten and a
 * confirmation is logged, without it the formatted text goes to stdout. The
 * first file that fails to read or format is reported on stderr and the
 * process exits with code 1.
 */
function main(): void {
  const args = process.argv.slice(2);

  if (args.length === 0) {
    const usageLines = [
      "Usage: format [options] <file1> [file2] ...",
      "\nOptions:",
      " --tabs Use tabs instead of spaces",
      " --indent-size N Spaces per indent (default: 2)",
      " --no-semicolons Omit semicolons",
      " --double-quote Use double quotes",
      " --print-width N Max line width (default: 80)",
      " --no-trailing-comma Omit trailing commas",
      " --write Write back to file",
    ];
    for (const text of usageLines) {
      console.log(text);
    }
    process.exit(0);
  }

  const shouldWrite = args.includes("--write");
  const remaining = args.filter((flag) => flag !== "--write");
  const { files, options } = parseArgs(remaining);

  for (const file of files) {
    try {
      const formatted = format(fs.readFileSync(file, "utf-8"), options);
      if (!shouldWrite) {
        process.stdout.write(formatted);
        continue;
      }
      fs.writeFileSync(file, formatted);
      console.log(`Formatted: ${file}`);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Error formatting ${file}: ${message}`);
      process.exit(1);
    }
  }
}
main();const x = 1;
const y = { a: 1, b: 2, c: 3 };
function greet(name) {
const message = "Hello, " + name;
console.log(message);
return message;
}
if (x === 1) {
greet("world");
}npx tsx src/cli.ts test-input.jsconst x = 1;
const y = {
a: 1,
b: 2,
c: 3,
};
function greet(name) {
const message = 'Hello, ' + name;
console.log(message);
return message;
}
if (x === 1) {
greet('world');
}function checkFile(filePath: string, options: Partial<FormatOptions>): boolean {
// Reads `filePath` as UTF-8, formats the text in memory with `options`, and
// reports whether the file is already formatted. Nothing is written to disk.
// Read and format errors are not caught here; they propagate to the caller.
const source = fs.readFileSync(filePath, "utf-8");
const formatted = format(source, options);
// Exact string comparison: any difference means formatting would change the file.
return source === formatted;
}const shouldCheck = args.includes("--check");
// Check mode: report every file whose formatted form differs from what is on
// disk, never write anything, and exit non-zero if any file needs formatting.
// NOTE(review): make sure "--check" itself is not present in `files`.
if (shouldCheck) {
  let allClean = true;
  for (const file of files) {
    try {
      if (!checkFile(file, options)) {
        console.log(`Would reformat: ${file}`);
        allClean = false;
      }
    } catch (error) {
      // Match the formatting loop: report unreadable or unparsable files per
      // file instead of crashing, and count them as a failed check.
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Error checking ${file}: ${message}`);
      allClean = false;
    }
  }
  process.exit(allClean ? 0 : 1);
}