Loading
Generate realistic names, emails, addresses, dates, and custom schemas with seeded randomness and bulk export.
Fake data generators are essential for testing, prototyping, and demo environments. Instead of manually creating test records or using production data (a security risk), you generate realistic but fictional data on demand. Libraries like Faker.js handle this, but building one teaches you seeded random number generators, combinatorial data construction, and schema-driven generation.
In this tutorial, you will build a fake data generator that produces names, emails, addresses, phone numbers, dates, and more. It supports seeded randomness for reproducible output, custom schemas for structured records, and bulk export to JSON or CSV. No external dependencies. Works on macOS, Windows, and Linux.
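Reproducible randomness is critical for testing. Given the same seed and the same sequence of calls, you always get the same data. The one exception is anything that reads the clock: date() and dateString() default their upper bound to the current time, so pass explicit start and end dates when date fields must be reproducible too.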
Real-sounding fake data requires good source lists.
Each generator produces one type of fake data.
Define custom data shapes and generate records that match them.
Export generated data as JSON or CSV.
Create a clean top-level API that ties everything together.
Build a command-line tool for generating data from built-in or custom schemas.
Show that seeded generation produces identical output and how to define custom schemas.
Run the demo with npx tsx src/demo.ts. You will see that both faker instances with seed 42 produce identical names, the custom blog post schema generates structured content, and 10,000 users are generated in milliseconds.
For the CLI, try:
From here, you could add more data types (credit card numbers, IP addresses, colors), locale support for international names, relational generation where orders reference existing users, or a web API for on-demand generation.
// src/random.ts
/**
 * Deterministic pseudo-random number generator.
 *
 * Two instances constructed with the same seed yield the same values as long
 * as the same methods are called in the same order.
 */
export class SeededRandom {
  /** Current generator state; coerced to a signed 32-bit integer on every step. */
  private state: number;

  /**
   * @param seed Starting state. Defaults to the current time, so unseeded
   *   instances differ between runs. The value is truncated to a 32-bit
   *   integer on first use.
   */
  constructor(seed: number = Date.now()) {
    this.state = seed;
  }

  /**
   * Advances the generator one step (Mulberry32 PRNG - fast, simple, good distribution).
   * @returns A float in [0, 1).
   */
  next(): number {
    this.state |= 0;
    this.state = (this.state + 0x6d2b79f5) | 0;
    let t = Math.imul(this.state ^ (this.state >>> 15), 1 | this.state);
    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
    // `>>> 0` reinterprets the result as unsigned before scaling by 2^32.
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  }

  /**
   * @returns An integer in [min, max], both ends inclusive.
   */
  int(min: number, max: number): number {
    return Math.floor(this.next() * (max - min + 1)) + min;
  }

  /**
   * Picks one element at random.
   * @returns The chosen element; `undefined` at runtime if `array` is empty.
   */
  pick<T>(array: T[]): T {
    return array[this.int(0, array.length - 1)];
  }

  /**
   * Picks up to `count` distinct positions from `array` without modifying it.
   *
   * @param array Source elements.
   * @param count Number of elements wanted. Values above the array length are
   *   capped; zero or negative values yield an empty array.
   * @returns A new array holding the selected elements in shuffled order.
   */
  pickMany<T>(array: T[], count: number): T[] {
    const shuffled = [...array];
    // Fisher-Yates shuffle over the whole copy.
    for (let i = shuffled.length - 1; i > 0; i--) {
      const j = this.int(0, i);
      [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
    }
    // Clamp at 0: a negative end index would make slice() count from the end
    // and return elements instead of nothing.
    const take = Math.max(0, Math.min(count, shuffled.length));
    return shuffled.slice(0, take);
  }

  /**
   * @param decimals Number of decimal places to round to (default 2).
   * @returns A float in [min, max], rounded to `decimals` places.
   */
  float(min: number, max: number, decimals: number = 2): number {
    const value = this.next() * (max - min) + min;
    const factor = Math.pow(10, decimals);
    return Math.round(value * factor) / factor;
  }

  /**
   * @param probability Chance of returning `true` (default 0.5).
   */
  boolean(probability: number = 0.5): boolean {
    return this.next() < probability;
  }
}
// src/dictionaries.ts
/** Pool of given names (40 entries). */
export const FIRST_NAMES = [
  "James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael", "Linda",
  "David", "Elizabeth", "William", "Barbara", "Richard", "Susan", "Joseph", "Jessica",
  "Thomas", "Sarah", "Charles", "Karen", "Christopher", "Lisa", "Daniel", "Nancy",
  "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra", "Donald", "Ashley",
  "Steven", "Dorothy", "Paul", "Kimberly", "Andrew", "Emily", "Joshua", "Donna",
];
/** Pool of family names (30 entries). */
export const LAST_NAMES = [
  "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia",
  "Miller", "Davis", "Rodriguez", "Martinez", "Hernandez", "Lopez",
  "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore",
  "Jackson", "Martin", "Lee", "Perez", "Thompson", "White",
  "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
];
/** Pool of street names, each already including its suffix where it has one (16 entries). */
export const STREET_NAMES = [
  "Main St", "Oak Ave", "Maple Dr", "Cedar Ln",
  "Elm St", "Pine Rd", "Washington Blvd", "Park Ave",
  "Lake Dr", "Hill Rd", "River Rd", "Sunset Blvd",
  "Broadway", "Market St", "Highland Ave", "Forest Dr",
];
/** Pool of city names (18 entries). */
export const CITIES = [
  "Springfield", "Portland", "Franklin", "Clinton", "Madison", "Georgetown",
  "Arlington", "Salem", "Fairview", "Chester", "Riverside", "Oakland",
  "Burlington", "Manchester", "Milton", "Newport", "Ashland", "Greenville",
];
/** Pool of states, each with its full name and two-letter abbreviation (12 entries). */
export const STATES = [
  { name: "California", abbr: "CA" }, { name: "Texas", abbr: "TX" },
  { name: "Florida", abbr: "FL" }, { name: "New York", abbr: "NY" },
  { name: "Pennsylvania", abbr: "PA" }, { name: "Illinois", abbr: "IL" },
  { name: "Ohio", abbr: "OH" }, { name: "Georgia", abbr: "GA" },
  { name: "North Carolina", abbr: "NC" }, { name: "Michigan", abbr: "MI" },
  { name: "Oregon", abbr: "OR" }, { name: "Washington", abbr: "WA" },
];
/** Pool of e-mail domains (8 entries). */
export const DOMAINS = [
  "gmail.com", "yahoo.com", "outlook.com", "hotmail.com",
  "protonmail.com", "icloud.com", "mail.com", "fastmail.com",
];
/** Pool of company names (14 entries). */
export const COMPANIES = [
  "Acme Corp", "Globex", "Initech", "Umbrella Inc", "Stark Industries",
  "Wayne Enterprises", "Hooli", "Pied Piper", "Dunder Mifflin", "Sterling Cooper",
  "Cyberdyne Systems", "Soylent Corp", "Tyrell Corp", "Wonka Industries",
];
/** Pool of lowercase filler words (40 entries). */
export const LOREM_WORDS = [
  "lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit",
  "sed", "do", "eiusmod", "tempor", "incididunt", "ut", "labore", "et",
  "dolore", "magna", "aliqua", "enim", "ad", "minim", "veniam", "quis",
  "nostrud", "exercitation", "ullamco", "laboris", "nisi", "aliquip", "ex", "ea",
  "commodo", "consequat", "duis", "aute", "irure", "in", "reprehenderit", "voluptate",
];
// src/generators.ts
import { SeededRandom } from "./random.js";
import * as dict from "./dictionaries.js";
/**
 * Fake-data generators that all draw from one shared random source, so the
 * values produced depend only on that source's state and the order of calls
 * (with the exception of the clock-based default noted on `date`).
 */
export class Generators {
  /** @param rng Random source every generator draws from. */
  constructor(private rng: SeededRandom) {}

  /** Random first name from the built-in list. */
  firstName(): string {
    return this.rng.pick(dict.FIRST_NAMES);
  }

  /** Random last name from the built-in list. */
  lastName(): string {
    return this.rng.pick(dict.LAST_NAMES);
  }

  /** Random "First Last" name. */
  fullName(): string {
    return `${this.firstName()} ${this.lastName()}`;
  }

  /**
   * Builds a lowercase e-mail address of the form
   * `first[sep]last[1-99]@domain`, where `sep` is ".", "_" or nothing.
   *
   * @param name Optional full name to derive the address from. The first
   *   whitespace-separated token becomes the first part and the LAST token the
   *   second part, so middle names and repeated spaces are tolerated. Parts
   *   that cannot be derived (no name, or a single-word name) are drawn at
   *   random.
   */
  email(name?: string): string {
    const parts = name?.trim().split(/\s+/).filter((part) => part.length > 0) ?? [];
    const first = (parts.length > 0 ? parts[0] : this.firstName()).toLowerCase();
    // A surname needs at least two tokens; with one token it is generated.
    const last = (parts.length > 1 ? parts[parts.length - 1] : this.lastName()).toLowerCase();
    const domain = this.rng.pick(dict.DOMAINS);
    const separator = this.rng.pick([".", "_", ""]);
    // 30% of addresses get a numeric suffix.
    const suffix = this.rng.boolean(0.3) ? String(this.rng.int(1, 99)) : "";
    return `${first}${separator}${last}${suffix}@${domain}`;
  }

  /** Phone number formatted as `(AAA) PPP-LLLL`; area and prefix are 200-999. */
  phone(): string {
    const area = this.rng.int(200, 999);
    const prefix = this.rng.int(200, 999);
    const line = this.rng.int(1000, 9999);
    return `(${area}) ${prefix}-${line}`;
  }

  /** Street address: a house number in 100-9999 followed by a street name. */
  address(): string {
    const number = this.rng.int(100, 9999);
    const street = this.rng.pick(dict.STREET_NAMES);
    return `${number} ${street}`;
  }

  /** Random city name from the built-in list. */
  city(): string {
    return this.rng.pick(dict.CITIES);
  }

  /** Random state entry (full name plus abbreviation) from the built-in list. */
  state(): { name: string; abbr: string } {
    return this.rng.pick(dict.STATES);
  }

  /** Five-digit ZIP code string in 10000-99999. */
  zipCode(): string {
    return String(this.rng.int(10000, 99999));
  }

  /** Single-line address: `<street address>, <city>, <state abbr> <zip>`. */
  fullAddress(): string {
    // The state is drawn first, before the street, city and ZIP.
    const st = this.state();
    return `${this.address()}, ${this.city()}, ${st.abbr} ${this.zipCode()}`;
  }

  /** Random company name from the built-in list. */
  company(): string {
    return this.rng.pick(dict.COMPANIES);
  }

  /**
   * Random instant between `start` and `end`.
   *
   * NOTE: `end` defaults to the current time, so with default bounds the
   * result depends on when the call runs even for a fixed seed. Pass both
   * bounds explicitly when output must be reproducible.
   *
   * @param start Lower bound (default: 1 January 2020, local time).
   * @param end Upper bound (default: now).
   */
  date(start: Date = new Date(2020, 0, 1), end: Date = new Date()): Date {
    const timestamp = this.rng.int(start.getTime(), end.getTime());
    return new Date(timestamp);
  }

  /** Like `date`, but returns only the `YYYY-MM-DD` part of the ISO (UTC) string. */
  dateString(start?: Date, end?: Date): string {
    return this.date(start, end).toISOString().split("T")[0];
  }

  /** Integer in [min, max] inclusive (default 0-1000). */
  integer(min: number = 0, max: number = 1000): number {
    return this.rng.int(min, max);
  }

  /** Float in [min, max] rounded to `decimals` places (default 0-1000, 2 places). */
  decimal(min: number = 0, max: number = 1000, decimals: number = 2): number {
    return this.rng.float(min, max, decimals);
  }

  /** `true` with the given probability (default 0.5). */
  boolean(probability: number = 0.5): boolean {
    return this.rng.boolean(probability);
  }

  /**
   * UUID-shaped string with the version digit fixed to 4 and the variant
   * digit drawn from 8, 9, a, b; all other digits are random lowercase hex.
   */
  uuid(): string {
    const hex = () => this.rng.int(0, 15).toString(16);
    const segment = (len: number) => Array.from({ length: len }, hex).join("");
    return `${segment(8)}-${segment(4)}-4${segment(3)}-${this.rng.pick(["8", "9", "a", "b"])}${segment(3)}-${segment(12)}`;
  }

  /**
   * Sentence of filler words: first word capitalised, terminated by a period.
   *
   * @param wordCount Number of words. Values below 1 (including the default 0)
   *   select a random length of 5-15 words instead.
   */
  sentence(wordCount: number = 0): string {
    const count = this.resolveCount(wordCount, 5, 15);
    const words = Array.from({ length: count }, () => this.rng.pick(dict.LOREM_WORDS));
    words[0] = words[0].charAt(0).toUpperCase() + words[0].slice(1);
    return words.join(" ") + ".";
  }

  /**
   * Paragraph made of random-length sentences joined by single spaces.
   *
   * @param sentenceCount Number of sentences. Values below 1 (including the
   *   default 0) select a random count of 3-7 instead.
   */
  paragraph(sentenceCount: number = 0): string {
    const count = this.resolveCount(sentenceCount, 3, 7);
    return Array.from({ length: count }, () => this.sentence()).join(" ");
  }

  /** Picks one random element from a caller-supplied array. */
  pick<T>(array: T[]): T {
    return this.rng.pick(array);
  }

  /** Returns `requested` (fraction dropped) when it is at least 1; otherwise draws a count from [min, max]. */
  private resolveCount(requested: number, min: number, max: number): number {
    const whole = Math.floor(requested);
    return whole >= 1 ? whole : this.rng.int(min, max);
  }
}
// src/schema.ts
import { Generators } from "./generators.js";
/**
 * Callback that produces the value of one field. It receives the generator
 * set and the zero-based index of the record being built.
 */
export type FieldGenerator = (gen: Generators, index: number) => unknown;
/** A field name paired with the callback that produces its value. */
export interface SchemaField {
// Property name the generated value is stored under in each record.
name: string;
// Callback invoked once per record to produce this field's value.
generator: FieldGenerator;
}
/**
 * Ordered list of named field generators that can be expanded into records.
 */
export class Schema {
  private fields: SchemaField[] = [];

  /**
   * Appends a field to the schema.
   * @returns This schema, so calls can be chained.
   */
  field(name: string, generator: FieldGenerator): this {
    const entry: SchemaField = { name, generator };
    this.fields.push(entry);
    return this;
  }

  /**
   * Builds `count` records. For each record, every field's generator is
   * called in the order the fields were added, with the record's zero-based
   * index as second argument.
   */
  generate(gen: Generators, count: number): Record<string, unknown>[] {
    const output: Record<string, unknown>[] = [];
    let index = 0;
    while (index < count) {
      const row: Record<string, unknown> = {};
      for (let f = 0; f < this.fields.length; f++) {
        const current = this.fields[f];
        row[current.name] = current.generator(gen, index);
      }
      output.push(row);
      index += 1;
    }
    return output;
  }
}
// Pre-built schema templates
/**
 * Schema for a user record: sequential id starting at 1, name, email, phone,
 * address, company, join date and an active flag that is true 80% of the time.
 */
export function userSchema(): Schema {
  const schema = new Schema();
  schema.field("id", (_gen, index) => index + 1);
  schema.field("name", (gen) => gen.fullName());
  schema.field("email", (gen) => gen.email());
  schema.field("phone", (gen) => gen.phone());
  schema.field("address", (gen) => gen.fullAddress());
  schema.field("company", (gen) => gen.company());
  schema.field("joinedAt", (gen) => gen.dateString());
  schema.field("isActive", (gen) => gen.boolean(0.8));
  return schema;
}
/**
 * Schema for a product record: sequential id starting at 1, a three-part
 * generated name, price, category, stock flag, rating and review count.
 */
export function productSchema(): Schema {
  const categories = ["Electronics", "Books", "Clothing", "Home", "Sports", "Toys"];
  const schema = new Schema();
  schema.field("id", (_gen, index) => index + 1);
  schema.field("name", (gen) => {
    // Drawn in this order: tier, then kind, then variant.
    const tier = gen.pick(["Pro", "Ultra", "Basic", "Premium"]);
    const kind = gen.pick(["Widget", "Gadget", "Tool", "Device"]);
    const variant = gen.pick(["X", "Plus", "Max", "Mini"]);
    return `${tier} ${kind} ${variant}`;
  });
  schema.field("price", (gen) => gen.decimal(9.99, 499.99));
  schema.field("category", (gen) => gen.pick(categories));
  schema.field("inStock", (gen) => gen.boolean(0.7));
  schema.field("rating", (gen) => gen.decimal(1, 5, 1));
  schema.field("reviews", (gen) => gen.integer(0, 500));
  return schema;
}
/**
 * Schema for an order record: UUID id, a user id in 1-100, total, status,
 * item count in 1-10 and creation date.
 */
export function orderSchema(): Schema {
  const statuses = ["pending", "processing", "shipped", "delivered", "cancelled"];
  const schema = new Schema();
  schema.field("id", (gen) => gen.uuid());
  schema.field("userId", (gen) => gen.integer(1, 100));
  schema.field("total", (gen) => gen.decimal(10, 2000));
  schema.field("status", (gen) => gen.pick(statuses));
  schema.field("items", (gen) => gen.integer(1, 10));
  schema.field("createdAt", (gen) => gen.dateString());
  return schema;
}
// src/export.ts
import { writeFileSync } from "node:fs";
/**
 * Serialises records to a JSON array string.
 * @param pretty When true (the default) the output is indented by 2 spaces;
 *   otherwise it is emitted without extra whitespace.
 */
export function toJSON(data: Record<string, unknown>[], pretty: boolean = true): string {
  const indent = pretty ? 2 : undefined;
  return JSON.stringify(data, null, indent);
}
/**
 * Serialises records to CSV text with a header row and "\n" line endings.
 *
 * Columns are taken from the keys of the FIRST record; keys that only appear
 * in later records are not emitted, and keys missing from a record produce an
 * empty cell. `null` and `undefined` become empty cells; every other value is
 * converted with `String()`.
 *
 * @returns The CSV text, or an empty string when `data` is empty.
 */
export function toCSV(data: Record<string, unknown>[]): string {
  if (data.length === 0) return "";

  // Quote any cell (header or value) containing a comma, quote, CR or LF,
  // doubling embedded quotes.
  const escapeCell = (value: unknown): string => {
    const text = String(value ?? "");
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const headers = Object.keys(data[0]);
  const lines = [headers.map(escapeCell).join(",")];
  for (const record of data) {
    lines.push(headers.map((header) => escapeCell(record[header])).join(","));
  }
  return lines.join("\n");
}
/**
 * Serialises `data` in the requested format, writes it to `filepath`
 * synchronously, and logs a one-line summary to stdout.
 *
 * @param format "csv" for CSV output; anything else (default "json") writes JSON.
 */
export function exportToFile(
  data: Record<string, unknown>[],
  filepath: string,
  format: "json" | "csv" = "json"
): void {
  let content: string;
  if (format === "csv") {
    content = toCSV(data);
  } else {
    content = toJSON(data);
  }
  writeFileSync(filepath, content);
  console.log(`Exported ${data.length} records to ${filepath}`);
}
// src/faker.ts
import { SeededRandom } from "./random.js";
import { Generators } from "./generators.js";
import { Schema } from "./schema.js";
/**
 * Top-level entry point: owns one random source and the generators built on
 * it, and offers shortcuts for the most common values.
 */
export class Faker {
  private rng: SeededRandom;
  /** Full generator set, for anything not covered by the shortcuts below. */
  public gen: Generators;

  /** @param seed Seed for the random source; the current time is used when omitted. */
  constructor(seed?: number) {
    const effectiveSeed = seed ?? Date.now();
    const source = new SeededRandom(effectiveSeed);
    this.rng = source;
    this.gen = new Generators(source);
  }

  /** Generates `count` records from `schema` using this instance's generators. */
  generate(schema: Schema, count: number): Record<string, unknown>[] {
    const records = schema.generate(this.gen, count);
    return records;
  }

  // Shortcuts: each forwards to the matching generator with its defaults.

  /** Full name. */
  name(): string {
    return this.gen.fullName();
  }

  /** E-mail address. */
  email(): string {
    return this.gen.email();
  }

  /** Phone number. */
  phone(): string {
    return this.gen.phone();
  }

  /** Full single-line address. */
  address(): string {
    return this.gen.fullAddress();
  }

  /** Company name. */
  company(): string {
    return this.gen.company();
  }

  /** Date string. */
  date(): string {
    return this.gen.dateString();
  }

  /** UUID string. */
  uuid(): string {
    return this.gen.uuid();
  }

  /** Sentence of filler text. */
  sentence(): string {
    return this.gen.sentence();
  }

  /** Paragraph of filler text. */
  paragraph(): string {
    return this.gen.paragraph();
  }

  /** Integer; omitted bounds fall back to the generator's own defaults. */
  integer(min?: number, max?: number): number {
    return this.gen.integer(min, max);
  }

  /**
   * Calls `fn` once per index from 0 up to the requested count, passing the
   * generator set and the index, and collects the results in order.
   */
  many<T>(count: number, fn: (gen: Generators, index: number) => T): T[] {
    const results: T[] = [];
    Array.from({ length: count }).forEach((_, index) => {
      results.push(fn(this.gen, index));
    });
    return results;
  }
}
// src/main.ts
import { Faker } from "./faker.js";
import { userSchema, productSchema, orderSchema, Schema } from "./schema.js";
import { exportToFile, toJSON, toCSV } from "./export.js";
// Positional CLI arguments: <schema> [count] [format] [seed] [output]
const args = process.argv.slice(2);
const schemaName = args[0] ?? "user";
// Explicit radix so the count is always read as decimal.
const count = Number.parseInt(args[1] ?? "10", 10);
const format = args[2] ?? "json";
// An omitted (or empty) seed is passed on as undefined.
const seed = args[3] ? Number.parseInt(args[3], 10) : undefined;
const outputFile = args[4];

// Built-in schema factories, keyed by the name accepted on the command line.
const schemas: Record<string, () => Schema> = {
  user: userSchema,
  product: productSchema,
  order: orderSchema,
};

/**
 * CLI entry point: prints usage for `help`/`--help`/`-h`; otherwise validates
 * the arguments, generates the records, and either writes them to the output
 * file or prints them to stdout. Invalid arguments are reported on stderr and
 * end the process with exit status 1.
 */
function main(): void {
  if (schemaName === "help" || schemaName === "--help" || schemaName === "-h") {
    console.log("Usage: npx tsx src/main.ts <schema> [count] [format] [seed] [output]");
    console.log("");
    console.log("Schemas: user, product, order");
    console.log("Formats: json, csv");
    console.log("Seed: integer for reproducible output");
    console.log("Output: file path (optional, prints to stdout if omitted)");
    console.log("");
    console.log("Examples:");
    console.log(" npx tsx src/main.ts user 100 json");
    console.log(" npx tsx src/main.ts product 50 csv 42");
    console.log(" npx tsx src/main.ts order 1000 json 12345 orders.json");
    return;
  }

  // Own-property check so names such as "toString" are not resolved to
  // methods inherited from Object.prototype.
  const schemaFactory = Object.prototype.hasOwnProperty.call(schemas, schemaName)
    ? schemas[schemaName]
    : undefined;
  if (!schemaFactory) {
    console.error(`Unknown schema: ${schemaName}. Available: ${Object.keys(schemas).join(", ")}`);
    process.exit(1);
  }

  // A non-numeric count would otherwise silently produce an empty result.
  if (Number.isNaN(count) || count < 0) {
    console.error(`Invalid count: ${args[1]}. Expected a non-negative integer.`);
    process.exit(1);
  }

  // An unrecognised format would otherwise silently fall back to JSON.
  if (format !== "json" && format !== "csv") {
    console.error(`Unknown format: ${format}. Available: json, csv`);
    process.exit(1);
  }

  if (seed !== undefined && Number.isNaN(seed)) {
    console.error(`Invalid seed: ${args[3]}. Expected an integer.`);
    process.exit(1);
  }

  const faker = new Faker(seed);
  const schema = schemaFactory();
  const data = faker.generate(schema, count);

  if (outputFile) {
    exportToFile(data, outputFile, format);
  } else {
    console.log(format === "csv" ? toCSV(data) : toJSON(data));
  }

  // Reported on stderr so it never mixes into data written to stdout.
  if (seed !== undefined) {
    console.error(`(seed: ${seed} - run again with same seed for identical output)`);
  }
}

main();
// src/demo.ts
import { Faker } from "./faker.js";
import { Schema } from "./schema.js";
// Reproducibility: two Faker instances built from one seed must agree call-for-call
console.log("--- Reproducibility Test ---");
const faker1 = new Faker(42);
const faker2 = new Faker(42);
let round = 0;
while (round < 5) {
  const a = faker1.name();
  const b = faker2.name();
  const verdict = a === b ? "MATCH" : "DIFF";
  console.log(`${verdict}: "${a}" vs "${b}"`);
  round += 1;
}

// Custom schema: fields are registered one at a time, in output order
console.log("\n--- Custom Blog Post Schema ---");
const faker = new Faker(123);
const blogSchema = new Schema();
blogSchema.field("id", (_gen, index) => index + 1);
blogSchema.field("title", (gen) => {
  const adjectives = ["Amazing", "Complete", "Ultimate", "Essential", "Quick"];
  const topics = ["Guide", "Tutorial", "Deep Dive", "Overview", "Introduction"];
  const subjects = ["TypeScript", "React", "Node.js", "CSS", "APIs"];
  // Drawn in this order: adjective, then subject, then topic.
  const adjective = gen.pick(adjectives);
  const subject = gen.pick(subjects);
  const topic = gen.pick(topics);
  return `The ${adjective} ${subject} ${topic}`;
});
blogSchema.field("author", (gen) => gen.fullName());
blogSchema.field("publishedAt", (gen) => gen.dateString());
blogSchema.field("readingTime", (gen) => {
  const minutes = gen.integer(3, 20);
  return `${minutes} min`;
});
blogSchema.field("tags", (gen) => {
  const allTags = ["javascript", "typescript", "react", "node", "css", "tutorial", "beginner"];
  return gen.pick(allTags);
});
blogSchema.field("excerpt", (gen) => gen.sentence(10));
const posts = faker.generate(blogSchema, 5);
posts.forEach((post) => {
  console.log(`[${post.id}] ${post.title} by ${post.author} (${post.readingTime})`);
});

// Bulk generation: time how long 10,000 small records take
console.log("\n--- Bulk Generation ---");
const bulkFaker = new Faker(999);
const start = performance.now();
const users = bulkFaker.many(10000, (gen, i) => {
  // Name is drawn before email, matching the property order of the record.
  const name = gen.fullName();
  const email = gen.email();
  return { id: i + 1, name, email };
});
const elapsed = (performance.now() - start).toFixed(1);
console.log(`Generated ${users.length} users in ${elapsed}ms`);
const firstUser = users[0];
console.log(`First: ${firstUser.name} <${firstUser.email}>`);
console.log(`Last: ${users[9999].name} <${users[9999].email}>`);
# Generate 20 users as JSON
npx tsx src/main.ts user 20
# Generate 50 products as CSV with a fixed seed
npx tsx src/main.ts product 50 csv 42
# Export 1000 orders to a file
npx tsx src/main.ts order 1000 json 12345 orders.json