✓ Verified 💻 Development ✓ Enhanced Data

Data Validation

Validate data with schemas across languages and formats.

Rating
4.5 (103 reviews)
Downloads
39,865 downloads
Version
1.0.0

Overview

Validate data with schemas across languages and formats.

Complete Documentation

View Source →

Data Validation

Schema-based data validation across languages and formats. Covers JSON Schema, Zod (TypeScript), Pydantic (Python), API boundary validation, data contracts, and integrity checking.

When to Use

  • Defining the shape of API request/response bodies
  • Validating user input before processing
  • Setting up data contracts between services
  • Checking CSV/JSON file integrity before import
  • Migrating data (did the ETL preserve everything?)
  • Generating types or documentation from schemas

JSON Schema

Basic schema

json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "required": ["name", "email", "age"],
  "properties": {
    "name": {
      "type": "string",
      "minLength": 1,
      "maxLength": 100
    },
    "email": {
      "type": "string",
      "format": "email"
    },
    "age": {
      "type": "integer",
      "minimum": 0,
      "maximum": 150
    },
    "role": {
      "type": "string",
      "enum": ["user", "admin", "moderator"],
      "default": "user"
    },
    "tags": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true,
      "maxItems": 10
    },
    "address": {
      "type": "object",
      "properties": {
        "street": { "type": "string" },
        "city": { "type": "string" },
        "zip": { "type": "string", "pattern": "^\\d{5}(-\\d{4})?$" }
      },
      "required": ["street", "city"]
    }
  },
  "additionalProperties": false
}

Common patterns

json
// Nullable field
{ "type": ["string", "null"] }

// Union type (string or number)
{ "oneOf": [{ "type": "string" }, { "type": "number" }] }

// Conditional: if role is admin, require permissions
{
  "if": { "properties": { "role": { "const": "admin" } } },
  "then": { "required": ["permissions"] }
}

// Pattern properties (dynamic keys)
{
  "type": "object",
  "patternProperties": {
    "^env_": { "type": "string" }
  }
}

// Reusable definitions
{
  "$defs": {
    "address": {
      "type": "object",
      "properties": {
        "street": { "type": "string" },
        "city": { "type": "string" }
      }
    }
  },
  "properties": {
    "home": { "$ref": "#/$defs/address" },
    "work": { "$ref": "#/$defs/address" }
  }
}

Validate with command line

bash
# Using ajv-cli (Node.js)
npx ajv-cli validate -s schema.json -d data.json

# Using jsonschema (Python)
pip install jsonschema
python3 -c "
import json, jsonschema
schema = json.load(open('schema.json'))
data = json.load(open('data.json'))
jsonschema.validate(data, schema)
print('Valid')
"

# Validate multiple files
for f in data/*.json; do
  npx ajv-cli validate -s schema.json -d "$f" 2>&1 || echo "INVALID: $f"
done

Zod (TypeScript)

Basic schemas

typescript
import { z } from 'zod';

// Primitives
const nameSchema = z.string().min(1).max(100);
const ageSchema = z.number().int().min(0).max(150);
const emailSchema = z.string().email();
const urlSchema = z.string().url();

// Objects
const userSchema = z.object({
  name: z.string().min(1),
  email: z.string().email(),
  age: z.number().int().min(0),
  role: z.enum(['user', 'admin', 'moderator']).default('user'),
  tags: z.array(z.string()).max(10).default([]),
  createdAt: z.string().datetime(),
});

// Infer TypeScript type from schema
type User = z.infer<typeof userSchema>;
// { name: string; email: string; age: number; role: "user" | "admin" | "moderator"; ... }

// Validate
const result = userSchema.safeParse(data);
if (result.success) {
  console.log(result.data); // typed as User
} else {
  console.log(result.error.issues); // validation errors
}

// Parse (throws on invalid)
const user = userSchema.parse(data);

Advanced patterns

typescript
// Optional and nullable
const schema = z.object({
  name: z.string(),
  nickname: z.string().optional(),       // string | undefined
  middleName: z.string().nullable(),     // string | null
  suffix: z.string().nullish(),          // string | null | undefined
});

// Transforms (validate then transform)
const dateSchema = z.string().datetime().transform(s => new Date(s));
const trimmed = z.string().trim().toLowerCase();
const parsed = z.string().transform(s => parseInt(s, 10)).pipe(z.number().int());

// Discriminated unions (tagged unions)
const eventSchema = z.discriminatedUnion('type', [
  z.object({ type: z.literal('click'), x: z.number(), y: z.number() }),
  z.object({ type: z.literal('keypress'), key: z.string() }),
  z.object({ type: z.literal('scroll'), delta: z.number() }),
]);

// Recursive types
const categorySchema: z.ZodType<Category> = z.object({
  name: z.string(),
  children: z.lazy(() => z.array(categorySchema)).default([]),
});

// Refinements (custom validation)
const passwordSchema = z.string()
  .min(8)
  .refine(s => /[A-Z]/.test(s), 'Must contain uppercase')
  .refine(s => /[0-9]/.test(s), 'Must contain digit')
  .refine(s => /[^a-zA-Z0-9]/.test(s), 'Must contain special character');

// Extend/merge objects
const baseUser = z.object({ name: z.string(), email: z.string() });
const adminUser = baseUser.extend({ permissions: z.array(z.string()) });

// Pick/omit
const createUser = userSchema.omit({ createdAt: true });
const userSummary = userSchema.pick({ name: true, email: true });

// Passthrough (allow extra fields)
const flexible = userSchema.passthrough();

// Strip unknown fields
const strict = userSchema.strict(); // Error on extra fields

API validation with Zod

typescript
// Express middleware
import { z } from 'zod';

const createUserBody = z.object({
  name: z.string().min(1),
  email: z.string().email(),
  password: z.string().min(8),
});

app.post('/api/users', (req, res) => {
  const result = createUserBody.safeParse(req.body);
  if (!result.success) {
    return res.status(400).json({ errors: result.error.issues });
  }
  const { name, email, password } = result.data;
  // ... create user
});

// Query parameter validation
const listParams = z.object({
  page: z.coerce.number().int().min(1).default(1),
  limit: z.coerce.number().int().min(1).max(100).default(20),
  sort: z.enum(['newest', 'oldest', 'name']).default('newest'),
  q: z.string().optional(),
});

app.get('/api/users', (req, res) => {
  const params = listParams.parse(req.query);
  // params.page is a number, params.sort is typed
});

Pydantic (Python)

Basic models

python
from pydantic import BaseModel, Field, EmailStr, field_validator
from typing import Optional
from datetime import datetime
from enum import Enum

class Role(str, Enum):
    USER = "user"
    ADMIN = "admin"
    MODERATOR = "moderator"

class Address(BaseModel):
    street: str
    city: str
    zip_code: str = Field(pattern=r"^\d{5}(-\d{4})?$")

class User(BaseModel):
    name: str = Field(min_length=1, max_length=100)
    email: EmailStr
    age: int = Field(ge=0, le=150)
    role: Role = Role.USER
    tags: list[str] = Field(default_factory=list, max_length=10)
    address: Optional[Address] = None
    created_at: datetime = Field(default_factory=datetime.now)

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        if not v.strip():
            raise ValueError("name cannot be blank")
        return v.strip()

# Validate
user = User(name="Alice", email="[email protected]", age=30)
print(user.model_dump())      # dict
print(user.model_dump_json())  # JSON string

# Validation errors
try:
    User(name="", email="bad", age=-1)
except Exception as e:
    print(e)  # Detailed validation errors

Advanced patterns

python
from pydantic import BaseModel, model_validator, ConfigDict
from typing import Literal, Union, Annotated

# Discriminated union
class ClickEvent(BaseModel):
    type: Literal["click"]
    x: int
    y: int

class KeypressEvent(BaseModel):
    type: Literal["keypress"]
    key: str

Event = Annotated[Union[ClickEvent, KeypressEvent], Field(discriminator="type")]

# Model-level validation (cross-field)
class DateRange(BaseModel):
    start: datetime
    end: datetime

    @model_validator(mode="after")
    def end_after_start(self):
        if self.end <= self.start:
            raise ValueError("end must be after start")
        return self

# Strict mode (no type coercion)
class StrictUser(BaseModel):
    model_config = ConfigDict(strict=True)
    age: int  # "30" will be rejected, must be int 30

# Alias (accept different field names in input)
class APIResponse(BaseModel):
    user_name: str = Field(alias="userName")
    created_at: datetime = Field(alias="createdAt")

    model_config = ConfigDict(populate_by_name=True)

# Computed fields
from pydantic import computed_field

class Order(BaseModel):
    items: list[dict]
    tax_rate: float = 0.08

    @computed_field
    @property
    def total(self) -> float:
        subtotal = sum(i.get("price", 0) * i.get("qty", 1) for i in self.items)
        return round(subtotal * (1 + self.tax_rate), 2)

# Generate JSON Schema
print(User.model_json_schema())

FastAPI integration

python
from fastapi import FastAPI, Query
from pydantic import BaseModel

app = FastAPI()

class CreateUser(BaseModel):
    name: str = Field(min_length=1)
    email: EmailStr
    password: str = Field(min_length=8)

class UserResponse(BaseModel):
    id: int
    name: str
    email: str

@app.post("/api/users", response_model=UserResponse)
async def create_user(body: CreateUser):
    # body is already validated
    return {"id": 1, "name": body.name, "email": body.email}

@app.get("/api/users")
async def list_users(
    page: int = Query(default=1, ge=1),
    limit: int = Query(default=20, ge=1, le=100),
    q: str | None = Query(default=None),
):
    # All params validated and typed
    pass

Data Integrity Checks

CSV validation

bash
#!/bin/bash
# validate-csv.sh — Check CSV structure and data quality
FILE="${1:?Usage: validate-csv.sh <file.csv>}"

echo "=== CSV Validation: $FILE ==="

# Row count
ROWS=$(wc -l < "$FILE")
echo "Rows: $ROWS (including header)"

# Column count consistency
HEADER_COLS=$(head -1 "$FILE" | awk -F',' '{print NF}')
echo "Columns (header): $HEADER_COLS"

BAD_ROWS=$(awk -F',' -v expected="$HEADER_COLS" 'NR>1 && NF!=expected {count++} END {print count+0}' "$FILE")
if [ "$BAD_ROWS" -gt 0 ]; then
    echo "ERROR: $BAD_ROWS rows have wrong column count"
    awk -F',' -v expected="$HEADER_COLS" 'NR>1 && NF!=expected {print "  Line "NR": "NF" columns (expected "expected")"}' "$FILE" | head -5
else
    echo "Column count: consistent"
fi

# Empty fields
EMPTY=$(awk -F',' '{for(i=1;i<=NF;i++) if($i=="") count++} END {print count}' "$FILE")
echo "Empty fields: $EMPTY"

# Duplicate rows
DUPES=$(($(sort "$FILE" | uniq -d | wc -l)))
echo "Duplicate rows: $DUPES"

echo "=== Done ==="

JSON validation

bash
# Check if file is valid JSON
jq empty data.json && echo "Valid JSON" || echo "Invalid JSON"

# Validate structure of each object in an array
jq -e '
  .[] |
  select(
    (.name | type) != "string" or
    (.email | type) != "string" or
    (.age | type) != "number" or
    .age < 0
  )
' data.json && echo "INVALID records found" || echo "All records valid"

# Check for required fields
jq -e '.[] | select(.id == null or .name == null)' data.json

# Check for unique IDs
jq '[.[].id] | length != (. | unique | length)' data.json
# true = duplicates exist

# Compare record counts between source and target
SRC=$(jq length source.json)
TGT=$(jq length target.json)
echo "Source: $SRC, Target: $TGT, Match: $([ "$SRC" = "$TGT" ] && echo yes || echo NO)"

Migration validation

python
#!/usr/bin/env python3
"""Validate that a data migration preserved all records."""
import json
import sys

def validate_migration(source_path, target_path, key_field="id"):
    with open(source_path) as f:
        source = {r[key_field]: r for r in json.load(f)}
    with open(target_path) as f:
        target = {r[key_field]: r for r in json.load(f)}

    missing = set(source) - set(target)
    extra = set(target) - set(source)
    changed = []

    for key in set(source) & set(target):
        if source[key] != target[key]:
            changed.append(key)

    print(f"Source records: {len(source)}")
    print(f"Target records: {len(target)}")
    print(f"Missing in target: {len(missing)}")
    print(f"Extra in target: {len(extra)}")
    print(f"Changed: {len(changed)}")

    if missing:
        print(f"\nMissing IDs (first 10): {list(missing)[:10]}")
    if extra:
        print(f"\nExtra IDs (first 10): {list(extra)[:10]}")
    if changed:
        print(f"\nChanged IDs (first 5): {changed[:5]}")
        for key in changed[:3]:
            print(f"\n  {key}:")
            for field in set(source[key]) | set(target[key]):
                s = source[key].get(field)
                t = target[key].get(field)
                if s != t:
                    print(f"    {field}: {s!r} → {t!r}")

    return len(missing) == 0 and len(extra) == 0

if __name__ == "__main__":
    ok = validate_migration(sys.argv[1], sys.argv[2], sys.argv[3] if len(sys.argv) > 3 else "id")
    sys.exit(0 if ok else 1)

Tips

  • Validate at system boundaries (API endpoints, file imports, message queues), not deep inside business logic. Trust internal data.
  • Zod and Pydantic both generate JSON Schema from their definitions. Use this for documentation, OpenAPI specs, and cross-language contracts.
  • additionalProperties: false in JSON Schema catches typos in field names. Use it for strict APIs.
  • Pydantic v2 is significantly faster than v1. Use model_config = ConfigDict(strict=True) when you want no implicit type coercion.
  • Zod's .safeParse() returns a result object; .parse() throws. Use safeParse in API handlers to return structured errors.
  • For CSV validation, always check column count consistency first — most downstream errors trace back to misaligned columns.
  • Data migration validation should compare record counts, check for missing/extra records, and sample-check field values. Counting alone isn't enough.

Installation

Terminal bash

openclaw install data-validation
    
Copied!

💻Code Examples

done

done.txt
## Zod (TypeScript)

### Basic schemas

});

.txt
## Pydantic (Python)

### Basic models

pass

-pass.txt
## Data Integrity Checks

### CSV validation
example.json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "required": ["name", "email", "age"],
  "properties": {
    "name": {
      "type": "string",
      "minLength": 1,
      "maxLength": 100
    },
    "email": {
      "type": "string",
      "format": "email"
    },
    "age": {
      "type": "integer",
      "minimum": 0,
      "maximum": 150
    },
    "role": {
      "type": "string",
      "enum": ["user", "admin", "moderator"],
      "default": "user"
    },
    "tags": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true,
      "maxItems": 10
    },
    "address": {
      "type": "object",
      "properties": {
        "street": { "type": "string" },
        "city": { "type": "string" },
        "zip": { "type": "string", "pattern": "^\\d{5}(-\\d{4})?$" }
      },
      "required": ["street", "city"]
    }
  },
  "additionalProperties": false
}
example.json
// Nullable field
{ "type": ["string", "null"] }

// Union type (string or number)
{ "oneOf": [{ "type": "string" }, { "type": "number" }] }

// Conditional: if role is admin, require permissions
{
  "if": { "properties": { "role": { "const": "admin" } } },
  "then": { "required": ["permissions"] }
}

// Pattern properties (dynamic keys)
{
  "type": "object",
  "patternProperties": {
    "^env_": { "type": "string" }
  }
}

// Reusable definitions
{
  "$defs": {
    "address": {
      "type": "object",
      "properties": {
        "street": { "type": "string" },
        "city": { "type": "string" }
      }
    }
  },
  "properties": {
    "home": { "$ref": "#/$defs/address" },
    "work": { "$ref": "#/$defs/address" }
  }
}
example.sh
# Using ajv-cli (Node.js)
npx ajv-cli validate -s schema.json -d data.json

# Using jsonschema (Python)
pip install jsonschema
python3 -c "
import json, jsonschema
schema = json.load(open('schema.json'))
data = json.load(open('data.json'))
jsonschema.validate(data, schema)
print('Valid')
"

# Validate multiple files
for f in data/*.json; do
  npx ajv-cli validate -s schema.json -d "$f" 2>&1 || echo "INVALID: $f"
done
example.ts
import { z } from 'zod';

// Primitives
const nameSchema = z.string().min(1).max(100);
const ageSchema = z.number().int().min(0).max(150);
const emailSchema = z.string().email();
const urlSchema = z.string().url();

// Objects
const userSchema = z.object({
  name: z.string().min(1),
  email: z.string().email(),
  age: z.number().int().min(0),
  role: z.enum(['user', 'admin', 'moderator']).default('user'),
  tags: z.array(z.string()).max(10).default([]),
  createdAt: z.string().datetime(),
});

// Infer TypeScript type from schema
type User = z.infer<typeof userSchema>;
// { name: string; email: string; age: number; role: "user" | "admin" | "moderator"; ... }

// Validate
const result = userSchema.safeParse(data);
if (result.success) {
  console.log(result.data); // typed as User
} else {
  console.log(result.error.issues); // validation errors
}

// Parse (throws on invalid)
const user = userSchema.parse(data);
example.ts
// Optional and nullable
const schema = z.object({
  name: z.string(),
  nickname: z.string().optional(),       // string | undefined
  middleName: z.string().nullable(),     // string | null
  suffix: z.string().nullish(),          // string | null | undefined
});

// Transforms (validate then transform)
const dateSchema = z.string().datetime().transform(s => new Date(s));
const trimmed = z.string().trim().toLowerCase();
const parsed = z.string().transform(s => parseInt(s, 10)).pipe(z.number().int());

// Discriminated unions (tagged unions)
const eventSchema = z.discriminatedUnion('type', [
  z.object({ type: z.literal('click'), x: z.number(), y: z.number() }),
  z.object({ type: z.literal('keypress'), key: z.string() }),
  z.object({ type: z.literal('scroll'), delta: z.number() }),
]);

// Recursive types
const categorySchema: z.ZodType<Category> = z.object({
  name: z.string(),
  children: z.lazy(() => z.array(categorySchema)).default([]),
});

// Refinements (custom validation)
const passwordSchema = z.string()
  .min(8)
  .refine(s => /[A-Z]/.test(s), 'Must contain uppercase')
  .refine(s => /[0-9]/.test(s), 'Must contain digit')
  .refine(s => /[^a-zA-Z0-9]/.test(s), 'Must contain special character');

// Extend/merge objects
const baseUser = z.object({ name: z.string(), email: z.string() });
const adminUser = baseUser.extend({ permissions: z.array(z.string()) });

// Pick/omit
const createUser = userSchema.omit({ createdAt: true });
const userSummary = userSchema.pick({ name: true, email: true });

// Passthrough (allow extra fields)
const flexible = userSchema.passthrough();

// Strip unknown fields
const strict = userSchema.strict(); // Error on extra fields
example.ts
// Express middleware
import { z } from 'zod';

const createUserBody = z.object({
  name: z.string().min(1),
  email: z.string().email(),
  password: z.string().min(8),
});

app.post('/api/users', (req, res) => {
  const result = createUserBody.safeParse(req.body);
  if (!result.success) {
    return res.status(400).json({ errors: result.error.issues });
  }
  const { name, email, password } = result.data;
  // ... create user
});

// Query parameter validation
const listParams = z.object({
  page: z.coerce.number().int().min(1).default(1),
  limit: z.coerce.number().int().min(1).max(100).default(20),
  sort: z.enum(['newest', 'oldest', 'name']).default('newest'),
  q: z.string().optional(),
});

app.get('/api/users', (req, res) => {
  const params = listParams.parse(req.query);
  // params.page is a number, params.sort is typed
});
example.py
from pydantic import BaseModel, Field, EmailStr, field_validator
from typing import Optional
from datetime import datetime
from enum import Enum

class Role(str, Enum):
    USER = "user"
    ADMIN = "admin"
    MODERATOR = "moderator"

class Address(BaseModel):
    street: str
    city: str
    zip_code: str = Field(pattern=r"^\d{5}(-\d{4})?$")

class User(BaseModel):
    name: str = Field(min_length=1, max_length=100)
    email: EmailStr
    age: int = Field(ge=0, le=150)
    role: Role = Role.USER
    tags: list[str] = Field(default_factory=list, max_length=10)
    address: Optional[Address] = None
    created_at: datetime = Field(default_factory=datetime.now)

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        if not v.strip():
            raise ValueError("name cannot be blank")
        return v.strip()

# Validate
user = User(name="Alice", email="[email protected]", age=30)
print(user.model_dump())      # dict
print(user.model_dump_json())  # JSON string

# Validation errors
try:
    User(name="", email="bad", age=-1)
except Exception as e:
    print(e)  # Detailed validation errors

Tags

#web_and-frontend-development #data

Quick Info

Category Development
Model Claude 3.5
Complexity One-Click
Author gitgoodordietrying
Last Updated 3/10/2026
🚀
Optimized for
Claude 3.5
🧠

Ready to Install?

Get started with this skill in seconds

openclaw install data-validation