#!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.10" # dependencies = ["pyyaml>=6.0"] # /// """Validate DesireCore market i18n state. Checks: 1. SKILL.md frontmatter top-level `name` matches parent dir name and is spec-compliant. 2. metadata.i18n.default_locale and source_locale are listed in metadata.i18n.locales. 3. Each declared locale has metadata.i18n..{name,short_desc}. 4. metadata.i18n..body, if present, points to an existing file; otherwise the fallback chain must terminate at a readable root SKILL.md. 5. SKILL..md, if it declares , must match the filename locale. 6. Frontmatter parses cleanly; heading count of locale body matches source body (+/- 0). 7. categories.json's per-category i18n covers all locales declared in manifest.json. 8. Top-level description is 1-1024 chars (spec); top-level name is 1-64 chars (spec). Exit codes: 0 = pass 1 = validation errors found 2 = unexpected runtime error / missing dependencies Usage: python3 scripts/i18n/validate-i18n.py # validate everything under repo root python3 scripts/i18n/validate-i18n.py skills/web-access # validate single skill python3 scripts/i18n/validate-i18n.py --json # machine-readable output """ from __future__ import annotations import argparse import json import re import sys from dataclasses import dataclass, field from pathlib import Path from typing import Any, Iterable try: import yaml except ImportError: sys.stderr.write("ERROR: PyYAML is required. Install with: pip install pyyaml\n") sys.exit(2) REPO_ROOT = Path(__file__).resolve().parents[2] NAME_PATTERN = re.compile(r"^(?!-)(?!.*--)[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$") RESERVED_NAMES = {"anthropic", "claude"} LOCALE_PATTERN = re.compile(r"^[a-z]{2,3}(?:-[A-Z]{2})?$") LOCALE_HEADER_PATTERN = re.compile(r"^") HEADING_PATTERN = re.compile(r"^(#{1,6})\s+\S", re.MULTILINE) FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n(.*)$", re.DOTALL) @dataclass class Issue: path: str rule: str message: str severity: str = "error" def to_dict(self) -> dict[str, str]: return { "path": self.path, "rule": self.rule, "message": self.message, "severity": self.severity, } @dataclass class Report: issues: list[Issue] = field(default_factory=list) def add(self, issue: Issue) -> None: self.issues.append(issue) @property def has_errors(self) -> bool: return any(i.severity == "error" for i in self.issues) def parse_frontmatter(text: str) -> tuple[dict[str, Any] | None, str | None, str | None]: """Return (frontmatter_dict, body, error). All three Nones means file is empty.""" if not text.strip(): return None, None, "empty file" m = FRONTMATTER_RE.match(text) if not m: return None, None, "no YAML frontmatter (file must start with '---')" try: fm = yaml.safe_load(m.group(1)) or {} except yaml.YAMLError as e: return None, None, f"YAML parse error: {e}" if not isinstance(fm, dict): return None, None, "frontmatter must be a YAML mapping" return fm, m.group(2), None def heading_count(text: str) -> int: return len(HEADING_PATTERN.findall(text or "")) def validate_skill(skill_dir: Path, report: Report, declared_locales: set[str] | None = None) -> None: """Validate one skill directory (must contain SKILL.md).""" rel_dir = skill_dir.relative_to(REPO_ROOT).as_posix() skill_md = skill_dir / "SKILL.md" if not skill_md.is_file(): report.add(Issue(rel_dir, "structure", "SKILL.md not found")) return text = skill_md.read_text(encoding="utf-8") fm, body, err = parse_frontmatter(text) if err: report.add(Issue(f"{rel_dir}/SKILL.md", "rule-6", err)) return assert fm is not None and body is not None name = fm.get("name", "") description = fm.get("description", "") # Rule 1: name spec-compliance + matches dir if not isinstance(name, str) or not NAME_PATTERN.match(name) or len(name) > 64 or name in RESERVED_NAMES: report.add(Issue( f"{rel_dir}/SKILL.md", "rule-1", f"name {name!r} is not spec-compliant (must be lowercase ASCII + hyphens, 1-64 chars, not 'anthropic'/'claude')" )) if name != skill_dir.name: report.add(Issue( f"{rel_dir}/SKILL.md", "rule-1", f"name '{name}' must equal parent dir name '{skill_dir.name}' (spec)" )) # Rule 8: description length if not isinstance(description, str) or not (1 <= len(description) <= 1024): report.add(Issue( f"{rel_dir}/SKILL.md", "rule-8", f"description must be 1-1024 chars (got {len(description) if isinstance(description, str) else 'non-string'})" )) # Rule 2/3/4: i18n block metadata = fm.get("metadata") or {} i18n = metadata.get("i18n") if isinstance(metadata, dict) else None if not isinstance(i18n, dict): report.add(Issue(f"{rel_dir}/SKILL.md", "rule-2", "metadata.i18n block missing")) return default_locale = i18n.get("default_locale") source_locale = i18n.get("source_locale") locales = i18n.get("locales") or [] if not isinstance(locales, list) or not all(isinstance(x, str) for x in locales): report.add(Issue(f"{rel_dir}/SKILL.md", "rule-2", "metadata.i18n.locales must be a list of strings")) return locale_set = set(locales) for tag in ("default_locale", "source_locale"): val = i18n.get(tag) if not isinstance(val, str) or not LOCALE_PATTERN.match(val): report.add(Issue(f"{rel_dir}/SKILL.md", "rule-2", f"metadata.i18n.{tag} '{val!r}' is not a valid BCP-47 locale")) elif val not in locale_set: report.add(Issue(f"{rel_dir}/SKILL.md", "rule-2", f"metadata.i18n.{tag} '{val}' not present in metadata.i18n.locales")) if declared_locales is not None: missing = declared_locales - locale_set if missing: report.add(Issue( f"{rel_dir}/SKILL.md", "rule-7", f"manifest declares locales {sorted(declared_locales)} but skill is missing {sorted(missing)}", severity="error" )) # Rule 3: per-locale name/short_desc presence source_body_text: str | None = None for locale in locales: if not LOCALE_PATTERN.match(locale): report.add(Issue(f"{rel_dir}/SKILL.md", "rule-3", f"locale '{locale}' is not a valid BCP-47 tag")) continue payload = i18n.get(locale) if not isinstance(payload, dict): report.add(Issue(f"{rel_dir}/SKILL.md", "rule-3", f"metadata.i18n.{locale} block missing or not a mapping")) continue for required in ("name", "short_desc"): v = payload.get(required) if not isinstance(v, str) or not v.strip(): report.add(Issue( f"{rel_dir}/SKILL.md", "rule-3", f"metadata.i18n.{locale}.{required} is missing or empty" )) # Rule 4: body file presence body_path_str = payload.get("body") body_text: str | None = None if body_path_str: if not isinstance(body_path_str, str) or not body_path_str.startswith("./"): report.add(Issue( f"{rel_dir}/SKILL.md", "rule-4", f"metadata.i18n.{locale}.body must be a relative path starting with './' (got {body_path_str!r})" )) else: body_file = (skill_dir / body_path_str.removeprefix("./")).resolve() if not body_file.is_file(): report.add(Issue( f"{rel_dir}/SKILL.md", "rule-4", f"metadata.i18n.{locale}.body points to missing file '{body_path_str}'" )) else: body_text = body_file.read_text(encoding="utf-8") # Rule 5: locale header self-check (only when not the root SKILL.md) if body_file.name != "SKILL.md": first_line = body_text.splitlines()[0] if body_text else "" m = LOCALE_HEADER_PATTERN.match(first_line) if m and m.group(1) != locale: report.add(Issue( f"{rel_dir}/{body_file.name}", "rule-5", f"file declares locale '{m.group(1)}' but is referenced as '{locale}'" )) else: # Fallback to root SKILL.md body (default_locale must have a usable body) if locale == default_locale: body_text = body else: # OK to omit body for non-default locales (will fall back at runtime) pass if locale == source_locale: source_body_text = body_text or body # source defaults to root if not specified # Rule 6: heading count consistency between source and other locales' bodies if source_body_text is not None: source_count = heading_count(source_body_text) for locale in locales: if locale == source_locale: continue payload = i18n.get(locale) or {} body_path_str = payload.get("body") if body_path_str: body_file = (skill_dir / body_path_str.removeprefix("./")).resolve() if body_file.is_file(): other_text = body_file.read_text(encoding="utf-8") other_count = heading_count(other_text) if other_count != source_count: report.add(Issue( f"{rel_dir}/{body_file.name}", "rule-6", f"heading count {other_count} differs from source ({source_count})", severity="warning", )) def validate_market_root(report: Report) -> set[str]: """Validate manifest.json + categories.json. Returns the declared locale set or empty.""" manifest_path = REPO_ROOT / "manifest.json" categories_path = REPO_ROOT / "categories.json" declared: set[str] = set() if manifest_path.is_file(): try: manifest = json.loads(manifest_path.read_text(encoding="utf-8")) except json.JSONDecodeError as e: report.add(Issue("manifest.json", "rule-7", f"JSON parse error: {e}")) manifest = {} supported = manifest.get("supportedLocales") or [] if not isinstance(supported, list) or not all(isinstance(x, str) and LOCALE_PATTERN.match(x) for x in supported): report.add(Issue("manifest.json", "rule-7", "supportedLocales must be a list of BCP-47 tags")) else: declared = set(supported) default = manifest.get("defaultLocale") if declared and default not in declared: report.add(Issue("manifest.json", "rule-7", f"defaultLocale '{default}' not in supportedLocales")) if categories_path.is_file() and declared: try: categories = json.loads(categories_path.read_text(encoding="utf-8")) except json.JSONDecodeError as e: report.add(Issue("categories.json", "rule-7", f"JSON parse error: {e}")) return declared for cat_id, cat in categories.items(): i18n = cat.get("i18n") if isinstance(cat, dict) else None if not isinstance(i18n, dict): report.add(Issue("categories.json", "rule-7", f"category '{cat_id}' missing i18n block")) continue for locale in declared: payload = i18n.get(locale) if not isinstance(payload, dict) or not payload.get("label"): report.add(Issue( "categories.json", "rule-7", f"category '{cat_id}' missing i18n.{locale}.label" )) return declared def iter_skill_dirs(targets: Iterable[Path]) -> Iterable[Path]: for t in targets: if t.is_file() and t.name == "SKILL.md": yield t.parent elif t.is_dir() and (t / "SKILL.md").is_file(): yield t elif t.is_dir(): for child in sorted(t.iterdir()): if child.is_dir() and (child / "SKILL.md").is_file(): yield child def main(argv: list[str]) -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("paths", nargs="*", help="Skills or directories to validate (default: repo root)") parser.add_argument("--json", action="store_true", help="Emit machine-readable JSON") args = parser.parse_args(argv) report = Report() declared_locales = validate_market_root(report) if args.paths: targets = [Path(p).resolve() for p in args.paths] else: targets = [REPO_ROOT / "skills"] for skill_dir in iter_skill_dirs(targets): validate_skill(skill_dir, report, declared_locales=declared_locales or None) if args.json: json.dump([i.to_dict() for i in report.issues], sys.stdout, indent=2, ensure_ascii=False) sys.stdout.write("\n") else: if not report.issues: print("OK: no i18n issues found.") else: for issue in report.issues: marker = "ERROR" if issue.severity == "error" else "WARN " print(f"[{marker}] {issue.path} :: {issue.rule} :: {issue.message}") errors = sum(1 for i in report.issues if i.severity == "error") warns = sum(1 for i in report.issues if i.severity == "warning") print(f"\n{errors} error(s), {warns} warning(s).") return 1 if report.has_errors else 0 if __name__ == "__main__": try: sys.exit(main(sys.argv[1:])) except KeyboardInterrupt: sys.exit(130)