mirror of
https://git.openapi.site/https://github.com/desirecore/market.git
synced 2026-06-06 04:30:42 +08:00
* feat: skills i18n 改造 — schemaVersion 1.1,零向后兼容
把 21 个 skills + 1 个 agent + manifest/categories 全量迁移到 schemaVersion 1.1
的 i18n 结构,配套 CI AI 翻译流水线(GitHub Models)与本地工具链。
## 关键变更
### 数据结构(破坏性,schemaVersion 1.0 → 1.1)
- SKILL.md: 顶层 name 改为 ASCII slug(== 目录名,符合 agentskills.io 规范);
中文显示名/short_desc/description 全部迁入 metadata.i18n.<locale>
- agents/<id>/agent.json: shortDesc/fullDesc/tags/persona.{role,traits} 迁入
i18n.<locale>;changelog[].changes 改为 { <locale>: string[] } 对象
- categories.json: 每个分类的 label/description 迁入 i18n.<locale>,顶层只剩
color/icon
- manifest.json: 加 supportedLocales / defaultLocale;顶层 description 迁入
i18n.<locale>
### Body 文件结构
- 根 SKILL.md = frontmatter + default_locale (en-US) body
- SKILL.<locale>.md = 各 locale 的 markdown body(首行 <!-- locale: xx --> 自校验)
### 工具链(scripts/i18n/)
- glossary.json: zh→en 术语表 + do_not_translate 白名单
- schema/skill-frontmatter.schema.json: i18n frontmatter JSON Schema
- validate-i18n.py: 8 条校验规则(name 合规 / locale 完整性 / hash 一致性等)
- translate.py: GitHub Models / Anthropic 双 backend,sha256 增量翻译
- migrate.py: 一次性迁移脚本(旧格式 → i18n 结构)
### CI(.github/workflows/)
- i18n-validate.yml: PR 触发跑 validate + translate --check
- i18n-translate.yml: PR 触发用 GitHub Models(默认 openai/gpt-5-mini)翻译缺失
locale,自动追加 commit;可切到 ANTHROPIC_API_KEY 走 Claude
### 文档
- docs/I18N.md: 作者贡献指南(schema 说明 / 提交流程 / 常见问题)
- README.md: 加多语言段落
## 验证
- uv run scripts/i18n/validate-i18n.py: OK,49 文件 0 错误
- uv run scripts/i18n/translate.py --check: 0 stale locale
- 21 skills 标题数 zh-CN == en-US 严格对齐(最大 66=66)
- skills-ref 规范校验:全部通过(顶层 name ASCII slug + description 单字段)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
* fix(i18n): 修复 PR #1 review 反馈的 6 项问题
- schema: translated_by 正则放宽为 ^(human|ai:[A-Za-z0-9._:/-]+)$,接受
'ai:github:openai/gpt-5-mini' 这类 backend:model 形式(CI 翻译输出格式)
- README + docs/I18N.md: 修正"CI 用 Claude API"误导描述,正确说明默认是
GitHub Models(openai/gpt-5-mini)+ GITHUB_TOKEN,可选切到 Anthropic
- skills/minimax-tts/SKILL.md & SKILL.zh-CN.md: 删除多余的 ``` 闭合,避免
Markdown 后续渲染错乱
- skills/docx/SKILL.md: 翻译时丢失的 • Unicode escape 示例已恢复,
与 zh-CN 版本对齐
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
341 lines
14 KiB
Python
Executable File
341 lines
14 KiB
Python
Executable File
#!/usr/bin/env -S uv run --script
|
|
# /// script
|
|
# requires-python = ">=3.10"
|
|
# dependencies = ["pyyaml>=6.0"]
|
|
# ///
|
|
"""Validate DesireCore market i18n state.
|
|
|
|
Checks:
|
|
1. SKILL.md frontmatter top-level `name` matches parent dir name and is spec-compliant.
|
|
2. metadata.i18n.default_locale and source_locale are listed in metadata.i18n.locales.
|
|
3. Each declared locale has metadata.i18n.<locale>.{name,short_desc}.
|
|
4. metadata.i18n.<locale>.body, if present, points to an existing file; otherwise the
|
|
fallback chain must terminate at a readable root SKILL.md.
|
|
5. SKILL.<locale>.md, if it declares <!-- locale: ... -->, must match the filename locale.
|
|
6. Frontmatter parses cleanly; heading count of locale body matches source body (+/- 0).
|
|
7. categories.json's per-category i18n covers all locales declared in manifest.json.
|
|
8. Top-level description is 1-1024 chars (spec); top-level name is 1-64 chars (spec).
|
|
|
|
Exit codes:
|
|
0 = pass
|
|
1 = validation errors found
|
|
2 = unexpected runtime error / missing dependencies
|
|
|
|
Usage:
|
|
python3 scripts/i18n/validate-i18n.py # validate everything under repo root
|
|
python3 scripts/i18n/validate-i18n.py skills/web-access # validate single skill
|
|
python3 scripts/i18n/validate-i18n.py --json # machine-readable output
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Iterable
|
|
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
sys.stderr.write("ERROR: PyYAML is required. Install with: pip install pyyaml\n")
|
|
sys.exit(2)
|
|
|
|
# Repository root, derived from this script's own location: the usage examples
# place the script at <root>/scripts/i18n/, so the root is two directories above
# the script's directory.
REPO_ROOT = Path(__file__).resolve().parents[2]
|
|
|
|
# Skill `name`: lowercase ASCII letters/digits separated by single hyphens, with no
# leading/trailing hyphen. Anchored with \Z rather than $ because $ also matches
# just before a trailing newline, which would let "my-skill\n" through.
NAME_PATTERN = re.compile(r"^(?!-)(?!.*--)[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\Z")
# Names rejected by rule 1 even when they match NAME_PATTERN.
RESERVED_NAMES = {"anthropic", "claude"}
# Locale tag as accepted by this validator: 2-3 lowercase letters, optionally
# followed by "-" and a 2-letter uppercase region (e.g. "en", "zh-CN").
# \Z (not $) so a tag with a trailing newline is rejected.
LOCALE_PATTERN = re.compile(r"^[a-z]{2,3}(?:-[A-Z]{2})?\Z")
# Optional self-check marker on the first line of a locale body file:
# <!-- locale: xx-YY -->; group 1 is the declared locale.
LOCALE_HEADER_PATTERN = re.compile(r"^<!--\s*locale:\s*([a-zA-Z-]+)\s*-->")
# ATX-style markdown heading: 1-6 '#' at line start, whitespace, then text.
HEADING_PATTERN = re.compile(r"^(#{1,6})\s+\S", re.MULTILINE)
# Splits a document into (YAML frontmatter, body); the file must start with '---'
# and the closing '---' line must be followed by a newline.
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n(.*)$", re.DOTALL)
|
|
|
|
|
|
@dataclass
class Issue:
    """A single validation finding."""

    # File or directory the finding refers to.
    path: str
    # Identifier of the check that produced the finding (e.g. "rule-1").
    rule: str
    # Human-readable explanation.
    message: str
    # "error" by default; other values (e.g. "warning") do not count as errors.
    severity: str = "error"

    def to_dict(self) -> dict[str, str]:
        """Return the finding as a plain dict, keys in declaration order."""
        keys = ("path", "rule", "message", "severity")
        return {key: getattr(self, key) for key in keys}
|
|
|
|
|
|
@dataclass
class Report:
    """Accumulates the findings produced during a validation run."""

    # Findings in the order they were added.
    issues: list[Issue] = field(default_factory=list)

    def add(self, issue: Issue) -> None:
        """Record one more finding."""
        self.issues.append(issue)

    @property
    def has_errors(self) -> bool:
        """True when at least one recorded finding has severity "error"."""
        for recorded in self.issues:
            if recorded.severity == "error":
                return True
        return False
|
|
|
|
|
|
def parse_frontmatter(text: str) -> tuple[dict[str, Any] | None, str | None, str | None]:
    """Split a document into its YAML frontmatter mapping and the remaining body.

    Returns a ``(frontmatter, body, error)`` triple. On success ``error`` is
    None and the other two are set. On any failure (blank file, no frontmatter
    block, YAML syntax error, frontmatter that is not a mapping) ``frontmatter``
    and ``body`` are None and ``error`` holds a message describing the problem.
    """
    if text.strip() == "":
        return None, None, "empty file"

    match = FRONTMATTER_RE.match(text)
    if match is None:
        return None, None, "no YAML frontmatter (file must start with '---')"

    raw_yaml, remainder = match.group(1), match.group(2)
    try:
        loaded = yaml.safe_load(raw_yaml)
    except yaml.YAMLError as e:
        return None, None, f"YAML parse error: {e}"

    # Any falsy result (e.g. a frontmatter block with no content) becomes an
    # empty mapping.
    frontmatter = loaded or {}
    if isinstance(frontmatter, dict):
        return frontmatter, remainder, None
    return None, None, "frontmatter must be a YAML mapping"
|
|
|
|
|
|
def heading_count(text: str) -> int:
    """Count the lines in ``text`` that HEADING_PATTERN recognises as headings.

    A falsy ``text`` (None or "") counts as zero headings.
    """
    haystack = text if text else ""
    matches = HEADING_PATTERN.findall(haystack)
    return len(matches)
|
|
|
|
|
|
def validate_skill(skill_dir: Path, report: Report, declared_locales: set[str] | None = None) -> None:
    """Validate one skill directory and append every finding to ``report``.

    Checks performed, in order: SKILL.md exists and its frontmatter parses
    (rule 6); top-level ``name`` is spec-compliant and equals the directory
    name (rule 1); top-level ``description`` is 1-1024 chars (rule 8);
    ``metadata.i18n`` is well-formed (rule 2) and covers ``declared_locales``
    (rule 7); each locale has ``name``/``short_desc`` (rule 3) and a resolvable
    ``body`` (rule 4) whose locale header, if any, matches (rule 5); heading
    counts of locale bodies match the source body (rule 6, warning only).

    Args:
        skill_dir: Directory expected to contain ``SKILL.md``.
        report: Collector that receives one ``Issue`` per finding.
        declared_locales: Locales declared by the market manifest. When given,
            each must appear in the skill's ``metadata.i18n.locales``.

    Returns:
        None. Malformed input is reported as issues rather than raised.
    """
    try:
        rel_dir = skill_dir.relative_to(REPO_ROOT).as_posix()
    except ValueError:
        # The skill lives outside the repo checkout; label issues with its own
        # path instead of crashing.
        rel_dir = skill_dir.as_posix()
    skill_md_label = f"{rel_dir}/SKILL.md"

    skill_md = skill_dir / "SKILL.md"
    if not skill_md.is_file():
        report.add(Issue(rel_dir, "structure", "SKILL.md not found"))
        return

    text, read_err = _read_utf8(skill_md)
    if text is None:
        report.add(Issue(skill_md_label, "rule-6", read_err or "unreadable file"))
        return
    fm, body, err = parse_frontmatter(text)
    if err:
        report.add(Issue(skill_md_label, "rule-6", err))
        return
    # parse_frontmatter sets both whenever err is None; this only narrows types.
    assert fm is not None and body is not None

    name = fm.get("name", "")
    description = fm.get("description", "")

    # Rule 1: name spec-compliance + matches dir
    if not isinstance(name, str) or not NAME_PATTERN.match(name) or len(name) > 64 or name in RESERVED_NAMES:
        report.add(Issue(
            skill_md_label, "rule-1",
            f"name {name!r} is not spec-compliant (must be lowercase ASCII + hyphens, 1-64 chars, not 'anthropic'/'claude')"
        ))
    if name != skill_dir.name:
        report.add(Issue(
            skill_md_label, "rule-1",
            f"name '{name}' must equal parent dir name '{skill_dir.name}' (spec)"
        ))

    # Rule 8: description length
    if not isinstance(description, str) or not (1 <= len(description) <= 1024):
        report.add(Issue(
            skill_md_label, "rule-8",
            f"description must be 1-1024 chars (got {len(description) if isinstance(description, str) else 'non-string'})"
        ))

    # Rule 2/3/4: i18n block
    metadata = fm.get("metadata") or {}
    i18n = metadata.get("i18n") if isinstance(metadata, dict) else None
    if not isinstance(i18n, dict):
        report.add(Issue(skill_md_label, "rule-2", "metadata.i18n block missing"))
        return

    default_locale = i18n.get("default_locale")
    source_locale = i18n.get("source_locale")
    locales = i18n.get("locales") or []
    if not isinstance(locales, list) or not all(isinstance(x, str) for x in locales):
        report.add(Issue(skill_md_label, "rule-2", "metadata.i18n.locales must be a list of strings"))
        return
    locale_set = set(locales)

    for tag in ("default_locale", "source_locale"):
        val = i18n.get(tag)
        if not isinstance(val, str) or not LOCALE_PATTERN.match(val):
            # repr() already quotes strings, so no extra quotes around it.
            report.add(Issue(skill_md_label, "rule-2", f"metadata.i18n.{tag} {val!r} is not a valid BCP-47 locale"))
        elif val not in locale_set:
            report.add(Issue(skill_md_label, "rule-2", f"metadata.i18n.{tag} '{val}' not present in metadata.i18n.locales"))

    if declared_locales is not None:
        missing = declared_locales - locale_set
        if missing:
            report.add(Issue(
                skill_md_label, "rule-7",
                f"manifest declares locales {sorted(declared_locales)} but skill is missing {sorted(missing)}",
                severity="error"
            ))

    # Rule 3: per-locale name/short_desc presence
    source_body_text: str | None = None
    for locale in locales:
        if not LOCALE_PATTERN.match(locale):
            report.add(Issue(skill_md_label, "rule-3", f"locale '{locale}' is not a valid BCP-47 tag"))
            continue
        payload = i18n.get(locale)
        if not isinstance(payload, dict):
            report.add(Issue(skill_md_label, "rule-3", f"metadata.i18n.{locale} block missing or not a mapping"))
            continue
        for required in ("name", "short_desc"):
            v = payload.get(required)
            if not isinstance(v, str) or not v.strip():
                report.add(Issue(
                    skill_md_label, "rule-3",
                    f"metadata.i18n.{locale}.{required} is missing or empty"
                ))

        # Rule 4: body file presence
        body_path_str = payload.get("body")
        body_text: str | None = None
        if body_path_str:
            if not isinstance(body_path_str, str) or not body_path_str.startswith("./"):
                report.add(Issue(
                    skill_md_label, "rule-4",
                    f"metadata.i18n.{locale}.body must be a relative path starting with './' (got {body_path_str!r})"
                ))
            else:
                body_file = (skill_dir / body_path_str.removeprefix("./")).resolve()
                if not body_file.is_file():
                    report.add(Issue(
                        skill_md_label, "rule-4",
                        f"metadata.i18n.{locale}.body points to missing file '{body_path_str}'"
                    ))
                else:
                    body_text, read_err = _read_utf8(body_file)
                    if body_text is None:
                        report.add(Issue(
                            f"{rel_dir}/{body_file.name}", "rule-4",
                            read_err or "unreadable file"
                        ))
                    elif body_file.name != "SKILL.md":
                        # Rule 5: locale header self-check (only when not the root SKILL.md)
                        first_line = body_text.splitlines()[0] if body_text else ""
                        m = LOCALE_HEADER_PATTERN.match(first_line)
                        if m and m.group(1) != locale:
                            report.add(Issue(
                                f"{rel_dir}/{body_file.name}", "rule-5",
                                f"file declares locale '{m.group(1)}' but is referenced as '{locale}'"
                            ))
        elif locale == default_locale:
            # No explicit body: the default locale falls back to the root SKILL.md
            # body. Other locales may omit it (they fall back at runtime).
            body_text = body

        if locale == source_locale:
            source_body_text = body_text or body  # source defaults to root if not specified

    # Rule 6: heading count consistency between source and other locales' bodies
    if source_body_text is None:
        return
    source_count = heading_count(source_body_text)
    for locale in locales:
        if locale == source_locale:
            continue
        payload = i18n.get(locale)
        if not isinstance(payload, dict):
            # Malformed payload: skip here instead of crashing on .get().
            continue
        body_path_str = payload.get("body")
        if not body_path_str or not isinstance(body_path_str, str):
            # Absent or non-string body: nothing to compare.
            continue
        body_file = (skill_dir / body_path_str.removeprefix("./")).resolve()
        if not body_file.is_file():
            continue
        other_text, _ = _read_utf8(body_file)
        if other_text is None:
            continue
        other_count = heading_count(other_text)
        if other_count != source_count:
            report.add(Issue(
                f"{rel_dir}/{body_file.name}", "rule-6",
                f"heading count {other_count} differs from source ({source_count})",
                severity="warning",
            ))


def _read_utf8(path: Path) -> tuple[str | None, str | None]:
    """Read ``path`` as UTF-8; return ``(text, None)`` or ``(None, error message)``."""
    try:
        return path.read_text(encoding="utf-8"), None
    except (OSError, UnicodeDecodeError) as exc:
        return None, f"cannot read file as UTF-8: {exc}"
|
|
|
|
|
|
def validate_market_root(report: Report) -> set[str]:
    """Validate ``manifest.json`` and ``categories.json`` at the repo root.

    Findings are appended to ``report`` under rule-7. Files that do not exist
    are skipped silently. ``categories.json`` is only checked when the manifest
    declares at least one locale.

    Args:
        report: Collector that receives one ``Issue`` per finding.

    Returns:
        The set of locales listed in the manifest's ``supportedLocales``, or an
        empty set when the manifest is missing, unparsable, or invalid.
    """
    manifest_path = REPO_ROOT / "manifest.json"
    categories_path = REPO_ROOT / "categories.json"

    declared: set[str] = set()
    if manifest_path.is_file():
        try:
            manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            report.add(Issue("manifest.json", "rule-7", f"JSON parse error: {e}"))
            manifest = {}
        if not isinstance(manifest, dict):
            # Valid JSON but not an object (e.g. a list): report it instead of
            # crashing on .get() below.
            report.add(Issue("manifest.json", "rule-7", "top-level JSON value must be an object"))
            manifest = {}
        supported = manifest.get("supportedLocales") or []
        if not isinstance(supported, list) or not all(isinstance(x, str) and LOCALE_PATTERN.match(x) for x in supported):
            report.add(Issue("manifest.json", "rule-7", "supportedLocales must be a list of BCP-47 tags"))
        else:
            declared = set(supported)
        default = manifest.get("defaultLocale")
        if declared and default not in declared:
            report.add(Issue("manifest.json", "rule-7", f"defaultLocale '{default}' not in supportedLocales"))

    if categories_path.is_file() and declared:
        try:
            categories = json.loads(categories_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            report.add(Issue("categories.json", "rule-7", f"JSON parse error: {e}"))
            return declared
        if not isinstance(categories, dict):
            report.add(Issue(
                "categories.json", "rule-7",
                "top-level JSON value must be an object mapping category id to its definition"
            ))
            return declared
        for cat_id, cat in categories.items():
            i18n = cat.get("i18n") if isinstance(cat, dict) else None
            if not isinstance(i18n, dict):
                report.add(Issue("categories.json", "rule-7", f"category '{cat_id}' missing i18n block"))
                continue
            # Sorted so the order of reported issues is stable across runs
            # (plain set iteration order varies with string hash randomization).
            for locale in sorted(declared):
                payload = i18n.get(locale)
                if not isinstance(payload, dict) or not payload.get("label"):
                    report.add(Issue(
                        "categories.json", "rule-7",
                        f"category '{cat_id}' missing i18n.{locale}.label"
                    ))
    return declared
|
|
|
|
|
|
def iter_skill_dirs(targets: Iterable[Path]) -> Iterable[Path]:
    """Yield the skill directories that ``targets`` refer to.

    Each target is handled in one of three ways:
    - a file named ``SKILL.md`` yields its parent directory;
    - a directory containing ``SKILL.md`` yields itself;
    - any other directory yields, in sorted order, each immediate child
      directory that contains ``SKILL.md``.
    Anything else (missing paths, other files) yields nothing.
    """

    def _holds_skill(directory: Path) -> bool:
        return directory.is_dir() and (directory / "SKILL.md").is_file()

    for target in targets:
        if target.is_file() and target.name == "SKILL.md":
            yield target.parent
            continue
        if not target.is_dir():
            continue
        if (target / "SKILL.md").is_file():
            yield target
            continue
        yield from (entry for entry in sorted(target.iterdir()) if _holds_skill(entry))
|
|
|
|
|
|
def main(argv: list[str]) -> int:
    """Run the validator from the command line.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        1 if any finding has severity "error", otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring is a structured list; keep its line breaks in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("paths", nargs="*", help="Skills or directories to validate (default: repo root)")
    parser.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
    args = parser.parse_args(argv)

    report = Report()
    declared_locales = validate_market_root(report)

    explicit = bool(args.paths)
    if explicit:
        targets = [Path(p).resolve() for p in args.paths]
    else:
        targets = [REPO_ROOT / "skills"]

    for target in targets:
        skill_dirs = list(iter_skill_dirs([target]))
        if explicit and not skill_dirs:
            # A mistyped path would otherwise validate nothing and still print
            # "OK". Surface it as a warning so the exit code is unchanged.
            report.add(Issue(
                target.as_posix(), "structure",
                "no SKILL.md found at or under this path; nothing was validated",
                severity="warning",
            ))
        for skill_dir in skill_dirs:
            validate_skill(skill_dir, report, declared_locales=declared_locales or None)

    if args.json:
        json.dump([i.to_dict() for i in report.issues], sys.stdout, indent=2, ensure_ascii=False)
        sys.stdout.write("\n")
    else:
        if not report.issues:
            print("OK: no i18n issues found.")
        else:
            for issue in report.issues:
                marker = "ERROR" if issue.severity == "error" else "WARN "
                print(f"[{marker}] {issue.path} :: {issue.rule} :: {issue.message}")
            errors = sum(1 for i in report.issues if i.severity == "error")
            warns = sum(1 for i in report.issues if i.severity == "warning")
            print(f"\n{errors} error(s), {warns} warning(s).")

    return 1 if report.has_errors else 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Exit codes: 0/1 come from main(); 130 on Ctrl-C; 2 on any unexpected
    # failure, so a crash is not mistaken for "validation errors found" (an
    # uncaught exception would otherwise exit with status 1).
    try:
        sys.exit(main(sys.argv[1:]))
    except KeyboardInterrupt:
        sys.exit(130)
    except Exception:
        import traceback

        traceback.print_exc()
        sys.stderr.write("ERROR: unexpected runtime error (see traceback above)\n")
        sys.exit(2)
|