diff --git a/.github/workflows/i18n-translate.yml b/.github/workflows/i18n-translate.yml index dada687..bf77a8c 100644 --- a/.github/workflows/i18n-translate.yml +++ b/.github/workflows/i18n-translate.yml @@ -9,6 +9,12 @@ name: i18n Auto-Translate # then set repository variable TRANSLATE_BACKEND=anthropic and TRANSLATE_MODEL to # a Claude model id (e.g. claude-sonnet-4-6). Claude is NOT in the GitHub Models # catalog as of 2026-05. +# +# Scope: on pull_request events, only skills whose SKILL*.md files actually changed +# in the PR are checked/translated — keeps token usage proportional to the PR. If +# manifest.json or categories.json changed, falls back to full-repo scan (these +# affect the i18n fallback chain globally). workflow_dispatch always does a full +# scan unless `skill` input is supplied. on: pull_request: @@ -50,17 +56,77 @@ jobs: BASE_SHA: ${{ github.event.pull_request.base.sha }} HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }} run: | - set -e + # Enable errexit and pipefail explicitly rather than relying on the runner's + # shell defaults, so the `|| true` workaround on grep below is unambiguous. + set -eo pipefail + # workflow_dispatch: full scan (skill input handled later if provided) if [ "${{ github.event_name }}" != "pull_request" ]; then echo "relevant=true" >> "$GITHUB_OUTPUT" + echo "skill_paths=" >> "$GITHUB_OUTPUT" exit 0 fi - git fetch --no-tags --depth=1 origin "$BASE_SHA" 2>/dev/null || true - changed=$(git diff --name-only "${BASE_SHA}...${HEAD_SHA}" || true) - if echo "$changed" | grep -qE '^(skills/.+/SKILL(\.zh-CN)?\.md$|manifest\.json$|categories\.json$)'; then + + # Ensure BASE_SHA is locally reachable; if we can't fetch it, fall back + # to a full scan rather than silently producing an empty diff that would + # mis-classify a real i18n PR as relevant=false. + if ! git cat-file -e "${BASE_SHA}^{commit}" 2>/dev/null; then + if ! 
git fetch --no-tags --depth=1 origin "$BASE_SHA" 2>/dev/null; then + echo "::warning::failed to fetch base SHA ${BASE_SHA}; falling back to full scan" + echo "relevant=true" >> "$GITHUB_OUTPUT" + echo "skill_paths=" >> "$GITHUB_OUTPUT" + exit 0 + fi + fi + + # `A...B` (triple-dot) needs merge-base(A,B) in history. With + # actions/checkout@v4 + fetch-depth: 0 it normally is, but if the base + # branch advanced after the PR was opened and the merge-base isn't + # locally reachable, fall back to a full scan instead of silently + # producing a wrong diff. + if ! merge_base=$(git merge-base "${BASE_SHA}" "${HEAD_SHA}" 2>/dev/null); then + echo "::warning::could not compute merge-base for ${BASE_SHA}...${HEAD_SHA}; falling back to full scan" echo "relevant=true" >> "$GITHUB_OUTPUT" + echo "skill_paths=" >> "$GITHUB_OUTPUT" + exit 0 + fi + changed=$(git diff --name-only "${merge_base}" "${HEAD_SHA}") + + # If manifest.json or categories.json changed, fall back to full scan + # (these affect i18n fallback chain / supportedLocales globally). + if printf '%s\n' "$changed" | grep -qE '^(manifest\.json|categories\.json)$'; then + echo "relevant=true" >> "$GITHUB_OUTPUT" + echo "skill_paths=" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Otherwise: extract unique skill directories touched by SKILL*.md edits. + # Grep enforces: + # - skill name: lowercase ASCII letters, digits, hyphens + # (no leading/trailing hyphen — anchored with [a-z0-9]) + # - locale tag (BCP-47 subset): 2-3 lowercase letters + optional -RR + # awk enforces the rest of the schema's name constraints (no consecutive + # hyphens, no reserved names) as defense-in-depth — those would already + # have been blocked upstream by validate-i18n.py / schema validation. + # The brace group with `|| true` turns grep's "no match" (exit 1) into + # success so pipefail doesn't kill the step when this PR is unrelated. 
+ # Strict filtering also sanitizes input for the $GITHUB_OUTPUT write + # below (any path failing the shape is dropped). + skill_paths=$(printf '%s\n' "$changed" \ + | { grep -E '^skills/[a-z0-9]([a-z0-9-]*[a-z0-9])?/SKILL(\.[a-z]{2,3}(-[A-Z]{2})?)?\.md$' || true; } \ + | awk -F/ ' + $2 !~ /--/ && $2 != "anthropic" && $2 != "claude" { + print $1 "/" $2 + }' \ + | sort -u \ + | tr '\n' ' ' \ + | sed 's/ $//') + + if [ -n "$skill_paths" ]; then + echo "relevant=true" >> "$GITHUB_OUTPUT" + echo "skill_paths=$skill_paths" >> "$GITHUB_OUTPUT" else echo "relevant=false" >> "$GITHUB_OUTPUT" + echo "skill_paths=" >> "$GITHUB_OUTPUT" fi - name: Install uv @@ -70,9 +136,22 @@ jobs: - name: Check for stale translations id: precheck if: steps.changes.outputs.relevant == 'true' + env: + SKILL_PATHS: ${{ steps.changes.outputs.skill_paths }} + INPUT_SKILL: ${{ github.event.inputs.skill }} run: | set +e - uv run --quiet scripts/i18n/translate.py --check + ARGS=(--check) + if [ -n "$INPUT_SKILL" ]; then + # workflow_dispatch with explicit skill input takes precedence + ARGS+=("$INPUT_SKILL") + elif [ -n "$SKILL_PATHS" ]; then + # pull_request: read space-separated paths into an array to avoid + # word-splitting / glob expansion surprises. + read -r -a SKILL_ARR <<<"$SKILL_PATHS" + ARGS+=("${SKILL_ARR[@]}") + fi + uv run --quiet scripts/i18n/translate.py "${ARGS[@]}" rc=$? 
if [ $rc -eq 0 ]; then echo "stale=false" >> "$GITHUB_OUTPUT" @@ -88,14 +167,22 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} TRANSLATE_BACKEND: ${{ vars.TRANSLATE_BACKEND || 'github' }} TRANSLATE_MODEL: ${{ vars.TRANSLATE_MODEL || 'openai/gpt-5-mini' }} + SKILL_PATHS: ${{ steps.changes.outputs.skill_paths }} + INPUT_SKILL: ${{ github.event.inputs.skill }} + INPUT_TARGET: ${{ github.event.inputs.target_locale }} run: | set -e ARGS=() - if [ -n "${{ github.event.inputs.skill }}" ]; then - ARGS+=("${{ github.event.inputs.skill }}") + if [ -n "$INPUT_SKILL" ]; then + # workflow_dispatch input takes precedence (single specific skill) + ARGS+=("$INPUT_SKILL") + elif [ -n "$SKILL_PATHS" ]; then + # pull_request: only translate skills touched by this PR + read -r -a SKILL_ARR <<<"$SKILL_PATHS" + ARGS+=("${SKILL_ARR[@]}") fi - if [ -n "${{ github.event.inputs.target_locale }}" ]; then - ARGS+=(--target "${{ github.event.inputs.target_locale }}") + if [ -n "$INPUT_TARGET" ]; then + ARGS+=(--target "$INPUT_TARGET") fi uv run --quiet scripts/i18n/translate.py "${ARGS[@]}"