From 8610f19f7ec6a6fb5e40f8f7c7c59da283132af7 Mon Sep 17 00:00:00 2001
From: Yige <a@wyr.me>
Date: Wed, 13 May 2026 17:32:19 +0800
Subject: [PATCH] =?UTF-8?q?ci(i18n-translate):=20=E4=BB=85=E7=BF=BB?=
 =?UTF-8?q?=E8=AF=91=E6=9C=AC=20PR=20=E6=94=B9=E5=8A=A8=E7=9A=84=20skill?=
 =?UTF-8?q?=EF=BC=8C=E9=81=BF=E5=85=8D=20stale=20=E9=9B=AA=E5=B4=A9=20(#6)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## 背景

PR #4 翻译失败的根因：`translate.py` 总扫描整个 `skills/` 目录，任何一个 skill 的 stale
source_hash 都会触发翻译并占用 GitHub Models quota。一个大文件（如 manage-skills 14KB）撞免费
tier 8K input 上限 → 413 → 整个 workflow fail → ruleset 阻塞合并。

虽然 GitHub Models 已升级付费 quota（200K input 不会再 413），但 scope 收敛仍是更稳健的做法：单 PR
token 消耗与改动量成正比，而不是与整个 repo 的 stale skill 数成正比。

## 改动

- `detect-changes` step 在 `pull_request` 事件下提取本 PR 实际触及的 skill 目录（去重到
`skills/<name>` 粒度），输出到 `GITHUB_OUTPUT skill_paths`
- `precheck`（--check）与 `Translate stale locales` 步骤把 `skill_paths`
作为位置参数传给 `translate.py`，仅检查/翻译相关 skill
- `manifest.json` / `categories.json` 变动时回退到全扫描（这些影响 i18n fallback 链 /
supportedLocales 全局语义）
- `workflow_dispatch` 仍默认全扫描；其 `skill` 输入参数优先级最高

## 验证

- 本地测试 detect-changes shell 提取逻辑：skill 文件 → 正确去重；manifest.json →
全扫描；无相关改动 → relevant=false
- 本地 `translate.py --check skills/<name>` 正常工作
- 本 PR 自身仅触及 `.github/workflows/i18n-translate.yml`，detect-changes 应输出
`relevant=false`，translate 整体走 skip 分支

## Test plan

- [ ] CI 上 `validate` / `translate` / `wait-for-copilot-review` 全绿
- [ ] Copilot 评审通过 / conversation resolved
- [ ] Squash merge
---
 .github/workflows/i18n-translate.yml | 105 ++++++++++++++++++++++++---
 1 file changed, 96 insertions(+), 9 deletions(-)
diff --git a/.github/workflows/i18n-translate.yml b/.github/workflows/i18n-translate.yml
index dada687..bf77a8c 100644
--- a/.github/workflows/i18n-translate.yml
+++ b/.github/workflows/i18n-translate.yml
@@ -9,6 +9,12 @@ name: i18n Auto-Translate
 # then set repository variable TRANSLATE_BACKEND=anthropic and TRANSLATE_MODEL to
 # a Claude model id (e.g. claude-sonnet-4-6). Claude is NOT in the GitHub Models
 # catalog as of 2026-05.
+#
+# Scope: on pull_request events, only skills whose SKILL*.md files actually changed
+# in the PR are checked/translated — keeps token usage proportional to the PR. If
+# manifest.json or categories.json changed, falls back to full-repo scan (these
+# affect the i18n fallback chain globally). workflow_dispatch always does a full
+# scan unless `skill` input is supplied.
 
 on:
   pull_request:
@@ -50,17 +56,77 @@ jobs:
           BASE_SHA: ${{ github.event.pull_request.base.sha }}
           HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
         run: |
-          set -e
+          # Without an explicit `shell: bash`, Actions runs `bash -e` (no pipefail),
+          # so enable it here; the `|| true` on grep below exists because of pipefail.
+          set -eo pipefail
+          # workflow_dispatch: full scan (skill input handled later if provided)
           if [ "${{ github.event_name }}" != "pull_request" ]; then
             echo "relevant=true" >> "$GITHUB_OUTPUT"
+            echo "skill_paths=" >> "$GITHUB_OUTPUT"
             exit 0
           fi
-          git fetch --no-tags --depth=1 origin "$BASE_SHA" 2>/dev/null || true
-          changed=$(git diff --name-only "${BASE_SHA}...${HEAD_SHA}" || true)
-          if echo "$changed" | grep -qE '^(skills/.+/SKILL(\.zh-CN)?\.md$|manifest\.json$|categories\.json$)'; then
+
+          # Ensure BASE_SHA is locally reachable; if we can't fetch it, fall back
+          # to a full scan rather than silently producing an empty diff that would
+          # mis-classify a real i18n PR as relevant=false.
+          if ! git cat-file -e "${BASE_SHA}^{commit}" 2>/dev/null; then
+            if ! git fetch --no-tags --depth=1 origin "$BASE_SHA" 2>/dev/null; then
+              echo "::warning::failed to fetch base SHA ${BASE_SHA}; falling back to full scan"
+              echo "relevant=true" >> "$GITHUB_OUTPUT"
+              echo "skill_paths=" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+          fi
+
+          # Diff merge-base(BASE, HEAD) against HEAD (what `git diff BASE...HEAD`
+          # reports), resolving the merge-base explicitly so failure is detectable.
+          # With actions/checkout@v4 + fetch-depth: 0 it normally resolves, but if
+          # the base branch advanced after the PR was opened and the merge-base is
+          # not locally reachable, fall back to a full scan, not a wrong diff.
+          if ! merge_base=$(git merge-base "${BASE_SHA}" "${HEAD_SHA}" 2>/dev/null); then
+            echo "::warning::could not compute merge-base for ${BASE_SHA}...${HEAD_SHA}; falling back to full scan"
             echo "relevant=true" >> "$GITHUB_OUTPUT"
+            echo "skill_paths=" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          changed=$(git diff --name-only "${merge_base}" "${HEAD_SHA}")
+
+          # If manifest.json or categories.json changed, fall back to full scan
+          # (these affect i18n fallback chain / supportedLocales globally).
+          if printf '%s\n' "$changed" | grep -qE '^(manifest\.json|categories\.json)$'; then
+            echo "relevant=true" >> "$GITHUB_OUTPUT"
+            echo "skill_paths=" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Otherwise: extract unique skill directories touched by SKILL*.md edits.
+          # Grep enforces:
+          #   - skill name: lowercase ASCII letters, digits, hyphens
+          #     (no leading/trailing hyphen — anchored with [a-z0-9])
+          #   - locale tag (BCP-47 subset): 2-3 lowercase letters + optional -RR
+          # awk enforces the rest of the schema's name constraints (no consecutive
+          # hyphens, no reserved names) as defense-in-depth — those would already
+          # have been blocked upstream by validate-i18n.py / schema validation.
+          # The brace group with `|| true` turns grep's "no match" (exit 1) into
+          # success so pipefail doesn't kill the step when this PR is unrelated.
+          # Strict filtering also sanitizes input for the $GITHUB_OUTPUT write
+          # below (any path failing the shape is dropped).
+          skill_paths=$(printf '%s\n' "$changed" \
+            | { grep -E '^skills/[a-z0-9]([a-z0-9-]*[a-z0-9])?/SKILL(\.[a-z]{2,3}(-[A-Z]{2})?)?\.md$' || true; } \
+            | awk -F/ '
+                $2 !~ /--/ && $2 != "anthropic" && $2 != "claude" {
+                  print $1 "/" $2
+                }' \
+            | sort -u \
+            | tr '\n' ' ' \
+            | sed 's/ $//')
+
+          if [ -n "$skill_paths" ]; then
+            echo "relevant=true" >> "$GITHUB_OUTPUT"
+            echo "skill_paths=$skill_paths" >> "$GITHUB_OUTPUT"
           else
             echo "relevant=false" >> "$GITHUB_OUTPUT"
+            echo "skill_paths=" >> "$GITHUB_OUTPUT"
           fi
 
       - name: Install uv
@@ -70,9 +136,22 @@ jobs:
       - name: Check for stale translations
         id: precheck
         if: steps.changes.outputs.relevant == 'true'
+        env:
+          SKILL_PATHS: ${{ steps.changes.outputs.skill_paths }}
+          INPUT_SKILL: ${{ github.event.inputs.skill }}
         run: |
           set +e
-          uv run --quiet scripts/i18n/translate.py --check
+          ARGS=(--check)
+          if [ -n "$INPUT_SKILL" ]; then
+            # workflow_dispatch with explicit skill input takes precedence
+            ARGS+=("$INPUT_SKILL")
+          elif [ -n "$SKILL_PATHS" ]; then
+            # pull_request: read space-separated paths into an array to avoid
+            # word-splitting / glob expansion surprises.
+            read -r -a SKILL_ARR <<<"$SKILL_PATHS"
+            ARGS+=("${SKILL_ARR[@]}")
+          fi
+          uv run --quiet scripts/i18n/translate.py "${ARGS[@]}"
           rc=$?
           if [ $rc -eq 0 ]; then
             echo "stale=false" >> "$GITHUB_OUTPUT"
@@ -88,14 +167,22 @@ jobs:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           TRANSLATE_BACKEND: ${{ vars.TRANSLATE_BACKEND || 'github' }}
           TRANSLATE_MODEL: ${{ vars.TRANSLATE_MODEL || 'openai/gpt-5-mini' }}
+          SKILL_PATHS: ${{ steps.changes.outputs.skill_paths }}
+          INPUT_SKILL: ${{ github.event.inputs.skill }}
+          INPUT_TARGET: ${{ github.event.inputs.target_locale }}
         run: |
           set -e
           ARGS=()
-          if [ -n "${{ github.event.inputs.skill }}" ]; then
-            ARGS+=("${{ github.event.inputs.skill }}")
+          if [ -n "$INPUT_SKILL" ]; then
+            # workflow_dispatch input takes precedence (single specific skill)
+            ARGS+=("$INPUT_SKILL")
+          elif [ -n "$SKILL_PATHS" ]; then
+            # pull_request: only translate skills touched by this PR
+            read -r -a SKILL_ARR <<<"$SKILL_PATHS"
+            ARGS+=("${SKILL_ARR[@]}")
           fi
-          if [ -n "${{ github.event.inputs.target_locale }}" ]; then
-            ARGS+=(--target "${{ github.event.inputs.target_locale }}")
+          if [ -n "$INPUT_TARGET" ]; then
+            ARGS+=(--target "$INPUT_TARGET")
           fi
           uv run --quiet scripts/i18n/translate.py "${ARGS[@]}"