From 94652d293c3b9fe233db140b2bb73d4fd1235654 Mon Sep 17 00:00:00 2001
From: Yige <a@wyr.me>
Date: Wed, 13 May 2026 18:21:51 +0800
Subject: [PATCH] =?UTF-8?q?fix(i18n):=20GPT-5=20=E7=94=A8=20max=5Fcompleti?=
 =?UTF-8?q?on=5Ftokens=20=E8=A7=A3=E9=94=81=E4=BB=98=E8=B4=B9=20quota=20?=
 =?UTF-8?q?=E7=9C=9F=E5=AE=9E=E7=BF=BB=E8=AF=91=E8=83=BD=E5=8A=9B=20(#8)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## 根因

我以为 PR #4 的 translate 失败是 `gpt-5-mini` 在免费 tier 不可用 / payload too large
/ quota 用尽。**实际不是**——付费 quota 已生效，gpt-5-mini 可访问。真正原因是 **OpenAI Chat
Completions 2024+ 契约变化**：

- GPT-5 系列要求 `max_completion_tokens`
- 旧的 `max_tokens` 会被拒绝：`HTTP 400
{"error":{"code":"unsupported_parameter","param":"max_tokens"}}`
- translate.py 之前对 400 也 retry 3 次，连续 retry 撞 RPM 触发
429，把“参数错误”误诊为“quota 用尽”

通过本地 curl 直接调 API 确认：gpt-5-mini + max_completion_tokens 返回 HTTP 200。

## 改动

- `call_github_models`: `max_tokens` → `max_completion_tokens`（GPT-4
也接受新参数名，对旧模型保持兼容）
- `_post_with_retries`: 400 直接 raise，并把 response body（前 600 字符）带进异常信息（payload 不会变，retry
浪费 quota）；429/5xx retry 前先把 body（前 300 字符）写到 stderr，让 budget / rate-limit 详情可见

## 验证

- 本地 curl 用 gpt-5-mini + max_completion_tokens：HTTP 200 通过
- CI 上本 PR 不触及 skill，translate workflow 走 skip 分支（验证 detect-changes
不破坏现有逻辑）
- 合并后再开一个 verify PR，实际触发翻译以做端到端验证
---
 scripts/i18n/translate.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/scripts/i18n/translate.py b/scripts/i18n/translate.py
index 8c3057d..a5837af 100755
--- a/scripts/i18n/translate.py
+++ b/scripts/i18n/translate.py
@@ -188,6 +188,9 @@ def call_github_models(system_prompt: str, user_payload: str, model: str, endpoi
             "Locally, create a fine-grained PAT with 'Models: Read' permission."
         )
     url = f"{endpoint.rstrip('/')}/chat/completions"
+    # GPT-5 series rejects the legacy `max_tokens` field and requires
+    # `max_completion_tokens` instead (OpenAI Chat Completions 2024+ contract).
+    # GPT-4 and earlier accept either, so always use the new name.
     payload = {
         "model": model,
         "messages": [
@@ -195,7 +198,7 @@ def call_github_models(system_prompt: str, user_payload: str, model: str, endpoi
             {"role": "user", "content": user_payload},
         ],
         "temperature": 0.1,
-        "max_tokens": 8192,
+        "max_completion_tokens": 8192,
     }
     headers = {
         "Authorization": f"Bearer {token}",
@@ -255,7 +258,22 @@ def _post_with_retries(url: str, headers: dict, payload: dict, *, extract) -> st
                     f"Switch backend (TRANSLATE_BACKEND=anthropic), use a model with larger input budget, "
                     f"or set translated_by: human to lock the locale."
                 )
+            if resp.status_code == 400:
+                # 400 is almost always a payload contract issue (unsupported
+                # parameter, content filter, missing field, model not in
+                # account's allowlist, etc.) — same payload won't fix itself,
+                # so surface the response body and bail without retrying.
+                body_preview = (resp.text or "")[:600]
+                raise RuntimeError(
+                    f"400 Bad Request from {url}; response body: {body_preview}"
+                )
             if resp.status_code == 429 or resp.status_code >= 500:
+                # Log body so rate-limit / billing details (e.g. "budget limit
+                # reached") are visible in CI logs before we retry.
+                body_preview = (resp.text or "")[:300]
+                sys.stderr.write(
+                    f"[translate] {resp.status_code} body preview: {body_preview}\n"
+                )
                 raise httpx.HTTPStatusError(f"{resp.status_code}", request=resp.request, response=resp)
             resp.raise_for_status()
             return extract(resp.json())