From 94652d293c3b9fe233db140b2bb73d4fd1235654 Mon Sep 17 00:00:00 2001 From: Yige Date: Wed, 13 May 2026 18:21:51 +0800 Subject: [PATCH] =?UTF-8?q?fix(i18n):=20GPT-5=20=E7=94=A8=20max=5Fcompleti?= =?UTF-8?q?on=5Ftokens=20=E8=A7=A3=E9=94=81=E4=BB=98=E8=B4=B9=20quota=20?= =?UTF-8?q?=E7=9C=9F=E5=AE=9E=E7=BF=BB=E8=AF=91=E8=83=BD=E5=8A=9B=20(#8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## 根因 我以为 PR #4 的 translate 失败是 `gpt-5-mini` 在免费 tier 不可用 / payload too large / quota 用尽。**实际不是**——付费 quota 已生效,gpt-5-mini 可访问。真正原因是 **OpenAI Chat Completions 2024+ 契约变化**: - GPT-5 系列要求 `max_completion_tokens` - 旧的 `max_tokens` 会被拒绝:`HTTP 400 {"error":{"code":"unsupported_parameter","param":"max_tokens"}}` - translate.py 之前对 400 也 retry 3 次,连续 retry 撞 RPM 触发 429,把“参数错误”错误诊断为“quota 用尽” 通过本地 curl 直接调 API 确认:gpt-5-mini + max_completion_tokens 返回 HTTP 200。 ## 改动 - `call_github_models`: `max_tokens` → `max_completion_tokens`(GPT-4 也接受新参数名,向前兼容) - `_post_with_retries`: 400 直接 raise 并打印 response body(payload 不会变,retry 浪费 quota);429/5xx retry 前先打印 body 让 budget / rate-limit 详情可见 ## 验证 - 本地 curl 用 gpt-5-mini + max_completion_tokens:HTTP 200 通过 - CI 上本 PR 不触及 skill,translate workflow 走 skip 分支(验证 detect-changes 不破坏现有逻辑) - 合并后再开一个 verify PR 实际触发翻译验证端到端 --- scripts/i18n/translate.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/scripts/i18n/translate.py b/scripts/i18n/translate.py index 8c3057d..a5837af 100755 --- a/scripts/i18n/translate.py +++ b/scripts/i18n/translate.py @@ -188,6 +188,9 @@ def call_github_models(system_prompt: str, user_payload: str, model: str, endpoi "Locally, create a fine-grained PAT with 'Models: Read' permission." ) url = f"{endpoint.rstrip('/')}/chat/completions" + # GPT-5 series rejects the legacy `max_tokens` field and requires + # `max_completion_tokens` instead (OpenAI Chat Completions 2024+ contract). + # GPT-4 and earlier accept either, so always use the new name. payload = { "model": model, "messages": [ @@ -195,7 +198,7 @@ def call_github_models(system_prompt: str, user_payload: str, model: str, endpoi {"role": "user", "content": user_payload}, ], "temperature": 0.1, - "max_tokens": 8192, + "max_completion_tokens": 8192, } headers = { "Authorization": f"Bearer {token}", @@ -255,7 +258,22 @@ def _post_with_retries(url: str, headers: dict, payload: dict, *, extract) -> st f"Switch backend (TRANSLATE_BACKEND=anthropic), use a model with larger input budget, " f"or set translated_by: human to lock the locale." ) + if resp.status_code == 400: + # 400 is almost always a payload contract issue (unsupported + # parameter, content filter, missing field, model not in + # account's allowlist, etc.) — same payload won't fix itself, + # so surface the response body and bail without retrying. + body_preview = (resp.text or "")[:600] + raise RuntimeError( + f"400 Bad Request from {url}; response body: {body_preview}" + ) if resp.status_code == 429 or resp.status_code >= 500: + # Log body so rate-limit / billing details (e.g. "budget limit + # reached") are visible in CI logs before we retry. + body_preview = (resp.text or "")[:300] + sys.stderr.write( + f"[translate] {resp.status_code} body preview: {body_preview}\n" + ) raise httpx.HTTPStatusError(f"{resp.status_code}", request=resp.request, response=resp) resp.raise_for_status() return extract(resp.json())