mirror of
https://git.openapi.site/https://github.com/desirecore/market.git
synced 2026-06-06 05:50:41 +08:00
fix(i18n): GPT-5 用 max_completion_tokens 解锁付费 quota 真实翻译能力 (#8)
## 根因

我以为 PR #4 的 translate 失败是 `gpt-5-mini` 在免费 tier 不可用 / payload too large / quota 用尽。**实际不是**——付费 quota 已生效,gpt-5-mini 可访问。真正原因是 **OpenAI Chat Completions 2024+ 契约变化**:

- GPT-5 系列要求 `max_completion_tokens`
- 旧的 `max_tokens` 会被拒绝:`HTTP 400 {"error":{"code":"unsupported_parameter","param":"max_tokens"}}`
- translate.py 之前对 400 也 retry 3 次,连续 retry 撞 RPM 触发 429,把“参数错误”误诊为“quota 用尽”

通过本地 curl 直接调 API 确认:gpt-5-mini + max_completion_tokens 返回 HTTP 200。

## 改动

- `call_github_models`: `max_tokens` → `max_completion_tokens`(GPT-4 也接受新参数名,向前兼容)
- `_post_with_retries`: 400 直接 raise 并打印 response body(payload 不会变,retry 浪费 quota);429/5xx retry 前先打印 body 让 budget / rate-limit 详情可见

## 验证

- 本地 curl 用 gpt-5-mini + max_completion_tokens:HTTP 200 通过
- CI 上本 PR 不触及 skill,translate workflow 走 skip 分支(验证 detect-changes 不破坏现有逻辑)
- 合并后再开一个 verify PR 实际触发翻译验证端到端
This commit is contained in:
@@ -188,6 +188,9 @@ def call_github_models(system_prompt: str, user_payload: str, model: str, endpoi
|
|||||||
"Locally, create a fine-grained PAT with 'Models: Read' permission."
|
"Locally, create a fine-grained PAT with 'Models: Read' permission."
|
||||||
)
|
)
|
||||||
url = f"{endpoint.rstrip('/')}/chat/completions"
|
url = f"{endpoint.rstrip('/')}/chat/completions"
|
||||||
|
# GPT-5 series rejects the legacy `max_tokens` field and requires
|
||||||
|
# `max_completion_tokens` instead (OpenAI Chat Completions 2024+ contract).
|
||||||
|
# GPT-4 and earlier accept either, so always use the new name.
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": [
|
"messages": [
|
||||||
@@ -195,7 +198,7 @@ def call_github_models(system_prompt: str, user_payload: str, model: str, endpoi
|
|||||||
{"role": "user", "content": user_payload},
|
{"role": "user", "content": user_payload},
|
||||||
],
|
],
|
||||||
"temperature": 0.1,
|
"temperature": 0.1,
|
||||||
"max_tokens": 8192,
|
"max_completion_tokens": 8192,
|
||||||
}
|
}
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
@@ -255,7 +258,22 @@ def _post_with_retries(url: str, headers: dict, payload: dict, *, extract) -> st
|
|||||||
f"Switch backend (TRANSLATE_BACKEND=anthropic), use a model with larger input budget, "
|
f"Switch backend (TRANSLATE_BACKEND=anthropic), use a model with larger input budget, "
|
||||||
f"or set translated_by: human to lock the locale."
|
f"or set translated_by: human to lock the locale."
|
||||||
)
|
)
|
||||||
|
if resp.status_code == 400:
|
||||||
|
# 400 is almost always a payload contract issue (unsupported
|
||||||
|
# parameter, content filter, missing field, model not in
|
||||||
|
# account's allowlist, etc.) — same payload won't fix itself,
|
||||||
|
# so surface the response body and bail without retrying.
|
||||||
|
body_preview = (resp.text or "")[:600]
|
||||||
|
raise RuntimeError(
|
||||||
|
f"400 Bad Request from {url}; response body: {body_preview}"
|
||||||
|
)
|
||||||
if resp.status_code == 429 or resp.status_code >= 500:
|
if resp.status_code == 429 or resp.status_code >= 500:
|
||||||
|
# Log body so rate-limit / billing details (e.g. "budget limit
|
||||||
|
# reached") are visible in CI logs before we retry.
|
||||||
|
body_preview = (resp.text or "")[:300]
|
||||||
|
sys.stderr.write(
|
||||||
|
f"[translate] {resp.status_code} body preview: {body_preview}\n"
|
||||||
|
)
|
||||||
raise httpx.HTTPStatusError(f"{resp.status_code}", request=resp.request, response=resp)
|
raise httpx.HTTPStatusError(f"{resp.status_code}", request=resp.request, response=resp)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return extract(resp.json())
|
return extract(resp.json())
|
||||||
|
|||||||
Reference in New Issue
Block a user