From 380e99c577f6816e331c7628c0c212459580d24f Mon Sep 17 00:00:00 2001 From: xieyuanxiang <124608760@qq.com> Date: Sat, 25 Apr 2026 21:39:51 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=AE=97=E5=8A=9B=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E9=87=87=E6=A0=B7=E9=BB=98=E8=AE=A4=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- compute/providers/anthropic.json | 128 ++++++------ compute/providers/baichuan.json | 48 +++-- compute/providers/baidu.json | 132 ++++++++++-- compute/providers/cohere.json | 85 ++++++-- compute/providers/dashscope.json | 260 ++++++++++++++++++++---- compute/providers/deepseek.json | 32 +-- compute/providers/google.json | 119 ++++++++++- compute/providers/internal-testing.json | 60 +++--- compute/providers/lingyiwanwu.json | 12 +- compute/providers/minimax.json | 102 +++++++--- compute/providers/mistral.json | 39 ++-- compute/providers/moonshot.json | 139 ++++++++++--- compute/providers/ollama.json | 3 + compute/providers/openai.json | 178 ++++++++++------ compute/providers/openrouter.json | 21 +- compute/providers/perplexity.json | 61 ++++-- compute/providers/siliconflow.json | 41 ++-- compute/providers/tencent.json | 70 ++++++- compute/providers/volcengine.json | 136 ++++++++----- compute/providers/xai.json | 39 ++-- compute/providers/xunfei.json | 20 +- compute/providers/zhipu-embedding.json | 17 +- compute/providers/zhipu.json | 177 +++++++++++++--- manifest.json | 2 +- 24 files changed, 1413 insertions(+), 508 deletions(-) diff --git a/compute/providers/anthropic.json b/compute/providers/anthropic.json index d205155..6bed9f5 100644 --- a/compute/providers/anthropic.json +++ b/compute/providers/anthropic.json @@ -15,12 +15,14 @@ ], "models": [ { - "modelName": "claude-opus-4-6", - "displayName": "Claude Opus 4.6", - "serviceType": ["chat"], - "description": "Anthropic 最新旗舰 Opus 模型,最强能力", - "contextWindow": 200000, - "maxOutputTokens": 32768, + "modelName": "claude-opus-4-7", + "displayName": "Claude Opus 4.7", + "serviceType": [ + "chat" + ], + "description": "Anthropic 当前最强通用模型,适合复杂推理和智能体编码任务", + "contextWindow": 1000000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", @@ -31,83 +33,73 @@ "inputPrice": 5, "outputPrice": 25, "defaultTemperature": 1, - "extra": {} + "extra": { + "cachePricing": { + "write5m": 6.25, + "write1h": 10, + "read": 0.5 + }, + "pricingNotes": "Prices are per 1M tokens. Opus 4.7 includes the full 1M context window at standard pricing." + } }, { - "modelName": "claude-opus-4-5", - "displayName": "Claude Opus 4.5", - "serviceType": ["chat"], - "description": "Anthropic Opus 4.5,顶级推理能力", - "contextWindow": 200000, - "maxOutputTokens": 32768, - "capabilities": [ + "modelName": "claude-sonnet-4-6", + "displayName": "Claude Sonnet 4.6", + "serviceType": [ "chat", - "reasoning", - "code", - "vision", - "tool_use" + "computer_use" ], - "inputPrice": 5, - "outputPrice": 25, - "defaultTemperature": 1, - "extra": {} - }, - { - "modelName": "claude-sonnet-4-5", - "displayName": "Claude Sonnet 4.5", - "serviceType": ["chat"], - "description": "Anthropic Sonnet 4.5,高性能性价比", - "contextWindow": 200000, + "description": "Anthropic 高智能高速度模型,适合编码、工具使用和智能体任务", + "contextWindow": 1000000, "maxOutputTokens": 64000, "capabilities": [ "chat", "reasoning", "code", "vision", - "tool_use" - ], - "inputPrice": 3, - "outputPrice": 15, - "defaultTemperature": 1, - "extra": {} - }, - { - "modelName": "claude-sonnet-4", - "displayName": "Claude Sonnet 4", - "serviceType": ["chat"], - "description": "Anthropic Sonnet 4 旗舰模型", - "contextWindow": 200000, - "maxOutputTokens": 64000, - "capabilities": [ - "chat", - "reasoning", - "code", - "vision", - "tool_use" - ], - "inputPrice": 3, - "outputPrice": 15, - "defaultTemperature": 1, - "extra": {} - }, - { - "modelName": "claude-sonnet-4-5-computer", - "displayName": "Claude Sonnet 4.5 Computer Use", - "serviceType": ["computer_use"], - "description": "Anthropic 最佳 Computer Use 模型", - "contextWindow": 200000, - "maxOutputTokens": 64000, - "capabilities": [ - "chat", - "vision", - "computer_use", "tool_use", - "reasoning" + "computer_use" ], "inputPrice": 3, "outputPrice": 15, "defaultTemperature": 1, - "extra": {} + "extra": { + "cachePricing": { + "write5m": 3.75, + "write1h": 6, + "read": 0.3 + }, + "pricingNotes": "Prices are per 1M tokens. Sonnet 4.6 includes the full 1M context window at standard pricing." + } + }, + { + "modelName": "claude-haiku-4-5", + "displayName": "Claude Haiku 4.5", + "serviceType": [ + "chat" + ], + "description": "Anthropic 最快模型,具备接近前沿模型的智能水平", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "chat", + "reasoning", + "code", + "vision", + "tool_use" + ], + "inputPrice": 1, + "outputPrice": 5, + "defaultTemperature": 1, + "extra": { + "modelId": "claude-haiku-4-5-20251001", + "cachePricing": { + "write5m": 1.25, + "write1h": 2, + "read": 0.1 + }, + "pricingNotes": "Prices are per 1M tokens." + } } ] } diff --git a/compute/providers/baichuan.json b/compute/providers/baichuan.json index b725abf..ddc4aa9 100644 --- a/compute/providers/baichuan.json +++ b/compute/providers/baichuan.json @@ -16,10 +16,12 @@ { "modelName": "Baichuan-M3-Plus", "displayName": "百川 M3-Plus", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "百川智能M3-Plus医疗增强模型,235B参数,幻觉率仅2.6%,API价格较前代下降70%", - "contextWindow": 192000, - "maxOutputTokens": 8192, + "contextWindow": 32000, + "maxOutputTokens": 32000, "capabilities": [ "chat", "reasoning", @@ -31,17 +33,19 @@ ], "inputPrice": 5, "outputPrice": 9, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 0.3, + "defaultTopP": 0.85, "extra": {} }, { "modelName": "Baichuan-M3", "displayName": "百川 M3", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "百川智能M3新一代开源医疗增强模型,235B参数", - "contextWindow": 192000, - "maxOutputTokens": 8192, + "contextWindow": 32000, + "maxOutputTokens": 32000, "capabilities": [ "chat", "reasoning", @@ -52,17 +56,19 @@ ], "inputPrice": 10, "outputPrice": 30, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 0.3, + "defaultTopP": 0.85, "extra": {} }, { "modelName": "Baichuan-M2-Plus", "displayName": "百川 M2-Plus", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "百川智能M2-Plus旗舰模型,最强推理和创作能力", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "contextWindow": 32000, + "maxOutputTokens": 32000, "capabilities": [ "chat", "reasoning", @@ -72,17 +78,19 @@ ], "inputPrice": 10, "outputPrice": 30, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 0.3, + "defaultTopP": 0.85, "extra": {} }, { "modelName": "Baichuan-M2", "displayName": "百川 M2", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "百川智能M2模型,高性能通用模型", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "contextWindow": 32000, + "maxOutputTokens": 32000, "capabilities": [ "chat", "reasoning", @@ -91,8 +99,8 @@ ], "inputPrice": 2, "outputPrice": 20, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 0.3, + "defaultTopP": 0.85, "extra": {} } ] diff --git a/compute/providers/baidu.json b/compute/providers/baidu.json index 103bf25..b640198 100644 --- a/compute/providers/baidu.json +++ b/compute/providers/baidu.json @@ -9,14 +9,20 @@ "enabled": false, "status": "unconfigured", "priceCurrency": "CNY", - "services": ["chat", "reasoning"], + "services": [ + "chat", + "reasoning" + ], "models": [ { "modelName": "ernie-5.0-thinking-latest", "displayName": "文心 ERNIE 5.0", - "serviceType": ["chat", "reasoning"], + "serviceType": [ + "chat", + "reasoning" + ], "description": "百度最新旗舰模型,支持深度推理、多模态和工具调用,119K输入/64K输出", - "contextWindow": 131072, + "contextWindow": 128000, "maxOutputTokens": 65536, "capabilities": [ "chat", @@ -25,19 +31,67 @@ "multimodal", "tool_use" ], - "inputPrice": 8, - "outputPrice": 32, - "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "inputPrice": 6, + "outputPrice": 24, + "extra": { + "pricingTiers": [ + { + "maxInputTokens": 32768, + "inputPrice": 6, + "outputPrice": 24 + }, + { + "maxInputTokens": 131072, + "inputPrice": 10, + "outputPrice": 40 + } + ], + "thinkingMaxTokens": 60000 + } }, { - "modelName": "ernie-4.5-turbo-128k-latest", - "displayName": "文心 ERNIE 4.5 Turbo", - "serviceType": ["chat"], - "description": "百度高性价比长上下文模型,128K 窗口,适合日常对话和文档处理", + "modelName": "ernie-5.0", + "displayName": "文心 ERNIE 5.0 非思考", + "serviceType": [ + "chat" + ], + "description": "百度 ERNIE 5.0 旗舰模型,128K 上下文,119K 最大输入,最大输出 65,536 token", "contextWindow": 131072, - "maxOutputTokens": 8192, + "maxOutputTokens": 65536, + "capabilities": [ + "chat", + "vision", + "multimodal", + "tool_use" + ], + "inputPrice": 6, + "outputPrice": 24, + "defaultTemperature": 0.95, + "defaultTopP": 0.7, + "extra": { + "pricingTiers": [ + { + "maxInputTokens": 32768, + "inputPrice": 6, + "outputPrice": 24 + }, + { + "maxInputTokens": 131072, + "inputPrice": 10, + "outputPrice": 40 + } + ] + } + }, + { + "modelName": "ernie-4.5-turbo-128k", + "displayName": "文心 ERNIE 4.5 Turbo", + "serviceType": [ + "chat" + ], + "description": "百度高性价比长上下文模型,128K 窗口,最大输出 12,288 token", + "contextWindow": 131072, + "maxOutputTokens": 12288, "capabilities": [ "chat", "code", @@ -47,9 +101,57 @@ ], "inputPrice": 0.8, "outputPrice": 3.2, - "defaultTemperature": 1, + "defaultTemperature": 0.8, "defaultTopP": 1, - "extra": {} + "extra": { + "cacheHitPrice": 0.2 + } + }, + { + "modelName": "ernie-4.5-turbo-20260402", + "displayName": "文心 ERNIE 4.5 Turbo 20260402", + "serviceType": [ + "chat" + ], + "description": "百度 ERNIE 4.5 Turbo 20260402 快照,128K 上下文,最大输出 12,288 token", + "contextWindow": 131072, + "maxOutputTokens": 12288, + "capabilities": [ + "chat", + "code", + "vision", + "long_context", + "fast" + ], + "inputPrice": 0.8, + "outputPrice": 3.2, + "defaultTemperature": 0.8, + "defaultTopP": 1, + "extra": { + "cacheHitPrice": 0.2 + } + }, + { + "modelName": "ernie-x1.1", + "displayName": "文心 ERNIE X1.1", + "serviceType": [ + "reasoning" + ], + "description": "百度 ERNIE X1.1 深度思考模型,64K 上下文,最大输出 65,536 token", + "contextWindow": 65536, + "maxOutputTokens": 65536, + "capabilities": [ + "chat", + "reasoning", + "deep_thinking", + "math", + "code" + ], + "inputPrice": 1, + "outputPrice": 4, + "extra": { + "thinkingMaxTokens": 65536 + } } ] } diff --git a/compute/providers/cohere.json b/compute/providers/cohere.json index 7815cc8..cd81655 100644 --- a/compute/providers/cohere.json +++ b/compute/providers/cohere.json @@ -9,15 +9,21 @@ "enabled": false, "status": "unconfigured", "priceCurrency": "USD", - "services": ["chat", "embedding", "rerank"], + "services": [ + "chat", + "embedding", + "rerank" + ], "models": [ { "modelName": "command-a-03-2025", "displayName": "Command A", - "serviceType": ["chat"], - "description": "Cohere 旗舰对话模型,262K 上下文,擅长 RAG 和工具调用", - "contextWindow": 262144, - "maxOutputTokens": 8192, + "serviceType": [ + "chat" + ], + "description": "Cohere 旗舰对话模型,256K 上下文,擅长 RAG 和工具调用", + "contextWindow": 256000, + "maxOutputTokens": 8000, "capabilities": [ "chat", "reasoning", @@ -27,18 +33,46 @@ "rag", "long_context" ], - "inputPrice": 2.50, - "outputPrice": 10.00, - "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "inputPrice": 2.5, + "outputPrice": 10.0, + "defaultTemperature": 0.3, + "defaultTopP": 0.75, + "extra": { + "pricingNotes": "Prices are per 1M tokens." + } + }, + { + "modelName": "command-r7b-12-2024", + "displayName": "Command R7B", + "serviceType": [ + "fast" + ], + "description": "Cohere 小型高速对话模型,适合高吞吐、低延迟场景", + "contextWindow": 128000, + "maxOutputTokens": 4000, + "capabilities": [ + "chat", + "reasoning", + "tool_use", + "rag", + "fast" + ], + "inputPrice": 0.0375, + "outputPrice": 0.15, + "defaultTemperature": 0.3, + "defaultTopP": 0.75, + "extra": { + "pricingNotes": "Prices are per 1M tokens." + } }, { "modelName": "embed-v4.0", "displayName": "Embed V4", - "serviceType": ["embedding"], - "description": "Cohere 最新 Embedding 模型,131K 上下文,多语言支持", - "contextWindow": 131072, + "serviceType": [ + "embedding" + ], + "description": "Cohere 最新 Embedding 模型,128K 上下文,多语言支持", + "contextWindow": 128000, "maxOutputTokens": 0, "capabilities": [ "text_embedding", @@ -46,22 +80,35 @@ ], "inputPrice": 0.12, "outputPrice": 0, - "extra": {} + "extra": { + "dimensions": [ + 256, + 512, + 1024, + 1536 + ], + "defaultDimension": 1536, + "pricingNotes": "Embedding models are priced by embedded tokens; price is per 1M tokens." + } }, { "modelName": "rerank-v3.5", "displayName": "Rerank V3.5", - "serviceType": ["rerank"], - "description": "Cohere 语义重排序模型,用于检索结果精排", - "contextWindow": 0, + "serviceType": [ + "rerank" + ], + "description": "Cohere 语义重排序模型,用于检索结果精排,默认单文档截断 4096 token", + "contextWindow": 4096, "maxOutputTokens": 0, "capabilities": [ "rerank", "semantic_reranking" ], - "inputPrice": 2.00, + "inputPrice": 2.0, "outputPrice": 0, - "extra": {} + "extra": { + "pricingNotes": "Cohere Rerank is priced by search units, not input/output tokens; one search unit is one query with up to 100 documents. The token price fields are retained for schema compatibility." + } } ] } diff --git a/compute/providers/dashscope.json b/compute/providers/dashscope.json index 3d89263..13d6d43 100644 --- a/compute/providers/dashscope.json +++ b/compute/providers/dashscope.json @@ -23,13 +23,97 @@ "translation" ], "models": [ + { + "modelName": "qwen3.6-plus", + "displayName": "阿里云 Qwen3.6-Plus", + "serviceType": [ + "chat", + "vision" + ], + "description": "通义千问3.6 Plus,支持文本/图像/视频输入,100万上下文,支持内置工具和 Function Calling", + "contextWindow": 1000000, + "maxOutputTokens": 65536, + "capabilities": [ + "chat", + "reasoning", + "code", + "multilingual", + "long_context", + "tool_use", + "agent", + "vision" + ], + "inputPrice": 2, + "outputPrice": 12, + "defaultTemperature": 0.6, + "defaultTopP": 0.95, + "extra": { + "pricingTiers": [ + { + "maxInputTokens": 256000, + "inputPrice": 2, + "outputPrice": 12, + "thinkingOutputPrice": 12 + }, + { + "maxInputTokens": 1000000, + "inputPrice": 8, + "outputPrice": 48, + "thinkingOutputPrice": 48 + } + ], + "thinkingMaxTokens": 81920 + } + }, + { + "modelName": "qwen3.6-flash", + "displayName": "阿里云 Qwen3.6-Flash", + "serviceType": [ + "chat", + "vision" + ], + "description": "通义千问3.6 Flash,高性价比多模态模型,100万上下文,支持内置工具和 Function Calling", + "contextWindow": 1000000, + "maxOutputTokens": 65536, + "capabilities": [ + "chat", + "reasoning", + "code", + "multilingual", + "long_context", + "tool_use", + "vision", + "fast" + ], + "inputPrice": 1.2, + "outputPrice": 7.2, + "defaultTemperature": 0.6, + "defaultTopP": 0.95, + "extra": { + "pricingTiers": [ + { + "maxInputTokens": 256000, + "inputPrice": 1.2, + "outputPrice": 7.2 + }, + { + "maxInputTokens": 1000000, + "inputPrice": 4.8, + "outputPrice": 28.8 + } + ], + "thinkingMaxTokens": 131072 + } + }, { "modelName": "qwen3.5-plus", "displayName": "阿里云 Qwen3.5-Plus", - "serviceType": ["chat"], - "description": "通义千问3.5旗舰模型,支持文本/图像/视频输入,效果媲美Qwen3 Max,性价比更优", - "contextWindow": 262144, - "maxOutputTokens": 16384, + "serviceType": [ + "chat" + ], + "description": "通义千问3.5旗舰模型,支持文本/图像/视频输入,100万上下文,默认开启思考模式", + "contextWindow": 1000000, + "maxOutputTokens": 65536, "capabilities": [ "chat", "reasoning", @@ -42,16 +126,37 @@ ], "inputPrice": 0.8, "outputPrice": 4.8, - "defaultTemperature": 0.7, - "defaultTopP": 0.8, - "extra": {} + "defaultTemperature": 0.6, + "defaultTopP": 0.95, + "extra": { + "pricingTiers": [ + { + "maxInputTokens": 128000, + "inputPrice": 0.8, + "outputPrice": 4.8 + }, + { + "maxInputTokens": 256000, + "inputPrice": 2, + "outputPrice": 12 + }, + { + "maxInputTokens": 1000000, + "inputPrice": 4, + "outputPrice": 24 + } + ], + "thinkingMaxTokens": 81920 + } }, { "modelName": "qwen-max", "displayName": "阿里云 qwen-max", - "serviceType": ["chat"], - "description": "通义千问Max,千亿参数旗舰模型,最强性能", - "contextWindow": 131072, + "serviceType": [ + "chat" + ], + "description": "通义千问Max,千亿参数旗舰模型,32K 上下文", + "contextWindow": 32768, "maxOutputTokens": 8192, "capabilities": [ "chat", @@ -69,10 +174,12 @@ { "modelName": "qwen-plus", "displayName": "阿里云 qwen-plus", - "serviceType": ["chat"], - "description": "通义千问Plus,支持思考模式,128K 上下文", - "contextWindow": 131072, - "maxOutputTokens": 8192, + "serviceType": [ + "chat" + ], + "description": "通义千问Plus,支持思考模式,100万上下文", + "contextWindow": 1000000, + "maxOutputTokens": 32768, "capabilities": [ "chat", "reasoning", @@ -81,36 +188,66 @@ ], "inputPrice": 0.8, "outputPrice": 2, - "defaultTemperature": 0.6, + "defaultTemperature": 0.7, "defaultTopP": 0.8, - "extra": {} + "extra": { + "pricingTiers": [ + { + "maxInputTokens": 128000, + "inputPrice": 0.8, + "outputPrice": 2, + "thinkingOutputPrice": 8 + }, + { + "maxInputTokens": 256000, + "inputPrice": 2.4, + "outputPrice": 20, + "thinkingOutputPrice": 24 + }, + { + "maxInputTokens": 1000000, + "inputPrice": 4.8, + "outputPrice": 48, + "thinkingOutputPrice": 64 + } + ], + "thinkingMaxTokens": 81920 + } }, { "modelName": "qwen-turbo", "displayName": "阿里云 qwen-turbo", - "serviceType": ["chat"], - "description": "通义千问Turbo,支持100万上下文,高性价比", + "serviceType": [ + "chat" + ], + "description": "通义千问Turbo,非思考模式支持100万上下文,思考模式支持131K上下文", "contextWindow": 1000000, - "maxOutputTokens": 8192, + "maxOutputTokens": 16384, "capabilities": [ "chat", "code", "long_context", "fast" ], - "inputPrice": 0.3, - "outputPrice": 0.6, + "inputPrice": 0.367, + "outputPrice": 1.468, "defaultTemperature": 0.7, "defaultTopP": 0.8, - "extra": {} + "extra": { + "thinkingContextWindow": 131072, + "thinkingOutputPrice": 3.67, + "thinkingMaxTokens": 38912 + } }, { "modelName": "qwen-long", "displayName": "阿里云 qwen-long", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "通义千问Long,支持1000万上下文,适合超长文档处理,降价97%", "contextWindow": 10000000, - "maxOutputTokens": 8192, + "maxOutputTokens": 32768, "capabilities": [ "chat", "ultra_long_context", @@ -118,14 +255,16 @@ ], "inputPrice": 0.5, "outputPrice": 2, - "defaultTemperature": 0.7, + "defaultTemperature": 1, "defaultTopP": 0.8, "extra": {} }, { "modelName": "qwen3-max", "displayName": "阿里云 Qwen3-Max", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "通义千问3代旗舰模型,262K 上下文,支持思考模式", "contextWindow": 262144, "maxOutputTokens": 65536, @@ -148,10 +287,12 @@ { "modelName": "qwen3-vl-plus", "displayName": "阿里云 Qwen3-VL-Plus", - "serviceType": ["vision"], + "serviceType": [ + "vision" + ], "description": "通义千问3代视觉模型Plus版,支持图像理解", - "contextWindow": 131072, - "maxOutputTokens": 8192, + "contextWindow": 262144, + "maxOutputTokens": 32768, "capabilities": [ "chat", "vision", @@ -161,15 +302,19 @@ ], "inputPrice": 1.5, "outputPrice": 6, + "defaultTemperature": 0.7, + "defaultTopP": 0.8, "extra": {} }, { "modelName": "qwen3-vl-flash", "displayName": "阿里云 Qwen3-VL-Flash", - "serviceType": ["vision"], + "serviceType": [ + "vision" + ], "description": "通义千问3代视觉模型Flash版,高性价比", - "contextWindow": 131072, - "maxOutputTokens": 8192, + "contextWindow": 262144, + "maxOutputTokens": 32768, "capabilities": [ "chat", "vision", @@ -178,13 +323,18 @@ ], "inputPrice": 0.8, "outputPrice": 3, + "defaultTemperature": 0.7, + "defaultTopP": 0.8, "extra": {} }, { "modelName": "text-embedding-v3", "displayName": "text-embedding-v3", - "serviceType": ["embedding"], + "serviceType": [ + "embedding" + ], "description": "阿里云文本向量化", + "contextWindow": 8192, "capabilities": [ "text_embedding", "semantic_search", @@ -196,8 +346,11 @@ { "modelName": "text-embedding-v4", "displayName": "阿里云 text-embedding-v4", - "serviceType": ["embedding"], + "serviceType": [ + "embedding" + ], "description": "阿里云百炼文本向量模型v4,支持多维度与多语种", + "contextWindow": 8192, "capabilities": [ "text_embedding", "semantic_search", @@ -209,8 +362,11 @@ { "modelName": "qwen3-rerank", "displayName": "Qwen3-Rerank", - "serviceType": ["rerank"], + "serviceType": [ + "rerank" + ], "description": "检索重排序,支持 100+ 语种", + "contextWindow": 120000, "capabilities": [ "rerank", "semantic_reranking", @@ -222,7 +378,9 @@ { "modelName": "cosyvoice-v2", "displayName": "CosyVoice V2", - "serviceType": ["tts"], + "serviceType": [ + "tts" + ], "description": "通义语音合成,支持中英文", "capabilities": [ "tts", @@ -246,7 +404,9 @@ { "modelName": "paraformer-v2", "displayName": "Paraformer V2", - "serviceType": ["asr"], + "serviceType": [ + "asr" + ], "description": "通义语音识别", "capabilities": [ "asr", @@ -266,7 +426,9 @@ { "modelName": "wanx-v2", "displayName": "通义万相 V2", - "serviceType": ["image_gen"], + "serviceType": [ + "image_gen" + ], "description": "通义万相文生图", "capabilities": [ "image_generation", @@ -289,7 +451,9 @@ { "modelName": "wanx-video", "displayName": "通义万相视频", - "serviceType": ["video_gen"], + "serviceType": [ + "video_gen" + ], "description": "通义万相文生视频", "capabilities": [ "video_generation", @@ -306,7 +470,9 @@ { "modelName": "cosyvoice-clone", "displayName": "CosyVoice 声音复刻", - "serviceType": ["voice_clone"], + "serviceType": [ + "voice_clone" + ], "description": "通义声音复刻,少量样本即可克隆", "capabilities": [ "voice_clone", @@ -321,9 +487,12 @@ { "modelName": "qwen-omni-turbo", "displayName": "Qwen-Omni-Turbo", - "serviceType": ["omni"], + "serviceType": [ + "omni" + ], "description": "通义全能模型,支持音视频文本多模态", "contextWindow": 32768, + "maxOutputTokens": 2048, "capabilities": [ "chat", "vision", @@ -332,19 +501,26 @@ "multimodal", "tool_use" ], + "defaultTemperature": 0.7, + "defaultTopP": 0.01, "extra": {} }, { "modelName": "qwen3-max-trans", "displayName": "Qwen3-Max(翻译)", - "serviceType": ["translation"], + "serviceType": [ + "translation" + ], "description": "通义千问翻译,中英日韩等多语种互译", "contextWindow": 131072, + "maxOutputTokens": 8192, "capabilities": [ "translation", "multilingual", "chinese_optimized" ], + "defaultTemperature": 0.65, + "defaultTopP": 0.8, "extra": {} } ] diff --git a/compute/providers/deepseek.json b/compute/providers/deepseek.json index 69cd4f1..a8f513d 100644 --- a/compute/providers/deepseek.json +++ b/compute/providers/deepseek.json @@ -17,10 +17,12 @@ { "modelName": "deepseek-chat", "displayName": "DeepSeek V3.2", - "serviceType": ["chat"], - "description": "高性价比通用对话模型,64K 上下文", - "contextWindow": 64000, - "maxOutputTokens": 8000, + "serviceType": [ + "chat" + ], + "description": "高性价比通用对话模型,128K 上下文", + "contextWindow": 128000, + "maxOutputTokens": 8192, "capabilities": [ "chat", "code", @@ -28,18 +30,22 @@ "multilingual" ], "inputPrice": 2, - "outputPrice": 8, + "outputPrice": 3, "defaultTemperature": 1, "defaultTopP": 1, - "extra": {} + "extra": { + "cacheHitPrice": 0.2 + } }, { "modelName": "deepseek-reasoner", "displayName": "DeepSeek R1", - "serviceType": ["reasoning"], - "description": "DeepSeek R1 推理模型,64K 上下文,深度推理能力,思维链最长 32K", - "contextWindow": 64000, - "maxOutputTokens": 8192, + "serviceType": [ + "reasoning" + ], + "description": "DeepSeek V3.2 思考模式,128K 上下文,思维链默认 32K,最大输出 64K", + "contextWindow": 128000, + "maxOutputTokens": 65536, "capabilities": [ "chat", "reasoning", @@ -49,9 +55,9 @@ ], "inputPrice": 4, "outputPrice": 16, - "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "extra": { + "cacheHitPrice": 0.2 + } } ] } diff --git a/compute/providers/google.json b/compute/providers/google.json index b4a1dba..91a0813 100644 --- a/compute/providers/google.json +++ b/compute/providers/google.json @@ -15,12 +15,106 @@ "embedding" ], "models": [ + { + "modelName": "gemini-3.1-pro-preview", + "displayName": "Gemini 3.1 Pro Preview", + "serviceType": [ + "chat" + ], + "description": "Google Gemini 3.1 Pro Preview,面向复杂推理、编码和智能体工作流", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "chat", + "reasoning", + "code", + "vision", + "ultra_long_context", + "tool_use" + ], + "inputPrice": 2.0, + "outputPrice": 12.0, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "pricingNotes": "Standard paid tier text/image/video price for prompts <= 200K tokens; prompts > 200K are $4 input and $18 output per 1M tokens.", + "cachePricing": { + "inputCacheRead": 0.2, + "inputCacheReadOver200k": 0.4, + "storagePerMillionTokensPerHour": 4.5 + } + } + }, + { + "modelName": "gemini-3-flash-preview", + "displayName": "Gemini 3 Flash Preview", + "serviceType": [ + "chat" + ], + "description": "Google Gemini 3 Flash Preview,高速多模态推理模型", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "chat", + "reasoning", + "code", + "vision", + "ultra_long_context", + "tool_use", + "fast" + ], + "inputPrice": 0.5, + "outputPrice": 3.0, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "pricingNotes": "Standard paid tier text/image/video price; audio input is $1.00 per 1M tokens.", + "cachePricing": { + "inputCacheRead": 0.05, + "audioInputCacheRead": 0.1, + "storagePerMillionTokensPerHour": 1.0 + } + } + }, + { + "modelName": "gemini-3.1-flash-lite-preview", + "displayName": "Gemini 3.1 Flash-Lite Preview", + "serviceType": [ + "fast" + ], + "description": "Google Gemini 3.1 Flash-Lite Preview,低成本高吞吐多模态模型", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "chat", + "reasoning", + "code", + "vision", + "ultra_long_context", + "tool_use", + "fast" + ], + "inputPrice": 0.25, + "outputPrice": 1.5, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "pricingNotes": "Standard paid tier text/image/video price; audio input is $0.50 per 1M tokens.", + "cachePricing": { + "inputCacheRead": 0.025, + "audioInputCacheRead": 0.05, + "storagePerMillionTokensPerHour": 1.0 + } + } + }, { "modelName": "gemini-2.5-pro", "displayName": "Gemini 2.5 Pro", - "serviceType": ["chat"], - "description": "Google 最新旗舰模型,100万上下文,多模态", - "contextWindow": 1000000, + "serviceType": [ + "chat" + ], + "description": "Google 最新旗舰模型,1,048,576 token 上下文,多模态", + "contextWindow": 1048576, "maxOutputTokens": 65536, "capabilities": [ "chat", @@ -39,9 +133,11 @@ { "modelName": "gemini-2.5-flash", "displayName": "Gemini 2.5 Flash", - "serviceType": ["chat"], - "description": "Google 高性价比模型,100万上下文,速度快", - "contextWindow": 1000000, + "serviceType": [ + "chat" + ], + "description": "Google 高性价比模型,1,048,576 token 上下文,速度快", + "contextWindow": 1048576, "maxOutputTokens": 65536, "capabilities": [ "chat", @@ -52,8 +148,8 @@ "tool_use", "fast" ], - "inputPrice": 0.30, - "outputPrice": 2.50, + "inputPrice": 0.3, + "outputPrice": 2.5, "defaultTemperature": 1, "defaultTopP": 0.95, "extra": {} @@ -61,13 +157,16 @@ { "modelName": "text-embedding-005", "displayName": "Text Embedding 005", - "serviceType": ["embedding"], + "serviceType": [ + "embedding" + ], "description": "Google 文本嵌入模型,768维度", + "contextWindow": 2048, "capabilities": [ "text_embedding", "semantic_search" ], - "inputPrice": 0.10, + "inputPrice": 0.1, "extra": {} } ] diff --git a/compute/providers/internal-testing.json b/compute/providers/internal-testing.json index e4d28e3..002fed2 100644 --- a/compute/providers/internal-testing.json +++ b/compute/providers/internal-testing.json @@ -23,7 +23,7 @@ "chat" ], "description": "MiniMax M2.7 高速版,低延迟吞吐优化,200K 上下文", - "contextWindow": 200000, + "contextWindow": 204800, "maxOutputTokens": 8192, "capabilities": [ "chat", @@ -33,6 +33,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { @@ -41,9 +42,9 @@ "serviceType": [ "chat" ], - "description": "智谱 GLM-5.1 新一代旗舰模型,编程与推理能力断档领先,204K 上下文", - "contextWindow": 204800, - "maxOutputTokens": 131072, + "description": "智谱 GLM-5.1 新一代旗舰模型,编程与推理能力断档领先,200K 上下文", + "contextWindow": 200000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", @@ -54,7 +55,8 @@ ], "inputPrice": 0, "outputPrice": 0, - "defaultTemperature": 0.95, + "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { @@ -63,9 +65,9 @@ "serviceType": [ "chat" ], - "description": "智谱 GLM-5 大语言模型", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "description": "智谱 GLM-5 大语言模型,200K 上下文", + "contextWindow": 200000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", @@ -74,6 +76,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { @@ -83,8 +86,8 @@ "chat" ], "description": "智谱 GLM-5-Turbo 大语言模型,更快的推理速度", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "contextWindow": 200000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", @@ -93,6 +96,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { @@ -112,6 +116,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { @@ -122,7 +127,7 @@ ], "description": "月之暗面 Kimi K2.6 Code Preview,万亿参数 MoE 架构(32B 激活),256K 上下文,代码与智能体能力增强", "contextWindow": 256000, - "maxOutputTokens": 16384, + "maxOutputTokens": 32768, "capabilities": [ "chat", "reasoning", @@ -134,7 +139,8 @@ ], "inputPrice": 0, "outputPrice": 0, - "defaultTemperature": 0.7, + "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { @@ -144,8 +150,8 @@ "chat" ], "description": "月之暗面 Kimi-2.5 大语言模型", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "contextWindow": 256000, + "maxOutputTokens": 32768, "capabilities": [ "chat", "reasoning", @@ -158,6 +164,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { @@ -167,7 +174,7 @@ "chat" ], "description": "MiniMax M2.5 大语言模型", - "contextWindow": 128000, + "contextWindow": 204800, "maxOutputTokens": 8192, "capabilities": [ "chat", @@ -179,6 +186,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { @@ -202,6 +210,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 0.7, + "defaultTopP": 0.8, "extra": {} }, { @@ -211,8 +220,8 @@ "chat" ], "description": "阿里通义千问 Qwen3.5 Plus", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "contextWindow": 1000000, + "maxOutputTokens": 65536, "capabilities": [ "chat", "reasoning", @@ -224,7 +233,8 @@ ], "inputPrice": 0, "outputPrice": 0, - "defaultTemperature": 1, + "defaultTemperature": 0.7, + "defaultTopP": 0.8, "extra": {} }, { @@ -246,6 +256,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 0.7, + "defaultTopP": 0.8, "extra": {} }, { @@ -267,6 +278,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 0.7, + "defaultTopP": 0.8, "extra": {} }, { @@ -276,8 +288,8 @@ "chat" ], "description": "阿里通义千问 Qwen3 Max (2026-01-23)", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "contextWindow": 262144, + "maxOutputTokens": 32768, "capabilities": [ "chat", "reasoning", @@ -289,7 +301,8 @@ ], "inputPrice": 0, "outputPrice": 0, - "defaultTemperature": 1, + "defaultTemperature": 0.7, + "defaultTopP": 0.8, "extra": {} }, { @@ -299,8 +312,8 @@ "chat" ], "description": "字节跳动豆包 Seed 2.0 Code Preview (260215)", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "contextWindow": 256000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", @@ -309,6 +322,7 @@ "inputPrice": 0, "outputPrice": 0, "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} } ] diff --git a/compute/providers/lingyiwanwu.json b/compute/providers/lingyiwanwu.json index 7589d1f..0a4e6e9 100644 --- a/compute/providers/lingyiwanwu.json +++ b/compute/providers/lingyiwanwu.json @@ -17,7 +17,9 @@ { "modelName": "yi-lightning", "displayName": "Yi Lightning", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "零一万物高性能模型,16K上下文,智能路由,极高性价比", "contextWindow": 16000, "maxOutputTokens": 4096, @@ -30,14 +32,16 @@ ], "inputPrice": 0.99, "outputPrice": 0.99, - "defaultTemperature": 0.7, + "defaultTemperature": 0.3, "defaultTopP": 0.9, "extra": {} }, { "modelName": "yi-vision-v2", "displayName": "Yi Vision V2", - "serviceType": ["vision"], + "serviceType": [ + "vision" + ], "description": "零一万物复杂视觉任务模型,16K上下文,支持多图分析", "contextWindow": 16000, "maxOutputTokens": 4096, @@ -51,7 +55,7 @@ ], "inputPrice": 6, "outputPrice": 6, - "defaultTemperature": 0.7, + "defaultTemperature": 0.3, "defaultTopP": 0.9, "extra": {} } diff --git a/compute/providers/minimax.json b/compute/providers/minimax.json index 9e17698..28c5bef 100644 --- a/compute/providers/minimax.json +++ b/compute/providers/minimax.json @@ -9,12 +9,19 @@ "enabled": false, "status": "unconfigured", "priceCurrency": "CNY", - "services": ["chat", "fast", "reasoning"], + "services": [ + "chat", + "fast", + "reasoning" + ], "models": [ { "modelName": "MiniMax-M2.7", "displayName": "MiniMax M2.7", - "serviceType": ["chat", "reasoning"], + "serviceType": [ + "chat", + "reasoning" + ], "description": "MiniMax 新一代旗舰模型,擅长复杂 Agent、软件工程与专业办公任务", "contextWindow": 204800, "maxOutputTokens": 131072, @@ -24,14 +31,22 @@ "code", "tool_use" ], + "inputPrice": 2.1, + "outputPrice": 8.4, "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "defaultTopP": 0.95, + "extra": { + "cacheReadPrice": 0.42, + "cacheWritePrice": 2.625, + "outputSpeedTps": 60 + } }, { "modelName": "MiniMax-M2.7-highspeed", "displayName": "MiniMax M2.7 高速版", - "serviceType": ["fast"], + "serviceType": [ + "fast" + ], "description": "MiniMax M2.7 极速版,效果一致,输出更快,适合低延迟场景", "contextWindow": 204800, "maxOutputTokens": 131072, @@ -42,16 +57,25 @@ "tool_use", "fast" ], + "inputPrice": 4.2, + "outputPrice": 16.8, "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "defaultTopP": 0.95, + "extra": { + "cacheReadPrice": 0.42, + "cacheWritePrice": 2.625, + "outputSpeedTps": 100 + } }, { "modelName": "MiniMax-M2.5", "displayName": "MiniMax M2.5", - "serviceType": ["chat", "reasoning"], - "description": "MiniMax 最新旗舰模型,230B参数(10B激活)MoE架构,百万级上下文,支持深度推理和工具调用", - "contextWindow": 1000000, + "serviceType": [ + "chat", + "reasoning" + ], + "description": "MiniMax M2.5,204.8K 上下文,支持深度推理和工具调用", + "contextWindow": 204800, "maxOutputTokens": 131072, "capabilities": [ "chat", @@ -63,15 +87,21 @@ "inputPrice": 2.1, "outputPrice": 8.4, "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "defaultTopP": 0.95, + "extra": { + "cacheReadPrice": 0.21, + "cacheWritePrice": 2.625, + "outputSpeedTps": 60 + } }, { "modelName": "MiniMax-M2.5-highspeed", "displayName": "MiniMax M2.5 高速版", - "serviceType": ["fast"], + "serviceType": [ + "fast" + ], "description": "MiniMax M2.5 高速推理版本(100TPS),适合低延迟场景", - "contextWindow": 200000, + "contextWindow": 204800, "maxOutputTokens": 131072, "capabilities": [ "chat", @@ -80,16 +110,22 @@ "tool_use", "fast" ], - "inputPrice": 2.1, + "inputPrice": 4.2, "outputPrice": 16.8, "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "defaultTopP": 0.95, + "extra": { + "cacheReadPrice": 0.21, + "cacheWritePrice": 2.625, + "outputSpeedTps": 100 + } }, { "modelName": "MiniMax-M2.1", "displayName": "MiniMax M2.1", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "MiniMax 上一代主力模型,200K 上下文,性价比优秀", "contextWindow": 204800, "maxOutputTokens": 131072, @@ -102,13 +138,19 @@ "inputPrice": 2.1, "outputPrice": 8.4, "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "defaultTopP": 0.95, + "extra": { + "cacheReadPrice": 0.21, + "cacheWritePrice": 2.625, + "outputSpeedTps": 60 + } }, { "modelName": "MiniMax-M2.1-highspeed", "displayName": "MiniMax M2.1 高速版", - "serviceType": ["fast"], + "serviceType": [ + "fast" + ], "description": "MiniMax M2.1 高速推理版本,适合低延迟场景", "contextWindow": 204800, "maxOutputTokens": 131072, @@ -119,16 +161,22 @@ "tool_use", "fast" ], - "inputPrice": 2.1, + "inputPrice": 4.2, "outputPrice": 16.8, "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "defaultTopP": 0.95, + "extra": { + "cacheReadPrice": 0.21, + "cacheWritePrice": 2.625, + "outputSpeedTps": 100 + } }, { "modelName": "MiniMax-Text-01", "displayName": "MiniMax Text 01", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "MiniMax 百万级长上下文文本模型,擅长长文档处理", "contextWindow": 1000000, "maxOutputTokens": 131072, @@ -140,8 +188,8 @@ ], "inputPrice": 1.4, "outputPrice": 7.7, - "defaultTemperature": 1, - "defaultTopP": 1, + "defaultTemperature": 0.1, + "defaultTopP": 0.95, "extra": {} } ] diff --git a/compute/providers/mistral.json b/compute/providers/mistral.json index 44dc440..1a66915 100644 --- a/compute/providers/mistral.json +++ b/compute/providers/mistral.json @@ -9,12 +9,17 @@ "enabled": false, "status": "unconfigured", "priceCurrency": "USD", - "services": ["chat", "fast"], + "services": [ + "chat", + "fast" + ], "models": [ { "modelName": "mistral-large-latest", "displayName": "Mistral Large 3", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "Mistral 旗舰模型,256K 上下文,支持视觉和工具调用", "contextWindow": 256000, "maxOutputTokens": 262144, @@ -26,18 +31,20 @@ "tool_use", "long_context" ], - "inputPrice": 0.50, - "outputPrice": 1.50, - "defaultTemperature": 1, + "inputPrice": 0.5, + "outputPrice": 1.5, + "defaultTemperature": 0.7, "defaultTopP": 1, "extra": {} }, { "modelName": "mistral-small-latest", "displayName": "Mistral Small 3.2", - "serviceType": ["fast"], + "serviceType": [ + "fast" + ], "description": "Mistral 高效小模型,低延迟,适合快速推理场景", - "contextWindow": 130000, + "contextWindow": 128000, "maxOutputTokens": 8192, "capabilities": [ "chat", @@ -47,26 +54,28 @@ "fast", "tool_use" ], - "inputPrice": 0.10, - "outputPrice": 0.30, - "defaultTemperature": 1, + "inputPrice": 0.1, + "outputPrice": 0.3, + "defaultTemperature": 0.7, "defaultTopP": 1, "extra": {} }, { "modelName": "codestral-latest", "displayName": "Codestral", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "Mistral 专业代码模型,支持 Fill-in-the-Middle 补全", - "contextWindow": 256000, + "contextWindow": 128000, "maxOutputTokens": 32768, "capabilities": [ "code", "fill_in_the_middle" ], - "inputPrice": 0.30, - "outputPrice": 0.90, - "defaultTemperature": 1, + "inputPrice": 0.3, + "outputPrice": 0.9, + "defaultTemperature": 0.7, "defaultTopP": 1, "extra": {} } diff --git a/compute/providers/moonshot.json b/compute/providers/moonshot.json index 99be6c9..1fc23f6 100644 --- a/compute/providers/moonshot.json +++ b/compute/providers/moonshot.json @@ -14,13 +14,64 @@ "reasoning" ], "models": [ + { + "modelName": "kimi-k2.6", + "displayName": "Kimi K2.6", + "serviceType": [ + "chat" + ], + "description": "月之暗面 Kimi K2.6,最新多模态模型,长程代码编写与 Agent 自主执行能力增强,256K 上下文,支持思考/非思考模式", + "contextWindow": 256000, + "maxOutputTokens": 32768, + "capabilities": [ + "chat", + "reasoning", + "code", + "tool_use", + "agent", + "long_context", + "vision", + "video_understanding", + "image_understanding" + ], + "inputPrice": 6.5, + "outputPrice": 27, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "thinking": { + "default": "enabled", + "disabledTemperature": 0.6 + }, + "cacheHitPrice": 1.1, + "supportedImageFormats": [ + "png", + "jpeg", + "webp", + "gif" + ], + "supportedVideoFormats": [ + "mp4", + "mpeg", + "mov", + "avi", + "x-flv", + "mpg", + "webm", + "wmv", + "3gpp" + ] + } + }, { "modelName": "kimi-k2.5", "displayName": "Kimi K2.5", - "serviceType": ["chat"], - "description": "月之暗面Kimi K2.5,2026年1月发布的原生多模态模型,支持视觉编码和智能体集群", + "serviceType": [ + "chat" + ], + "description": "月之暗面Kimi K2.5,原生多模态模型,256K 上下文,支持思考/非思考模式", "contextWindow": 256000, - "maxOutputTokens": 16384, + "maxOutputTokens": 32768, "capabilities": [ "chat", "reasoning", @@ -32,15 +83,40 @@ ], "inputPrice": 4, "outputPrice": 21, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, - "extra": {} + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "thinking": { + "default": "enabled", + "disabledTemperature": 0.6 + }, + "cacheHitPrice": 0.7, + "supportedImageFormats": [ + "png", + "jpeg", + "webp", + "gif" + ], + "supportedVideoFormats": [ + "mp4", + "mpeg", + "mov", + "avi", + "x-flv", + "mpg", + "webm", + "wmv", + "3gpp" + ] + } }, { "modelName": "kimi-k2", "displayName": "Kimi K2", - "serviceType": ["chat"], - "description": "月之暗面Kimi K2,万亿参数MoE模型(320B激活),256K上下文,专为智能体设计", + "serviceType": [ + "chat" + ], + "description": "月之暗面Kimi K2,万亿参数MoE模型(32B激活),256K上下文,专为智能体设计", "contextWindow": 256000, "maxOutputTokens": 8192, "capabilities": [ @@ -49,19 +125,22 @@ "code", "tool_use", "agent", - "long_context", - "vision" + "long_context" ], "inputPrice": 4, "outputPrice": 16, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, - "extra": {} + "defaultTemperature": 0.6, + "defaultTopP": 1, + "extra": { + "cacheHitPrice": 1 + } }, { "modelName": "kimi-k2-thinking", "displayName": "Kimi K2 思考版", - "serviceType": ["reasoning"], + "serviceType": [ + "reasoning" + ], "description": "月之暗面Kimi K2思考版,256K上下文,深度推理能力", "contextWindow": 256000, "maxOutputTokens": 16384, @@ -74,14 +153,18 @@ ], "inputPrice": 4, "outputPrice": 16, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, - "extra": {} + "defaultTemperature": 1, + "defaultTopP": 1, + "extra": { + "cacheHitPrice": 1 + } }, { "modelName": "moonshot-v1-8k", "displayName": "Moonshot V1 8K", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "月之暗面标准模型,8K上下文", "contextWindow": 8192, "maxOutputTokens": 4096, @@ -91,14 +174,16 @@ ], "inputPrice": 2, "outputPrice": 2, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 0, + "defaultTopP": 1, "extra": {} }, { "modelName": "moonshot-v1-32k", "displayName": "Moonshot V1 32K", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "月之暗面标准模型,32K上下文", "contextWindow": 32768, "maxOutputTokens": 4096, @@ -109,14 +194,16 @@ ], "inputPrice": 5, "outputPrice": 5, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 0, + "defaultTopP": 1, "extra": {} }, { "modelName": "moonshot-v1-128k", "displayName": "Moonshot V1 128K", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "月之暗面标准模型,128K上下文", "contextWindow": 131072, "maxOutputTokens": 4096, @@ -127,8 +214,8 @@ ], "inputPrice": 10, "outputPrice": 10, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 0, + "defaultTopP": 1, "extra": {} } ] diff --git a/compute/providers/ollama.json b/compute/providers/ollama.json index 3262dcf..153c2b0 100644 --- a/compute/providers/ollama.json +++ b/compute/providers/ollama.json @@ -19,11 +19,14 @@ "serviceType": ["chat"], "description": "本地运行的 Llama 3.1 70B", "contextWindow": 131072, + "maxOutputTokens": 8192, "capabilities": [ "chat", "code", "reasoning" ], + "defaultTemperature": 0.8, + "defaultTopP": 0.9, "extra": {} } ] diff --git a/compute/providers/openai.json b/compute/providers/openai.json index 338bb9f..e1e9c6a 100644 --- a/compute/providers/openai.json +++ b/compute/providers/openai.json @@ -25,7 +25,9 @@ { "modelName": "gpt-5.2", "displayName": "GPT-5.2", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "OpenAI 最新旗舰模型,400K 上下文,编码和智能体任务优化", "contextWindow": 400000, "maxOutputTokens": 128000, @@ -46,7 +48,9 @@ { "modelName": "gpt-5.2-pro", "displayName": "GPT-5.2 Pro", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "OpenAI GPT-5.2 Pro 专业版,最高性能", "contextWindow": 400000, "maxOutputTokens": 128000, @@ -67,7 +71,9 @@ { "modelName": "gpt-5.1", "displayName": "GPT-5.1", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "OpenAI GPT-5.1,400K 上下文,高性能旗舰", "contextWindow": 400000, "maxOutputTokens": 128000, @@ -88,7 +94,9 @@ { "modelName": "gpt-5", "displayName": "GPT-5", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "OpenAI GPT-5 新一代旗舰模型,统一所有模型能力", "contextWindow": 400000, "maxOutputTokens": 128000, @@ -109,10 +117,12 @@ { "modelName": "gpt-5-pro", "displayName": "GPT-5 Pro", - "serviceType": ["chat"], - "description": "OpenAI GPT-5 Pro 专业版高性能模型", + "serviceType": [ + "chat" + ], + "description": "OpenAI GPT-5 Pro 专业版高性能推理模型", "contextWindow": 400000, - "maxOutputTokens": 128000, + "maxOutputTokens": 272000, "capabilities": [ "chat", "reasoning", @@ -130,10 +140,12 @@ { "modelName": "gpt-5-mini", "displayName": "GPT-5-mini", - "serviceType": ["chat"], - "description": "高性价比模型,适合日常对话", - "contextWindow": 128000, - "maxOutputTokens": 32768, + "serviceType": [ + "chat" + ], + "description": "高性价比模型,适合日常对话,400K 上下文", + "contextWindow": 400000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "fast", @@ -150,17 +162,19 @@ { "modelName": "gpt-5-nano", "displayName": "GPT-5-nano", - "serviceType": ["fast"], - "description": "极速响应模型,适合简单任务", - "contextWindow": 128000, - "maxOutputTokens": 16384, + "serviceType": [ + "fast" + ], + "description": "极速响应模型,适合简单任务,400K 上下文", + "contextWindow": 400000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "code", "fast" ], - "inputPrice": 0.02, - "outputPrice": 0.08, + "inputPrice": 0.05, + "outputPrice": 0.4, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -168,9 +182,11 @@ { "modelName": "gpt-4.1", "displayName": "GPT-4.1", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "OpenAI GPT-4.1,支持 1M 上下文,编码和指令遵循能力大幅提升", - "contextWindow": 1000000, + "contextWindow": 1047576, "maxOutputTokens": 32768, "capabilities": [ "chat", @@ -180,8 +196,8 @@ "ultra_long_context", "tool_use" ], - "inputPrice": 2.00, - "outputPrice": 8.00, + "inputPrice": 2, + "outputPrice": 8, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -189,9 +205,11 @@ { "modelName": "gpt-4.1-mini", "displayName": "GPT-4.1 mini", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "OpenAI GPT-4.1 mini,1M 上下文,高性价比", - "contextWindow": 1000000, + "contextWindow": 1047576, "maxOutputTokens": 32768, "capabilities": [ "chat", @@ -200,8 +218,8 @@ "ultra_long_context", "tool_use" ], - "inputPrice": 0.40, - "outputPrice": 1.60, + "inputPrice": 0.4, + "outputPrice": 1.6, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -209,9 +227,11 @@ { "modelName": "gpt-4.1-nano", "displayName": "GPT-4.1 nano", - "serviceType": ["fast"], + "serviceType": [ + "fast" + ], "description": "OpenAI GPT-4.1 nano,1M 上下文,极致性价比", - "contextWindow": 1000000, + "contextWindow": 1047576, "maxOutputTokens": 32768, "capabilities": [ "chat", @@ -219,8 +239,8 @@ "ultra_long_context", "fast" ], - "inputPrice": 0.10, - "outputPrice": 0.40, + "inputPrice": 0.1, + "outputPrice": 0.4, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -228,7 +248,9 @@ { "modelName": "gpt-4o", "displayName": "GPT-4o", - "serviceType": ["vision"], + "serviceType": [ + "vision" + ], "description": "多模态模型,支持图像理解", "contextWindow": 128000, "maxOutputTokens": 16384, @@ -238,8 +260,8 @@ "image_understanding", "tool_use" ], - "inputPrice": 2.50, - "outputPrice": 10.00, + "inputPrice": 2.5, + "outputPrice": 10, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -247,7 +269,9 @@ { "modelName": "gpt-4o-mini", "displayName": "GPT-4o mini", - "serviceType": ["vision"], + "serviceType": [ + "vision" + ], "description": "OpenAI GPT-4o mini,高性价比多模态模型", "contextWindow": 128000, "maxOutputTokens": 16384, @@ -258,7 +282,7 @@ "tool_use" ], "inputPrice": 0.15, - "outputPrice": 0.60, + "outputPrice": 0.6, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -266,8 +290,11 @@ { "modelName": "text-embedding-3-small", "displayName": "text-embedding-3-small", - "serviceType": ["embedding"], + "serviceType": [ + "embedding" + ], "description": "轻量文本向量化模型,1536 维", + "contextWindow": 8192, "capabilities": [ "text_embedding", "semantic_search" @@ -278,8 +305,11 @@ { "modelName": "text-embedding-3-large", "displayName": "text-embedding-3-large", - "serviceType": ["embedding"], + "serviceType": [ + "embedding" + ], "description": "高精度文本向量化模型,3072 维", + "contextWindow": 8192, "capabilities": [ "text_embedding", "semantic_search" @@ -290,11 +320,14 @@ { "modelName": "tts-1", "displayName": "TTS-1", - "serviceType": ["tts"], + "serviceType": [ + "tts" + ], "description": "标准语音合成", "capabilities": [ "tts" ], + "inputPrice": 15, "extra": { "voices": [ "alloy", @@ -315,11 +348,14 @@ { "modelName": "tts-1-hd", "displayName": "TTS-1-HD", - "serviceType": ["tts"], + "serviceType": [ + "tts" + ], "description": "高清语音合成", "capabilities": [ "tts" ], + "inputPrice": 30, "extra": { "voices": [ "alloy", @@ -340,12 +376,15 @@ { "modelName": "whisper-1", "displayName": "Whisper", - "serviceType": ["asr"], + "serviceType": [ + "asr" + ], "description": "通用语音识别", "capabilities": [ "asr", "multilingual" ], + "inputPrice": 0.006, "extra": { "maxAudioLength": 600, "supportedInputFormats": [ @@ -363,7 +402,9 @@ { "modelName": "o3", "displayName": "o3", - "serviceType": ["responses"], + "serviceType": [ + "responses" + ], "description": "OpenAI o3 推理模型,200K 上下文,支持工具调用", "contextWindow": 200000, "maxOutputTokens": 100000, @@ -375,8 +416,8 @@ "science", "tool_use" ], - "inputPrice": 2.00, - "outputPrice": 8.00, + "inputPrice": 2, + "outputPrice": 8, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -384,7 +425,9 @@ { "modelName": "o3-pro", "displayName": "o3-pro", - "serviceType": ["responses"], + "serviceType": [ + "responses" + ], "description": "OpenAI o3-pro 高级推理模型,200K 上下文", "contextWindow": 200000, "maxOutputTokens": 100000, @@ -395,8 +438,8 @@ "math", "science" ], - "inputPrice": 20.00, - "outputPrice": 80.00, + "inputPrice": 20, + "outputPrice": 80, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -404,9 +447,11 @@ { "modelName": "o3-mini", "displayName": "o3-mini", - "serviceType": ["responses"], - "description": "OpenAI o3-mini 推理模型,128K 上下文", - "contextWindow": 128000, + "serviceType": [ + "responses" + ], + "description": "OpenAI o3-mini 推理模型,200K 上下文", + "contextWindow": 200000, "maxOutputTokens": 100000, "capabilities": [ "reasoning", @@ -414,8 +459,8 @@ "code", "fast" ], - "inputPrice": 1.10, - "outputPrice": 4.40, + "inputPrice": 1.1, + "outputPrice": 4.4, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -423,7 +468,9 @@ { "modelName": "o4-mini", "displayName": "o4-mini", - "serviceType": ["responses"], + "serviceType": [ + "responses" + ], "description": "OpenAI o4-mini 推理模型,200K 上下文,支持工具调用", "contextWindow": 200000, "maxOutputTokens": 100000, @@ -434,8 +481,8 @@ "tool_use", "vision" ], - "inputPrice": 1.10, - "outputPrice": 4.40, + "inputPrice": 1.1, + "outputPrice": 4.4, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} @@ -443,7 +490,9 @@ { "modelName": "dall-e-3", "displayName": "DALL-E 3", - "serviceType": ["image_gen"], + "serviceType": [ + "image_gen" + ], "description": "高质量文生图模型", "capabilities": [ "image_generation", @@ -464,9 +513,12 @@ { "modelName": "gpt-4o-realtime", "displayName": "GPT-4o Realtime", - "serviceType": ["omni"], + "serviceType": [ + "omni" + ], "description": "全能实时模型,支持语音 + 文本 + 视觉", - "contextWindow": 128000, + "contextWindow": 32000, + "maxOutputTokens": 4096, "capabilities": [ "chat", "vision", @@ -474,6 +526,10 @@ "asr", "realtime" ], + "inputPrice": 4, + "outputPrice": 16, + "defaultTemperature": 0.7, + "defaultTopP": 1, "extra": { "supportedModes": [ "speech-to-speech", @@ -485,13 +541,21 @@ { "modelName": "gpt-4o-realtime-preview", "displayName": "GPT-4o Realtime Preview", - "serviceType": ["realtime_voice"], + "serviceType": [ + "realtime_voice" + ], "description": "实时语音交互模型", + "contextWindow": 32000, + "maxOutputTokens": 4096, "capabilities": [ "realtime", "speech_to_speech", "low_latency" ], + "inputPrice": 5, + "outputPrice": 20, + "defaultTemperature": 0.7, + "defaultTopP": 1, "extra": { "supportedModes": [ "speech-to-speech", diff --git a/compute/providers/openrouter.json b/compute/providers/openrouter.json index 87647ca..2f0a5b1 100644 --- a/compute/providers/openrouter.json +++ b/compute/providers/openrouter.json @@ -16,7 +16,7 @@ "displayName": "OpenRouter Auto", "serviceType": ["chat"], "description": "OpenRouter 自动路由,智能选择最优模型", - "contextWindow": 200000, + "contextWindow": 2000000, "maxOutputTokens": 16384, "capabilities": [ "chat", @@ -29,19 +29,16 @@ "extra": {} }, { - "modelName": "google/gemini-2.5-flash-exp:free", - "displayName": "Gemini 2.5 Flash (免费)", + "modelName": "openai/gpt-oss-120b:free", + "displayName": "GPT-OSS 120B (免费)", "serviceType": ["chat"], - "description": "通过 OpenRouter 免费使用的 Gemini 2.5 Flash,1M 上下文", - "contextWindow": 1048576, - "maxOutputTokens": 65535, + "description": "通过 OpenRouter 免费使用的 OpenAI GPT-OSS 120B", + "contextWindow": 131072, + "maxOutputTokens": 131072, "capabilities": [ "chat", "reasoning", - "vision", - "tool_use", - "fast", - "long_context" + "code" ], "inputPrice": 0.00, "outputPrice": 0.00, @@ -54,8 +51,8 @@ "displayName": "Qwen3 Coder 480B (免费)", "serviceType": ["chat"], "description": "通过 OpenRouter 免费使用的 Qwen3 Coder 480B", - "contextWindow": 262144, - "maxOutputTokens": 32768, + "contextWindow": 262000, + "maxOutputTokens": 262000, "capabilities": [ "code", "reasoning", diff --git a/compute/providers/perplexity.json b/compute/providers/perplexity.json index bc7a12f..c297fd9 100644 --- a/compute/providers/perplexity.json +++ b/compute/providers/perplexity.json @@ -9,12 +9,16 @@ "enabled": false, "status": "unconfigured", "priceCurrency": "USD", - "services": ["chat"], + "services": [ + "chat" + ], "models": [ { "modelName": "sonar-pro", "displayName": "Sonar Pro", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "Perplexity 旗舰搜索增强模型,内置实时联网搜索和引用", "contextWindow": 200000, "maxOutputTokens": 8192, @@ -24,16 +28,23 @@ "reasoning", "citation" ], - "inputPrice": 3.00, - "outputPrice": 15.00, - "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "inputPrice": 3.0, + "outputPrice": 15.0, + "extra": { + "requestPricingPer1k": { + "low": 6, + "medium": 10, + "high": 14 + }, + "pricingNotes": "Total Sonar API cost includes token costs plus a request fee based on search context size." + } }, { "modelName": "sonar-reasoning-pro", "displayName": "Sonar Reasoning Pro", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "Perplexity 深度推理模型,内置联网搜索和深度思考", "contextWindow": 128000, "maxOutputTokens": 8192, @@ -43,16 +54,23 @@ "reasoning", "deep_thinking" ], - "inputPrice": 2.00, - "outputPrice": 8.00, - "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "inputPrice": 2.0, + "outputPrice": 8.0, + "extra": { + "requestPricingPer1k": { + "low": 6, + "medium": 10, + "high": 14 + }, + "pricingNotes": "Total Sonar API cost includes token costs plus a request fee based on search context size." + } }, { "modelName": "sonar", "displayName": "Sonar", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "Perplexity 轻量搜索模型,低成本联网搜索", "contextWindow": 128000, "maxOutputTokens": 4096, @@ -62,11 +80,16 @@ "citation", "fast" ], - "inputPrice": 1.00, - "outputPrice": 1.00, - "defaultTemperature": 1, - "defaultTopP": 1, - "extra": {} + "inputPrice": 1.0, + "outputPrice": 1.0, + "extra": { + "requestPricingPer1k": { + "low": 5, + "medium": 8, + "high": 12 + }, + "pricingNotes": "Total Sonar API cost includes token costs plus a request fee based on search context size." + } } ] } diff --git a/compute/providers/siliconflow.json b/compute/providers/siliconflow.json index e778373..07e5fbb 100644 --- a/compute/providers/siliconflow.json +++ b/compute/providers/siliconflow.json @@ -9,53 +9,62 @@ "enabled": false, "status": "unconfigured", "priceCurrency": "CNY", - "services": ["chat", "embedding"], + "services": [ + "chat", + "embedding" + ], "models": [ { "modelName": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "displayName": "Qwen3 Coder 480B (硅基)", - "serviceType": ["chat"], - "description": "开源最强代码模型,480B MoE 架构,擅长代码生成和 Agent 场景", + "serviceType": [ + "chat" + ], + "description": "Qwen3-Coder-480B-A35B-Instruct,480B MoE(35B 激活),原生 256K 上下文,可通过外推扩展至 1M,面向 Agentic Coding", "contextWindow": 262144, - "maxOutputTokens": 32768, + "maxOutputTokens": 262144, "capabilities": [ + "chat", "code", - "reasoning", + "long_context", "agent", "tool_use" ], "inputPrice": 8, "outputPrice": 16, - "defaultTemperature": 1, - "defaultTopP": 1, + "defaultTemperature": 0.7, + "defaultTopP": 0.7, "extra": {} }, { - "modelName": "Qwen/Qwen3-235B-A22B-Instruct", + "modelName": "Qwen/Qwen3-235B-A22B-Instruct-2507", "displayName": "Qwen3 235B (硅基)", - "serviceType": ["chat"], - "description": "Qwen3 旗舰通用模型,235B MoE 架构,多语言能力突出", + "serviceType": [ + "chat" + ], + "description": "Qwen3-235B-A22B-Instruct-2507,235B MoE(22B 激活)通用指令模型,262K 上下文", "contextWindow": 262144, - "maxOutputTokens": 38912, + "maxOutputTokens": 262144, "capabilities": [ "chat", - "reasoning", "code", "multilingual", "tool_use", "agent", - "vision" + "long_context" ], "inputPrice": 2.5, "outputPrice": 10, - "defaultTemperature": 1, - "defaultTopP": 1, + "defaultTemperature": 0.7, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "BAAI/bge-m3", "displayName": "BGE-M3 (硅基)", - "serviceType": ["embedding"], + "serviceType": [ + "embedding" + ], "description": "多语言 Embedding 模型,支持 8K 上下文,免费版", "contextWindow": 8192, "maxOutputTokens": 0, diff --git a/compute/providers/tencent.json b/compute/providers/tencent.json index 92e097a..b28024c 100644 --- a/compute/providers/tencent.json +++ b/compute/providers/tencent.json @@ -9,14 +9,19 @@ "enabled": false, "status": "unconfigured", "priceCurrency": "CNY", - "services": ["chat", "reasoning"], + "services": [ + "chat", + "reasoning" + ], "models": [ { "modelName": "hunyuan-2.0-thinking-20251109", "displayName": "混元 2.0 Think", - "serviceType": ["reasoning"], + "serviceType": [ + "reasoning" + ], "description": "腾讯混元最新推理模型,128K输入/64K输出,MoE架构406B总参数", - "contextWindow": 131072, + "contextWindow": 196608, "maxOutputTokens": 65536, "capabilities": [ "chat", @@ -28,14 +33,63 @@ "outputPrice": 15.9, "defaultTemperature": 1, "defaultTopP": 1, - "extra": {} + "extra": { + "pricingTiers": [ + { + "maxInputTokens": 32768, + "inputPrice": 3.975, + "outputPrice": 15.9 + }, + { + "maxInputTokens": 131072, + "inputPrice": 5.3, + "outputPrice": 21.2 + } + ] + } }, { - "modelName": "hunyuan-turbo-s", + "modelName": "hunyuan-2.0-instruct-20251111", + "displayName": "混元 2.0 Instruct", + "serviceType": [ + "chat" + ], + "description": "腾讯混元 2.0 指令模型,最大输入128K/最大输出16K,支持联网搜索和 Function Calling", + "contextWindow": 147456, + "maxOutputTokens": 16384, + "capabilities": [ + "chat", + "code", + "long_context", + "tool_use" + ], + "inputPrice": 3.18, + "outputPrice": 7.95, + "defaultTemperature": 1, + "defaultTopP": 1, + "extra": { + "pricingTiers": [ + { + "maxInputTokens": 32768, + "inputPrice": 3.18, + "outputPrice": 7.95 + }, + { + "maxInputTokens": 131072, + "inputPrice": 4.505, + "outputPrice": 11.13 + } + ] + } + }, + { + "modelName": "hunyuan-turbos-latest", "displayName": "混元 Turbo S", - "serviceType": ["chat"], - "description": "腾讯混元高速模型,262K 上下文,支持推理和工具调用", - "contextWindow": 262144, + "serviceType": [ + "chat" + ], + "description": "腾讯混元高速模型,32K输入/16K输出,支持推理和工具调用", + "contextWindow": 32768, "maxOutputTokens": 16384, "capabilities": [ "chat", diff --git a/compute/providers/volcengine.json b/compute/providers/volcengine.json index ca8eef2..1a06adb 100644 --- a/compute/providers/volcengine.json +++ b/compute/providers/volcengine.json @@ -23,10 +23,12 @@ { "modelName": "doubao-2.0-pro", "displayName": "豆包 2.0 Pro", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎豆包2.0旗舰模型,256K上下文,相比Gemini 3 Pro和GPT 5.2有较大成本优势", "contextWindow": 256000, - "maxOutputTokens": 16384, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", @@ -37,17 +39,19 @@ ], "inputPrice": 3.2, "outputPrice": 16, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "doubao-seed-1.8", "displayName": "豆包 Seed-1.8", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎豆包1.8旗舰模型,256K上下文,支持深度思考和多模态", "contextWindow": 256000, - "maxOutputTokens": 16384, + "maxOutputTokens": 16000, "capabilities": [ "chat", "reasoning", @@ -58,17 +62,19 @@ ], "inputPrice": 0.8, "outputPrice": 2, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "doubao-seed-1.6", "displayName": "豆包 Seed-1.6", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎豆包1.6旗舰模型,256K上下文", "contextWindow": 256000, - "maxOutputTokens": 16384, + "maxOutputTokens": 32000, "capabilities": [ "chat", "reasoning", @@ -79,17 +85,19 @@ ], "inputPrice": 0.8, "outputPrice": 8, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "doubao-seed-1.6-thinking", "displayName": "豆包 Seed-1.6 思考版", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎豆包1.6深度思考模型,256K上下文", "contextWindow": 256000, - "maxOutputTokens": 32768, + "maxOutputTokens": 16000, "capabilities": [ "reasoning", "math", @@ -99,16 +107,19 @@ ], "inputPrice": 0.8, "outputPrice": 8, - "defaultTemperature": 0.3, + "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "doubao-seed-1.6-flash", "displayName": "豆包 Seed-1.6 Flash", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎豆包1.6快速版,成本较前代下降63%", "contextWindow": 256000, - "maxOutputTokens": 8192, + "maxOutputTokens": 16000, "capabilities": [ "chat", "code", @@ -117,17 +128,19 @@ ], "inputPrice": 0.15, "outputPrice": 1.5, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "doubao-seed-1.6-lite", "displayName": "豆包 Seed-1.6 Lite", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎豆包1.6轻量版,极致性价比", - "contextWindow": 128000, - "maxOutputTokens": 4096, + "contextWindow": 256000, + "maxOutputTokens": 32000, "capabilities": [ "chat", "code", @@ -135,17 +148,19 @@ ], "inputPrice": 0.3, "outputPrice": 0.6, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "doubao-seed-1.6-vision", "displayName": "豆包 Seed-1.6 视觉版", - "serviceType": ["vision"], + "serviceType": [ + "vision" + ], "description": "火山引擎豆包1.6视觉模型,256K上下文,支持视频理解", "contextWindow": 256000, - "maxOutputTokens": 16384, + "maxOutputTokens": 32000, "capabilities": [ "chat", "vision", @@ -155,14 +170,16 @@ ], "inputPrice": 0.8, "outputPrice": 8, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "doubao-seed-code", "displayName": "豆包 Seed-Code", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎豆包编程专用模型,256K上下文", "contextWindow": 256000, "maxOutputTokens": 32768, @@ -174,16 +191,19 @@ ], "inputPrice": 1.2, "outputPrice": 8, - "defaultTemperature": 0.3, + "defaultTemperature": 1, + "defaultTopP": 0.7, "extra": {} }, { "modelName": "deepseek-v3.2", "displayName": "DeepSeek V3.2 (火山引擎)", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎托管的DeepSeek V3.2,128K上下文", "contextWindow": 128000, - "maxOutputTokens": 8000, + "maxOutputTokens": 32000, "capabilities": [ "chat", "reasoning", @@ -199,10 +219,12 @@ { "modelName": "deepseek-r1", "displayName": "DeepSeek R1 (火山引擎)", - "serviceType": ["reasoning"], - "description": "火山引擎托管的DeepSeek R1推理模型,64K上下文", - "contextWindow": 64000, - "maxOutputTokens": 32768, + "serviceType": [ + "reasoning" + ], + "description": "火山引擎托管的DeepSeek R1推理模型,128K上下文", + "contextWindow": 128000, + "maxOutputTokens": 65536, "capabilities": [ "reasoning", "math", @@ -212,16 +234,16 @@ ], "inputPrice": 4, "outputPrice": 16, - "defaultTemperature": 1, - "defaultTopP": 1, "extra": {} }, { "modelName": "kimi-k2-volcengine", "displayName": "Kimi K2 (火山引擎)", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎托管的Kimi K2,万亿参数MoE模型,128K上下文", - "contextWindow": 128000, + "contextWindow": 256000, "maxOutputTokens": 8192, "capabilities": [ "chat", @@ -233,17 +255,19 @@ ], "inputPrice": 4, "outputPrice": 16, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 0.6, + "defaultTopP": 1, "extra": {} }, { "modelName": "glm-4-7", "displayName": "GLM-4.7 (火山引擎)", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "火山引擎托管的智谱GLM-4.7,200K上下文", "contextWindow": 200000, - "maxOutputTokens": 200000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", @@ -254,14 +278,16 @@ ], "inputPrice": 4, "outputPrice": 16, - "defaultTemperature": 0.7, - "defaultTopP": 0.9, + "defaultTemperature": 1, + "defaultTopP": 0.95, "extra": {} }, { "modelName": "doubao-embedding", "displayName": "豆包 Embedding", - "serviceType": ["embedding"], + "serviceType": [ + "embedding" + ], "description": "火山引擎豆包标准向量模型,2560维度", "contextWindow": 4096, "capabilities": [ @@ -276,7 +302,9 @@ { "modelName": "volc-mega-tts-clone", "displayName": "火山声音复刻", - "serviceType": ["voice_clone"], + "serviceType": [ + "voice_clone" + ], "description": "火山引擎声音复刻,支持极少样本", "capabilities": [ "voice_clone", @@ -291,7 +319,9 @@ { "modelName": "volc-realtime-voice", "displayName": "火山实时语音", - "serviceType": ["realtime_voice"], + "serviceType": [ + "realtime_voice" + ], "description": "火山引擎实时语音交互", "capabilities": [ "realtime", @@ -309,7 +339,9 @@ { "modelName": "volc-simultaneous", "displayName": "火山同声传译", - "serviceType": ["simultaneous_interpret"], + "serviceType": [ + "simultaneous_interpret" + ], "description": "火山引擎同声传译,支持中英双向", "capabilities": [ "simultaneous_interpretation", @@ -323,7 +355,9 @@ { "modelName": "volc-translation", "displayName": "火山翻译", - "serviceType": ["translation"], + "serviceType": [ + "translation" + ], "description": "火山引擎机器翻译,支持多语种互译", "capabilities": [ "translation", diff --git a/compute/providers/xai.json b/compute/providers/xai.json index a06693e..9002922 100644 --- a/compute/providers/xai.json +++ b/compute/providers/xai.json @@ -9,15 +9,21 @@ "enabled": false, "status": "unconfigured", "priceCurrency": "USD", - "services": ["chat", "reasoning"], + "services": [ + "chat", + "reasoning" + ], "models": [ { - "modelName": "grok-4-0709", - "displayName": "Grok 4", - "serviceType": ["chat", "reasoning"], - "description": "xAI 最新旗舰推理模型,262K 上下文,支持工具调用", - "contextWindow": 262144, - "maxOutputTokens": 8192, + "modelName": "grok-4.20-0309-reasoning", + "displayName": "Grok 4.20 Reasoning", + "serviceType": [ + "chat", + "reasoning" + ], + "description": "xAI 旗舰推理模型,支持函数调用与结构化输出", + "contextWindow": 2000000, + "maxOutputTokens": 16384, "capabilities": [ "chat", "reasoning", @@ -25,17 +31,20 @@ "vision", "tool_use" ], - "inputPrice": 3.00, - "outputPrice": 15.00, + "inputPrice": 2.0, + "outputPrice": 6.0, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} }, { - "modelName": "grok-4-1-fast", - "displayName": "Grok 4.1 Fast", - "serviceType": ["chat"], - "description": "xAI 超长上下文高速模型,2M token 窗口", + "modelName": "grok-4-1-fast-reasoning", + "displayName": "Grok 4.1 Fast Reasoning", + "serviceType": [ + "chat", + "reasoning" + ], + "description": "xAI 高速推理模型,2M 上下文窗口", "contextWindow": 2000000, "maxOutputTokens": 16384, "capabilities": [ @@ -47,8 +56,8 @@ "fast", "long_context" ], - "inputPrice": 0.20, - "outputPrice": 0.50, + "inputPrice": 0.2, + "outputPrice": 0.5, "defaultTemperature": 1, "defaultTopP": 1, "extra": {} diff --git a/compute/providers/xunfei.json b/compute/providers/xunfei.json index fc6eff5..c236129 100644 --- a/compute/providers/xunfei.json +++ b/compute/providers/xunfei.json @@ -12,12 +12,12 @@ "services": ["chat", "reasoning"], "models": [ { - "modelName": "x1", + "modelName": "spark-x", "displayName": "讯飞星火 X1", "serviceType": ["reasoning"], - "description": "讯飞最新推理模型,基于全国产算力训练,擅长数学和代码推理", - "contextWindow": 32768, - "maxOutputTokens": 16384, + "description": "讯飞深度推理模型(X2),输入64K/输出128K,支持思考模式与函数调用", + "contextWindow": 65536, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", @@ -25,12 +25,10 @@ "math", "code" ], - "inputPrice": 11, - "outputPrice": 11, - "defaultTemperature": 1, - "defaultTopP": 1, + "defaultTemperature": 1.2, + "defaultTopP": 0.95, "extra": { - "pricingNote": "讯飞按token包计费,不区分输入输出" + "pricingNote": "官方公开文档说明模型价格以控制台及实际购买页为准,未公开固定 token 单价" } }, { @@ -46,12 +44,10 @@ "code", "tool_use" ], - "inputPrice": 60, - "outputPrice": 60, "defaultTemperature": 1, "defaultTopP": 1, "extra": { - "pricingNote": "讯飞按token包计费,不区分输入输出" + "pricingNote": "官方公开文档说明模型价格以控制台及实际购买页为准,未公开固定 token 单价" } } ] diff --git a/compute/providers/zhipu-embedding.json b/compute/providers/zhipu-embedding.json index 76151c8..80fc203 100644 --- a/compute/providers/zhipu-embedding.json +++ b/compute/providers/zhipu-embedding.json @@ -17,16 +17,23 @@ { "modelName": "embedding-3", "displayName": "智谱 embedding-3", - "serviceType": ["embedding"], - "description": "智谱嵌入模型v3,支持自定义维度,单条最大3072 tokens。走 OpenAI 兼容 /embeddings 端点(Anthropic 协议不提供 embeddings)", - "contextWindow": 3072, + "serviceType": [ + "embedding" + ], + "description": "智谱嵌入模型v3,支持自定义维度;模型上下文窗口 8K,单条输入最多 3072 tokens。走 OpenAI 兼容 /embeddings 端点(Anthropic 协议不提供 embeddings)", + "contextWindow": 8192, "capabilities": [ "text_embedding", "semantic_search", - "rag" + "rag", + "custom_dimensions" ], "inputPrice": 0.5, - "extra": {} + "extra": { + "maxInputTokensPerItem": 3072, + "maxBatchItems": 64, + "vectorDimensions": "256-2048" + } } ] } diff --git a/compute/providers/zhipu.json b/compute/providers/zhipu.json index f0197a1..fdfeaa8 100644 --- a/compute/providers/zhipu.json +++ b/compute/providers/zhipu.json @@ -18,10 +18,72 @@ "embedding-3" ], "models": [ + { + "modelName": "glm-5.1", + "displayName": "GLM-5.1", + "serviceType": [ + "chat" + ], + "description": "智谱 GLM-5.1 最新旗舰模型,面向长程 Agentic Coding 与 Autonomous Agent 场景,200K 上下文,128K 最大输出", + "contextWindow": 200000, + "maxOutputTokens": 128000, + "capabilities": [ + "chat", + "reasoning", + "code", + "multilingual", + "deep_thinking", + "long_context", + "math", + "tool_use", + "agent" + ], + "inputPrice": 6, + "outputPrice": 24, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "cacheHitPrice": 1.2, + "thinking": { + "default": "enabled" + } + } + }, + { + "modelName": "glm-5-turbo", + "displayName": "GLM-5-Turbo", + "serviceType": [ + "chat" + ], + "description": "智谱 GLM-5-Turbo,面向 OpenClaw 等长链路任务优化,200K 上下文,128K 最大输出", + "contextWindow": 200000, + "maxOutputTokens": 128000, + "capabilities": [ + "chat", + "reasoning", + "code", + "deep_thinking", + "long_context", + "tool_use", + "agent" + ], + "inputPrice": 5, + "outputPrice": 22, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "cacheHitPrice": 1, + "thinking": { + "default": "enabled" + } + } + }, { "modelName": "glm-5", "displayName": "GLM-5", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "智谱GLM-5,744B参数旗舰基座模型,2026年2月发布,面向Agentic Engineering,编程能力接近Claude Opus", "contextWindow": 200000, "maxOutputTokens": 128000, @@ -38,14 +100,18 @@ ], "inputPrice": 4, "outputPrice": 18, - "defaultTemperature": 0.95, - "defaultTopP": 0.7, - "extra": {} + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "cacheHitPrice": 0.8 + } }, { "modelName": "glm-4.7", "displayName": "GLM-4.7", - "serviceType": ["chat"], + "serviceType": [ + "chat" + ], "description": "智谱GLM-4.7,355B参数MoE架构旗舰模型,200K上下文,编程能力对齐Claude Sonnet", "contextWindow": 200000, "maxOutputTokens": 128000, @@ -58,16 +124,20 @@ "long_context", "tool_use" ], - "inputPrice": 4, - "outputPrice": 16, - "defaultTemperature": 0.95, - "defaultTopP": 0.7, - "extra": {} + "inputPrice": 2, + "outputPrice": 8, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "cacheHitPrice": 0.5 + } }, { "modelName": "glm-4.7-thinking", "displayName": "GLM-4.7 Thinking", - "serviceType": ["reasoning"], + "serviceType": [ + "reasoning" + ], "description": "智谱GLM-4.7深度思考模式,200K上下文,交错式/保留式/轮级思考", "contextWindow": 200000, "maxOutputTokens": 128000, @@ -78,19 +148,57 @@ "deep_thinking", "long_context" ], - "inputPrice": 8, - "outputPrice": 32, - "defaultTemperature": 0.95, - "defaultTopP": 0.7, - "extra": {} + "inputPrice": 2, + "outputPrice": 8, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "cacheHitPrice": 0.5, + "aliasOf": "glm-4.7" + } + }, + { + "modelName": "glm-5v-turbo", + "displayName": "GLM-5V-Turbo", + "serviceType": [ + "vision" + ], + "description": "智谱首个多模态 Coding 基座模型,支持视频、图像、文本和文件输入,200K 上下文,128K 最大输出", + "contextWindow": 200000, + "maxOutputTokens": 128000, + "capabilities": [ + "chat", + "vision", + "video_understanding", + "image_understanding", + "file_understanding", + "reasoning", + "code", + "deep_thinking", + "long_context", + "tool_use", + "agent" + ], + "inputPrice": 5, + "outputPrice": 22, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "cacheHitPrice": 1, + "thinking": { + "default": "enabled" + } + } }, { "modelName": "glm-4.6v", "displayName": "GLM-4.6V", - "serviceType": ["vision"], + "serviceType": [ + "vision" + ], "description": "智谱GLM-4.6V多模态版,106B/12B MoE,支持图像视频理解与工具调用,128K上下文", "contextWindow": 128000, - "maxOutputTokens": 8192, + "maxOutputTokens": 32768, "capabilities": [ "chat", "vision", @@ -99,28 +207,37 @@ "long_context", "tool_use" ], - "inputPrice": 8, - "outputPrice": 32, - "extra": {} + "inputPrice": 1, + "outputPrice": 4, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "cacheHitPrice": 0.2 + } }, { "modelName": "glm-4.6", "displayName": "GLM-4.6", - "serviceType": ["chat"], - "description": "智谱GLM-4.6,增强推理能力,128K上下文", - "contextWindow": 128000, - "maxOutputTokens": 8192, + "serviceType": [ + "chat" + ], + "description": "智谱GLM-4.6,增强推理能力,200K上下文", + "contextWindow": 200000, + "maxOutputTokens": 128000, "capabilities": [ "chat", "reasoning", "code", "multilingual", - "deep_thinking", - "vision" + "deep_thinking" ], - "inputPrice": 5, - "outputPrice": 5, - "extra": {} + "inputPrice": 2, + "outputPrice": 8, + "defaultTemperature": 1, + "defaultTopP": 0.95, + "extra": { + "cacheHitPrice": 0.5 + } } ] } diff --git a/manifest.json b/manifest.json index 734e404..8477697 100644 --- a/manifest.json +++ b/manifest.json @@ -1,6 +1,6 @@ { "version": "1.0.0", - "presetDataVersion": 30, + "presetDataVersion": 31, "updatedAt": "2026-04-25", "description": "DesireCore 官方配置中心" }