修复算力模型采样默认值

2026-07-23 04:33:20 +08:00 · 2026-04-25 21:39:51 +08:00
parent 10465e3570
commit 380e99c577
24 changed files with 1413 additions and 508 deletions
--- a/compute/providers/anthropic.json
+++ b/compute/providers/anthropic.json
@@ -15,12 +15,14 @@
  ],
  "models": [
    {
-      "modelName": "claude-opus-4-6",
-      "displayName": "Claude Opus 4.6",
-      "serviceType": ["chat"],
-      "description": "Anthropic 最新旗舰 Opus 模型，最强能力",
-      "contextWindow": 200000,
-      "maxOutputTokens": 32768,
+      "modelName": "claude-opus-4-7",
+      "displayName": "Claude Opus 4.7",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Anthropic 当前最强通用模型，适合复杂推理和智能体编码任务",
+      "contextWindow": 1000000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -31,83 +33,73 @@
      "inputPrice": 5,
      "outputPrice": 25,
      "defaultTemperature": 1,
-      "extra": {}
+      "extra": {
+        "cachePricing": {
+          "write5m": 6.25,
+          "write1h": 10,
+          "read": 0.5
+        },
+        "pricingNotes": "Prices are per 1M tokens. Opus 4.7 includes the full 1M context window at standard pricing."
+      }
    },
    {
-      "modelName": "claude-opus-4-5",
-      "displayName": "Claude Opus 4.5",
-      "serviceType": ["chat"],
-      "description": "Anthropic Opus 4.5，顶级推理能力",
-      "contextWindow": 200000,
-      "maxOutputTokens": 32768,
-      "capabilities": [
+      "modelName": "claude-sonnet-4-6",
+      "displayName": "Claude Sonnet 4.6",
+      "serviceType": [
        "chat",
-        "reasoning",
-        "code",
-        "vision",
-        "tool_use"
+        "computer_use"
      ],
-      "inputPrice": 5,
-      "outputPrice": 25,
-      "defaultTemperature": 1,
-      "extra": {}
-    },
-    {
-      "modelName": "claude-sonnet-4-5",
-      "displayName": "Claude Sonnet 4.5",
-      "serviceType": ["chat"],
-      "description": "Anthropic Sonnet 4.5，高性能性价比",
-      "contextWindow": 200000,
+      "description": "Anthropic 高智能高速度模型，适合编码、工具使用和智能体任务",
+      "contextWindow": 1000000,
      "maxOutputTokens": 64000,
      "capabilities": [
        "chat",
        "reasoning",
        "code",
        "vision",
-        "tool_use"
-      ],
-      "inputPrice": 3,
-      "outputPrice": 15,
-      "defaultTemperature": 1,
-      "extra": {}
-    },
-    {
-      "modelName": "claude-sonnet-4",
-      "displayName": "Claude Sonnet 4",
-      "serviceType": ["chat"],
-      "description": "Anthropic Sonnet 4 旗舰模型",
-      "contextWindow": 200000,
-      "maxOutputTokens": 64000,
-      "capabilities": [
-        "chat",
-        "reasoning",
-        "code",
-        "vision",
-        "tool_use"
-      ],
-      "inputPrice": 3,
-      "outputPrice": 15,
-      "defaultTemperature": 1,
-      "extra": {}
-    },
-    {
-      "modelName": "claude-sonnet-4-5-computer",
-      "displayName": "Claude Sonnet 4.5 Computer Use",
-      "serviceType": ["computer_use"],
-      "description": "Anthropic 最佳 Computer Use 模型",
-      "contextWindow": 200000,
-      "maxOutputTokens": 64000,
-      "capabilities": [
-        "chat",
-        "vision",
-        "computer_use",
        "tool_use",
-        "reasoning"
+        "computer_use"
      ],
      "inputPrice": 3,
      "outputPrice": 15,
      "defaultTemperature": 1,
-      "extra": {}
+      "extra": {
+        "cachePricing": {
+          "write5m": 3.75,
+          "write1h": 6,
+          "read": 0.3
+        },
+        "pricingNotes": "Prices are per 1M tokens. Sonnet 4.6 includes the full 1M context window at standard pricing."
+      }
+    },
+    {
+      "modelName": "claude-haiku-4-5",
+      "displayName": "Claude Haiku 4.5",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Anthropic 最快模型，具备接近前沿模型的智能水平",
+      "contextWindow": 200000,
+      "maxOutputTokens": 64000,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "vision",
+        "tool_use"
+      ],
+      "inputPrice": 1,
+      "outputPrice": 5,
+      "defaultTemperature": 1,
+      "extra": {
+        "modelId": "claude-haiku-4-5-20251001",
+        "cachePricing": {
+          "write5m": 1.25,
+          "write1h": 2,
+          "read": 0.1
+        },
+        "pricingNotes": "Prices are per 1M tokens."
+      }
    }
  ]
 }
--- a/compute/providers/baichuan.json
+++ b/compute/providers/baichuan.json
@@ -16,10 +16,12 @@
    {
      "modelName": "Baichuan-M3-Plus",
      "displayName": "百川 M3-Plus",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "百川智能M3-Plus医疗增强模型，235B参数，幻觉率仅2.6%，API价格较前代下降70%",
-      "contextWindow": 192000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 32000,
+      "maxOutputTokens": 32000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -31,17 +33,19 @@
      ],
      "inputPrice": 5,
      "outputPrice": 9,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 0.3,
+      "defaultTopP": 0.85,
      "extra": {}
    },
    {
      "modelName": "Baichuan-M3",
      "displayName": "百川 M3",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "百川智能M3新一代开源医疗增强模型，235B参数",
-      "contextWindow": 192000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 32000,
+      "maxOutputTokens": 32000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -52,17 +56,19 @@
      ],
      "inputPrice": 10,
      "outputPrice": 30,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 0.3,
+      "defaultTopP": 0.85,
      "extra": {}
    },
    {
      "modelName": "Baichuan-M2-Plus",
      "displayName": "百川 M2-Plus",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "百川智能M2-Plus旗舰模型，最强推理和创作能力",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 32000,
+      "maxOutputTokens": 32000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -72,17 +78,19 @@
      ],
      "inputPrice": 10,
      "outputPrice": 30,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 0.3,
+      "defaultTopP": 0.85,
      "extra": {}
    },
    {
      "modelName": "Baichuan-M2",
      "displayName": "百川 M2",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "百川智能M2模型，高性能通用模型",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 32000,
+      "maxOutputTokens": 32000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -91,8 +99,8 @@
      ],
      "inputPrice": 2,
      "outputPrice": 20,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 0.3,
+      "defaultTopP": 0.85,
      "extra": {}
    }
  ]
--- a/compute/providers/baidu.json
+++ b/compute/providers/baidu.json
@@ -9,14 +9,20 @@
  "enabled": false,
  "status": "unconfigured",
  "priceCurrency": "CNY",
-  "services": ["chat", "reasoning"],
+  "services": [
+    "chat",
+    "reasoning"
+  ],
  "models": [
    {
      "modelName": "ernie-5.0-thinking-latest",
      "displayName": "文心 ERNIE 5.0",
-      "serviceType": ["chat", "reasoning"],
+      "serviceType": [
+        "chat",
+        "reasoning"
+      ],
      "description": "百度最新旗舰模型，支持深度推理、多模态和工具调用，119K输入/64K输出",
-      "contextWindow": 131072,
+      "contextWindow": 128000,
      "maxOutputTokens": 65536,
      "capabilities": [
        "chat",
@@ -25,19 +31,67 @@
        "multimodal",
        "tool_use"
      ],
-      "inputPrice": 8,
-      "outputPrice": 32,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "inputPrice": 6,
+      "outputPrice": 24,
+      "extra": {
+        "pricingTiers": [
+          {
+            "maxInputTokens": 32768,
+            "inputPrice": 6,
+            "outputPrice": 24
          },
          {
-      "modelName": "ernie-4.5-turbo-128k-latest",
-      "displayName": "文心 ERNIE 4.5 Turbo",
-      "serviceType": ["chat"],
-      "description": "百度高性价比长上下文模型，128K 窗口，适合日常对话和文档处理",
+            "maxInputTokens": 131072,
+            "inputPrice": 10,
+            "outputPrice": 40
+          }
+        ],
+        "thinkingMaxTokens": 60000
+      }
+    },
+    {
+      "modelName": "ernie-5.0",
+      "displayName": "文心 ERNIE 5.0 非思考",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "百度 ERNIE 5.0 旗舰模型，128K 上下文，119K 最大输入，最大输出 65,536 token",
      "contextWindow": 131072,
-      "maxOutputTokens": 8192,
+      "maxOutputTokens": 65536,
+      "capabilities": [
+        "chat",
+        "vision",
+        "multimodal",
+        "tool_use"
+      ],
+      "inputPrice": 6,
+      "outputPrice": 24,
+      "defaultTemperature": 0.95,
+      "defaultTopP": 0.7,
+      "extra": {
+        "pricingTiers": [
+          {
+            "maxInputTokens": 32768,
+            "inputPrice": 6,
+            "outputPrice": 24
+          },
+          {
+            "maxInputTokens": 131072,
+            "inputPrice": 10,
+            "outputPrice": 40
+          }
+        ]
+      }
+    },
+    {
+      "modelName": "ernie-4.5-turbo-128k",
+      "displayName": "文心 ERNIE 4.5 Turbo",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "百度高性价比长上下文模型，128K 窗口，最大输出 12,288 token",
+      "contextWindow": 131072,
+      "maxOutputTokens": 12288,
      "capabilities": [
        "chat",
        "code",
@@ -47,9 +101,57 @@
      ],
      "inputPrice": 0.8,
      "outputPrice": 3.2,
-      "defaultTemperature": 1,
+      "defaultTemperature": 0.8,
      "defaultTopP": 1,
-      "extra": {}
+      "extra": {
+        "cacheHitPrice": 0.2
+      }
+    },
+    {
+      "modelName": "ernie-4.5-turbo-20260402",
+      "displayName": "文心 ERNIE 4.5 Turbo 20260402",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "百度 ERNIE 4.5 Turbo 20260402 快照，128K 上下文，最大输出 12,288 token",
+      "contextWindow": 131072,
+      "maxOutputTokens": 12288,
+      "capabilities": [
+        "chat",
+        "code",
+        "vision",
+        "long_context",
+        "fast"
+      ],
+      "inputPrice": 0.8,
+      "outputPrice": 3.2,
+      "defaultTemperature": 0.8,
+      "defaultTopP": 1,
+      "extra": {
+        "cacheHitPrice": 0.2
+      }
+    },
+    {
+      "modelName": "ernie-x1.1",
+      "displayName": "文心 ERNIE X1.1",
+      "serviceType": [
+        "reasoning"
+      ],
+      "description": "百度 ERNIE X1.1 深度思考模型，64K 上下文，最大输出 65,536 token",
+      "contextWindow": 65536,
+      "maxOutputTokens": 65536,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "deep_thinking",
+        "math",
+        "code"
+      ],
+      "inputPrice": 1,
+      "outputPrice": 4,
+      "extra": {
+        "thinkingMaxTokens": 65536
+      }
    }
  ]
 }
--- a/compute/providers/cohere.json
+++ b/compute/providers/cohere.json
@@ -9,15 +9,21 @@
  "enabled": false,
  "status": "unconfigured",
  "priceCurrency": "USD",
-  "services": ["chat", "embedding", "rerank"],
+  "services": [
+    "chat",
+    "embedding",
+    "rerank"
+  ],
  "models": [
    {
      "modelName": "command-a-03-2025",
      "displayName": "Command A",
-      "serviceType": ["chat"],
-      "description": "Cohere 旗舰对话模型，262K 上下文，擅长 RAG 和工具调用",
-      "contextWindow": 262144,
-      "maxOutputTokens": 8192,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Cohere 旗舰对话模型，256K 上下文，擅长 RAG 和工具调用",
+      "contextWindow": 256000,
+      "maxOutputTokens": 8000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -27,18 +33,46 @@
        "rag",
        "long_context"
      ],
-      "inputPrice": 2.50,
-      "outputPrice": 10.00,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "inputPrice": 2.5,
+      "outputPrice": 10.0,
+      "defaultTemperature": 0.3,
+      "defaultTopP": 0.75,
+      "extra": {
+        "pricingNotes": "Prices are per 1M tokens."
+      }
+    },
+    {
+      "modelName": "command-r7b-12-2024",
+      "displayName": "Command R7B",
+      "serviceType": [
+        "fast"
+      ],
+      "description": "Cohere 小型高速对话模型，适合高吞吐、低延迟场景",
+      "contextWindow": 128000,
+      "maxOutputTokens": 4000,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "tool_use",
+        "rag",
+        "fast"
+      ],
+      "inputPrice": 0.0375,
+      "outputPrice": 0.15,
+      "defaultTemperature": 0.3,
+      "defaultTopP": 0.75,
+      "extra": {
+        "pricingNotes": "Prices are per 1M tokens."
+      }
    },
    {
      "modelName": "embed-v4.0",
      "displayName": "Embed V4",
-      "serviceType": ["embedding"],
-      "description": "Cohere 最新 Embedding 模型，131K 上下文，多语言支持",
-      "contextWindow": 131072,
+      "serviceType": [
+        "embedding"
+      ],
+      "description": "Cohere 最新 Embedding 模型，128K 上下文，多语言支持",
+      "contextWindow": 128000,
      "maxOutputTokens": 0,
      "capabilities": [
        "text_embedding",
@@ -46,22 +80,35 @@
      ],
      "inputPrice": 0.12,
      "outputPrice": 0,
-      "extra": {}
+      "extra": {
+        "dimensions": [
+          256,
+          512,
+          1024,
+          1536
+        ],
+        "defaultDimension": 1536,
+        "pricingNotes": "Embedding models are priced by embedded tokens; price is per 1M tokens."
+      }
    },
    {
      "modelName": "rerank-v3.5",
      "displayName": "Rerank V3.5",
-      "serviceType": ["rerank"],
-      "description": "Cohere 语义重排序模型，用于检索结果精排",
-      "contextWindow": 0,
+      "serviceType": [
+        "rerank"
+      ],
+      "description": "Cohere 语义重排序模型，用于检索结果精排，默认单文档截断 4096 token",
+      "contextWindow": 4096,
      "maxOutputTokens": 0,
      "capabilities": [
        "rerank",
        "semantic_reranking"
      ],
-      "inputPrice": 2.00,
+      "inputPrice": 2.0,
      "outputPrice": 0,
-      "extra": {}
+      "extra": {
+        "pricingNotes": "Cohere Rerank is priced by search units, not input/output tokens; one search unit is one query with up to 100 documents. The token price fields are retained for schema compatibility."
+      }
    }
  ]
 }
--- a/compute/providers/dashscope.json
+++ b/compute/providers/dashscope.json
@@ -23,13 +23,97 @@
    "translation"
  ],
  "models": [
+    {
+      "modelName": "qwen3.6-plus",
+      "displayName": "阿里云 Qwen3.6-Plus",
+      "serviceType": [
+        "chat",
+        "vision"
+      ],
+      "description": "通义千问3.6 Plus，支持文本/图像/视频输入，100万上下文，支持内置工具和 Function Calling",
+      "contextWindow": 1000000,
+      "maxOutputTokens": 65536,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "multilingual",
+        "long_context",
+        "tool_use",
+        "agent",
+        "vision"
+      ],
+      "inputPrice": 2,
+      "outputPrice": 12,
+      "defaultTemperature": 0.6,
+      "defaultTopP": 0.95,
+      "extra": {
+        "pricingTiers": [
+          {
+            "maxInputTokens": 256000,
+            "inputPrice": 2,
+            "outputPrice": 12,
+            "thinkingOutputPrice": 12
+          },
+          {
+            "maxInputTokens": 1000000,
+            "inputPrice": 8,
+            "outputPrice": 48,
+            "thinkingOutputPrice": 48
+          }
+        ],
+        "thinkingMaxTokens": 81920
+      }
+    },
+    {
+      "modelName": "qwen3.6-flash",
+      "displayName": "阿里云 Qwen3.6-Flash",
+      "serviceType": [
+        "chat",
+        "vision"
+      ],
+      "description": "通义千问3.6 Flash，高性价比多模态模型，100万上下文，支持内置工具和 Function Calling",
+      "contextWindow": 1000000,
+      "maxOutputTokens": 65536,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "multilingual",
+        "long_context",
+        "tool_use",
+        "vision",
+        "fast"
+      ],
+      "inputPrice": 1.2,
+      "outputPrice": 7.2,
+      "defaultTemperature": 0.6,
+      "defaultTopP": 0.95,
+      "extra": {
+        "pricingTiers": [
+          {
+            "maxInputTokens": 256000,
+            "inputPrice": 1.2,
+            "outputPrice": 7.2
+          },
+          {
+            "maxInputTokens": 1000000,
+            "inputPrice": 4.8,
+            "outputPrice": 28.8
+          }
+        ],
+        "thinkingMaxTokens": 131072
+      }
+    },
    {
      "modelName": "qwen3.5-plus",
      "displayName": "阿里云 Qwen3.5-Plus",
-      "serviceType": ["chat"],
-      "description": "通义千问3.5旗舰模型，支持文本/图像/视频输入，效果媲美Qwen3 Max，性价比更优",
-      "contextWindow": 262144,
-      "maxOutputTokens": 16384,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "通义千问3.5旗舰模型，支持文本/图像/视频输入，100万上下文，默认开启思考模式",
+      "contextWindow": 1000000,
+      "maxOutputTokens": 65536,
      "capabilities": [
        "chat",
        "reasoning",
@@ -42,16 +126,37 @@
      ],
      "inputPrice": 0.8,
      "outputPrice": 4.8,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.8,
-      "extra": {}
+      "defaultTemperature": 0.6,
+      "defaultTopP": 0.95,
+      "extra": {
+        "pricingTiers": [
+          {
+            "maxInputTokens": 128000,
+            "inputPrice": 0.8,
+            "outputPrice": 4.8
+          },
+          {
+            "maxInputTokens": 256000,
+            "inputPrice": 2,
+            "outputPrice": 12
+          },
+          {
+            "maxInputTokens": 1000000,
+            "inputPrice": 4,
+            "outputPrice": 24
+          }
+        ],
+        "thinkingMaxTokens": 81920
+      }
    },
    {
      "modelName": "qwen-max",
      "displayName": "阿里云 qwen-max",
-      "serviceType": ["chat"],
-      "description": "通义千问Max，千亿参数旗舰模型，最强性能",
-      "contextWindow": 131072,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "通义千问Max，千亿参数旗舰模型，32K 上下文",
+      "contextWindow": 32768,
      "maxOutputTokens": 8192,
      "capabilities": [
        "chat",
@@ -69,10 +174,12 @@
    {
      "modelName": "qwen-plus",
      "displayName": "阿里云 qwen-plus",
-      "serviceType": ["chat"],
-      "description": "通义千问Plus，支持思考模式，128K 上下文",
-      "contextWindow": 131072,
-      "maxOutputTokens": 8192,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "通义千问Plus，支持思考模式，100万上下文",
+      "contextWindow": 1000000,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "reasoning",
@@ -81,36 +188,66 @@
      ],
      "inputPrice": 0.8,
      "outputPrice": 2,
-      "defaultTemperature": 0.6,
+      "defaultTemperature": 0.7,
      "defaultTopP": 0.8,
-      "extra": {}
+      "extra": {
+        "pricingTiers": [
+          {
+            "maxInputTokens": 128000,
+            "inputPrice": 0.8,
+            "outputPrice": 2,
+            "thinkingOutputPrice": 8
+          },
+          {
+            "maxInputTokens": 256000,
+            "inputPrice": 2.4,
+            "outputPrice": 20,
+            "thinkingOutputPrice": 24
+          },
+          {
+            "maxInputTokens": 1000000,
+            "inputPrice": 4.8,
+            "outputPrice": 48,
+            "thinkingOutputPrice": 64
+          }
+        ],
+        "thinkingMaxTokens": 81920
+      }
    },
    {
      "modelName": "qwen-turbo",
      "displayName": "阿里云 qwen-turbo",
-      "serviceType": ["chat"],
-      "description": "通义千问Turbo，支持100万上下文，高性价比",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "通义千问Turbo，非思考模式支持100万上下文，思考模式支持131K上下文",
      "contextWindow": 1000000,
-      "maxOutputTokens": 8192,
+      "maxOutputTokens": 16384,
      "capabilities": [
        "chat",
        "code",
        "long_context",
        "fast"
      ],
-      "inputPrice": 0.3,
-      "outputPrice": 0.6,
+      "inputPrice": 0.367,
+      "outputPrice": 1.468,
      "defaultTemperature": 0.7,
      "defaultTopP": 0.8,
-      "extra": {}
+      "extra": {
+        "thinkingContextWindow": 131072,
+        "thinkingOutputPrice": 3.67,
+        "thinkingMaxTokens": 38912
+      }
    },
    {
      "modelName": "qwen-long",
      "displayName": "阿里云 qwen-long",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "通义千问Long，支持1000万上下文，适合超长文档处理，降价97%",
      "contextWindow": 10000000,
-      "maxOutputTokens": 8192,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "ultra_long_context",
@@ -118,14 +255,16 @@
      ],
      "inputPrice": 0.5,
      "outputPrice": 2,
-      "defaultTemperature": 0.7,
+      "defaultTemperature": 1,
      "defaultTopP": 0.8,
      "extra": {}
    },
    {
      "modelName": "qwen3-max",
      "displayName": "阿里云 Qwen3-Max",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "通义千问3代旗舰模型，262K 上下文，支持思考模式",
      "contextWindow": 262144,
      "maxOutputTokens": 65536,
@@ -148,10 +287,12 @@
    {
      "modelName": "qwen3-vl-plus",
      "displayName": "阿里云 Qwen3-VL-Plus",
-      "serviceType": ["vision"],
+      "serviceType": [
+        "vision"
+      ],
      "description": "通义千问3代视觉模型Plus版，支持图像理解",
-      "contextWindow": 131072,
-      "maxOutputTokens": 8192,
+      "contextWindow": 262144,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "vision",
@@ -161,15 +302,19 @@
      ],
      "inputPrice": 1.5,
      "outputPrice": 6,
+      "defaultTemperature": 0.7,
+      "defaultTopP": 0.8,
      "extra": {}
    },
    {
      "modelName": "qwen3-vl-flash",
      "displayName": "阿里云 Qwen3-VL-Flash",
-      "serviceType": ["vision"],
+      "serviceType": [
+        "vision"
+      ],
      "description": "通义千问3代视觉模型Flash版，高性价比",
-      "contextWindow": 131072,
-      "maxOutputTokens": 8192,
+      "contextWindow": 262144,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "vision",
@@ -178,13 +323,18 @@
      ],
      "inputPrice": 0.8,
      "outputPrice": 3,
+      "defaultTemperature": 0.7,
+      "defaultTopP": 0.8,
      "extra": {}
    },
    {
      "modelName": "text-embedding-v3",
      "displayName": "text-embedding-v3",
-      "serviceType": ["embedding"],
+      "serviceType": [
+        "embedding"
+      ],
      "description": "阿里云文本向量化",
+      "contextWindow": 8192,
      "capabilities": [
        "text_embedding",
        "semantic_search",
@@ -196,8 +346,11 @@
    {
      "modelName": "text-embedding-v4",
      "displayName": "阿里云 text-embedding-v4",
-      "serviceType": ["embedding"],
+      "serviceType": [
+        "embedding"
+      ],
      "description": "阿里云百炼文本向量模型v4，支持多维度与多语种",
+      "contextWindow": 8192,
      "capabilities": [
        "text_embedding",
        "semantic_search",
@@ -209,8 +362,11 @@
    {
      "modelName": "qwen3-rerank",
      "displayName": "Qwen3-Rerank",
-      "serviceType": ["rerank"],
+      "serviceType": [
+        "rerank"
+      ],
      "description": "检索重排序，支持 100+ 语种",
+      "contextWindow": 120000,
      "capabilities": [
        "rerank",
        "semantic_reranking",
@@ -222,7 +378,9 @@
    {
      "modelName": "cosyvoice-v2",
      "displayName": "CosyVoice V2",
-      "serviceType": ["tts"],
+      "serviceType": [
+        "tts"
+      ],
      "description": "通义语音合成，支持中英文",
      "capabilities": [
        "tts",
@@ -246,7 +404,9 @@
    {
      "modelName": "paraformer-v2",
      "displayName": "Paraformer V2",
-      "serviceType": ["asr"],
+      "serviceType": [
+        "asr"
+      ],
      "description": "通义语音识别",
      "capabilities": [
        "asr",
@@ -266,7 +426,9 @@
    {
      "modelName": "wanx-v2",
      "displayName": "通义万相 V2",
-      "serviceType": ["image_gen"],
+      "serviceType": [
+        "image_gen"
+      ],
      "description": "通义万相文生图",
      "capabilities": [
        "image_generation",
@@ -289,7 +451,9 @@
    {
      "modelName": "wanx-video",
      "displayName": "通义万相视频",
-      "serviceType": ["video_gen"],
+      "serviceType": [
+        "video_gen"
+      ],
      "description": "通义万相文生视频",
      "capabilities": [
        "video_generation",
@@ -306,7 +470,9 @@
    {
      "modelName": "cosyvoice-clone",
      "displayName": "CosyVoice 声音复刻",
-      "serviceType": ["voice_clone"],
+      "serviceType": [
+        "voice_clone"
+      ],
      "description": "通义声音复刻，少量样本即可克隆",
      "capabilities": [
        "voice_clone",
@@ -321,9 +487,12 @@
    {
      "modelName": "qwen-omni-turbo",
      "displayName": "Qwen-Omni-Turbo",
-      "serviceType": ["omni"],
+      "serviceType": [
+        "omni"
+      ],
      "description": "通义全能模型，支持音视频文本多模态",
      "contextWindow": 32768,
+      "maxOutputTokens": 2048,
      "capabilities": [
        "chat",
        "vision",
@@ -332,19 +501,26 @@
        "multimodal",
        "tool_use"
      ],
+      "defaultTemperature": 0.7,
+      "defaultTopP": 0.01,
      "extra": {}
    },
    {
      "modelName": "qwen3-max-trans",
      "displayName": "Qwen3-Max（翻译）",
-      "serviceType": ["translation"],
+      "serviceType": [
+        "translation"
+      ],
      "description": "通义千问翻译，中英日韩等多语种互译",
      "contextWindow": 131072,
+      "maxOutputTokens": 8192,
      "capabilities": [
        "translation",
        "multilingual",
        "chinese_optimized"
      ],
+      "defaultTemperature": 0.65,
+      "defaultTopP": 0.8,
      "extra": {}
    }
  ]
--- a/compute/providers/deepseek.json
+++ b/compute/providers/deepseek.json
@@ -17,10 +17,12 @@
    {
      "modelName": "deepseek-chat",
      "displayName": "DeepSeek V3.2",
-      "serviceType": ["chat"],
-      "description": "高性价比通用对话模型，64K 上下文",
-      "contextWindow": 64000,
-      "maxOutputTokens": 8000,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "高性价比通用对话模型，128K 上下文",
+      "contextWindow": 128000,
+      "maxOutputTokens": 8192,
      "capabilities": [
        "chat",
        "code",
@@ -28,18 +30,22 @@
        "multilingual"
      ],
      "inputPrice": 2,
-      "outputPrice": 8,
+      "outputPrice": 3,
      "defaultTemperature": 1,
      "defaultTopP": 1,
-      "extra": {}
+      "extra": {
+        "cacheHitPrice": 0.2
+      }
    },
    {
      "modelName": "deepseek-reasoner",
      "displayName": "DeepSeek R1",
-      "serviceType": ["reasoning"],
-      "description": "DeepSeek R1 推理模型，64K 上下文，深度推理能力，思维链最长 32K",
-      "contextWindow": 64000,
-      "maxOutputTokens": 8192,
+      "serviceType": [
+        "reasoning"
+      ],
+      "description": "DeepSeek V3.2 思考模式，128K 上下文，思维链默认 32K，最大输出 64K",
+      "contextWindow": 128000,
+      "maxOutputTokens": 65536,
      "capabilities": [
        "chat",
        "reasoning",
@@ -49,9 +55,9 @@
      ],
      "inputPrice": 4,
      "outputPrice": 16,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "extra": {
+        "cacheHitPrice": 0.2
+      }
    }
  ]
 }
--- a/compute/providers/google.json
+++ b/compute/providers/google.json
@@ -15,12 +15,106 @@
    "embedding"
  ],
  "models": [
+    {
+      "modelName": "gemini-3.1-pro-preview",
+      "displayName": "Gemini 3.1 Pro Preview",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Google Gemini 3.1 Pro Preview，面向复杂推理、编码和智能体工作流",
+      "contextWindow": 1048576,
+      "maxOutputTokens": 65536,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "vision",
+        "ultra_long_context",
+        "tool_use"
+      ],
+      "inputPrice": 2.0,
+      "outputPrice": 12.0,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "pricingNotes": "Standard paid tier text/image/video price for prompts <= 200K tokens; prompts > 200K are $4 input and $18 output per 1M tokens.",
+        "cachePricing": {
+          "inputCacheRead": 0.2,
+          "inputCacheReadOver200k": 0.4,
+          "storagePerMillionTokensPerHour": 4.5
+        }
+      }
+    },
+    {
+      "modelName": "gemini-3-flash-preview",
+      "displayName": "Gemini 3 Flash Preview",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Google Gemini 3 Flash Preview，高速多模态推理模型",
+      "contextWindow": 1048576,
+      "maxOutputTokens": 65536,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "vision",
+        "ultra_long_context",
+        "tool_use",
+        "fast"
+      ],
+      "inputPrice": 0.5,
+      "outputPrice": 3.0,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "pricingNotes": "Standard paid tier text/image/video price; audio input is $1.00 per 1M tokens.",
+        "cachePricing": {
+          "inputCacheRead": 0.05,
+          "audioInputCacheRead": 0.1,
+          "storagePerMillionTokensPerHour": 1.0
+        }
+      }
+    },
+    {
+      "modelName": "gemini-3.1-flash-lite-preview",
+      "displayName": "Gemini 3.1 Flash-Lite Preview",
+      "serviceType": [
+        "fast"
+      ],
+      "description": "Google Gemini 3.1 Flash-Lite Preview，低成本高吞吐多模态模型",
+      "contextWindow": 1048576,
+      "maxOutputTokens": 65536,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "vision",
+        "ultra_long_context",
+        "tool_use",
+        "fast"
+      ],
+      "inputPrice": 0.25,
+      "outputPrice": 1.5,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "pricingNotes": "Standard paid tier text/image/video price; audio input is $0.50 per 1M tokens.",
+        "cachePricing": {
+          "inputCacheRead": 0.025,
+          "audioInputCacheRead": 0.05,
+          "storagePerMillionTokensPerHour": 1.0
+        }
+      }
+    },
    {
      "modelName": "gemini-2.5-pro",
      "displayName": "Gemini 2.5 Pro",
-      "serviceType": ["chat"],
-      "description": "Google 最新旗舰模型，100万上下文，多模态",
-      "contextWindow": 1000000,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Google 最新旗舰模型，1,048,576 token 上下文，多模态",
+      "contextWindow": 1048576,
      "maxOutputTokens": 65536,
      "capabilities": [
        "chat",
@@ -39,9 +133,11 @@
    {
      "modelName": "gemini-2.5-flash",
      "displayName": "Gemini 2.5 Flash",
-      "serviceType": ["chat"],
-      "description": "Google 高性价比模型，100万上下文，速度快",
-      "contextWindow": 1000000,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Google 高性价比模型，1,048,576 token 上下文，速度快",
+      "contextWindow": 1048576,
      "maxOutputTokens": 65536,
      "capabilities": [
        "chat",
@@ -52,8 +148,8 @@
        "tool_use",
        "fast"
      ],
-      "inputPrice": 0.30,
-      "outputPrice": 2.50,
+      "inputPrice": 0.3,
+      "outputPrice": 2.5,
      "defaultTemperature": 1,
      "defaultTopP": 0.95,
      "extra": {}
@@ -61,13 +157,16 @@
    {
      "modelName": "text-embedding-005",
      "displayName": "Text Embedding 005",
-      "serviceType": ["embedding"],
+      "serviceType": [
+        "embedding"
+      ],
      "description": "Google 文本嵌入模型，768维度",
+      "contextWindow": 2048,
      "capabilities": [
        "text_embedding",
        "semantic_search"
      ],
-      "inputPrice": 0.10,
+      "inputPrice": 0.1,
      "extra": {}
    }
  ]
--- a/compute/providers/internal-testing.json
+++ b/compute/providers/internal-testing.json
@@ -23,7 +23,7 @@
        "chat"
      ],
      "description": "MiniMax M2.7 高速版，低延迟吞吐优化，200K 上下文",
-      "contextWindow": 200000,
+      "contextWindow": 204800,
      "maxOutputTokens": 8192,
      "capabilities": [
        "chat",
@@ -33,6 +33,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
@@ -41,9 +42,9 @@
      "serviceType": [
        "chat"
      ],
-      "description": "智谱 GLM-5.1 新一代旗舰模型，编程与推理能力断档领先，204K 上下文",
-      "contextWindow": 204800,
-      "maxOutputTokens": 131072,
+      "description": "智谱 GLM-5.1 新一代旗舰模型，编程与推理能力断档领先，200K 上下文",
+      "contextWindow": 200000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -54,7 +55,8 @@
      ],
      "inputPrice": 0,
      "outputPrice": 0,
-      "defaultTemperature": 0.95,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
@@ -63,9 +65,9 @@
      "serviceType": [
        "chat"
      ],
-      "description": "智谱 GLM-5 大语言模型",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "description": "智谱 GLM-5 大语言模型，200K 上下文",
+      "contextWindow": 200000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -74,6 +76,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
@@ -83,8 +86,8 @@
        "chat"
      ],
      "description": "智谱 GLM-5-Turbo 大语言模型，更快的推理速度",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 200000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -93,6 +96,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
@@ -112,6 +116,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
@@ -122,7 +127,7 @@
      ],
      "description": "月之暗面 Kimi K2.6 Code Preview，万亿参数 MoE 架构（32B 激活），256K 上下文，代码与智能体能力增强",
      "contextWindow": 256000,
-      "maxOutputTokens": 16384,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "reasoning",
@@ -134,7 +139,8 @@
      ],
      "inputPrice": 0,
      "outputPrice": 0,
-      "defaultTemperature": 0.7,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
@@ -144,8 +150,8 @@
        "chat"
      ],
      "description": "月之暗面 Kimi-2.5 大语言模型",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 256000,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "reasoning",
@@ -158,6 +164,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
@@ -167,7 +174,7 @@
        "chat"
      ],
      "description": "MiniMax M2.5 大语言模型",
-      "contextWindow": 128000,
+      "contextWindow": 204800,
      "maxOutputTokens": 8192,
      "capabilities": [
        "chat",
@@ -179,6 +186,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
@@ -202,6 +210,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 0.7,
+      "defaultTopP": 0.8,
      "extra": {}
    },
    {
@@ -211,8 +220,8 @@
        "chat"
      ],
      "description": "阿里通义千问 Qwen3.5 Plus",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 1000000,
+      "maxOutputTokens": 65536,
      "capabilities": [
        "chat",
        "reasoning",
@@ -224,7 +233,8 @@
      ],
      "inputPrice": 0,
      "outputPrice": 0,
-      "defaultTemperature": 1,
+      "defaultTemperature": 0.7,
+      "defaultTopP": 0.8,
      "extra": {}
    },
    {
@@ -246,6 +256,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 0.7,
+      "defaultTopP": 0.8,
      "extra": {}
    },
    {
@@ -267,6 +278,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 0.7,
+      "defaultTopP": 0.8,
      "extra": {}
    },
    {
@@ -276,8 +288,8 @@
        "chat"
      ],
      "description": "阿里通义千问 Qwen3 Max (2026-01-23)",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 262144,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "reasoning",
@@ -289,7 +301,8 @@
      ],
      "inputPrice": 0,
      "outputPrice": 0,
-      "defaultTemperature": 1,
+      "defaultTemperature": 0.7,
+      "defaultTopP": 0.8,
      "extra": {}
    },
    {
@@ -299,8 +312,8 @@
        "chat"
      ],
      "description": "字节跳动豆包 Seed 2.0 Code Preview (260215)",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "contextWindow": 256000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -309,6 +322,7 @@
      "inputPrice": 0,
      "outputPrice": 0,
      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    }
  ]
--- a/compute/providers/lingyiwanwu.json
+++ b/compute/providers/lingyiwanwu.json
@@ -17,7 +17,9 @@
    {
      "modelName": "yi-lightning",
      "displayName": "Yi Lightning",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "零一万物高性能模型，16K上下文，智能路由，极高性价比",
      "contextWindow": 16000,
      "maxOutputTokens": 4096,
@@ -30,14 +32,16 @@
      ],
      "inputPrice": 0.99,
      "outputPrice": 0.99,
-      "defaultTemperature": 0.7,
+      "defaultTemperature": 0.3,
      "defaultTopP": 0.9,
      "extra": {}
    },
    {
      "modelName": "yi-vision-v2",
      "displayName": "Yi Vision V2",
-      "serviceType": ["vision"],
+      "serviceType": [
+        "vision"
+      ],
      "description": "零一万物复杂视觉任务模型，16K上下文，支持多图分析",
      "contextWindow": 16000,
      "maxOutputTokens": 4096,
@@ -51,7 +55,7 @@
      ],
      "inputPrice": 6,
      "outputPrice": 6,
-      "defaultTemperature": 0.7,
+      "defaultTemperature": 0.3,
      "defaultTopP": 0.9,
      "extra": {}
    }
--- a/compute/providers/minimax.json
+++ b/compute/providers/minimax.json
@@ -9,12 +9,19 @@
  "enabled": false,
  "status": "unconfigured",
  "priceCurrency": "CNY",
-  "services": ["chat", "fast", "reasoning"],
+  "services": [
+    "chat",
+    "fast",
+    "reasoning"
+  ],
  "models": [
    {
      "modelName": "MiniMax-M2.7",
      "displayName": "MiniMax M2.7",
-      "serviceType": ["chat", "reasoning"],
+      "serviceType": [
+        "chat",
+        "reasoning"
+      ],
      "description": "MiniMax 新一代旗舰模型，擅长复杂 Agent、软件工程与专业办公任务",
      "contextWindow": 204800,
      "maxOutputTokens": 131072,
@@ -24,14 +31,22 @@
        "code",
        "tool_use"
      ],
+      "inputPrice": 2.1,
+      "outputPrice": 8.4,
      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheReadPrice": 0.42,
+        "cacheWritePrice": 2.625,
+        "outputSpeedTps": 60
+      }
    },
    {
      "modelName": "MiniMax-M2.7-highspeed",
      "displayName": "MiniMax M2.7 高速版",
-      "serviceType": ["fast"],
+      "serviceType": [
+        "fast"
+      ],
      "description": "MiniMax M2.7 极速版，效果一致，输出更快，适合低延迟场景",
      "contextWindow": 204800,
      "maxOutputTokens": 131072,
@@ -42,16 +57,25 @@
        "tool_use",
        "fast"
      ],
+      "inputPrice": 4.2,
+      "outputPrice": 16.8,
      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheReadPrice": 0.42,
+        "cacheWritePrice": 2.625,
+        "outputSpeedTps": 100
+      }
    },
    {
      "modelName": "MiniMax-M2.5",
      "displayName": "MiniMax M2.5",
-      "serviceType": ["chat", "reasoning"],
-      "description": "MiniMax 最新旗舰模型，230B参数(10B激活)MoE架构，百万级上下文，支持深度推理和工具调用",
-      "contextWindow": 1000000,
+      "serviceType": [
+        "chat",
+        "reasoning"
+      ],
+      "description": "MiniMax M2.5，204.8K 上下文，支持深度推理和工具调用",
+      "contextWindow": 204800,
      "maxOutputTokens": 131072,
      "capabilities": [
        "chat",
@@ -63,15 +87,21 @@
      "inputPrice": 2.1,
      "outputPrice": 8.4,
      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheReadPrice": 0.21,
+        "cacheWritePrice": 2.625,
+        "outputSpeedTps": 60
+      }
    },
    {
      "modelName": "MiniMax-M2.5-highspeed",
      "displayName": "MiniMax M2.5 高速版",
-      "serviceType": ["fast"],
+      "serviceType": [
+        "fast"
+      ],
      "description": "MiniMax M2.5 高速推理版本(100TPS)，适合低延迟场景",
-      "contextWindow": 200000,
+      "contextWindow": 204800,
      "maxOutputTokens": 131072,
      "capabilities": [
        "chat",
@@ -80,16 +110,22 @@
        "tool_use",
        "fast"
      ],
-      "inputPrice": 2.1,
+      "inputPrice": 4.2,
      "outputPrice": 16.8,
      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheReadPrice": 0.21,
+        "cacheWritePrice": 2.625,
+        "outputSpeedTps": 100
+      }
    },
    {
      "modelName": "MiniMax-M2.1",
      "displayName": "MiniMax M2.1",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "MiniMax 上一代主力模型，200K 上下文，性价比优秀",
      "contextWindow": 204800,
      "maxOutputTokens": 131072,
@@ -102,13 +138,19 @@
      "inputPrice": 2.1,
      "outputPrice": 8.4,
      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheReadPrice": 0.21,
+        "cacheWritePrice": 2.625,
+        "outputSpeedTps": 60
+      }
    },
    {
      "modelName": "MiniMax-M2.1-highspeed",
      "displayName": "MiniMax M2.1 高速版",
-      "serviceType": ["fast"],
+      "serviceType": [
+        "fast"
+      ],
      "description": "MiniMax M2.1 高速推理版本，适合低延迟场景",
      "contextWindow": 204800,
      "maxOutputTokens": 131072,
@@ -119,16 +161,22 @@
        "tool_use",
        "fast"
      ],
-      "inputPrice": 2.1,
+      "inputPrice": 4.2,
      "outputPrice": 16.8,
      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheReadPrice": 0.21,
+        "cacheWritePrice": 2.625,
+        "outputSpeedTps": 100
+      }
    },
    {
      "modelName": "MiniMax-Text-01",
      "displayName": "MiniMax Text 01",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "MiniMax 百万级长上下文文本模型，擅长长文档处理",
      "contextWindow": 1000000,
      "maxOutputTokens": 131072,
@@ -140,8 +188,8 @@
      ],
      "inputPrice": 1.4,
      "outputPrice": 7.7,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
+      "defaultTemperature": 0.1,
+      "defaultTopP": 0.95,
      "extra": {}
    }
  ]
--- a/compute/providers/mistral.json
+++ b/compute/providers/mistral.json
@@ -9,12 +9,17 @@
  "enabled": false,
  "status": "unconfigured",
  "priceCurrency": "USD",
-  "services": ["chat", "fast"],
+  "services": [
+    "chat",
+    "fast"
+  ],
  "models": [
    {
      "modelName": "mistral-large-latest",
      "displayName": "Mistral Large 3",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "Mistral 旗舰模型，256K 上下文，支持视觉和工具调用",
      "contextWindow": 256000,
      "maxOutputTokens": 262144,
@@ -26,18 +31,20 @@
        "tool_use",
        "long_context"
      ],
-      "inputPrice": 0.50,
-      "outputPrice": 1.50,
-      "defaultTemperature": 1,
+      "inputPrice": 0.5,
+      "outputPrice": 1.5,
+      "defaultTemperature": 0.7,
      "defaultTopP": 1,
      "extra": {}
    },
    {
      "modelName": "mistral-small-latest",
      "displayName": "Mistral Small 3.2",
-      "serviceType": ["fast"],
+      "serviceType": [
+        "fast"
+      ],
      "description": "Mistral 高效小模型，低延迟，适合快速推理场景",
-      "contextWindow": 130000,
+      "contextWindow": 128000,
      "maxOutputTokens": 8192,
      "capabilities": [
        "chat",
@@ -47,26 +54,28 @@
        "fast",
        "tool_use"
      ],
-      "inputPrice": 0.10,
-      "outputPrice": 0.30,
-      "defaultTemperature": 1,
+      "inputPrice": 0.1,
+      "outputPrice": 0.3,
+      "defaultTemperature": 0.7,
      "defaultTopP": 1,
      "extra": {}
    },
    {
      "modelName": "codestral-latest",
      "displayName": "Codestral",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "Mistral 专业代码模型，支持 Fill-in-the-Middle 补全",
-      "contextWindow": 256000,
+      "contextWindow": 128000,
      "maxOutputTokens": 32768,
      "capabilities": [
        "code",
        "fill_in_the_middle"
      ],
-      "inputPrice": 0.30,
-      "outputPrice": 0.90,
-      "defaultTemperature": 1,
+      "inputPrice": 0.3,
+      "outputPrice": 0.9,
+      "defaultTemperature": 0.7,
      "defaultTopP": 1,
      "extra": {}
    }
--- a/compute/providers/moonshot.json
+++ b/compute/providers/moonshot.json
@@ -14,13 +14,64 @@
    "reasoning"
  ],
  "models": [
+    {
+      "modelName": "kimi-k2.6",
+      "displayName": "Kimi K2.6",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "月之暗面 Kimi K2.6，最新多模态模型，长程代码编写与 Agent 自主执行能力增强，256K 上下文，支持思考/非思考模式",
+      "contextWindow": 256000,
+      "maxOutputTokens": 32768,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "tool_use",
+        "agent",
+        "long_context",
+        "vision",
+        "video_understanding",
+        "image_understanding"
+      ],
+      "inputPrice": 6.5,
+      "outputPrice": 27,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "thinking": {
+          "default": "enabled",
+          "disabledTemperature": 0.6
+        },
+        "cacheHitPrice": 1.1,
+        "supportedImageFormats": [
+          "png",
+          "jpeg",
+          "webp",
+          "gif"
+        ],
+        "supportedVideoFormats": [
+          "mp4",
+          "mpeg",
+          "mov",
+          "avi",
+          "x-flv",
+          "mpg",
+          "webm",
+          "wmv",
+          "3gpp"
+        ]
+      }
+    },
    {
      "modelName": "kimi-k2.5",
      "displayName": "Kimi K2.5",
-      "serviceType": ["chat"],
-      "description": "月之暗面Kimi K2.5，2026年1月发布的原生多模态模型，支持视觉编码和智能体集群",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "月之暗面Kimi K2.5，原生多模态模型，256K 上下文，支持思考/非思考模式",
      "contextWindow": 256000,
-      "maxOutputTokens": 16384,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "reasoning",
@@ -32,15 +83,40 @@
      ],
      "inputPrice": 4,
      "outputPrice": 21,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
-      "extra": {}
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "thinking": {
+          "default": "enabled",
+          "disabledTemperature": 0.6
+        },
+        "cacheHitPrice": 0.7,
+        "supportedImageFormats": [
+          "png",
+          "jpeg",
+          "webp",
+          "gif"
+        ],
+        "supportedVideoFormats": [
+          "mp4",
+          "mpeg",
+          "mov",
+          "avi",
+          "x-flv",
+          "mpg",
+          "webm",
+          "wmv",
+          "3gpp"
+        ]
+      }
    },
    {
      "modelName": "kimi-k2",
      "displayName": "Kimi K2",
-      "serviceType": ["chat"],
-      "description": "月之暗面Kimi K2，万亿参数MoE模型(320B激活)，256K上下文，专为智能体设计",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "月之暗面Kimi K2，万亿参数MoE模型(32B激活)，256K上下文，专为智能体设计",
      "contextWindow": 256000,
      "maxOutputTokens": 8192,
      "capabilities": [
@@ -49,19 +125,22 @@
        "code",
        "tool_use",
        "agent",
-        "long_context",
-        "vision"
+        "long_context"
      ],
      "inputPrice": 4,
      "outputPrice": 16,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
-      "extra": {}
+      "defaultTemperature": 0.6,
+      "defaultTopP": 1,
+      "extra": {
+        "cacheHitPrice": 1
+      }
    },
    {
      "modelName": "kimi-k2-thinking",
      "displayName": "Kimi K2 思考版",
-      "serviceType": ["reasoning"],
+      "serviceType": [
+        "reasoning"
+      ],
      "description": "月之暗面Kimi K2思考版，256K上下文，深度推理能力",
      "contextWindow": 256000,
      "maxOutputTokens": 16384,
@@ -74,14 +153,18 @@
      ],
      "inputPrice": 4,
      "outputPrice": 16,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
-      "extra": {}
+      "defaultTemperature": 1,
+      "defaultTopP": 1,
+      "extra": {
+        "cacheHitPrice": 1
+      }
    },
    {
      "modelName": "moonshot-v1-8k",
      "displayName": "Moonshot V1 8K",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "月之暗面标准模型，8K上下文",
      "contextWindow": 8192,
      "maxOutputTokens": 4096,
@@ -91,14 +174,16 @@
      ],
      "inputPrice": 2,
      "outputPrice": 2,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 0,
+      "defaultTopP": 1,
      "extra": {}
    },
    {
      "modelName": "moonshot-v1-32k",
      "displayName": "Moonshot V1 32K",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "月之暗面标准模型，32K上下文",
      "contextWindow": 32768,
      "maxOutputTokens": 4096,
@@ -109,14 +194,16 @@
      ],
      "inputPrice": 5,
      "outputPrice": 5,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 0,
+      "defaultTopP": 1,
      "extra": {}
    },
    {
      "modelName": "moonshot-v1-128k",
      "displayName": "Moonshot V1 128K",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "月之暗面标准模型，128K上下文",
      "contextWindow": 131072,
      "maxOutputTokens": 4096,
@@ -127,8 +214,8 @@
      ],
      "inputPrice": 10,
      "outputPrice": 10,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 0,
+      "defaultTopP": 1,
      "extra": {}
    }
  ]
--- a/compute/providers/ollama.json
+++ b/compute/providers/ollama.json
@@ -19,11 +19,14 @@
      "serviceType": ["chat"],
      "description": "本地运行的 Llama 3.1 70B",
      "contextWindow": 131072,
+      "maxOutputTokens": 8192,
      "capabilities": [
        "chat",
        "code",
        "reasoning"
      ],
+      "defaultTemperature": 0.8,
+      "defaultTopP": 0.9,
      "extra": {}
    }
  ]
--- a/compute/providers/openai.json
+++ b/compute/providers/openai.json
@@ -25,7 +25,9 @@
    {
      "modelName": "gpt-5.2",
      "displayName": "GPT-5.2",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "OpenAI 最新旗舰模型，400K 上下文，编码和智能体任务优化",
      "contextWindow": 400000,
      "maxOutputTokens": 128000,
@@ -46,7 +48,9 @@
    {
      "modelName": "gpt-5.2-pro",
      "displayName": "GPT-5.2 Pro",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "OpenAI GPT-5.2 Pro 专业版，最高性能",
      "contextWindow": 400000,
      "maxOutputTokens": 128000,
@@ -67,7 +71,9 @@
    {
      "modelName": "gpt-5.1",
      "displayName": "GPT-5.1",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "OpenAI GPT-5.1，400K 上下文，高性能旗舰",
      "contextWindow": 400000,
      "maxOutputTokens": 128000,
@@ -88,7 +94,9 @@
    {
      "modelName": "gpt-5",
      "displayName": "GPT-5",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "OpenAI GPT-5 新一代旗舰模型，统一所有模型能力",
      "contextWindow": 400000,
      "maxOutputTokens": 128000,
@@ -109,10 +117,12 @@
    {
      "modelName": "gpt-5-pro",
      "displayName": "GPT-5 Pro",
-      "serviceType": ["chat"],
-      "description": "OpenAI GPT-5 Pro 专业版高性能模型",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "OpenAI GPT-5 Pro 专业版高性能推理模型",
      "contextWindow": 400000,
-      "maxOutputTokens": 128000,
+      "maxOutputTokens": 272000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -130,10 +140,12 @@
    {
      "modelName": "gpt-5-mini",
      "displayName": "GPT-5-mini",
-      "serviceType": ["chat"],
-      "description": "高性价比模型，适合日常对话",
-      "contextWindow": 128000,
-      "maxOutputTokens": 32768,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "高性价比模型，适合日常对话，400K 上下文",
+      "contextWindow": 400000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "fast",
@@ -150,17 +162,19 @@
    {
      "modelName": "gpt-5-nano",
      "displayName": "GPT-5-nano",
-      "serviceType": ["fast"],
-      "description": "极速响应模型，适合简单任务",
-      "contextWindow": 128000,
-      "maxOutputTokens": 16384,
+      "serviceType": [
+        "fast"
+      ],
+      "description": "极速响应模型，适合简单任务，400K 上下文",
+      "contextWindow": 400000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "code",
        "fast"
      ],
-      "inputPrice": 0.02,
-      "outputPrice": 0.08,
+      "inputPrice": 0.05,
+      "outputPrice": 0.4,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -168,9 +182,11 @@
    {
      "modelName": "gpt-4.1",
      "displayName": "GPT-4.1",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "OpenAI GPT-4.1，支持 1M 上下文，编码和指令遵循能力大幅提升",
-      "contextWindow": 1000000,
+      "contextWindow": 1047576,
      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
@@ -180,8 +196,8 @@
        "ultra_long_context",
        "tool_use"
      ],
-      "inputPrice": 2.00,
-      "outputPrice": 8.00,
+      "inputPrice": 2,
+      "outputPrice": 8,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -189,9 +205,11 @@
    {
      "modelName": "gpt-4.1-mini",
      "displayName": "GPT-4.1 mini",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "OpenAI GPT-4.1 mini，1M 上下文，高性价比",
-      "contextWindow": 1000000,
+      "contextWindow": 1047576,
      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
@@ -200,8 +218,8 @@
        "ultra_long_context",
        "tool_use"
      ],
-      "inputPrice": 0.40,
-      "outputPrice": 1.60,
+      "inputPrice": 0.4,
+      "outputPrice": 1.6,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -209,9 +227,11 @@
    {
      "modelName": "gpt-4.1-nano",
      "displayName": "GPT-4.1 nano",
-      "serviceType": ["fast"],
+      "serviceType": [
+        "fast"
+      ],
      "description": "OpenAI GPT-4.1 nano，1M 上下文，极致性价比",
-      "contextWindow": 1000000,
+      "contextWindow": 1047576,
      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
@@ -219,8 +239,8 @@
        "ultra_long_context",
        "fast"
      ],
-      "inputPrice": 0.10,
-      "outputPrice": 0.40,
+      "inputPrice": 0.1,
+      "outputPrice": 0.4,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -228,7 +248,9 @@
    {
      "modelName": "gpt-4o",
      "displayName": "GPT-4o",
-      "serviceType": ["vision"],
+      "serviceType": [
+        "vision"
+      ],
      "description": "多模态模型，支持图像理解",
      "contextWindow": 128000,
      "maxOutputTokens": 16384,
@@ -238,8 +260,8 @@
        "image_understanding",
        "tool_use"
      ],
-      "inputPrice": 2.50,
-      "outputPrice": 10.00,
+      "inputPrice": 2.5,
+      "outputPrice": 10,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -247,7 +269,9 @@
    {
      "modelName": "gpt-4o-mini",
      "displayName": "GPT-4o mini",
-      "serviceType": ["vision"],
+      "serviceType": [
+        "vision"
+      ],
      "description": "OpenAI GPT-4o mini，高性价比多模态模型",
      "contextWindow": 128000,
      "maxOutputTokens": 16384,
@@ -258,7 +282,7 @@
        "tool_use"
      ],
      "inputPrice": 0.15,
-      "outputPrice": 0.60,
+      "outputPrice": 0.6,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -266,8 +290,11 @@
    {
      "modelName": "text-embedding-3-small",
      "displayName": "text-embedding-3-small",
-      "serviceType": ["embedding"],
+      "serviceType": [
+        "embedding"
+      ],
      "description": "轻量文本向量化模型，1536 维",
+      "contextWindow": 8192,
      "capabilities": [
        "text_embedding",
        "semantic_search"
@@ -278,8 +305,11 @@
    {
      "modelName": "text-embedding-3-large",
      "displayName": "text-embedding-3-large",
-      "serviceType": ["embedding"],
+      "serviceType": [
+        "embedding"
+      ],
      "description": "高精度文本向量化模型，3072 维",
+      "contextWindow": 8192,
      "capabilities": [
        "text_embedding",
        "semantic_search"
@@ -290,11 +320,14 @@
    {
      "modelName": "tts-1",
      "displayName": "TTS-1",
-      "serviceType": ["tts"],
+      "serviceType": [
+        "tts"
+      ],
      "description": "标准语音合成",
      "capabilities": [
        "tts"
      ],
+      "inputPrice": 15,
      "extra": {
        "voices": [
          "alloy",
@@ -315,11 +348,14 @@
    {
      "modelName": "tts-1-hd",
      "displayName": "TTS-1-HD",
-      "serviceType": ["tts"],
+      "serviceType": [
+        "tts"
+      ],
      "description": "高清语音合成",
      "capabilities": [
        "tts"
      ],
+      "inputPrice": 30,
      "extra": {
        "voices": [
          "alloy",
@@ -340,12 +376,15 @@
    {
      "modelName": "whisper-1",
      "displayName": "Whisper",
-      "serviceType": ["asr"],
+      "serviceType": [
+        "asr"
+      ],
      "description": "通用语音识别",
      "capabilities": [
        "asr",
        "multilingual"
      ],
+      "inputPrice": 0.006,
      "extra": {
        "maxAudioLength": 600,
        "supportedInputFormats": [
@@ -363,7 +402,9 @@
    {
      "modelName": "o3",
      "displayName": "o3",
-      "serviceType": ["responses"],
+      "serviceType": [
+        "responses"
+      ],
      "description": "OpenAI o3 推理模型，200K 上下文，支持工具调用",
      "contextWindow": 200000,
      "maxOutputTokens": 100000,
@@ -375,8 +416,8 @@
        "science",
        "tool_use"
      ],
-      "inputPrice": 2.00,
-      "outputPrice": 8.00,
+      "inputPrice": 2,
+      "outputPrice": 8,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -384,7 +425,9 @@
    {
      "modelName": "o3-pro",
      "displayName": "o3-pro",
-      "serviceType": ["responses"],
+      "serviceType": [
+        "responses"
+      ],
      "description": "OpenAI o3-pro 高级推理模型，200K 上下文",
      "contextWindow": 200000,
      "maxOutputTokens": 100000,
@@ -395,8 +438,8 @@
        "math",
        "science"
      ],
-      "inputPrice": 20.00,
-      "outputPrice": 80.00,
+      "inputPrice": 20,
+      "outputPrice": 80,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -404,9 +447,11 @@
    {
      "modelName": "o3-mini",
      "displayName": "o3-mini",
-      "serviceType": ["responses"],
-      "description": "OpenAI o3-mini 推理模型，128K 上下文",
-      "contextWindow": 128000,
+      "serviceType": [
+        "responses"
+      ],
+      "description": "OpenAI o3-mini 推理模型，200K 上下文",
+      "contextWindow": 200000,
      "maxOutputTokens": 100000,
      "capabilities": [
        "reasoning",
@@ -414,8 +459,8 @@
        "code",
        "fast"
      ],
-      "inputPrice": 1.10,
-      "outputPrice": 4.40,
+      "inputPrice": 1.1,
+      "outputPrice": 4.4,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -423,7 +468,9 @@
    {
      "modelName": "o4-mini",
      "displayName": "o4-mini",
-      "serviceType": ["responses"],
+      "serviceType": [
+        "responses"
+      ],
      "description": "OpenAI o4-mini 推理模型，200K 上下文，支持工具调用",
      "contextWindow": 200000,
      "maxOutputTokens": 100000,
@@ -434,8 +481,8 @@
        "tool_use",
        "vision"
      ],
-      "inputPrice": 1.10,
-      "outputPrice": 4.40,
+      "inputPrice": 1.1,
+      "outputPrice": 4.4,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
@@ -443,7 +490,9 @@
    {
      "modelName": "dall-e-3",
      "displayName": "DALL-E 3",
-      "serviceType": ["image_gen"],
+      "serviceType": [
+        "image_gen"
+      ],
      "description": "高质量文生图模型",
      "capabilities": [
        "image_generation",
@@ -464,9 +513,12 @@
    {
      "modelName": "gpt-4o-realtime",
      "displayName": "GPT-4o Realtime",
-      "serviceType": ["omni"],
+      "serviceType": [
+        "omni"
+      ],
      "description": "全能实时模型，支持语音 + 文本 + 视觉",
-      "contextWindow": 128000,
+      "contextWindow": 32000,
+      "maxOutputTokens": 4096,
      "capabilities": [
        "chat",
        "vision",
@@ -474,6 +526,10 @@
        "asr",
        "realtime"
      ],
+      "inputPrice": 4,
+      "outputPrice": 16,
+      "defaultTemperature": 0.7,
+      "defaultTopP": 1,
      "extra": {
        "supportedModes": [
          "speech-to-speech",
@@ -485,13 +541,21 @@
    {
      "modelName": "gpt-4o-realtime-preview",
      "displayName": "GPT-4o Realtime Preview",
-      "serviceType": ["realtime_voice"],
+      "serviceType": [
+        "realtime_voice"
+      ],
      "description": "实时语音交互模型",
+      "contextWindow": 32000,
+      "maxOutputTokens": 4096,
      "capabilities": [
        "realtime",
        "speech_to_speech",
        "low_latency"
      ],
+      "inputPrice": 5,
+      "outputPrice": 20,
+      "defaultTemperature": 0.7,
+      "defaultTopP": 1,
      "extra": {
        "supportedModes": [
          "speech-to-speech",
--- a/compute/providers/openrouter.json
+++ b/compute/providers/openrouter.json
@@ -16,7 +16,7 @@
      "displayName": "OpenRouter Auto",
      "serviceType": ["chat"],
      "description": "OpenRouter 自动路由，智能选择最优模型",
-      "contextWindow": 200000,
+      "contextWindow": 2000000,
      "maxOutputTokens": 16384,
      "capabilities": [
        "chat",
@@ -29,19 +29,16 @@
      "extra": {}
    },
    {
-      "modelName": "google/gemini-2.5-flash-exp:free",
-      "displayName": "Gemini 2.5 Flash (免费)",
+      "modelName": "openai/gpt-oss-120b:free",
+      "displayName": "GPT-OSS 120B (免费)",
      "serviceType": ["chat"],
-      "description": "通过 OpenRouter 免费使用的 Gemini 2.5 Flash，1M 上下文",
-      "contextWindow": 1048576,
-      "maxOutputTokens": 65535,
+      "description": "通过 OpenRouter 免费使用的 OpenAI GPT-OSS 120B",
+      "contextWindow": 131072,
+      "maxOutputTokens": 131072,
      "capabilities": [
        "chat",
        "reasoning",
-        "vision",
-        "tool_use",
-        "fast",
-        "long_context"
+        "code"
      ],
      "inputPrice": 0.00,
      "outputPrice": 0.00,
@@ -54,8 +51,8 @@
      "displayName": "Qwen3 Coder 480B (免费)",
      "serviceType": ["chat"],
      "description": "通过 OpenRouter 免费使用的 Qwen3 Coder 480B",
-      "contextWindow": 262144,
-      "maxOutputTokens": 32768,
+      "contextWindow": 262000,
+      "maxOutputTokens": 262000,
      "capabilities": [
        "code",
        "reasoning",
--- a/compute/providers/perplexity.json
+++ b/compute/providers/perplexity.json
@@ -9,12 +9,16 @@
  "enabled": false,
  "status": "unconfigured",
  "priceCurrency": "USD",
-  "services": ["chat"],
+  "services": [
+    "chat"
+  ],
  "models": [
    {
      "modelName": "sonar-pro",
      "displayName": "Sonar Pro",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "Perplexity 旗舰搜索增强模型，内置实时联网搜索和引用",
      "contextWindow": 200000,
      "maxOutputTokens": 8192,
@@ -24,16 +28,23 @@
        "reasoning",
        "citation"
      ],
-      "inputPrice": 3.00,
-      "outputPrice": 15.00,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "inputPrice": 3.0,
+      "outputPrice": 15.0,
+      "extra": {
+        "requestPricingPer1k": {
+          "low": 6,
+          "medium": 10,
+          "high": 14
+        },
+        "pricingNotes": "Total Sonar API cost includes token costs plus a request fee based on search context size."
+      }
    },
    {
      "modelName": "sonar-reasoning-pro",
      "displayName": "Sonar Reasoning Pro",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "Perplexity 深度推理模型，内置联网搜索和深度思考",
      "contextWindow": 128000,
      "maxOutputTokens": 8192,
@@ -43,16 +54,23 @@
        "reasoning",
        "deep_thinking"
      ],
-      "inputPrice": 2.00,
-      "outputPrice": 8.00,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "inputPrice": 2.0,
+      "outputPrice": 8.0,
+      "extra": {
+        "requestPricingPer1k": {
+          "low": 6,
+          "medium": 10,
+          "high": 14
+        },
+        "pricingNotes": "Total Sonar API cost includes token costs plus a request fee based on search context size."
+      }
    },
    {
      "modelName": "sonar",
      "displayName": "Sonar",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "Perplexity 轻量搜索模型，低成本联网搜索",
      "contextWindow": 128000,
      "maxOutputTokens": 4096,
@@ -62,11 +80,16 @@
        "citation",
        "fast"
      ],
-      "inputPrice": 1.00,
-      "outputPrice": 1.00,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
-      "extra": {}
+      "inputPrice": 1.0,
+      "outputPrice": 1.0,
+      "extra": {
+        "requestPricingPer1k": {
+          "low": 5,
+          "medium": 8,
+          "high": 12
+        },
+        "pricingNotes": "Total Sonar API cost includes token costs plus a request fee based on search context size."
+      }
    }
  ]
 }
--- a/compute/providers/siliconflow.json
+++ b/compute/providers/siliconflow.json
@@ -9,53 +9,62 @@
  "enabled": false,
  "status": "unconfigured",
  "priceCurrency": "CNY",
-  "services": ["chat", "embedding"],
+  "services": [
+    "chat",
+    "embedding"
+  ],
  "models": [
    {
      "modelName": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
      "displayName": "Qwen3 Coder 480B (硅基)",
-      "serviceType": ["chat"],
-      "description": "开源最强代码模型，480B MoE 架构，擅长代码生成和 Agent 场景",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Qwen3-Coder-480B-A35B-Instruct，480B MoE（35B 激活），原生 256K 上下文，可通过外推扩展至 1M，面向 Agentic Coding",
      "contextWindow": 262144,
-      "maxOutputTokens": 32768,
+      "maxOutputTokens": 262144,
      "capabilities": [
+        "chat",
        "code",
-        "reasoning",
+        "long_context",
        "agent",
        "tool_use"
      ],
      "inputPrice": 8,
      "outputPrice": 16,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
+      "defaultTemperature": 0.7,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
-      "modelName": "Qwen/Qwen3-235B-A22B-Instruct",
+      "modelName": "Qwen/Qwen3-235B-A22B-Instruct-2507",
      "displayName": "Qwen3 235B (硅基)",
-      "serviceType": ["chat"],
-      "description": "Qwen3 旗舰通用模型，235B MoE 架构，多语言能力突出",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "Qwen3-235B-A22B-Instruct-2507，235B MoE（22B 激活）通用指令模型，262K 上下文",
      "contextWindow": 262144,
-      "maxOutputTokens": 38912,
+      "maxOutputTokens": 262144,
      "capabilities": [
        "chat",
-        "reasoning",
        "code",
        "multilingual",
        "tool_use",
        "agent",
-        "vision"
+        "long_context"
      ],
      "inputPrice": 2.5,
      "outputPrice": 10,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
+      "defaultTemperature": 0.7,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "BAAI/bge-m3",
      "displayName": "BGE-M3 (硅基)",
-      "serviceType": ["embedding"],
+      "serviceType": [
+        "embedding"
+      ],
      "description": "多语言 Embedding 模型，支持 8K 上下文，免费版",
      "contextWindow": 8192,
      "maxOutputTokens": 0,
--- a/compute/providers/tencent.json
+++ b/compute/providers/tencent.json
@@ -9,14 +9,19 @@
  "enabled": false,
  "status": "unconfigured",
  "priceCurrency": "CNY",
-  "services": ["chat", "reasoning"],
+  "services": [
+    "chat",
+    "reasoning"
+  ],
  "models": [
    {
      "modelName": "hunyuan-2.0-thinking-20251109",
      "displayName": "混元 2.0 Think",
-      "serviceType": ["reasoning"],
+      "serviceType": [
+        "reasoning"
+      ],
      "description": "腾讯混元最新推理模型，128K输入/64K输出，MoE架构406B总参数",
-      "contextWindow": 131072,
+      "contextWindow": 196608,
      "maxOutputTokens": 65536,
      "capabilities": [
        "chat",
@@ -28,14 +33,63 @@
      "outputPrice": 15.9,
      "defaultTemperature": 1,
      "defaultTopP": 1,
-      "extra": {}
+      "extra": {
+        "pricingTiers": [
+          {
+            "maxInputTokens": 32768,
+            "inputPrice": 3.975,
+            "outputPrice": 15.9
          },
          {
-      "modelName": "hunyuan-turbo-s",
+            "maxInputTokens": 131072,
+            "inputPrice": 5.3,
+            "outputPrice": 21.2
+          }
+        ]
+      }
+    },
+    {
+      "modelName": "hunyuan-2.0-instruct-20251111",
+      "displayName": "混元 2.0 Instruct",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "腾讯混元 2.0 指令模型，最大输入128K/最大输出16K，支持联网搜索和 Function Calling",
+      "contextWindow": 147456,
+      "maxOutputTokens": 16384,
+      "capabilities": [
+        "chat",
+        "code",
+        "long_context",
+        "tool_use"
+      ],
+      "inputPrice": 3.18,
+      "outputPrice": 7.95,
+      "defaultTemperature": 1,
+      "defaultTopP": 1,
+      "extra": {
+        "pricingTiers": [
+          {
+            "maxInputTokens": 32768,
+            "inputPrice": 3.18,
+            "outputPrice": 7.95
+          },
+          {
+            "maxInputTokens": 131072,
+            "inputPrice": 4.505,
+            "outputPrice": 11.13
+          }
+        ]
+      }
+    },
+    {
+      "modelName": "hunyuan-turbos-latest",
      "displayName": "混元 Turbo S",
-      "serviceType": ["chat"],
-      "description": "腾讯混元高速模型，262K 上下文，支持推理和工具调用",
-      "contextWindow": 262144,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "腾讯混元高速模型，32K输入/16K输出，支持推理和工具调用",
+      "contextWindow": 32768,
      "maxOutputTokens": 16384,
      "capabilities": [
        "chat",
--- a/compute/providers/volcengine.json
+++ b/compute/providers/volcengine.json
@@ -23,10 +23,12 @@
    {
      "modelName": "doubao-2.0-pro",
      "displayName": "豆包 2.0 Pro",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎豆包2.0旗舰模型，256K上下文，相比Gemini 3 Pro和GPT 5.2有较大成本优势",
      "contextWindow": 256000,
-      "maxOutputTokens": 16384,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -37,17 +39,19 @@
      ],
      "inputPrice": 3.2,
      "outputPrice": 16,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "doubao-seed-1.8",
      "displayName": "豆包 Seed-1.8",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎豆包1.8旗舰模型，256K上下文，支持深度思考和多模态",
      "contextWindow": 256000,
-      "maxOutputTokens": 16384,
+      "maxOutputTokens": 16000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -58,17 +62,19 @@
      ],
      "inputPrice": 0.8,
      "outputPrice": 2,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "doubao-seed-1.6",
      "displayName": "豆包 Seed-1.6",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎豆包1.6旗舰模型，256K上下文",
      "contextWindow": 256000,
-      "maxOutputTokens": 16384,
+      "maxOutputTokens": 32000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -79,17 +85,19 @@
      ],
      "inputPrice": 0.8,
      "outputPrice": 8,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "doubao-seed-1.6-thinking",
      "displayName": "豆包 Seed-1.6 思考版",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎豆包1.6深度思考模型，256K上下文",
      "contextWindow": 256000,
-      "maxOutputTokens": 32768,
+      "maxOutputTokens": 16000,
      "capabilities": [
        "reasoning",
        "math",
@@ -99,16 +107,19 @@
      ],
      "inputPrice": 0.8,
      "outputPrice": 8,
-      "defaultTemperature": 0.3,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "doubao-seed-1.6-flash",
      "displayName": "豆包 Seed-1.6 Flash",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎豆包1.6快速版，成本较前代下降63%",
      "contextWindow": 256000,
-      "maxOutputTokens": 8192,
+      "maxOutputTokens": 16000,
      "capabilities": [
        "chat",
        "code",
@@ -117,17 +128,19 @@
      ],
      "inputPrice": 0.15,
      "outputPrice": 1.5,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "doubao-seed-1.6-lite",
      "displayName": "豆包 Seed-1.6 Lite",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎豆包1.6轻量版，极致性价比",
-      "contextWindow": 128000,
-      "maxOutputTokens": 4096,
+      "contextWindow": 256000,
+      "maxOutputTokens": 32000,
      "capabilities": [
        "chat",
        "code",
@@ -135,17 +148,19 @@
      ],
      "inputPrice": 0.3,
      "outputPrice": 0.6,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "doubao-seed-1.6-vision",
      "displayName": "豆包 Seed-1.6 视觉版",
-      "serviceType": ["vision"],
+      "serviceType": [
+        "vision"
+      ],
      "description": "火山引擎豆包1.6视觉模型，256K上下文，支持视频理解",
      "contextWindow": 256000,
-      "maxOutputTokens": 16384,
+      "maxOutputTokens": 32000,
      "capabilities": [
        "chat",
        "vision",
@@ -155,14 +170,16 @@
      ],
      "inputPrice": 0.8,
      "outputPrice": 8,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "doubao-seed-code",
      "displayName": "豆包 Seed-Code",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎豆包编程专用模型，256K上下文",
      "contextWindow": 256000,
      "maxOutputTokens": 32768,
@@ -174,16 +191,19 @@
      ],
      "inputPrice": 1.2,
      "outputPrice": 8,
-      "defaultTemperature": 0.3,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.7,
      "extra": {}
    },
    {
      "modelName": "deepseek-v3.2",
      "displayName": "DeepSeek V3.2 (火山引擎)",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎托管的DeepSeek V3.2，128K上下文",
      "contextWindow": 128000,
-      "maxOutputTokens": 8000,
+      "maxOutputTokens": 32000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -199,10 +219,12 @@
    {
      "modelName": "deepseek-r1",
      "displayName": "DeepSeek R1 (火山引擎)",
-      "serviceType": ["reasoning"],
-      "description": "火山引擎托管的DeepSeek R1推理模型，64K上下文",
-      "contextWindow": 64000,
-      "maxOutputTokens": 32768,
+      "serviceType": [
+        "reasoning"
+      ],
+      "description": "火山引擎托管的DeepSeek R1推理模型，128K上下文",
+      "contextWindow": 128000,
+      "maxOutputTokens": 65536,
      "capabilities": [
        "reasoning",
        "math",
@@ -212,16 +234,16 @@
      ],
      "inputPrice": 4,
      "outputPrice": 16,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
      "extra": {}
    },
    {
      "modelName": "kimi-k2-volcengine",
      "displayName": "Kimi K2 (火山引擎)",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎托管的Kimi K2，万亿参数MoE模型，128K上下文",
-      "contextWindow": 128000,
+      "contextWindow": 256000,
      "maxOutputTokens": 8192,
      "capabilities": [
        "chat",
@@ -233,17 +255,19 @@
      ],
      "inputPrice": 4,
      "outputPrice": 16,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 0.6,
+      "defaultTopP": 1,
      "extra": {}
    },
    {
      "modelName": "glm-4-7",
      "displayName": "GLM-4.7 (火山引擎)",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "火山引擎托管的智谱GLM-4.7，200K上下文",
      "contextWindow": 200000,
-      "maxOutputTokens": 200000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -254,14 +278,16 @@
      ],
      "inputPrice": 4,
      "outputPrice": 16,
-      "defaultTemperature": 0.7,
-      "defaultTopP": 0.9,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
      "extra": {}
    },
    {
      "modelName": "doubao-embedding",
      "displayName": "豆包 Embedding",
-      "serviceType": ["embedding"],
+      "serviceType": [
+        "embedding"
+      ],
      "description": "火山引擎豆包标准向量模型，2560维度",
      "contextWindow": 4096,
      "capabilities": [
@@ -276,7 +302,9 @@
    {
      "modelName": "volc-mega-tts-clone",
      "displayName": "火山声音复刻",
-      "serviceType": ["voice_clone"],
+      "serviceType": [
+        "voice_clone"
+      ],
      "description": "火山引擎声音复刻，支持极少样本",
      "capabilities": [
        "voice_clone",
@@ -291,7 +319,9 @@
    {
      "modelName": "volc-realtime-voice",
      "displayName": "火山实时语音",
-      "serviceType": ["realtime_voice"],
+      "serviceType": [
+        "realtime_voice"
+      ],
      "description": "火山引擎实时语音交互",
      "capabilities": [
        "realtime",
@@ -309,7 +339,9 @@
    {
      "modelName": "volc-simultaneous",
      "displayName": "火山同声传译",
-      "serviceType": ["simultaneous_interpret"],
+      "serviceType": [
+        "simultaneous_interpret"
+      ],
      "description": "火山引擎同声传译，支持中英双向",
      "capabilities": [
        "simultaneous_interpretation",
@@ -323,7 +355,9 @@
    {
      "modelName": "volc-translation",
      "displayName": "火山翻译",
-      "serviceType": ["translation"],
+      "serviceType": [
+        "translation"
+      ],
      "description": "火山引擎机器翻译，支持多语种互译",
      "capabilities": [
        "translation",
--- a/compute/providers/xai.json
+++ b/compute/providers/xai.json
@@ -9,15 +9,21 @@
  "enabled": false,
  "status": "unconfigured",
  "priceCurrency": "USD",
-  "services": ["chat", "reasoning"],
+  "services": [
+    "chat",
+    "reasoning"
+  ],
  "models": [
    {
-      "modelName": "grok-4-0709",
-      "displayName": "Grok 4",
-      "serviceType": ["chat", "reasoning"],
-      "description": "xAI 最新旗舰推理模型，262K 上下文，支持工具调用",
-      "contextWindow": 262144,
-      "maxOutputTokens": 8192,
+      "modelName": "grok-4.20-0309-reasoning",
+      "displayName": "Grok 4.20 Reasoning",
+      "serviceType": [
+        "chat",
+        "reasoning"
+      ],
+      "description": "xAI 旗舰推理模型，支持函数调用与结构化输出",
+      "contextWindow": 2000000,
+      "maxOutputTokens": 16384,
      "capabilities": [
        "chat",
        "reasoning",
@@ -25,17 +31,20 @@
        "vision",
        "tool_use"
      ],
-      "inputPrice": 3.00,
-      "outputPrice": 15.00,
+      "inputPrice": 2.0,
+      "outputPrice": 6.0,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
    },
    {
-      "modelName": "grok-4-1-fast",
-      "displayName": "Grok 4.1 Fast",
-      "serviceType": ["chat"],
-      "description": "xAI 超长上下文高速模型，2M token 窗口",
+      "modelName": "grok-4-1-fast-reasoning",
+      "displayName": "Grok 4.1 Fast Reasoning",
+      "serviceType": [
+        "chat",
+        "reasoning"
+      ],
+      "description": "xAI 高速推理模型，2M 上下文窗口",
      "contextWindow": 2000000,
      "maxOutputTokens": 16384,
      "capabilities": [
@@ -47,8 +56,8 @@
        "fast",
        "long_context"
      ],
-      "inputPrice": 0.20,
-      "outputPrice": 0.50,
+      "inputPrice": 0.2,
+      "outputPrice": 0.5,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {}
--- a/compute/providers/xunfei.json
+++ b/compute/providers/xunfei.json
@@ -12,12 +12,12 @@
  "services": ["chat", "reasoning"],
  "models": [
    {
-      "modelName": "x1",
+      "modelName": "spark-x",
      "displayName": "讯飞星火 X1",
      "serviceType": ["reasoning"],
-      "description": "讯飞最新推理模型，基于全国产算力训练，擅长数学和代码推理",
-      "contextWindow": 32768,
-      "maxOutputTokens": 16384,
+      "description": "讯飞深度推理模型（X2），输入64K/输出128K，支持思考模式与函数调用",
+      "contextWindow": 65536,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
@@ -25,12 +25,10 @@
        "math",
        "code"
      ],
-      "inputPrice": 11,
-      "outputPrice": 11,
-      "defaultTemperature": 1,
-      "defaultTopP": 1,
+      "defaultTemperature": 1.2,
+      "defaultTopP": 0.95,
      "extra": {
-        "pricingNote": "讯飞按token包计费，不区分输入输出"
+        "pricingNote": "官方公开文档说明模型价格以控制台及实际购买页为准，未公开固定 token 单价"
      }
    },
    {
@@ -46,12 +44,10 @@
        "code",
        "tool_use"
      ],
-      "inputPrice": 60,
-      "outputPrice": 60,
      "defaultTemperature": 1,
      "defaultTopP": 1,
      "extra": {
-        "pricingNote": "讯飞按token包计费，不区分输入输出"
+        "pricingNote": "官方公开文档说明模型价格以控制台及实际购买页为准，未公开固定 token 单价"
      }
    }
  ]
--- a/compute/providers/zhipu-embedding.json
+++ b/compute/providers/zhipu-embedding.json
@@ -17,16 +17,23 @@
    {
      "modelName": "embedding-3",
      "displayName": "智谱 embedding-3",
-      "serviceType": ["embedding"],
-      "description": "智谱嵌入模型v3，支持自定义维度，单条最大3072 tokens。走 OpenAI 兼容 /embeddings 端点（Anthropic 协议不提供 embeddings）",
-      "contextWindow": 3072,
+      "serviceType": [
+        "embedding"
+      ],
+      "description": "智谱嵌入模型v3，支持自定义维度；模型上下文窗口 8K，单条输入最多 3072 tokens。走 OpenAI 兼容 /embeddings 端点（Anthropic 协议不提供 embeddings）",
+      "contextWindow": 8192,
      "capabilities": [
        "text_embedding",
        "semantic_search",
-        "rag"
+        "rag",
+        "custom_dimensions"
      ],
      "inputPrice": 0.5,
-      "extra": {}
+      "extra": {
+        "maxInputTokensPerItem": 3072,
+        "maxBatchItems": 64,
+        "vectorDimensions": "256-2048"
+      }
    }
  ]
 }
--- a/compute/providers/zhipu.json
+++ b/compute/providers/zhipu.json
@@ -18,10 +18,72 @@
    "embedding-3"
  ],
  "models": [
+    {
+      "modelName": "glm-5.1",
+      "displayName": "GLM-5.1",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "智谱 GLM-5.1 最新旗舰模型，面向长程 Agentic Coding 与 Autonomous Agent 场景，200K 上下文，128K 最大输出",
+      "contextWindow": 200000,
+      "maxOutputTokens": 128000,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "multilingual",
+        "deep_thinking",
+        "long_context",
+        "math",
+        "tool_use",
+        "agent"
+      ],
+      "inputPrice": 6,
+      "outputPrice": 24,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheHitPrice": 1.2,
+        "thinking": {
+          "default": "enabled"
+        }
+      }
+    },
+    {
+      "modelName": "glm-5-turbo",
+      "displayName": "GLM-5-Turbo",
+      "serviceType": [
+        "chat"
+      ],
+      "description": "智谱 GLM-5-Turbo，面向 OpenClaw 等长链路任务优化，200K 上下文，128K 最大输出",
+      "contextWindow": 200000,
+      "maxOutputTokens": 128000,
+      "capabilities": [
+        "chat",
+        "reasoning",
+        "code",
+        "deep_thinking",
+        "long_context",
+        "tool_use",
+        "agent"
+      ],
+      "inputPrice": 5,
+      "outputPrice": 22,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheHitPrice": 1,
+        "thinking": {
+          "default": "enabled"
+        }
+      }
+    },
    {
      "modelName": "glm-5",
      "displayName": "GLM-5",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "智谱GLM-5，744B参数旗舰基座模型，2026年2月发布，面向Agentic Engineering，编程能力接近Claude Opus",
      "contextWindow": 200000,
      "maxOutputTokens": 128000,
@@ -38,14 +100,18 @@
      ],
      "inputPrice": 4,
      "outputPrice": 18,
-      "defaultTemperature": 0.95,
-      "defaultTopP": 0.7,
-      "extra": {}
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheHitPrice": 0.8
+      }
    },
    {
      "modelName": "glm-4.7",
      "displayName": "GLM-4.7",
-      "serviceType": ["chat"],
+      "serviceType": [
+        "chat"
+      ],
      "description": "智谱GLM-4.7，355B参数MoE架构旗舰模型，200K上下文，编程能力对齐Claude Sonnet",
      "contextWindow": 200000,
      "maxOutputTokens": 128000,
@@ -58,16 +124,20 @@
        "long_context",
        "tool_use"
      ],
-      "inputPrice": 4,
-      "outputPrice": 16,
-      "defaultTemperature": 0.95,
-      "defaultTopP": 0.7,
-      "extra": {}
+      "inputPrice": 2,
+      "outputPrice": 8,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheHitPrice": 0.5
+      }
    },
    {
      "modelName": "glm-4.7-thinking",
      "displayName": "GLM-4.7 Thinking",
-      "serviceType": ["reasoning"],
+      "serviceType": [
+        "reasoning"
+      ],
      "description": "智谱GLM-4.7深度思考模式，200K上下文，交错式/保留式/轮级思考",
      "contextWindow": 200000,
      "maxOutputTokens": 128000,
@@ -78,19 +148,57 @@
        "deep_thinking",
        "long_context"
      ],
-      "inputPrice": 8,
-      "outputPrice": 32,
-      "defaultTemperature": 0.95,
-      "defaultTopP": 0.7,
-      "extra": {}
+      "inputPrice": 2,
+      "outputPrice": 8,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheHitPrice": 0.5,
+        "aliasOf": "glm-4.7"
+      }
+    },
+    {
+      "modelName": "glm-5v-turbo",
+      "displayName": "GLM-5V-Turbo",
+      "serviceType": [
+        "vision"
+      ],
+      "description": "智谱首个多模态 Coding 基座模型，支持视频、图像、文本和文件输入，200K 上下文，128K 最大输出",
+      "contextWindow": 200000,
+      "maxOutputTokens": 128000,
+      "capabilities": [
+        "chat",
+        "vision",
+        "video_understanding",
+        "image_understanding",
+        "file_understanding",
+        "reasoning",
+        "code",
+        "deep_thinking",
+        "long_context",
+        "tool_use",
+        "agent"
+      ],
+      "inputPrice": 5,
+      "outputPrice": 22,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheHitPrice": 1,
+        "thinking": {
+          "default": "enabled"
+        }
+      }
    },
    {
      "modelName": "glm-4.6v",
      "displayName": "GLM-4.6V",
-      "serviceType": ["vision"],
+      "serviceType": [
+        "vision"
+      ],
      "description": "智谱GLM-4.6V多模态版，106B/12B MoE，支持图像视频理解与工具调用，128K上下文",
      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "maxOutputTokens": 32768,
      "capabilities": [
        "chat",
        "vision",
@@ -99,28 +207,37 @@
        "long_context",
        "tool_use"
      ],
-      "inputPrice": 8,
-      "outputPrice": 32,
-      "extra": {}
+      "inputPrice": 1,
+      "outputPrice": 4,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheHitPrice": 0.2
+      }
    },
    {
      "modelName": "glm-4.6",
      "displayName": "GLM-4.6",
-      "serviceType": ["chat"],
-      "description": "智谱GLM-4.6，增强推理能力，128K上下文",
-      "contextWindow": 128000,
-      "maxOutputTokens": 8192,
+      "serviceType": [
+        "chat"
+      ],
+      "description": "智谱GLM-4.6，增强推理能力，200K上下文",
+      "contextWindow": 200000,
+      "maxOutputTokens": 128000,
      "capabilities": [
        "chat",
        "reasoning",
        "code",
        "multilingual",
-        "deep_thinking",
-        "vision"
+        "deep_thinking"
      ],
-      "inputPrice": 5,
-      "outputPrice": 5,
-      "extra": {}
+      "inputPrice": 2,
+      "outputPrice": 8,
+      "defaultTemperature": 1,
+      "defaultTopP": 0.95,
+      "extra": {
+        "cacheHitPrice": 0.5
+      }
    }
  ]
 }
--- a/manifest.json
+++ b/manifest.json
@@ -1,6 +1,6 @@
 {
  "version": "1.0.0",
-  "presetDataVersion": 30,
+  "presetDataVersion": 31,
  "updatedAt": "2026-04-25",
  "description": "DesireCore 官方配置中心"
 }