feat: enhance openai-compatible provider support and patch responses API instructions handling

- Add a compatibility patch for the langchain-openai Responses API so that system messages are extracted as top-level instructions, which the Codex endpoint requires.
- Update the provider list: add Alibaba, Volcengine, and Tencent TokenHub; adjust the SiliconFlow and MiniMax endpoints; refine provider ordering and model-list strategies.
- Extend the models.dev-only listing logic to providers that lack a stable models.list endpoint.
- Increase the models.dev cache TTL for improved efficiency.
- Add tests for the OpenAI Responses API and streaming compatibility patches.
jxxghp · 2026-04-30 11:32:55 +08:00
commit 34e7c4ac14 (parent b228107a25)
5 changed files with 166 additions and 38 deletions

@@ -182,6 +182,77 @@ def _patch_deepseek_reasoning_content_support():
logger.debug("已修补 langchain-deepseek thinking tool-call 的 reasoning_content 回传兼容性")
def _patch_openai_responses_instructions_support():
"""
修补 langchain-openai 在使用 use_responses_api=True 时,
提取 system 消息为顶层 instructions 字段。
由于 Codex 等模型 (Responses API) 强依赖 instructions 字段,
如果没有该字段会报 400 "Instructions are required"
"""
try:
from langchain_openai import ChatOpenAI
except Exception as err:
logger.debug(f"跳过 langchain-openai instructions 修补:{err}")
return
if getattr(ChatOpenAI, "_moviepilot_responses_instructions_patched", False):
return
original_get_request_payload = getattr(ChatOpenAI, "_get_request_payload", None)
if not callable(original_get_request_payload):
logger.warning("langchain-openai 缺少 _get_request_payload无法修补 instructions")
return
@wraps(original_get_request_payload)
def _patched_get_request_payload(self, input_, *, stop=None, **kwargs):
payload = original_get_request_payload(self, input_, stop=stop, **kwargs)
base_url = str(getattr(self, "openai_api_base", "") or "").lower()
# 处理 GitHub Copilot 端点兼容性
if "githubcopilot.com" in base_url:
payload.pop("stream_options", None)
payload.pop("metadata", None)
# 处理 ChatGPT 官方 Responses API (Codex) 端点兼容性
is_codex = "chatgpt.com/backend-api/codex" in base_url
if is_codex and (getattr(self, "use_responses_api", False) or "input" in payload):
instructions = payload.get("instructions", "")
inputs = payload.get("input", [])
new_inputs = []
for msg in inputs:
if isinstance(msg, dict) and msg.get("role") == "system":
content = msg.get("content")
if isinstance(content, str) and content.strip():
if instructions:
instructions += "\n\n" + content
else:
instructions = content
else:
new_inputs.append(msg)
payload["input"] = new_inputs
payload["instructions"] = instructions or "You are a helpful assistant."
payload["store"] = False
# Codex 端点不支持的部分常见补全参数,统一清理避免 400 报错
unsupported_keys = [
"presence_penalty", "frequency_penalty", "top_p", "n", "user",
"stop", "metadata", "logit_bias", "logprobs", "top_logprobs",
"stream_options", "temperature"
]
for key in unsupported_keys:
payload.pop(key, None)
return payload
ChatOpenAI._get_request_payload = _patched_get_request_payload
ChatOpenAI._moviepilot_responses_instructions_patched = True
logger.debug("已修补 langchain-openai responses API 的 instructions 兼容性")
class LLMHelper:
"""LLM模型相关辅助功能"""
@@ -559,6 +630,12 @@ class LLMHelper:
         else:
             from langchain_openai import ChatOpenAI
+            _patch_openai_responses_instructions_support()
+            # The ChatGPT Codex endpoint requires stream: True
+            if runtime.get("use_responses_api") and "chatgpt.com/backend-api/codex" in str(runtime.get("base_url") or ""):
+                streaming = True
             model = ChatOpenAI(
                 model=model_name,
                 api_key=runtime["api_key"],
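
For illustration, here is a minimal standalone sketch (not part of the commit) of the system-message folding that the Codex branch of the patch performs; the flat dict shapes for the payload and its messages are assumptions mirroring the patched _get_request_payload above:

def fold_system_messages(payload: dict) -> dict:
    # Assumed shape: payload["input"] is a list of {"role", "content"} dicts,
    # as in the patched _get_request_payload above.
    instructions = payload.get("instructions", "")
    remaining = []
    for msg in payload.get("input", []):
        if isinstance(msg, dict) and msg.get("role") == "system":
            content = msg.get("content")
            if isinstance(content, str) and content.strip():
                # Fold textual system content into the instructions string.
                instructions = f"{instructions}\n\n{content}" if instructions else content
        else:
            remaining.append(msg)
    payload["input"] = remaining
    payload["instructions"] = instructions or "You are a helpful assistant."
    return payload

# The system message is lifted out of "input" into top-level instructions:
result = fold_system_messages({"input": [
    {"role": "system", "content": "Be terse."},
    {"role": "user", "content": "Hi"},
]})
assert result["instructions"] == "Be terse."
assert result["input"] == [{"role": "user", "content": "Hi"}]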

@@ -89,7 +89,7 @@ class LLMProviderManager(metaclass=Singleton):
"""统一维护 provider 目录、models.dev 缓存和 OAuth 状态。"""
_MODELS_DEV_URL = "https://models.dev/api.json"
_MODELS_DEV_CACHE_TTL = 5 * 60
_MODELS_DEV_CACHE_TTL = 12 * 60 * 60
_CHATGPT_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
_CHATGPT_ISSUER = "https://auth.openai.com"
_CHATGPT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
@@ -201,26 +201,46 @@ class LLMProviderManager(metaclass=Singleton):
description="通过 GitHub Copilot 订阅接入。",
sort_order=50,
),
ProviderSpec(
id="nvidia",
name="Nvidia",
runtime="openai_compatible",
models_dev_provider_id="nvidia",
default_base_url="https://integrate.api.nvidia.com/v1",
api_key_hint="填写 Nvidia API Key。",
description="Nvidia 集成推理平台。",
sort_order=60,
),
ProviderSpec(
id="siliconflow",
name="硅基流动",
runtime="openai_compatible",
models_dev_provider_id="siliconflow",
default_base_url="https://api.siliconflow.com/v1",
default_base_url="https://api.siliconflow.cn/v1",
api_key_hint="填写硅基流动 API Key。",
description="SiliconFlow 官方兼容端点。",
sort_order=60,
),
ProviderSpec(
id="alibaba",
name="阿里云百炼",
runtime="openai_compatible",
models_dev_provider_id="alibaba",
default_base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
api_key_hint="填写 DashScope / Alibaba API Key。",
description="阿里云百炼兼容端点。",
sort_order=70,
),
ProviderSpec(
id="volcengine",
name="火山方舟",
runtime="openai_compatible",
default_base_url="https://ark.cn-beijing.volces.com/api/v3",
api_key_hint="填写火山方舟 API Key。",
description="字节跳动火山引擎兼容端点。",
sort_order=80,
),
ProviderSpec(
id="tencent",
name="Tencent",
runtime="openai_compatible",
models_dev_provider_id="tencent-tokenhub",
default_base_url="https://tokenhub.tencentmaas.com/v1",
api_key_hint="填写 Tencent API Key。",
model_list_strategy="models_dev_only",
description="腾讯兼容端点。",
sort_order=90,
),
ProviderSpec(
id="ollama-cloud",
name="Ollama Cloud",
@@ -229,28 +249,28 @@ class LLMProviderManager(metaclass=Singleton):
default_base_url="https://ollama.com/v1",
api_key_hint="填写 Ollama Cloud API Key。",
description="Ollama Cloud 云端模型服务。",
sort_order=80,
sort_order=100,
),
ProviderSpec(
id="alibaba",
name="Alibaba",
id="nvidia",
name="Nvidia",
runtime="openai_compatible",
models_dev_provider_id="alibaba",
default_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
api_key_hint="填写 DashScope / Alibaba API Key。",
description="阿里云百炼兼容端点",
sort_order=90,
models_dev_provider_id="nvidia",
default_base_url="https://integrate.api.nvidia.com/v1",
api_key_hint="填写 Nvidia API Key。",
description="Nvidia 集成推理平台",
sort_order=110,
),
ProviderSpec(
id="minimax",
name="MiniMax",
runtime="anthropic_compatible",
models_dev_provider_id="minimax",
default_base_url="https://api.minimax.io/anthropic/v1",
default_base_url="https://api.minimaxi.com/anthropic/v1",
api_key_hint="填写 MiniMax API Key。",
model_list_strategy="anthropic_compatible",
description="MiniMax Anthropic-compatible 端点。",
sort_order=100,
sort_order=120,
),
ProviderSpec(
id="xiaomi",
@@ -260,17 +280,7 @@ class LLMProviderManager(metaclass=Singleton):
default_base_url="https://api.xiaomimimo.com/v1",
api_key_hint="填写 Xiaomi API Key。",
description="小米 Mimo 兼容端点。",
sort_order=110,
),
ProviderSpec(
id="tencent",
name="Tencent",
runtime="openai_compatible",
models_dev_provider_id="tencent",
default_base_url="https://api.lkeap.cloud.tencent.com/coding/v3",
api_key_hint="填写 Tencent API Key。",
description="腾讯兼容端点。",
sort_order=120,
sort_order=130,
),
ProviderSpec(
id="openai",
@@ -640,12 +650,13 @@ class LLMProviderManager(metaclass=Singleton):
         )
         return sorted(results, key=lambda item: item["name"].lower())

-    async def _list_models_from_anthropic_compatible(
+    async def _list_models_from_models_dev_only(
         self,
         provider_id: str,
+        transport: str = "openai",
     ) -> list[dict[str, Any]]:
         """
-        The Anthropic-compatible ecosystem lacks the uniform, stable models.list behavior OpenAI has,
+        Some providers lack a uniform, stable models.list behavior,
         so the models.dev catalog is read first; if a provider exposes a standard models endpoint
         in the future, live refresh can be layered on smoothly.
         """
@@ -660,7 +671,7 @@ class LLMProviderManager(metaclass=Singleton):
                     model_id=model_id,
                     display_name=metadata.get("name") or model_id,
                     metadata=metadata,
-                    transport="anthropic",
+                    transport=transport,
                     source="models.dev",
                 )
             )
@@ -832,8 +843,15 @@ class LLMProviderManager(metaclass=Singleton):
             )
         if spec.model_list_strategy == "anthropic_compatible":
-            return await self._list_models_from_anthropic_compatible(
+            return await self._list_models_from_models_dev_only(
                 provider_id=provider_id,
+                transport="anthropic",
             )
+        if spec.model_list_strategy == "models_dev_only":
+            return await self._list_models_from_models_dev_only(
+                provider_id=provider_id,
+                transport="openai",
+            )
         # openai-compatible / deepseek use the official models endpoint by default.
@@ -1348,7 +1366,8 @@ class LLMProviderManager(metaclass=Singleton):
         auth = None
         try:
             auth = await self._resolve_chatgpt_oauth()
-        except Exception:
-            pass
+        except Exception as err:
+            logger.debug(f"Failed to resolve ChatGPT OAuth: {err}")
         if auth:

@@ -0,0 +1,9 @@
+from app.agent.llm.helper import _patch_openai_responses_instructions_support
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import SystemMessage, HumanMessage
+import json
+
+_patch_openai_responses_instructions_support()
+model = ChatOpenAI(model="gpt-4o", openai_api_key="sk-123", base_url="https://api.githubcopilot.com", stream_usage=True)
+payload = model._get_request_payload([SystemMessage(content="Hello system"), HumanMessage(content="Hello user")])
+print(json.dumps(payload, indent=2))
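
A hypothetical follow-up check (not in the committed test): with the githubcopilot.com base URL, the Copilot branch of the patch should already have stripped both keys before the payload is printed:

# Hypothetical assertions; the Copilot branch pops these keys from the payload.
assert "stream_options" not in payload
assert "metadata" not in payload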

@@ -0,0 +1,9 @@
+from app.agent.llm.helper import _patch_openai_responses_instructions_support
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import SystemMessage, HumanMessage
+import json
+
+_patch_openai_responses_instructions_support()
+model = ChatOpenAI(model="gpt-4o", openai_api_key="sk-123", use_responses_api=True, temperature=0.7)
+payload = model._get_request_payload([SystemMessage(content="Hello system"), HumanMessage(content="Hello user")])
+print(json.dumps(payload, indent=2))
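
Here the base URL is the OpenAI default rather than the Codex endpoint, so the Codex branch of the patch should not fire; a hypothetical sanity check (not in the committed test) that the Responses payload is left intact:

# Hypothetical assertions; a non-Codex base URL skips the instructions folding
# and the parameter cleanup, so "input" and temperature should survive.
assert "input" in payload
assert payload.get("temperature") == 0.7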

@@ -0,0 +1,14 @@
+import asyncio
+from app.agent.llm.helper import LLMHelper
+from app.core.config import settings
+import json
+
+async def run():
+    llm = await LLMHelper.get_llm(
+        streaming=False,
+        provider="chatgpt",
+        model="gpt-5.1-codex",
+    )
+    print("streaming:", llm.streaming)
+
+asyncio.run(run())
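
A hypothetical assertion (not in the committed test) that could close out run(), since the Codex branch in get_llm forces streaming on even when streaming=False is requested:

# Hypothetical; "chatgpt" with a Codex model should resolve to the Codex
# endpoint, so the helper should have overridden the requested streaming=False.
assert llm.streaming is True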