From a6cfc1244304306604bb194188ab8f8d55191d24 Mon Sep 17 00:00:00 2001 From: snaily Date: Thu, 10 Jul 2025 21:21:55 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0=E5=93=8D=E5=BA=94?= =?UTF-8?q?=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91=E4=BB=A5=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=8E=A8=E7=90=86=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 修改了 response_handler.py 中的 _handle_openai_stream_response 和 _handle_openai_normal_response 方法,增加了对推理内容 (reasoning_content) 的支持。 - 更新了 _extract_result 方法的返回值,确保能够提取推理内容。 - 在 gemini_chat_service.py 和 openai_chat_service.py 中,调整了生成配置以包含思考过程的选项。 - 在 vertex_express_chat_service.py 中,增强了对客户端思考配置的处理逻辑,确保优先使用客户端提供的配置。 --- app/handler/response_handler.py | 71 ++++++++----------- app/service/chat/gemini_chat_service.py | 8 ++- app/service/chat/openai_chat_service.py | 10 ++- .../chat/vertex_express_chat_service.py | 24 +++++-- 4 files changed, 65 insertions(+), 48 deletions(-) diff --git a/app/handler/response_handler.py b/app/handler/response_handler.py index c0517b4..95f631c 100644 --- a/app/handler/response_handler.py +++ b/app/handler/response_handler.py @@ -39,13 +39,13 @@ class GeminiResponseHandler(ResponseHandler): def _handle_openai_stream_response( response: Dict[str, Any], model: str, finish_reason: str, usage_metadata: Optional[Dict[str, Any]] ) -> Dict[str, Any]: - text, tool_calls, _ = _extract_result( + text, reasoning_content, tool_calls, _ = _extract_result( response, model, stream=True, gemini_format=False ) - if not text and not tool_calls: + if not text and not tool_calls and not reasoning_content: delta = {} else: - delta = {"content": text, "role": "assistant"} + delta = {"content": text, "reasoning_content": reasoning_content, "role": "assistant"} if tool_calls: delta["tool_calls"] = tool_calls template_chunk = { @@ -63,7 +63,7 @@ def _handle_openai_stream_response( def _handle_openai_normal_response( response: Dict[str, Any], model: str, finish_reason: str, usage_metadata: Optional[Dict[str, Any]] ) -> Dict[str, Any]: - text, tool_calls, _ = _extract_result( + text, reasoning_content, tool_calls, _ = _extract_result( response, model, stream=False, gemini_format=False ) return { @@ -77,6 +77,7 @@ def _handle_openai_normal_response( "message": { "role": "assistant", "content": text, + "reasoning_content": reasoning_content, "tool_calls": tool_calls, }, "finish_reason": finish_reason, @@ -156,19 +157,21 @@ def _extract_result( model: str, stream: bool = False, gemini_format: bool = False, -) -> tuple[str, List[Dict[str, Any]], Optional[bool]]: - text, tool_calls = "", [] - thought = None +) -> tuple[str, Optional[str], List[Dict[str, Any]], Optional[bool]]: + text, reasoning_content, tool_calls, thought = "", "", [], None if stream: if response.get("candidates"): candidate = response["candidates"][0] content = candidate.get("content", {}) parts = content.get("parts", []) if not parts: - return "", [], None + return "", None, [], None if "text" in parts[0]: text = parts[0].get("text") if "thought" in parts[0]: + if not gemini_format and settings.SHOW_THINKING_PROCESS: + reasoning_content = text + text = "" thought = parts[0].get("thought") elif "executableCode" in parts[0]: text = _format_code_block(parts[0]["executableCode"]) @@ -187,32 +190,18 @@ def _extract_result( else: if response.get("candidates"): candidate = response["candidates"][0] - if "thinking" in model: - if settings.SHOW_THINKING_PROCESS: - if len(candidate["content"]["parts"]) == 2: - text = ( - "> thinking\n\n" - + candidate["content"]["parts"][0]["text"] - + "\n\n---\n> output\n\n" - + candidate["content"]["parts"][1]["text"] - ) - else: - text = candidate["content"]["parts"][0]["text"] - else: - if len(candidate["content"]["parts"]) == 2: - text = candidate["content"]["parts"][1]["text"] - else: - text = candidate["content"]["parts"][0]["text"] - else: - text = "" - if "parts" in candidate["content"]: - for part in candidate["content"]["parts"]: - if "text" in part: + text, reasoning_content = "", "" + if "parts" in candidate["content"]: + for part in candidate["content"]["parts"]: + if "text" in part: + if "thought" in part and settings.SHOW_THINKING_PROCESS: + reasoning_content += part["text"] + else: text += part["text"] - if "thought" in part and thought is None: - thought = part.get("thought") - elif "inlineData" in part: - text += _extract_image_data(part) + if "thought" in part and thought is None: + thought = part.get("thought") + elif "inlineData" in part: + text += _extract_image_data(part) text = _add_search_link_text(model, candidate, text) tool_calls = _extract_tool_calls( @@ -220,7 +209,7 @@ def _extract_result( ) else: text = "暂无返回" - return text, tool_calls, thought + return text, reasoning_content, tool_calls, thought def _extract_image_data(part: dict) -> str: @@ -294,7 +283,7 @@ def _extract_tool_calls( def _handle_gemini_stream_response( response: Dict[str, Any], model: str, stream: bool ) -> Dict[str, Any]: - text, tool_calls, thought = _extract_result( + text, reasoning_content, tool_calls, thought = _extract_result( response, model, stream=stream, gemini_format=True ) if tool_calls: @@ -311,16 +300,18 @@ def _handle_gemini_stream_response( def _handle_gemini_normal_response( response: Dict[str, Any], model: str, stream: bool ) -> Dict[str, Any]: - text, tool_calls, thought = _extract_result( + text, reasoning_content, tool_calls, thought = _extract_result( response, model, stream=stream, gemini_format=True ) + parts = [] if tool_calls: - content = {"parts": tool_calls, "role": "model"} + parts = tool_calls else: - part = {"text": text} if thought is not None: - part["thought"] = thought - content = {"parts": [part], "role": "model"} + parts.append({"text": reasoning_content,"thought": thought}) + part = {"text": text} + parts.append(part) + content = {"parts": parts, "role": "model"} response["candidates"][0]["content"] = content return response diff --git a/app/service/chat/gemini_chat_service.py b/app/service/chat/gemini_chat_service.py index ea37fe0..0a8ffb3 100644 --- a/app/service/chat/gemini_chat_service.py +++ b/app/service/chat/gemini_chat_service.py @@ -170,7 +170,13 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]: if model.endswith("-non-thinking"): payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} elif model in settings.THINKING_BUDGET_MAP: - payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)} + if settings.SHOW_THINKING_PROCESS: + payload["generationConfig"]["thinkingConfig"] = { + "thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000), + "includeThoughts": True + } + else: + payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)} return payload diff --git a/app/service/chat/openai_chat_service.py b/app/service/chat/openai_chat_service.py index 432b8b4..2a7a067 100644 --- a/app/service/chat/openai_chat_service.py +++ b/app/service/chat/openai_chat_service.py @@ -166,9 +166,13 @@ def _build_payload( if request.model.endswith("-non-thinking"): payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} if request.model in settings.THINKING_BUDGET_MAP: - payload["generationConfig"]["thinkingConfig"] = { - "thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000) - } + if settings.SHOW_THINKING_PROCESS: + payload["generationConfig"]["thinkingConfig"] = { + "thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000), + "includeThoughts": True + } + else: + payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000)} if ( instruction diff --git a/app/service/chat/vertex_express_chat_service.py b/app/service/chat/vertex_express_chat_service.py index 6dcfc08..e62394c 100644 --- a/app/service/chat/vertex_express_chat_service.py +++ b/app/service/chat/vertex_express_chat_service.py @@ -133,10 +133,26 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]: payload.pop("systemInstruction") payload["generationConfig"]["responseModalities"] = ["Text", "Image"] - if model.endswith("-non-thinking"): - payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} - if model in settings.THINKING_BUDGET_MAP: - payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)} + # 处理思考配置:优先使用客户端提供的配置,否则使用默认配置 + client_thinking_config = None + if request.generationConfig and request.generationConfig.thinkingConfig: + client_thinking_config = request.generationConfig.thinkingConfig + + if client_thinking_config is not None: + # 客户端提供了思考配置,直接使用 + payload["generationConfig"]["thinkingConfig"] = client_thinking_config + else: + # 客户端没有提供思考配置,使用默认配置 + if model.endswith("-non-thinking"): + payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} + elif model in settings.THINKING_BUDGET_MAP: + if settings.SHOW_THINKING_PROCESS: + payload["generationConfig"]["thinkingConfig"] = { + "thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000), + "includeThoughts": True + } + else: + payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)} return payload