feat: 更新响应处理逻辑以支持推理内容

- 修改了 response_handler.py 中的 _handle_openai_stream_response 和 _handle_openai_normal_response 方法，增加了对推理内容 (reasoning_content) 的支持。
- 更新了 _extract_result 方法的返回值，确保能够提取推理内容。
- 在 gemini_chat_service.py 和 openai_chat_service.py 中，调整了生成配置以包含思考过程的选项。
- 在 vertex_express_chat_service.py 中，增强了对客户端思考配置的处理逻辑，确保优先使用客户端提供的配置。
2026-06-08 17:19:48 +08:00 · 2025-07-10 21:21:55 +08:00
parent f6d64dd850
commit a6cfc12443
4 changed files with 65 additions and 48 deletions
--- a/app/handler/response_handler.py
+++ b/app/handler/response_handler.py
@@ -39,13 +39,13 @@ class GeminiResponseHandler(ResponseHandler):
 def _handle_openai_stream_response(
    response: Dict[str, Any], model: str, finish_reason: str, usage_metadata: Optional[Dict[str, Any]]
 ) -> Dict[str, Any]:
-    text, tool_calls, _ = _extract_result(
+    text, reasoning_content, tool_calls, _ = _extract_result(
        response, model, stream=True, gemini_format=False
    )
-    if not text and not tool_calls:
+    if not text and not tool_calls and not reasoning_content:
        delta = {}
    else:
-        delta = {"content": text, "role": "assistant"}
+        delta = {"content": text, "reasoning_content": reasoning_content, "role": "assistant"}
        if tool_calls:
            delta["tool_calls"] = tool_calls
    template_chunk = {
@@ -63,7 +63,7 @@ def _handle_openai_stream_response(
 def _handle_openai_normal_response(
    response: Dict[str, Any], model: str, finish_reason: str, usage_metadata: Optional[Dict[str, Any]]
 ) -> Dict[str, Any]:
-    text, tool_calls, _ = _extract_result(
+    text, reasoning_content, tool_calls, _ = _extract_result(
        response, model, stream=False, gemini_format=False
    )
    return {
@@ -77,6 +77,7 @@ def _handle_openai_normal_response(
                "message": {
                    "role": "assistant",
                    "content": text,
+                    "reasoning_content": reasoning_content,
                    "tool_calls": tool_calls,
                },
                "finish_reason": finish_reason,
@@ -156,19 +157,21 @@ def _extract_result(
    model: str,
    stream: bool = False,
    gemini_format: bool = False,
-) -> tuple[str, List[Dict[str, Any]], Optional[bool]]:
-    text, tool_calls = "", []
-    thought = None
+) -> tuple[str, Optional[str], List[Dict[str, Any]], Optional[bool]]:
+    text, reasoning_content, tool_calls, thought = "", "", [], None
    if stream:
        if response.get("candidates"):
            candidate = response["candidates"][0]
            content = candidate.get("content", {})
            parts = content.get("parts", [])
            if not parts:
-                return "", [], None
+                return "", None, [], None
            if "text" in parts[0]:
                text = parts[0].get("text")
                if "thought" in parts[0]:
+                    if not gemini_format and settings.SHOW_THINKING_PROCESS:
+                        reasoning_content = text
+                        text = ""
                    thought = parts[0].get("thought")
            elif "executableCode" in parts[0]:
                text = _format_code_block(parts[0]["executableCode"])
@@ -187,32 +190,18 @@ def _extract_result(
    else:
        if response.get("candidates"):
            candidate = response["candidates"][0]
-            if "thinking" in model:
-                if settings.SHOW_THINKING_PROCESS:
-                    if len(candidate["content"]["parts"]) == 2:
-                        text = (
-                            "> thinking\n\n"
-                            + candidate["content"]["parts"][0]["text"]
-                            + "\n\n---\n> output\n\n"
-                            + candidate["content"]["parts"][1]["text"]
-                        )
-                    else:
-                        text = candidate["content"]["parts"][0]["text"]
-                else:
-                    if len(candidate["content"]["parts"]) == 2:
-                        text = candidate["content"]["parts"][1]["text"]
-                    else:
-                        text = candidate["content"]["parts"][0]["text"]
-            else:
-                text = ""
-                if "parts" in candidate["content"]:
-                    for part in candidate["content"]["parts"]:
-                        if "text" in part:
+            text, reasoning_content = "", ""
+            if "parts" in candidate["content"]:
+                for part in candidate["content"]["parts"]:
+                    if "text" in part:
+                        if "thought" in part and settings.SHOW_THINKING_PROCESS:
+                            reasoning_content += part["text"]
+                        else:
                            text += part["text"]
-                            if "thought" in part and thought is None:
-                                thought = part.get("thought")
-                        elif "inlineData" in part:
-                            text += _extract_image_data(part)
+                        if "thought" in part and thought is None:
+                            thought = part.get("thought")
+                    elif "inlineData" in part:
+                        text += _extract_image_data(part)

            text = _add_search_link_text(model, candidate, text)
            tool_calls = _extract_tool_calls(
@@ -220,7 +209,7 @@ def _extract_result(
            )
        else:
            text = "暂无返回"
-    return text, tool_calls, thought
+    return text, reasoning_content, tool_calls, thought


 def _extract_image_data(part: dict) -> str:
@@ -294,7 +283,7 @@ def _extract_tool_calls(
 def _handle_gemini_stream_response(
    response: Dict[str, Any], model: str, stream: bool
 ) -> Dict[str, Any]:
-    text, tool_calls, thought = _extract_result(
+    text, reasoning_content, tool_calls, thought = _extract_result(
        response, model, stream=stream, gemini_format=True
    )
    if tool_calls:
@@ -311,16 +300,18 @@ def _handle_gemini_stream_response(
 def _handle_gemini_normal_response(
    response: Dict[str, Any], model: str, stream: bool
 ) -> Dict[str, Any]:
-    text, tool_calls, thought = _extract_result(
+    text, reasoning_content, tool_calls, thought = _extract_result(
        response, model, stream=stream, gemini_format=True
    )
+    parts = []
    if tool_calls:
-        content = {"parts": tool_calls, "role": "model"}
+        parts = tool_calls
    else:
-        part = {"text": text}
        if thought is not None:
-            part["thought"] = thought
-        content = {"parts": [part], "role": "model"}
+            parts.append({"text": reasoning_content,"thought": thought})
+        part = {"text": text}
+        parts.append(part)
+    content = {"parts": parts, "role": "model"}
    response["candidates"][0]["content"] = content
    return response

--- a/app/service/chat/gemini_chat_service.py
+++ b/app/service/chat/gemini_chat_service.py
@@ -170,7 +170,13 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]:
        if model.endswith("-non-thinking"):
            payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} 
        elif model in settings.THINKING_BUDGET_MAP:
-            payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)}
+            if settings.SHOW_THINKING_PROCESS:
+                payload["generationConfig"]["thinkingConfig"] = {
+                    "thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000),
+                    "includeThoughts": True
+                }
+            else:
+                payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)}

    return payload

--- a/app/service/chat/openai_chat_service.py
+++ b/app/service/chat/openai_chat_service.py
@@ -166,9 +166,13 @@ def _build_payload(
    if request.model.endswith("-non-thinking"):
        payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0}
    if request.model in settings.THINKING_BUDGET_MAP:
-        payload["generationConfig"]["thinkingConfig"] = {
-            "thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000)
-        }
+        if settings.SHOW_THINKING_PROCESS:
+            payload["generationConfig"]["thinkingConfig"] = {
+                "thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000),
+                "includeThoughts": True
+            }
+        else:
+            payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000)}

    if (
        instruction
--- a/app/service/chat/vertex_express_chat_service.py
+++ b/app/service/chat/vertex_express_chat_service.py
@@ -133,10 +133,26 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]:
        payload.pop("systemInstruction")
        payload["generationConfig"]["responseModalities"] = ["Text", "Image"]
        
-    if model.endswith("-non-thinking"):
-        payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} 
-    if model in settings.THINKING_BUDGET_MAP:
-        payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)}
+    # 处理思考配置：优先使用客户端提供的配置，否则使用默认配置
+    client_thinking_config = None
+    if request.generationConfig and request.generationConfig.thinkingConfig:
+        client_thinking_config = request.generationConfig.thinkingConfig
+    
+    if client_thinking_config is not None:
+        # 客户端提供了思考配置，直接使用
+        payload["generationConfig"]["thinkingConfig"] = client_thinking_config
+    else:
+        # 客户端没有提供思考配置，使用默认配置    
+        if model.endswith("-non-thinking"):
+            payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} 
+        elif model in settings.THINKING_BUDGET_MAP:
+            if settings.SHOW_THINKING_PROCESS:
+                payload["generationConfig"]["thinkingConfig"] = {
+                    "thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000),
+                    "includeThoughts": True
+                }
+            else:
+                payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)}

    return payload