mirror of
https://github.com/snailyp/gemini-balance.git
synced 2026-06-08 17:19:48 +08:00
feat: 更新响应处理逻辑以支持推理内容
- 修改了 response_handler.py 中的 _handle_openai_stream_response 和 _handle_openai_normal_response 方法,增加了对推理内容 (reasoning_content) 的支持。
- 更新了 _extract_result 方法的返回值,确保能够提取推理内容。
- 在 gemini_chat_service.py 和 openai_chat_service.py 中,调整了生成配置以包含思考过程的选项。
- 在 vertex_express_chat_service.py 中,增强了对客户端思考配置的处理逻辑,确保优先使用客户端提供的配置。
This commit is contained in:
@@ -39,13 +39,13 @@ class GeminiResponseHandler(ResponseHandler):
|
||||
def _handle_openai_stream_response(
|
||||
response: Dict[str, Any], model: str, finish_reason: str, usage_metadata: Optional[Dict[str, Any]]
|
||||
) -> Dict[str, Any]:
|
||||
text, tool_calls, _ = _extract_result(
|
||||
text, reasoning_content, tool_calls, _ = _extract_result(
|
||||
response, model, stream=True, gemini_format=False
|
||||
)
|
||||
if not text and not tool_calls:
|
||||
if not text and not tool_calls and not reasoning_content:
|
||||
delta = {}
|
||||
else:
|
||||
delta = {"content": text, "role": "assistant"}
|
||||
delta = {"content": text, "reasoning_content": reasoning_content, "role": "assistant"}
|
||||
if tool_calls:
|
||||
delta["tool_calls"] = tool_calls
|
||||
template_chunk = {
|
||||
@@ -63,7 +63,7 @@ def _handle_openai_stream_response(
|
||||
def _handle_openai_normal_response(
|
||||
response: Dict[str, Any], model: str, finish_reason: str, usage_metadata: Optional[Dict[str, Any]]
|
||||
) -> Dict[str, Any]:
|
||||
text, tool_calls, _ = _extract_result(
|
||||
text, reasoning_content, tool_calls, _ = _extract_result(
|
||||
response, model, stream=False, gemini_format=False
|
||||
)
|
||||
return {
|
||||
@@ -77,6 +77,7 @@ def _handle_openai_normal_response(
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": text,
|
||||
"reasoning_content": reasoning_content,
|
||||
"tool_calls": tool_calls,
|
||||
},
|
||||
"finish_reason": finish_reason,
|
||||
@@ -156,19 +157,21 @@ def _extract_result(
|
||||
model: str,
|
||||
stream: bool = False,
|
||||
gemini_format: bool = False,
|
||||
) -> tuple[str, List[Dict[str, Any]], Optional[bool]]:
|
||||
text, tool_calls = "", []
|
||||
thought = None
|
||||
) -> tuple[str, Optional[str], List[Dict[str, Any]], Optional[bool]]:
|
||||
text, reasoning_content, tool_calls, thought = "", "", [], None
|
||||
if stream:
|
||||
if response.get("candidates"):
|
||||
candidate = response["candidates"][0]
|
||||
content = candidate.get("content", {})
|
||||
parts = content.get("parts", [])
|
||||
if not parts:
|
||||
return "", [], None
|
||||
return "", None, [], None
|
||||
if "text" in parts[0]:
|
||||
text = parts[0].get("text")
|
||||
if "thought" in parts[0]:
|
||||
if not gemini_format and settings.SHOW_THINKING_PROCESS:
|
||||
reasoning_content = text
|
||||
text = ""
|
||||
thought = parts[0].get("thought")
|
||||
elif "executableCode" in parts[0]:
|
||||
text = _format_code_block(parts[0]["executableCode"])
|
||||
@@ -187,32 +190,18 @@ def _extract_result(
|
||||
else:
|
||||
if response.get("candidates"):
|
||||
candidate = response["candidates"][0]
|
||||
if "thinking" in model:
|
||||
if settings.SHOW_THINKING_PROCESS:
|
||||
if len(candidate["content"]["parts"]) == 2:
|
||||
text = (
|
||||
"> thinking\n\n"
|
||||
+ candidate["content"]["parts"][0]["text"]
|
||||
+ "\n\n---\n> output\n\n"
|
||||
+ candidate["content"]["parts"][1]["text"]
|
||||
)
|
||||
else:
|
||||
text = candidate["content"]["parts"][0]["text"]
|
||||
else:
|
||||
if len(candidate["content"]["parts"]) == 2:
|
||||
text = candidate["content"]["parts"][1]["text"]
|
||||
else:
|
||||
text = candidate["content"]["parts"][0]["text"]
|
||||
else:
|
||||
text = ""
|
||||
if "parts" in candidate["content"]:
|
||||
for part in candidate["content"]["parts"]:
|
||||
if "text" in part:
|
||||
text, reasoning_content = "", ""
|
||||
if "parts" in candidate["content"]:
|
||||
for part in candidate["content"]["parts"]:
|
||||
if "text" in part:
|
||||
if "thought" in part and settings.SHOW_THINKING_PROCESS:
|
||||
reasoning_content += part["text"]
|
||||
else:
|
||||
text += part["text"]
|
||||
if "thought" in part and thought is None:
|
||||
thought = part.get("thought")
|
||||
elif "inlineData" in part:
|
||||
text += _extract_image_data(part)
|
||||
if "thought" in part and thought is None:
|
||||
thought = part.get("thought")
|
||||
elif "inlineData" in part:
|
||||
text += _extract_image_data(part)
|
||||
|
||||
text = _add_search_link_text(model, candidate, text)
|
||||
tool_calls = _extract_tool_calls(
|
||||
@@ -220,7 +209,7 @@ def _extract_result(
|
||||
)
|
||||
else:
|
||||
text = "暂无返回"
|
||||
return text, tool_calls, thought
|
||||
return text, reasoning_content, tool_calls, thought
|
||||
|
||||
|
||||
def _extract_image_data(part: dict) -> str:
|
||||
@@ -294,7 +283,7 @@ def _extract_tool_calls(
|
||||
def _handle_gemini_stream_response(
|
||||
response: Dict[str, Any], model: str, stream: bool
|
||||
) -> Dict[str, Any]:
|
||||
text, tool_calls, thought = _extract_result(
|
||||
text, reasoning_content, tool_calls, thought = _extract_result(
|
||||
response, model, stream=stream, gemini_format=True
|
||||
)
|
||||
if tool_calls:
|
||||
@@ -311,16 +300,18 @@ def _handle_gemini_stream_response(
|
||||
def _handle_gemini_normal_response(
|
||||
response: Dict[str, Any], model: str, stream: bool
|
||||
) -> Dict[str, Any]:
|
||||
text, tool_calls, thought = _extract_result(
|
||||
text, reasoning_content, tool_calls, thought = _extract_result(
|
||||
response, model, stream=stream, gemini_format=True
|
||||
)
|
||||
parts = []
|
||||
if tool_calls:
|
||||
content = {"parts": tool_calls, "role": "model"}
|
||||
parts = tool_calls
|
||||
else:
|
||||
part = {"text": text}
|
||||
if thought is not None:
|
||||
part["thought"] = thought
|
||||
content = {"parts": [part], "role": "model"}
|
||||
parts.append({"text": reasoning_content,"thought": thought})
|
||||
part = {"text": text}
|
||||
parts.append(part)
|
||||
content = {"parts": parts, "role": "model"}
|
||||
response["candidates"][0]["content"] = content
|
||||
return response
|
||||
|
||||
|
||||
@@ -170,7 +170,13 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]:
|
||||
if model.endswith("-non-thinking"):
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0}
|
||||
elif model in settings.THINKING_BUDGET_MAP:
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)}
|
||||
if settings.SHOW_THINKING_PROCESS:
|
||||
payload["generationConfig"]["thinkingConfig"] = {
|
||||
"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000),
|
||||
"includeThoughts": True
|
||||
}
|
||||
else:
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)}
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
@@ -166,9 +166,13 @@ def _build_payload(
|
||||
if request.model.endswith("-non-thinking"):
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0}
|
||||
if request.model in settings.THINKING_BUDGET_MAP:
|
||||
payload["generationConfig"]["thinkingConfig"] = {
|
||||
"thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000)
|
||||
}
|
||||
if settings.SHOW_THINKING_PROCESS:
|
||||
payload["generationConfig"]["thinkingConfig"] = {
|
||||
"thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000),
|
||||
"includeThoughts": True
|
||||
}
|
||||
else:
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000)}
|
||||
|
||||
if (
|
||||
instruction
|
||||
|
||||
@@ -133,10 +133,26 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]:
|
||||
payload.pop("systemInstruction")
|
||||
payload["generationConfig"]["responseModalities"] = ["Text", "Image"]
|
||||
|
||||
if model.endswith("-non-thinking"):
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0}
|
||||
if model in settings.THINKING_BUDGET_MAP:
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)}
|
||||
# 处理思考配置:优先使用客户端提供的配置,否则使用默认配置
|
||||
client_thinking_config = None
|
||||
if request.generationConfig and request.generationConfig.thinkingConfig:
|
||||
client_thinking_config = request.generationConfig.thinkingConfig
|
||||
|
||||
if client_thinking_config is not None:
|
||||
# 客户端提供了思考配置,直接使用
|
||||
payload["generationConfig"]["thinkingConfig"] = client_thinking_config
|
||||
else:
|
||||
# 客户端没有提供思考配置,使用默认配置
|
||||
if model.endswith("-non-thinking"):
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0}
|
||||
elif model in settings.THINKING_BUDGET_MAP:
|
||||
if settings.SHOW_THINKING_PROCESS:
|
||||
payload["generationConfig"]["thinkingConfig"] = {
|
||||
"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000),
|
||||
"includeThoughts": True
|
||||
}
|
||||
else:
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)}
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
Reference in New Issue
Block a user