diff --git a/VERSION b/VERSION index b370e25..b88e500 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.8 \ No newline at end of file +2.1.9 \ No newline at end of file diff --git a/app/core/constants.py b/app/core/constants.py index 21d72aa..cbdfb8f 100644 --- a/app/core/constants.py +++ b/app/core/constants.py @@ -76,4 +76,15 @@ DEFAULT_SAFETY_SETTINGS = [ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"}, {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"}, {"category": "HARM_CATEGORY_CIVIC_INTEGRITY", "threshold": "BLOCK_NONE"}, - ] \ No newline at end of file + ] + +TTS_VOICE_NAMES = [ + "Zephyr", "Puck", "Charon", "Kore", + "Fenrir", "Leda", "Orus", "Aoede", + "Callirhoe", "Autonoe", "Enceladus", "Iapetus", + "Umbriel", "Algieba", "Despina", "Erinome", + "Algenib", "Rasalgethi", "Laomedeia", "Achernar", + "Alnilam", "Schedar", "Gacrux", "Pulcherrima", + "Achird", "Zubenelgenubi", "Vindemiatrix", "Sadachbia", + "Sadaltager", "Sulafat" +] \ No newline at end of file diff --git a/app/handler/response_handler.py b/app/handler/response_handler.py index c0517b4..95f631c 100644 --- a/app/handler/response_handler.py +++ b/app/handler/response_handler.py @@ -39,13 +39,13 @@ class GeminiResponseHandler(ResponseHandler): def _handle_openai_stream_response( response: Dict[str, Any], model: str, finish_reason: str, usage_metadata: Optional[Dict[str, Any]] ) -> Dict[str, Any]: - text, tool_calls, _ = _extract_result( + text, reasoning_content, tool_calls, _ = _extract_result( response, model, stream=True, gemini_format=False ) - if not text and not tool_calls: + if not text and not tool_calls and not reasoning_content: delta = {} else: - delta = {"content": text, "role": "assistant"} + delta = {"content": text, "reasoning_content": reasoning_content, "role": "assistant"} if tool_calls: delta["tool_calls"] = tool_calls template_chunk = { @@ -63,7 +63,7 @@ def _handle_openai_stream_response( def _handle_openai_normal_response( response: Dict[str, Any], model: str, finish_reason: str, usage_metadata: Optional[Dict[str, Any]] ) -> Dict[str, Any]: - text, tool_calls, _ = _extract_result( + text, reasoning_content, tool_calls, _ = _extract_result( response, model, stream=False, gemini_format=False ) return { @@ -77,6 +77,7 @@ def _handle_openai_normal_response( "message": { "role": "assistant", "content": text, + "reasoning_content": reasoning_content, "tool_calls": tool_calls, }, "finish_reason": finish_reason, @@ -156,19 +157,21 @@ def _extract_result( model: str, stream: bool = False, gemini_format: bool = False, -) -> tuple[str, List[Dict[str, Any]], Optional[bool]]: - text, tool_calls = "", [] - thought = None +) -> tuple[str, Optional[str], List[Dict[str, Any]], Optional[bool]]: + text, reasoning_content, tool_calls, thought = "", "", [], None if stream: if response.get("candidates"): candidate = response["candidates"][0] content = candidate.get("content", {}) parts = content.get("parts", []) if not parts: - return "", [], None + return "", None, [], None if "text" in parts[0]: text = parts[0].get("text") if "thought" in parts[0]: + if not gemini_format and settings.SHOW_THINKING_PROCESS: + reasoning_content = text + text = "" thought = parts[0].get("thought") elif "executableCode" in parts[0]: text = _format_code_block(parts[0]["executableCode"]) @@ -187,32 +190,18 @@ def _extract_result( else: if response.get("candidates"): candidate = response["candidates"][0] - if "thinking" in model: - if settings.SHOW_THINKING_PROCESS: - if len(candidate["content"]["parts"]) == 2: - text = ( - "> thinking\n\n" - + candidate["content"]["parts"][0]["text"] - + "\n\n---\n> output\n\n" - + candidate["content"]["parts"][1]["text"] - ) - else: - text = candidate["content"]["parts"][0]["text"] - else: - if len(candidate["content"]["parts"]) == 2: - text = candidate["content"]["parts"][1]["text"] - else: - text = candidate["content"]["parts"][0]["text"] - else: - text = "" - if "parts" in candidate["content"]: - for part in candidate["content"]["parts"]: - if "text" in part: + text, reasoning_content = "", "" + if "parts" in candidate["content"]: + for part in candidate["content"]["parts"]: + if "text" in part: + if "thought" in part and settings.SHOW_THINKING_PROCESS: + reasoning_content += part["text"] + else: text += part["text"] - if "thought" in part and thought is None: - thought = part.get("thought") - elif "inlineData" in part: - text += _extract_image_data(part) + if "thought" in part and thought is None: + thought = part.get("thought") + elif "inlineData" in part: + text += _extract_image_data(part) text = _add_search_link_text(model, candidate, text) tool_calls = _extract_tool_calls( @@ -220,7 +209,7 @@ def _extract_result( ) else: text = "暂无返回" - return text, tool_calls, thought + return text, reasoning_content, tool_calls, thought def _extract_image_data(part: dict) -> str: @@ -294,7 +283,7 @@ def _extract_tool_calls( def _handle_gemini_stream_response( response: Dict[str, Any], model: str, stream: bool ) -> Dict[str, Any]: - text, tool_calls, thought = _extract_result( + text, reasoning_content, tool_calls, thought = _extract_result( response, model, stream=stream, gemini_format=True ) if tool_calls: @@ -311,16 +300,18 @@ def _handle_gemini_stream_response( def _handle_gemini_normal_response( response: Dict[str, Any], model: str, stream: bool ) -> Dict[str, Any]: - text, tool_calls, thought = _extract_result( + text, reasoning_content, tool_calls, thought = _extract_result( response, model, stream=stream, gemini_format=True ) + parts = [] if tool_calls: - content = {"parts": tool_calls, "role": "model"} + parts = tool_calls else: - part = {"text": text} if thought is not None: - part["thought"] = thought - content = {"parts": [part], "role": "model"} + parts.append({"text": reasoning_content,"thought": thought}) + part = {"text": text} + parts.append(part) + content = {"parts": parts, "role": "model"} response["candidates"][0]["content"] = content return response diff --git a/app/service/chat/gemini_chat_service.py b/app/service/chat/gemini_chat_service.py index ca49ca8..edb719a 100644 --- a/app/service/chat/gemini_chat_service.py +++ b/app/service/chat/gemini_chat_service.py @@ -50,6 +50,33 @@ def _extract_file_references(contents: List[Dict[str, Any]]) -> List[str]: logger.info(f"Found file reference: {file_id}") return file_names +def _clean_json_schema_properties(obj: Any) -> Any: + """清理JSON Schema中Gemini API不支持的字段""" + if not isinstance(obj, dict): + return obj + + # Gemini API不支持的JSON Schema字段 + unsupported_fields = { + "exclusiveMaximum", "exclusiveMinimum", "const", "examples", + "contentEncoding", "contentMediaType", "if", "then", "else", + "allOf", "anyOf", "oneOf", "not", "definitions", "$schema", + "$id", "$ref", "$comment", "readOnly", "writeOnly" + } + + cleaned = {} + for key, value in obj.items(): + if key in unsupported_fields: + continue + if isinstance(value, dict): + cleaned[key] = _clean_json_schema_properties(value) + elif isinstance(value, list): + cleaned[key] = [_clean_json_schema_properties(item) for item in value] + else: + cleaned[key] = value + + return cleaned + + def _build_tools(model: str, payload: Dict[str, Any]) -> List[Dict[str, Any]]: """构建工具""" @@ -62,7 +89,15 @@ def _build_tools(model: str, payload: Dict[str, Any]) -> List[Dict[str, Any]]: for k, v in item.items(): if k == "functionDeclarations" and v and isinstance(v, list): functions = record.get("functionDeclarations", []) - functions.extend(v) + # 清理每个函数声明中的不支持字段 + cleaned_functions = [] + for func in v: + if isinstance(func, dict): + cleaned_func = _clean_json_schema_properties(func) + cleaned_functions.append(cleaned_func) + else: + cleaned_functions.append(func) + functions.extend(cleaned_functions) record["functionDeclarations"] = functions else: record[k] = v @@ -136,6 +171,10 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]: "systemInstruction": request_dict.get("systemInstruction"), } + # 确保 generationConfig 不为 None + if payload["generationConfig"] is None: + payload["generationConfig"] = {} + if model.endswith("-image") or model.endswith("-image-generation"): payload.pop("systemInstruction") payload["generationConfig"]["responseModalities"] = ["Text", "Image"] @@ -153,7 +192,13 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]: if model.endswith("-non-thinking"): payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} elif model in settings.THINKING_BUDGET_MAP: - payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)} + if settings.SHOW_THINKING_PROCESS: + payload["generationConfig"]["thinkingConfig"] = { + "thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000), + "includeThoughts": True + } + else: + payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)} return payload diff --git a/app/service/chat/openai_chat_service.py b/app/service/chat/openai_chat_service.py index d2866a0..2a7a067 100644 --- a/app/service/chat/openai_chat_service.py +++ b/app/service/chat/openai_chat_service.py @@ -26,16 +26,43 @@ from app.service.key.key_manager import KeyManager logger = get_openai_logger() -def _has_media_parts(contents: List[Dict[str, Any]]) -> bool: - """判断消息是否包含图片、音频或视频部分 (inline_data)""" - for content in contents: - if content and "parts" in content and isinstance(content["parts"], list): - for part in content["parts"]: - if isinstance(part, dict) and "inline_data" in part: +def _has_media_parts(messages: List[Dict[str, Any]]) -> bool: + """判断消息是否包含多媒体部分""" + for message in messages: + if "parts" in message: + for part in message["parts"]: + if "image_url" in part or "inline_data" in part: return True return False +def _clean_json_schema_properties(obj: Any) -> Any: + """清理JSON Schema中Gemini API不支持的字段""" + if not isinstance(obj, dict): + return obj + + # Gemini API不支持的JSON Schema字段 + unsupported_fields = { + "exclusiveMaximum", "exclusiveMinimum", "const", "examples", + "contentEncoding", "contentMediaType", "if", "then", "else", + "allOf", "anyOf", "oneOf", "not", "definitions", "$schema", + "$id", "$ref", "$comment", "readOnly", "writeOnly" + } + + cleaned = {} + for key, value in obj.items(): + if key in unsupported_fields: + continue + if isinstance(value, dict): + cleaned[key] = _clean_json_schema_properties(value) + elif isinstance(value, list): + cleaned[key] = [_clean_json_schema_properties(item) for item in value] + else: + cleaned[key] = value + + return cleaned + + def _build_tools( request: ChatRequest, messages: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: @@ -76,6 +103,8 @@ def _build_tools( ): function.pop("parameters", None) + # 清理函数中的不支持字段 + function = _clean_json_schema_properties(function) function_declarations.append(function) if function_declarations: @@ -137,9 +166,13 @@ def _build_payload( if request.model.endswith("-non-thinking"): payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} if request.model in settings.THINKING_BUDGET_MAP: - payload["generationConfig"]["thinkingConfig"] = { - "thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000) - } + if settings.SHOW_THINKING_PROCESS: + payload["generationConfig"]["thinkingConfig"] = { + "thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000), + "includeThoughts": True + } + else: + payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(request.model, 1000)} if ( instruction diff --git a/app/service/chat/vertex_express_chat_service.py b/app/service/chat/vertex_express_chat_service.py index 313cb89..e62394c 100644 --- a/app/service/chat/vertex_express_chat_service.py +++ b/app/service/chat/vertex_express_chat_service.py @@ -28,6 +28,33 @@ def _has_image_parts(contents: List[Dict[str, Any]]) -> bool: return False +def _clean_json_schema_properties(obj: Any) -> Any: + """清理JSON Schema中Gemini API不支持的字段""" + if not isinstance(obj, dict): + return obj + + # Gemini API不支持的JSON Schema字段 + unsupported_fields = { + "exclusiveMaximum", "exclusiveMinimum", "const", "examples", + "contentEncoding", "contentMediaType", "if", "then", "else", + "allOf", "anyOf", "oneOf", "not", "definitions", "$schema", + "$id", "$ref", "$comment", "readOnly", "writeOnly" + } + + cleaned = {} + for key, value in obj.items(): + if key in unsupported_fields: + continue + if isinstance(value, dict): + cleaned[key] = _clean_json_schema_properties(value) + elif isinstance(value, list): + cleaned[key] = [_clean_json_schema_properties(item) for item in value] + else: + cleaned[key] = value + + return cleaned + + def _build_tools(model: str, payload: Dict[str, Any]) -> List[Dict[str, Any]]: """构建工具""" @@ -40,7 +67,15 @@ def _build_tools(model: str, payload: Dict[str, Any]) -> List[Dict[str, Any]]: for k, v in item.items(): if k == "functionDeclarations" and v and isinstance(v, list): functions = record.get("functionDeclarations", []) - functions.extend(v) + # 清理每个函数声明中的不支持字段 + cleaned_functions = [] + for func in v: + if isinstance(func, dict): + cleaned_func = _clean_json_schema_properties(func) + cleaned_functions.append(cleaned_func) + else: + cleaned_functions.append(func) + functions.extend(cleaned_functions) record["functionDeclarations"] = functions else: record[k] = v @@ -98,10 +133,26 @@ def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]: payload.pop("systemInstruction") payload["generationConfig"]["responseModalities"] = ["Text", "Image"] - if model.endswith("-non-thinking"): - payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} - if model in settings.THINKING_BUDGET_MAP: - payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)} + # 处理思考配置:优先使用客户端提供的配置,否则使用默认配置 + client_thinking_config = None + if request.generationConfig and request.generationConfig.thinkingConfig: + client_thinking_config = request.generationConfig.thinkingConfig + + if client_thinking_config is not None: + # 客户端提供了思考配置,直接使用 + payload["generationConfig"]["thinkingConfig"] = client_thinking_config + else: + # 客户端没有提供思考配置,使用默认配置 + if model.endswith("-non-thinking"): + payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0} + elif model in settings.THINKING_BUDGET_MAP: + if settings.SHOW_THINKING_PROCESS: + payload["generationConfig"]["thinkingConfig"] = { + "thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000), + "includeThoughts": True + } + else: + payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": settings.THINKING_BUDGET_MAP.get(model,1000)} return payload diff --git a/app/service/client/api_client.py b/app/service/client/api_client.py index 2b672f9..dd297a5 100644 --- a/app/service/client/api_client.py +++ b/app/service/client/api_client.py @@ -21,10 +21,6 @@ class ApiClient(ABC): async def stream_generate_content(self, payload: Dict[str, Any], model: str, api_key: str) -> AsyncGenerator[str, None]: pass - @abstractmethod - async def count_tokens(self, payload: Dict[str, Any], model: str, api_key: str) -> Dict[str, Any]: - pass - class GeminiApiClient(ApiClient): """Gemini API客户端""" diff --git a/app/service/tts/tts_service.py b/app/service/tts/tts_service.py index 99a6074..b7c19f0 100644 --- a/app/service/tts/tts_service.py +++ b/app/service/tts/tts_service.py @@ -8,6 +8,7 @@ from typing import Optional from google import genai from app.config.config import settings +from app.core.constants import TTS_VOICE_NAMES from app.database.services import add_error_log, add_request_log from app.domain.openai_models import TTSRequest from app.log.logger import get_openai_logger @@ -47,7 +48,7 @@ class TTSService: "speech_config": { "voice_config": { "prebuilt_voice_config": { - "voice_name": settings.TTS_VOICE_NAME + "voice_name": request.voice if request.voice in TTS_VOICE_NAMES else settings.TTS_VOICE_NAME } } },