fix #219

修复token的问题
This commit is contained in:
hewenyu
2025-07-13 00:06:44 +08:00
parent 0b837c3f80
commit 445ef49dc8
3 changed files with 128 additions and 36 deletions

View File

@@ -9,6 +9,9 @@ from typing import Any, Dict, List, Optional
from app.config.config import settings
from app.utils.uploader import ImageUploaderFactory
from app.log.logger import get_openai_logger
logger = get_openai_logger()
class ResponseHandler(ABC):
@@ -159,13 +162,16 @@ def _extract_result(
gemini_format: bool = False,
) -> tuple[str, Optional[str], List[Dict[str, Any]], Optional[bool]]:
text, reasoning_content, tool_calls, thought = "", "", [], None
if stream:
if response.get("candidates"):
candidate = response["candidates"][0]
content = candidate.get("content", {})
parts = content.get("parts", [])
if not parts:
logger.warning("No parts found in stream response")
return "", None, [], None
if "text" in parts[0]:
text = parts[0].get("text")
if "thought" in parts[0]:
@@ -191,24 +197,38 @@ def _extract_result(
if response.get("candidates"):
candidate = response["candidates"][0]
text, reasoning_content = "", ""
if "parts" in candidate["content"]:
for part in candidate["content"]["parts"]:
if "text" in part:
if "thought" in part and settings.SHOW_THINKING_PROCESS:
reasoning_content += part["text"]
else:
text += part["text"]
if "thought" in part and thought is None:
thought = part.get("thought")
elif "inlineData" in part:
text += _extract_image_data(part)
# 使用安全的访问方式
content = candidate.get("content", {})
if content and isinstance(content, dict):
parts = content.get("parts", [])
if parts:
for part in parts:
if "text" in part:
if "thought" in part and settings.SHOW_THINKING_PROCESS:
reasoning_content += part["text"]
else:
text += part["text"]
if "thought" in part and thought is None:
thought = part.get("thought")
elif "inlineData" in part:
text += _extract_image_data(part)
else:
logger.warning(f"No parts found in content for model: {model}")
else:
logger.error(f"Invalid content structure for model: {model}")
text = _add_search_link_text(model, candidate, text)
tool_calls = _extract_tool_calls(
candidate["content"]["parts"], gemini_format
)
# 安全地获取 parts 用于工具调用提取
parts = candidate.get("content", {}).get("parts", [])
tool_calls = _extract_tool_calls(parts, gemini_format)
else:
logger.warning(f"No candidates found in response for model: {model}")
text = "暂无返回"
return text, reasoning_content, tool_calls, thought
@@ -250,8 +270,8 @@ def _extract_tool_calls(
return []
letters = string.ascii_lowercase + string.digits
tool_calls = list()
for i in range(len(parts)):
part = parts[i]
if not part or not isinstance(part, dict):
@@ -260,7 +280,7 @@ def _extract_tool_calls(
item = part.get("functionCall", {})
if not item or not isinstance(item, dict):
continue
if gemini_format:
tool_calls.append(part)
else:

View File

@@ -142,6 +142,23 @@ def _get_safety_settings(model: str) -> List[Dict[str, str]]:
return settings.SAFETY_SETTINGS
def _validate_and_set_max_tokens(
payload: Dict[str, Any],
max_tokens: Optional[int],
logger_instance
) -> None:
"""验证并设置 max_tokens 参数"""
if max_tokens is None:
return
# 参数验证和处理
if max_tokens <= 0:
logger_instance.warning(f"Invalid max_tokens value: {max_tokens}, will not set maxOutputTokens")
# 不设置 maxOutputTokens让 Gemini API 使用默认值
else:
payload["generationConfig"]["maxOutputTokens"] = max_tokens
def _build_payload(
request: ChatRequest,
messages: List[Dict[str, Any]],
@@ -159,12 +176,16 @@ def _build_payload(
"tools": _build_tools(request, messages),
"safetySettings": _get_safety_settings(request.model),
}
if request.max_tokens is not None:
payload["generationConfig"]["maxOutputTokens"] = request.max_tokens
# 处理 max_tokens 参数
_validate_and_set_max_tokens(payload, request.max_tokens, logger)
if request.model.endswith("-image") or request.model.endswith("-image-generation"):
payload["generationConfig"]["responseModalities"] = ["Text", "Image"]
if request.model.endswith("-non-thinking"):
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0}
if request.model in settings.THINKING_BUDGET_MAP:
if settings.SHOW_THINKING_PROCESS:
payload["generationConfig"]["thinkingConfig"] = {
@@ -239,27 +260,53 @@ class OpenAIChatService:
is_success = False
status_code = None
response = None
try:
response = await self.api_client.generate_content(payload, model, api_key)
usage_metadata = response.get("usageMetadata", {})
is_success = True
status_code = 200
return self.response_handler.handle_response(
response,
model,
stream=False,
finish_reason="stop",
usage_metadata=usage_metadata,
)
# 尝试处理响应,捕获可能的响应处理异常
try:
result = self.response_handler.handle_response(
response,
model,
stream=False,
finish_reason="stop",
usage_metadata=usage_metadata,
)
return result
except Exception as response_error:
logger.error(f"Response processing failed for model {model}: {str(response_error)}")
# 记录详细的错误信息
if "parts" in str(response_error):
logger.error("Response structure issue - missing or invalid parts")
if response.get("candidates"):
candidate = response["candidates"][0]
content = candidate.get("content", {})
logger.error(f"Content structure: {content}")
# 重新抛出异常
raise response_error
except Exception as e:
is_success = False
error_log_msg = str(e)
logger.error(f"Normal API call failed with error: {error_log_msg}")
logger.error(f"API call failed for model {model}: {error_log_msg}")
# 特别记录 max_tokens 相关的错误
gen_config = payload.get('generationConfig', {})
if "maxOutputTokens" in gen_config:
logger.error(f"Request had maxOutputTokens: {gen_config['maxOutputTokens']}")
# 如果是响应处理错误,记录更多信息
if "parts" in error_log_msg:
logger.error("This is likely a response processing error")
match = re.search(r"status code (\d+)", error_log_msg)
if match:
status_code = int(match.group(1))
else:
status_code = 500
status_code = int(match.group(1)) if match else 500
await add_error_log(
gemini_key=api_key,
@@ -273,6 +320,8 @@ class OpenAIChatService:
finally:
end_time = time.perf_counter()
latency_ms = int((end_time - start_time) * 1000)
logger.info(f"Normal completion finished - Success: {is_success}, Latency: {latency_ms}ms")
await add_request_log(
model_name=model,
api_key=api_key,

View File

@@ -7,6 +7,7 @@ from abc import ABC, abstractmethod
from app.config.config import settings
from app.log.logger import get_api_client_logger
from app.core.constants import DEFAULT_TIMEOUT
import json
logger = get_api_client_logger()
@@ -77,7 +78,7 @@ class GeminiApiClient(ApiClient):
async def generate_content(self, payload: Dict[str, Any], model: str, api_key: str) -> Dict[str, Any]:
timeout = httpx.Timeout(self.timeout, read=self.timeout)
model = self._get_real_model(model)
proxy_to_use = None
if settings.PROXIES:
if settings.PROXIES_USE_CONSISTENCY_HASH_BY_API_KEY:
@@ -87,13 +88,35 @@ class GeminiApiClient(ApiClient):
logger.info(f"Using proxy for getting models: {proxy_to_use}")
headers = self._prepare_headers()
async with httpx.AsyncClient(timeout=timeout, proxy=proxy_to_use) as client:
url = f"{self.base_url}/models/{model}:generateContent?key={api_key}"
response = await client.post(url, json=payload, headers=headers)
if response.status_code != 200:
error_content = response.text
raise Exception(f"API call failed with status code {response.status_code}, {error_content}")
return response.json()
try:
response = await client.post(url, json=payload, headers=headers)
if response.status_code != 200:
error_content = response.text
logger.error(f"API call failed - Status: {response.status_code}, Content: {error_content}")
raise Exception(f"API call failed with status code {response.status_code}, {error_content}")
response_data = response.json()
# 检查响应结构的基本信息
if not response_data.get("candidates"):
logger.warning("No candidates found in API response")
return response_data
except httpx.TimeoutException as e:
logger.error(f"Request timeout: {e}")
raise Exception(f"Request timeout: {e}")
except httpx.RequestError as e:
logger.error(f"Request error: {e}")
raise Exception(f"Request error: {e}")
except Exception as e:
logger.error(f"Unexpected error: {e}")
raise
async def stream_generate_content(self, payload: Dict[str, Any], model: str, api_key: str) -> AsyncGenerator[str, None]:
timeout = httpx.Timeout(self.timeout, read=self.timeout)