mirror of
https://github.com/snailyp/gemini-balance.git
synced 2026-05-06 20:32:47 +08:00
fix # 219
修复token的问题
This commit is contained in:
@@ -9,6 +9,9 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from app.config.config import settings
|
||||
from app.utils.uploader import ImageUploaderFactory
|
||||
from app.log.logger import get_openai_logger
|
||||
|
||||
logger = get_openai_logger()
|
||||
|
||||
|
||||
class ResponseHandler(ABC):
|
||||
@@ -159,13 +162,16 @@ def _extract_result(
|
||||
gemini_format: bool = False,
|
||||
) -> tuple[str, Optional[str], List[Dict[str, Any]], Optional[bool]]:
|
||||
text, reasoning_content, tool_calls, thought = "", "", [], None
|
||||
|
||||
if stream:
|
||||
if response.get("candidates"):
|
||||
candidate = response["candidates"][0]
|
||||
content = candidate.get("content", {})
|
||||
parts = content.get("parts", [])
|
||||
if not parts:
|
||||
logger.warning("No parts found in stream response")
|
||||
return "", None, [], None
|
||||
|
||||
if "text" in parts[0]:
|
||||
text = parts[0].get("text")
|
||||
if "thought" in parts[0]:
|
||||
@@ -191,24 +197,38 @@ def _extract_result(
|
||||
if response.get("candidates"):
|
||||
candidate = response["candidates"][0]
|
||||
text, reasoning_content = "", ""
|
||||
if "parts" in candidate["content"]:
|
||||
for part in candidate["content"]["parts"]:
|
||||
if "text" in part:
|
||||
if "thought" in part and settings.SHOW_THINKING_PROCESS:
|
||||
reasoning_content += part["text"]
|
||||
else:
|
||||
text += part["text"]
|
||||
if "thought" in part and thought is None:
|
||||
thought = part.get("thought")
|
||||
elif "inlineData" in part:
|
||||
text += _extract_image_data(part)
|
||||
|
||||
# 使用安全的访问方式
|
||||
content = candidate.get("content", {})
|
||||
|
||||
if content and isinstance(content, dict):
|
||||
parts = content.get("parts", [])
|
||||
|
||||
if parts:
|
||||
for part in parts:
|
||||
if "text" in part:
|
||||
if "thought" in part and settings.SHOW_THINKING_PROCESS:
|
||||
reasoning_content += part["text"]
|
||||
else:
|
||||
text += part["text"]
|
||||
if "thought" in part and thought is None:
|
||||
thought = part.get("thought")
|
||||
elif "inlineData" in part:
|
||||
text += _extract_image_data(part)
|
||||
else:
|
||||
logger.warning(f"No parts found in content for model: {model}")
|
||||
else:
|
||||
logger.error(f"Invalid content structure for model: {model}")
|
||||
|
||||
text = _add_search_link_text(model, candidate, text)
|
||||
tool_calls = _extract_tool_calls(
|
||||
candidate["content"]["parts"], gemini_format
|
||||
)
|
||||
|
||||
# 安全地获取 parts 用于工具调用提取
|
||||
parts = candidate.get("content", {}).get("parts", [])
|
||||
tool_calls = _extract_tool_calls(parts, gemini_format)
|
||||
else:
|
||||
logger.warning(f"No candidates found in response for model: {model}")
|
||||
text = "暂无返回"
|
||||
|
||||
return text, reasoning_content, tool_calls, thought
|
||||
|
||||
|
||||
@@ -250,8 +270,8 @@ def _extract_tool_calls(
|
||||
return []
|
||||
|
||||
letters = string.ascii_lowercase + string.digits
|
||||
|
||||
tool_calls = list()
|
||||
|
||||
for i in range(len(parts)):
|
||||
part = parts[i]
|
||||
if not part or not isinstance(part, dict):
|
||||
@@ -260,7 +280,7 @@ def _extract_tool_calls(
|
||||
item = part.get("functionCall", {})
|
||||
if not item or not isinstance(item, dict):
|
||||
continue
|
||||
|
||||
|
||||
if gemini_format:
|
||||
tool_calls.append(part)
|
||||
else:
|
||||
|
||||
@@ -142,6 +142,23 @@ def _get_safety_settings(model: str) -> List[Dict[str, str]]:
|
||||
return settings.SAFETY_SETTINGS
|
||||
|
||||
|
||||
def _validate_and_set_max_tokens(
|
||||
payload: Dict[str, Any],
|
||||
max_tokens: Optional[int],
|
||||
logger_instance
|
||||
) -> None:
|
||||
"""验证并设置 max_tokens 参数"""
|
||||
if max_tokens is None:
|
||||
return
|
||||
|
||||
# 参数验证和处理
|
||||
if max_tokens <= 0:
|
||||
logger_instance.warning(f"Invalid max_tokens value: {max_tokens}, will not set maxOutputTokens")
|
||||
# 不设置 maxOutputTokens,让 Gemini API 使用默认值
|
||||
else:
|
||||
payload["generationConfig"]["maxOutputTokens"] = max_tokens
|
||||
|
||||
|
||||
def _build_payload(
|
||||
request: ChatRequest,
|
||||
messages: List[Dict[str, Any]],
|
||||
@@ -159,12 +176,16 @@ def _build_payload(
|
||||
"tools": _build_tools(request, messages),
|
||||
"safetySettings": _get_safety_settings(request.model),
|
||||
}
|
||||
if request.max_tokens is not None:
|
||||
payload["generationConfig"]["maxOutputTokens"] = request.max_tokens
|
||||
|
||||
# 处理 max_tokens 参数
|
||||
_validate_and_set_max_tokens(payload, request.max_tokens, logger)
|
||||
|
||||
if request.model.endswith("-image") or request.model.endswith("-image-generation"):
|
||||
payload["generationConfig"]["responseModalities"] = ["Text", "Image"]
|
||||
|
||||
if request.model.endswith("-non-thinking"):
|
||||
payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 0}
|
||||
|
||||
if request.model in settings.THINKING_BUDGET_MAP:
|
||||
if settings.SHOW_THINKING_PROCESS:
|
||||
payload["generationConfig"]["thinkingConfig"] = {
|
||||
@@ -239,27 +260,53 @@ class OpenAIChatService:
|
||||
is_success = False
|
||||
status_code = None
|
||||
response = None
|
||||
|
||||
try:
|
||||
response = await self.api_client.generate_content(payload, model, api_key)
|
||||
usage_metadata = response.get("usageMetadata", {})
|
||||
is_success = True
|
||||
status_code = 200
|
||||
return self.response_handler.handle_response(
|
||||
response,
|
||||
model,
|
||||
stream=False,
|
||||
finish_reason="stop",
|
||||
usage_metadata=usage_metadata,
|
||||
)
|
||||
|
||||
# 尝试处理响应,捕获可能的响应处理异常
|
||||
try:
|
||||
result = self.response_handler.handle_response(
|
||||
response,
|
||||
model,
|
||||
stream=False,
|
||||
finish_reason="stop",
|
||||
usage_metadata=usage_metadata,
|
||||
)
|
||||
return result
|
||||
except Exception as response_error:
|
||||
logger.error(f"Response processing failed for model {model}: {str(response_error)}")
|
||||
|
||||
# 记录详细的错误信息
|
||||
if "parts" in str(response_error):
|
||||
logger.error("Response structure issue - missing or invalid parts")
|
||||
if response.get("candidates"):
|
||||
candidate = response["candidates"][0]
|
||||
content = candidate.get("content", {})
|
||||
logger.error(f"Content structure: {content}")
|
||||
|
||||
# 重新抛出异常
|
||||
raise response_error
|
||||
|
||||
except Exception as e:
|
||||
is_success = False
|
||||
error_log_msg = str(e)
|
||||
logger.error(f"Normal API call failed with error: {error_log_msg}")
|
||||
logger.error(f"API call failed for model {model}: {error_log_msg}")
|
||||
|
||||
# 特别记录 max_tokens 相关的错误
|
||||
gen_config = payload.get('generationConfig', {})
|
||||
if "maxOutputTokens" in gen_config:
|
||||
logger.error(f"Request had maxOutputTokens: {gen_config['maxOutputTokens']}")
|
||||
|
||||
# 如果是响应处理错误,记录更多信息
|
||||
if "parts" in error_log_msg:
|
||||
logger.error("This is likely a response processing error")
|
||||
|
||||
match = re.search(r"status code (\d+)", error_log_msg)
|
||||
if match:
|
||||
status_code = int(match.group(1))
|
||||
else:
|
||||
status_code = 500
|
||||
status_code = int(match.group(1)) if match else 500
|
||||
|
||||
await add_error_log(
|
||||
gemini_key=api_key,
|
||||
@@ -273,6 +320,8 @@ class OpenAIChatService:
|
||||
finally:
|
||||
end_time = time.perf_counter()
|
||||
latency_ms = int((end_time - start_time) * 1000)
|
||||
logger.info(f"Normal completion finished - Success: {is_success}, Latency: {latency_ms}ms")
|
||||
|
||||
await add_request_log(
|
||||
model_name=model,
|
||||
api_key=api_key,
|
||||
|
||||
@@ -7,6 +7,7 @@ from abc import ABC, abstractmethod
|
||||
from app.config.config import settings
|
||||
from app.log.logger import get_api_client_logger
|
||||
from app.core.constants import DEFAULT_TIMEOUT
|
||||
import json
|
||||
|
||||
logger = get_api_client_logger()
|
||||
|
||||
@@ -77,7 +78,7 @@ class GeminiApiClient(ApiClient):
|
||||
async def generate_content(self, payload: Dict[str, Any], model: str, api_key: str) -> Dict[str, Any]:
|
||||
timeout = httpx.Timeout(self.timeout, read=self.timeout)
|
||||
model = self._get_real_model(model)
|
||||
|
||||
|
||||
proxy_to_use = None
|
||||
if settings.PROXIES:
|
||||
if settings.PROXIES_USE_CONSISTENCY_HASH_BY_API_KEY:
|
||||
@@ -87,13 +88,35 @@ class GeminiApiClient(ApiClient):
|
||||
logger.info(f"Using proxy for getting models: {proxy_to_use}")
|
||||
|
||||
headers = self._prepare_headers()
|
||||
|
||||
async with httpx.AsyncClient(timeout=timeout, proxy=proxy_to_use) as client:
|
||||
url = f"{self.base_url}/models/{model}:generateContent?key={api_key}"
|
||||
response = await client.post(url, json=payload, headers=headers)
|
||||
if response.status_code != 200:
|
||||
error_content = response.text
|
||||
raise Exception(f"API call failed with status code {response.status_code}, {error_content}")
|
||||
return response.json()
|
||||
|
||||
try:
|
||||
response = await client.post(url, json=payload, headers=headers)
|
||||
|
||||
if response.status_code != 200:
|
||||
error_content = response.text
|
||||
logger.error(f"API call failed - Status: {response.status_code}, Content: {error_content}")
|
||||
raise Exception(f"API call failed with status code {response.status_code}, {error_content}")
|
||||
|
||||
response_data = response.json()
|
||||
|
||||
# 检查响应结构的基本信息
|
||||
if not response_data.get("candidates"):
|
||||
logger.warning("No candidates found in API response")
|
||||
|
||||
return response_data
|
||||
|
||||
except httpx.TimeoutException as e:
|
||||
logger.error(f"Request timeout: {e}")
|
||||
raise Exception(f"Request timeout: {e}")
|
||||
except httpx.RequestError as e:
|
||||
logger.error(f"Request error: {e}")
|
||||
raise Exception(f"Request error: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error: {e}")
|
||||
raise
|
||||
|
||||
async def stream_generate_content(self, payload: Dict[str, Any], model: str, api_key: str) -> AsyncGenerator[str, None]:
|
||||
timeout = httpx.Timeout(self.timeout, read=self.timeout)
|
||||
|
||||
Reference in New Issue
Block a user