feat: 新增请求超时配置及优化模型列表接口api_key获取方式

1. 新增功能:
   - 在`.env.example`中添加`TIME_OUT=300`配置项(包含中文注释)
   - 在`Settings`类中增加`TIME_OUT`字段(读取自`DEFAULT_TIMEOUT`)

2. 优化内容:
   - 生成配置:
     * 为`GenerationConfig`设置默认温度/TOP_P/TOP_K值
     * 移除`maxOutputTokens`默认值,改为可选传递
   - OpenAI请求:
     * 移除`max_tokens`默认值
     * 只有当`max_tokens`有值时才添加到请求payload
   - 日志优化:
     * 注释掉`stream_optimizer.py`中部分调试日志

3. 模型列表接口api_key获取方式
This commit is contained in:
snaily
2025-04-03 03:12:59 +08:00
parent 360bc9e48d
commit baf643e884
10 changed files with 31 additions and 13 deletions

View File

@@ -10,6 +10,8 @@ SHOW_SEARCH_LINK=true
SHOW_THINKING_PROCESS=true
BASE_URL=https://generativelanguage.googleapis.com/v1beta
MAX_FAILURES=10
# 请求超时时间(秒)
TIME_OUT=300
#########################image_generate 相关配置###########################
PAID_KEY=AIzaSyxxxxxxxxxxxxxxxxxxx
CREATE_IMAGE_MODEL=imagen-3.0-generate-002

View File

@@ -4,7 +4,7 @@
from typing import List
from pydantic_settings import BaseSettings
from app.core.constants import API_VERSION, DEFAULT_CREATE_IMAGE_MODEL, DEFAULT_FILTER_MODELS, DEFAULT_MODEL, DEFAULT_STREAM_CHUNK_SIZE, DEFAULT_STREAM_LONG_TEXT_THRESHOLD, DEFAULT_STREAM_MAX_DELAY, DEFAULT_STREAM_MIN_DELAY, DEFAULT_STREAM_SHORT_TEXT_THRESHOLD
from app.core.constants import API_VERSION, DEFAULT_CREATE_IMAGE_MODEL, DEFAULT_FILTER_MODELS, DEFAULT_MODEL, DEFAULT_STREAM_CHUNK_SIZE, DEFAULT_STREAM_LONG_TEXT_THRESHOLD, DEFAULT_STREAM_MAX_DELAY, DEFAULT_STREAM_MIN_DELAY, DEFAULT_STREAM_SHORT_TEXT_THRESHOLD, DEFAULT_TIMEOUT
class Settings(BaseSettings):
@@ -16,6 +16,7 @@ class Settings(BaseSettings):
AUTH_TOKEN: str = ""
MAX_FAILURES: int = 3
TEST_MODEL: str = DEFAULT_MODEL
TIME_OUT: int = DEFAULT_TIMEOUT
# 模型相关配置
SEARCH_MODELS: List[str] = ["gemini-2.0-flash-exp"]

View File

@@ -1,6 +1,8 @@
from typing import List, Optional, Dict, Any, Literal, Union
from pydantic import BaseModel
from app.core.constants import DEFAULT_TEMPERATURE, DEFAULT_TOP_K, DEFAULT_TOP_P
class SafetySetting(BaseModel):
category: Optional[Literal["HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_DANGEROUS_CONTENT", "HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_CIVIC_INTEGRITY"]] = None
@@ -13,9 +15,9 @@ class GenerationConfig(BaseModel):
responseSchema: Optional[Dict[str, Any]] = None
candidateCount: Optional[int] = 1
maxOutputTokens: Optional[int] = None
temperature: Optional[float] = None
topP: Optional[float] = None
topK: Optional[int] = None
temperature: Optional[float] = DEFAULT_TEMPERATURE
topP: Optional[float] = DEFAULT_TOP_P
topK: Optional[int] = DEFAULT_TOP_K
presencePenalty: Optional[float] = None
frequencyPenalty: Optional[float] = None
responseLogprobs: Optional[bool] = None

View File

@@ -1,7 +1,7 @@
from pydantic import BaseModel
from typing import List, Optional, Union
from app.core.constants import DEFAULT_MAX_TOKENS, DEFAULT_MODEL, DEFAULT_TEMPERATURE, DEFAULT_TOP_K, DEFAULT_TOP_P
from app.core.constants import DEFAULT_MODEL, DEFAULT_TEMPERATURE, DEFAULT_TOP_K, DEFAULT_TOP_P
class ChatRequest(BaseModel):
@@ -10,7 +10,7 @@ class ChatRequest(BaseModel):
temperature: Optional[float] = DEFAULT_TEMPERATURE
stream: Optional[bool] = False
tools: Optional[List[dict]] = []
max_tokens: Optional[int] = DEFAULT_MAX_TOKENS
max_tokens: Optional[int] = None
top_p: Optional[float] = DEFAULT_TOP_P
top_k: Optional[int] = DEFAULT_TOP_K
stop: Optional[List[str]] = []

View File

@@ -107,15 +107,15 @@ class StreamOptimizer:
# 计算智能延迟时间
delay = self.calculate_delay(len(text))
if self.logger:
self.logger.info(f"Text length: {len(text)}, delay: {delay:.4f}s")
# if self.logger:
# self.logger.info(f"Text length: {len(text)}, delay: {delay:.4f}s")
# 根据文本长度决定输出方式
if len(text) >= self.long_text_threshold:
# 长文本:分块输出
chunks = self.split_text_into_chunks(text)
if self.logger:
self.logger.info(f"Long text: splitting into {len(chunks)} chunks")
# if self.logger:
# self.logger.info(f"Long text: splitting into {len(chunks)} chunks")
for chunk_text in chunks:
chunk_response = create_response_chunk(chunk_text)
yield format_chunk(chunk_response)

View File

@@ -41,7 +41,7 @@ async def list_models(
logger.info("-" * 50 + "list_gemini_models" + "-" * 50)
logger.info("Handling Gemini models list request")
api_key = await key_manager.get_next_working_key()
api_key = await key_manager.get_first_valid_key()
logger.info(f"Using API key: {api_key}")
models_json = model_service.get_gemini_models(api_key)

View File

@@ -44,7 +44,7 @@ async def list_models(
):
logger.info("-" * 50 + "list_models" + "-" * 50)
logger.info("Handling models list request")
api_key = await key_manager.get_next_working_key()
api_key = await key_manager.get_first_valid_key()
logger.info(f"Using API key: {api_key}")
try:
return model_service.get_gemini_openai_models(api_key)

View File

@@ -89,6 +89,11 @@ def _get_safety_settings(model: str) -> List[Dict[str, str]]:
def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]:
"""构建请求payload"""
request_dict = request.model_dump()
if request.generationConfig:
if request.generationConfig.maxOutputTokens is None:
# 如果未指定最大输出长度,则不传递该字段,解决截断的问题
request_dict["generationConfig"].pop("maxOutputTokens")
payload = {
"contents": request_dict.get("contents", []),
"tools": _build_tools(model, request_dict),

View File

@@ -115,7 +115,6 @@ def _build_payload(
"contents": messages,
"generationConfig": {
"temperature": request.temperature,
"maxOutputTokens": request.max_tokens,
"stopSequences": request.stop,
"topP": request.top_p,
"topK": request.top_k,
@@ -123,6 +122,8 @@ def _build_payload(
"tools": _build_tools(request, messages),
"safetySettings": _get_safety_settings(request.model),
}
if request.max_tokens is not None:
payload["generationConfig"]["maxOutputTokens"] = request.max_tokens
if request.model.endswith("-image") or request.model.endswith("-image-generation"):
payload["generationConfig"]["responseModalities"] = ["Text", "Image"]

View File

@@ -81,6 +81,13 @@ class KeyManager:
return {"valid_keys": valid_keys, "invalid_keys": invalid_keys}
async def get_first_valid_key(self) -> str:
"""获取第一个有效的API key"""
async with self.failure_count_lock:
for key in self.key_failure_counts:
if self.key_failure_counts[key] < self.MAX_FAILURES:
return key
return self.api_keys[0]
_singleton_instance = None
_singleton_lock = asyncio.Lock()