From 91aca101fd53afb551cd06922e53810ff5b688eb Mon Sep 17 00:00:00 2001
From: yinpeng <2291314224@qq.com>
Date: Thu, 12 Dec 2024 14:49:16 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E6=8F=90=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 246 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 145 insertions(+), 101 deletions(-)

diff --git a/main.py b/main.py
index 9197d96..f4a227a 100644
--- a/main.py
+++ b/main.py
@@ -40,40 +40,56 @@ API_KEYS = config.settings.API_KEYS
 
 # 创建一个循环迭代器
 key_cycle = cycle(API_KEYS)
-key_lock = asyncio.Lock()
+
+# Two independent locks: one guards key_cycle, the other guards key_failure_counts
+key_cycle_lock = asyncio.Lock()
+failure_count_lock = asyncio.Lock()
 
 # 添加key失败计数记录
 key_failure_counts = {key: 0 for key in API_KEYS}
-MAX_FAILURES = 1  # 最大失败次数阈值
+MAX_FAILURES = 10  # failure-count threshold; a key at or above it is treated as invalid
+MAX_RETRIES = 3  # maximum number of attempts per chat completion request
 
 
+async def get_next_key():
+    """Return the next API key from the shared cycle; failure counts are not checked."""
+    async with key_cycle_lock:
+        return next(key_cycle)
+
+async def is_key_valid(key):
+    """Return True if the recorded failure count for `key` is below MAX_FAILURES."""
+    async with failure_count_lock:
+        return key_failure_counts[key] < MAX_FAILURES
+
+async def reset_failure_counts():
+    """Reset the failure count of every key to zero."""
+    async with failure_count_lock:
+        for key in key_failure_counts:
+            key_failure_counts[key] = 0
+
 async def get_next_working_key():
     """获取下一个可用的API key"""
-    async with key_lock:
-        current_key = next(key_cycle)
-        initial_key = current_key
-
-        while key_failure_counts[current_key] >= MAX_FAILURES:
-            current_key = next(key_cycle)
-            if current_key == initial_key:  # 已经循环了一圈
-                # 重置所有失败计数
-                for key in key_failure_counts:
-                    key_failure_counts[key] = 0
-                break
-
-        return current_key
-
+    initial_key = await get_next_key()
+    current_key = initial_key
+    
+    while True:
+        if await is_key_valid(current_key):
+            return current_key
+            
+        current_key = await get_next_key()
+        if current_key == initial_key:  # back at the starting key: no valid key was found in this pass
+            await reset_failure_counts()
+            return current_key
 
 async def handle_api_failure(api_key):
     """处理API调用失败"""
-    async with key_lock:
+    async with failure_count_lock:
         key_failure_counts[api_key] += 1
         if key_failure_counts[api_key] >= MAX_FAILURES:
-            logger.warning(
-                f"API key {api_key} has failed {MAX_FAILURES} times, switching to next key"
-            )
-            return await get_next_working_key()
-    return api_key
+            logger.warning(f"API key {api_key} has failed {MAX_FAILURES} times, switching to next key")
+    
+    # Pick the next key outside the lock: get_next_working_key() acquires failure_count_lock itself
+    return await get_next_working_key()
 
 
 class ChatRequest(BaseModel):
@@ -206,9 +222,8 @@ def convert_gemini_response_to_openai(response, model, stream=False):
 @app.get("/hf/v1/models")
 async def list_models(authorization: str = Header(None)):
     await verify_authorization(authorization)
-    async with key_lock:
-        api_key = next(key_cycle)
-        logger.info(f"Using API key: {api_key}")
+    api_key = await get_next_working_key()
+    logger.info(f"Using API key: {api_key}")
     try:
         response = get_gemini_models(api_key)
         logger.info("Successfully retrieved models list")
@@ -223,95 +238,124 @@ async def list_models(authorization: str = Header(None)):
 async def chat_completion(request: ChatRequest, authorization: str = Header(None)):
     await verify_authorization(authorization)
     api_key = await get_next_working_key()
-    logger.info(f"Using API key: {api_key}")
+    logger.info(f"Chat completion request - Model: {request.model}")
+    retries = 0
+    
+    while retries < MAX_RETRIES:
+        try:
+            logger.info(f"Attempt {retries + 1} with API key: {api_key}")
+            
+            if request.model in config.settings.MODEL_SEARCH:
+                # Gemini API path (models listed in config.settings.MODEL_SEARCH)
+                gemini_messages = convert_messages_to_gemini_format(request.messages)
+                # Build the Gemini request payload
+                payload = {
+                    "contents": gemini_messages,
+                    "generationConfig": {
+                        "temperature": request.temperature,
+                    },
+                    "tools": [{"googleSearch": {}}],
+                }
+                
+                if request.stream:
+                    logger.info("Streaming response enabled")
 
-    try:
-        logger.info(f"Chat completion request - Model: {request.model}")
-        if request.model in config.settings.MODEL_SEARCH:
-            # 转换消息格式
-            gemini_messages = convert_messages_to_gemini_format(request.messages)
-
-            # 调用Gemini API
-            non_stream_url = f"https://generativelanguage.googleapis.com/v1beta/models/{request.model}:generateContent?key={api_key}"
-            stream_url = f"https://generativelanguage.googleapis.com/v1beta/models/{request.model}:streamGenerateContent?alt=sse&key={api_key}"
-            payload = {
-                "contents": gemini_messages,
-                "generationConfig": {
-                    "temperature": request.temperature,
-                },
-                "tools": [{"googleSearch": {}}],
-            }
-
-            if request.stream:
+                    async def generate():
+                        nonlocal api_key, retries
+                        while retries < MAX_RETRIES:
+                            try:
+                                async with httpx.AsyncClient() as client:
+                                    stream_url = f"https://generativelanguage.googleapis.com/v1beta/models/{request.model}:streamGenerateContent?alt=sse&key={api_key}"
+                                    async with client.stream("POST", stream_url, json=payload) as response:
+                                        if response.status_code == 429:
+                                            logger.warning(f"Rate limit reached for key: {api_key}")
+                                            api_key = await handle_api_failure(api_key)
+                                            logger.info(f"Retrying with new API key: {api_key}")
+                                            retries += 1
+                                            if retries >= MAX_RETRIES:
+                                                yield f"data: {json.dumps({'error': 'Max retries reached'})}\n\n"
+                                                break
+                                            continue
+                                            
+                                        if response.status_code != 200:
+                                            logger.error(f"Error in streaming response: {response.status_code}")
+                                            yield f"data: {json.dumps({'error': f'API error: {response.status_code}'})}\n\n"
+                                            break
+                                            
+                                        async for line in response.aiter_lines():
+                                            if line.startswith("data: "):
+                                                try:
+                                                    chunk = json.loads(line[6:])
+                                                    openai_chunk = convert_gemini_response_to_openai(
+                                                        chunk, request.model, stream=True
+                                                    )
+                                                    if openai_chunk:
+                                                        yield f"data: {json.dumps(openai_chunk)}\n\n"
+                                                except json.JSONDecodeError:
+                                                    continue
+                                        yield "data: [DONE]\n\n"
+                                        return
+                            except Exception as e:
+                                logger.error(f"Stream error: {str(e)}")
+                                api_key = await handle_api_failure(api_key)
+                                retries += 1
+                                if retries >= MAX_RETRIES:
+                                    yield f"data: {json.dumps({'error': 'Max retries reached'})}\n\n"
+                                    break
+                                continue
+                                
+                    return StreamingResponse(content=generate(), media_type="text/event-stream")
+                else:
+                    # Non-streaming response. NOTE(review): HTTP status is not checked before .json(), so a 429 here does not trigger a retry
+                    async with httpx.AsyncClient() as client:
+                        non_stream_url = f"https://generativelanguage.googleapis.com/v1beta/models/{request.model}:generateContent?key={api_key}"
+                        response = await client.post(non_stream_url, json=payload)
+                        gemini_response = response.json()
+                        logger.info("Chat completion successful")
+                        return convert_gemini_response_to_openai(gemini_response, request.model)
+            
+            # OpenAI client path (reached only when the model is not in MODEL_SEARCH)
+            client = openai.OpenAI(api_key=api_key, base_url=config.settings.BASE_URL)
+            response = client.chat.completions.create(
+                model=request.model,
+                messages=request.messages,
+                temperature=request.temperature,
+                stream=request.stream if hasattr(request, "stream") else False,
+            )
+            
+            if hasattr(request, "stream") and request.stream:
                 logger.info("Streaming response enabled")
 
                 async def generate():
-                    async with httpx.AsyncClient() as client:
-                        async with client.stream(
-                            "POST", stream_url, json=payload
-                        ) as response:
-                            async for line in response.aiter_lines():
-                                if line.startswith("data: "):
-                                    try:
-                                        chunk = json.loads(line[6:])
-                                        openai_chunk = (
-                                            convert_gemini_response_to_openai(
-                                                chunk, request.model, stream=True
-                                            )
-                                        )
-                                        if openai_chunk:
-                                            yield f"data: {json.dumps(openai_chunk)}\n\n"
-                                    except json.JSONDecodeError:
-                                        continue
-                        yield "data: [DONE]\n\n"
-
-                return StreamingResponse(
-                    content=generate(), media_type="text/event-stream"
-                )
-            else:
-                # 非流式响应
-                async with httpx.AsyncClient() as client:
-                    response = await client.post(non_stream_url, json=payload)
-                    gemini_response = response.json()
-                    openai_response = convert_gemini_response_to_openai(
-                        gemini_response, request.model
-                    )
-
+                    for chunk in response:
+                        yield f"data: {chunk.model_dump_json()}\n\n"
                 logger.info("Chat completion successful")
-                return openai_response
-        client = openai.OpenAI(api_key=api_key, base_url=config.settings.BASE_URL)
-        response = client.chat.completions.create(
-            model=request.model,
-            messages=request.messages,
-            temperature=request.temperature,
-            stream=request.stream if hasattr(request, "stream") else False,
-        )
+                return StreamingResponse(content=generate(), media_type="text/event-stream")
+            
+            logger.info("Chat completion successful")
+            return response
 
-        if hasattr(request, "stream") and request.stream:
-            logger.info("Streaming response enabled")
+        except Exception as e:
+            logger.error(f"Error in chat completion: {str(e)}")
+            api_key = await handle_api_failure(api_key)
+            retries += 1
+            
+            if retries >= MAX_RETRIES:
+                logger.error("Max retries reached, giving up")
+                raise HTTPException(status_code=500, detail="Max retries reached with all available API keys")
+            
+            logger.info(f"Retrying with new API key: {api_key}")
+            continue
 
-            async def generate():
-                for chunk in response:
-                    yield f"data: {chunk.model_dump_json()}\n\n"
-
-            return StreamingResponse(content=generate(), media_type="text/event-stream")
-
-        logger.info("Chat completion successful")
-        return response
-
-    except Exception as e:
-        logger.error(f"Error in chat completion: {str(e)}")
-        api_key = await handle_api_failure(api_key)  # 处理失败并可能切换key
-        raise HTTPException(status_code=500, detail=str(e))
+    raise HTTPException(status_code=500, detail="Unexpected error in chat completion")
 
 
 @app.post("/v1/embeddings")
 @app.post("/hf/v1/embeddings")
 async def embedding(request: EmbeddingRequest, authorization: str = Header(None)):
     await verify_authorization(authorization)
-    async with key_lock:
-        api_key = next(key_cycle)
-        logger.info(f"Using API key: {api_key}")
+    api_key = await get_next_working_key()
+    logger.info(f"Using API key: {api_key}")
 
     try:
         client = openai.OpenAI(api_key=api_key, base_url=config.settings.BASE_URL)