mirror of
https://github.com/snailyp/gemini-balance.git
synced 2026-07-04 14:21:27 +08:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cc36ba4c9e | ||
|
|
baf643e884 | ||
|
|
360bc9e48d | ||
|
|
c0a27d0542 | ||
|
|
84052a2179 |
@@ -10,6 +10,8 @@ SHOW_SEARCH_LINK=true
|
||||
SHOW_THINKING_PROCESS=true
|
||||
BASE_URL=https://generativelanguage.googleapis.com/v1beta
|
||||
MAX_FAILURES=10
|
||||
# 请求超时时间(秒)
|
||||
TIME_OUT=300
|
||||
#########################image_generate 相关配置###########################
|
||||
PAID_KEY=AIzaSyxxxxxxxxxxxxxxxxxxx
|
||||
CREATE_IMAGE_MODEL=imagen-3.0-generate-002
|
||||
@@ -20,6 +22,7 @@ CLOUDFLARE_IMGBED_URL=https://xxxxxxx.pages.dev/upload
|
||||
CLOUDFLARE_IMGBED_AUTH_CODE=xxxxxxxxx
|
||||
##########################################################################
|
||||
#########################stream_optimizer 相关配置########################
|
||||
STREAM_OPTIMIZER_ENABLED=false
|
||||
STREAM_MIN_DELAY=0.016
|
||||
STREAM_MAX_DELAY=0.024
|
||||
STREAM_SHORT_TEXT_THRESHOLD=10
|
||||
|
||||
24
.github/workflows/docker-publish.yml
vendored
24
.github/workflows/docker-publish.yml
vendored
@@ -2,8 +2,6 @@ name: Docker Image CI
|
||||
|
||||
on:
|
||||
push:
|
||||
# branches: [ "main" ]
|
||||
tags: [ 'v*.*.*' ]
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
|
||||
@@ -43,20 +41,30 @@ jobs:
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
# https://github.com/docker/metadata-action/tree/v5/?tab=readme-ov-file#semver
|
||||
# Event: push, Ref: refs/heads/main, Tags: main
|
||||
# Event: push tag, Ref: refs/tags/v1.2.3, Tags: 1.2.3, 1.2, 1, latest
|
||||
# Event: push tag, Ref: refs/tags/v2.0.8-rc1, Tags: 2.0.8-rc1
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=sha,format=long
|
||||
type=semver,pattern={{major}}
|
||||
labels: |
|
||||
org.opencontainers.image.description=OpenAI API Compatible Server
|
||||
org.opencontainers.image.source=${{ github.event.repository.html_url }}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
file: Dockerfile
|
||||
context: .
|
||||
platforms: linux/amd64,linux/arm64
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
load: false
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
cache-from: type=gha,scope=${{ github.workflow }}
|
||||
cache-to: type=gha,scope=${{ github.workflow }}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# 🚀 FastAPI OpenAI (Gemini) 代理服务
|
||||
# 🚀 Gemini 代理服务(支持openai/gemini格式)
|
||||
|
||||
[MIT License](https://opensource.org/licenses/MIT)
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
from typing import List
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
from app.core.constants import API_VERSION, DEFAULT_CREATE_IMAGE_MODEL, DEFAULT_FILTER_MODELS, DEFAULT_MODEL, DEFAULT_STREAM_CHUNK_SIZE, DEFAULT_STREAM_LONG_TEXT_THRESHOLD, DEFAULT_STREAM_MAX_DELAY, DEFAULT_STREAM_MIN_DELAY, DEFAULT_STREAM_SHORT_TEXT_THRESHOLD
|
||||
from app.core.constants import API_VERSION, DEFAULT_CREATE_IMAGE_MODEL, DEFAULT_FILTER_MODELS, DEFAULT_MODEL, DEFAULT_STREAM_CHUNK_SIZE, DEFAULT_STREAM_LONG_TEXT_THRESHOLD, DEFAULT_STREAM_MAX_DELAY, DEFAULT_STREAM_MIN_DELAY, DEFAULT_STREAM_SHORT_TEXT_THRESHOLD, DEFAULT_TIMEOUT
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
@@ -16,6 +16,7 @@ class Settings(BaseSettings):
|
||||
AUTH_TOKEN: str = ""
|
||||
MAX_FAILURES: int = 3
|
||||
TEST_MODEL: str = DEFAULT_MODEL
|
||||
TIME_OUT: int = DEFAULT_TIMEOUT
|
||||
|
||||
# 模型相关配置
|
||||
SEARCH_MODELS: List[str] = ["gemini-2.0-flash-exp"]
|
||||
@@ -35,6 +36,7 @@ class Settings(BaseSettings):
|
||||
CLOUDFLARE_IMGBED_AUTH_CODE: str = ""
|
||||
|
||||
# 流式输出优化器配置
|
||||
STREAM_OPTIMIZER_ENABLED: bool = False
|
||||
STREAM_MIN_DELAY: float = DEFAULT_STREAM_MIN_DELAY
|
||||
STREAM_MAX_DELAY: float = DEFAULT_STREAM_MAX_DELAY
|
||||
STREAM_SHORT_TEXT_THRESHOLD: int = DEFAULT_STREAM_SHORT_TEXT_THRESHOLD
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from typing import List, Optional, Dict, Any, Literal, Union
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.core.constants import DEFAULT_TEMPERATURE, DEFAULT_TOP_K, DEFAULT_TOP_P
|
||||
|
||||
|
||||
class SafetySetting(BaseModel):
|
||||
category: Optional[Literal["HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_DANGEROUS_CONTENT", "HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_CIVIC_INTEGRITY"]] = None
|
||||
@@ -13,9 +15,9 @@ class GenerationConfig(BaseModel):
|
||||
responseSchema: Optional[Dict[str, Any]] = None
|
||||
candidateCount: Optional[int] = 1
|
||||
maxOutputTokens: Optional[int] = None
|
||||
temperature: Optional[float] = None
|
||||
topP: Optional[float] = None
|
||||
topK: Optional[int] = None
|
||||
temperature: Optional[float] = DEFAULT_TEMPERATURE
|
||||
topP: Optional[float] = DEFAULT_TOP_P
|
||||
topK: Optional[int] = DEFAULT_TOP_K
|
||||
presencePenalty: Optional[float] = None
|
||||
frequencyPenalty: Optional[float] = None
|
||||
responseLogprobs: Optional[bool] = None
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from app.core.constants import DEFAULT_MAX_TOKENS, DEFAULT_MODEL, DEFAULT_TEMPERATURE, DEFAULT_TOP_K, DEFAULT_TOP_P
|
||||
from app.core.constants import DEFAULT_MODEL, DEFAULT_TEMPERATURE, DEFAULT_TOP_K, DEFAULT_TOP_P
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
|
||||
@@ -10,7 +10,7 @@ class ChatRequest(BaseModel):
|
||||
temperature: Optional[float] = DEFAULT_TEMPERATURE
|
||||
stream: Optional[bool] = False
|
||||
tools: Optional[List[dict]] = []
|
||||
max_tokens: Optional[int] = DEFAULT_MAX_TOKENS
|
||||
max_tokens: Optional[int] = None
|
||||
top_p: Optional[float] = DEFAULT_TOP_P
|
||||
top_k: Optional[int] = DEFAULT_TOP_K
|
||||
stop: Optional[List[str]] = []
|
||||
|
||||
@@ -107,15 +107,15 @@ class StreamOptimizer:
|
||||
|
||||
# 计算智能延迟时间
|
||||
delay = self.calculate_delay(len(text))
|
||||
if self.logger:
|
||||
self.logger.info(f"Text length: {len(text)}, delay: {delay:.4f}s")
|
||||
# if self.logger:
|
||||
# self.logger.info(f"Text length: {len(text)}, delay: {delay:.4f}s")
|
||||
|
||||
# 根据文本长度决定输出方式
|
||||
if len(text) >= self.long_text_threshold:
|
||||
# 长文本:分块输出
|
||||
chunks = self.split_text_into_chunks(text)
|
||||
if self.logger:
|
||||
self.logger.info(f"Long text: splitting into {len(chunks)} chunks")
|
||||
# if self.logger:
|
||||
# self.logger.info(f"Long text: splitting into {len(chunks)} chunks")
|
||||
for chunk_text in chunks:
|
||||
chunk_response = create_response_chunk(chunk_text)
|
||||
yield format_chunk(chunk_response)
|
||||
|
||||
@@ -41,7 +41,7 @@ async def list_models(
|
||||
logger.info("-" * 50 + "list_gemini_models" + "-" * 50)
|
||||
logger.info("Handling Gemini models list request")
|
||||
|
||||
api_key = await key_manager.get_next_working_key()
|
||||
api_key = await key_manager.get_first_valid_key()
|
||||
logger.info(f"Using API key: {api_key}")
|
||||
|
||||
models_json = model_service.get_gemini_models(api_key)
|
||||
@@ -86,7 +86,7 @@ async def list_models(
|
||||
async def generate_content(
|
||||
model_name: str,
|
||||
request: GeminiRequest,
|
||||
_=Depends(security_service.verify_goog_api_key),
|
||||
_=Depends(security_service.verify_key_or_goog_api_key),
|
||||
api_key: str = Depends(get_next_working_key),
|
||||
key_manager: KeyManager = Depends(get_key_manager)
|
||||
):
|
||||
@@ -118,7 +118,7 @@ async def generate_content(
|
||||
async def stream_generate_content(
|
||||
model_name: str,
|
||||
request: GeminiRequest,
|
||||
_=Depends(security_service.verify_goog_api_key),
|
||||
_=Depends(security_service.verify_key_or_goog_api_key),
|
||||
api_key: str = Depends(get_next_working_key),
|
||||
key_manager: KeyManager = Depends(get_key_manager)
|
||||
):
|
||||
|
||||
@@ -44,7 +44,7 @@ async def list_models(
|
||||
):
|
||||
logger.info("-" * 50 + "list_models" + "-" * 50)
|
||||
logger.info("Handling models list request")
|
||||
api_key = await key_manager.get_next_working_key()
|
||||
api_key = await key_manager.get_first_valid_key()
|
||||
logger.info(f"Using API key: {api_key}")
|
||||
try:
|
||||
return model_service.get_gemini_openai_models(api_key)
|
||||
|
||||
@@ -89,6 +89,11 @@ def _get_safety_settings(model: str) -> List[Dict[str, str]]:
|
||||
def _build_payload(model: str, request: GeminiRequest) -> Dict[str, Any]:
|
||||
"""构建请求payload"""
|
||||
request_dict = request.model_dump()
|
||||
if request.generationConfig:
|
||||
if request.generationConfig.maxOutputTokens is None:
|
||||
# 如果未指定最大输出长度,则不传递该字段,解决截断的问题
|
||||
request_dict["generationConfig"].pop("maxOutputTokens")
|
||||
|
||||
payload = {
|
||||
"contents": request_dict.get("contents", []),
|
||||
"tools": _build_tools(model, request_dict),
|
||||
@@ -162,9 +167,8 @@ class GeminiChatService:
|
||||
json.loads(line), model, stream=True
|
||||
)
|
||||
text = self._extract_text_from_response(response_data)
|
||||
|
||||
# 如果有文本内容,使用流式输出优化器处理
|
||||
if text:
|
||||
# 如果有文本内容,且开启了流式输出优化器,则使用流式输出优化器处理
|
||||
if text and settings.STREAM_OPTIMIZER_ENABLED:
|
||||
# 使用流式输出优化器处理文本输出
|
||||
async for (
|
||||
optimized_chunk
|
||||
|
||||
@@ -115,7 +115,6 @@ def _build_payload(
|
||||
"contents": messages,
|
||||
"generationConfig": {
|
||||
"temperature": request.temperature,
|
||||
"maxOutputTokens": request.max_tokens,
|
||||
"stopSequences": request.stop,
|
||||
"topP": request.top_p,
|
||||
"topK": request.top_k,
|
||||
@@ -123,6 +122,8 @@ def _build_payload(
|
||||
"tools": _build_tools(request, messages),
|
||||
"safetySettings": _get_safety_settings(request.model),
|
||||
}
|
||||
if request.max_tokens is not None:
|
||||
payload["generationConfig"]["maxOutputTokens"] = request.max_tokens
|
||||
if request.model.endswith("-image") or request.model.endswith("-image-generation"):
|
||||
payload["generationConfig"]["responseModalities"] = ["Text", "Image"]
|
||||
|
||||
@@ -214,7 +215,7 @@ class OpenAIChatService:
|
||||
if openai_chunk:
|
||||
# 提取文本内容
|
||||
text = self._extract_text_from_openai_chunk(openai_chunk)
|
||||
if text:
|
||||
if text and settings.STREAM_OPTIMIZER_ENABLED:
|
||||
# 使用流式输出优化器处理文本输出
|
||||
async for (
|
||||
optimized_chunk
|
||||
|
||||
@@ -81,6 +81,13 @@ class KeyManager:
|
||||
|
||||
return {"valid_keys": valid_keys, "invalid_keys": invalid_keys}
|
||||
|
||||
async def get_first_valid_key(self) -> str:
|
||||
"""获取第一个有效的API key"""
|
||||
async with self.failure_count_lock:
|
||||
for key in self.key_failure_counts:
|
||||
if self.key_failure_counts[key] < self.MAX_FAILURES:
|
||||
return key
|
||||
return self.api_keys[0]
|
||||
|
||||
_singleton_instance = None
|
||||
_singleton_lock = asyncio.Lock()
|
||||
|
||||
Reference in New Issue
Block a user