Files
gemini-balance/app/service/embedding/gemini_embedding_service.py
snaily 708fb1604b feat(config): 新增错误日志请求体记录开关(默认关闭)
- 新增环境变量 ERROR_LOG_RECORD_REQUEST_BODY,默认 false
- Settings 增加该配置,并在各服务写入错误日志时按开关决定是否
  入库请求体,降低敏感信息泄露风险
- 配置编辑页新增对应开关,前端初始化默认值;.env.example、
  README/README_ZH 同步更新
- db: add_error_log 支持 None 请求体并更稳健解析字符串/字典
- perf(db): 将错误日志批量删除 batch_size 从 500 下调到 200,
  兼容 SQLite/MySQL 参数上限并提升稳定性
- docs: 补充 aliyun_oss 上传提供商与 OSS 配置示例
- style: 轻微代码格式化与导入顺序优化
2025-09-18 04:21:28 +08:00

142 lines
4.8 KiB
Python

# app/service/embedding/gemini_embedding_service.py
import datetime
import time
from typing import Any, Dict
from app.config.config import settings
from app.database.services import add_error_log, add_request_log
from app.domain.gemini_models import GeminiBatchEmbedRequest, GeminiEmbedRequest
from app.log.logger import get_gemini_embedding_logger
from app.service.client.api_client import GeminiApiClient
from app.service.key.key_manager import KeyManager
logger = get_gemini_embedding_logger()
def _build_embed_payload(request: GeminiEmbedRequest) -> Dict[str, Any]:
    """Build the payload for a single embedding request.

    The payload always carries ``content`` (the result of
    ``request.content.model_dump()``). ``taskType``, ``title`` and
    ``outputDimensionality`` are copied over only when they are truthy on
    the request.

    Args:
        request: The embedding request to convert.

    Returns:
        The payload dictionary for the embedding call.
    """
    body: Dict[str, Any] = {"content": request.content.model_dump()}
    # Optional fields are forwarded in this fixed order, skipping unset ones.
    for field_name in ("taskType", "title", "outputDimensionality"):
        field_value = getattr(request, field_name)
        if field_value:
            body[field_name] = field_value
    return body
def _build_batch_embed_payload(
    request: GeminiBatchEmbedRequest, model: str
) -> Dict[str, Any]:
    """Build the payload for a batch embedding request.

    Every entry of ``request.requests`` is converted with
    ``_build_embed_payload`` and tagged with a ``model`` field of the form
    ``models/<model>``.

    Args:
        request: The batch request whose ``requests`` are converted.
        model: Model name, without the ``models/`` prefix.

    Returns:
        A dictionary with a single ``requests`` key holding the list of
        per-item payloads, in the same order as ``request.requests``.
    """
    # The Gemini API requires every request in the batch to carry a model field.
    model_path = f"models/{model}"
    return {
        "requests": [
            {**_build_embed_payload(item), "model": model_path}
            for item in request.requests
        ]
    }
class GeminiEmbeddingService:
    """Service wrapper around the Gemini embedding endpoints.

    Each public method builds the request payload, sends it through the
    instance's ``GeminiApiClient`` and records the outcome: failures are
    written with ``add_error_log`` and every call, successful or not, is
    written with ``add_request_log``.
    """

    def __init__(self, base_url: str, key_manager: KeyManager):
        """Create the API client and keep a reference to the key manager.

        Args:
            base_url: Base URL passed to ``GeminiApiClient``.
            key_manager: Key manager stored on the instance.
        """
        self.api_client = GeminiApiClient(base_url, settings.TIME_OUT)
        self.key_manager = key_manager

    async def embed_content(
        self, model: str, request: GeminiEmbedRequest, api_key: str
    ) -> Dict[str, Any]:
        """Generate an embedding for a single piece of content.

        Args:
            model: Model name forwarded to the API client.
            request: The single embedding request.
            api_key: API key used for the call and recorded in the logs.

        Returns:
            The response returned by ``api_client.embed_content``.

        Raises:
            Exception: Whatever the API client raised, re-raised unchanged
                after the error has been logged.
        """
        payload = _build_embed_payload(request)
        return await self._call_and_log(
            self.api_client.embed_content,
            payload,
            model,
            api_key,
            error_type="gemini-embed-single",
            failure_prefix="Single embedding API call failed",
        )

    async def batch_embed_contents(
        self, model: str, request: GeminiBatchEmbedRequest, api_key: str
    ) -> Dict[str, Any]:
        """Generate embeddings for a batch of contents.

        Args:
            model: Model name forwarded to the API client and embedded in
                every batch item.
            request: The batch embedding request.
            api_key: API key used for the call and recorded in the logs.

        Returns:
            The response returned by ``api_client.batch_embed_contents``.

        Raises:
            Exception: Whatever the API client raised, re-raised unchanged
                after the error has been logged.
        """
        payload = _build_batch_embed_payload(request, model)
        return await self._call_and_log(
            self.api_client.batch_embed_contents,
            payload,
            model,
            api_key,
            error_type="gemini-embed-batch",
            failure_prefix="Batch embedding API call failed",
        )

    @staticmethod
    def _extract_error_details(exc: Exception) -> tuple:
        """Return ``(status_code, message)`` for a failed API call.

        When the exception carries at least two args they are used as
        ``(status_code, message)``, exactly as before. Exceptions with fewer
        args (for example timeouts or connection errors) used to trigger an
        ``IndexError`` inside the ``except`` handler, hiding the real failure
        and skipping the error log; they now yield ``(None, text)``.
        """
        if len(exc.args) >= 2:
            return exc.args[0], exc.args[1]
        # No status code is available; fall back to a readable description.
        # NOTE(review): assumes add_error_log accepts error_code=None - confirm.
        return None, str(exc) or repr(exc)

    async def _call_and_log(
        self,
        api_call: Any,
        payload: Dict[str, Any],
        model: str,
        api_key: str,
        *,
        error_type: str,
        failure_prefix: str,
    ) -> Dict[str, Any]:
        """Await ``api_call(payload, model, api_key)`` and log the outcome.

        Args:
            api_call: Async callable of the API client to invoke.
            payload: Request body sent to the API.
            model: Model name, recorded in both logs.
            api_key: API key, recorded in both logs.
            error_type: Value stored as ``error_type`` in the error log.
            failure_prefix: Text placed before the error message in the
                logger output.

        Returns:
            The value returned by ``api_call``.

        Raises:
            Exception: The exception raised by ``api_call``, re-raised after
                the error log has been written.
        """
        start_time = time.perf_counter()
        request_datetime = datetime.datetime.now()
        is_success = False
        status_code = None
        try:
            response = await api_call(payload, model, api_key)
            is_success = True
            status_code = 200
            return response
        except Exception as e:
            status_code, error_log_msg = self._extract_error_details(e)
            logger.error(f"{failure_prefix}: {error_log_msg}")
            await add_error_log(
                gemini_key=api_key,
                model_name=model,
                error_type=error_type,
                error_log=error_log_msg,
                error_code=status_code,
                # The request body is stored only when explicitly enabled.
                request_msg=payload if settings.ERROR_LOG_RECORD_REQUEST_BODY else None,
                request_datetime=request_datetime,
            )
            # Bare raise re-raises the active exception with its traceback.
            raise
        finally:
            # Runs on success and failure, so every call gets a request log.
            latency_ms = int((time.perf_counter() - start_time) * 1000)
            await add_request_log(
                model_name=model,
                api_key=api_key,
                is_success=is_success,
                status_code=status_code,
                latency_ms=latency_ms,
                request_time=request_datetime,
            )