Files
BiliNote/backend/app/transcriber/groq.py
huangjianwu 41f17592c2 fix(backend): 部署韧性——模型自愈/就绪门禁/全局代理/启动诊断
- whisper: model.bin 截断/损坏时删目录重下重试一次,修「Unable to
  open file model.bin」死循环;mlx 同样按 config.json 判完整性
- /generate_note 加就绪门禁:本地转写引擎模型没下好直接拦截,返回
  reason=transcriber_model_not_ready,不让任务静默卡在首次下载
- 全局代理:新增 ProxyConfigManager(JSON 配置 + HTTP_PROXY env 兜底)
  + build_openai_client,统一注入代理到 LLM/Groq 客户端;yt-dlp 与
  youtube-transcript-api 也走代理
- build_openai_client 校验 api_key 非空,空 key 给「xxx 的 API Key
  未配置」而不是天书般的 Illegal header value b'Bearer '
- universal_gpt: 模型拒绝自定义 temperature(o1/o3/gpt-5 系列)时
  就地去掉参数重试,不消耗重试预算
- connect_test 改用真实 chat completion 而非 /v1/models 探测
- main.py: lifespan 拆 [startup 1/5..5/5] 分段日志 + 异常清晰定位
- /sys_health 重构为结构化返回 {backend,ffmpeg,db,whisper_model}

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 19:01:14 +08:00

71 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from abc import ABC
import os
from app.decorators.timeit import timeit
from app.models.transcriber_model import TranscriptResult, TranscriptSegment
from app.services.provider import ProviderService
from app.transcriber.base import Transcriber
from app.utils.openai_client import build_openai_client
import ffmpeg
import tempfile
from dotenv import load_dotenv
load_dotenv()
MAX_SIZE_MB = 18
MAX_SIZE_BYTES = MAX_SIZE_MB * 1024 * 1024
def compress_audio(input_path: str, target_bitrate='64k') -> str:
output_fd, output_path = tempfile.mkstemp(suffix=".mp3") # 临时输出文件
os.close(output_fd) # 关闭文件描述符ffmpeg 会用路径操作
ffmpeg.input(input_path).output(output_path, audio_bitrate=target_bitrate).run(quiet=True, overwrite_output=True)
return output_path
class GroqTranscriber(Transcriber, ABC):
@timeit
def transcript(self, file_path: str) -> TranscriptResult:
file_size = os.path.getsize(file_path)
if file_size > MAX_SIZE_BYTES:
print(f"文件超过 {MAX_SIZE_MB}MB开始压缩当前 {round(file_size / (1024 * 1024), 2)}MB...")
file_path = compress_audio(file_path)
print(f"压缩完成,临时路径:{file_path}")
provider = ProviderService.get_provider_by_id('groq')
if not provider:
raise Exception("Groq 供应商未配置,请配置以后使用。")
# build_openai_client 会校验 api_key 非空(空 key 会抛天书般的
# `Illegal header value b'Bearer '`),并自动注入全局代理
client = build_openai_client(
api_key=provider.get('api_key'),
base_url=provider.get('base_url'),
key_label="Groq 转写引擎的 API Key",
)
filename = file_path
with open(filename, "rb") as file:
transcription = client.audio.transcriptions.create(
file=(filename, file.read()),
model=os.getenv('GROQ_TRANSCRIBER_MODEL'),
response_format="verbose_json",
)
print(transcription.text)
print(transcription)
segments = []
full_text = ""
for seg in transcription.segments:
text = seg.text.strip()
full_text += text + " "
segments.append(TranscriptSegment(
start=seg.start,
end=seg.end,
text=text
))
result = TranscriptResult(
language=transcription.language,
full_text=full_text.strip(),
segments=segments,
raw=transcription.to_dict()
)
return result