From 41f17592c23f16ba32002c79200dc626f199c791 Mon Sep 17 00:00:00 2001 From: huangjianwu Date: Thu, 14 May 2026 19:01:14 +0800 Subject: [PATCH] =?UTF-8?q?fix(backend):=20=E9=83=A8=E7=BD=B2=E9=9F=A7?= =?UTF-8?q?=E6=80=A7=E2=80=94=E2=80=94=E6=A8=A1=E5=9E=8B=E8=87=AA=E6=84=88?= =?UTF-8?q?/=E5=B0=B1=E7=BB=AA=E9=97=A8=E7=A6=81/=E5=85=A8=E5=B1=80?= =?UTF-8?q?=E4=BB=A3=E7=90=86/=E5=90=AF=E5=8A=A8=E8=AF=8A=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - whisper: model.bin 截断/损坏时删目录重下重试一次,修「Unable to open file model.bin」死循环;mlx 同样按 config.json 判完整性 - /generate_note 加就绪门禁:本地转写引擎模型没下好直接拦截,返回 reason=transcriber_model_not_ready,不让任务静默卡在首次下载 - 全局代理:新增 ProxyConfigManager(JSON 配置 + HTTP_PROXY env 兜底) + build_openai_client,统一注入代理到 LLM/Groq 客户端;yt-dlp 与 youtube-transcript-api 也走代理 - build_openai_client 校验 api_key 非空,空 key 给「xxx 的 API Key 未配置」而不是天书般的 Illegal header value b'Bearer ' - universal_gpt: 模型拒绝自定义 temperature(o1/o3/gpt-5 系列)时 就地去掉参数重试,不消耗重试预算 - connect_test 改用真实 chat completion 而非 /v1/models 探测 - main.py: lifespan 拆 [startup 1/5..5/5] 分段日志 + 异常清晰定位 - /sys_health 重构为结构化返回 {backend,ffmpeg,db,whisper_model} Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/app/downloaders/youtube_downloader.py | 12 ++ backend/app/downloaders/youtube_subtitle.py | 17 +- backend/app/gpt/deepseek_gpt.py | 4 +- .../provider/OpenAI_compatible_provider.py | 36 ++-- backend/app/gpt/universal_gpt.py | 35 +++- backend/app/routers/config.py | 184 +++++++++++++++--- backend/app/routers/note.py | 18 ++ backend/app/routers/provider.py | 4 +- backend/app/services/model.py | 49 +++-- backend/app/services/proxy_config_manager.py | 60 ++++++ .../services/transcriber_config_manager.py | 51 +++++ backend/app/transcriber/groq.py | 10 +- .../transcriber/mlx_whisper_transcriber.py | 13 +- backend/app/transcriber/whisper.py | 46 +++-- backend/app/utils/openai_client.py | 45 +++++ backend/main.py | 42 +++- 16 files changed, 534 insertions(+), 92 deletions(-) create mode 100644 backend/app/services/proxy_config_manager.py create mode 100644 backend/app/utils/openai_client.py diff --git a/backend/app/downloaders/youtube_downloader.py b/backend/app/downloaders/youtube_downloader.py index bb8ed8a..d3de680 100644 --- a/backend/app/downloaders/youtube_downloader.py +++ b/backend/app/downloaders/youtube_downloader.py @@ -9,12 +9,22 @@ from app.downloaders.base import Downloader, DownloadQuality from app.downloaders.youtube_subtitle import YouTubeSubtitleFetcher from app.models.notes_model import AudioDownloadResult from app.models.transcriber_model import TranscriptResult +from app.services.proxy_config_manager import ProxyConfigManager from app.utils.path_helper import get_data_dir from app.utils.url_parser import extract_video_id logger = logging.getLogger(__name__) +def _apply_proxy(ydl_opts: dict) -> dict: + """YouTube 在国内需要代理。配置了全局代理就塞进 yt-dlp opts。""" + proxy = ProxyConfigManager().get_proxy_url() + if proxy: + ydl_opts['proxy'] = proxy + logger.info(f"yt-dlp 走代理: {proxy}") + return ydl_opts + + class YoutubeDownloader(Downloader, ABC): def __init__(self): @@ -46,6 +56,7 @@ class YoutubeDownloader(Downloader, ABC): if skip_download: ydl_opts['skip_download'] = True + _apply_proxy(ydl_opts) with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(video_url, download=not skip_download) video_id = info.get("id") @@ -91,6 +102,7 @@ class YoutubeDownloader(Downloader, ABC): 'merge_output_format': 'mp4', # 确保合并成 mp4 } + _apply_proxy(ydl_opts) with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(video_url, download=True) video_id = info.get("id") diff --git a/backend/app/downloaders/youtube_subtitle.py b/backend/app/downloaders/youtube_subtitle.py index d59a871..81a559a 100644 --- a/backend/app/downloaders/youtube_subtitle.py +++ b/backend/app/downloaders/youtube_subtitle.py @@ -8,6 +8,7 @@ from typing import Optional, List from youtube_transcript_api import YouTubeTranscriptApi from app.models.transcriber_model import TranscriptResult, TranscriptSegment +from app.services.proxy_config_manager import ProxyConfigManager from app.utils.logger import get_logger logger = get_logger(__name__) @@ -17,7 +18,21 @@ class YouTubeSubtitleFetcher: """通过 youtube-transcript-api 获取 YouTube 字幕。""" def __init__(self): - self._api = YouTubeTranscriptApi() + # 配了全局代理就给 youtube-transcript-api 套一个带 proxies 的 requests.Session, + # 否则国内拉字幕同样会超时。代理未配置时退回默认无代理客户端。 + proxy = ProxyConfigManager().get_proxy_url() + if proxy: + try: + import requests + session = requests.Session() + session.proxies = {"http": proxy, "https": proxy} + self._api = YouTubeTranscriptApi(http_client=session) + logger.info(f"YouTube 字幕走代理: {proxy}") + except Exception as e: + logger.warning(f"为 youtube-transcript-api 注入代理失败,回退无代理: {e}") + self._api = YouTubeTranscriptApi() + else: + self._api = YouTubeTranscriptApi() def fetch_subtitles( self, diff --git a/backend/app/gpt/deepseek_gpt.py b/backend/app/gpt/deepseek_gpt.py index 2341425..1c230b8 100644 --- a/backend/app/gpt/deepseek_gpt.py +++ b/backend/app/gpt/deepseek_gpt.py @@ -1,6 +1,6 @@ from typing import List from app.gpt.base import GPT -from openai import OpenAI +from app.utils.openai_client import build_openai_client from app.gpt.prompt import BASE_PROMPT, AI_SUM, SCREENSHOT from app.gpt.utils import fix_markdown from app.models.gpt_model import GPTSource @@ -15,7 +15,7 @@ class DeepSeekGPT(GPT): self.base_url = getenv("DEEP_SEEK_API_BASE_URL") self.model=getenv('DEEP_SEEK_MODEL') print(self.model) - self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) + self.client = build_openai_client(self.api_key, self.base_url, key_label="DeepSeek 的 API Key") self.screenshot = False def _format_time(self, seconds: float) -> str: diff --git a/backend/app/gpt/provider/OpenAI_compatible_provider.py b/backend/app/gpt/provider/OpenAI_compatible_provider.py index a3af2d1..5ed279e 100644 --- a/backend/app/gpt/provider/OpenAI_compatible_provider.py +++ b/backend/app/gpt/provider/OpenAI_compatible_provider.py @@ -1,13 +1,13 @@ from typing import Optional, Union -from openai import OpenAI - from app.utils.logger import get_logger +from app.utils.openai_client import build_openai_client logging= get_logger(__name__) class OpenAICompatibleProvider: def __init__(self, api_key: str, base_url: str, model: Union[str, None]=None): - self.client = OpenAI(api_key=api_key, base_url=base_url) + # build_openai_client:注入全局代理 + 校验 api_key 非空 + self.client = build_openai_client(api_key, base_url, key_label="模型供应商的 API Key") self.model = model @property @@ -15,17 +15,27 @@ class OpenAICompatibleProvider: return self.client @staticmethod - def test_connection(api_key: str, base_url: str) -> bool: + def test_connection(api_key: str, base_url: str, model: str) -> bool: + """发一条最小化 chat completion 验证 key / base_url / model 三方都通。 + + 为什么不用 client.models.list(): + - 部分代理 / 自建供应商不实现 /v1/models(如某些 OpenAI 兼容网关) + - 部分供应商 key 在没有 inference 权限时 /v1/models 仍返回 200 + 最终用户跑的就是 chat.completions.create,所以直接测它最忠实。 + max_tokens=1 + temperature=0 让请求开销 < 0.0001 美元、延迟 < 2s。 + """ try: - client = OpenAI(api_key=api_key, base_url=base_url) - model = client.models.list() - # for segment in model: - # print(segment) - # print(model) - logging.info("连通性测试成功") + client = build_openai_client( + api_key, base_url, key_label="模型供应商的 API Key", timeout=15.0, + ) + client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": "ping"}], + max_tokens=1, + temperature=0, + ) + logging.info(f"连通性测试成功(model={model})") return True except Exception as e: - logging.info(f"连通性测试失败:{e}") - - # print(f"Error connecting to OpenAI API: {e}") + logging.warning(f"连通性测试失败(model={model}):{e}") return False \ No newline at end of file diff --git a/backend/app/gpt/universal_gpt.py b/backend/app/gpt/universal_gpt.py index 739225f..514bf4a 100644 --- a/backend/app/gpt/universal_gpt.py +++ b/backend/app/gpt/universal_gpt.py @@ -185,15 +185,40 @@ class UniversalGPT(GPT): status = getattr(exc, "status_code", None) or getattr(exc, "status", None) return status in {408, 409, 429, 500, 502, 503, 504, 524} + @staticmethod + def _is_temperature_unsupported_error(exc: Exception) -> bool: + """OpenAI o1/o3/gpt-5 系列等新模型不接受自定义 temperature, + 只允许默认值 1,传 0.7 会报 `'temperature' does not support 0.7 ...`。""" + raw = str(exc).lower() + return "temperature" in raw and ( + "does not support" in raw + or "unsupported_value" in raw + or "only the default" in raw + ) + + def _do_create(self, messages: list): + """单次调用。如果模型拒绝自定义 temperature,就地去掉该参数再试一次 + (不消耗外层的重试次数预算),仍失败则把异常抛给外层重试逻辑。""" + try: + return self.client.chat.completions.create( + model=self.model, + messages=messages, + temperature=self.temperature, + ) + except Exception as exc: + if self._is_temperature_unsupported_error(exc): + print(f"[universal_gpt] 模型 {self.model} 不支持自定义 temperature,改用默认值重试") + return self.client.chat.completions.create( + model=self.model, + messages=messages, + ) + raise + def _chat_completion_create(self, messages: list): last_exc = None for attempt in range(self._max_retry_attempts): try: - return self.client.chat.completions.create( - model=self.model, - messages=messages, - temperature=self.temperature - ) + return self._do_create(messages) except Exception as exc: last_exc = exc if attempt == self._max_retry_attempts - 1 or not self._is_retryable_error(exc): diff --git a/backend/app/routers/config.py b/backend/app/routers/config.py index 0fecc48..97687f6 100644 --- a/backend/app/routers/config.py +++ b/backend/app/routers/config.py @@ -80,6 +80,36 @@ def update_transcriber_config(data: TranscriberConfigRequest): return R.success(data=config) +# ---- 全局代理配置(作用于 LLM API + 转写 API + yt-dlp 下载)---- + +class ProxyConfigRequest(BaseModel): + enabled: bool + url: Optional[str] = None + + +@router.get("/proxy_config") +def get_proxy_config(): + from app.services.proxy_config_manager import ProxyConfigManager + mgr = ProxyConfigManager() + cfg = mgr.get_config() + # effective 给前端展示「当前实际生效的代理」——可能来自配置,也可能来自 env 兜底 + return R.success(data={ + **cfg, + "effective": mgr.get_proxy_url() or "", + }) + + +@router.post("/proxy_config") +def update_proxy_config(data: ProxyConfigRequest): + from app.services.proxy_config_manager import ProxyConfigManager + mgr = ProxyConfigManager() + cfg = mgr.update_config(enabled=data.enabled, url=data.url) + return R.success(data={ + **cfg, + "effective": mgr.get_proxy_url() or "", + }) + + # ---- Whisper 模型下载状态 & 下载触发 ---- # 用于跟踪正在进行的下载任务 @@ -87,10 +117,33 @@ _downloading: dict[str, str] = {} # model_size -> status ("downloading" | "done def _check_whisper_model_exists(model_size: str, subdir: str = "whisper") -> bool: - """检查指定 whisper 模型是否已下载到本地。""" + """检查指定 whisper 模型是否已下载完整到本地。 + + 必须 model.bin 落盘才算完成,仅有空目录或半成品不能算「已下载」—— + 否则监控页会显示绿勾但加载时报「Unable to open file 'model.bin'」。 + """ model_dir = get_model_dir(subdir) model_path = os.path.join(model_dir, f"whisper-{model_size}") - return Path(model_path).exists() + return (Path(model_path) / "model.bin").exists() + + +def _check_mlx_whisper_model_exists(model_size: str) -> bool: + """检查 mlx-whisper 模型是否已下载完整到本地。 + + 与 fast-whisper 的目录布局不同:mlx 模型按 HuggingFace repo_id + (如 mlx-community/whisper-tiny-mlx)落盘,且没有 model.bin, + 用 config.json 作为「下载完成」的判据,和 mlx_whisper_transcriber.py 保持一致。 + """ + try: + from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP + except Exception: + return False + repo_id = MLX_MODEL_MAP.get(model_size) + if not repo_id: + return False + model_dir = get_model_dir("mlx-whisper") + model_path = os.path.join(model_dir, repo_id) + return (Path(model_path) / "config.json").exists() @router.get("/transcriber_models_status") @@ -113,11 +166,9 @@ def get_transcriber_models_status(): from app.transcriber.mlx_whisper_transcriber import MLX_MODEL_MAP for size in WHISPER_MODEL_SIZES: mlx_key = f"mlx-{size}" - model_dir = get_model_dir("mlx-whisper") repo_id = MLX_MODEL_MAP.get(size) - # 模型在本地按 repo_id(如 mlx-community/whisper-small-mlx)落盘 - model_path = os.path.join(model_dir, repo_id) if repo_id else None - downloaded = bool(model_path and Path(model_path).exists()) + # 用 config.json 判定,和 _check_mlx_whisper_model_exists / 加载逻辑保持一致 + downloaded = _check_mlx_whisper_model_exists(size) mlx_statuses.append({ "model_size": size, "downloaded": downloaded, @@ -146,7 +197,8 @@ def _do_download_whisper(model_size: str): _downloading[model_size] = "downloading" model_dir = get_model_dir("whisper") model_path = os.path.join(model_dir, f"whisper-{model_size}") - if Path(model_path).exists(): + # 用 model.bin 判定而非目录存在:半成品目录不能算「已下载」 + if (Path(model_path) / "model.bin").exists(): _downloading[model_size] = "done" return repo_id = MODEL_MAP.get(model_size) @@ -179,7 +231,8 @@ def _do_download_mlx_whisper(model_size: str): model_dir = get_model_dir("mlx-whisper") model_path = os.path.join(model_dir, repo_id) - if Path(model_path).exists(): + # 用 config.json 判定而非目录存在:半成品目录不能算「已下载」 + if (Path(model_path) / "config.json").exists(): _downloading[key] = "done" return logger.info(f"开始下载 mlx-whisper 模型: {model_size} ← {repo_id}") @@ -214,46 +267,119 @@ def download_transcriber_model(data: ModelDownloadRequest, background_tasks: Bac @router.get("/sys_health") async def sys_health(): + """结构化健康状态——任何子项异常都不应让整个 endpoint 5xx。 + + 每个字段:'ok' | 'missing' | 'error'。 + 前端 useCheckBackend 用 /sys_check 做存活判定(不依赖外部依赖), + /sys_health 用来在设置页区分「后端没起」vs「后端起了但 ffmpeg 缺」vs「DB 写不进去」等更细的状态。 + """ + ffmpeg_status = "ok" try: ensure_ffmpeg_or_raise() - return R.success() - except EnvironmentError: - return R.error(msg="系统未安装 ffmpeg 请先进行安装") + except Exception: + ffmpeg_status = "missing" + + db_status = "ok" + try: + from app.db.engine import engine + from sqlalchemy import text + with engine.connect() as conn: + conn.execute(text("SELECT 1")) + except Exception: + db_status = "error" + + # 当前转写器配置 + 模型是否已下载(用 model.bin 落盘判定,与 transcriber 加载逻辑一致) + whisper_info: dict = {"size": None, "type": None, "downloaded": False, "checked": False} + try: + cfg = transcriber_config_manager.get_config() + size = cfg["whisper_model_size"] + ttype = cfg["transcriber_type"] + whisper_info["size"] = size + whisper_info["type"] = ttype + # 只有本地引擎才有「下载」概念;groq / bcut / kuaishou 在线引擎跳过 + if ttype == "fast-whisper": + whisper_info["downloaded"] = _check_whisper_model_exists(size, "whisper") + whisper_info["checked"] = True + elif ttype == "mlx-whisper": + whisper_info["downloaded"] = _check_mlx_whisper_model_exists(size) + whisper_info["checked"] = True + except Exception: + pass + + return R.success(data={ + "backend": "ok", + "ffmpeg": ffmpeg_status, + "db": db_status, + "whisper_model": whisper_info, + }) + @router.get("/sys_check") async def sys_check(): + """轻量存活判定:后端进程能响应这个 endpoint 就算「起来了」,不查外部依赖。 + + 给桌面端 useCheckBackend / Tauri ready-probe 用。 + """ return R.success() @router.get("/deploy_status") async def deploy_status(): - """返回部署监控所需的所有状态信息""" - import torch + """返回部署监控所需的所有状态信息。 + + 所有子项都用 try 包起来——监控页本身不应该被任何一个子项打死。 + 特别是 torch:它只在 fast-whisper 路径用得到,用 Groq / 必剪 / 快手在线 + 引擎的轻量部署完全可以不装,那种情况这个 endpoint 不应该 500。 + """ import os - + # CUDA 状态 - cuda_available = torch.cuda.is_available() - cuda_info = { - "available": cuda_available, - "version": torch.version.cuda if cuda_available else None, - "gpu_name": torch.cuda.get_device_name(0) if cuda_available else None, - } - - # Whisper 模型状态(从配置文件读取,与前端设置同步) - transcriber_cfg = transcriber_config_manager.get_config() - model_size = transcriber_cfg["whisper_model_size"] - transcriber_type = transcriber_cfg["transcriber_type"] - + try: + import torch + cuda_available = torch.cuda.is_available() + cuda_info = { + "available": cuda_available, + "torch_installed": True, + "version": torch.version.cuda if cuda_available else None, + "gpu_name": torch.cuda.get_device_name(0) if cuda_available else None, + } + except Exception: + cuda_info = { + "available": False, + "torch_installed": False, + "version": None, + "gpu_name": None, + } + + # Whisper 模型 / 转写器配置 + 本地下载状态 + try: + transcriber_cfg = transcriber_config_manager.get_config() + size = transcriber_cfg["whisper_model_size"] + ttype = transcriber_cfg["transcriber_type"] + if ttype == "fast-whisper": + downloaded = _check_whisper_model_exists(size, "whisper") + elif ttype == "mlx-whisper": + downloaded = _check_mlx_whisper_model_exists(size) + else: + downloaded = False # 在线引擎无下载概念 + whisper_info = { + "model_size": size, + "transcriber_type": ttype, + "downloaded": downloaded, + } + except Exception: + whisper_info = {"model_size": None, "transcriber_type": None, "downloaded": False} + # FFmpeg 状态 try: ensure_ffmpeg_or_raise() ffmpeg_ok = True - except: + except Exception: ffmpeg_ok = False - + return R.success(data={ "backend": {"status": "running", "port": int(os.getenv("BACKEND_PORT", 8483))}, "cuda": cuda_info, - "whisper": {"model_size": model_size, "transcriber_type": transcriber_type}, + "whisper": whisper_info, "ffmpeg": {"available": ffmpeg_ok}, }) \ No newline at end of file diff --git a/backend/app/routers/note.py b/backend/app/routers/note.py index 3bf08b4..80033c8 100644 --- a/backend/app/routers/note.py +++ b/backend/app/routers/note.py @@ -180,6 +180,24 @@ async def upload(file: UploadFile = File(...)): @router.post("/generate_note") def generate_note(data: VideoRequest, background_tasks: BackgroundTasks): try: + # 就绪门禁:本地转写引擎(fast-whisper / mlx-whisper)必须等模型下载完才能跑视频, + # 否则任务会卡在首次下载(慢 / OOM / 截断),用户只看到一个静默失败的任务。 + # 客户端已抓好字幕(prefetched_transcript)则不需要转写,跳过检查。 + if not data.prefetched_transcript: + from app.services.transcriber_config_manager import TranscriberConfigManager + readiness = TranscriberConfigManager().is_model_ready() + if not readiness["ready"]: + logger.warning(f"拒绝 generate_note:{readiness['reason']}") + return R.error( + msg=readiness["reason"], + code=300102, + data={ + "reason": "transcriber_model_not_ready", + "transcriber_type": readiness["transcriber_type"], + "model_size": readiness["model_size"], + "downloading": readiness["downloading"], + }, + ) video_id = extract_video_id(data.video_url, data.platform) # if not video_id: diff --git a/backend/app/routers/provider.py b/backend/app/routers/provider.py index 0b3215a..1d11520 100644 --- a/backend/app/routers/provider.py +++ b/backend/app/routers/provider.py @@ -20,6 +20,8 @@ class ProviderRequest(BaseModel): class TestRequest(BaseModel): id: str + # 可选:指定用哪个 model 跑连通性测试;不传则用该 provider 在 DB 里的第一个模型 + model: Optional[str] = None class ProviderUpdateRequest(BaseModel): id: str name: Optional[str] = None @@ -91,5 +93,5 @@ def update_provider(data: ProviderUpdateRequest): @router.post('/connect_test') def gpt_connect_test(data: TestRequest): - ModelService().connect_test(data.id) + ModelService().connect_test(data.id, model=data.model) return R.success(msg='连接成功') diff --git a/backend/app/services/model.py b/backend/app/services/model.py index 8bc73a3..576a2e1 100644 --- a/backend/app/services/model.py +++ b/backend/app/services/model.py @@ -100,23 +100,46 @@ class ModelService: logger.error(f"[{provider_id}] 获取模型失败: {e}") return [] @staticmethod - def connect_test(id: str) -> bool: + def connect_test(id: str, model: str | None = None) -> bool: + """连通性测试:发一条最小化 chat completion。 + model 优先级: + 1. 调用方显式传入(前端可在「模型选择」UI 里挑一个再测) + 2. DB 中该 provider 已保存的第一个模型 + 3. 都没有 → 抛错让用户先加一个模型 + """ provider = ProviderService.get_provider_by_id(id) - - if provider: - if not provider.get('api_key'): - raise ProviderError(code=ProviderErrorEnum.NOT_FOUND.code, message=ProviderErrorEnum.NOT_FOUND.message) - result = OpenAICompatibleProvider.test_connection( - api_key=provider.get('api_key'), - base_url=provider.get('base_url') + if not provider: + raise ProviderError( + code=ProviderErrorEnum.NOT_FOUND.code, + message=ProviderErrorEnum.NOT_FOUND.message, + ) + if not provider.get('api_key'): + raise ProviderError( + code=ProviderErrorEnum.NOT_FOUND.code, + message=ProviderErrorEnum.NOT_FOUND.message, ) - if result: - return True - else: - raise ProviderError(code=ProviderErrorEnum.WRONG_PARAMETER.code,message=ProviderErrorEnum.WRONG_PARAMETER.message) - raise ProviderError(code=ProviderErrorEnum.NOT_FOUND.code, message=ProviderErrorEnum.NOT_FOUND.message) + if not model: + saved_models = ModelService.get_enabled_models_by_provider(provider["id"]) + if not saved_models: + raise ProviderError( + code=ProviderErrorEnum.WRONG_PARAMETER.code, + message="请先为该供应商添加至少一个模型再测试连通性", + ) + model = saved_models[0]["model_name"] + + ok = OpenAICompatibleProvider.test_connection( + api_key=provider.get('api_key'), + base_url=provider.get('base_url'), + model=model, + ) + if ok: + return True + raise ProviderError( + code=ProviderErrorEnum.WRONG_PARAMETER.code, + message=ProviderErrorEnum.WRONG_PARAMETER.message, + ) diff --git a/backend/app/services/proxy_config_manager.py b/backend/app/services/proxy_config_manager.py new file mode 100644 index 0000000..abbc636 --- /dev/null +++ b/backend/app/services/proxy_config_manager.py @@ -0,0 +1,60 @@ +import json +import os +from pathlib import Path +from typing import Any, Dict, Optional + + +class ProxyConfigManager: + """全局代理配置,存 JSON 文件,支持前端动态修改。 + + 作用范围:LLM API + 转写 API(Groq 等)+ yt-dlp 视频下载。 + 优先级:配置文件里 enabled=true 的 url > 环境变量 HTTP_PROXY/HTTPS_PROXY/ALL_PROXY。 + 这样桌面端/web 用户在设置页填,docker/服务器部署用环境变量兜底。 + """ + + def __init__(self, filepath: str = "config/proxy.json"): + self.path = Path(filepath) + self.path.parent.mkdir(parents=True, exist_ok=True) + + def _read(self) -> Dict[str, Any]: + if not self.path.exists(): + return {} + try: + with self.path.open("r", encoding="utf-8") as f: + return json.load(f) + except Exception: + return {} + + def _write(self, data: Dict[str, Any]): + with self.path.open("w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + def get_config(self) -> Dict[str, Any]: + data = self._read() + return { + "enabled": bool(data.get("enabled", False)), + "url": data.get("url", "") or "", + } + + def update_config(self, enabled: bool, url: Optional[str] = None) -> Dict[str, Any]: + data = self._read() + data["enabled"] = bool(enabled) + if url is not None: + data["url"] = url.strip() + self._write(data) + return self.get_config() + + def get_proxy_url(self) -> Optional[str]: + """返回当前生效的代理 URL;没有则 None。 + + - 配置文件 enabled=true 且 url 非空 → 用配置的 url + - 否则回退到环境变量(标准的 HTTP_PROXY / HTTPS_PROXY / ALL_PROXY,大小写都认) + """ + cfg = self.get_config() + if cfg["enabled"] and cfg["url"]: + return cfg["url"] + for key in ("HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy", "ALL_PROXY", "all_proxy"): + val = os.environ.get(key) + if val: + return val + return None diff --git a/backend/app/services/transcriber_config_manager.py b/backend/app/services/transcriber_config_manager.py index 8205372..5d1f73c 100644 --- a/backend/app/services/transcriber_config_manager.py +++ b/backend/app/services/transcriber_config_manager.py @@ -61,3 +61,54 @@ class TranscriberConfigManager: def get_whisper_model_size(self) -> str: return self.get_config()["whisper_model_size"] + + def is_model_ready(self) -> Dict[str, Any]: + """当前转写器是否就绪可用。 + + 返回 {ready, transcriber_type, model_size, downloading, reason}: + - 在线引擎 (groq/bcut/kuaishou):永远 ready(不需要本地模型) + - fast-whisper:检查 whisper-{size}/model.bin 落盘 + - mlx-whisper:检查 {repo_id}/config.json 落盘 + 给 /generate_note 入口做「开始视频前先确认模型下载好」的门禁用。 + """ + cfg = self.get_config() + ttype = cfg["transcriber_type"] + size = cfg["whisper_model_size"] + result = { + "ready": True, + "transcriber_type": ttype, + "model_size": size, + "downloading": False, + "reason": "", + } + if ttype not in ("fast-whisper", "mlx-whisper"): + return result # 在线引擎无需本地模型 + + # 延迟 import 避免与 routers.config 的循环依赖;只取纯函数,不触发路由副作用 + try: + from app.routers.config import ( + _check_whisper_model_exists, + _check_mlx_whisper_model_exists, + _downloading, + ) + except Exception as e: + # 拿不到检查函数时保守放行,不要把用户卡死 + result["reason"] = f"无法检查模型状态: {e}" + return result + + if ttype == "fast-whisper": + downloaded = _check_whisper_model_exists(size, "whisper") + downloading = _downloading.get(size) == "downloading" + else: # mlx-whisper + downloaded = _check_mlx_whisper_model_exists(size) + downloading = _downloading.get(f"mlx-{size}") == "downloading" + + result["downloading"] = downloading + if downloaded: + return result + result["ready"] = False + result["reason"] = ( + f"转写模型 {ttype} / {size} 尚未下载就绪" + + (",正在下载中,请稍候" if downloading else ",请先在「设置 → 音频转写配置」页下载") + ) + return result diff --git a/backend/app/transcriber/groq.py b/backend/app/transcriber/groq.py index 15a103a..033ffa6 100644 --- a/backend/app/transcriber/groq.py +++ b/backend/app/transcriber/groq.py @@ -5,7 +5,7 @@ from app.decorators.timeit import timeit from app.models.transcriber_model import TranscriptResult, TranscriptSegment from app.services.provider import ProviderService from app.transcriber.base import Transcriber -from openai import OpenAI +from app.utils.openai_client import build_openai_client import ffmpeg import tempfile from dotenv import load_dotenv @@ -30,12 +30,14 @@ class GroqTranscriber(Transcriber, ABC): print(f"压缩完成,临时路径:{file_path}") provider = ProviderService.get_provider_by_id('groq') - if not provider: raise Exception("Groq 供应商未配置,请配置以后使用。") - client = OpenAI( + # build_openai_client 会校验 api_key 非空(空 key 会抛天书般的 + # `Illegal header value b'Bearer '`),并自动注入全局代理 + client = build_openai_client( api_key=provider.get('api_key'), - base_url=provider.get('base_url') + base_url=provider.get('base_url'), + key_label="Groq 转写引擎的 API Key", ) filename = file_path diff --git a/backend/app/transcriber/mlx_whisper_transcriber.py b/backend/app/transcriber/mlx_whisper_transcriber.py index 8c0a456..bd4fedd 100644 --- a/backend/app/transcriber/mlx_whisper_transcriber.py +++ b/backend/app/transcriber/mlx_whisper_transcriber.py @@ -58,9 +58,16 @@ class MLXWhisperTranscriber(Transcriber): # 设置模型路径 model_dir = get_model_dir("mlx-whisper") self.model_path = os.path.join(model_dir, self.model_name) - # 检查并下载模型 - if not Path(self.model_path).exists(): - logger.info(f"模型 {self.model_name} 不存在,开始下载...") + # 用 config.json 而非目录存在作为「下载完成」的判据, + # 同 fast-whisper 的 model.bin:避免半成品目录把后续下载吞掉 + config_file = Path(self.model_path) / "config.json" + if not config_file.exists(): + if Path(self.model_path).exists(): + logger.warning( + f"MLX 模型目录 {self.model_path} 存在但 config.json 缺失(上次下载未完成),重新下载" + ) + else: + logger.info(f"模型 {self.model_name} 不存在,开始下载...") snapshot_download( self.model_name, local_dir=self.model_path, diff --git a/backend/app/transcriber/whisper.py b/backend/app/transcriber/whisper.py index fca49f6..e212861 100644 --- a/backend/app/transcriber/whisper.py +++ b/backend/app/transcriber/whisper.py @@ -10,6 +10,7 @@ from app.utils.path_helper import get_model_dir from events import transcription_finished from pathlib import Path import os +import shutil from tqdm import tqdm from modelscope import snapshot_download @@ -50,22 +51,41 @@ class WhisperTranscriber(Transcriber): model_dir = get_model_dir("whisper") model_path = os.path.join(model_dir, f"whisper-{model_size}") - if not Path(model_path).exists(): - logger.info(f"模型 whisper-{model_size} 不存在,开始下载...") - repo_id = MODEL_MAP[model_size] - model_path = snapshot_download( - repo_id, + repo_id = MODEL_MAP[model_size] - local_dir=model_path, - ) + # 第一步:目录 / model.bin 不在 → 下载。 + # 关键判据用 model.bin 而不是目录存在:首次下载若被打断(网络中断 / 磁盘满 / + # 容器被 kill)会留下半成品目录,只看目录存在会跳过下载。 + model_bin = Path(model_path) / "model.bin" + if not model_bin.exists(): + if Path(model_path).exists(): + logger.warning(f"模型目录 {model_path} 存在但 model.bin 缺失(上次下载未完成),重新下载") + else: + logger.info(f"模型 whisper-{model_size} 不存在,开始下载...") + model_path = snapshot_download(repo_id, local_dir=model_path) logger.info("模型下载完成") - self.model = WhisperModel( - model_size_or_path=model_path, - device=self.device, - compute_type=self.compute_type, - download_root=model_dir - ) + # 第二步:加载。model.bin 可能存在但【内容截断】(下载到一半被 kill), + # 此时 WhisperModel() 会抛 "File model.bin is incomplete: failed to read a buffer..."。 + # 捕获后删掉损坏目录、重新下载、再试一次——自愈,避免 500 死循环。 + try: + self.model = WhisperModel( + model_size_or_path=model_path, + device=self.device, + compute_type=self.compute_type, + download_root=model_dir, + ) + except Exception as e: + logger.warning(f"加载 whisper-{model_size} 失败(疑似模型文件损坏 / 截断):{e};删除后重新下载") + shutil.rmtree(model_path, ignore_errors=True) + model_path = snapshot_download(repo_id, local_dir=model_path) + logger.info("模型重新下载完成,重试加载") + self.model = WhisperModel( + model_size_or_path=model_path, + device=self.device, + compute_type=self.compute_type, + download_root=model_dir, + ) @staticmethod def is_torch_installed() -> bool: try: diff --git a/backend/app/utils/openai_client.py b/backend/app/utils/openai_client.py new file mode 100644 index 0000000..3a0e9b1 --- /dev/null +++ b/backend/app/utils/openai_client.py @@ -0,0 +1,45 @@ +"""统一构造 OpenAI 兼容客户端:注入全局代理 + 校验 api_key。 + +为什么要这一层: + - 代理:openai SDK 默认只认进程级 HTTP_PROXY 环境变量,桌面端用户在 UI 里 + 填的代理需要显式塞进 httpx.Client 才生效。 + - api_key 校验:空 key 会让 httpx 拼出非法 header `Bearer `,抛出 + `httpx.LocalProtocolError: Illegal header value b'Bearer '` 这种天书报错。 + 在入口挡掉,给用户「xxx 的 API Key 未配置」这种能看懂的提示。 +""" +from typing import Optional + +from openai import OpenAI + +from app.services.proxy_config_manager import ProxyConfigManager +from app.utils.logger import get_logger + +logger = get_logger(__name__) + + +def build_openai_client( + api_key: Optional[str], + base_url: Optional[str], + *, + key_label: str = "API Key", + timeout: Optional[float] = None, +) -> OpenAI: + """构造 OpenAI 客户端。api_key 为空直接抛清晰错误;代理已配置则注入。 + + key_label 用于错误提示,例如 "Groq 的 API Key" / "OpenAI 供应商的 API Key"。 + """ + if not api_key or not str(api_key).strip(): + raise ValueError(f"{key_label} 未配置,请先在「设置」里填写后再使用") + + kwargs = {"api_key": str(api_key).strip(), "base_url": base_url} + if timeout is not None: + kwargs["timeout"] = timeout + + proxy_url = ProxyConfigManager().get_proxy_url() + if proxy_url: + # 延迟 import httpx:仅在确实要走代理时才需要 + import httpx + kwargs["http_client"] = httpx.Client(proxy=proxy_url, timeout=timeout or 600.0) + logger.info(f"OpenAI 客户端走代理: {proxy_url}") + + return OpenAI(**kwargs) diff --git a/backend/main.py b/backend/main.py index 31640e2..9f21a64 100644 --- a/backend/main.py +++ b/backend/main.py @@ -39,24 +39,50 @@ if not os.path.exists(out_dir): @asynccontextmanager async def lifespan(app: FastAPI): - register_handler() - init_db() - # 转写器不再在启动时强制初始化,而是在首次生成笔记时按需创建 - # 如果配置了不可用的类型(如 mlx-whisper 未安装),会在使用时报错而非静默回退 - _cfg = TranscriberConfigManager().get_config() - logger.info(f"当前转写器配置: type={_cfg['transcriber_type']}, model_size={_cfg['whisper_model_size']}") - seed_default_providers() + # 启动序列拆成 5 步、每步独立日志 + 异常时打明确的 [startup N/5 FAILED] 标记。 + # 目的:用户 docker logs 一眼能看出后端死在哪一步,避免「容器一直重启但看不出原因」。 + try: + logger.info("[startup 1/5] register_handler() — 注册事件处理器") + register_handler() + + logger.info("[startup 2/5] init_db() — 初始化 SQLite 数据库") + init_db() + + logger.info("[startup 3/5] TranscriberConfigManager — 读取转写器配置") + # 转写器不再在启动时强制初始化,而是在首次生成笔记时按需创建。 + # 如果配置了不可用的类型(如 mlx-whisper 未安装),会在使用时报错而非静默回退。 + _cfg = TranscriberConfigManager().get_config() + logger.info( + f" 当前转写器: type={_cfg['transcriber_type']}, " + f"model_size={_cfg['whisper_model_size']}" + ) + + logger.info("[startup 4/5] seed_default_providers() — 初始化默认 LLM 供应商") + seed_default_providers() + + logger.info("[startup 5/5] 启动完成,等待请求") + except Exception: + logger.exception("[startup FAILED] 后端启动期异常,详见堆栈;容器会退出并由 restart 策略决定是否重试") + raise + yield app = create_app(lifespan=lifespan) # 允许的源:本地 web 端 + Tauri 桌面端 + 浏览器扩展(chrome/edge/firefox) # 用 regex 是因为 chrome-extension:// 的 id 在每次开发版加载时不固定 +# Tauri 2 不同平台 webview origin 不一样,必须全列: +# - macOS: tauri://localhost (自定义协议) +# - Windows: https://tauri.localhost (Edge WebView2) +# - Linux: http://tauri.localhost (WebKitGTK) +# 漏掉哪个都会导致桌面端 fetch 返回 200 但 browser 因为 CORS 拒绝读响应, +# 表现为前端「连不上后端」但后端日志一片 200 OK。 CORS_ORIGIN_REGEX = ( r"^chrome-extension://[a-z]+$" r"|^moz-extension://.+$" r"|^http://(localhost|127\.0\.0\.1)(:\d+)?$" - r"|^http://tauri\.localhost$" + r"|^tauri://localhost$" + r"|^https?://tauri\.localhost$" ) app.add_middleware(