From a7c717abbdeffb5226510498538c7bd3f8f08e60 Mon Sep 17 00:00:00 2001 From: huangjianwu Date: Thu, 7 May 2026 17:22:57 +0800 Subject: [PATCH] =?UTF-8?q?feat(extension):=20=E5=A4=9A=E6=A8=A1=E6=80=81?= =?UTF-8?q?=E8=A7=86=E9=A2=91=E7=90=86=E8=A7=A3=E5=BC=80=E5=85=B3=20+=20?= =?UTF-8?q?=E6=8A=BD=E5=B8=A7/=E6=8B=BC=E5=9B=BE=E5=8F=82=E6=95=B0?= =?UTF-8?q?=EF=BC=88=E5=AF=B9=E9=BD=90=20web=20NoteForm=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit web 端 NoteForm 早就有 video_understanding / video_interval / grid_size 三件套, 插件之前没有,导致用户在视觉模型上想用「画面理解」时只能去 web 端发任务。 新增字段(types.ts Settings 与 GenerateRequest 同步): - video_understanding: boolean,默认 false(关) - video_interval: number,1-30 秒,默认 6(与 web NoteForm 默认一致) - grid_size: [number, number],1-10,默认 [2,2] UI 落地: - popup 「高级」折叠区:开关 + interval + grid_size 行/列三栏,启用时才显示后两个, 并提示需要选视觉模型 - options General 页:单独一节「视频理解(多模态)」展开同样字段 - popup start() 与 background startTask() 在 generate_note 请求里带上这三个字段; 关闭时不传(避免覆盖 backend 默认) 回归风险:默认 false,对现有用户行为不变。 依赖:feature/extension-form-parity(叠加在它之上,因为 Settings 是同一片字段域)。 Co-Authored-By: Claude Opus 4.7 (1M context) --- BillNote_extension/src/background/main.ts | 3 ++ BillNote_extension/src/logic/constants.ts | 3 ++ BillNote_extension/src/logic/types.ts | 10 +++++ .../src/options/pages/General.vue | 34 +++++++++++++++++ BillNote_extension/src/popup/Popup.vue | 38 +++++++++++++++++++ 5 files changed, 88 insertions(+) diff --git a/BillNote_extension/src/background/main.ts b/BillNote_extension/src/background/main.ts index f1f4e28..9b6ca5f 100644 --- a/BillNote_extension/src/background/main.ts +++ b/BillNote_extension/src/background/main.ts @@ -87,6 +87,9 @@ async function startTask(url: string): Promise<{ ok: boolean, taskId?: string, e link: formats.includes('link'), style: settings.style || undefined, extras: settings.extras || undefined, + video_understanding: settings.video_understanding || undefined, + video_interval: settings.video_understanding ? settings.video_interval : undefined, + grid_size: settings.video_understanding ? settings.grid_size : undefined, prefetched_transcript: prefetched ?? undefined, }), }) diff --git a/BillNote_extension/src/logic/constants.ts b/BillNote_extension/src/logic/constants.ts index f7418ce..e2dac9d 100644 --- a/BillNote_extension/src/logic/constants.ts +++ b/BillNote_extension/src/logic/constants.ts @@ -12,6 +12,9 @@ export const DEFAULT_SETTINGS: Settings = { link: false, style: 'minimal', extras: '', + video_understanding: false, + video_interval: 6, + grid_size: [2, 2], } export const MAX_TASKS = 30 diff --git a/BillNote_extension/src/logic/types.ts b/BillNote_extension/src/logic/types.ts index fd68072..96f18ca 100644 --- a/BillNote_extension/src/logic/types.ts +++ b/BillNote_extension/src/logic/types.ts @@ -40,6 +40,9 @@ export interface GenerateRequest { format?: string[] style?: string extras?: string + video_understanding?: boolean + video_interval?: number + grid_size?: [number, number] // 客户端在浏览器里直接抓到的字幕,跳过后端的 download_subtitles + 音频转写 prefetched_transcript?: { language: string @@ -117,6 +120,13 @@ export interface Settings { link: boolean style: NoteStyle extras: string + // 多模态视频理解:抽帧拼图喂给视觉模型,提升画面相关问题的回答质量 + // 要求所选 model 是视觉模型(如 gpt-4o / gemini / claude-opus 系列),文字模型会忽略图片 + video_understanding: boolean + // 抽帧间隔(秒),范围 1-30,默认 6 + video_interval: number + // 拼图网格 [rows, cols],每张拼图最多 rows*cols 帧。默认 [2,2] + grid_size: [number, number] } export interface ProviderUpdatePayload { diff --git a/BillNote_extension/src/options/pages/General.vue b/BillNote_extension/src/options/pages/General.vue index 44eea22..aa28c50 100644 --- a/BillNote_extension/src/options/pages/General.vue +++ b/BillNote_extension/src/options/pages/General.vue @@ -165,5 +165,39 @@ onMounted(async () => { /> + +
+

视频理解(多模态)

+

+ 启用后会按抽帧间隔截取视频帧拼成网格图,连同字幕一起喂给视觉模型,提升画面相关问题的回答质量。 + 需要选择视觉模型(GPT-4o / Gemini / Claude 等),文字模型会忽略图片。 +

+ +
+ + + +
+
diff --git a/BillNote_extension/src/popup/Popup.vue b/BillNote_extension/src/popup/Popup.vue index ff3c84b..696b93d 100644 --- a/BillNote_extension/src/popup/Popup.vue +++ b/BillNote_extension/src/popup/Popup.vue @@ -80,6 +80,9 @@ async function start() { link: formats.includes('link'), style: settings.value.style || undefined, extras: settings.value.extras || undefined, + video_understanding: settings.value.video_understanding || undefined, + video_interval: settings.value.video_understanding ? settings.value.video_interval : undefined, + grid_size: settings.value.video_understanding ? settings.value.grid_size : undefined, prefetched_transcript: prefetched ?? undefined, }) activeTaskId.value = task_id @@ -225,6 +228,41 @@ onUnmounted(() => { placeholder="例如:重点关注游戏开发部分;保留所有专业术语原文" /> + +
+ + + +
+

+ ⚠ 需要选择视觉模型(GPT-4o / Gemini / Claude 等),文字模型会忽略图片 +