diff --git a/BillNote_extension/src/background/main.ts b/BillNote_extension/src/background/main.ts index f1f4e28..9b6ca5f 100644 --- a/BillNote_extension/src/background/main.ts +++ b/BillNote_extension/src/background/main.ts @@ -87,6 +87,9 @@ async function startTask(url: string): Promise<{ ok: boolean, taskId?: string, e link: formats.includes('link'), style: settings.style || undefined, extras: settings.extras || undefined, + video_understanding: settings.video_understanding || undefined, + video_interval: settings.video_understanding ? settings.video_interval : undefined, + grid_size: settings.video_understanding ? settings.grid_size : undefined, prefetched_transcript: prefetched ?? undefined, }), }) diff --git a/BillNote_extension/src/logic/constants.ts b/BillNote_extension/src/logic/constants.ts index f7418ce..e2dac9d 100644 --- a/BillNote_extension/src/logic/constants.ts +++ b/BillNote_extension/src/logic/constants.ts @@ -12,6 +12,9 @@ export const DEFAULT_SETTINGS: Settings = { link: false, style: 'minimal', extras: '', + video_understanding: false, + video_interval: 6, + grid_size: [2, 2], } export const MAX_TASKS = 30 diff --git a/BillNote_extension/src/logic/types.ts b/BillNote_extension/src/logic/types.ts index fd68072..96f18ca 100644 --- a/BillNote_extension/src/logic/types.ts +++ b/BillNote_extension/src/logic/types.ts @@ -40,6 +40,9 @@ export interface GenerateRequest { format?: string[] style?: string extras?: string + video_understanding?: boolean + video_interval?: number + grid_size?: [number, number] // 客户端在浏览器里直接抓到的字幕,跳过后端的 download_subtitles + 音频转写 prefetched_transcript?: { language: string @@ -117,6 +120,13 @@ export interface Settings { link: boolean style: NoteStyle extras: string + // 多模态视频理解:抽帧拼图喂给视觉模型,提升画面相关问题的回答质量 + // 要求所选 model 是视觉模型(如 gpt-4o / gemini / claude-opus 系列),文字模型会忽略图片 + video_understanding: boolean + // 抽帧间隔(秒),范围 1-30,默认 6 + video_interval: number + // 拼图网格 [rows, cols],每张拼图最多 rows*cols 帧。默认 [2,2] + grid_size: [number, number] } export interface ProviderUpdatePayload { diff --git a/BillNote_extension/src/options/pages/General.vue b/BillNote_extension/src/options/pages/General.vue index 44eea22..aa28c50 100644 --- a/BillNote_extension/src/options/pages/General.vue +++ b/BillNote_extension/src/options/pages/General.vue @@ -165,5 +165,39 @@ onMounted(async () => { /> + +
+

视频理解(多模态)

+

+ 启用后会按抽帧间隔截取视频帧拼成网格图,连同字幕一起喂给视觉模型,提升画面相关问题的回答质量。 + 需要选择视觉模型(GPT-4o / Gemini / Claude 等),文字模型会忽略图片。 +

+ +
+ + + +
+
diff --git a/BillNote_extension/src/popup/Popup.vue b/BillNote_extension/src/popup/Popup.vue index ff3c84b..696b93d 100644 --- a/BillNote_extension/src/popup/Popup.vue +++ b/BillNote_extension/src/popup/Popup.vue @@ -80,6 +80,9 @@ async function start() { link: formats.includes('link'), style: settings.value.style || undefined, extras: settings.value.extras || undefined, + video_understanding: settings.value.video_understanding || undefined, + video_interval: settings.value.video_understanding ? settings.value.video_interval : undefined, + grid_size: settings.value.video_understanding ? settings.value.grid_size : undefined, prefetched_transcript: prefetched ?? undefined, }) activeTaskId.value = task_id @@ -225,6 +228,41 @@ onUnmounted(() => { placeholder="例如:重点关注游戏开发部分;保留所有专业术语原文" /> + +
+ + + +
+

+ ⚠ 需要选择视觉模型(GPT-4o / Gemini / Claude 等),文字模型会忽略图片 +