diff --git a/BillNote_extension/src/background/main.ts b/BillNote_extension/src/background/main.ts
index f1f4e28..9b6ca5f 100644
--- a/BillNote_extension/src/background/main.ts
+++ b/BillNote_extension/src/background/main.ts
@@ -87,6 +87,9 @@ async function startTask(url: string): Promise<{ ok: boolean, taskId?: string, e
link: formats.includes('link'),
style: settings.style || undefined,
extras: settings.extras || undefined,
+ video_understanding: settings.video_understanding || undefined,
+ video_interval: settings.video_understanding ? settings.video_interval : undefined,
+ grid_size: settings.video_understanding ? settings.grid_size : undefined,
prefetched_transcript: prefetched ?? undefined,
}),
})
diff --git a/BillNote_extension/src/logic/constants.ts b/BillNote_extension/src/logic/constants.ts
index f7418ce..e2dac9d 100644
--- a/BillNote_extension/src/logic/constants.ts
+++ b/BillNote_extension/src/logic/constants.ts
@@ -12,6 +12,9 @@ export const DEFAULT_SETTINGS: Settings = {
link: false,
style: 'minimal',
extras: '',
+ video_understanding: false,
+ video_interval: 6,
+ grid_size: [2, 2],
}
export const MAX_TASKS = 30
diff --git a/BillNote_extension/src/logic/types.ts b/BillNote_extension/src/logic/types.ts
index fd68072..96f18ca 100644
--- a/BillNote_extension/src/logic/types.ts
+++ b/BillNote_extension/src/logic/types.ts
@@ -40,6 +40,9 @@ export interface GenerateRequest {
format?: string[]
style?: string
extras?: string
+ video_understanding?: boolean
+ video_interval?: number
+ grid_size?: [number, number]
// 客户端在浏览器里直接抓到的字幕,跳过后端的 download_subtitles + 音频转写
prefetched_transcript?: {
language: string
@@ -117,6 +120,13 @@ export interface Settings {
link: boolean
style: NoteStyle
extras: string
+ // 多模态视频理解:抽帧拼图喂给视觉模型,提升画面相关问题的回答质量
+ // 要求所选 model 是视觉模型(如 gpt-4o / gemini / claude-opus 系列),文字模型会忽略图片
+ video_understanding: boolean
+ // 抽帧间隔(秒),范围 1-30,默认 6
+ video_interval: number
+ // 拼图网格 [rows, cols],每张拼图最多 rows*cols 帧。默认 [2,2]
+ grid_size: [number, number]
}
export interface ProviderUpdatePayload {
diff --git a/BillNote_extension/src/options/pages/General.vue b/BillNote_extension/src/options/pages/General.vue
index 44eea22..aa28c50 100644
--- a/BillNote_extension/src/options/pages/General.vue
+++ b/BillNote_extension/src/options/pages/General.vue
@@ -165,5 +165,39 @@ onMounted(async () => {
/>
+
+
+ 启用后会按抽帧间隔截取视频帧拼成网格图,连同字幕一起喂给视觉模型,提升画面相关问题的回答质量。
+ 需要选择视觉模型(GPT-4o / Gemini / Claude 等),文字模型会忽略图片。
+ 视频理解(多模态)
+
+ ⚠ 需要选择视觉模型(GPT-4o / Gemini / Claude 等),文字模型会忽略图片 +