8 Commits

Author SHA1 Message Date
Jianwu Huang
81d254fcb7 Revert "feat(extension): 多模态视频理解开关 + 抽帧/拼图参数(对齐 web NoteForm)" 2026-05-07 17:29:48 +08:00
Jianwu Huang
f6d299ce48 Merge pull request #353 from JefferyHcool/feature/extension-video-understanding
feat(extension): 多模态视频理解开关 + 抽帧/拼图参数(对齐 web NoteForm)
2026-05-07 17:28:00 +08:00
Jianwu Huang
ed1ee0a151 Merge pull request #352 from JefferyHcool/feature/extension-form-parity
Feature/extension form parity
2026-05-07 17:27:35 +08:00
huangjianwu
c9497b502c chore(release): v2.1.4
CI 工程化修复,无运行时行为变化。详见 CHANGELOG.md。
2026-05-07 16:44:59 +08:00
huangjianwu
26e23d0f2c Release v2.1.3
修 issue #282 (DeepSeek 等非多模态供应商被 400 拒绝)。详见 CHANGELOG.md。
2026-05-07 14:14:33 +08:00
huangjianwu
64882e6a77 Release v2.1.2
补 v2.1.1 ghcr.io 镜像构建失败。详见 CHANGELOG.md。
2026-05-07 14:06:26 +08:00
huangjianwu
a46880f169 Release v2.1.1
工程化与文档收尾,无运行时行为变化。详见 CHANGELOG.md。
2026-05-07 13:54:50 +08:00
huangjianwu
dbe7b89754 Release v2.1.0
详见 CHANGELOG.md。主线:
- 浏览器插件(Chrome/Edge/Firefox MV3)
- B 站字幕优先链路
- mlx-whisper 仓库 ID 修复
- 后端 CORS regex 兼容扩展源
2026-05-07 13:10:28 +08:00
5 changed files with 0 additions and 88 deletions

View File

@@ -87,9 +87,6 @@ async function startTask(url: string): Promise<{ ok: boolean, taskId?: string, e
link: formats.includes('link'),
style: settings.style || undefined,
extras: settings.extras || undefined,
video_understanding: settings.video_understanding || undefined,
video_interval: settings.video_understanding ? settings.video_interval : undefined,
grid_size: settings.video_understanding ? settings.grid_size : undefined,
prefetched_transcript: prefetched ?? undefined,
}),
})

View File

@@ -12,9 +12,6 @@ export const DEFAULT_SETTINGS: Settings = {
link: false,
style: 'minimal',
extras: '',
video_understanding: false,
video_interval: 6,
grid_size: [2, 2],
}
export const MAX_TASKS = 30

View File

@@ -40,9 +40,6 @@ export interface GenerateRequest {
format?: string[]
style?: string
extras?: string
video_understanding?: boolean
video_interval?: number
grid_size?: [number, number]
// 客户端在浏览器里直接抓到的字幕,跳过后端的 download_subtitles + 音频转写
prefetched_transcript?: {
language: string
@@ -120,13 +117,6 @@ export interface Settings {
link: boolean
style: NoteStyle
extras: string
// 多模态视频理解:抽帧拼图喂给视觉模型,提升画面相关问题的回答质量
// 要求所选 model 是视觉模型(如 gpt-4o / gemini / claude-opus 系列),文字模型会忽略图片
video_understanding: boolean
// 抽帧间隔(秒),范围 1-30,默认 6
video_interval: number
// 拼图网格 [rows, cols],每张拼图最多 rows*cols 帧。默认 [2,2]
grid_size: [number, number]
}
export interface ProviderUpdatePayload {

View File

@@ -165,39 +165,5 @@ onMounted(async () => {
/>
</label>
</section>
<section class="section-card">
<h2 class="font-semibold">视频理解多模态</h2>
<p class="text-xs text-gray-500">
启用后会按抽帧间隔截取视频帧拼成网格图连同字幕一起喂给视觉模型提升画面相关问题的回答质量
<strong class="text-amber-700">需要选择视觉模型</strong>GPT-4o / Gemini / Claude 文字模型会忽略图片
</p>
<label class="flex items-center gap-2 text-sm">
<input v-model="settings.video_understanding" type="checkbox">
启用视频理解
</label>
<div v-if="settings.video_understanding" class="grid grid-cols-3 gap-3 text-sm">
<label class="flex flex-col gap-1">
<span class="text-gray-600">抽帧间隔(, 1-30)</span>
<input v-model.number="settings.video_interval" type="number" min="1" max="30" class="input">
</label>
<label class="flex flex-col gap-1">
<span class="text-gray-600">拼图行 (1-10)</span>
<input
:value="settings.grid_size?.[0] ?? 2"
type="number" min="1" max="10" class="input"
@input="settings.grid_size = [Number(($event.target as HTMLInputElement).value) || 2, settings.grid_size?.[1] ?? 2]"
>
</label>
<label class="flex flex-col gap-1">
<span class="text-gray-600">拼图列 (1-10)</span>
<input
:value="settings.grid_size?.[1] ?? 2"
type="number" min="1" max="10" class="input"
@input="settings.grid_size = [settings.grid_size?.[0] ?? 2, Number(($event.target as HTMLInputElement).value) || 2]"
>
</label>
</div>
</section>
</div>
</template>

View File

@@ -80,9 +80,6 @@ async function start() {
link: formats.includes('link'),
style: settings.value.style || undefined,
extras: settings.value.extras || undefined,
video_understanding: settings.value.video_understanding || undefined,
video_interval: settings.value.video_understanding ? settings.value.video_interval : undefined,
grid_size: settings.value.video_understanding ? settings.value.grid_size : undefined,
prefetched_transcript: prefetched ?? undefined,
})
activeTaskId.value = task_id
@@ -228,41 +225,6 @@ onUnmounted(() => {
placeholder="例如:重点关注游戏开发部分;保留所有专业术语原文"
/>
</label>
<label class="flex items-center gap-2 mt-2">
<input v-model="settings.video_understanding" type="checkbox">
<span class="text-gray-600">启用视频理解抽帧拼图喂视觉模型</span>
</label>
<div v-if="settings.video_understanding" class="grid grid-cols-3 gap-2 mt-2">
<label class="flex flex-col gap-1">
<span class="text-gray-600">抽帧间隔()</span>
<input
v-model.number="settings.video_interval"
type="number" min="1" max="30"
class="border rounded px-1 py-0.5"
>
</label>
<label class="flex flex-col gap-1">
<span class="text-gray-600">拼图行</span>
<input
:value="settings.grid_size?.[0] ?? 2"
type="number" min="1" max="10"
class="border rounded px-1 py-0.5"
@input="settings.grid_size = [Number(($event.target as HTMLInputElement).value) || 2, settings.grid_size?.[1] ?? 2]"
>
</label>
<label class="flex flex-col gap-1">
<span class="text-gray-600">拼图列</span>
<input
:value="settings.grid_size?.[1] ?? 2"
type="number" min="1" max="10"
class="border rounded px-1 py-0.5"
@input="settings.grid_size = [settings.grid_size?.[0] ?? 2, Number(($event.target as HTMLInputElement).value) || 2]"
>
</label>
</div>
<p v-if="settings.video_understanding" class="text-amber-700 mt-1">
需要选择视觉模型GPT-4o / Gemini / Claude 文字模型会忽略图片
</p>
</details>
<div class="text-xs text-gray-600">