feat(hermes): add stt config form

This commit is contained in:
晴天
2026-05-26 01:29:32 +08:00
parent bc7fa7b11b
commit 30dd6cc2e2
8 changed files with 764 additions and 3 deletions

View File

@@ -179,6 +179,15 @@ const BROWSER_DEFAULTS = {
browserEngine: 'auto',
}
// Fallback values for the speech-to-text (STT) settings form.
// They seed `sttValues` and are spread underneath the values returned by the
// STT read/save calls, so any key missing from a response keeps the value below.
const STT_DEFAULTS = {
sttEnabled: true, // bound to the #hm-stt-enabled checkbox
sttProvider: 'auto', // one of STT_PROVIDERS
sttLocalModel: 'base', // one of STT_LOCAL_MODELS
sttLocalLanguage: '', // free-text input; empty means no language is entered
sttOpenaiModel: 'whisper-1', // one of STT_OPENAI_MODELS
sttMistralModel: 'voxtral-mini-latest', // one of STT_MISTRAL_MODELS
}
const TERMINAL_DEFAULTS = {
terminalBackend: 'local',
terminalCwd: '.',
@@ -197,6 +206,10 @@ const STREAMING_TRANSPORTS = ['edit', 'auto', 'draft', 'off']
const CODE_EXECUTION_MODES = ['project', 'strict']
const TERMINAL_BACKENDS = ['local', 'ssh', 'docker', 'singularity', 'modal', 'daytona', 'vercel_sandbox']
const BROWSER_ENGINES = ['auto', 'lightpanda', 'chrome']
// Choices offered by the <select> elements in renderSttPanel(). Each entry is
// both the submitted value and the suffix of its i18n label key, e.g.
// `engine.hermesSttConfigProvider_auto` or `engine.hermesSttConfigLocalModel_large-v3`.
const STT_PROVIDERS = ['auto', 'local', 'groq', 'openai', 'mistral']
const STT_LOCAL_MODELS = ['tiny', 'base', 'small', 'medium', 'large-v3', 'turbo']
const STT_OPENAI_MODELS = ['whisper-1', 'gpt-4o-mini-transcribe', 'gpt-4o-transcribe']
const STT_MISTRAL_MODELS = ['voxtral-mini-latest', 'voxtral-mini-2602']
const UNAUTHORIZED_DM_BEHAVIORS = ['pair', 'ignore']
const IMAGE_INPUT_MODES = ['auto', 'native', 'text']
const DISPLAY_TOOL_PROGRESS_VALUES = ['off', 'new', 'all', 'verbose']
@@ -238,6 +251,7 @@ export function render() {
let approvalsValues = { ...APPROVALS_DEFAULTS }
let privacyValues = { ...PRIVACY_DEFAULTS }
let browserValues = { ...BROWSER_DEFAULTS }
let sttValues = { ...STT_DEFAULTS }
let terminalValues = { ...TERMINAL_DEFAULTS }
let loading = true
let runtimeLoading = true
@@ -262,6 +276,7 @@ export function render() {
let approvalsLoading = true
let privacyLoading = true
let browserLoading = true
let sttLoading = true
let terminalLoading = true
let saving = false
let runtimeSaving = false
@@ -286,6 +301,7 @@ export function render() {
let approvalsSaving = false
let privacySaving = false
let browserSaving = false
let sttSaving = false
let terminalSaving = false
let error = null
let runtimeError = null
@@ -310,6 +326,7 @@ export function render() {
let approvalsError = null
let privacyError = null
let browserError = null
let sttError = null
let terminalError = null
function esc(value) {
@@ -321,7 +338,7 @@ export function render() {
}
/**
 * Report whether any config section is currently loading or saving.
 *
 * Covers every per-section `*Loading` and `*Saving` flag, including the
 * speech-to-text ones (`sttLoading`, `sttSaving`). There must be exactly one
 * return here: a second, earlier return without the STT flags would make the
 * complete check unreachable.
 *
 * @returns {boolean} true while at least one flag is set.
 */
function isBusy() {
  return loading || runtimeLoading || compressionLoading || promptCachingLoading
    || toolGuardrailsLoading || memoryLoading || skillsLoading || quickCommandsLoading
    || agentToolsetsLoading || agentRuntimeLoading || unauthorizedDmLoading || securityLoading
    || displayLoading || humanDelayLoading || streamingLoading || executionLimitsLoading
    || ioSafetyLoading || checkpointsLoading || cronLoading || loggingLoading
    || approvalsLoading || privacyLoading || browserLoading || sttLoading || terminalLoading
    || saving || runtimeSaving || compressionSaving || promptCachingSaving
    || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving
    || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || securitySaving
    || displaySaving || humanDelaySaving || streamingSaving || executionLimitsSaving
    || ioSafetySaving || checkpointsSaving || cronSaving || loggingSaving
    || approvalsSaving || privacySaving || browserSaving || sttSaving || terminalSaving
}
function option(labelKey, value, selected) {
@@ -1314,7 +1331,7 @@ export function render() {
}
function renderBrowserPanel() {
const disabled = loading || saving || browserLoading || browserSaving || approvalsSaving || cronSaving || loggingSaving || privacySaving || terminalSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || ioSafetySaving || checkpointsSaving
const disabled = loading || saving || browserLoading || browserSaving || approvalsSaving || cronSaving || loggingSaving || privacySaving || sttSaving || terminalSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || ioSafetySaving || checkpointsSaving
return `
<div class="hm-panel hm-config-runtime-panel hm-config-browser-panel">
<div class="hm-panel-header">
@@ -1357,8 +1374,66 @@ export function render() {
`
}
/**
 * Build the <option> markup for one speech-to-text <select>.
 *
 * The known choices are rendered through the shared `option()` helper. If the
 * stored value is non-empty and not among them (for example a model name set
 * through the raw YAML editor), it is appended as an extra, pre-selected entry
 * labelled with the raw value. Without that entry nothing is selected, the
 * select shows its first choice, and the next save overwrites the stored value.
 *
 * @param {string} labelPrefix i18n key prefix; the choice is appended after `_`.
 * @param {string[]} choices known values for the select.
 * @param {*} current currently stored value.
 * @returns {string} concatenated <option> markup.
 */
function renderSttSelectOptions(labelPrefix, choices, current) {
  const known = choices.map(choice => option(`${labelPrefix}_${choice}`, choice, current)).join('')
  if (current == null || current === '' || choices.includes(current)) return known
  const raw = esc(String(current))
  return `${known}<option value="${raw}" selected>${raw}</option>`
}

/**
 * Render the speech-to-text (STT) settings panel as an HTML string.
 *
 * Reads `sttValues`, `sttError` and the loading/saving flags from the enclosing
 * scope. Every control is disabled while this section loads or while any of
 * the listed sections is saving.
 *
 * @returns {string} markup for the panel.
 */
function renderSttPanel() {
  const disabled = loading || saving || sttLoading || sttSaving || approvalsSaving || cronSaving || loggingSaving || privacySaving || browserSaving || terminalSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || ioSafetySaving || checkpointsSaving
  const disabledAttr = disabled ? 'disabled' : ''
  return `
<div class="hm-panel hm-config-runtime-panel hm-config-stt-panel">
<div class="hm-panel-header">
<div>
<div class="hm-panel-title">${t('engine.hermesSttConfigTitle')}</div>
<div class="hm-channel-panel-desc">${t('engine.hermesSttConfigDesc')}</div>
</div>
<div class="hm-panel-actions">
<span class="hm-muted">${sttSaving ? t('engine.hermesConfigStatusSaving') : sttLoading ? t('engine.hermesConfigStatusLoading') : t('engine.hermesSttConfigStatusReady')}</span>
<button class="hm-btn hm-btn--cta hm-btn--sm" id="hm-stt-save" ${disabledAttr}>${t('engine.hermesSttConfigSave')}</button>
</div>
</div>
<div class="hm-panel-body">
${renderError(sttError)}
<div class="hm-config-check-grid">
<label class="hm-channel-check">
<input id="hm-stt-enabled" type="checkbox" ${sttValues.sttEnabled ? 'checked' : ''} ${disabledAttr}>
<span>${t('engine.hermesSttConfigEnabled')}</span>
</label>
</div>
<div class="hm-config-runtime-grid hm-config-stt-grid">
<label class="hm-field">
<span class="hm-field-label">${t('engine.hermesSttConfigProvider')}</span>
<select id="hm-stt-provider" class="hm-input" ${disabledAttr}>
${renderSttSelectOptions('engine.hermesSttConfigProvider', STT_PROVIDERS, sttValues.sttProvider)}
</select>
</label>
<label class="hm-field">
<span class="hm-field-label">${t('engine.hermesSttConfigLocalModel')}</span>
<select id="hm-stt-local-model" class="hm-input" ${disabledAttr}>
${renderSttSelectOptions('engine.hermesSttConfigLocalModel', STT_LOCAL_MODELS, sttValues.sttLocalModel)}
</select>
</label>
<label class="hm-field">
<span class="hm-field-label">${t('engine.hermesSttConfigLocalLanguage')}</span>
<input id="hm-stt-local-language" class="hm-input" placeholder="zh" value="${esc(sttValues.sttLocalLanguage)}" ${disabledAttr}>
</label>
<label class="hm-field">
<span class="hm-field-label">${t('engine.hermesSttConfigOpenaiModel')}</span>
<select id="hm-stt-openai-model" class="hm-input" ${disabledAttr}>
${renderSttSelectOptions('engine.hermesSttConfigOpenaiModel', STT_OPENAI_MODELS, sttValues.sttOpenaiModel)}
</select>
</label>
<label class="hm-field">
<span class="hm-field-label">${t('engine.hermesSttConfigMistralModel')}</span>
<select id="hm-stt-mistral-model" class="hm-input" ${disabledAttr}>
${renderSttSelectOptions('engine.hermesSttConfigMistralModel', STT_MISTRAL_MODELS, sttValues.sttMistralModel)}
</select>
</label>
</div>
<div class="hm-channel-footnote">${t('engine.hermesSttConfigFootnote')}</div>
</div>
</div>
`
}
function renderTerminalPanel() {
const disabled = loading || saving || terminalLoading || terminalSaving || approvalsSaving || cronSaving || loggingSaving || browserSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || checkpointsSaving
const disabled = loading || saving || terminalLoading || terminalSaving || approvalsSaving || cronSaving || loggingSaving || browserSaving || sttSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || checkpointsSaving
return `
<div class="hm-panel hm-config-runtime-panel hm-config-terminal-panel">
<div class="hm-panel-header">
@@ -1453,6 +1528,7 @@ export function render() {
${renderApprovalsPanel()}
${renderPrivacyPanel()}
${renderBrowserPanel()}
${renderSttPanel()}
${renderCompressionPanel()}
${renderPromptCachingPanel()}
${renderToolGuardrailsPanel()}
@@ -1506,6 +1582,7 @@ export function render() {
el.querySelector('#hm-approvals-save')?.addEventListener('click', saveApprovalsConfig)
el.querySelector('#hm-privacy-save')?.addEventListener('click', savePrivacyConfig)
el.querySelector('#hm-browser-save')?.addEventListener('click', saveBrowserConfig)
el.querySelector('#hm-stt-save')?.addEventListener('click', saveSttConfig)
el.querySelector('#hm-terminal-save')?.addEventListener('click', saveTerminal)
}
@@ -1624,6 +1701,11 @@ export function render() {
browserValues = { ...BROWSER_DEFAULTS, ...(data?.values || {}) }
}
/**
 * Fetch the stored speech-to-text settings and refresh `sttValues`.
 *
 * Keys missing from the response (or a response without `values`) fall back
 * to STT_DEFAULTS. Rejections from the API call propagate to the caller.
 */
async function loadSttConfig() {
  const response = await api.hermesSttConfigRead()
  const stored = response?.values || {}
  sttValues = { ...STT_DEFAULTS, ...stored }
}
async function loadTerminal() {
const data = await api.hermesTerminalConfigRead()
terminalValues = { ...TERMINAL_DEFAULTS, ...(data?.values || {}) }
@@ -1653,6 +1735,7 @@ export function render() {
approvalsLoading = true
privacyLoading = true
browserLoading = true
sttLoading = true
terminalLoading = true
error = null
runtimeError = null
@@ -1677,6 +1760,7 @@ export function render() {
approvalsError = null
privacyError = null
browserError = null
sttError = null
terminalError = null
draw()
try {
@@ -1790,6 +1874,14 @@ export function render() {
browserLoading = false
draw()
}
try {
await loadSttConfig()
} catch (err) {
sttError = humanizeError(err, t('engine.hermesSttConfigLoadFailed') || 'Load speech transcription config failed')
} finally {
sttLoading = false
draw()
}
try {
await loadTerminal()
} catch (err) {
@@ -2604,6 +2696,36 @@ export function render() {
}
}
/**
 * Collect the speech-to-text form, persist it, and refresh the panel state.
 *
 * Missing controls fall back to the same literals used in STT_DEFAULTS. The
 * free-text language field is trimmed so stray spaces (or a whitespace-only
 * entry) are not written to the config as a language code.
 *
 * On success `sttValues` is refreshed from the response (or from the submitted
 * form if the response has no `values`), the raw view is refreshed, and a
 * success toast is shown, with a backup hint when the response carries one.
 * On failure the error is stored in `sttError` and shown as a toast.
 * `sttSaving` is always reset and the view redrawn.
 */
async function saveSttConfig() {
  const form = {
    sttEnabled: !!el.querySelector('#hm-stt-enabled')?.checked,
    sttProvider: el.querySelector('#hm-stt-provider')?.value || 'auto',
    sttLocalModel: el.querySelector('#hm-stt-local-model')?.value || 'base',
    // Free-text input: normalise surrounding whitespace before saving.
    sttLocalLanguage: (el.querySelector('#hm-stt-local-language')?.value || '').trim(),
    sttOpenaiModel: el.querySelector('#hm-stt-openai-model')?.value || 'whisper-1',
    sttMistralModel: el.querySelector('#hm-stt-mistral-model')?.value || 'voxtral-mini-latest',
  }
  sttSaving = true
  sttError = null
  draw()
  try {
    const result = await api.hermesSttConfigSave(form)
    sttValues = { ...STT_DEFAULTS, ...(result?.values || form) }
    await refreshRawAfterStructuredSave()
    const backup = result?.backup || ''
    toast({
      message: t('engine.hermesSttConfigSaveSuccess'),
      hint: backup ? t('engine.hermesConfigBackupHint', { path: backup }) : '',
    }, 'success')
  } catch (err) {
    sttError = humanizeError(err, t('engine.hermesSttConfigSaveFailed') || 'Save speech transcription config failed')
    toast(sttError, 'error')
  } finally {
    sttSaving = false
    draw()
  }
}
async function saveTerminal() {
const form = {
terminalBackend: el.querySelector('#hm-terminal-backend')?.value || 'local',

View File

@@ -553,6 +553,8 @@ export const api = {
hermesPrivacyConfigSave: (form) => invoke('hermes_privacy_config_save', { form }),
hermesBrowserConfigRead: () => invoke('hermes_browser_config_read'),
hermesBrowserConfigSave: (form) => invoke('hermes_browser_config_save', { form }),
hermesSttConfigRead: () => invoke('hermes_stt_config_read'),
hermesSttConfigSave: (form) => invoke('hermes_stt_config_save', { form }),
hermesTerminalConfigRead: () => invoke('hermes_terminal_config_read'),
hermesTerminalConfigSave: (form) => invoke('hermes_terminal_config_save', { form }),
hermesLazyDepsFeatures: () => cachedInvoke('hermes_lazy_deps_features', {}, 600000),

View File

@@ -660,6 +660,36 @@ export default {
hermesBrowserConfigEngine_lightpanda: _('Lightpanda 快速导航', 'Lightpanda fast navigation', 'Lightpanda 快速導覽'),
hermesBrowserConfigEngine_chrome: _('Chrome 完整浏览器', 'Chrome full browser', 'Chrome 完整瀏覽器'),
hermesBrowserConfigFootnote: _('Lightpanda 导航更快但不支持截图;录制会把 WebM 写入 Hermes browser_recordings 目录请只在需要审计时开启。CDP、Dialog 和 Camofox 高级字段会保留在 raw YAML 中。', 'Lightpanda navigates faster but does not support screenshots. Recording writes WebM files into the Hermes browser_recordings directory, so enable it only for audits. Advanced CDP, Dialog, and Camofox fields stay in raw YAML.', 'Lightpanda 導覽更快但不支援截圖;錄製會把 WebM 寫入 Hermes browser_recordings 目錄請只在需要稽核時開啟。CDP、Dialog 和 Camofox 進階欄位會保留在 raw YAML 中。'),
hermesSttConfigTitle: _('语音转写', 'Speech transcription', '語音轉寫'),
hermesSttConfigDesc: _('控制消息平台语音消息是否自动转写以及本地、OpenAI 和 Mistral 转写模型。适合需要处理语音反馈的渠道。', 'Control automatic voice-message transcription for messaging platforms, plus local, OpenAI, and Mistral transcription models. Useful for channels that receive voice feedback.', '控制訊息平台語音訊息是否自動轉寫以及本機、OpenAI 和 Mistral 轉寫模型。適合需要處理語音回饋的渠道。'),
hermesSttConfigStatusReady: _('结构化配置', 'structured settings', '結構化設定'),
hermesSttConfigSave: _('保存转写配置', 'Save transcription settings', '儲存轉寫設定'),
hermesSttConfigSaveSuccess: _('语音转写配置已保存,建议重启 Hermes Gateway 生效', 'Speech transcription settings saved. Restart Hermes Gateway to take effect.', '語音轉寫設定已儲存,建議重啟 Hermes Gateway 生效'),
hermesSttConfigLoadFailed: _('加载语音转写配置失败', 'Load speech transcription settings failed', '載入語音轉寫設定失敗'),
hermesSttConfigSaveFailed: _('保存语音转写配置失败', 'Save speech transcription settings failed', '儲存語音轉寫設定失敗'),
hermesSttConfigEnabled: _('启用语音消息自动转写', 'Enable voice-message transcription', '啟用語音訊息自動轉寫'),
hermesSttConfigProvider: _('转写服务', 'Transcription provider', '轉寫服務'),
hermesSttConfigProvider_auto: _('自动选择', 'Auto select', '自動選擇'),
hermesSttConfigProvider_local: _('本地 faster-whisper', 'Local faster-whisper', '本機 faster-whisper'),
hermesSttConfigProvider_groq: _('Groq Whisper', 'Groq Whisper', 'Groq Whisper'),
hermesSttConfigProvider_openai: _('OpenAI Whisper / GPT 转写', 'OpenAI Whisper / GPT transcription', 'OpenAI Whisper / GPT 轉寫'),
hermesSttConfigProvider_mistral: _('Mistral Voxtral', 'Mistral Voxtral', 'Mistral Voxtral'),
hermesSttConfigLocalModel: _('本地模型', 'Local model', '本機模型'),
hermesSttConfigLocalModel_tiny: _('tiny最快', 'tiny (fastest)', 'tiny最快'),
hermesSttConfigLocalModel_base: _('base默认', 'base (default)', 'base預設'),
hermesSttConfigLocalModel_small: _('small更准', 'small (more accurate)', 'small更準'),
hermesSttConfigLocalModel_medium: _('medium高精度', 'medium (high accuracy)', 'medium高精度'),
'hermesSttConfigLocalModel_large-v3': _('large-v3最高精度', 'large-v3 (highest accuracy)', 'large-v3最高精度'),
hermesSttConfigLocalModel_turbo: _('turbo速度优先', 'turbo (speed first)', 'turbo速度優先'),
hermesSttConfigLocalLanguage: _('强制语言(可留空)', 'Forced language, optional', '強制語言(可留空)'),
hermesSttConfigOpenaiModel: _('OpenAI 模型', 'OpenAI model', 'OpenAI 模型'),
'hermesSttConfigOpenaiModel_whisper-1': _('whisper-1经典', 'whisper-1 (classic)', 'whisper-1經典'),
'hermesSttConfigOpenaiModel_gpt-4o-mini-transcribe': _('gpt-4o-mini-transcribe低成本', 'gpt-4o-mini-transcribe (lower cost)', 'gpt-4o-mini-transcribe低成本'),
'hermesSttConfigOpenaiModel_gpt-4o-transcribe': _('gpt-4o-transcribe高质量', 'gpt-4o-transcribe (higher quality)', 'gpt-4o-transcribe高品質'),
hermesSttConfigMistralModel: _('Mistral 模型', 'Mistral model', 'Mistral 模型'),
'hermesSttConfigMistralModel_voxtral-mini-latest': _('voxtral-mini-latest推荐', 'voxtral-mini-latest (recommended)', 'voxtral-mini-latest建議'),
'hermesSttConfigMistralModel_voxtral-mini-2602': _('voxtral-mini-2602固定版本', 'voxtral-mini-2602 (pinned version)', 'voxtral-mini-2602固定版本'),
hermesSttConfigFootnote: _('这里写入 stt.*。API Key 仍通过 .env 管理Groq 使用上游默认模型,其他 provider 高级字段会保留在 raw YAML 中。', 'This writes stt.*. API keys are still managed through .env. Groq uses the upstream default model, and other provider advanced fields stay in raw YAML.', '這裡寫入 stt.*。API Key 仍透過 .env 管理Groq 使用上游預設模型,其他 provider 進階欄位會保留在 raw YAML 中。'),
hermesCompressionTitle: _('上下文压缩', 'Context compression', '上下文壓縮'),
hermesCompressionDesc: _('控制长对话何时触发压缩、压缩目标和保留范围,降低上下文过长导致的失败与费用浪费。', 'Control when long conversations are compressed, the target size, and protected message ranges to reduce failures and wasted cost from oversized context.', '控制長對話何時觸發壓縮、壓縮目標和保留範圍,降低上下文過長導致的失敗與費用浪費。'),
hermesCompressionStatusReady: _('结构化配置', 'structured settings', '結構化設定'),