From 30dd6cc2e2a52f7076c130dfc00d11270225b1c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=99=B4=E5=A4=A9?= Date: Tue, 26 May 2026 01:29:32 +0800 Subject: [PATCH] feat(hermes): add stt config form --- scripts/dev-api.js | 113 +++++++++ src-tauri/src/commands/hermes.rs | 375 ++++++++++++++++++++++++++++ src-tauri/src/lib.rs | 2 + src/engines/hermes/pages/config.js | 128 +++++++++- src/lib/tauri-api.js | 2 + src/locales/modules/engine.js | 30 +++ tests/hermes-config-page-ui.test.js | 15 ++ tests/hermes-stt-config.test.js | 102 ++++++++ 8 files changed, 764 insertions(+), 3 deletions(-) create mode 100644 tests/hermes-stt-config.test.js diff --git a/scripts/dev-api.js b/scripts/dev-api.js index f311bed..b840d12 100644 --- a/scripts/dev-api.js +++ b/scripts/dev-api.js @@ -3325,6 +3325,10 @@ const HERMES_STREAMING_TRANSPORTS = new Set(['auto', 'draft', 'edit', 'off']) const HERMES_CODE_EXECUTION_MODES = new Set(['project', 'strict']) const HERMES_TERMINAL_BACKENDS = new Set(['local', 'ssh', 'docker', 'singularity', 'modal', 'daytona', 'vercel_sandbox']) const HERMES_BROWSER_ENGINES = new Set(['auto', 'lightpanda', 'chrome']) +const HERMES_STT_PROVIDERS = new Set(['auto', 'local', 'groq', 'openai', 'mistral']) +const HERMES_STT_LOCAL_MODELS = new Set(['tiny', 'base', 'small', 'medium', 'large-v3', 'turbo']) +const HERMES_STT_OPENAI_MODELS = new Set(['whisper-1', 'gpt-4o-mini-transcribe', 'gpt-4o-transcribe']) +const HERMES_STT_MISTRAL_MODELS = new Set(['voxtral-mini-latest', 'voxtral-mini-2602']) const HERMES_APPROVAL_MODES = new Set(['manual', 'smart', 'off']) const HERMES_APPROVAL_CRON_MODES = new Set(['deny', 'approve']) const HERMES_LOGGING_LEVELS = new Set(['DEBUG', 'INFO', 'WARNING']) @@ -3417,6 +3421,42 @@ function normalizeHermesBrowserEngine(value, strict = false) { return 'auto' } +function normalizeHermesSttProvider(value, strict = false) { + const provider = String(value ?? '').trim().toLowerCase() || 'auto' + if (HERMES_STT_PROVIDERS.has(provider)) return provider + if (strict) throw new Error('stt.provider 必须是 auto、local、groq、openai 或 mistral') + return 'auto' +} + +function normalizeHermesSttLocalModel(value, strict = false) { + const model = String(value ?? '').trim().toLowerCase() || 'base' + if (HERMES_STT_LOCAL_MODELS.has(model)) return model + if (strict) throw new Error('stt.local.model 必须是 tiny、base、small、medium、large-v3 或 turbo') + return 'base' +} + +function normalizeHermesSttOpenaiModel(value, strict = false) { + const model = String(value ?? '').trim() || 'whisper-1' + if (HERMES_STT_OPENAI_MODELS.has(model)) return model + if (strict) throw new Error('stt.openai.model 必须是 whisper-1、gpt-4o-mini-transcribe 或 gpt-4o-transcribe') + return 'whisper-1' +} + +function normalizeHermesSttMistralModel(value, strict = false) { + const model = String(value ?? '').trim() || 'voxtral-mini-latest' + if (HERMES_STT_MISTRAL_MODELS.has(model)) return model + if (strict) throw new Error('stt.mistral.model 必须是 voxtral-mini-latest 或 voxtral-mini-2602') + return 'voxtral-mini-latest' +} + +function normalizeHermesSttLanguage(value, strict = false) { + const language = String(value ?? '').trim() + if (!language) return '' + if (/^[a-z]{2,3}(-[A-Za-z0-9]+)?$/.test(language)) return language + if (strict) throw new Error('stt.local.language 必须为空或合法语言标签,例如 zh、en、pt-BR') + return '' +} + function normalizeHermesApprovalMode(value, strict = false) { const mode = String(value ?? '').trim().toLowerCase() || 'manual' if (HERMES_APPROVAL_MODES.has(mode)) return mode @@ -4281,6 +4321,58 @@ export function mergeHermesBrowserConfig(config = {}, form = {}) { return next } +export function buildHermesSttConfigValues(config = {}) { + const root = config && typeof config === 'object' && !Array.isArray(config) ? config : {} + const stt = root.stt && typeof root.stt === 'object' && !Array.isArray(root.stt) + ? root.stt + : {} + const local = stt.local && typeof stt.local === 'object' && !Array.isArray(stt.local) + ? stt.local + : {} + const openai = stt.openai && typeof stt.openai === 'object' && !Array.isArray(stt.openai) + ? stt.openai + : {} + const mistral = stt.mistral && typeof stt.mistral === 'object' && !Array.isArray(stt.mistral) + ? stt.mistral + : {} + return { + sttEnabled: readHermesBool(stt.enabled, true), + sttProvider: normalizeHermesSttProvider(stt.provider, false), + sttLocalModel: normalizeHermesSttLocalModel(local.model, false), + sttLocalLanguage: normalizeHermesSttLanguage(local.language, false), + sttOpenaiModel: normalizeHermesSttOpenaiModel(openai.model, false), + sttMistralModel: normalizeHermesSttMistralModel(mistral.model, false), + } +} + +export function mergeHermesSttConfig(config = {}, form = {}) { + const next = mergeConfigsPreservingFields({}, config && typeof config === 'object' && !Array.isArray(config) ? config : {}) + const currentValues = buildHermesSttConfigValues(next) + const stt = next.stt && typeof next.stt === 'object' && !Array.isArray(next.stt) + ? mergeConfigsPreservingFields(next.stt, {}) + : {} + const local = stt.local && typeof stt.local === 'object' && !Array.isArray(stt.local) + ? mergeConfigsPreservingFields(stt.local, {}) + : {} + const openai = stt.openai && typeof stt.openai === 'object' && !Array.isArray(stt.openai) + ? mergeConfigsPreservingFields(stt.openai, {}) + : {} + const mistral = stt.mistral && typeof stt.mistral === 'object' && !Array.isArray(stt.mistral) + ? mergeConfigsPreservingFields(stt.mistral, {}) + : {} + stt.enabled = formHermesBool(form, 'sttEnabled', currentValues.sttEnabled) + stt.provider = normalizeHermesSttProvider(Object.hasOwn(form, 'sttProvider') ? form.sttProvider : currentValues.sttProvider, true) + local.model = normalizeHermesSttLocalModel(Object.hasOwn(form, 'sttLocalModel') ? form.sttLocalModel : currentValues.sttLocalModel, true) + local.language = normalizeHermesSttLanguage(Object.hasOwn(form, 'sttLocalLanguage') ? form.sttLocalLanguage : currentValues.sttLocalLanguage, true) + openai.model = normalizeHermesSttOpenaiModel(Object.hasOwn(form, 'sttOpenaiModel') ? form.sttOpenaiModel : currentValues.sttOpenaiModel, true) + mistral.model = normalizeHermesSttMistralModel(Object.hasOwn(form, 'sttMistralModel') ? form.sttMistralModel : currentValues.sttMistralModel, true) + stt.local = local + stt.openai = openai + stt.mistral = mistral + next.stt = stt + return next +} + export function buildHermesTerminalConfigValues(config = {}) { const root = config && typeof config === 'object' && !Array.isArray(config) ? config : {} const terminal = root.terminal && typeof root.terminal === 'object' && !Array.isArray(root.terminal) @@ -11072,6 +11164,27 @@ const handlers = { } }, + hermes_stt_config_read() { + const { configPath, exists, config } = readHermesConfigYamlObject() + return { + exists, + configPath, + values: buildHermesSttConfigValues(config), + } + }, + + hermes_stt_config_save({ form } = {}) { + const { configPath, config } = readHermesConfigYamlObject() + const next = mergeHermesSttConfig(config, form || {}) + const backup = writeHermesConfigYamlObject(configPath, next) + return { + ok: true, + configPath, + backup, + values: buildHermesSttConfigValues(next), + } + }, + hermes_terminal_config_read() { const { configPath, exists, config } = readHermesConfigYamlObject() return { diff --git a/src-tauri/src/commands/hermes.rs b/src-tauri/src/commands/hermes.rs index b6f82a8..843ab2d 100644 --- a/src-tauri/src/commands/hermes.rs +++ b/src-tauri/src/commands/hermes.rs @@ -4825,6 +4825,115 @@ fn normalize_hermes_browser_engine(value: Option, strict: bool) -> Resul } } +fn normalize_hermes_stt_provider(value: Option, strict: bool) -> Result { + let provider = value.unwrap_or_default().trim().to_ascii_lowercase(); + let provider = if provider.is_empty() { + "auto".to_string() + } else { + provider + }; + if matches!( + provider.as_str(), + "auto" | "local" | "groq" | "openai" | "mistral" + ) { + return Ok(provider); + } + if strict { + Err("stt.provider 必须是 auto、local、groq、openai 或 mistral".to_string()) + } else { + Ok("auto".to_string()) + } +} + +fn normalize_hermes_stt_local_model(value: Option, strict: bool) -> Result { + let model = value.unwrap_or_default().trim().to_ascii_lowercase(); + let model = if model.is_empty() { + "base".to_string() + } else { + model + }; + if matches!( + model.as_str(), + "tiny" | "base" | "small" | "medium" | "large-v3" | "turbo" + ) { + return Ok(model); + } + if strict { + Err("stt.local.model 必须是 tiny、base、small、medium、large-v3 或 turbo".to_string()) + } else { + Ok("base".to_string()) + } +} + +fn normalize_hermes_stt_openai_model( + value: Option, + strict: bool, +) -> Result { + let model = value.unwrap_or_default().trim().to_string(); + let model = if model.is_empty() { + "whisper-1".to_string() + } else { + model + }; + if matches!( + model.as_str(), + "whisper-1" | "gpt-4o-mini-transcribe" | "gpt-4o-transcribe" + ) { + return Ok(model); + } + if strict { + Err( + "stt.openai.model 必须是 whisper-1、gpt-4o-mini-transcribe 或 gpt-4o-transcribe" + .to_string(), + ) + } else { + Ok("whisper-1".to_string()) + } +} + +fn normalize_hermes_stt_mistral_model( + value: Option, + strict: bool, +) -> Result { + let model = value.unwrap_or_default().trim().to_string(); + let model = if model.is_empty() { + "voxtral-mini-latest".to_string() + } else { + model + }; + if matches!(model.as_str(), "voxtral-mini-latest" | "voxtral-mini-2602") { + return Ok(model); + } + if strict { + Err("stt.mistral.model 必须是 voxtral-mini-latest 或 voxtral-mini-2602".to_string()) + } else { + Ok("voxtral-mini-latest".to_string()) + } +} + +fn normalize_hermes_stt_language(value: Option, strict: bool) -> Result { + let language = value.unwrap_or_default().trim().to_string(); + if language.is_empty() { + return Ok(String::new()); + } + let mut parts = language.split('-'); + let Some(first) = parts.next() else { + return Ok(String::new()); + }; + let first_valid = + (2..=3).contains(&first.len()) && first.chars().all(|ch| ch.is_ascii_lowercase()); + let rest_valid = + parts.all(|part| !part.is_empty() && part.chars().all(|ch| ch.is_ascii_alphanumeric())); + if first_valid && rest_valid { + return Ok(language); + } + if strict { + Err("stt.local.language 必须为空或合法语言标签,例如 zh、en、pt-BR".to_string()) + } else { + Ok(String::new()) + } +} + fn normalize_hermes_approval_mode(value: Option, strict: bool) -> Result { let mode = value.unwrap_or_default().trim().to_ascii_lowercase(); let mode = if mode.is_empty() { @@ -5662,6 +5771,130 @@ fn merge_hermes_browser_config(config: &mut serde_yaml::Value, form: &Value) -> Ok(()) } +fn build_hermes_stt_config_values(config: &serde_yaml::Value) -> Value { + let root = config.as_mapping(); + let stt = root.and_then(|map| yaml_get_mapping(map, "stt")); + let local = stt.and_then(|map| yaml_get_mapping(map, "local")); + let openai = stt.and_then(|map| yaml_get_mapping(map, "openai")); + let mistral = stt.and_then(|map| yaml_get_mapping(map, "mistral")); + let stt_enabled = stt + .and_then(|map| yaml_bool_field(map, "enabled")) + .unwrap_or(true); + let stt_provider = normalize_hermes_stt_provider( + stt.and_then(|map| yaml_string_field(map, "provider")), + false, + ) + .unwrap_or_else(|_| "auto".to_string()); + let stt_local_model = normalize_hermes_stt_local_model( + local.and_then(|map| yaml_string_field(map, "model")), + false, + ) + .unwrap_or_else(|_| "base".to_string()); + let stt_local_language = normalize_hermes_stt_language( + local.and_then(|map| yaml_string_field(map, "language")), + false, + ) + .unwrap_or_else(|_| String::new()); + let stt_openai_model = normalize_hermes_stt_openai_model( + openai.and_then(|map| yaml_string_field(map, "model")), + false, + ) + .unwrap_or_else(|_| "whisper-1".to_string()); + let stt_mistral_model = normalize_hermes_stt_mistral_model( + mistral.and_then(|map| yaml_string_field(map, "model")), + false, + ) + .unwrap_or_else(|_| "voxtral-mini-latest".to_string()); + + serde_json::json!({ + "sttEnabled": stt_enabled, + "sttProvider": stt_provider, + "sttLocalModel": stt_local_model, + "sttLocalLanguage": stt_local_language, + "sttOpenaiModel": stt_openai_model, + "sttMistralModel": stt_mistral_model, + }) +} + +fn merge_hermes_stt_config(config: &mut serde_yaml::Value, form: &Value) -> Result<(), String> { + let current = build_hermes_stt_config_values(config); + let stt_enabled = form_bool(form, "sttEnabled") + .unwrap_or_else(|| current["sttEnabled"].as_bool().unwrap_or(true)); + let stt_provider = normalize_hermes_stt_provider( + if form.get("sttProvider").is_some() { + form_string(form, "sttProvider") + } else { + current["sttProvider"].as_str().map(ToString::to_string) + }, + true, + )?; + let stt_local_model = normalize_hermes_stt_local_model( + if form.get("sttLocalModel").is_some() { + form_string(form, "sttLocalModel") + } else { + current["sttLocalModel"].as_str().map(ToString::to_string) + }, + true, + )?; + let stt_local_language = normalize_hermes_stt_language( + if form.get("sttLocalLanguage").is_some() { + form_string(form, "sttLocalLanguage") + } else { + current["sttLocalLanguage"] + .as_str() + .map(ToString::to_string) + }, + true, + )?; + let stt_openai_model = normalize_hermes_stt_openai_model( + if form.get("sttOpenaiModel").is_some() { + form_string(form, "sttOpenaiModel") + } else { + current["sttOpenaiModel"].as_str().map(ToString::to_string) + }, + true, + )?; + let stt_mistral_model = normalize_hermes_stt_mistral_model( + if form.get("sttMistralModel").is_some() { + form_string(form, "sttMistralModel") + } else { + current["sttMistralModel"].as_str().map(ToString::to_string) + }, + true, + )?; + + let root = ensure_yaml_object(config)?; + let stt = yaml_child_object(root, "stt")?; + stt.insert(yaml_key("enabled"), serde_yaml::Value::Bool(stt_enabled)); + stt.insert( + yaml_key("provider"), + serde_yaml::Value::String(stt_provider), + ); + + let local = yaml_child_object(stt, "local")?; + local.insert( + yaml_key("model"), + serde_yaml::Value::String(stt_local_model), + ); + local.insert( + yaml_key("language"), + serde_yaml::Value::String(stt_local_language), + ); + + let openai = yaml_child_object(stt, "openai")?; + openai.insert( + yaml_key("model"), + serde_yaml::Value::String(stt_openai_model), + ); + + let mistral = yaml_child_object(stt, "mistral")?; + mistral.insert( + yaml_key("model"), + serde_yaml::Value::String(stt_mistral_model), + ); + Ok(()) +} + fn merge_hermes_execution_limits_config( config: &mut serde_yaml::Value, form: &Value, @@ -7325,6 +7558,30 @@ pub fn hermes_browser_config_save(form: Value) -> Result { })) } +#[tauri::command] +pub fn hermes_stt_config_read() -> Result { + let (config_path, exists, config) = read_hermes_channel_yaml_config()?; + ensure_yaml_object(&mut config.clone())?; + Ok(serde_json::json!({ + "exists": exists, + "configPath": config_path.to_string_lossy(), + "values": build_hermes_stt_config_values(&config), + })) +} + +#[tauri::command] +pub fn hermes_stt_config_save(form: Value) -> Result { + let (config_path, _exists, mut config) = read_hermes_channel_yaml_config()?; + merge_hermes_stt_config(&mut config, &form)?; + let backup = write_hermes_yaml_config(&config_path, &config)?; + Ok(serde_json::json!({ + "ok": true, + "configPath": config_path.to_string_lossy(), + "backup": backup, + "values": build_hermes_stt_config_values(&config), + })) +} + #[tauri::command] pub fn hermes_terminal_config_read() -> Result { let (config_path, exists, config) = read_hermes_channel_yaml_config()?; @@ -13164,6 +13421,124 @@ streaming: } } +#[cfg(test)] +mod hermes_stt_config_tests { + use super::{build_hermes_stt_config_values, merge_hermes_stt_config}; + use serde_json::json; + + #[test] + fn stt_values_have_upstream_defaults() { + let config: serde_yaml::Value = serde_yaml::from_str("{}").unwrap(); + let values = build_hermes_stt_config_values(&config); + assert_eq!(values["sttEnabled"], true); + assert_eq!(values["sttProvider"], "auto"); + assert_eq!(values["sttLocalModel"], "base"); + assert_eq!(values["sttLocalLanguage"], ""); + assert_eq!(values["sttOpenaiModel"], "whisper-1"); + assert_eq!(values["sttMistralModel"], "voxtral-mini-latest"); + } + + #[test] + fn stt_values_read_yaml_fields() { + let config: serde_yaml::Value = serde_yaml::from_str( + r#" +stt: + enabled: false + provider: openai + local: + model: small + language: zh + openai: + model: gpt-4o-mini-transcribe + mistral: + model: voxtral-mini-2602 +"#, + ) + .unwrap(); + let values = build_hermes_stt_config_values(&config); + assert_eq!(values["sttEnabled"], false); + assert_eq!(values["sttProvider"], "openai"); + assert_eq!(values["sttLocalModel"], "small"); + assert_eq!(values["sttLocalLanguage"], "zh"); + assert_eq!(values["sttOpenaiModel"], "gpt-4o-mini-transcribe"); + assert_eq!(values["sttMistralModel"], "voxtral-mini-2602"); + } + + #[test] + fn merge_stt_config_preserves_unknown_fields() { + let mut config: serde_yaml::Value = serde_yaml::from_str( + r#" +model: + provider: anthropic +stt: + enabled: true + provider: auto + custom_flag: keep-stt + local: + model: base + custom_flag: keep-local +memory: + memory_enabled: true +"#, + ) + .unwrap(); + + merge_hermes_stt_config( + &mut config, + &json!({ + "sttEnabled": false, + "sttProvider": "openai", + "sttLocalModel": "small", + "sttLocalLanguage": "zh", + "sttOpenaiModel": "gpt-4o-mini-transcribe", + "sttMistralModel": "voxtral-mini-2602", + }), + ) + .unwrap(); + + assert_eq!(config["model"]["provider"].as_str(), Some("anthropic")); + assert_eq!(config["memory"]["memory_enabled"].as_bool(), Some(true)); + assert_eq!(config["stt"]["enabled"].as_bool(), Some(false)); + assert_eq!(config["stt"]["provider"].as_str(), Some("openai")); + assert_eq!(config["stt"]["local"]["model"].as_str(), Some("small")); + assert_eq!(config["stt"]["local"]["language"].as_str(), Some("zh")); + assert_eq!( + config["stt"]["openai"]["model"].as_str(), + Some("gpt-4o-mini-transcribe") + ); + assert_eq!( + config["stt"]["mistral"]["model"].as_str(), + Some("voxtral-mini-2602") + ); + assert_eq!(config["stt"]["custom_flag"].as_str(), Some("keep-stt")); + assert_eq!( + config["stt"]["local"]["custom_flag"].as_str(), + Some("keep-local") + ); + } + + #[test] + fn merge_stt_config_rejects_invalid_values() { + let mut config = serde_yaml::Value::Mapping(serde_yaml::Mapping::new()); + let err = + merge_hermes_stt_config(&mut config, &json!({ "sttProvider": "bad" })).unwrap_err(); + assert!(err.contains("stt.provider")); + let err = + merge_hermes_stt_config(&mut config, &json!({ "sttLocalModel": "giant" })).unwrap_err(); + assert!(err.contains("stt.local.model")); + let err = merge_hermes_stt_config(&mut config, &json!({ "sttOpenaiModel": "gpt-4.1" })) + .unwrap_err(); + assert!(err.contains("stt.openai.model")); + let err = + merge_hermes_stt_config(&mut config, &json!({ "sttMistralModel": "voxtral-large" })) + .unwrap_err(); + assert!(err.contains("stt.mistral.model")); + let err = merge_hermes_stt_config(&mut config, &json!({ "sttLocalLanguage": "中文" })) + .unwrap_err(); + assert!(err.contains("stt.local.language")); + } +} + #[cfg(test)] mod hermes_checkpoints_config_tests { use super::{build_hermes_checkpoints_config_values, merge_hermes_checkpoints_config}; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 0f9924b..9fadd2c 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -301,6 +301,8 @@ pub fn run() { hermes::hermes_privacy_config_save, hermes::hermes_browser_config_read, hermes::hermes_browser_config_save, + hermes::hermes_stt_config_read, + hermes::hermes_stt_config_save, hermes::hermes_terminal_config_read, hermes::hermes_terminal_config_save, hermes::hermes_lazy_deps_features, diff --git a/src/engines/hermes/pages/config.js b/src/engines/hermes/pages/config.js index c9851af..c9bb972 100644 --- a/src/engines/hermes/pages/config.js +++ b/src/engines/hermes/pages/config.js @@ -179,6 +179,15 @@ const BROWSER_DEFAULTS = { browserEngine: 'auto', } +const STT_DEFAULTS = { + sttEnabled: true, + sttProvider: 'auto', + sttLocalModel: 'base', + sttLocalLanguage: '', + sttOpenaiModel: 'whisper-1', + sttMistralModel: 'voxtral-mini-latest', +} + const TERMINAL_DEFAULTS = { terminalBackend: 'local', terminalCwd: '.', @@ -197,6 +206,10 @@ const STREAMING_TRANSPORTS = ['edit', 'auto', 'draft', 'off'] const CODE_EXECUTION_MODES = ['project', 'strict'] const TERMINAL_BACKENDS = ['local', 'ssh', 'docker', 'singularity', 'modal', 'daytona', 'vercel_sandbox'] const BROWSER_ENGINES = ['auto', 'lightpanda', 'chrome'] +const STT_PROVIDERS = ['auto', 'local', 'groq', 'openai', 'mistral'] +const STT_LOCAL_MODELS = ['tiny', 'base', 'small', 'medium', 'large-v3', 'turbo'] +const STT_OPENAI_MODELS = ['whisper-1', 'gpt-4o-mini-transcribe', 'gpt-4o-transcribe'] +const STT_MISTRAL_MODELS = ['voxtral-mini-latest', 'voxtral-mini-2602'] const UNAUTHORIZED_DM_BEHAVIORS = ['pair', 'ignore'] const IMAGE_INPUT_MODES = ['auto', 'native', 'text'] const DISPLAY_TOOL_PROGRESS_VALUES = ['off', 'new', 'all', 'verbose'] @@ -238,6 +251,7 @@ export function render() { let approvalsValues = { ...APPROVALS_DEFAULTS } let privacyValues = { ...PRIVACY_DEFAULTS } let browserValues = { ...BROWSER_DEFAULTS } + let sttValues = { ...STT_DEFAULTS } let terminalValues = { ...TERMINAL_DEFAULTS } let loading = true let runtimeLoading = true @@ -262,6 +276,7 @@ export function render() { let approvalsLoading = true let privacyLoading = true let browserLoading = true + let sttLoading = true let terminalLoading = true let saving = false let runtimeSaving = false @@ -286,6 +301,7 @@ export function render() { let approvalsSaving = false let privacySaving = false let browserSaving = false + let sttSaving = false let terminalSaving = false let error = null let runtimeError = null @@ -310,6 +326,7 @@ export function render() { let approvalsError = null let privacyError = null let browserError = null + let sttError = null let terminalError = null function esc(value) { @@ -321,7 +338,7 @@ export function render() { } function isBusy() { - return loading || runtimeLoading || compressionLoading || promptCachingLoading || toolGuardrailsLoading || memoryLoading || skillsLoading || quickCommandsLoading || agentToolsetsLoading || agentRuntimeLoading || unauthorizedDmLoading || securityLoading || displayLoading || humanDelayLoading || streamingLoading || executionLimitsLoading || ioSafetyLoading || checkpointsLoading || cronLoading || loggingLoading || approvalsLoading || privacyLoading || browserLoading || terminalLoading || saving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || securitySaving || displaySaving || humanDelaySaving || streamingSaving || executionLimitsSaving || ioSafetySaving || checkpointsSaving || cronSaving || loggingSaving || approvalsSaving || privacySaving || browserSaving || terminalSaving + return loading || runtimeLoading || compressionLoading || promptCachingLoading || toolGuardrailsLoading || memoryLoading || skillsLoading || quickCommandsLoading || agentToolsetsLoading || agentRuntimeLoading || unauthorizedDmLoading || securityLoading || displayLoading || humanDelayLoading || streamingLoading || executionLimitsLoading || ioSafetyLoading || checkpointsLoading || cronLoading || loggingLoading || approvalsLoading || privacyLoading || browserLoading || sttLoading || terminalLoading || saving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || securitySaving || displaySaving || humanDelaySaving || streamingSaving || executionLimitsSaving || ioSafetySaving || checkpointsSaving || cronSaving || loggingSaving || approvalsSaving || privacySaving || browserSaving || sttSaving || terminalSaving } function option(labelKey, value, selected) { @@ -1314,7 +1331,7 @@ export function render() { } function renderBrowserPanel() { - const disabled = loading || saving || browserLoading || browserSaving || approvalsSaving || cronSaving || loggingSaving || privacySaving || terminalSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || ioSafetySaving || checkpointsSaving + const disabled = loading || saving || browserLoading || browserSaving || approvalsSaving || cronSaving || loggingSaving || privacySaving || sttSaving || terminalSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || ioSafetySaving || checkpointsSaving return `
@@ -1357,8 +1374,66 @@ export function render() { ` } + function renderSttPanel() { + const disabled = loading || saving || sttLoading || sttSaving || approvalsSaving || cronSaving || loggingSaving || privacySaving || browserSaving || terminalSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || ioSafetySaving || checkpointsSaving + return ` +
+
+
+
${t('engine.hermesSttConfigTitle')}
+
${t('engine.hermesSttConfigDesc')}
+
+
+ ${sttSaving ? t('engine.hermesConfigStatusSaving') : sttLoading ? t('engine.hermesConfigStatusLoading') : t('engine.hermesSttConfigStatusReady')} + +
+
+
+ ${renderError(sttError)} +
+ +
+
+ + + + + +
+
${t('engine.hermesSttConfigFootnote')}
+
+
+ ` + } + function renderTerminalPanel() { - const disabled = loading || saving || terminalLoading || terminalSaving || approvalsSaving || cronSaving || loggingSaving || browserSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || checkpointsSaving + const disabled = loading || saving || terminalLoading || terminalSaving || approvalsSaving || cronSaving || loggingSaving || browserSaving || sttSaving || runtimeSaving || compressionSaving || promptCachingSaving || toolGuardrailsSaving || memorySaving || skillsSaving || quickCommandsSaving || agentToolsetsSaving || agentRuntimeSaving || unauthorizedDmSaving || streamingSaving || executionLimitsSaving || checkpointsSaving return `
@@ -1453,6 +1528,7 @@ export function render() { ${renderApprovalsPanel()} ${renderPrivacyPanel()} ${renderBrowserPanel()} + ${renderSttPanel()} ${renderCompressionPanel()} ${renderPromptCachingPanel()} ${renderToolGuardrailsPanel()} @@ -1506,6 +1582,7 @@ export function render() { el.querySelector('#hm-approvals-save')?.addEventListener('click', saveApprovalsConfig) el.querySelector('#hm-privacy-save')?.addEventListener('click', savePrivacyConfig) el.querySelector('#hm-browser-save')?.addEventListener('click', saveBrowserConfig) + el.querySelector('#hm-stt-save')?.addEventListener('click', saveSttConfig) el.querySelector('#hm-terminal-save')?.addEventListener('click', saveTerminal) } @@ -1624,6 +1701,11 @@ export function render() { browserValues = { ...BROWSER_DEFAULTS, ...(data?.values || {}) } } + async function loadSttConfig() { + const data = await api.hermesSttConfigRead() + sttValues = { ...STT_DEFAULTS, ...(data?.values || {}) } + } + async function loadTerminal() { const data = await api.hermesTerminalConfigRead() terminalValues = { ...TERMINAL_DEFAULTS, ...(data?.values || {}) } @@ -1653,6 +1735,7 @@ export function render() { approvalsLoading = true privacyLoading = true browserLoading = true + sttLoading = true terminalLoading = true error = null runtimeError = null @@ -1677,6 +1760,7 @@ export function render() { approvalsError = null privacyError = null browserError = null + sttError = null terminalError = null draw() try { @@ -1790,6 +1874,14 @@ export function render() { browserLoading = false draw() } + try { + await loadSttConfig() + } catch (err) { + sttError = humanizeError(err, t('engine.hermesSttConfigLoadFailed') || 'Load speech transcription config failed') + } finally { + sttLoading = false + draw() + } try { await loadTerminal() } catch (err) { @@ -2604,6 +2696,36 @@ export function render() { } } + async function saveSttConfig() { + const form = { + sttEnabled: !!el.querySelector('#hm-stt-enabled')?.checked, + sttProvider: el.querySelector('#hm-stt-provider')?.value || 'auto', + sttLocalModel: el.querySelector('#hm-stt-local-model')?.value || 'base', + sttLocalLanguage: el.querySelector('#hm-stt-local-language')?.value || '', + sttOpenaiModel: el.querySelector('#hm-stt-openai-model')?.value || 'whisper-1', + sttMistralModel: el.querySelector('#hm-stt-mistral-model')?.value || 'voxtral-mini-latest', + } + sttSaving = true + sttError = null + draw() + try { + const result = await api.hermesSttConfigSave(form) + sttValues = { ...STT_DEFAULTS, ...(result?.values || form) } + await refreshRawAfterStructuredSave() + const backup = result?.backup || '' + toast({ + message: t('engine.hermesSttConfigSaveSuccess'), + hint: backup ? t('engine.hermesConfigBackupHint', { path: backup }) : '', + }, 'success') + } catch (err) { + sttError = humanizeError(err, t('engine.hermesSttConfigSaveFailed') || 'Save speech transcription config failed') + toast(sttError, 'error') + } finally { + sttSaving = false + draw() + } + } + async function saveTerminal() { const form = { terminalBackend: el.querySelector('#hm-terminal-backend')?.value || 'local', diff --git a/src/lib/tauri-api.js b/src/lib/tauri-api.js index 92a30ac..c886662 100644 --- a/src/lib/tauri-api.js +++ b/src/lib/tauri-api.js @@ -553,6 +553,8 @@ export const api = { hermesPrivacyConfigSave: (form) => invoke('hermes_privacy_config_save', { form }), hermesBrowserConfigRead: () => invoke('hermes_browser_config_read'), hermesBrowserConfigSave: (form) => invoke('hermes_browser_config_save', { form }), + hermesSttConfigRead: () => invoke('hermes_stt_config_read'), + hermesSttConfigSave: (form) => invoke('hermes_stt_config_save', { form }), hermesTerminalConfigRead: () => invoke('hermes_terminal_config_read'), hermesTerminalConfigSave: (form) => invoke('hermes_terminal_config_save', { form }), hermesLazyDepsFeatures: () => cachedInvoke('hermes_lazy_deps_features', {}, 600000), diff --git a/src/locales/modules/engine.js b/src/locales/modules/engine.js index cae66e2..c48e973 100644 --- a/src/locales/modules/engine.js +++ b/src/locales/modules/engine.js @@ -660,6 +660,36 @@ export default { hermesBrowserConfigEngine_lightpanda: _('Lightpanda 快速导航', 'Lightpanda fast navigation', 'Lightpanda 快速導覽'), hermesBrowserConfigEngine_chrome: _('Chrome 完整浏览器', 'Chrome full browser', 'Chrome 完整瀏覽器'), hermesBrowserConfigFootnote: _('Lightpanda 导航更快但不支持截图;录制会把 WebM 写入 Hermes browser_recordings 目录,请只在需要审计时开启。CDP、Dialog 和 Camofox 高级字段会保留在 raw YAML 中。', 'Lightpanda navigates faster but does not support screenshots. Recording writes WebM files into the Hermes browser_recordings directory, so enable it only for audits. Advanced CDP, Dialog, and Camofox fields stay in raw YAML.', 'Lightpanda 導覽更快但不支援截圖;錄製會把 WebM 寫入 Hermes browser_recordings 目錄,請只在需要稽核時開啟。CDP、Dialog 和 Camofox 進階欄位會保留在 raw YAML 中。'), + hermesSttConfigTitle: _('语音转写', 'Speech transcription', '語音轉寫'), + hermesSttConfigDesc: _('控制消息平台语音消息是否自动转写,以及本地、OpenAI 和 Mistral 转写模型。适合需要处理语音反馈的渠道。', 'Control automatic voice-message transcription for messaging platforms, plus local, OpenAI, and Mistral transcription models. Useful for channels that receive voice feedback.', '控制訊息平台語音訊息是否自動轉寫,以及本機、OpenAI 和 Mistral 轉寫模型。適合需要處理語音回饋的渠道。'), + hermesSttConfigStatusReady: _('结构化配置', 'structured settings', '結構化設定'), + hermesSttConfigSave: _('保存转写配置', 'Save transcription settings', '儲存轉寫設定'), + hermesSttConfigSaveSuccess: _('语音转写配置已保存,建议重启 Hermes Gateway 生效', 'Speech transcription settings saved. Restart Hermes Gateway to take effect.', '語音轉寫設定已儲存,建議重啟 Hermes Gateway 生效'), + hermesSttConfigLoadFailed: _('加载语音转写配置失败', 'Load speech transcription settings failed', '載入語音轉寫設定失敗'), + hermesSttConfigSaveFailed: _('保存语音转写配置失败', 'Save speech transcription settings failed', '儲存語音轉寫設定失敗'), + hermesSttConfigEnabled: _('启用语音消息自动转写', 'Enable voice-message transcription', '啟用語音訊息自動轉寫'), + hermesSttConfigProvider: _('转写服务', 'Transcription provider', '轉寫服務'), + hermesSttConfigProvider_auto: _('自动选择', 'Auto select', '自動選擇'), + hermesSttConfigProvider_local: _('本地 faster-whisper', 'Local faster-whisper', '本機 faster-whisper'), + hermesSttConfigProvider_groq: _('Groq Whisper', 'Groq Whisper', 'Groq Whisper'), + hermesSttConfigProvider_openai: _('OpenAI Whisper / GPT 转写', 'OpenAI Whisper / GPT transcription', 'OpenAI Whisper / GPT 轉寫'), + hermesSttConfigProvider_mistral: _('Mistral Voxtral', 'Mistral Voxtral', 'Mistral Voxtral'), + hermesSttConfigLocalModel: _('本地模型', 'Local model', '本機模型'), + hermesSttConfigLocalModel_tiny: _('tiny(最快)', 'tiny (fastest)', 'tiny(最快)'), + hermesSttConfigLocalModel_base: _('base(默认)', 'base (default)', 'base(預設)'), + hermesSttConfigLocalModel_small: _('small(更准)', 'small (more accurate)', 'small(更準)'), + hermesSttConfigLocalModel_medium: _('medium(高精度)', 'medium (high accuracy)', 'medium(高精度)'), + 'hermesSttConfigLocalModel_large-v3': _('large-v3(最高精度)', 'large-v3 (highest accuracy)', 'large-v3(最高精度)'), + hermesSttConfigLocalModel_turbo: _('turbo(速度优先)', 'turbo (speed first)', 'turbo(速度優先)'), + hermesSttConfigLocalLanguage: _('强制语言(可留空)', 'Forced language, optional', '強制語言(可留空)'), + hermesSttConfigOpenaiModel: _('OpenAI 模型', 'OpenAI model', 'OpenAI 模型'), + 'hermesSttConfigOpenaiModel_whisper-1': _('whisper-1(经典)', 'whisper-1 (classic)', 'whisper-1(經典)'), + 'hermesSttConfigOpenaiModel_gpt-4o-mini-transcribe': _('gpt-4o-mini-transcribe(低成本)', 'gpt-4o-mini-transcribe (lower cost)', 'gpt-4o-mini-transcribe(低成本)'), + 'hermesSttConfigOpenaiModel_gpt-4o-transcribe': _('gpt-4o-transcribe(高质量)', 'gpt-4o-transcribe (higher quality)', 'gpt-4o-transcribe(高品質)'), + hermesSttConfigMistralModel: _('Mistral 模型', 'Mistral model', 'Mistral 模型'), + 'hermesSttConfigMistralModel_voxtral-mini-latest': _('voxtral-mini-latest(推荐)', 'voxtral-mini-latest (recommended)', 'voxtral-mini-latest(建議)'), + 'hermesSttConfigMistralModel_voxtral-mini-2602': _('voxtral-mini-2602(固定版本)', 'voxtral-mini-2602 (pinned version)', 'voxtral-mini-2602(固定版本)'), + hermesSttConfigFootnote: _('这里写入 stt.*。API Key 仍通过 .env 管理;Groq 使用上游默认模型,其他 provider 高级字段会保留在 raw YAML 中。', 'This writes stt.*. API keys are still managed through .env. Groq uses the upstream default model, and other provider advanced fields stay in raw YAML.', '這裡寫入 stt.*。API Key 仍透過 .env 管理;Groq 使用上游預設模型,其他 provider 進階欄位會保留在 raw YAML 中。'), hermesCompressionTitle: _('上下文压缩', 'Context compression', '上下文壓縮'), hermesCompressionDesc: _('控制长对话何时触发压缩、压缩目标和保留范围,降低上下文过长导致的失败与费用浪费。', 'Control when long conversations are compressed, the target size, and protected message ranges to reduce failures and wasted cost from oversized context.', '控制長對話何時觸發壓縮、壓縮目標和保留範圍,降低上下文過長導致的失敗與費用浪費。'), hermesCompressionStatusReady: _('结构化配置', 'structured settings', '結構化設定'), diff --git a/tests/hermes-config-page-ui.test.js b/tests/hermes-config-page-ui.test.js index a29566b..126897f 100644 --- a/tests/hermes-config-page-ui.test.js +++ b/tests/hermes-config-page-ui.test.js @@ -283,6 +283,20 @@ test('Hermes 配置页会暴露终端执行结构化配置字段', () => { } }) +test('Hermes 配置页会暴露语音转写结构化配置字段', () => { + for (const id of [ + 'hm-stt-save', + 'hm-stt-enabled', + 'hm-stt-provider', + 'hm-stt-local-model', + 'hm-stt-local-language', + 'hm-stt-openai-model', + 'hm-stt-mistral-model', + ]) { + assert.match(source, new RegExp(`id="${id}"`), `缺少 ${id}`) + } +}) + test('Hermes 配置页数值输入会保留 0 值显示', () => { assert.doesNotMatch(source, /String\(value \|\| ''\)/, 'esc(value) 不能把合法 0 渲染为空字符串') }) @@ -305,6 +319,7 @@ test('Hermes 配置页新增结构化配置不会暴露翻译 key', () => { key.includes('PrivacyConfig') || key.includes('BrowserConfig') || key.includes('TerminalConfig') || + key.includes('SttConfig') || key.includes('CheckpointsConfig') || key.includes('ApprovalsConfig') || key.includes('CronConfig') || diff --git a/tests/hermes-stt-config.test.js b/tests/hermes-stt-config.test.js new file mode 100644 index 0000000..fffca3d --- /dev/null +++ b/tests/hermes-stt-config.test.js @@ -0,0 +1,102 @@ +import test from 'node:test' +import assert from 'node:assert/strict' + +import { + buildHermesSttConfigValues, + mergeHermesSttConfig, +} from '../scripts/dev-api.js' + +test('Hermes STT 配置读取会提供上游默认值', () => { + const values = buildHermesSttConfigValues({}) + + assert.deepEqual(values, { + sttEnabled: true, + sttProvider: 'auto', + sttLocalModel: 'base', + sttLocalLanguage: '', + sttOpenaiModel: 'whisper-1', + sttMistralModel: 'voxtral-mini-latest', + }) +}) + +test('Hermes STT 配置读取会回显语音转写模型字段', () => { + const values = buildHermesSttConfigValues({ + stt: { + enabled: false, + provider: 'openai', + local: { + model: 'small', + language: 'zh', + }, + openai: { + model: 'gpt-4o-mini-transcribe', + }, + mistral: { + model: 'voxtral-mini-2602', + }, + }, + }) + + assert.equal(values.sttEnabled, false) + assert.equal(values.sttProvider, 'openai') + assert.equal(values.sttLocalModel, 'small') + assert.equal(values.sttLocalLanguage, 'zh') + assert.equal(values.sttOpenaiModel, 'gpt-4o-mini-transcribe') + assert.equal(values.sttMistralModel, 'voxtral-mini-2602') +}) + +test('Hermes STT 配置保存会保留未知字段并写入上游结构', () => { + const next = mergeHermesSttConfig({ + model: { provider: 'anthropic' }, + stt: { + enabled: true, + custom_flag: 'keep-stt', + local: { + model: 'base', + custom_flag: 'keep-local', + }, + }, + memory: { memory_enabled: true }, + }, { + sttEnabled: false, + sttProvider: 'openai', + sttLocalModel: 'small', + sttLocalLanguage: 'zh', + sttOpenaiModel: 'gpt-4o-mini-transcribe', + sttMistralModel: 'voxtral-mini-2602', + }) + + assert.deepEqual(next.model, { provider: 'anthropic' }) + assert.deepEqual(next.memory, { memory_enabled: true }) + assert.equal(next.stt.enabled, false) + assert.equal(next.stt.provider, 'openai') + assert.equal(next.stt.local.model, 'small') + assert.equal(next.stt.local.language, 'zh') + assert.equal(next.stt.openai.model, 'gpt-4o-mini-transcribe') + assert.equal(next.stt.mistral.model, 'voxtral-mini-2602') + assert.equal(next.stt.custom_flag, 'keep-stt') + assert.equal(next.stt.local.custom_flag, 'keep-local') +}) + +test('Hermes STT 配置保存会拒绝非法枚举和语言标签', () => { + assert.throws( + () => mergeHermesSttConfig({}, { sttProvider: 'bad' }), + /stt\.provider/, + ) + assert.throws( + () => mergeHermesSttConfig({}, { sttLocalModel: 'giant' }), + /stt\.local\.model/, + ) + assert.throws( + () => mergeHermesSttConfig({}, { sttOpenaiModel: 'gpt-4.1' }), + /stt\.openai\.model/, + ) + assert.throws( + () => mergeHermesSttConfig({}, { sttMistralModel: 'voxtral-large' }), + /stt\.mistral\.model/, + ) + assert.throws( + () => mergeHermesSttConfig({}, { sttLocalLanguage: '中文' }), + /stt\.local\.language/, + ) +})