diff --git a/src-tauri/src/commands/hermes.rs b/src-tauri/src/commands/hermes.rs
index bedd9b2..17117d3 100644
--- a/src-tauri/src/commands/hermes.rs
+++ b/src-tauri/src/commands/hermes.rs
@@ -3832,6 +3832,46 @@ pub async fn hermes_session_export(session_id: String) -> Result
     resp.json::().await.map_err(|e| format!("解析 JSON 失败: {e}"))
 }
 
+/// Batch 3 §K: multimodal attachment descriptor.
+///
+/// Describes one attachment sent by the frontend; images are passed inline as
+/// base64. Only `kind = "image"` is consumed for now (file attachments are
+/// left for later).
+#[derive(serde::Deserialize, Clone)]
+pub struct HermesAttachment {
+    /// Attachment category; only `"image"` is currently forwarded.
+    pub kind: String,
+    /// MIME type, interpolated verbatim into the data URL (e.g. `image/png`).
+    pub mime: String,
+    /// Optional display name; not used when building the payload.
+    #[serde(default)]
+    pub name: Option<String>,
+    /// Base64-encoded content: pure base64, without the
+    /// `data:image/...;base64,` prefix.
+    pub data_base64: String,
+}
+
+/// Builds OpenAI-style multimodal content:
+/// `[{type:"text"}, {type:"image_url"}, ...]`.
+///
+/// The text part always comes first, followed by one `image_url` part (a
+/// `data:` URL) per attachment whose `kind` is `"image"`; attachments of any
+/// other kind are skipped.
+fn build_multimodal_input(text: &str, attachments: &[HermesAttachment]) -> Value {
+    let text_part = serde_json::json!({ "type": "text", "text": text });
+    let image_parts = attachments
+        .iter()
+        .filter(|att| att.kind == "image")
+        .map(|att| {
+            let url = format!("data:{};base64,{}", att.mime, att.data_base64);
+            serde_json::json!({
+                "type": "image_url",
+                "image_url": { "url": url },
+            })
+        });
+    Value::Array(std::iter::once(text_part).chain(image_parts).collect())
+}
+
 #[tauri::command]
 pub async fn hermes_agent_run(
     app: tauri::AppHandle,
@@ -3839,6 +3879,7 @@ pub async fn hermes_agent_run(
     session_id: Option,
     conversation_history: Option,
     instructions: Option,
+    attachments: Option<Vec<HermesAttachment>>,
 ) -> Result {
     let gw_url = hermes_gateway_url();
     let runs_url = format!("{gw_url}/v1/runs");
@@ -3862,7 +3903,16 @@ pub async fn hermes_agent_run(
         key
     };
 
-    let mut payload = serde_json::json!({ "input": input });
+    // Batch 3 §K: switch `input` to the multimodal array form only when at
+    // least one image attachment is present; otherwise keep the plain string.
+    let image_attachments = attachments
+        .as_deref()
+        .filter(|atts| atts.iter().any(|att| att.kind == "image"));
+    let mut payload = if let Some(atts) = image_attachments {
+        serde_json::json!({ "input": build_multimodal_input(&input, atts) })
+    } else {
+        serde_json::json!({ "input": input })
+    };
     if let Some(sid) = &session_id {
         payload["session_id"] = Value::String(sid.clone());
     }
diff --git 
a/src/engines/hermes/lib/chat-store.js b/src/engines/hermes/lib/chat-store.js index dac8d3c..35b362a 100644 --- a/src/engines/hermes/lib/chat-store.js +++ b/src/engines/hermes/lib/chat-store.js @@ -1063,18 +1063,23 @@ function createStore() { async function sendMessage(content, opts = {}) { const text = (content || '').trim() - if (!text || state.streaming) return + const atts = Array.isArray(opts.attachments) ? opts.attachments : [] + if ((!text && !atts.length) || state.streaming) return let s = activeSession() if (!s) { s = createLocalSession() } // Append user message. + // Batch 3 §K: 多模态附件(仅图片)— 保存 dataUrl 用于气泡内渲染 s.messages.push({ id: uid(), role: 'user', content: text, timestamp: Date.now(), + attachments: atts.length + ? atts.map(a => ({ kind: a.kind, mime: a.mime, name: a.name || '', dataUrl: `data:${a.mime};base64,${a.data_base64}` })) + : undefined, }) updateSessionTitleFromFirstUser(s) s.updatedAt = Date.now() @@ -1096,7 +1101,7 @@ function createStore() { if (isTauriRuntime()) { await attachStreamListeners(s.id) - await api.hermesAgentRun(text, s.id, history.length ? history : null, opts.instructions || null) + await api.hermesAgentRun(text, s.id, history.length ? history : null, opts.instructions || null, atts.length ? 
atts : null) } else { streamAbortController = new AbortController() await api.hermesAgentRunStream( diff --git a/src/engines/hermes/pages/chat.js b/src/engines/hermes/pages/chat.js index fdd108f..a834209 100644 --- a/src/engines/hermes/pages/chat.js +++ b/src/engines/hermes/pages/chat.js @@ -288,6 +288,10 @@ export function render() { let inputValue = '' let inputFocused = false let inputCaret = 0 // caret position restored after re-render + // Batch 3 §K: 多模态图片附件(仅 chat 这一帧暂存,发送后清掉) + let pendingAttachments = [] // [{ kind:'image', mime, name, data_base64 }] + const MAX_ATTACHMENTS = 5 + const MAX_ATTACHMENT_SIZE = 10 * 1024 * 1024 // 10 MB let lastActiveSessionId = store.state.activeSessionId let forceScrollBottom = true @@ -676,7 +680,22 @@ export function render() { ${escHtml(formatCost(cost))} ` : ''} ` : ''} + ${pendingAttachments.length ? ` +
+ ${pendingAttachments.map((a, i) => ` +
+ ${escAttr(a.name)} + ${escHtml(a.name)} + +
+ `).join('')} +
+ ` : ''}
+
+
@@ -686,7 +705,7 @@ export function render() {
 ${ICONS.stop}
 ` : ``}
@@ -1134,6 +1153,62 @@ export function render() {
       })
     })
 
+    // Batch 3 §K: attach button / file input / drag-and-drop / remove attachment
+    const attachBtn = el.querySelector('#hm-chat-attach')
+    const attachInput = el.querySelector('#hm-chat-attach-input')
+    attachBtn?.addEventListener('click', () => attachInput?.click())
+    attachInput?.addEventListener('change', async (e) => {
+      const files = Array.from(e.target.files || [])
+      for (const f of files) await addAttachmentFromFile(f)
+      e.target.value = '' // reset so the user can pick the same image again
+    })
+    // Remove an attachment
+    el.querySelectorAll('[data-attach-remove]').forEach(btn => {
+      btn.addEventListener('click', () => {
+        const idx = parseInt(btn.dataset.attachRemove, 10)
+        if (Number.isFinite(idx) && idx >= 0 && idx < pendingAttachments.length) {
+          pendingAttachments.splice(idx, 1)
+          draw()
+        }
+      })
+    })
+    // Drag and drop onto the input area
+    const dropZone = el.querySelector('.hm-chat-input-wrap')
+    if (dropZone && !dropZone.dataset.dragBound) {
+      dropZone.dataset.dragBound = '1'
+      dropZone.addEventListener('dragover', (e) => {
+        if (e.dataTransfer && Array.from(e.dataTransfer.items || []).some(it => it.kind === 'file')) {
+          e.preventDefault()
+          dropZone.classList.add('hm-chat-input-wrap--dragover')
+        }
+      })
+      dropZone.addEventListener('dragleave', () => dropZone.classList.remove('hm-chat-input-wrap--dragover'))
+      dropZone.addEventListener('drop', async (e) => {
+        e.preventDefault()
+        dropZone.classList.remove('hm-chat-input-wrap--dragover')
+        if (store.state.streaming) return
+        // Snapshot the image files synchronously: the DataTransfer is only
+        // guaranteed readable while the event is being dispatched, i.e. before
+        // the first await.
+        const images = Array.from(e.dataTransfer?.files || []).filter(f => f.type.startsWith('image/'))
+        for (const f of images) await addAttachmentFromFile(f)
+      })
+      // Paste images
+      dropZone.addEventListener('paste', async (e) => {
+        if (store.state.streaming) return
+        // Collect the files and cancel the default paste before the first
+        // await: afterwards the clipboard items are no longer readable and
+        // preventDefault() has no effect.
+        const images = Array.from(e.clipboardData?.items || [])
+          .filter(it => it.kind === 'file' && it.type.startsWith('image/'))
+          .map(it => it.getAsFile())
+          .filter(Boolean)
+        if (!images.length) return
+        e.preventDefault()
+        for (const f of images) await addAttachmentFromFile(f)
+      })
+    }
+
     el.querySelectorAll('.hm-chat-slash-item').forEach(item => {
       item.addEventListener('click', () => {
         const cmd = item.dataset.cmd
@@ -1215,7 +1290,8 @@ export function render() {
 
   async function handleSend() {
     const text = inputValue.trim()
-    if (!text || store.state.streaming) return
+    // Batch 3 §K: allow image-only sends (empty text but attachments present)
+    if ((!text && !pendingAttachments.length) || store.state.streaming) return
 
     // Local slash commands short-circuit before going to the agent.
     if (text === '/clear') {
@@ -1273,9 +1349,64 @@ export function render() {
 
     // Normal user message → start agent run.
     forceScrollBottom = true
+    // Batch 3 §K: copy the attachments before resetInput, then clear them
+    const sendAttachments = pendingAttachments.slice()
+    pendingAttachments = []
     resetInput()
     draw()
-    await store.sendMessage(text)
+    await store.sendMessage(text, { attachments: sendAttachments })
+  }
+
+  // Batch 3 §K: File → base64 via FileReader. Resolves with the pure base64
+  // payload (data-URL prefix stripped); rejects if the read fails.
+  function fileToBase64(file) {
+    return new Promise((resolve, reject) => {
+      const r = new FileReader()
+      r.onload = () => {
+        const result = r.result || ''
+        // the data URL looks like "data:image/png;base64,xxxx" — keep only the base64 part
+        const commaIdx = String(result).indexOf(',')
+        resolve(commaIdx >= 0 ? String(result).slice(commaIdx + 1) : String(result))
+      }
+      r.onerror = () => reject(r.error || new Error('FileReader failed'))
+      r.readAsDataURL(file)
+    })
+  }
+
+  // Validates one image File (type, size, count), reads it as base64 and
+  // appends it to pendingAttachments, then redraws. Failures only show a toast.
+  async function addAttachmentFromFile(file) {
+    if (!file) return
+    if (!file.type.startsWith('image/')) {
+      toast(t('engine.chatAttachOnlyImage'), 'error')
+      return
+    }
+    if (file.size > MAX_ATTACHMENT_SIZE) {
+      toast(t('engine.chatAttachTooBig'), 'error')
+      return
+    }
+    if (pendingAttachments.length >= MAX_ATTACHMENTS) {
+      toast(t('engine.chatAttachTooMany'), 'error')
+      return
+    }
+    try {
+      const data_base64 = await fileToBase64(file)
+      // Re-check after the async read: another handler (file picker, drop,
+      // paste) may have filled the list while this file was being read.
+      if (pendingAttachments.length >= MAX_ATTACHMENTS) {
+        toast(t('engine.chatAttachTooMany'), 'error')
+        return
+      }
+      pendingAttachments.push({
+        kind: 'image',
+        mime: file.type,
+        name: file.name || 'image',
+        data_base64,
+      })
+      draw()
+    } catch (err) {
+      toast(t('engine.chatAttachReadFailed'), 'error')
+    }
   }
 
   // ----------------------------------------------------------- search modal
diff --git a/src/engines/hermes/style/hermes.css b/src/engines/hermes/style/hermes.css
index 410ec06..1ed7cb0 100644
--- a/src/engines/hermes/style/hermes.css
+++ b/src/engines/hermes/style/hermes.css
@@ -5098,6 +5098,98 @@ body[data-active-engine="hermes"][data-theme="dark"] {
   background: rgba(255, 255, 255, 0.04);
 }
 
+/* ---- Batch 3 §K: multimodal image attachments ---- */
+[data-engine="hermes"] .hm-chat-attach-btn {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  width: 36px;
+  height: 36px;
+  border-radius: 8px;
+  border: 0;
+  background: transparent;
+  color: var(--hm-text-tertiary);
+  cursor: pointer;
+  transition: background 0.15s, color 0.15s;
+  flex-shrink: 0;
+}
+[data-engine="hermes"] .hm-chat-attach-btn:hover:not(:disabled) {
+  background: var(--hm-surface-1);
+  color: var(--hm-accent);
+}
+[data-engine="hermes"] .hm-chat-attach-btn:disabled {
+  opacity: 0.5;
+  cursor: not-allowed;
+}
+[data-engine="hermes"] .hm-chat-attach-preview {
+  display: flex;
+  gap: 8px;
+  padding: 8px 12px;
+  flex-wrap: wrap;
+  border-bottom: 1px solid var(--hm-border);
+}
+[data-engine="hermes"] 
.hm-chat-attach-chip { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 4px 8px 4px 4px; + background: var(--hm-surface-1); + border-radius: 8px; + max-width: 220px; +} +[data-engine="hermes"] .hm-chat-attach-chip img { + width: 32px; + height: 32px; + object-fit: cover; + border-radius: 4px; +} +[data-engine="hermes"] .hm-chat-attach-chip-name { + font-size: 12px; + color: var(--hm-text-secondary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 140px; +} +[data-engine="hermes"] .hm-chat-attach-chip-remove { + border: 0; + background: transparent; + color: var(--hm-text-tertiary); + font-size: 16px; + cursor: pointer; + padding: 0 4px; + line-height: 1; +} +[data-engine="hermes"] .hm-chat-attach-chip-remove:hover { + color: var(--hm-error, #ef4444); +} +[data-engine="hermes"] .hm-chat-input-wrap--dragover { + outline: 2px dashed var(--hm-accent); + outline-offset: -2px; + background: rgba(99, 102, 241, 0.04); +} + +/* 消息气泡内的图片渲染 */ +[data-engine="hermes"] .hm-chat-msg-attachments { + display: flex; + flex-wrap: wrap; + gap: 8px; + margin-bottom: 8px; +} +[data-engine="hermes"] .hm-chat-msg-image { + max-width: 240px; + max-height: 240px; + border-radius: 8px; + cursor: zoom-in; + object-fit: contain; + background: var(--hm-surface-1); +} +[data-engine="hermes"] .hm-chat-msg-image--zoom { + max-width: 100%; + max-height: 80vh; + cursor: zoom-out; +} + [data-engine="hermes"] .hm-chat-live-tools { display: flex; flex-direction: column; diff --git a/src/lib/tauri-api.js b/src/lib/tauri-api.js index 232fbc1..7a716f0 100644 --- a/src/lib/tauri-api.js +++ b/src/lib/tauri-api.js @@ -473,7 +473,7 @@ export const api = { hermesHealthCheck: () => invoke('hermes_health_check'), hermesCapabilities: () => invoke('hermes_capabilities'), hermesApiProxy: (method, path, body, headers) => invoke('hermes_api_proxy', { method, path, body: body || null, headers: headers || null }), - hermesAgentRun: (input, sessionId, 
conversationHistory, instructions) => invoke('hermes_agent_run', { input, sessionId: sessionId || null, conversationHistory: conversationHistory || null, instructions: instructions || null }), + hermesAgentRun: (input, sessionId, conversationHistory, instructions, attachments) => invoke('hermes_agent_run', { input, sessionId: sessionId || null, conversationHistory: conversationHistory || null, instructions: instructions || null, attachments: attachments && attachments.length ? attachments : null }), hermesAgentRunStream: (input, sessionId, conversationHistory, instructions, onEvent, options) => webStreamInvoke('hermes_agent_run_stream', { input, sessionId: sessionId || null, conversationHistory: conversationHistory || null, instructions: instructions || null }, onEvent, options), // Batch 1 §D + §C-bis: 真正中断 + Approval Flow(用 run_id) hermesRunStop: (runId) => invoke('hermes_run_stop', { runId }), diff --git a/src/locales/modules/engine.js b/src/locales/modules/engine.js index 2e6ffa8..83197e8 100644 --- a/src/locales/modules/engine.js +++ b/src/locales/modules/engine.js @@ -453,6 +453,13 @@ export default { chatApprovalAlways: _('永久信任', 'Always', '永久信任'), chatApprovalDeny: _('拒绝', 'Deny', '拒絕'), chatApprovalFailed: _('批准失败', 'Approval failed', '批准失敗'), + // Batch 3 §K: 多模态图片 + chatAttach: _('附加图片', 'Attach image', '附加圖片'), + chatAttachRemove: _('移除', 'Remove', '移除'), + chatAttachOnlyImage: _('只支持图片格式', 'Only image files are supported', '只支援圖片格式'), + chatAttachTooBig: _('图片过大(最大 10 MB)', 'Image too large (max 10 MB)', '圖片過大(最大 10 MB)'), + chatAttachTooMany: _('最多 5 张图片', 'Up to 5 images', '最多 5 張圖片'), + chatAttachReadFailed: _('读取图片失败', 'Failed to read image', '讀取圖片失敗'), // Web 模式(远程浏览器)下流式聊天暂不可用 chatWebModeStreamingUnsupported: _( 'Web 模式暂不支持 Hermes 实时流式聊天(依赖桌面端事件桥)。请打开桌面客户端使用此功能。',