feat(agent): mark and propagate voice input metadata in agent messages; clarify terminal tool usage in prompts

- Add `has_audio_input` flag to agent message handling and propagate through processing pipeline
- Structure agent input payloads to include `input.mode` and `input.transcribed` for voice messages
- Update prompts and tool descriptions to clarify that `send_voice_message` and `ask_user_choice` are terminal tools and should not be followed by redundant text replies
- Enhance tests to cover voice input metadata propagation and prompt updates
2026-06-12 03:01:45 +08:00 · 2026-05-31 18:04:02 +08:00
parent 13b2163788
commit 855681ff35
11 changed files with 139 additions and 10 deletions
--- a/tests/test_agent_tool_streaming.py
+++ b/tests/test_agent_tool_streaming.py
@@ -441,7 +441,9 @@ class TestAgentToolStreaming(unittest.TestCase):
            self.assertEqual(notification.channel, channel)
            self.assertEqual(notification.voice_path, "/tmp/reply.opus")
            self.assertEqual(notification.voice_caption, "你好")
-            self.assertTrue(SendVoiceMessageTool.return_direct)
+            voice_tool = SendVoiceMessageTool(session_id="session-1", user_id="10001")
+            self.assertTrue(voice_tool.return_direct)
+            self.assertIn("terminal response tool", voice_tool.description)

    def test_send_voice_message_falls_back_for_unsupported_channels(self):
        """校验不支持语音输出的渠道继续回退为文字消息。"""