feat(agent): mark and propagate voice input metadata in agent messages; clarify terminal tool usage in prompts

- Add `has_audio_input` flag to agent message handling and propagate it through the processing pipeline
- Structure agent input payloads to include `input.mode` and `input.transcribed` for voice messages
- Update prompts and tool descriptions to clarify that `send_voice_message` and `ask_user_choice` are terminal tools and should not be followed by redundant text replies
- Enhance tests to cover voice input metadata propagation and prompt updates
2026-06-12 03:01:45 +08:00 · 2026-05-31 18:04:02 +08:00
parent 13b2163788
commit 855681ff35
11 changed files with 139 additions and 10 deletions
--- a/tests/test_agent_interaction.py
+++ b/tests/test_agent_interaction.py
@@ -30,6 +30,8 @@ class TestAgentInteraction(unittest.TestCase):
        )

        self.assertIn("ask_user_choice", telegram_prompt)
+        self.assertIn("terminal interaction tool", telegram_prompt)
+        self.assertIn("do not write a final text reply after it", telegram_prompt)
        self.assertNotIn("ask_user_choice", wechat_prompt)

    def test_factory_injects_choice_tool_only_for_button_channels(self):
@@ -60,6 +62,7 @@ class TestAgentInteraction(unittest.TestCase):
        tool = AskUserChoiceTool(session_id="session-1", user_id="10001")

        self.assertTrue(tool.return_direct)
+        self.assertIn("terminal interaction tool", tool.description)

    def test_choice_tool_sends_buttons_and_registers_pending_request(self):
        tool = AskUserChoiceTool(session_id="session-1", user_id="10001")