feat(agent): mark and propagate voice input metadata in agent messages; clarify terminal tool usage in prompts

- Add `has_audio_input` flag to agent message handling and propagate it through the processing pipeline
- Structure agent input payloads to include `input.mode` and `input.transcribed` for voice messages
- Update prompts and tool descriptions to clarify that `send_voice_message` and `ask_user_choice` are terminal tools and should not be followed by redundant text replies
- Enhance tests to cover voice input metadata propagation and prompt updates
This commit is contained in:
jxxghp
2026-05-31 18:04:02 +08:00
parent 13b2163788
commit 855681ff35
11 changed files with 139 additions and 10 deletions

View File

@@ -30,6 +30,8 @@ class TestAgentInteraction(unittest.TestCase):
)
self.assertIn("ask_user_choice", telegram_prompt)
self.assertIn("terminal interaction tool", telegram_prompt)
self.assertIn("do not write a final text reply after it", telegram_prompt)
self.assertNotIn("ask_user_choice", wechat_prompt)
def test_factory_injects_choice_tool_only_for_button_channels(self):
@@ -60,6 +62,7 @@ class TestAgentInteraction(unittest.TestCase):
tool = AskUserChoiceTool(session_id="session-1", user_id="10001")
self.assertTrue(tool.return_direct)
self.assertIn("terminal interaction tool", tool.description)
def test_choice_tool_sends_buttons_and_registers_pending_request(self):
tool = AskUserChoiceTool(session_id="session-1", user_id="10001")