refactor: expose LLMHelper.extract_text_content, drop duplicate copies

2026-06-25 17:54:43 +08:00 · 2026-06-21 12:56:23 +08:00
parent 495807ef4d
commit 90efb204a1
5 changed files with 18 additions and 106 deletions
--- a/app/agent/init.py
+++ b/app/agent/init.py
@@ -373,7 +373,7 @@ class MoviePilotAgent:
                HumanMessage(content=str(message).strip()[:1000]),
            ]
        )
-        content = LLMHelper._extract_text_content(getattr(response, "content", response))
+        content = LLMHelper.extract_text_content(getattr(response, "content", response))
        return self._sanitize_chat_title(content)

    async def prepare_chat_title(self, message: str) -> None:
@@ -738,39 +738,6 @@ class MoviePilotAgent:
        runtime_config = await self._resolve_llm_runtime_config()
        return await LLMHelper.get_llm(streaming=streaming, **runtime_config)

-    @staticmethod
-    def _extract_text_content(content) -> str:
-        """
-        从消息内容中提取纯文本，过滤掉思考/推理类型的内容块。
-        :param content: 消息内容，可能是字符串或内容块列表
-        :return: 纯文本内容
-        """
-        if not content:
-            return ""
-        # 跳过思考/推理类型的内容块
-        if isinstance(content, list):
-            text_parts = []
-            for block in content:
-                if isinstance(block, str):
-                    text_parts.append(block)
-                elif isinstance(block, dict):
-                    # 优先检查 thought 标志（LangChain Google GenAI 方案）
-                    if block.get("thought"):
-                        continue
-                    if block.get("type") in (
-                            "thinking",
-                            "reasoning_content",
-                            "reasoning",
-                            "thought",
-                    ):
-                        continue
-                    if block.get("type") == "text":
-                        text_parts.append(block.get("text", ""))
-                    else:
-                        text_parts.append(str(block))
-            return "".join(text_parts)
-        return str(content)
-
    @classmethod
    def _has_image_input_content(cls, content: Any) -> bool:
        """
@@ -1252,7 +1219,7 @@ class MoviePilotAgent:

                if token.content:
                    # content 可能是字符串或内容块列表，过滤掉思考类型的块
-                    content = self._extract_text_content(token.content)
+                    content = LLMHelper.extract_text_content(token.content)
                    if content:
                        stripper.process(content, on_token)

@@ -1355,7 +1322,7 @@ class MoviePilotAgent:
                for msg in reversed(final_messages):
                    if hasattr(msg, "type") and msg.type == "ai" and msg.content:
                        # 过滤掉思考/推理内容，只提取纯文本
-                        text = self._extract_text_content(msg.content)
+                        text = LLMHelper.extract_text_content(msg.content)
                        if text:
                            # 过滤掉包含在 <think> 标签中的内容
                            text = re.sub(
@@ -1388,7 +1355,7 @@ class MoviePilotAgent:
                )
                for msg in reversed(final_messages):
                    if hasattr(msg, "type") and msg.type == "ai" and msg.content:
-                        display_text = self._extract_text_content(msg.content).strip()
+                        display_text = LLMHelper.extract_text_content(msg.content).strip()
                        break
            self._save_assistant_display_message_once(display_text)

--- a/app/agent/llm/helper.py
+++ b/app/agent/llm/helper.py
@@ -1014,9 +1014,13 @@ class LLMHelper:
        return model

    @staticmethod
-    def _extract_text_content(content) -> str:
+    def extract_text_content(content: Any, fallback_to_string: bool = False) -> str:
        """
        从响应内容中提取纯文本，仅保留真实文本块。
+
+        :param content: 模型响应内容，可能是字符串、字典或内容块列表
+        :param fallback_to_string: 未识别为文本内容时是否回退为字符串
+        :return: 提取后的纯文本内容
        """
        if content is None:
            return ""
@@ -1051,7 +1055,7 @@ class LLMHelper:
                return content.get("text", "")
            if not content.get("type") and isinstance(content.get("text"), str):
                return content.get("text", "")
-        return ""
+        return str(content) if fallback_to_string else ""

    @staticmethod
    async def test_current_settings(
@@ -1092,7 +1096,7 @@ class LLMHelper:
            duration_ms = round((time.perf_counter() - start) * 1000)
            raise LLMTestError(str(err), duration_ms=duration_ms) from err

-        reply_text = LLMHelper._extract_text_content(
+        reply_text = LLMHelper.extract_text_content(
            getattr(response, "content", response)
        ).strip()
        duration_ms = round((time.perf_counter() - start) * 1000)
--- a/app/agent/middleware/subagents.py
+++ b/app/agent/middleware/subagents.py
@@ -23,6 +23,7 @@ from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.tools import BaseTool, StructuredTool
 from pydantic import BaseModel, Field

+from app.agent.llm import LLMHelper
 from app.agent.middleware.utils import append_to_system_message
 from app.agent.runtime import SubAgentDefinition, agent_runtime_manager
 from app.agent.tools.tags import ToolTag
@@ -281,34 +282,6 @@ def _format_subagent_catalog(profiles: tuple[_SubAgentProfile, ...]) -> str:
    )


-def _extract_text_content(content: Any) -> str:
-    """从模型消息内容中提取可读文本。"""
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        text_parts: list[str] = []
-        for block in content:
-            if isinstance(block, str):
-                text_parts.append(block)
-                continue
-            if isinstance(block, dict):
-                if block.get("thought"):
-                    continue
-                if block.get("type") in {
-                    "thinking",
-                    "reasoning_content",
-                    "reasoning",
-                    "thought",
-                }:
-                    continue
-                if isinstance(block.get("text"), str):
-                    text_parts.append(block["text"])
-        return "".join(text_parts)
-    return str(content)
-
-
 def _extract_final_text(result: Any) -> str:
    """从子代理执行结果中提取最后一条 AI 文本。"""
    if isinstance(result, dict):
@@ -318,11 +291,11 @@ def _extract_final_text(result: Any) -> str:

    for message in reversed(messages):
        if isinstance(message, AIMessage) and message.content:
-            text = _extract_text_content(message.content).strip()
+            text = LLMHelper.extract_text_content(message.content).strip()
            if text:
                return text

-    return _extract_text_content(result).strip()
+    return LLMHelper.extract_text_content(result, fallback_to_string=True).strip()


 def _clip_text(text: Any, max_chars: int) -> tuple[str, bool]:
--- a/app/agent/middleware/tool_selection.py
+++ b/app/agent/middleware/tool_selection.py
@@ -26,6 +26,7 @@ from langchain_core.tools import BaseTool
 from langgraph.runtime import Runtime
 from typing_extensions import TypedDict  # noqa

+from app.agent.llm import LLMHelper
 from app.agent.tools.tags import ToolTag
 from app.log import logger

@@ -121,7 +122,7 @@ class ToolSelectorMiddleware(LLMToolSelectorMiddleware):
            else:
                continue

-            content = cls._extract_text_content(message.content).strip()
+            content = LLMHelper.extract_text_content(message.content).strip()
            if not content:
                continue
            rendered_messages.append(f"{role}: {content}")
@@ -380,39 +381,6 @@ class ToolSelectorMiddleware(LLMToolSelectorMiddleware):
                or "api.deepseek.com" in base_url
        )

-    @staticmethod
-    def _extract_text_content(content: Any) -> str:
-        """
-        从模型响应中提取纯文本。
-
-        这里不依赖上层 LLMHelper，避免中间件与 LLM 构造逻辑互相耦合。
-        """
-        if content is None:
-            return ""
-        if isinstance(content, str):
-            return content
-        if isinstance(content, list):
-            text_parts: list[str] = []
-            for block in content:
-                if isinstance(block, str):
-                    text_parts.append(block)
-                    continue
-                if isinstance(block, dict):
-                    if block.get("type") == "text" and isinstance(
-                            block.get("text"), str
-                    ):
-                        text_parts.append(block["text"])
-                        continue
-                    if not block.get("type") and isinstance(block.get("text"), str):
-                        text_parts.append(block["text"])
-            return "".join(text_parts)
-        if isinstance(content, dict):
-            if content.get("type") == "text" and isinstance(content.get("text"), str):
-                return content["text"]
-            if not content.get("type") and isinstance(content.get("text"), str):
-                return content["text"]
-        return ""
-
    @staticmethod
    def _parse_json_object(text: str) -> dict[str, Any]:
        """
@@ -504,7 +472,7 @@ class ToolSelectorMiddleware(LLMToolSelectorMiddleware):
        解析并标准化 DeepSeek JSON 模式的工具筛选结果。
        """
        content = getattr(response, "content", response)
-        text = self._extract_text_content(content)
+        text = LLMHelper.extract_text_content(content)
        logger.debug(f"工具筛选原始响应: {text}")
        payload = self._parse_json_object(text)

--- a/tests/test_llm_helper_testcall.py
+++ b/tests/test_llm_helper_testcall.py
@@ -214,7 +214,7 @@ class LlmHelperTestCallTest(unittest.TestCase):
            {"type": "text", "text": "OK"},
        ]

-        result = llm_module.LLMHelper._extract_text_content(content)
+        result = llm_module.LLMHelper.extract_text_content(content)

        self.assertEqual(result, "OK")