mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-25 17:54:43 +08:00
refactor: expose text content extraction helper
This commit is contained in:
@@ -373,7 +373,7 @@ class MoviePilotAgent:
|
||||
HumanMessage(content=str(message).strip()[:1000]),
|
||||
]
|
||||
)
|
||||
content = LLMHelper._extract_text_content(getattr(response, "content", response))
|
||||
content = LLMHelper.extract_text_content(getattr(response, "content", response))
|
||||
return self._sanitize_chat_title(content)
|
||||
|
||||
async def prepare_chat_title(self, message: str) -> None:
|
||||
@@ -738,39 +738,6 @@ class MoviePilotAgent:
|
||||
runtime_config = await self._resolve_llm_runtime_config()
|
||||
return await LLMHelper.get_llm(streaming=streaming, **runtime_config)
|
||||
|
||||
@staticmethod
|
||||
def _extract_text_content(content) -> str:
|
||||
"""
|
||||
从消息内容中提取纯文本,过滤掉思考/推理类型的内容块。
|
||||
:param content: 消息内容,可能是字符串或内容块列表
|
||||
:return: 纯文本内容
|
||||
"""
|
||||
if not content:
|
||||
return ""
|
||||
# 跳过思考/推理类型的内容块
|
||||
if isinstance(content, list):
|
||||
text_parts = []
|
||||
for block in content:
|
||||
if isinstance(block, str):
|
||||
text_parts.append(block)
|
||||
elif isinstance(block, dict):
|
||||
# 优先检查 thought 标志(LangChain Google GenAI 方案)
|
||||
if block.get("thought"):
|
||||
continue
|
||||
if block.get("type") in (
|
||||
"thinking",
|
||||
"reasoning_content",
|
||||
"reasoning",
|
||||
"thought",
|
||||
):
|
||||
continue
|
||||
if block.get("type") == "text":
|
||||
text_parts.append(block.get("text", ""))
|
||||
else:
|
||||
text_parts.append(str(block))
|
||||
return "".join(text_parts)
|
||||
return str(content)
|
||||
|
||||
@classmethod
|
||||
def _has_image_input_content(cls, content: Any) -> bool:
|
||||
"""
|
||||
@@ -1252,7 +1219,7 @@ class MoviePilotAgent:
|
||||
|
||||
if token.content:
|
||||
# content 可能是字符串或内容块列表,过滤掉思考类型的块
|
||||
content = self._extract_text_content(token.content)
|
||||
content = LLMHelper.extract_text_content(token.content)
|
||||
if content:
|
||||
stripper.process(content, on_token)
|
||||
|
||||
@@ -1355,7 +1322,7 @@ class MoviePilotAgent:
|
||||
for msg in reversed(final_messages):
|
||||
if hasattr(msg, "type") and msg.type == "ai" and msg.content:
|
||||
# 过滤掉思考/推理内容,只提取纯文本
|
||||
text = self._extract_text_content(msg.content)
|
||||
text = LLMHelper.extract_text_content(msg.content)
|
||||
if text:
|
||||
# 过滤掉包含在 <think> 标签中的内容
|
||||
text = re.sub(
|
||||
@@ -1388,7 +1355,7 @@ class MoviePilotAgent:
|
||||
)
|
||||
for msg in reversed(final_messages):
|
||||
if hasattr(msg, "type") and msg.type == "ai" and msg.content:
|
||||
display_text = self._extract_text_content(msg.content).strip()
|
||||
display_text = LLMHelper.extract_text_content(msg.content).strip()
|
||||
break
|
||||
self._save_assistant_display_message_once(display_text)
|
||||
|
||||
|
||||
@@ -1014,9 +1014,13 @@ class LLMHelper:
|
||||
return model
|
||||
|
||||
@staticmethod
|
||||
def _extract_text_content(content) -> str:
|
||||
def extract_text_content(content: Any, fallback_to_string: bool = False) -> str:
|
||||
"""
|
||||
从响应内容中提取纯文本,仅保留真实文本块。
|
||||
|
||||
:param content: 模型响应内容,可能是字符串、字典或内容块列表
|
||||
:param fallback_to_string: 未识别为文本内容时是否回退为字符串
|
||||
:return: 提取后的纯文本内容
|
||||
"""
|
||||
if content is None:
|
||||
return ""
|
||||
@@ -1051,7 +1055,7 @@ class LLMHelper:
|
||||
return content.get("text", "")
|
||||
if not content.get("type") and isinstance(content.get("text"), str):
|
||||
return content.get("text", "")
|
||||
return ""
|
||||
return str(content) if fallback_to_string else ""
|
||||
|
||||
@staticmethod
|
||||
async def test_current_settings(
|
||||
@@ -1092,7 +1096,7 @@ class LLMHelper:
|
||||
duration_ms = round((time.perf_counter() - start) * 1000)
|
||||
raise LLMTestError(str(err), duration_ms=duration_ms) from err
|
||||
|
||||
reply_text = LLMHelper._extract_text_content(
|
||||
reply_text = LLMHelper.extract_text_content(
|
||||
getattr(response, "content", response)
|
||||
).strip()
|
||||
duration_ms = round((time.perf_counter() - start) * 1000)
|
||||
|
||||
@@ -23,6 +23,7 @@ from langchain_core.messages import AIMessage, HumanMessage
|
||||
from langchain_core.tools import BaseTool, StructuredTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.agent.llm import LLMHelper
|
||||
from app.agent.middleware.utils import append_to_system_message
|
||||
from app.agent.runtime import SubAgentDefinition, agent_runtime_manager
|
||||
from app.agent.tools.tags import ToolTag
|
||||
@@ -281,34 +282,6 @@ def _format_subagent_catalog(profiles: tuple[_SubAgentProfile, ...]) -> str:
|
||||
)
|
||||
|
||||
|
||||
def _extract_text_content(content: Any) -> str:
|
||||
"""从模型消息内容中提取可读文本。"""
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
text_parts: list[str] = []
|
||||
for block in content:
|
||||
if isinstance(block, str):
|
||||
text_parts.append(block)
|
||||
continue
|
||||
if isinstance(block, dict):
|
||||
if block.get("thought"):
|
||||
continue
|
||||
if block.get("type") in {
|
||||
"thinking",
|
||||
"reasoning_content",
|
||||
"reasoning",
|
||||
"thought",
|
||||
}:
|
||||
continue
|
||||
if isinstance(block.get("text"), str):
|
||||
text_parts.append(block["text"])
|
||||
return "".join(text_parts)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _extract_final_text(result: Any) -> str:
|
||||
"""从子代理执行结果中提取最后一条 AI 文本。"""
|
||||
if isinstance(result, dict):
|
||||
@@ -318,11 +291,11 @@ def _extract_final_text(result: Any) -> str:
|
||||
|
||||
for message in reversed(messages):
|
||||
if isinstance(message, AIMessage) and message.content:
|
||||
text = _extract_text_content(message.content).strip()
|
||||
text = LLMHelper.extract_text_content(message.content).strip()
|
||||
if text:
|
||||
return text
|
||||
|
||||
return _extract_text_content(result).strip()
|
||||
return LLMHelper.extract_text_content(result, fallback_to_string=True).strip()
|
||||
|
||||
|
||||
def _clip_text(text: Any, max_chars: int) -> tuple[str, bool]:
|
||||
|
||||
@@ -26,6 +26,7 @@ from langchain_core.tools import BaseTool
|
||||
from langgraph.runtime import Runtime
|
||||
from typing_extensions import TypedDict # noqa
|
||||
|
||||
from app.agent.llm import LLMHelper
|
||||
from app.agent.tools.tags import ToolTag
|
||||
from app.log import logger
|
||||
|
||||
@@ -121,7 +122,7 @@ class ToolSelectorMiddleware(LLMToolSelectorMiddleware):
|
||||
else:
|
||||
continue
|
||||
|
||||
content = cls._extract_text_content(message.content).strip()
|
||||
content = LLMHelper.extract_text_content(message.content).strip()
|
||||
if not content:
|
||||
continue
|
||||
rendered_messages.append(f"{role}: {content}")
|
||||
@@ -380,39 +381,6 @@ class ToolSelectorMiddleware(LLMToolSelectorMiddleware):
|
||||
or "api.deepseek.com" in base_url
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _extract_text_content(content: Any) -> str:
|
||||
"""
|
||||
从模型响应中提取纯文本。
|
||||
|
||||
这里不依赖上层 LLMHelper,避免中间件与 LLM 构造逻辑互相耦合。
|
||||
"""
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
text_parts: list[str] = []
|
||||
for block in content:
|
||||
if isinstance(block, str):
|
||||
text_parts.append(block)
|
||||
continue
|
||||
if isinstance(block, dict):
|
||||
if block.get("type") == "text" and isinstance(
|
||||
block.get("text"), str
|
||||
):
|
||||
text_parts.append(block["text"])
|
||||
continue
|
||||
if not block.get("type") and isinstance(block.get("text"), str):
|
||||
text_parts.append(block["text"])
|
||||
return "".join(text_parts)
|
||||
if isinstance(content, dict):
|
||||
if content.get("type") == "text" and isinstance(content.get("text"), str):
|
||||
return content["text"]
|
||||
if not content.get("type") and isinstance(content.get("text"), str):
|
||||
return content["text"]
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _parse_json_object(text: str) -> dict[str, Any]:
|
||||
"""
|
||||
@@ -504,7 +472,7 @@ class ToolSelectorMiddleware(LLMToolSelectorMiddleware):
|
||||
解析并标准化 DeepSeek JSON 模式的工具筛选结果。
|
||||
"""
|
||||
content = getattr(response, "content", response)
|
||||
text = self._extract_text_content(content)
|
||||
text = LLMHelper.extract_text_content(content)
|
||||
logger.debug(f"工具筛选原始响应: {text}")
|
||||
payload = self._parse_json_object(text)
|
||||
|
||||
|
||||
@@ -214,7 +214,7 @@ class LlmHelperTestCallTest(unittest.TestCase):
|
||||
{"type": "text", "text": "OK"},
|
||||
]
|
||||
|
||||
result = llm_module.LLMHelper._extract_text_content(content)
|
||||
result = llm_module.LLMHelper.extract_text_content(content)
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user