diff --git a/app/agent/__init__.py b/app/agent/__init__.py index e2cb444e..dc90c9cb 100644 --- a/app/agent/__init__.py +++ b/app/agent/__init__.py @@ -251,7 +251,7 @@ class MoviePilotAgent: if start_idx > 0: on_token(buffer[:start_idx]) in_think_tag = True - buffer = buffer[start_idx + 7 :] + buffer = buffer[start_idx + 7:] else: # 检查是否以 的前缀结尾 partial_match = False @@ -269,7 +269,7 @@ class MoviePilotAgent: end_idx = buffer.find("") if end_idx != -1: in_think_tag = False - buffer = buffer[end_idx + 8 :] + buffer = buffer[end_idx + 8:] else: # 检查是否以 的前缀结尾 partial_match = False @@ -619,7 +619,7 @@ class AgentManager: await self._session_workers[session_id] except asyncio.CancelledError: pass - self._session_workers.pop(session_id, None) + self._session_workers.pop(session_id, None) # noqa stopped = True # 清空队列中待处理的消息 diff --git a/app/agent/tools/factory.py b/app/agent/tools/factory.py index ac91e351..4c741d37 100644 --- a/app/agent/tools/factory.py +++ b/app/agent/tools/factory.py @@ -52,6 +52,8 @@ from app.agent.tools.impl.query_installed_plugins import QueryInstalledPluginsTo from app.agent.tools.impl.query_plugin_capabilities import QueryPluginCapabilitiesTool from app.agent.tools.impl.run_slash_command import RunSlashCommandTool from app.agent.tools.impl.list_slash_commands import ListSlashCommandsTool +from app.agent.tools.impl.query_custom_identifiers import QueryCustomIdentifiersTool +from app.agent.tools.impl.update_custom_identifiers import UpdateCustomIdentifiersTool from app.core.plugin import PluginManager from app.log import logger from .base import MoviePilotTool @@ -128,6 +130,8 @@ class MoviePilotToolFactory: QueryPluginCapabilitiesTool, RunSlashCommandTool, ListSlashCommandsTool, + QueryCustomIdentifiersTool, + UpdateCustomIdentifiersTool, ] # 创建内置工具 for ToolClass in tool_definitions: diff --git a/app/agent/tools/impl/query_custom_identifiers.py b/app/agent/tools/impl/query_custom_identifiers.py new file mode 100644 index 00000000..ce1f1aa3 --- 
"""Agent tool that reports the currently configured custom identifiers."""

import json
from typing import Optional, Type

from pydantic import BaseModel, Field

from app.agent.tools.base import MoviePilotTool
from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
from app.schemas.types import SystemConfigKey


class QueryCustomIdentifiersInput(BaseModel):
    """查询自定义识别词工具的输入参数模型"""

    # NOTE: the docstring above is left untouched on purpose -- pydantic
    # exposes a model's docstring as the JSON-schema description, so it is
    # runtime-visible text. It reads: "input parameter model for the
    # query-custom-identifiers tool".

    # Free-text justification supplied by the agent; `run` does not read it.
    explanation: str = Field(
        ...,
        description="Clear explanation of why this tool is being used in the current context",
    )


class QueryCustomIdentifiersTool(MoviePilotTool):
    """Read-only tool returning the stored custom identifier rules as JSON."""

    name: str = "query_custom_identifiers"
    description: str = (
        "Query all currently configured custom identifiers (自定义识别词). "
        "Returns the list of identifier rules used for preprocessing torrent/file names before media recognition. "
        "Use this tool to check existing rules before adding new ones to avoid duplicates."
    )
    args_schema: Type[BaseModel] = QueryCustomIdentifiersInput

    def get_tool_message(self, **kwargs) -> Optional[str]:
        """Return the fixed progress message shown while the tool runs."""
        return "正在查询自定义识别词"

    async def run(self, **kwargs) -> str:
        """Fetch the stored rules and serialise them.

        Returns:
            A JSON string. On success it carries ``success``, ``count`` and
            ``identifiers`` (plus ``message`` when nothing is configured);
            on any exception it carries ``success: False`` and ``message``.
        """
        logger.info(f"执行工具: {self.name}")
        try:
            rules = SystemConfigOper().get(SystemConfigKey.CustomIdentifiers)
            if not rules:
                # Nothing stored (None / empty): report an explicit empty list.
                payload = {
                    "success": True,
                    "count": 0,
                    "identifiers": [],
                    "message": "当前没有配置自定义识别词",
                }
            else:
                payload = {
                    "success": True,
                    "count": len(rules),
                    "identifiers": rules,
                }
            # Serialisation stays inside the try so encoding errors are
            # reported through the same failure payload.
            return json.dumps(payload, ensure_ascii=False, indent=2)
        except Exception as exc:
            logger.error(f"查询自定义识别词失败: {exc}")
            failure = {
                "success": False,
                "message": f"查询自定义识别词时发生错误: {str(exc)}",
            }
            return json.dumps(failure, ensure_ascii=False)
"""Agent tool that replaces the configured custom identifiers."""

import json
from typing import List, Optional, Type

from pydantic import BaseModel, Field

from app.agent.tools.base import MoviePilotTool
from app.db.systemconfig_oper import SystemConfigOper
from app.log import logger
from app.schemas.types import SystemConfigKey


class UpdateCustomIdentifiersInput(BaseModel):
    """更新自定义识别词工具的输入参数模型"""

    # NOTE: the docstring above is left untouched on purpose -- pydantic
    # exposes a model's docstring as the JSON-schema description, so it is
    # runtime-visible text. It reads: "input parameter model for the
    # update-custom-identifiers tool".

    # Free-text justification supplied by the agent; `run` does not read it.
    explanation: str = Field(
        ...,
        description="Clear explanation of why this tool is being used in the current context",
    )
    # Full replacement list: whatever is passed here overwrites the stored value.
    identifiers: List[str] = Field(
        ...,
        description=(
            "The complete list of custom identifier rules to save. "
            "This REPLACES the entire existing list. "
            "Always query existing identifiers first, merge new rules, then pass the full list."
        ),
    )


class UpdateCustomIdentifiersTool(MoviePilotTool):
    """Tool that overwrites the stored custom identifier rules with a new list."""

    name: str = "update_custom_identifiers"
    description: str = (
        "Update the full list of custom identifiers (自定义识别词) used for preprocessing torrent/file names. "
        "This tool REPLACES all existing identifier rules with the provided list. "
        "IMPORTANT: Always use 'query_custom_identifiers' first to get existing rules, "
        "then merge new rules into the list before calling this tool to avoid accidentally deleting existing rules. "
        "Supported rule formats (spaces around operators are required): "
        "1) Block word: just the word/regex to remove; "
        "2) Replacement: '被替换词 => 替换词'; "
        "3) Episode offset: '前定位词 <> 后定位词 >> EP±N'; "
        "4) Combined: '被替换词 => 替换词 && 前定位词 <> 后定位词 >> EP±N'; "
        "Lines starting with '#' are comments. "
        "The replacement target supports: {[tmdbid=xxx;type=movie/tv;s=xxx;e=xxx]} for direct TMDB ID matching."
    )
    args_schema: Type[BaseModel] = UpdateCustomIdentifiersInput

    def get_tool_message(self, **kwargs) -> Optional[str]:
        """Return a progress message containing the number of rules received.

        ``identifiers`` may be missing or explicitly ``None``; both are
        counted as zero instead of raising on ``len(None)``.
        """
        identifiers = kwargs.get("identifiers") or []
        return f"正在更新自定义识别词(共 {len(identifiers)} 条规则)"

    async def run(self, identifiers: Optional[List[str]] = None, **kwargs) -> str:
        """Replace the stored custom identifiers with ``identifiers``.

        Args:
            identifiers: The complete rule list to store. ``None`` is rejected;
                an empty (or all-blank) list stores ``None`` under the
                ``CustomIdentifiers`` key.

        Returns:
            A JSON string with ``success`` and ``message``; on success it also
            carries ``count`` and the ``identifiers`` that were saved.
        """
        logger.info(
            f"执行工具: {self.name}, 规则数量: {len(identifiers) if identifiers else 0}"
        )
        try:
            if identifiers is None:
                return json.dumps(
                    {"success": False, "message": "必须提供 identifiers 参数"},
                    ensure_ascii=False,
                )

            # Drop entries that carry no rule: None, non-strings, and empty or
            # whitespace-only strings. Kept rules are stored unmodified (not
            # stripped), since surrounding spaces can be part of a pattern.
            identifiers = [
                rule
                for rule in identifiers
                if isinstance(rule, str) and rule.strip()
            ]

            # An empty list is persisted as None rather than [].
            value = identifiers or None
            success = await SystemConfigOper().async_set(
                SystemConfigKey.CustomIdentifiers, value
            )
            # NOTE(review): any falsy result is reported as a failure; confirm
            # async_set does not return a falsy value for an unchanged config.
            if not success:
                return json.dumps(
                    {"success": False, "message": "保存自定义识别词失败"},
                    ensure_ascii=False,
                )

            return json.dumps(
                {
                    "success": True,
                    "message": f"自定义识别词已更新,共 {len(identifiers)} 条规则",
                    "count": len(identifiers),
                    "identifiers": identifiers,
                },
                ensure_ascii=False,
                indent=2,
            )
        except Exception as e:
            logger.error(f"更新自定义识别词失败: {e}")
            return json.dumps(
                {"success": False, "message": f"更新自定义识别词时发生错误: {str(e)}"},
                ensure_ascii=False,
            )
+ Applicable scenarios include: + 1) A torrent or file name is incorrectly recognized (wrong title, season, episode, etc.); + 2) The user wants to block unwanted keywords from torrent names; + 3) The user needs episode offset rules for series with non-standard numbering; + 4) The user wants to force recognition of a specific media by TMDB/Douban ID. +allowed-tools: query_custom_identifiers update_custom_identifiers recognize_media +--- + +# Generate Custom Identifiers (生成自定义识别词) + +This skill helps generate custom identifier rules for MoviePilot's media recognition system. Custom identifiers preprocess torrent/file names before the recognition engine runs, correcting naming issues that cause misidentification. + +## Prerequisites + +You need the following tools: +- `query_custom_identifiers` - Query all existing custom identifier rules +- `update_custom_identifiers` - Save the updated identifier list (replaces the full list) +- `recognize_media` - Test recognition of a torrent title or file path (optional, for verification) + +## Supported Rule Formats + +There are **four formats**. Operators must have spaces on both sides. + +### 1. Block Word (屏蔽词) + +Removes matched text from the title. Supports regex. + +``` +REPACK +``` + +### 2. Replacement (被替换词 => 替换词) + +Regex substitution. The left side is a regex pattern, the right side is the replacement (supports backreferences). + +``` +被替换词 => 替换词 +``` + +**Special replacement for direct ID specification:** +``` +被替换词 => {[tmdbid=xxx;type=movie/tv;s=xxx;e=xxx]} +被替换词 => {[doubanid=xxx;type=movie/tv;s=xxx;e=xxx]} +``` +Where `s` (season) and `e` (episode) are optional. + +### 3. Episode Offset (集偏移) + +Shifts episode numbers found between the front and back delimiter words. `EP` is the placeholder for the original episode number. + +``` +前定位词 <> 后定位词 >> EP-12 +``` + +### 4. Combined Replacement + Episode Offset + +First performs replacement; episode offset only runs if replacement succeeded. 
+ +``` +被替换词 => 替换词 && 前定位词 <> 后定位词 >> EP-12 +``` + +### Comments + +Lines starting with `#` are comments and will be skipped during processing. + +## Important Rules for Writing Identifiers + +1. **Regex support**: All patterns support regular expressions. Special characters (`. * + ? ^ $ { } [ ] ( ) | \`) must be escaped with `\` when matching literally. +2. **Spaces matter**: The operators ` => `, ` <> `, ` >> `, ` && ` must have spaces on both sides. +3. **One rule per string**: Each element in the identifiers list is one rule. +4. **EP placeholder**: In episode offset expressions, `EP` represents the original episode number. Common patterns: + - `EP-12` means subtract 12 + - `EP+5` means add 5 + - `EP*2` means multiply by 2 +5. **Chinese number support**: Episode offset handles Chinese numbers (一二三四五六七八九十). +6. **Empty replacement**: Using nothing after `=>` is equivalent to a block word. + +## Workflow + +### Step 1: Analyze the Problem + +Parse the torrent/file name provided by the user. Identify: +- What is being incorrectly recognized (title, season, episode, year, quality, etc.) +- What the correct recognition result should be +- Which identifier format(s) will solve the problem + +### Step 2: Generate the Identifier Rule(s) + +Write the rule using the appropriate format. 
Ensure: +- Regex special characters are properly escaped +- Add a comment line (starting with `#`) above the rule to describe what it does +- Test the regex mentally against the provided name to verify correctness + +### Step 3: Query Existing Identifiers + +Use the `query_custom_identifiers` tool to get all current rules: + +``` +query_custom_identifiers(explanation="Checking existing identifiers before adding new rules to avoid duplicates") +``` + +### Step 4: Check for Duplicates + +Compare each new rule against the existing identifiers: +- **Exact duplicate**: The rule string is identical to an existing rule — skip it +- **Functional duplicate**: A different rule that produces the same effect on the same input (e.g., same regex pattern with trivial whitespace differences) — warn the user +- **Conflict**: An existing rule modifies the same text in a different way — warn the user and ask which to keep + +### Step 5: Save the Updated Identifiers + +Merge new non-duplicate rules into the existing list, then use `update_custom_identifiers` to save the **complete** list: + +``` +update_custom_identifiers( + explanation="Adding new identifier rules for [description]", + identifiers=["existing rule 1", "existing rule 2", "# new comment", "new rule"] +) +``` + +**CRITICAL**: Always include ALL existing rules in the list. This tool replaces the entire list. 
+ +### Step 6: Verify (Optional) + +If the user wants to verify the rule works, use `recognize_media` to test: + +``` +recognize_media(explanation="Testing recognition after adding identifier", title="the torrent title to test") +``` + +### Step 7: Report + +Tell the user: +- What rule(s) were added +- What effect they will have on the title +- Whether any duplicates or conflicts were found + +## Common Scenarios and Examples + +### Wrong Season/Episode Parsing + +**User**: "种子名 `[SubGroup] My Show - 13 [1080P]`,这是第二季第1集,但被识别成第13集" + +**Solution**: Episode offset to subtract 12: +``` +# My Show 第二季集数偏移(13->1) +\[SubGroup\] <> \[1080P\] >> EP-12 +``` + +### Unwanted Text Causing Wrong Identification + +**User**: "种子名 `My.Show.2024.REPACK.1080p.mkv`,REPACK导致识别异常" + +**Solution**: Block word: +``` +# 屏蔽REPACK标记 +REPACK +``` + +### Non-Standard Naming + +**User**: "文件名 `[OldName] EP01.mkv`,应该识别为 NewName" + +**Solution**: Replacement: +``` +# OldName替换为NewName +OldName => NewName +``` + +### Force TMDB ID Recognition + +**User**: "种子名 `Some.Weird.Name.S01E01.1080p.mkv`,识别不到,TMDB ID是12345,是电视剧" + +**Solution**: Direct ID specification: +``` +# 强制识别Some.Weird.Name为TMDB ID 12345 +Some\.Weird\.Name => {[tmdbid=12345;type=tv;s=1]} +``` + +### Combined Fix + +**User**: "种子名 `[Baha][OldTitle][13][1080P]`,标题应该是NewTitle,而且13应该是第二季第1集" + +**Solution**: Combined replacement + episode offset: +``` +# OldTitle替换为NewTitle并偏移集数 +OldTitle => NewTitle && \[Baha\] <> \[1080P\] >> EP-12 +``` + +### Multiple Episode Numbers in One Title + +**User**: "种子名 `[Group] Title - 13-14 [1080P]`,应该是第1-2集" + +**Solution**: Episode offset (handles multiple numbers between delimiters): +``` +# Title 集数偏移 +\[Group\] <> \[1080P\] >> EP-12 +``` + +## WordsMatcher Processing Logic Reference + +The `WordsMatcher.prepare()` method (in `app/core/meta/words.py`) processes each rule in order: + +1. Skip empty lines and lines starting with `#` +2. 
Detect format by checking operator presence, in this order: + - Contains ` => ` AND ` && ` AND ` >> ` AND ` <> ` → Combined format (4) + - Contains ` => ` → Replacement format (2) + - Contains ` >> ` AND ` <> ` → Episode offset format (3) + - Otherwise → Block word format (1) +3. For combined format, replacement runs first; episode offset only runs if replacement succeeded +4. Returns the modified title and a list of rules that were actually applied +5. Priority: the per-subscription `custom_words` parameter takes precedence over the global `CustomIdentifiers` + +## Safety Notes + +- Always query existing rules before updating +- Never remove existing rules unless the user explicitly asks +- Add comment lines before new rules for maintainability +- When uncertain about the correct approach, present multiple options and let the user choose