mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-06 20:42:43 +08:00
feat(agent): add tools for querying and updating custom identifiers
This commit is contained in:
@@ -251,7 +251,7 @@ class MoviePilotAgent:
|
||||
if start_idx > 0:
|
||||
on_token(buffer[:start_idx])
|
||||
in_think_tag = True
|
||||
buffer = buffer[start_idx + 7 :]
|
||||
buffer = buffer[start_idx + 7:]
|
||||
else:
|
||||
# 检查是否以 <think> 的前缀结尾
|
||||
partial_match = False
|
||||
@@ -269,7 +269,7 @@ class MoviePilotAgent:
|
||||
end_idx = buffer.find("</think>")
|
||||
if end_idx != -1:
|
||||
in_think_tag = False
|
||||
buffer = buffer[end_idx + 8 :]
|
||||
buffer = buffer[end_idx + 8:]
|
||||
else:
|
||||
# 检查是否以 </think> 的前缀结尾
|
||||
partial_match = False
|
||||
@@ -619,7 +619,7 @@ class AgentManager:
|
||||
await self._session_workers[session_id]
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self._session_workers.pop(session_id, None)
|
||||
self._session_workers.pop(session_id, None) # noqa
|
||||
stopped = True
|
||||
|
||||
# 清空队列中待处理的消息
|
||||
|
||||
@@ -52,6 +52,8 @@ from app.agent.tools.impl.query_installed_plugins import QueryInstalledPluginsTo
|
||||
from app.agent.tools.impl.query_plugin_capabilities import QueryPluginCapabilitiesTool
|
||||
from app.agent.tools.impl.run_slash_command import RunSlashCommandTool
|
||||
from app.agent.tools.impl.list_slash_commands import ListSlashCommandsTool
|
||||
from app.agent.tools.impl.query_custom_identifiers import QueryCustomIdentifiersTool
|
||||
from app.agent.tools.impl.update_custom_identifiers import UpdateCustomIdentifiersTool
|
||||
from app.core.plugin import PluginManager
|
||||
from app.log import logger
|
||||
from .base import MoviePilotTool
|
||||
@@ -128,6 +130,8 @@ class MoviePilotToolFactory:
|
||||
QueryPluginCapabilitiesTool,
|
||||
RunSlashCommandTool,
|
||||
ListSlashCommandsTool,
|
||||
QueryCustomIdentifiersTool,
|
||||
UpdateCustomIdentifiersTool,
|
||||
]
|
||||
# 创建内置工具
|
||||
for ToolClass in tool_definitions:
|
||||
|
||||
66
app/agent/tools/impl/query_custom_identifiers.py
Normal file
66
app/agent/tools/impl/query_custom_identifiers.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""查询自定义识别词工具"""
|
||||
|
||||
import json
|
||||
from typing import Optional, Type
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.agent.tools.base import MoviePilotTool
|
||||
from app.db.systemconfig_oper import SystemConfigOper
|
||||
from app.log import logger
|
||||
from app.schemas.types import SystemConfigKey
|
||||
|
||||
|
||||
class QueryCustomIdentifiersInput(BaseModel):
    """查询自定义识别词工具的输入参数模型"""

    # Argument schema for the `query_custom_identifiers` tool.
    # NOTE: the docstring above is kept verbatim on purpose; pydantic exposes
    # a model's docstring as the description of its generated schema, so it is
    # runtime text rather than a plain comment.
    #
    # The query takes no filters: the only (required) argument is a free-text
    # justification for the call.
    explanation: str = Field(
        ...,
        description=(
            "Clear explanation of why this tool is being used "
            "in the current context"
        ),
    )
|
||||
|
||||
|
||||
class QueryCustomIdentifiersTool(MoviePilotTool):
    # Read-only agent tool: returns the custom identifier rules currently
    # stored under SystemConfigKey.CustomIdentifiers as a JSON string.
    name: str = "query_custom_identifiers"
    description: str = (
        "Query all currently configured custom identifiers (自定义识别词). "
        "Returns the list of identifier rules used for preprocessing "
        "torrent/file names before media recognition. "
        "Use this tool to check existing rules before adding new ones "
        "to avoid duplicates."
    )
    args_schema: Type[BaseModel] = QueryCustomIdentifiersInput

    def get_tool_message(self, **kwargs) -> Optional[str]:
        """Return the fixed progress message shown while the query runs."""
        return "正在查询自定义识别词"

    async def run(self, **kwargs) -> str:
        """Fetch the stored identifier rules and serialize them to JSON.

        Returns:
            A JSON document. On success it carries ``success``, ``count`` and
            ``identifiers`` (plus a ``message`` when nothing is configured);
            on any exception it carries ``success: false`` and an error
            ``message`` instead of raising.
        """
        logger.info(f"执行工具: {self.name}")
        try:
            rules = SystemConfigOper().get(SystemConfigKey.CustomIdentifiers)
            if not rules:
                # Nothing stored (None or empty): report an explicit empty list.
                payload = {
                    "success": True,
                    "count": 0,
                    "identifiers": [],
                    "message": "当前没有配置自定义识别词",
                }
            else:
                payload = {
                    "success": True,
                    "count": len(rules),
                    "identifiers": rules,
                }
            return json.dumps(payload, ensure_ascii=False, indent=2)
        except Exception as err:
            # Tool boundary: report the failure to the agent as JSON.
            logger.error(f"查询自定义识别词失败: {err}")
            failure = {
                "success": False,
                "message": f"查询自定义识别词时发生错误: {str(err)}",
            }
            return json.dumps(failure, ensure_ascii=False)
|
||||
95
app/agent/tools/impl/update_custom_identifiers.py
Normal file
95
app/agent/tools/impl/update_custom_identifiers.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""更新自定义识别词工具"""
|
||||
|
||||
import json
|
||||
from typing import List, Optional, Type
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.agent.tools.base import MoviePilotTool
|
||||
from app.db.systemconfig_oper import SystemConfigOper
|
||||
from app.log import logger
|
||||
from app.schemas.types import SystemConfigKey
|
||||
|
||||
|
||||
class UpdateCustomIdentifiersInput(BaseModel):
    """更新自定义识别词工具的输入参数模型"""

    # Argument schema for the `update_custom_identifiers` tool.
    # NOTE: the docstring above is kept verbatim on purpose; pydantic exposes
    # a model's docstring as the description of its generated schema, so it is
    # runtime text rather than a plain comment.

    # Required free-text justification for the call.
    explanation: str = Field(
        ...,
        description=(
            "Clear explanation of why this tool is being used "
            "in the current context"
        ),
    )
    # Required full rule list; the description tells the caller that it
    # overwrites whatever is currently stored.
    identifiers: List[str] = Field(
        ...,
        description=(
            "The complete list of custom identifier rules to save. "
            "This REPLACES the entire existing list. "
            "Always query existing identifiers first, merge new rules, "
            "then pass the full list."
        ),
    )
|
||||
|
||||
|
||||
class UpdateCustomIdentifiersTool(MoviePilotTool):
    # Agent tool that overwrites the whole custom identifier list stored under
    # SystemConfigKey.CustomIdentifiers with the list supplied by the caller.
    name: str = "update_custom_identifiers"
    description: str = (
        "Update the full list of custom identifiers (自定义识别词) used for preprocessing torrent/file names. "
        "This tool REPLACES all existing identifier rules with the provided list. "
        "IMPORTANT: Always use 'query_custom_identifiers' first to get existing rules, "
        "then merge new rules into the list before calling this tool to avoid accidentally deleting existing rules. "
        "Supported rule formats (spaces around operators are required): "
        "1) Block word: just the word/regex to remove; "
        "2) Replacement: '被替换词 => 替换词'; "
        "3) Episode offset: '前定位词 <> 后定位词 >> EP±N'; "
        "4) Combined: '被替换词 => 替换词 && 前定位词 <> 后定位词 >> EP±N'; "
        "Lines starting with '#' are comments. "
        "The replacement target supports: {[tmdbid=xxx;type=movie/tv;s=xxx;e=xxx]} for direct TMDB ID matching."
    )
    args_schema: Type[BaseModel] = UpdateCustomIdentifiersInput

    def get_tool_message(self, **kwargs) -> Optional[str]:
        """Return the progress message, including how many rules were passed."""
        # `identifiers` can be present but None; `or []` avoids len(None).
        identifiers = kwargs.get("identifiers") or []
        return f"正在更新自定义识别词(共 {len(identifiers)} 条规则)"

    async def run(self, identifiers: Optional[List[str]] = None, **kwargs) -> str:
        """Replace the stored custom identifier rules with ``identifiers``.

        Args:
            identifiers: The complete rule list to store. ``None`` is rejected
                with an error payload. ``None`` entries and blank or
                whitespace-only entries are dropped; every other rule is
                stored unmodified.

        Returns:
            A JSON document with ``success`` and ``message``; on success it
            also carries ``count`` and the ``identifiers`` actually saved.
            Exceptions are caught and reported as ``success: false``.
        """
        logger.info(
            f"执行工具: {self.name}, 规则数量: {len(identifiers) if identifiers else 0}"
        )
        try:
            if identifiers is None:
                return json.dumps(
                    {"success": False, "message": "必须提供 identifiers 参数"},
                    ensure_ascii=False,
                )

            # Drop None and empty / whitespace-only entries so blank rules are
            # neither saved nor counted. Kept rules are not stripped, because
            # whitespace inside a rule is significant.
            identifiers = [
                rule
                for rule in identifiers
                if rule is not None and str(rule).strip()
            ]

            # An empty list is stored as None rather than [].
            value = identifiers if identifiers else None
            success = await SystemConfigOper().async_set(
                SystemConfigKey.CustomIdentifiers, value
            )
            if not success:
                return json.dumps(
                    {"success": False, "message": "保存自定义识别词失败"},
                    ensure_ascii=False,
                )

            return json.dumps(
                {
                    "success": True,
                    "message": f"自定义识别词已更新,共 {len(identifiers)} 条规则",
                    "count": len(identifiers),
                    "identifiers": identifiers,
                },
                ensure_ascii=False,
                indent=2,
            )
        except Exception as e:
            # Tool boundary: report the failure to the agent as JSON.
            logger.error(f"更新自定义识别词失败: {e}")
            return json.dumps(
                {"success": False, "message": f"更新自定义识别词时发生错误: {str(e)}"},
                ensure_ascii=False,
            )
|
||||
226
skills/generate-identifiers/SKILL.md
Normal file
226
skills/generate-identifiers/SKILL.md
Normal file
@@ -0,0 +1,226 @@
|
||||
---
|
||||
name: generate-identifiers
|
||||
description: >-
|
||||
Use this skill when a user provides a torrent name or file name and wants to fix recognition issues,
|
||||
or asks to add/manage custom identifiers (自定义识别词).
|
||||
This skill generates identifier rules based on the WordsMatcher preprocessing logic,
|
||||
checks for duplicates against existing rules, and saves them via MCP tools.
|
||||
Applicable scenarios include:
|
||||
1) A torrent or file name is incorrectly recognized (wrong title, season, episode, etc.);
|
||||
2) The user wants to block unwanted keywords from torrent names;
|
||||
3) The user needs episode offset rules for series with non-standard numbering;
|
||||
4) The user wants to force recognition of a specific media by TMDB/Douban ID.
|
||||
allowed-tools: query_custom_identifiers update_custom_identifiers recognize_media
|
||||
---
|
||||
|
||||
# Generate Custom Identifiers (生成自定义识别词)
|
||||
|
||||
This skill helps generate custom identifier rules for MoviePilot's media recognition system. Custom identifiers preprocess torrent/file names before the recognition engine runs, correcting naming issues that cause misidentification.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You need the following tools:
|
||||
- `query_custom_identifiers` - Query all existing custom identifier rules
|
||||
- `update_custom_identifiers` - Save the updated identifier list (replaces the full list)
|
||||
- `recognize_media` - Test recognition of a torrent title or file path (optional, for verification)
|
||||
|
||||
## Supported Rule Formats
|
||||
|
||||
There are **four formats**. Operators must have spaces on both sides.
|
||||
|
||||
### 1. Block Word (屏蔽词)
|
||||
|
||||
Removes matched text from the title. Supports regex.
|
||||
|
||||
```
|
||||
REPACK
|
||||
```
|
||||
|
||||
### 2. Replacement (被替换词 => 替换词)
|
||||
|
||||
Regex substitution. The left side is a regex pattern, the right side is the replacement (supports backreferences).
|
||||
|
||||
```
|
||||
被替换词 => 替换词
|
||||
```
|
||||
|
||||
**Special replacement for direct ID specification:**
|
||||
```
|
||||
被替换词 => {[tmdbid=xxx;type=movie/tv;s=xxx;e=xxx]}
|
||||
被替换词 => {[doubanid=xxx;type=movie/tv;s=xxx;e=xxx]}
|
||||
```
|
||||
Where `s` (season) and `e` (episode) are optional.
|
||||
|
||||
### 3. Episode Offset (集偏移)
|
||||
|
||||
Shifts episode numbers found between the front and back delimiter words. `EP` is the placeholder for the original episode number.
|
||||
|
||||
```
|
||||
前定位词 <> 后定位词 >> EP-12
|
||||
```
|
||||
|
||||
### 4. Combined Replacement + Episode Offset
|
||||
|
||||
First performs replacement; episode offset only runs if replacement succeeded.
|
||||
|
||||
```
|
||||
被替换词 => 替换词 && 前定位词 <> 后定位词 >> EP-12
|
||||
```
|
||||
|
||||
### Comments
|
||||
|
||||
Lines starting with `#` are comments and will be skipped during processing.
|
||||
|
||||
## Important Rules for Writing Identifiers
|
||||
|
||||
1. **Regex support**: All patterns support regular expressions. Special characters (`. * + ? ^ $ { } [ ] ( ) | \`) must be escaped with `\` when matching literally.
|
||||
2. **Spaces matter**: The operators ` => `, ` <> `, ` >> `, ` && ` must have spaces on both sides.
|
||||
3. **One rule per string**: Each element in the identifiers list is one rule.
|
||||
4. **EP placeholder**: In episode offset expressions, `EP` represents the original episode number. Common patterns:
|
||||
- `EP-12` means subtract 12
|
||||
- `EP+5` means add 5
|
||||
- `EP*2` means multiply by 2
|
||||
5. **Chinese number support**: Episode offset handles Chinese numbers (一二三四五六七八九十).
|
||||
6. **Empty replacement**: Leaving the replacement side empty after ` => ` removes the matched text, which is equivalent to a block word.
|
||||
|
||||
## Workflow
|
||||
|
||||
### Step 1: Analyze the Problem
|
||||
|
||||
Parse the torrent/file name provided by the user. Identify:
|
||||
- What is being incorrectly recognized (title, season, episode, year, quality, etc.)
|
||||
- What the correct recognition result should be
|
||||
- Which identifier format(s) will solve the problem
|
||||
|
||||
### Step 2: Generate the Identifier Rule(s)
|
||||
|
||||
Write the rule using the appropriate format. Ensure:
|
||||
- Regex special characters are properly escaped
|
||||
- A comment line (starting with `#`) is placed above the rule to describe what it does
|
||||
- The regex has been checked mentally against the provided name to verify correctness
|
||||
|
||||
### Step 3: Query Existing Identifiers
|
||||
|
||||
Use the `query_custom_identifiers` tool to get all current rules:
|
||||
|
||||
```
|
||||
query_custom_identifiers(explanation="Checking existing identifiers before adding new rules to avoid duplicates")
|
||||
```
|
||||
|
||||
### Step 4: Check for Duplicates
|
||||
|
||||
Compare each new rule against the existing identifiers:
|
||||
- **Exact duplicate**: The rule string is identical to an existing rule — skip it
|
||||
- **Functional duplicate**: A different rule that produces the same effect on the same input (e.g., same regex pattern with trivial whitespace differences) — warn the user
|
||||
- **Conflict**: An existing rule modifies the same text in a different way — warn the user and ask which to keep
|
||||
|
||||
### Step 5: Save the Updated Identifiers
|
||||
|
||||
Merge new non-duplicate rules into the existing list, then use `update_custom_identifiers` to save the **complete** list:
|
||||
|
||||
```
|
||||
update_custom_identifiers(
|
||||
explanation="Adding new identifier rules for [description]",
|
||||
identifiers=["existing rule 1", "existing rule 2", "# new comment", "new rule"]
|
||||
)
|
||||
```
|
||||
|
||||
**CRITICAL**: Always include ALL existing rules in the list. This tool replaces the entire list.
|
||||
|
||||
### Step 6: Verify (Optional)
|
||||
|
||||
If the user wants to verify the rule works, use `recognize_media` to test:
|
||||
|
||||
```
|
||||
recognize_media(explanation="Testing recognition after adding identifier", title="the torrent title to test")
|
||||
```
|
||||
|
||||
### Step 7: Report
|
||||
|
||||
Tell the user:
|
||||
- What rule(s) were added
|
||||
- What effect they will have on the title
|
||||
- Whether any duplicates or conflicts were found
|
||||
|
||||
## Common Scenarios and Examples
|
||||
|
||||
### Wrong Season/Episode Parsing
|
||||
|
||||
**User**: "种子名 `[SubGroup] My Show - 13 [1080P]`,这是第二季第1集,但被识别成第13集"
|
||||
|
||||
**Solution**: Episode offset to subtract 12:
|
||||
```
|
||||
# My Show 第二季集数偏移(13->1)
|
||||
\[SubGroup\] <> \[1080P\] >> EP-12
|
||||
```
|
||||
|
||||
### Unwanted Text Causing Wrong Identification
|
||||
|
||||
**User**: "种子名 `My.Show.2024.REPACK.1080p.mkv`,REPACK导致识别异常"
|
||||
|
||||
**Solution**: Block word:
|
||||
```
|
||||
# 屏蔽REPACK标记
|
||||
REPACK
|
||||
```
|
||||
|
||||
### Non-Standard Naming
|
||||
|
||||
**User**: "文件名 `[OldName] EP01.mkv`,应该识别为 NewName"
|
||||
|
||||
**Solution**: Replacement:
|
||||
```
|
||||
# OldName替换为NewName
|
||||
OldName => NewName
|
||||
```
|
||||
|
||||
### Force TMDB ID Recognition
|
||||
|
||||
**User**: "种子名 `Some.Weird.Name.S01E01.1080p.mkv`,识别不到,TMDB ID是12345,是电视剧"
|
||||
|
||||
**Solution**: Direct ID specification:
|
||||
```
|
||||
# 强制识别Some.Weird.Name为TMDB ID 12345
|
||||
Some\.Weird\.Name => {[tmdbid=12345;type=tv;s=1]}
|
||||
```
|
||||
|
||||
### Combined Fix
|
||||
|
||||
**User**: "种子名 `[Baha][OldTitle][13][1080P]`,标题应该是NewTitle,而且13应该是第二季第1集"
|
||||
|
||||
**Solution**: Combined replacement + episode offset:
|
||||
```
|
||||
# OldTitle替换为NewTitle并偏移集数
|
||||
OldTitle => NewTitle && \[Baha\] <> \[1080P\] >> EP-12
|
||||
```
|
||||
|
||||
### Multiple Episode Numbers in One Title
|
||||
|
||||
**User**: "种子名 `[Group] Title - 13-14 [1080P]`,应该是第1-2集"
|
||||
|
||||
**Solution**: Episode offset (handles multiple numbers between delimiters):
|
||||
```
|
||||
# Title 集数偏移
|
||||
\[Group\] <> \[1080P\] >> EP-12
|
||||
```
|
||||
|
||||
## WordsMatcher Processing Logic Reference
|
||||
|
||||
The `WordsMatcher.prepare()` method (in `app/core/meta/words.py`) processes each rule in order:
|
||||
|
||||
1. Skip empty lines and lines starting with `#`
|
||||
2. Detect format by checking operator presence:
|
||||
- Contains ` => ` AND ` && ` AND ` >> ` AND ` <> ` → Combined format (4)
|
||||
- Contains ` => ` → Replacement format (2)
|
||||
- Contains ` >> ` AND ` <> ` → Episode offset format (3)
|
||||
- Otherwise → Block word format (1)
|
||||
3. For combined format, replacement runs first; episode offset only runs if replacement succeeded
|
||||
4. Returns the modified title and a list of rules that were actually applied
|
||||
5. Priority: the per-subscription `custom_words` parameter takes precedence over the global `CustomIdentifiers` setting
|
||||
|
||||
## Safety Notes
|
||||
|
||||
- Always query existing rules first before updating
|
||||
- Never remove existing rules unless the user explicitly asks
|
||||
- Add comment lines before new rules for maintainability
|
||||
- When uncertain about the correct approach, present multiple options and let the user choose
|
||||
Reference in New Issue
Block a user