Files
MoviePilot/skills/feedback-issue/scripts/collect_feedback_diagnostics.py
2026-06-15 12:47:52 +08:00

412 lines
14 KiB
Python

"""收集 feedback-issue 提交流程需要的本地诊断日志。"""
from __future__ import annotations
import argparse
import re
import shutil
import subprocess
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional
from feedback_issue_common import (
MAX_LOGS_CHARS,
format_log_selection,
feedback_runtime_dir,
result_payload,
runtime_file,
sanitize_logs,
settings,
write_json_file,
)
_MAX_READ_BYTES = 512 * 1024
_DEFAULT_TIME_WINDOW_MINUTES = 30
_MIN_TIME_WINDOW_MINUTES = 5
_MAX_TIME_WINDOW_MINUTES = 24 * 60
_LOG_TIMESTAMP_RE = re.compile(r"(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})")
_LOG_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
_LOG_MODULE_RE = re.compile(
r"^【[^】]+】\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d+\s+-\s+([^\s][^\-]*?)\s+-\s+"
)
_META_NOISE_MODULES = frozenset({
"collect_feedback_diagnostics.py",
"prepare_feedback_issue.py",
"submit_feedback_issue.py",
"ask_user_choice.py",
"base.py",
"agent",
"factory.py",
"callback",
"prompt",
"memory.py",
"activity_log.py",
"message.py",
"event.py",
"chain",
"discord",
"telegram",
"telegram.py",
"execute_command.py",
})
_VAGUE_KEYWORDS = frozenset({
"错误", "异常", "失败", "error", "exception", "failed", "warn", "warning",
"日志", "问题", "bug", "log", "logs",
})
_FEEDBACK_VERB_PHRASES: tuple[str, ...] = (
"反馈", "提交", "上报", "汇报",
"提 issue", "提issue", "提 bug", "提bug",
"提需求", "提交需求", "反馈需求", "提功能", "功能请求",
"报 bug", "报bug", "报告 bug", "报告bug",
"新建 issue", "新建issue", "开 issue", "开issue",
"让上游", "给上游",
"file an issue", "report a bug", "open an upstream issue",
"submit an issue", "raise an issue", "report this upstream",
"report upstream", "feature request", "submit a feature request",
"open a feature request",
)
_FEEDBACK_TARGET_TOKENS: tuple[str, ...] = (
"issue", "bug", "问题", "错误报告",
"上游", "mp", "moviepilot", "需求", "功能", "feature",
)
_FEEDBACK_STANDALONE_PHRASES: tuple[str, ...] = (
"file an issue", "report a bug", "open an upstream issue",
"submit an issue", "raise an issue", "report this upstream",
"report upstream", "feature request", "submit a feature request",
"open a feature request",
"新建 issue", "新建issue", "开 issue", "开issue",
"提 issue", "提issue", "提 bug", "提bug",
"提需求", "提交需求", "反馈需求", "提功能请求", "功能请求",
"报 bug", "报bug", "报告 bug", "报告bug",
"让上游", "给上游",
)
_FEEDBACK_REGEX_PATTERNS: tuple[re.Pattern, ...] = (
re.compile(r"提.{0,6}(bug|issue|问题|错误报告)", re.IGNORECASE),
re.compile(r"提.{0,6}(需求|功能请求|feature request)", re.IGNORECASE),
re.compile(r"提交.{0,6}(需求|功能请求|feature request)", re.IGNORECASE),
re.compile(r"报.{0,6}(bug|issue|错误报告)", re.IGNORECASE),
re.compile(r"反馈.{0,8}(issue|bug|问题|上游|错误)", re.IGNORECASE),
re.compile(r"反馈.{0,8}(需求|功能请求|feature request)", re.IGNORECASE),
re.compile(r"开.{0,4}(issue|bug)", re.IGNORECASE),
re.compile(r"开.{0,8}(需求|功能请求|feature request)", re.IGNORECASE),
re.compile(r"上报.{0,6}(bug|issue|问题|错误)", re.IGNORECASE),
)
def read_tail(path: Path) -> str:
"""读取日志文件尾部,避免大日志一次性进入内存。"""
try:
size = path.stat().st_size
with path.open("rb") as file_obj:
if size > _MAX_READ_BYTES:
file_obj.seek(size - _MAX_READ_BYTES)
return file_obj.read().decode("utf-8", errors="replace")
except OSError:
return ""
def candidate_log_files() -> list[Path]:
"""返回反馈诊断可读取的主日志和插件日志文件。"""
files = [settings.LOG_PATH / "moviepilot.log"]
plugin_log_dir = settings.LOG_PATH / "plugins"
if plugin_log_dir.exists():
files.extend(sorted(plugin_log_dir.rglob("*.log")))
return [path for path in files if path.exists() and path.is_file()]
def collect_doctor_report() -> dict:
"""调用离线 doctor 命令收集结构化诊断报告。"""
commands = []
moviepilot_bin = shutil.which("moviepilot")
if moviepilot_bin:
commands.append([moviepilot_bin, "doctor", "--json"])
commands.append([sys.executable, "-m", "app.cli", "doctor", "--json"])
for command in commands:
try:
result = subprocess.run(
command,
cwd=str(settings.ROOT_PATH),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
encoding="utf-8",
errors="replace",
timeout=30,
check=False,
)
except (OSError, subprocess.TimeoutExpired) as err:
last_error = str(err)
continue
output = (result.stdout or "").strip()
if not output:
last_error = f"{' '.join(command)} 没有输出"
continue
try:
payload = json_loads_from_output(output)
except ValueError as err:
last_error = str(err)
continue
payload["_command"] = " ".join(command)
payload["_returncode"] = result.returncode
return {
"success": True,
"report": payload,
}
return {
"success": False,
"error": last_error if "last_error" in locals() else "doctor 命令不可用",
}
def json_loads_from_output(output: str) -> dict:
"""从命令输出中解析 doctor JSON 对象。"""
import json
start = output.find("{")
end = output.rfind("}")
if start == -1 or end == -1 or end < start:
raise ValueError("doctor 输出中未找到 JSON 对象")
payload = json.loads(output[start:end + 1])
if not isinstance(payload, dict):
raise ValueError("doctor JSON 顶层不是对象")
return payload
def normalize_keywords(keywords: Optional[list[str]]) -> list[str]:
"""过滤掉过短或过于宽泛的日志关键词。"""
normalized: list[str] = []
for item in keywords or []:
item = str(item or "").strip()
if len(item) < 2:
continue
if item.lower() in _VAGUE_KEYWORDS:
continue
if item not in normalized:
normalized.append(item)
return normalized
def has_explicit_feedback_intent(original_user_request: str) -> bool:
"""判断用户原话里是否出现明确要求提 Issue 的意图。"""
if not original_user_request:
return False
normalized = original_user_request.lower().strip()
if any(phrase in normalized for phrase in _FEEDBACK_STANDALONE_PHRASES):
return True
if any(pattern.search(normalized) for pattern in _FEEDBACK_REGEX_PATTERNS):
return True
has_verb = any(phrase in normalized for phrase in _FEEDBACK_VERB_PHRASES)
has_target = any(token in normalized for token in _FEEDBACK_TARGET_TOKENS)
return has_verb and has_target
def normalize_window(time_window_minutes: int) -> int:
"""把传入的时间窗限制到 5 到 1440 分钟之间。"""
try:
window = int(time_window_minutes or _DEFAULT_TIME_WINDOW_MINUTES)
except (TypeError, ValueError):
window = _DEFAULT_TIME_WINDOW_MINUTES
return max(_MIN_TIME_WINDOW_MINUTES, min(_MAX_TIME_WINDOW_MINUTES, window))
def parse_line_timestamp(line: str) -> Optional[datetime]:
"""从一行日志开头提取时间戳;提取不到返回 None。"""
match = _LOG_TIMESTAMP_RE.search(line[:64])
if not match:
return None
try:
return datetime.strptime(match.group(1), _LOG_TIMESTAMP_FORMAT)
except ValueError:
return None
def is_meta_noise(line: str) -> bool:
"""判断日志行是否来自 Agent 自身的工具调度或消息框架噪音。"""
match = _LOG_MODULE_RE.match(line)
if not match:
return False
return match.group(1).strip() in _META_NOISE_MODULES
def filter_lines(
text: str,
keywords: list[str],
max_lines: int,
window_start: datetime,
) -> tuple[list[str], list[str]]:
"""按时间窗、模块噪音和关键词筛选日志行。"""
candidates: list[str] = []
last_seen_in_window: Optional[bool] = None
last_seen_was_meta = False
for line in text.splitlines():
if not line.strip():
continue
timestamp = parse_line_timestamp(line)
if timestamp is not None:
in_window = timestamp >= window_start
meta = is_meta_noise(line)
last_seen_was_meta = meta
last_seen_in_window = in_window and not meta
if in_window and not meta:
candidates.append(line)
elif last_seen_in_window and not last_seen_was_meta:
candidates.append(line)
if not candidates:
return [], []
if not keywords:
return [], []
lowered_keywords = [item.lower() for item in keywords]
matched: list[str] = []
matched_keywords: set[str] = set()
keep_block = False
for line in candidates:
has_timestamp = parse_line_timestamp(line) is not None
if has_timestamp:
line_keywords = [
keyword for keyword, lowered in zip(keywords, lowered_keywords)
if lowered in line.lower()
]
keep_block = bool(line_keywords)
if keep_block:
matched_keywords.update(line_keywords)
matched.append(line)
elif keep_block:
matched.append(line)
if matched:
return matched[-max_lines:], sorted(matched_keywords)
return [], []
def collect_diagnostics(
*,
original_user_request: str,
keywords: list[str],
max_lines: int,
time_window_minutes: int,
) -> dict:
"""读取日志、筛选、脱敏并写入运行时诊断文件。"""
if not has_explicit_feedback_intent(original_user_request):
return {
"success": False,
"reason": "no_explicit_feedback_intent",
"message": (
"用户原话里没有明确要求向上游反馈 Issue 的短语,"
"请先回到常规诊断路径;只有明确说出反馈 issue / 提 issue / 报 bug "
"等意图时才运行 feedback-issue 流程。"
),
}
normalized_max_lines = min(max(int(max_lines or 80), 20), 200)
window_minutes = normalize_window(time_window_minutes)
window_start = datetime.now() - timedelta(minutes=window_minutes)
normalized_keywords = normalize_keywords(keywords)
collected: list[str] = []
source_files: list[str] = []
matched_files: list[dict] = []
for path in candidate_log_files():
text = read_tail(path)
if not text:
continue
lines, matched_keywords = filter_lines(
text=text,
keywords=normalized_keywords,
max_lines=normalized_max_lines,
window_start=window_start,
)
if not lines:
continue
source_files.append(str(path))
matched_files.append({
"path": str(path),
"matched_keywords": matched_keywords,
"line_count": len(lines),
})
collected.append(f"### {path.name}\n" + "\n".join(lines))
logs = sanitize_logs("\n\n".join(collected), MAX_LOGS_CHARS)
log_selection = {
"strategy": "time_window_and_keyword_block_match",
"time_window_minutes": window_minutes,
"window_start": window_start.isoformat(timespec="seconds"),
"keywords": normalized_keywords,
"max_lines_per_file": normalized_max_lines,
"matched_files": matched_files,
"warning": (
"未提供具体关键词,已跳过日志正文收集以避免误带无关日志。"
if not normalized_keywords else ""
),
}
diagnostics_file = runtime_file("diagnostics", ".json")
diagnostics = {
"original_user_request": original_user_request,
"keywords": normalized_keywords,
"found": bool(logs.strip()),
"logs": logs,
"log_selection": log_selection,
"doctor": collect_doctor_report(),
"source_files": source_files,
"created_at": datetime.now().isoformat(timespec="seconds"),
}
write_json_file(diagnostics_file, diagnostics)
return {
"success": True,
"found": diagnostics["found"],
"diagnostics_file": str(diagnostics_file),
"runtime_dir": str(feedback_runtime_dir()),
"source_files": source_files,
"log_selection_summary": format_log_selection(log_selection),
"log_bytes": len(logs.encode("utf-8", errors="replace")),
"log_lines": len(logs.splitlines()) if logs else 0,
"doctor_collected": bool(diagnostics["doctor"].get("success")),
"message": (
"已收集并写入反馈诊断日志文件。"
if logs
else "已完成诊断日志收集,但未找到明显相关日志。"
),
}
def parse_args() -> argparse.Namespace:
"""解析命令行参数。"""
parser = argparse.ArgumentParser(description="收集 MoviePilot 反馈 Issue 诊断日志")
parser.add_argument("--original-user-request", required=True, help="触发反馈的用户原话")
parser.add_argument("--keyword", action="append", default=[], help="用于过滤日志的具体关键词,可重复")
parser.add_argument("--max-lines", type=int, default=80, help="最多保留的日志行数")
parser.add_argument(
"--time-window-minutes",
type=int,
default=_DEFAULT_TIME_WINDOW_MINUTES,
help="只收集最近 N 分钟日志,默认 30",
)
return parser.parse_args()
def main() -> int:
"""脚本入口:输出 JSON 结果给 Agent 解析。"""
args = parse_args()
result = collect_diagnostics(
original_user_request=args.original_user_request,
keywords=args.keyword,
max_lines=args.max_lines,
time_window_minutes=args.time_window_minutes,
)
print(result_payload(**result))
return 0 if result.get("success") else 2
if __name__ == "__main__":
raise SystemExit(main())