mirror of
https://github.com/DrizzleTime/Foxel.git
synced 2026-05-20 07:40:57 +08:00
feat(processors): add processor management
This commit is contained in:
@@ -1,33 +1,53 @@
|
||||
import pkgutil
|
||||
import inspect
|
||||
from importlib import import_module
|
||||
from typing import Dict, Callable
|
||||
import pkgutil
|
||||
from importlib import import_module, reload
|
||||
from pathlib import Path
|
||||
from types import ModuleType
|
||||
from typing import Callable, Dict, Optional
|
||||
|
||||
from .base import BaseProcessor
|
||||
|
||||
ProcessorFactory = Callable[[], BaseProcessor]
|
||||
TYPE_MAP: Dict[str, ProcessorFactory] = {}
|
||||
CONFIG_SCHEMAS: Dict[str, dict] = {}
|
||||
MODULE_MAP: Dict[str, ModuleType] = {}
|
||||
LAST_DISCOVERY_ERRORS: list[str] = []
|
||||
|
||||
|
||||
def discover_processors(force_reload: bool = False) -> list[str]:
|
||||
"""Discover available processor modules and cache their metadata."""
|
||||
import services.processors # 延迟导入以避免循环
|
||||
|
||||
def discover_processors():
|
||||
import services.processors
|
||||
processors_pkg = services.processors
|
||||
TYPE_MAP.clear()
|
||||
CONFIG_SCHEMAS.clear()
|
||||
MODULE_MAP.clear()
|
||||
|
||||
global LAST_DISCOVERY_ERRORS
|
||||
LAST_DISCOVERY_ERRORS = []
|
||||
|
||||
for modinfo in pkgutil.iter_modules(processors_pkg.__path__):
|
||||
if modinfo.name.startswith("_"):
|
||||
continue
|
||||
|
||||
full_name = f"{processors_pkg.__name__}.{modinfo.name}"
|
||||
try:
|
||||
module = import_module(full_name)
|
||||
except Exception:
|
||||
if force_reload:
|
||||
module = reload(module)
|
||||
except Exception as exc:
|
||||
LAST_DISCOVERY_ERRORS.append(f"Failed to import {full_name}: {exc}")
|
||||
continue
|
||||
|
||||
processor_type = getattr(module, "PROCESSOR_TYPE", None)
|
||||
processor_name = getattr(module, "PROCESSOR_NAME", None)
|
||||
supported_exts = getattr(module, "SUPPORTED_EXTS", None)
|
||||
schema = getattr(module, "CONFIG_SCHEMA", None)
|
||||
factory = getattr(module, "PROCESSOR_FACTORY", None)
|
||||
|
||||
if not processor_type:
|
||||
continue
|
||||
|
||||
if factory is None:
|
||||
for attr in module.__dict__.values():
|
||||
if inspect.isclass(attr) and attr.__name__.endswith("Processor"):
|
||||
@@ -35,31 +55,85 @@ def discover_processors():
|
||||
return lambda: cls()
|
||||
factory = _mk()
|
||||
break
|
||||
|
||||
if not callable(factory):
|
||||
LAST_DISCOVERY_ERRORS.append(f"Processor {full_name} missing factory")
|
||||
continue
|
||||
|
||||
try:
|
||||
sample = factory()
|
||||
except Exception as exc:
|
||||
LAST_DISCOVERY_ERRORS.append(f"Failed to instantiate processor {processor_type}: {exc}")
|
||||
continue
|
||||
|
||||
TYPE_MAP[processor_type] = factory
|
||||
MODULE_MAP[processor_type] = module
|
||||
|
||||
produces_file = getattr(module, "produces_file", None)
|
||||
if produces_file is None and hasattr(factory(), "produces_file"):
|
||||
produces_file = getattr(factory(), "produces_file")
|
||||
if produces_file is None and hasattr(sample, "produces_file"):
|
||||
produces_file = getattr(sample, "produces_file")
|
||||
|
||||
module_file = getattr(module, "__file__", None)
|
||||
module_path: Optional[str] = None
|
||||
if module_file:
|
||||
try:
|
||||
module_path = str(Path(module_file).resolve())
|
||||
except Exception:
|
||||
module_path = module_file
|
||||
|
||||
if isinstance(supported_exts, list):
|
||||
normalized_exts = [str(ext) for ext in supported_exts]
|
||||
elif supported_exts:
|
||||
normalized_exts = [str(supported_exts)]
|
||||
else:
|
||||
normalized_exts = []
|
||||
|
||||
if not normalized_exts and hasattr(sample, "supported_exts"):
|
||||
sample_exts = getattr(sample, "supported_exts") or []
|
||||
if isinstance(sample_exts, list):
|
||||
normalized_exts = [str(ext) for ext in sample_exts]
|
||||
|
||||
if isinstance(schema, list):
|
||||
CONFIG_SCHEMAS[processor_type] = {
|
||||
"type": processor_type,
|
||||
"name": processor_name or processor_type,
|
||||
"supported_exts": supported_exts or [],
|
||||
"supported_exts": normalized_exts,
|
||||
"config_schema": schema,
|
||||
"produces_file": produces_file if produces_file is not None else False
|
||||
"produces_file": produces_file if produces_file is not None else False,
|
||||
"module_path": module_path,
|
||||
}
|
||||
|
||||
return LAST_DISCOVERY_ERRORS
|
||||
|
||||
|
||||
def get_config_schemas() -> Dict[str, dict]:
|
||||
return CONFIG_SCHEMAS
|
||||
|
||||
|
||||
def get_config_schema(processor_type: str):
|
||||
return CONFIG_SCHEMAS.get(processor_type)
|
||||
|
||||
|
||||
def get(processor_type: str) -> BaseProcessor:
|
||||
factory = TYPE_MAP.get(processor_type)
|
||||
if factory:
|
||||
return factory()
|
||||
return None
|
||||
|
||||
|
||||
def get_module_path(processor_type: str) -> Optional[str]:
|
||||
meta = CONFIG_SCHEMAS.get(processor_type)
|
||||
if not meta:
|
||||
return None
|
||||
return meta.get("module_path")
|
||||
|
||||
|
||||
def get_last_discovery_errors() -> list[str]:
|
||||
return LAST_DISCOVERY_ERRORS
|
||||
|
||||
|
||||
def reload_processors() -> list[str]:
|
||||
return discover_processors(force_reload=True)
|
||||
|
||||
|
||||
discover_processors()
|
||||
|
||||
@@ -54,7 +54,8 @@ class TaskQueueService:
|
||||
path=params["path"],
|
||||
processor_type=params["processor_type"],
|
||||
config=params["config"],
|
||||
save_to=params["save_to"]
|
||||
save_to=params.get("save_to"),
|
||||
overwrite=params.get("overwrite", False),
|
||||
)
|
||||
task.result = result
|
||||
elif task.name == "automation_task":
|
||||
@@ -119,4 +120,4 @@ class TaskQueueService:
|
||||
await LogService.info("task_queue", "Task worker has been stopped.")
|
||||
|
||||
|
||||
task_queue_service = TaskQueueService()
|
||||
task_queue_service = TaskQueueService()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import Dict, Tuple, Any, Union, AsyncIterator
|
||||
from typing import Dict, Tuple, Any, Union, AsyncIterator, List
|
||||
from fastapi import HTTPException
|
||||
import mimetypes
|
||||
from fastapi.responses import Response
|
||||
@@ -59,6 +59,24 @@ async def _ensure_method(adapter: Any, method: str):
|
||||
return func
|
||||
|
||||
|
||||
async def path_is_directory(path: str) -> bool:
|
||||
"""判断给定路径是否为目录。"""
|
||||
adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
|
||||
rel = rel.rstrip('/')
|
||||
if rel == '':
|
||||
return True
|
||||
stat_func = getattr(adapter_instance, "stat_file", None)
|
||||
if not callable(stat_func):
|
||||
raise HTTPException(501, detail="Adapter does not implement stat_file")
|
||||
try:
|
||||
info = await stat_func(root, rel)
|
||||
except FileNotFoundError:
|
||||
raise HTTPException(404, detail="Path not found")
|
||||
if isinstance(info, dict):
|
||||
return bool(info.get("is_dir"))
|
||||
return False
|
||||
|
||||
|
||||
async def list_virtual_dir(path: str, page_num: int = 1, page_size: int = 50, sort_by: str = "name", sort_order: str = "asc") -> Dict:
|
||||
norm = (path if path.startswith('/') else '/' + path).rstrip('/') or '/'
|
||||
adapters = await StorageAdapter.filter(enabled=True)
|
||||
@@ -476,28 +494,110 @@ async def copy_path(src: str, dst: str, overwrite: bool = False, return_debug: b
|
||||
return debug_info if return_debug else None
|
||||
|
||||
|
||||
async def process_file(path: str, processor_type: str, config: dict, save_to: str = None):
|
||||
"""
|
||||
使用指定处理器处理文件,并可选择保存到新路径
|
||||
:param path: 源文件路径
|
||||
:param processor_type: 处理器类型
|
||||
:param config: 处理器配置
|
||||
:param save_to: 保存路径(可选),不指定则只返回处理结果
|
||||
:return: 处理后的文件内容或保存结果
|
||||
"""
|
||||
data = await read_file(path)
|
||||
async def process_file(
|
||||
path: str,
|
||||
processor_type: str,
|
||||
config: dict,
|
||||
save_to: str | None = None,
|
||||
overwrite: bool = False,
|
||||
) -> Any:
|
||||
"""处理指定路径(文件或目录)。目录会递归处理其下所有文件。"""
|
||||
|
||||
processor = get_processor(processor_type)
|
||||
if not processor:
|
||||
raise HTTPException(
|
||||
400, detail=f"Processor {processor_type} not found")
|
||||
result = await processor.process(data, path, config)
|
||||
if save_to and getattr(processor, "produces_file", False):
|
||||
raise HTTPException(400, detail=f"Processor {processor_type} not found")
|
||||
|
||||
actual_is_dir = await path_is_directory(path)
|
||||
|
||||
supported_exts = getattr(processor, "supported_exts", None) or []
|
||||
allowed_exts = {
|
||||
str(ext).lower().lstrip('.')
|
||||
for ext in supported_exts
|
||||
if isinstance(ext, str)
|
||||
}
|
||||
|
||||
def matches_extension(rel_path: str) -> bool:
|
||||
if not allowed_exts:
|
||||
return True
|
||||
if '.' not in rel_path:
|
||||
return '' in allowed_exts
|
||||
ext = rel_path.rsplit('.', 1)[-1].lower()
|
||||
return ext in allowed_exts or f'.{ext}' in allowed_exts
|
||||
|
||||
def coerce_result_bytes(result: Any) -> bytes:
|
||||
if isinstance(result, Response):
|
||||
result_bytes = result.body
|
||||
else:
|
||||
result_bytes = result
|
||||
await write_file(save_to, result_bytes)
|
||||
return {"saved_to": save_to}
|
||||
return result.body
|
||||
if isinstance(result, (bytes, bytearray)):
|
||||
return bytes(result)
|
||||
if isinstance(result, str):
|
||||
return result.encode('utf-8')
|
||||
raise HTTPException(500, detail="Processor must return bytes/Response when produces_file=True")
|
||||
|
||||
def build_absolute_path(mount_path: str, rel_path: str) -> str:
|
||||
rel_norm = rel_path.lstrip('/')
|
||||
mount_norm = mount_path.rstrip('/')
|
||||
if not mount_norm:
|
||||
return '/' + rel_norm if rel_norm else '/'
|
||||
return f"{mount_norm}/{rel_norm}" if rel_norm else mount_norm
|
||||
|
||||
if actual_is_dir:
|
||||
if save_to:
|
||||
raise HTTPException(400, detail="Directory processing does not support custom save_to path")
|
||||
if not overwrite:
|
||||
raise HTTPException(400, detail="Directory processing requires overwrite")
|
||||
|
||||
adapter_instance, adapter_model, root, rel = await resolve_adapter_and_rel(path)
|
||||
rel = rel.rstrip('/')
|
||||
list_dir = await _ensure_method(adapter_instance, "list_dir")
|
||||
processed_count = 0
|
||||
stack: List[str] = [rel]
|
||||
page_size = 200
|
||||
|
||||
while stack:
|
||||
current = stack.pop()
|
||||
page = 1
|
||||
while True:
|
||||
entries, total = await list_dir(root, current, page, page_size, "name", "asc")
|
||||
if not entries and (total or 0) == 0:
|
||||
break
|
||||
|
||||
for entry in entries:
|
||||
name = entry.get("name")
|
||||
if not name:
|
||||
continue
|
||||
child_rel = f"{current}/{name}" if current else name
|
||||
if entry.get("is_dir"):
|
||||
stack.append(child_rel)
|
||||
continue
|
||||
if not matches_extension(child_rel):
|
||||
continue
|
||||
absolute_path = build_absolute_path(adapter_model.path, child_rel)
|
||||
data = await read_file(absolute_path)
|
||||
result = await processor.process(data, absolute_path, config)
|
||||
if getattr(processor, "produces_file", False):
|
||||
result_bytes = coerce_result_bytes(result)
|
||||
await write_file(absolute_path, result_bytes)
|
||||
processed_count += 1
|
||||
|
||||
if total is None or page * page_size >= total:
|
||||
break
|
||||
page += 1
|
||||
|
||||
return {"processed_files": processed_count}
|
||||
|
||||
# 单文件处理
|
||||
data = await read_file(path)
|
||||
result = await processor.process(data, path, config)
|
||||
|
||||
target_path = save_to
|
||||
if overwrite and not target_path:
|
||||
target_path = path
|
||||
|
||||
if target_path and getattr(processor, "produces_file", False):
|
||||
result_bytes = coerce_result_bytes(result)
|
||||
await write_file(target_path, result_bytes)
|
||||
return {"saved_to": target_path}
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user