Files
Foxel/services/virtual_fs.py

1154 lines
41 KiB
Python

from __future__ import annotations
from typing import Dict, Tuple, Any, Union, AsyncIterator, List, TYPE_CHECKING
from fastapi import HTTPException
import mimetypes
from fastapi.responses import Response
import time
import hmac
import hashlib
import base64
from pathlib import Path
import shutil
import aiofiles
from models import StorageAdapter
from .adapters.registry import runtime_registry
from api.response import page
from .thumbnail import is_image_filename, is_raw_filename
from services.processors.registry import get as get_processor
from services.tasks import task_service
from services.logging import LogService
from services.config import ConfigCenter
from services.vector_db import VectorDBService
CROSS_TRANSFER_TEMP_ROOT = Path("data/tmp/cross_transfer")
DIRECT_REDIRECT_CONFIG_KEY = "enable_direct_download_307"
if TYPE_CHECKING:
from services.task_queue import Task
def _build_absolute_path(mount_path: str, rel_path: str) -> str:
rel_norm = rel_path.lstrip('/')
mount_norm = mount_path.rstrip('/')
if not mount_norm:
return '/' + rel_norm if rel_norm else '/'
return f"{mount_norm}/{rel_norm}" if rel_norm else mount_norm
def _join_rel(base: str, name: str) -> str:
if not base:
return name.lstrip('/')
if not name:
return base
return f"{base.rstrip('/')}/{name.lstrip('/')}"
def _parent_rel(rel: str) -> str:
if not rel:
return ''
if '/' not in rel:
return ''
return rel.rsplit('/', 1)[0]
async def resolve_adapter_by_path(path: str) -> Tuple[StorageAdapter, str]:
norm = path if path.startswith('/') else '/' + path
adapters = await StorageAdapter.filter(enabled=True)
best = None
for a in adapters:
if norm == a.path or norm.startswith(a.path.rstrip('/') + '/'):
if (best is None) or len(a.path) > len(best.path):
best = a
if not best:
raise HTTPException(404, detail="No storage adapter for path")
rel = norm[len(best.path):].lstrip('/')
return best, rel
async def resolve_adapter_and_rel(path: str):
"""返回 (adapter_instance, adapter_model, effective_root, rel_path)."""
norm = path if path.startswith('/') else '/' + path
try:
adapter_model, rel = await resolve_adapter_by_path(norm)
except HTTPException as e:
raise e
adapter_instance = runtime_registry.get(adapter_model.id)
if not adapter_instance:
await runtime_registry.refresh()
adapter_instance = runtime_registry.get(adapter_model.id)
if not adapter_instance:
raise HTTPException(
404, detail=f"Adapter instance for ID {adapter_model.id} not found or failed to load."
)
effective_root = adapter_instance.get_effective_root(adapter_model.sub_path)
return adapter_instance, adapter_model, effective_root, rel
async def maybe_redirect_download(adapter_instance, adapter_model, root: str, rel: str):
"""若适配器启用了 307 直链,尝试构造重定向响应。"""
if not rel or rel.endswith('/'):
return None
config = getattr(adapter_model, "config", {}) or {}
if not config.get(DIRECT_REDIRECT_CONFIG_KEY):
return None
handler = getattr(adapter_instance, "get_direct_download_response", None)
if not callable(handler):
return None
try:
response = await handler(root, rel)
except FileNotFoundError:
raise
except Exception:
return None
if isinstance(response, Response):
return response
return None
async def _ensure_method(adapter: Any, method: str):
func = getattr(adapter, method, None)
if not callable(func):
raise HTTPException(501, detail=f"Adapter does not implement {method}")
return func
async def path_is_directory(path: str) -> bool:
"""判断给定路径是否为目录。"""
adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
rel = rel.rstrip('/')
if rel == '':
return True
stat_func = getattr(adapter_instance, "stat_file", None)
if not callable(stat_func):
raise HTTPException(501, detail="Adapter does not implement stat_file")
try:
info = await stat_func(root, rel)
except FileNotFoundError:
raise HTTPException(404, detail="Path not found")
if isinstance(info, dict):
return bool(info.get("is_dir"))
return False
async def list_virtual_dir(path: str, page_num: int = 1, page_size: int = 50, sort_by: str = "name", sort_order: str = "asc") -> Dict:
norm = (path if path.startswith('/') else '/' + path).rstrip('/') or '/'
adapters = await StorageAdapter.filter(enabled=True)
child_mount_entries: List[str] = []
norm_prefix = norm.rstrip('/')
for a in adapters:
if a.path == norm:
continue
if a.path.startswith(norm_prefix + '/'):
tail = a.path[len(norm_prefix):].lstrip('/')
if '/' not in tail:
child_mount_entries.append(tail)
child_mount_entries = sorted(set(child_mount_entries))
sort_field = sort_by.lower()
reverse = sort_order.lower() == "desc"
def build_sort_key(item: Dict) -> Tuple:
key = (not bool(item.get("is_dir")),)
if sort_field == "name":
key += (str(item.get("name", "")).lower(),)
elif sort_field == "size":
key += (int(item.get("size", 0)),)
elif sort_field == "mtime":
key += (int(item.get("mtime", 0)),)
else:
key += (str(item.get("name", "")).lower(),)
return key
def annotate_entry(entry: Dict) -> None:
if not entry.get("is_dir"):
entry["has_thumbnail"] = is_image_filename(entry.get("name", ""))
else:
entry["has_thumbnail"] = False
try:
adapter_model, rel = await resolve_adapter_by_path(norm)
adapter_instance = runtime_registry.get(adapter_model.id)
if not adapter_instance:
await runtime_registry.refresh()
adapter_instance = runtime_registry.get(adapter_model.id)
if adapter_instance:
effective_root = adapter_instance.get_effective_root(adapter_model.sub_path)
else:
adapter_model = None
effective_root = ""
rel = ""
except HTTPException:
adapter_model = None
adapter_instance = None
effective_root = ''
rel = ''
adapter_entries_page: List[Dict] = []
adapter_entries_for_merge: List[Dict] = []
adapter_total = 0
covered = set()
if adapter_model and adapter_instance:
list_dir = await _ensure_method(adapter_instance, "list_dir")
try:
adapter_entries_page, adapter_total = await list_dir(effective_root, rel, page_num, page_size, sort_by, sort_order)
except NotADirectoryError:
raise HTTPException(400, detail="Not a directory")
adapter_entries_for_merge = adapter_entries_page
# 存在挂载节点且适配器结果被分页时,补齐完整列表以便合并排序
if child_mount_entries and adapter_total > len(adapter_entries_page):
full_page_size = adapter_total
if full_page_size > 0:
adapter_entries_for_merge, adapter_total = await list_dir(
effective_root, rel, 1, full_page_size, sort_by, sort_order
)
else:
adapter_entries_for_merge = adapter_entries_page
for item in adapter_entries_for_merge:
covered.add(item["name"])
mount_entries = []
for name in child_mount_entries:
if name not in covered:
mount_entries.append({"name": name, "is_dir": True,
"size": 0, "mtime": 0, "type": "mount", "has_thumbnail": False})
if mount_entries:
for ent in adapter_entries_for_merge:
annotate_entry(ent)
combined_entries = adapter_entries_for_merge + [
{**ent, "has_thumbnail": False} for ent in mount_entries
]
combined_entries.sort(key=build_sort_key, reverse=reverse)
total_entries = len(combined_entries)
start_idx = (page_num - 1) * page_size
end_idx = start_idx + page_size
page_entries = combined_entries[start_idx:end_idx]
return page(page_entries, total_entries, page_num, page_size)
annotate_entry_list = adapter_entries_page or []
for ent in annotate_entry_list:
annotate_entry(ent)
return page(adapter_entries_page, adapter_total, page_num, page_size)
async def read_file(path: str) -> Union[bytes, Any]:
adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
if rel.endswith('/') or rel == '':
raise HTTPException(400, detail="Path is a directory")
read_func = await _ensure_method(adapter_instance, "read_file")
return await read_func(root, rel)
async def write_file(path: str, data: bytes):
adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
if rel.endswith('/'):
raise HTTPException(400, detail="Invalid file path")
write_func = await _ensure_method(adapter_instance, "write_file")
await write_func(root, rel, data)
await task_service.trigger_tasks("file_written", path)
await LogService.action(
"virtual_fs", f"Wrote file to {path}", details={"path": path, "size": len(data)}
)
async def write_file_stream(path: str, data_iter: AsyncIterator[bytes], overwrite: bool = True):
adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
if rel.endswith('/'):
raise HTTPException(400, detail="Invalid file path")
exists_func = getattr(adapter_instance, "exists", None)
if not overwrite and callable(exists_func):
try:
if await exists_func(root, rel):
raise HTTPException(409, detail="Destination exists")
except HTTPException:
raise
except Exception:
pass
size = 0
stream_func = getattr(adapter_instance, "write_file_stream", None)
if callable(stream_func):
size = await stream_func(root, rel, data_iter)
else:
buf = bytearray()
async for chunk in data_iter:
if chunk:
buf.extend(chunk)
write_func = await _ensure_method(adapter_instance, "write_file")
await write_func(root, rel, bytes(buf))
size = len(buf)
await task_service.trigger_tasks("file_written", path)
await LogService.action(
"virtual_fs",
f"Wrote file stream to {path}",
details={"path": path, "size": size},
)
return size
async def make_dir(path: str):
adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
if not rel:
raise HTTPException(400, detail="Cannot create root")
mkdir_func = await _ensure_method(adapter_instance, "mkdir")
await mkdir_func(root, rel)
await LogService.action("virtual_fs", f"Created directory {path}", details={"path": path})
async def delete_path(path: str):
adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
if not rel:
raise HTTPException(400, detail="Cannot delete root")
delete_func = await _ensure_method(adapter_instance, "delete")
await delete_func(root, rel)
await task_service.trigger_tasks("file_deleted", path)
await LogService.action("virtual_fs", f"Deleted {path}", details={"path": path})
async def move_path(
src: str,
dst: str,
overwrite: bool = False,
return_debug: bool = True,
allow_cross: bool = False,
):
adapter_s, adapter_model_s, root_s, rel_s = await resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await resolve_adapter_and_rel(dst)
debug_info = {
"src": src, "dst": dst,
"rel_s": rel_s, "rel_d": rel_d,
"root_s": root_s, "root_d": root_d,
"overwrite": overwrite,
"operation": "move",
"queued": False,
}
if not rel_s:
raise HTTPException(400, detail="Cannot move or rename mount root")
if not rel_d:
raise HTTPException(400, detail="Invalid destination")
if adapter_model_s.id != adapter_model_d.id:
if not allow_cross:
raise HTTPException(400, detail="Cross-adapter move not supported")
queue_info = await _enqueue_cross_mount_transfer(
operation="move",
src=src,
dst=dst,
overwrite=overwrite,
)
debug_info.update(queue_info)
return debug_info if return_debug else None
exists_func = getattr(adapter_s, "exists", None)
stat_func = getattr(adapter_s, "stat_path", None)
delete_func = await _ensure_method(adapter_s, "delete")
move_func = await _ensure_method(adapter_s, "move")
dst_exists = False
dst_stat = None
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
if callable(stat_func):
dst_stat = await stat_func(root_d, rel_d)
debug_info["dst_exists"] = dst_exists
debug_info["dst_stat"] = dst_stat
if dst_exists and not overwrite:
kind = None
fs_path = None
if dst_stat:
kind = "dir" if dst_stat.get("is_dir") else "file"
fs_path = dst_stat.get("path")
raise HTTPException(
409,
detail=f"Destination already exists(kind={kind}, fs_path={fs_path}, rel_d={rel_d}, overwrite={overwrite})"
)
if dst_exists and overwrite:
try:
await delete_func(root_s, rel_d)
debug_info["pre_delete"] = "ok"
except Exception as e:
debug_info["pre_delete"] = f"error:{e}"
raise HTTPException(
500, detail=f"Pre-delete failed before overwrite: {e}")
if rel_s == rel_d:
debug_info["noop"] = True
return debug_info if return_debug else None
try:
await move_func(root_s, rel_s, rel_d)
debug_info["moved"] = True
except FileNotFoundError:
raise HTTPException(404, detail="Source not found")
except FileExistsError:
raise HTTPException(
409, detail="Destination already exists (race condition after pre-check)")
except IsADirectoryError:
raise HTTPException(400, detail="Invalid directory operation")
except Exception as e:
raise HTTPException(500, detail=f"Move failed: {e}")
await LogService.action(
"virtual_fs", f"Moved {src} to {dst}", details=debug_info
)
return debug_info if return_debug else None
async def rename_path(src: str, dst: str, overwrite: bool = False, return_debug: bool = True):
adapter_s, adapter_model_s, root_s, rel_s = await resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await resolve_adapter_and_rel(dst)
debug_info = {
"src": src, "dst": dst,
"rel_s": rel_s, "rel_d": rel_d,
"root_s": root_s, "root_d": root_d,
"overwrite": overwrite
}
if adapter_model_s.id != adapter_model_d.id:
raise HTTPException(400, detail="Cross-adapter rename not supported")
if not rel_s:
raise HTTPException(400, detail="Cannot rename mount root")
if not rel_d:
raise HTTPException(400, detail="Invalid destination")
exists_func = getattr(adapter_s, "exists", None)
stat_func = getattr(adapter_s, "stat_path", None)
delete_func = await _ensure_method(adapter_s, "delete")
rename_func = await _ensure_method(adapter_s, "rename")
dst_exists = False
dst_stat = None
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
if callable(stat_func):
dst_stat = await stat_func(root_d, rel_d)
debug_info["dst_exists"] = dst_exists
debug_info["dst_stat"] = dst_stat
if dst_exists and not overwrite:
kind = None
fs_path = None
if dst_stat:
kind = "dir" if dst_stat.get("is_dir") else "file"
fs_path = dst_stat.get("path")
raise HTTPException(
409,
detail=f"Destination already exists(kind={kind}, fs_path={fs_path}, rel_d={rel_d}, overwrite={overwrite})"
)
if dst_exists and overwrite:
try:
await delete_func(root_s, rel_d)
debug_info["pre_delete"] = "ok"
except Exception as e:
debug_info["pre_delete"] = f"error:{e}"
raise HTTPException(
500, detail=f"Pre-delete failed before overwrite: {e}")
if rel_s == rel_d:
debug_info["noop"] = True
return debug_info if return_debug else None
try:
await rename_func(root_s, rel_s, rel_d)
debug_info["renamed"] = True
except FileNotFoundError:
raise HTTPException(404, detail="Source not found")
except FileExistsError:
raise HTTPException(
409, detail="Destination already exists (race condition after pre-check)")
except IsADirectoryError:
raise HTTPException(400, detail="Invalid directory operation")
except Exception as e:
raise HTTPException(500, detail=f"Rename failed: {e}")
await LogService.action(
"virtual_fs", f"Renamed {src} to {dst}", details=debug_info
)
return debug_info if return_debug else None
async def stream_file(path: str, range_header: str | None):
adapter_instance, adapter_model, root, rel = await resolve_adapter_and_rel(path)
if not rel or rel.endswith('/'):
raise HTTPException(400, detail="Path is a directory")
if is_raw_filename(rel):
import rawpy
from PIL import Image
import io
try:
raw_data = await read_file(path)
try:
import rawpy
with rawpy.imread(io.BytesIO(raw_data)) as raw:
try:
thumb = raw.extract_thumb()
except rawpy.LibRawNoThumbnailError:
thumb = None
if thumb is not None and thumb.format in [rawpy.ThumbFormat.JPEG, rawpy.ThumbFormat.BITMAP]:
im = Image.open(io.BytesIO(thumb.data))
else:
rgb = raw.postprocess(use_camera_wb=False, use_auto_wb=True, output_bps=8)
im = Image.fromarray(rgb)
except Exception as e:
print(f"rawpy processing failed: {e}")
raise e
buf = io.BytesIO()
im.save(buf, 'JPEG', quality=90)
content = buf.getvalue()
return Response(content=content, media_type='image/jpeg')
except Exception as e:
raise HTTPException(500, detail=f"RAW file processing failed: {e}")
redirect_response = await maybe_redirect_download(adapter_instance, adapter_model, root, rel)
if redirect_response is not None:
return redirect_response
stream_impl = getattr(adapter_instance, "stream_file", None)
if callable(stream_impl):
return await stream_impl(root, rel, range_header)
data = await read_file(path)
mime, _ = mimetypes.guess_type(rel)
return Response(content=data, media_type=mime or "application/octet-stream")
async def _gather_vector_index(full_path: str, limit: int = 20):
"""查询与文件相关的索引信息。失败时返回 None。"""
vector_db = VectorDBService()
try:
raw_results = await vector_db.search_by_path("vector_collection", full_path, max(limit * 2, 20))
except Exception:
return None
matched = []
if raw_results:
buckets = raw_results if isinstance(raw_results, list) else [raw_results]
for bucket in buckets:
if not bucket:
continue
for record in bucket:
entity = dict((record or {}).get("entity") or {})
source_path = entity.get("source_path") or entity.get("path") or ""
if source_path != full_path:
continue
entry = {
"chunk_id": str(entity.get("chunk_id")) if entity.get("chunk_id") is not None else None,
"type": entity.get("type"),
"mime": entity.get("mime"),
"name": entity.get("name"),
"start_offset": entity.get("start_offset"),
"end_offset": entity.get("end_offset"),
"vector_id": entity.get("vector_id"),
}
text = entity.get("text") or entity.get("description")
if text:
preview_limit = 400
entry["preview"] = text[:preview_limit]
entry["preview_truncated"] = len(text) > preview_limit
matched.append(entry)
if not matched:
return {"total": 0, "entries": [], "by_type": {}, "has_more": False}
type_counts: Dict[str, int] = {}
for item in matched:
key = item.get("type") or "unknown"
type_counts[key] = type_counts.get(key, 0) + 1
has_more = len(matched) > limit
return {
"total": len(matched),
"entries": matched[:limit],
"by_type": type_counts,
"has_more": has_more,
"limit": limit,
}
async def stat_file(path: str):
adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
stat_func = getattr(adapter_instance, "stat_file", None)
if not callable(stat_func):
raise HTTPException(501, detail="Adapter does not implement stat_file")
info = await stat_func(root, rel)
if isinstance(info, dict):
info.setdefault("path", path)
try:
is_dir = bool(info.get("is_dir"))
except Exception:
is_dir = False
rel_name = rel.rstrip('/').split('/')[-1] if rel else path.rstrip('/').split('/')[-1]
name_hint = info.get("name") or rel_name
info["has_thumbnail"] = bool(not is_dir and is_image_filename(str(name_hint or "")))
if not is_dir:
vector_index = await _gather_vector_index(path)
if vector_index is not None:
info["vector_index"] = vector_index
return info
async def copy_path(
src: str,
dst: str,
overwrite: bool = False,
return_debug: bool = True,
allow_cross: bool = False,
):
adapter_s, adapter_model_s, root_s, rel_s = await resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await resolve_adapter_and_rel(dst)
debug_info = {
"src": src, "dst": dst,
"rel_s": rel_s, "rel_d": rel_d,
"root_s": root_s, "root_d": root_d,
"overwrite": overwrite,
"operation": "copy",
"queued": False,
}
if not rel_s:
raise HTTPException(400, detail="Cannot copy mount root")
if not rel_d:
raise HTTPException(400, detail="Invalid destination")
if adapter_model_s.id != adapter_model_d.id:
if not allow_cross:
raise HTTPException(400, detail="Cross-adapter copy not supported")
queue_info = await _enqueue_cross_mount_transfer(
operation="copy",
src=src,
dst=dst,
overwrite=overwrite,
)
debug_info.update(queue_info)
return debug_info if return_debug else None
exists_func = getattr(adapter_s, "exists", None)
stat_func = getattr(adapter_s, "stat_path", None)
delete_func = getattr(adapter_s, "delete", None)
copy_func = await _ensure_method(adapter_s, "copy")
dst_exists = False
dst_stat = None
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
if callable(stat_func):
dst_stat = await stat_func(root_d, rel_d)
debug_info["dst_exists"] = dst_exists
debug_info["dst_stat"] = dst_stat
if dst_exists and not overwrite:
raise HTTPException(409, detail="Destination already exists")
if dst_exists and overwrite and callable(delete_func):
try:
await delete_func(root_s, rel_d)
debug_info["pre_delete"] = "ok"
except Exception as e:
debug_info["pre_delete"] = f"error:{e}"
raise HTTPException(500, detail=f"Pre-delete failed: {e}")
if rel_s == rel_d:
debug_info["noop"] = True
return debug_info if return_debug else None
try:
await copy_func(root_s, rel_s, rel_d, overwrite=overwrite)
debug_info["copied"] = True
except FileNotFoundError:
raise HTTPException(404, detail="Source not found")
except FileExistsError:
raise HTTPException(
409, detail="Destination already exists (race condition)")
except Exception as e:
raise HTTPException(500, detail=f"Copy failed: {e}")
await LogService.action(
"virtual_fs", f"Copied {src} to {dst}", details=debug_info
)
return debug_info if return_debug else None
async def _enqueue_cross_mount_transfer(operation: str, src: str, dst: str, overwrite: bool) -> Dict[str, Any]:
if operation not in {"move", "copy"}:
raise HTTPException(400, detail="Unsupported transfer operation")
adapter_s, adapter_model_s, _, _ = await resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await resolve_adapter_and_rel(dst)
if adapter_model_s.id == adapter_model_d.id:
raise HTTPException(400, detail="Cross-adapter transfer requested but adapters are identical")
dst_exists = False
exists_func = getattr(adapter_d, "exists", None)
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
else:
try:
await stat_file(dst)
dst_exists = True
except FileNotFoundError:
dst_exists = False
except HTTPException as exc:
if exc.status_code == 404:
dst_exists = False
else:
raise
if dst_exists and not overwrite:
raise HTTPException(409, detail="Destination already exists")
payload = {
"operation": operation,
"src": src,
"dst": dst,
"overwrite": overwrite,
}
from services.task_queue import task_queue_service
task = await task_queue_service.add_task("cross_mount_transfer", payload)
return {
"queued": True,
"task_id": task.id,
"task_name": "cross_mount_transfer",
"dst_exists": dst_exists,
"cross_adapter": True,
}
async def run_cross_mount_transfer_task(task: "Task") -> Dict[str, Any]:
from services.task_queue import task_queue_service
params = task.task_info or {}
operation = params.get("operation")
src = params.get("src")
dst = params.get("dst")
overwrite = bool(params.get("overwrite", False))
if operation not in {"move", "copy"}:
raise ValueError(f"Unsupported cross mount operation: {operation}")
if not src or not dst:
raise ValueError("Missing src or dst for cross mount transfer")
adapter_s, adapter_model_s, root_s, rel_s = await resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await resolve_adapter_and_rel(dst)
await task_queue_service.update_meta(task.id, {
"operation": operation,
"src": src,
"dst": dst,
})
if adapter_model_s.id == adapter_model_d.id:
if operation == "move":
await move_path(src, dst, overwrite=overwrite, return_debug=False, allow_cross=False)
else:
await copy_path(src, dst, overwrite=overwrite, return_debug=False, allow_cross=False)
return {
"mode": "direct",
"operation": operation,
"src": src,
"dst": dst,
"files": 0,
"bytes": 0,
}
if not rel_s:
raise ValueError("Cannot transfer mount root")
if not rel_d:
raise ValueError("Invalid destination")
dst_exists = False
exists_func = getattr(adapter_d, "exists", None)
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
else:
try:
await stat_file(dst)
dst_exists = True
except FileNotFoundError:
dst_exists = False
except HTTPException as exc:
if exc.status_code != 404:
raise
if dst_exists and not overwrite:
raise ValueError("Destination already exists")
if dst_exists and overwrite:
await delete_path(dst)
try:
src_stat = await stat_file(src)
except HTTPException as exc:
if exc.status_code == 404:
raise FileNotFoundError(src) from exc
raise
src_is_dir = bool(src_stat.get("is_dir"))
files_to_transfer: List[Dict[str, Any]] = []
dirs_to_create: List[str] = []
await task_queue_service.update_progress(task.id, {
"stage": "preparing",
"percent": 0.0,
"detail": "Collecting source entries",
})
if src_is_dir:
if rel_d:
dirs_to_create.append(rel_d)
list_dir = await _ensure_method(adapter_s, "list_dir")
stack: List[Tuple[str, str, str]] = [(rel_s, rel_d, '')]
page_size = 200
while stack:
current_rel, current_dst_rel, current_relative = stack.pop()
page = 1
while True:
entries, total = await list_dir(root_s, current_rel, page, page_size, "name", "asc")
if not entries and (total or 0) == 0:
break
for entry in entries:
name = entry.get("name")
if not name:
continue
child_rel = _join_rel(current_rel, name)
child_dst_rel = _join_rel(current_dst_rel, name)
child_relative = _join_rel(current_relative, name)
if entry.get("is_dir"):
dirs_to_create.append(child_dst_rel)
stack.append((child_rel, child_dst_rel, child_relative))
else:
files_to_transfer.append({
"src_rel": child_rel,
"dst_rel": child_dst_rel,
"relative_rel": child_relative or name,
"size": entry.get("size"),
"name": name,
})
if total is None or page * page_size >= (total or 0):
break
page += 1
else:
relative_rel = rel_s or (src_stat.get("name") or "file")
files_to_transfer.append({
"src_rel": rel_s,
"dst_rel": rel_d,
"relative_rel": relative_rel,
"size": src_stat.get("size"),
"name": src_stat.get("name") or rel_s.split('/')[-1],
})
parent_dir = _parent_rel(rel_d)
if parent_dir:
dirs_to_create.append(parent_dir)
CROSS_TRANSFER_TEMP_ROOT.mkdir(parents=True, exist_ok=True)
temp_dir = CROSS_TRANSFER_TEMP_ROOT / task.id
temp_dir.mkdir(parents=True, exist_ok=True)
bytes_downloaded = 0
total_dynamic_bytes = sum((f["size"] or 0) for f in files_to_transfer)
try:
for job in files_to_transfer:
src_abs = _build_absolute_path(adapter_model_s.path, job["src_rel"])
data = await read_file(src_abs)
temp_path = temp_dir / job["relative_rel"]
temp_path.parent.mkdir(parents=True, exist_ok=True)
async with aiofiles.open(temp_path, "wb") as f:
await f.write(data)
actual_size = len(data)
job["temp_path"] = temp_path
prev_size = job.get("size") or 0
if prev_size <= 0:
total_dynamic_bytes += actual_size
job_size = actual_size
else:
job_size = prev_size
job["size"] = job_size
bytes_downloaded += actual_size
percent = None
total_for_percent = total_dynamic_bytes if total_dynamic_bytes else bytes_downloaded
if total_for_percent:
percent = min(100.0, round(bytes_downloaded / total_for_percent * 100, 2))
await task_queue_service.update_progress(task.id, {
"stage": "downloading",
"percent": percent,
"bytes_done": bytes_downloaded,
"bytes_total": total_dynamic_bytes or None,
"detail": f"Downloaded {job['name']}",
})
mkdir_func = await _ensure_method(adapter_d, "mkdir")
ensured_dirs: set[str] = set()
async def ensure_dir(rel_path: str):
if not rel_path or rel_path in ensured_dirs:
return
parent = _parent_rel(rel_path)
if parent:
await ensure_dir(parent)
try:
await mkdir_func(root_d, rel_path)
except FileExistsError:
pass
except HTTPException as exc:
if exc.status_code not in {409, 400}:
raise
except Exception:
# Assume directory already exists
pass
ensured_dirs.add(rel_path)
for dir_rel in sorted({d for d in dirs_to_create if d}, key=lambda x: x.count('/')):
await ensure_dir(dir_rel)
uploaded_bytes = 0
total_bytes = sum((f["size"] or 0) for f in files_to_transfer)
async def iter_temp_file(path: Path, chunk_size: int = 512 * 1024):
async with aiofiles.open(path, "rb") as f:
while True:
chunk = await f.read(chunk_size)
if not chunk:
break
yield chunk
for job in files_to_transfer:
parent_dir = _parent_rel(job["dst_rel"])
if parent_dir:
await ensure_dir(parent_dir)
dst_abs = _build_absolute_path(adapter_model_d.path, job["dst_rel"])
temp_path: Path = job["temp_path"]
await write_file_stream(dst_abs, iter_temp_file(temp_path), overwrite=overwrite)
uploaded_bytes += job["size"] or 0
percent = None
if total_bytes:
percent = min(100.0, round(uploaded_bytes / total_bytes * 100, 2))
await task_queue_service.update_progress(task.id, {
"stage": "uploading",
"percent": percent,
"bytes_done": uploaded_bytes,
"bytes_total": total_bytes or None,
"detail": f"Uploaded {job['name']}",
})
if operation == "move":
await delete_path(src)
await task_queue_service.update_progress(task.id, {
"stage": "completed",
"percent": 100.0,
"bytes_done": total_bytes,
"bytes_total": total_bytes,
"detail": "Completed",
})
await task_queue_service.update_meta(task.id, {
"files": len(files_to_transfer),
"directories": len({d for d in dirs_to_create if d}),
"bytes": total_bytes,
"operation": operation,
})
await LogService.action(
"virtual_fs",
f"Cross-adapter {operation} from {src} to {dst}",
details={
"src": src,
"dst": dst,
"operation": operation,
"files": len(files_to_transfer),
"bytes": total_bytes,
},
)
return {
"mode": "cross",
"operation": operation,
"src": src,
"dst": dst,
"files": len(files_to_transfer),
"bytes": total_bytes,
}
finally:
try:
if temp_dir.exists():
shutil.rmtree(temp_dir)
except Exception:
await LogService.info(
"virtual_fs",
"Failed to cleanup cross transfer temp dir",
details={"task_id": task.id, "temp_dir": str(temp_dir)},
)
async def process_file(
path: str,
processor_type: str,
config: dict,
save_to: str | None = None,
overwrite: bool = False,
) -> Any:
"""处理指定路径(文件或目录)。目录会递归处理其下所有文件。"""
processor = get_processor(processor_type)
if not processor:
raise HTTPException(400, detail=f"Processor {processor_type} not found")
actual_is_dir = await path_is_directory(path)
supported_exts = getattr(processor, "supported_exts", None) or []
allowed_exts = {
str(ext).lower().lstrip('.')
for ext in supported_exts
if isinstance(ext, str)
}
def matches_extension(rel_path: str) -> bool:
if not allowed_exts:
return True
if '.' not in rel_path:
return '' in allowed_exts
ext = rel_path.rsplit('.', 1)[-1].lower()
return ext in allowed_exts or f'.{ext}' in allowed_exts
def coerce_result_bytes(result: Any) -> bytes:
if isinstance(result, Response):
return result.body
if isinstance(result, (bytes, bytearray)):
return bytes(result)
if isinstance(result, str):
return result.encode('utf-8')
raise HTTPException(500, detail="Processor must return bytes/Response when produces_file=True")
def build_absolute_path(mount_path: str, rel_path: str) -> str:
rel_norm = rel_path.lstrip('/')
mount_norm = mount_path.rstrip('/')
if not mount_norm:
return '/' + rel_norm if rel_norm else '/'
return f"{mount_norm}/{rel_norm}" if rel_norm else mount_norm
if actual_is_dir:
if save_to:
raise HTTPException(400, detail="Directory processing does not support custom save_to path")
if not overwrite:
raise HTTPException(400, detail="Directory processing requires overwrite")
adapter_instance, adapter_model, root, rel = await resolve_adapter_and_rel(path)
rel = rel.rstrip('/')
list_dir = await _ensure_method(adapter_instance, "list_dir")
processed_count = 0
stack: List[str] = [rel]
page_size = 200
while stack:
current = stack.pop()
page = 1
while True:
entries, total = await list_dir(root, current, page, page_size, "name", "asc")
if not entries and (total or 0) == 0:
break
for entry in entries:
name = entry.get("name")
if not name:
continue
child_rel = f"{current}/{name}" if current else name
if entry.get("is_dir"):
stack.append(child_rel)
continue
if not matches_extension(child_rel):
continue
absolute_path = build_absolute_path(adapter_model.path, child_rel)
data = await read_file(absolute_path)
result = await processor.process(data, absolute_path, config)
if getattr(processor, "produces_file", False):
result_bytes = coerce_result_bytes(result)
await write_file(absolute_path, result_bytes)
processed_count += 1
if total is None or page * page_size >= total:
break
page += 1
return {"processed_files": processed_count}
# 单文件处理
data = await read_file(path)
result = await processor.process(data, path, config)
target_path = save_to
if overwrite and not target_path:
target_path = path
if target_path and getattr(processor, "produces_file", False):
result_bytes = coerce_result_bytes(result)
await write_file(target_path, result_bytes)
return {"saved_to": target_path}
return result
async def get_temp_link_secret_key() -> bytes:
"""Get the secret key for temporary links."""
return await ConfigCenter.get_secret_key(
"TEMP_LINK_SECRET_KEY", None
)
async def generate_temp_link_token(path: str, expires_in: int = 3600) -> str:
"""为文件路径生成一个有时效的令牌。expires_in <= 0 表示永久"""
if expires_in <= 0:
expiration_time = "0"
else:
expiration_time = str(int(time.time() + expires_in))
message = f"{path}:{expiration_time}".encode('utf-8')
secret_key = await get_temp_link_secret_key()
signature = hmac.new(secret_key, message, hashlib.sha256).digest()
token_data = f"{path}:{expiration_time}:{base64.urlsafe_b64encode(signature).decode('utf-8')}"
return base64.urlsafe_b64encode(token_data.encode('utf-8')).decode('utf-8')
async def verify_temp_link_token(token: str) -> str:
"""验证令牌并返回文件路径,如果无效或过期则抛出异常"""
try:
decoded_token = base64.urlsafe_b64decode(token).decode('utf-8')
path, expiration_time_str, signature_b64 = decoded_token.rsplit(':', 2)
signature = base64.urlsafe_b64decode(signature_b64)
except (ValueError, TypeError, base64.binascii.Error):
raise HTTPException(status_code=400, detail="Invalid token format")
if expiration_time_str != "0":
expiration_time = int(expiration_time_str)
if time.time() > expiration_time:
raise HTTPException(status_code=410, detail="Link has expired")
message = f"{path}:{expiration_time_str}".encode('utf-8')
secret_key = await get_temp_link_secret_key()
expected_signature = hmac.new(secret_key, message, hashlib.sha256).digest()
if not hmac.compare_digest(signature, expected_signature):
raise HTTPException(status_code=400, detail="Invalid signature")
return path