Files
Foxel/domain/virtual_fs/service.py
2025-12-08 17:46:45 +08:00

1361 lines
53 KiB
Python

from __future__ import annotations
import base64
import hashlib
import hmac
import mimetypes
import re
import shutil
import time
from pathlib import Path
from typing import Any, AsyncIterator, Dict, List, Tuple, TYPE_CHECKING, Union
import aiofiles
from fastapi import HTTPException, UploadFile
from fastapi.responses import Response
from api.response import page
from domain.adapters.registry import runtime_registry
from domain.config.service import ConfigService
from models import StorageAdapter
from domain.tasks.service import TaskService
from domain.virtual_fs.thumbnail import (
get_or_create_thumb,
is_image_filename,
is_raw_filename,
is_video_filename,
)
from domain.ai.service import VectorDBService
if TYPE_CHECKING:
from domain.tasks.task_queue import Task
class VirtualFSService:
CROSS_TRANSFER_TEMP_ROOT = Path("data/tmp/cross_transfer")
DIRECT_REDIRECT_CONFIG_KEY = "enable_direct_download_307"
@staticmethod
def _normalize_path(path: str) -> str:
return path if path.startswith("/") else f"/{path}"
@staticmethod
def _build_absolute_path(mount_path: str, rel_path: str) -> str:
rel_norm = rel_path.lstrip("/")
mount_norm = mount_path.rstrip("/")
if not mount_norm:
return "/" + rel_norm if rel_norm else "/"
return f"{mount_norm}/{rel_norm}" if rel_norm else mount_norm
@staticmethod
def _join_rel(base: str, name: str) -> str:
if not base:
return name.lstrip("/")
if not name:
return base
return f"{base.rstrip('/')}/{name.lstrip('/')}"
@staticmethod
def _parent_rel(rel: str) -> str:
if not rel or "/" not in rel:
return ""
return rel.rsplit("/", 1)[0]
@staticmethod
async def _ensure_method(adapter: Any, method: str):
func = getattr(adapter, method, None)
if not callable(func):
raise HTTPException(501, detail=f"Adapter does not implement {method}")
return func
@classmethod
async def resolve_adapter_by_path(cls, path: str) -> Tuple[StorageAdapter, str]:
norm = cls._normalize_path(path)
adapters = await StorageAdapter.filter(enabled=True)
best = None
for adapter in adapters:
if norm == adapter.path or norm.startswith(adapter.path.rstrip("/") + "/"):
if best is None or len(adapter.path) > len(best.path):
best = adapter
if not best:
raise HTTPException(404, detail="No storage adapter for path")
rel = norm[len(best.path) :].lstrip("/")
return best, rel
@classmethod
async def resolve_adapter_and_rel(cls, path: str):
norm = cls._normalize_path(path)
adapter_model, rel = await cls.resolve_adapter_by_path(norm)
adapter_instance = runtime_registry.get(adapter_model.id)
if not adapter_instance:
await runtime_registry.refresh()
adapter_instance = runtime_registry.get(adapter_model.id)
if not adapter_instance:
raise HTTPException(
404, detail=f"Adapter instance for ID {adapter_model.id} not found or failed to load."
)
effective_root = adapter_instance.get_effective_root(adapter_model.sub_path)
return adapter_instance, adapter_model, effective_root, rel
@classmethod
async def maybe_redirect_download(cls, adapter_instance, adapter_model, root: str, rel: str):
if not rel or rel.endswith("/"):
return None
config = getattr(adapter_model, "config", {}) or {}
if not config.get(cls.DIRECT_REDIRECT_CONFIG_KEY):
return None
handler = getattr(adapter_instance, "get_direct_download_response", None)
if not callable(handler):
return None
try:
response = await handler(root, rel)
except FileNotFoundError:
raise
except Exception:
return None
if isinstance(response, Response):
return response
return None
@classmethod
async def path_is_directory(cls, path: str) -> bool:
adapter_instance, _, root, rel = await cls.resolve_adapter_and_rel(path)
rel = rel.rstrip("/")
if rel == "":
return True
stat_func = getattr(adapter_instance, "stat_file", None)
if not callable(stat_func):
raise HTTPException(501, detail="Adapter does not implement stat_file")
try:
info = await stat_func(root, rel)
except FileNotFoundError:
raise HTTPException(404, detail="Path not found")
if isinstance(info, dict):
return bool(info.get("is_dir"))
return False
@classmethod
async def list_virtual_dir(
cls,
path: str,
page_num: int = 1,
page_size: int = 50,
sort_by: str = "name",
sort_order: str = "asc",
) -> Dict:
norm = cls._normalize_path(path).rstrip("/") or "/"
adapters = await StorageAdapter.filter(enabled=True)
child_mount_entries: List[str] = []
norm_prefix = norm.rstrip("/")
for adapter in adapters:
if adapter.path == norm:
continue
if adapter.path.startswith(norm_prefix + "/"):
tail = adapter.path[len(norm_prefix) :].lstrip("/")
if "/" not in tail:
child_mount_entries.append(tail)
child_mount_entries = sorted(set(child_mount_entries))
sort_field = sort_by.lower()
reverse = sort_order.lower() == "desc"
def build_sort_key(item: Dict) -> Tuple:
key = (not bool(item.get("is_dir")),)
if sort_field == "name":
key += (str(item.get("name", "")).lower(),)
elif sort_field == "size":
key += (int(item.get("size", 0)),)
elif sort_field == "mtime":
key += (int(item.get("mtime", 0)),)
else:
key += (str(item.get("name", "")).lower(),)
return key
def annotate_entry(entry: Dict) -> None:
if not entry.get("is_dir"):
name = entry.get("name", "")
entry["has_thumbnail"] = bool(is_image_filename(name) or is_video_filename(name))
else:
entry["has_thumbnail"] = False
try:
adapter_model, rel = await cls.resolve_adapter_by_path(norm)
adapter_instance = runtime_registry.get(adapter_model.id)
if not adapter_instance:
await runtime_registry.refresh()
adapter_instance = runtime_registry.get(adapter_model.id)
if adapter_instance:
effective_root = adapter_instance.get_effective_root(adapter_model.sub_path)
else:
adapter_model = None
effective_root = ""
rel = ""
except HTTPException:
adapter_model = None
adapter_instance = None
effective_root = ""
rel = ""
adapter_entries_page: List[Dict] = []
adapter_entries_for_merge: List[Dict] = []
adapter_total = 0
covered = set()
if adapter_model and adapter_instance:
list_dir = await cls._ensure_method(adapter_instance, "list_dir")
try:
adapter_entries_page, adapter_total = await list_dir(
effective_root, rel, page_num, page_size, sort_by, sort_order
)
except NotADirectoryError:
raise HTTPException(400, detail="Not a directory")
adapter_entries_for_merge = adapter_entries_page
if child_mount_entries and adapter_total > len(adapter_entries_page):
full_page_size = adapter_total
if full_page_size > 0:
adapter_entries_for_merge, adapter_total = await list_dir(
effective_root, rel, 1, full_page_size, sort_by, sort_order
)
else:
adapter_entries_for_merge = adapter_entries_page
for item in adapter_entries_for_merge:
covered.add(item["name"])
mount_entries = []
for name in child_mount_entries:
if name not in covered:
mount_entries.append(
{"name": name, "is_dir": True, "size": 0, "mtime": 0, "type": "mount", "has_thumbnail": False}
)
if mount_entries:
for ent in adapter_entries_for_merge:
annotate_entry(ent)
combined_entries = adapter_entries_for_merge + [{**ent, "has_thumbnail": False} for ent in mount_entries]
combined_entries.sort(key=build_sort_key, reverse=reverse)
total_entries = len(combined_entries)
start_idx = (page_num - 1) * page_size
end_idx = start_idx + page_size
page_entries = combined_entries[start_idx:end_idx]
return page(page_entries, total_entries, page_num, page_size)
annotate_entry_list = adapter_entries_page or []
for ent in annotate_entry_list:
annotate_entry(ent)
return page(adapter_entries_page, adapter_total, page_num, page_size)
@classmethod
async def read_file(cls, path: str) -> Union[bytes, Any]:
adapter_instance, _, root, rel = await cls.resolve_adapter_and_rel(path)
if rel.endswith("/") or rel == "":
raise HTTPException(400, detail="Path is a directory")
read_func = await cls._ensure_method(adapter_instance, "read_file")
return await read_func(root, rel)
@classmethod
async def write_file(cls, path: str, data: bytes):
adapter_instance, _, root, rel = await cls.resolve_adapter_and_rel(path)
if rel.endswith("/"):
raise HTTPException(400, detail="Invalid file path")
write_func = await cls._ensure_method(adapter_instance, "write_file")
await write_func(root, rel, data)
await TaskService.trigger_tasks("file_written", path)
@classmethod
async def write_file_stream(cls, path: str, data_iter: AsyncIterator[bytes], overwrite: bool = True):
adapter_instance, _, root, rel = await cls.resolve_adapter_and_rel(path)
if rel.endswith("/"):
raise HTTPException(400, detail="Invalid file path")
exists_func = getattr(adapter_instance, "exists", None)
if not overwrite and callable(exists_func):
try:
if await exists_func(root, rel):
raise HTTPException(409, detail="Destination exists")
except HTTPException:
raise
except Exception:
pass
size = 0
stream_func = getattr(adapter_instance, "write_file_stream", None)
if callable(stream_func):
size = await stream_func(root, rel, data_iter)
else:
buf = bytearray()
async for chunk in data_iter:
if chunk:
buf.extend(chunk)
write_func = await cls._ensure_method(adapter_instance, "write_file")
await write_func(root, rel, bytes(buf))
size = len(buf)
await TaskService.trigger_tasks("file_written", path)
return size
@classmethod
async def make_dir(cls, path: str):
adapter_instance, _, root, rel = await cls.resolve_adapter_and_rel(path)
if not rel:
return
mkdir_func = await cls._ensure_method(adapter_instance, "mkdir")
await mkdir_func(root, rel)
@classmethod
async def delete_path(cls, path: str):
adapter_instance, _, root, rel = await cls.resolve_adapter_and_rel(path)
if not rel:
raise HTTPException(400, detail="Cannot delete root")
delete_func = await cls._ensure_method(adapter_instance, "delete")
await delete_func(root, rel)
await TaskService.trigger_tasks("file_deleted", path)
@classmethod
async def move_path(
cls, src: str, dst: str, overwrite: bool = False, return_debug: bool = True, allow_cross: bool = False
):
adapter_s, adapter_model_s, root_s, rel_s = await cls.resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await cls.resolve_adapter_and_rel(dst)
debug_info = {
"src": src,
"dst": dst,
"rel_s": rel_s,
"rel_d": rel_d,
"root_s": root_s,
"root_d": root_d,
"overwrite": overwrite,
"operation": "move",
"queued": False,
}
if not rel_s:
raise HTTPException(400, detail="Cannot move or rename mount root")
if not rel_d:
raise HTTPException(400, detail="Invalid destination")
if adapter_model_s.id != adapter_model_d.id:
if not allow_cross:
raise HTTPException(400, detail="Cross-adapter move not supported")
queue_info = await cls._enqueue_cross_mount_transfer(
operation="move",
src=src,
dst=dst,
overwrite=overwrite,
)
debug_info.update(queue_info)
return debug_info if return_debug else None
exists_func = getattr(adapter_s, "exists", None)
stat_func = getattr(adapter_s, "stat_path", None)
delete_func = await cls._ensure_method(adapter_s, "delete")
move_func = await cls._ensure_method(adapter_s, "move")
dst_exists = False
dst_stat = None
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
if callable(stat_func):
dst_stat = await stat_func(root_d, rel_d)
debug_info["dst_exists"] = dst_exists
debug_info["dst_stat"] = dst_stat
if dst_exists and not overwrite:
kind = None
fs_path = None
if dst_stat:
kind = "dir" if dst_stat.get("is_dir") else "file"
fs_path = dst_stat.get("path")
raise HTTPException(
409,
detail=f"Destination already exists(kind={kind}, fs_path={fs_path}, rel_d={rel_d}, overwrite={overwrite})",
)
if dst_exists and overwrite:
try:
await delete_func(root_s, rel_d)
debug_info["pre_delete"] = "ok"
except Exception as exc:
debug_info["pre_delete"] = f"error:{exc}"
raise HTTPException(500, detail=f"Pre-delete failed before overwrite: {exc}")
if rel_s == rel_d:
debug_info["noop"] = True
return debug_info if return_debug else None
try:
await move_func(root_s, rel_s, rel_d)
debug_info["moved"] = True
except FileNotFoundError:
raise HTTPException(404, detail="Source not found")
except FileExistsError:
raise HTTPException(409, detail="Destination already exists (race condition after pre-check)")
except IsADirectoryError:
raise HTTPException(400, detail="Invalid directory operation")
except Exception as exc:
raise HTTPException(500, detail=f"Move failed: {exc}")
return debug_info if return_debug else None
@classmethod
async def rename_path(cls, src: str, dst: str, overwrite: bool = False, return_debug: bool = True):
adapter_s, adapter_model_s, root_s, rel_s = await cls.resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await cls.resolve_adapter_and_rel(dst)
debug_info = {"src": src, "dst": dst, "rel_s": rel_s, "rel_d": rel_d, "root_s": root_s, "root_d": root_d, "overwrite": overwrite}
if adapter_model_s.id != adapter_model_d.id:
raise HTTPException(400, detail="Cross-adapter rename not supported")
if not rel_s:
raise HTTPException(400, detail="Cannot rename mount root")
if not rel_d:
raise HTTPException(400, detail="Invalid destination")
exists_func = getattr(adapter_s, "exists", None)
stat_func = getattr(adapter_s, "stat_path", None)
delete_func = await cls._ensure_method(adapter_s, "delete")
rename_func = await cls._ensure_method(adapter_s, "rename")
dst_exists = False
dst_stat = None
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
if callable(stat_func):
dst_stat = await stat_func(root_d, rel_d)
debug_info["dst_exists"] = dst_exists
debug_info["dst_stat"] = dst_stat
if dst_exists and not overwrite:
kind = None
fs_path = None
if dst_stat:
kind = "dir" if dst_stat.get("is_dir") else "file"
fs_path = dst_stat.get("path")
raise HTTPException(
409,
detail=f"Destination already exists(kind={kind}, fs_path={fs_path}, rel_d={rel_d}, overwrite={overwrite})",
)
if dst_exists and overwrite:
try:
await delete_func(root_s, rel_d)
debug_info["pre_delete"] = "ok"
except Exception as exc:
debug_info["pre_delete"] = f"error:{exc}"
raise HTTPException(500, detail=f"Pre-delete failed before overwrite: {exc}")
if rel_s == rel_d:
debug_info["noop"] = True
return debug_info if return_debug else None
try:
await rename_func(root_s, rel_s, rel_d)
debug_info["renamed"] = True
except FileNotFoundError:
raise HTTPException(404, detail="Source not found")
except FileExistsError:
raise HTTPException(409, detail="Destination already exists (race condition after pre-check)")
except IsADirectoryError:
raise HTTPException(400, detail="Invalid directory operation")
except Exception as exc:
raise HTTPException(500, detail=f"Rename failed: {exc}")
return debug_info if return_debug else None
@classmethod
async def stream_file(cls, path: str, range_header: str | None):
adapter_instance, adapter_model, root, rel = await cls.resolve_adapter_and_rel(path)
if not rel or rel.endswith("/"):
raise HTTPException(400, detail="Path is a directory")
if is_raw_filename(rel):
import io
import rawpy
from PIL import Image
try:
raw_data = await cls.read_file(path)
try:
with rawpy.imread(io.BytesIO(raw_data)) as raw:
try:
thumb = raw.extract_thumb()
except rawpy.LibRawNoThumbnailError:
thumb = None
if thumb is not None and thumb.format in [rawpy.ThumbFormat.JPEG, rawpy.ThumbFormat.BITMAP]:
im = Image.open(io.BytesIO(thumb.data))
else:
rgb = raw.postprocess(use_camera_wb=False, use_auto_wb=True, output_bps=8)
im = Image.fromarray(rgb)
except Exception as exc:
print(f"rawpy processing failed: {exc}")
raise exc
buf = io.BytesIO()
im.save(buf, "JPEG", quality=90)
content = buf.getvalue()
return Response(content=content, media_type="image/jpeg")
except Exception as exc:
raise HTTPException(500, detail=f"RAW file processing failed: {exc}")
redirect_response = await cls.maybe_redirect_download(adapter_instance, adapter_model, root, rel)
if redirect_response is not None:
return redirect_response
stream_impl = getattr(adapter_instance, "stream_file", None)
if callable(stream_impl):
return await stream_impl(root, rel, range_header)
data = await cls.read_file(path)
mime, _ = mimetypes.guess_type(rel)
return Response(content=data, media_type=mime or "application/octet-stream")
@classmethod
async def _gather_vector_index(cls, full_path: str, limit: int = 20):
vector_db = VectorDBService()
try:
raw_results = await vector_db.search_by_path("vector_collection", full_path, max(limit * 2, 20))
except Exception:
return None
matched = []
if raw_results:
buckets = raw_results if isinstance(raw_results, list) else [raw_results]
for bucket in buckets:
if not bucket:
continue
for record in bucket:
entity = dict((record or {}).get("entity") or {})
source_path = entity.get("source_path") or entity.get("path") or ""
if source_path != full_path:
continue
entry = {
"chunk_id": str(entity.get("chunk_id")) if entity.get("chunk_id") is not None else None,
"type": entity.get("type"),
"mime": entity.get("mime"),
"name": entity.get("name"),
"start_offset": entity.get("start_offset"),
"end_offset": entity.get("end_offset"),
"vector_id": entity.get("vector_id"),
}
text = entity.get("text") or entity.get("description")
if text:
preview_limit = 400
entry["preview"] = text[:preview_limit]
entry["preview_truncated"] = len(text) > preview_limit
matched.append(entry)
if not matched:
return {"total": 0, "entries": [], "by_type": {}, "has_more": False}
type_counts: Dict[str, int] = {}
for item in matched:
key = item.get("type") or "unknown"
type_counts[key] = type_counts.get(key, 0) + 1
has_more = len(matched) > limit
return {
"total": len(matched),
"entries": matched[:limit],
"by_type": type_counts,
"has_more": has_more,
"limit": limit,
}
@classmethod
async def stat_file(cls, path: str):
adapter_instance, _, root, rel = await cls.resolve_adapter_and_rel(path)
stat_func = getattr(adapter_instance, "stat_file", None)
if not callable(stat_func):
raise HTTPException(501, detail="Adapter does not implement stat_file")
info = await stat_func(root, rel)
if isinstance(info, dict):
info.setdefault("path", path)
try:
is_dir = bool(info.get("is_dir"))
except Exception:
is_dir = False
rel_name = rel.rstrip("/").split("/")[-1] if rel else path.rstrip("/").split("/")[-1]
name_hint = str(info.get("name") or rel_name or "")
info["has_thumbnail"] = bool(not is_dir and (is_image_filename(name_hint) or is_video_filename(name_hint)))
if not is_dir:
vector_index = await cls._gather_vector_index(path)
if vector_index is not None:
info["vector_index"] = vector_index
return info
@classmethod
async def copy_path(
cls, src: str, dst: str, overwrite: bool = False, return_debug: bool = True, allow_cross: bool = False
):
adapter_s, adapter_model_s, root_s, rel_s = await cls.resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await cls.resolve_adapter_and_rel(dst)
debug_info = {
"src": src,
"dst": dst,
"rel_s": rel_s,
"rel_d": rel_d,
"root_s": root_s,
"root_d": root_d,
"overwrite": overwrite,
"operation": "copy",
"queued": False,
}
if not rel_s:
raise HTTPException(400, detail="Cannot copy mount root")
if not rel_d:
raise HTTPException(400, detail="Invalid destination")
if adapter_model_s.id != adapter_model_d.id:
if not allow_cross:
raise HTTPException(400, detail="Cross-adapter copy not supported")
queue_info = await cls._enqueue_cross_mount_transfer(
operation="copy",
src=src,
dst=dst,
overwrite=overwrite,
)
debug_info.update(queue_info)
return debug_info if return_debug else None
exists_func = getattr(adapter_s, "exists", None)
stat_func = getattr(adapter_s, "stat_path", None)
delete_func = getattr(adapter_s, "delete", None)
copy_func = await cls._ensure_method(adapter_s, "copy")
dst_exists = False
dst_stat = None
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
if callable(stat_func):
dst_stat = await stat_func(root_d, rel_d)
debug_info["dst_exists"] = dst_exists
debug_info["dst_stat"] = dst_stat
if dst_exists and not overwrite:
raise HTTPException(409, detail="Destination already exists")
if dst_exists and overwrite and callable(delete_func):
try:
await delete_func(root_s, rel_d)
debug_info["pre_delete"] = "ok"
except Exception as exc:
debug_info["pre_delete"] = f"error:{exc}"
raise HTTPException(500, detail=f"Pre-delete failed: {exc}")
if rel_s == rel_d:
debug_info["noop"] = True
return debug_info if return_debug else None
try:
await copy_func(root_s, rel_s, rel_d, overwrite=overwrite)
debug_info["copied"] = True
except FileNotFoundError:
raise HTTPException(404, detail="Source not found")
except FileExistsError:
raise HTTPException(409, detail="Destination already exists (race condition)")
except Exception as exc:
raise HTTPException(500, detail=f"Copy failed: {exc}")
return debug_info if return_debug else None
@classmethod
async def _enqueue_cross_mount_transfer(cls, operation: str, src: str, dst: str, overwrite: bool) -> Dict[str, Any]:
if operation not in {"move", "copy"}:
raise HTTPException(400, detail="Unsupported transfer operation")
adapter_s, adapter_model_s, _, _ = await cls.resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await cls.resolve_adapter_and_rel(dst)
if adapter_model_s.id == adapter_model_d.id:
raise HTTPException(400, detail="Cross-adapter transfer requested but adapters are identical")
dst_exists = False
exists_func = getattr(adapter_d, "exists", None)
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
else:
try:
await cls.stat_file(dst)
dst_exists = True
except FileNotFoundError:
dst_exists = False
except HTTPException as exc:
if exc.status_code == 404:
dst_exists = False
else:
raise
if dst_exists and not overwrite:
raise HTTPException(409, detail="Destination already exists")
payload = {
"operation": operation,
"src": src,
"dst": dst,
"overwrite": overwrite,
}
from domain.tasks.task_queue import task_queue_service
task = await task_queue_service.add_task("cross_mount_transfer", payload)
return {
"queued": True,
"task_id": task.id,
"task_name": "cross_mount_transfer",
"dst_exists": dst_exists,
"cross_adapter": True,
}
@classmethod
async def run_cross_mount_transfer_task(cls, task: "Task") -> Dict[str, Any]:
from domain.tasks.task_queue import task_queue_service
params = task.task_info or {}
operation = params.get("operation")
src = params.get("src")
dst = params.get("dst")
overwrite = bool(params.get("overwrite", False))
if operation not in {"move", "copy"}:
raise ValueError(f"Unsupported cross mount operation: {operation}")
if not src or not dst:
raise ValueError("Missing src or dst for cross mount transfer")
adapter_s, adapter_model_s, root_s, rel_s = await cls.resolve_adapter_and_rel(src)
adapter_d, adapter_model_d, root_d, rel_d = await cls.resolve_adapter_and_rel(dst)
await task_queue_service.update_meta(task.id, {"operation": operation, "src": src, "dst": dst})
if adapter_model_s.id == adapter_model_d.id:
if operation == "move":
await cls.move_path(src, dst, overwrite=overwrite, return_debug=False, allow_cross=False)
else:
await cls.copy_path(src, dst, overwrite=overwrite, return_debug=False, allow_cross=False)
return {
"mode": "direct",
"operation": operation,
"src": src,
"dst": dst,
"files": 0,
"bytes": 0,
}
if not rel_s:
raise ValueError("Cannot transfer mount root")
if not rel_d:
raise ValueError("Invalid destination")
dst_exists = False
exists_func = getattr(adapter_d, "exists", None)
if callable(exists_func):
dst_exists = await exists_func(root_d, rel_d)
else:
try:
await cls.stat_file(dst)
dst_exists = True
except FileNotFoundError:
dst_exists = False
except HTTPException as exc:
if exc.status_code != 404:
raise
if dst_exists and not overwrite:
raise ValueError("Destination already exists")
if dst_exists and overwrite:
await cls.delete_path(dst)
try:
src_stat = await cls.stat_file(src)
except HTTPException as exc:
if exc.status_code == 404:
raise FileNotFoundError(src) from exc
raise
src_is_dir = bool(src_stat.get("is_dir"))
files_to_transfer: List[Dict[str, Any]] = []
dirs_to_create: List[str] = []
await task_queue_service.update_progress(
task.id,
{
"stage": "preparing",
"percent": 0.0,
"detail": "Collecting source entries",
},
)
if src_is_dir:
if rel_d:
dirs_to_create.append(rel_d)
list_dir = await cls._ensure_method(adapter_s, "list_dir")
stack: List[Tuple[str, str, str]] = [(rel_s, rel_d, "")]
page_size = 200
while stack:
current_rel, current_dst_rel, current_relative = stack.pop()
page = 1
while True:
entries, total = await list_dir(root_s, current_rel, page, page_size, "name", "asc")
if not entries and (total or 0) == 0:
break
for entry in entries:
name = entry.get("name")
if not name:
continue
child_rel = cls._join_rel(current_rel, name)
child_dst_rel = cls._join_rel(current_dst_rel, name)
child_relative = cls._join_rel(current_relative, name)
if entry.get("is_dir"):
dirs_to_create.append(child_dst_rel)
stack.append((child_rel, child_dst_rel, child_relative))
else:
files_to_transfer.append(
{
"src_rel": child_rel,
"dst_rel": child_dst_rel,
"relative_rel": child_relative or name,
"size": entry.get("size"),
"name": name,
}
)
if total is None or page * page_size >= (total or 0):
break
page += 1
else:
relative_rel = rel_s or (src_stat.get("name") or "file")
files_to_transfer.append(
{
"src_rel": rel_s,
"dst_rel": rel_d,
"relative_rel": relative_rel,
"size": src_stat.get("size"),
"name": src_stat.get("name") or rel_s.split("/")[-1],
}
)
parent_dir = cls._parent_rel(rel_d)
if parent_dir:
dirs_to_create.append(parent_dir)
cls.CROSS_TRANSFER_TEMP_ROOT.mkdir(parents=True, exist_ok=True)
temp_dir = cls.CROSS_TRANSFER_TEMP_ROOT / task.id
temp_dir.mkdir(parents=True, exist_ok=True)
bytes_downloaded = 0
total_dynamic_bytes = sum((f["size"] or 0) for f in files_to_transfer)
try:
for job in files_to_transfer:
src_abs = cls._build_absolute_path(adapter_model_s.path, job["src_rel"])
data = await cls.read_file(src_abs)
temp_path = temp_dir / job["relative_rel"]
temp_path.parent.mkdir(parents=True, exist_ok=True)
async with aiofiles.open(temp_path, "wb") as f:
await f.write(data)
actual_size = len(data)
job["temp_path"] = temp_path
prev_size = job.get("size") or 0
if prev_size <= 0:
total_dynamic_bytes += actual_size
job_size = actual_size
else:
job_size = prev_size
job["size"] = job_size
bytes_downloaded += actual_size
percent = None
total_for_percent = total_dynamic_bytes if total_dynamic_bytes else bytes_downloaded
if total_for_percent:
percent = min(100.0, round(bytes_downloaded / total_for_percent * 100, 2))
await task_queue_service.update_progress(
task.id,
{
"stage": "downloading",
"percent": percent,
"bytes_done": bytes_downloaded,
"bytes_total": total_dynamic_bytes or None,
"detail": f"Downloaded {job['name']}",
},
)
mkdir_func = await cls._ensure_method(adapter_d, "mkdir")
ensured_dirs: set[str] = set()
async def ensure_dir(rel_path: str):
if not rel_path or rel_path in ensured_dirs:
return
parent = cls._parent_rel(rel_path)
if parent:
await ensure_dir(parent)
try:
await mkdir_func(root_d, rel_path)
except FileExistsError:
pass
except HTTPException as exc:
if exc.status_code not in {409, 400}:
raise
except Exception:
pass
ensured_dirs.add(rel_path)
for dir_rel in sorted({d for d in dirs_to_create if d}, key=lambda x: x.count("/")):
await ensure_dir(dir_rel)
uploaded_bytes = 0
total_bytes = sum((f["size"] or 0) for f in files_to_transfer)
async def iter_temp_file(path: Path, chunk_size: int = 512 * 1024):
async with aiofiles.open(path, "rb") as f:
while True:
chunk = await f.read(chunk_size)
if not chunk:
break
yield chunk
for job in files_to_transfer:
parent_dir = cls._parent_rel(job["dst_rel"])
if parent_dir:
await ensure_dir(parent_dir)
dst_abs = cls._build_absolute_path(adapter_model_d.path, job["dst_rel"])
temp_path: Path = job["temp_path"]
await cls.write_file_stream(dst_abs, iter_temp_file(temp_path), overwrite=overwrite)
uploaded_bytes += job["size"] or 0
percent = None
if total_bytes:
percent = min(100.0, round(uploaded_bytes / total_bytes * 100, 2))
await task_queue_service.update_progress(
task.id,
{
"stage": "uploading",
"percent": percent,
"bytes_done": uploaded_bytes,
"bytes_total": total_bytes or None,
"detail": f"Uploaded {job['name']}",
},
)
if operation == "move":
await cls.delete_path(src)
await task_queue_service.update_progress(
task.id,
{
"stage": "completed",
"percent": 100.0,
"bytes_done": total_bytes,
"bytes_total": total_bytes,
"detail": "Completed",
},
)
await task_queue_service.update_meta(
task.id,
{
"files": len(files_to_transfer),
"directories": len({d for d in dirs_to_create if d}),
"bytes": total_bytes,
"operation": operation,
},
)
return {
"mode": "cross",
"operation": operation,
"src": src,
"dst": dst,
"files": len(files_to_transfer),
"bytes": total_bytes,
}
finally:
try:
if temp_dir.exists():
shutil.rmtree(temp_dir)
except Exception:
pass
@classmethod
async def process_file(
cls,
path: str,
processor_type: str,
config: dict,
save_to: str | None = None,
overwrite: bool = False,
) -> Any:
# Local import to avoid circular dependency on module load.
from domain.processors.service import get_processor
processor = get_processor(processor_type)
if not processor:
raise HTTPException(400, detail=f"Processor {processor_type} not found")
actual_is_dir = await cls.path_is_directory(path)
supported_exts = getattr(processor, "supported_exts", None) or []
allowed_exts = {str(ext).lower().lstrip(".") for ext in supported_exts if isinstance(ext, str)}
def matches_extension(rel_path: str) -> bool:
if not allowed_exts:
return True
if "." not in rel_path:
return "" in allowed_exts
ext = rel_path.rsplit(".", 1)[-1].lower()
return ext in allowed_exts or f".{ext}" in allowed_exts
def coerce_result_bytes(result: Any) -> bytes:
if isinstance(result, Response):
return result.body
if isinstance(result, (bytes, bytearray)):
return bytes(result)
if isinstance(result, str):
return result.encode("utf-8")
raise HTTPException(500, detail="Processor must return bytes/Response when produces_file=True")
def build_absolute_path(mount_path: str, rel_path: str) -> str:
rel_norm = rel_path.lstrip("/")
mount_norm = mount_path.rstrip("/")
if not mount_norm:
return "/" + rel_norm if rel_norm else "/"
return f"{mount_norm}/{rel_norm}" if rel_norm else mount_norm
if actual_is_dir:
if save_to:
raise HTTPException(400, detail="Directory processing does not support custom save_to path")
if not overwrite:
raise HTTPException(400, detail="Directory processing requires overwrite")
adapter_instance, adapter_model, root, rel = await cls.resolve_adapter_and_rel(path)
rel = rel.rstrip("/")
list_dir = await cls._ensure_method(adapter_instance, "list_dir")
processed_count = 0
stack: List[str] = [rel]
page_size = 200
while stack:
current = stack.pop()
page = 1
while True:
entries, total = await list_dir(root, current, page, page_size, "name", "asc")
if not entries and (total or 0) == 0:
break
for entry in entries:
name = entry.get("name")
if not name:
continue
child_rel = f"{current}/{name}" if current else name
if entry.get("is_dir"):
stack.append(child_rel)
continue
if not matches_extension(child_rel):
continue
absolute_path = build_absolute_path(adapter_model.path, child_rel)
data = await cls.read_file(absolute_path)
result = await processor.process(data, absolute_path, config)
if getattr(processor, "produces_file", False):
result_bytes = coerce_result_bytes(result)
await cls.write_file(absolute_path, result_bytes)
processed_count += 1
if total is None or page * page_size >= total:
break
page += 1
return {"processed_files": processed_count}
data = await cls.read_file(path)
result = await processor.process(data, path, config)
target_path = save_to
if overwrite and not target_path:
target_path = path
if target_path and getattr(processor, "produces_file", False):
result_bytes = coerce_result_bytes(result)
await cls.write_file(target_path, result_bytes)
return {"saved_to": target_path}
return result
@classmethod
async def get_temp_link_secret_key(cls) -> bytes:
return await ConfigService.get_secret_key("TEMP_LINK_SECRET_KEY", None)
@classmethod
async def generate_temp_link_token(cls, path: str, expires_in: int = 3600) -> str:
if expires_in <= 0:
expiration_time = "0"
else:
expiration_time = str(int(time.time() + expires_in))
message = f"{path}:{expiration_time}".encode("utf-8")
secret_key = await cls.get_temp_link_secret_key()
signature = hmac.new(secret_key, message, hashlib.sha256).digest()
token_data = f"{path}:{expiration_time}:{base64.urlsafe_b64encode(signature).decode('utf-8')}"
return base64.urlsafe_b64encode(token_data.encode("utf-8")).decode("utf-8")
@classmethod
async def verify_temp_link_token(cls, token: str) -> str:
try:
decoded_token = base64.urlsafe_b64decode(token).decode("utf-8")
path, expiration_time_str, signature_b64 = decoded_token.rsplit(":", 2)
signature = base64.urlsafe_b64decode(signature_b64)
except (ValueError, TypeError, base64.binascii.Error):
raise HTTPException(status_code=400, detail="Invalid token format")
if expiration_time_str != "0":
expiration_time = int(expiration_time_str)
if time.time() > expiration_time:
raise HTTPException(status_code=410, detail="Link has expired")
message = f"{path}:{expiration_time_str}".encode("utf-8")
secret_key = await cls.get_temp_link_secret_key()
expected_signature = hmac.new(secret_key, message, hashlib.sha256).digest()
if not hmac.compare_digest(signature, expected_signature):
raise HTTPException(status_code=400, detail="Invalid signature")
return path
# --- Route-friendly helpers ---
@classmethod
async def serve_file(cls, full_path: str, range_header: str | None) -> Response:
full_path = cls._normalize_path(full_path)
if is_raw_filename(full_path):
import io
import rawpy
from PIL import Image
try:
raw_data = await cls.read_file(full_path)
with rawpy.imread(io.BytesIO(raw_data)) as raw:
rgb = raw.postprocess(use_camera_wb=True, output_bps=8)
im = Image.fromarray(rgb)
buf = io.BytesIO()
im.save(buf, "JPEG", quality=90)
content = buf.getvalue()
return Response(content=content, media_type="image/jpeg")
except FileNotFoundError:
raise HTTPException(404, detail="File not found")
except Exception as exc:
raise HTTPException(500, detail=f"RAW file processing failed: {exc}")
adapter_instance, adapter_model, root, rel = await cls.resolve_adapter_and_rel(full_path)
redirect_response = await cls.maybe_redirect_download(adapter_instance, adapter_model, root, rel)
if redirect_response is not None:
return redirect_response
try:
content = await cls.read_file(full_path)
except FileNotFoundError:
raise HTTPException(404, detail="File not found")
if not isinstance(content, (bytes, bytearray)):
return Response(content=content, media_type="application/octet-stream")
content_length = len(content)
content_type = mimetypes.guess_type(full_path)[0] or "application/octet-stream"
if range_header:
range_match = re.match(r"bytes=(\d+)-(\d*)", range_header)
if range_match:
start = int(range_match.group(1))
end = int(range_match.group(2)) if range_match.group(2) else content_length - 1
start = max(0, min(start, content_length - 1))
end = max(start, min(end, content_length - 1))
chunk = content[start : end + 1]
chunk_size = len(chunk)
headers = {
"Content-Range": f"bytes {start}-{end}/{content_length}",
"Accept-Ranges": "bytes",
"Content-Length": str(chunk_size),
"Content-Type": content_type,
}
return Response(content=chunk, status_code=206, headers=headers)
headers = {
"Accept-Ranges": "bytes",
"Content-Length": str(content_length),
"Content-Type": content_type,
}
if content_type.startswith("video/"):
headers["Cache-Control"] = "public, max-age=3600"
return Response(content=content, headers=headers)
@classmethod
async def get_thumbnail(cls, full_path: str, w: int, h: int, fit: str) -> Response:
full_path = cls._normalize_path(full_path)
if fit not in ("cover", "contain"):
raise HTTPException(400, detail="fit must be cover|contain")
adapter, mount, root, rel = await cls.resolve_adapter_and_rel(full_path)
if not rel or rel.endswith("/"):
raise HTTPException(400, detail="Not a file")
if not (is_image_filename(rel) or is_video_filename(rel)):
raise HTTPException(404, detail="Not an image or video")
data, mime, key = await get_or_create_thumb(adapter, mount.id, root, rel, w, h, fit) # type: ignore
headers = {
"Cache-Control": "public, max-age=3600",
"ETag": key,
}
return Response(content=data, media_type=mime, headers=headers)
@classmethod
async def stream_response(cls, full_path: str, range_header: str | None):
full_path = cls._normalize_path(full_path)
try:
return await cls.stream_file(full_path, range_header)
except HTTPException:
raise
except FileNotFoundError:
raise HTTPException(404, detail="File not found")
except Exception as exc:
raise HTTPException(500, detail=f"Stream error: {exc}")
@classmethod
async def create_temp_link(cls, full_path: str, expires_in: int):
full_path = cls._normalize_path(full_path)
token = await cls.generate_temp_link_token(full_path, expires_in=expires_in)
file_domain = await ConfigService.get("FILE_DOMAIN")
if file_domain:
file_domain = file_domain.rstrip("/")
url = f"{file_domain}/api/fs/public/{token}"
else:
url = f"/api/fs/public/{token}"
return {"token": token, "path": full_path, "url": url}
@classmethod
async def access_public_file(cls, token: str, range_header: str | None):
try:
path = await cls.verify_temp_link_token(token)
except HTTPException as exc:
raise exc
try:
return await cls.stream_file(path, range_header)
except FileNotFoundError:
raise HTTPException(404, detail="File not found via token")
except Exception as exc:
raise HTTPException(500, detail=f"File access error: {exc}")
@classmethod
async def stat(cls, full_path: str):
full_path = cls._normalize_path(full_path)
return await cls.stat_file(full_path)
@classmethod
async def write_uploaded_file(cls, full_path: str, data: bytes):
full_path = cls._normalize_path(full_path)
await cls.write_file(full_path, data)
return {"written": True, "path": full_path, "size": len(data)}
@classmethod
async def mkdir(cls, path: str):
path = cls._normalize_path(path)
if not path or path == "/":
raise HTTPException(400, detail="Invalid path")
await cls.make_dir(path)
return {"created": True, "path": path}
@classmethod
async def move(cls, src: str, dst: str, overwrite: bool):
src = cls._normalize_path(src)
dst = cls._normalize_path(dst)
debug_info = await cls.move_path(src, dst, overwrite=overwrite, return_debug=True, allow_cross=True)
queued = bool(debug_info.get("queued"))
response = {
"moved": not queued,
"queued": queued,
"src": src,
"dst": dst,
"overwrite": overwrite,
}
if queued:
response["task_id"] = debug_info.get("task_id")
response["task_name"] = debug_info.get("task_name")
return response
@classmethod
async def rename(cls, src: str, dst: str, overwrite: bool):
src = cls._normalize_path(src)
dst = cls._normalize_path(dst)
await cls.rename_path(src, dst, overwrite=overwrite, return_debug=False)
return {"renamed": True, "src": src, "dst": dst, "overwrite": overwrite}
@classmethod
async def copy(cls, src: str, dst: str, overwrite: bool):
src = cls._normalize_path(src)
dst = cls._normalize_path(dst)
debug_info = await cls.copy_path(src, dst, overwrite=overwrite, return_debug=True, allow_cross=True)
queued = bool(debug_info.get("queued"))
response = {
"copied": not queued,
"queued": queued,
"src": src,
"dst": dst,
"overwrite": overwrite,
}
if queued:
response["task_id"] = debug_info.get("task_id")
response["task_name"] = debug_info.get("task_name")
return response
@classmethod
async def upload_stream_from_upload_file(cls, full_path: str, file: UploadFile, chunk_size: int, overwrite: bool):
full_path = cls._normalize_path(full_path)
if full_path.endswith("/"):
raise HTTPException(400, detail="Path must be a file")
adapter, _m, root, rel = await cls.resolve_adapter_and_rel(full_path)
exists_func = getattr(adapter, "exists", None)
if not overwrite and callable(exists_func):
try:
if await exists_func(root, rel):
raise HTTPException(409, detail="Destination exists")
except HTTPException:
raise
except Exception:
pass
async def gen():
while True:
chunk = await file.read(chunk_size)
if not chunk:
break
yield chunk
size = await cls.write_file_stream(full_path, gen(), overwrite=overwrite)
return {"uploaded": True, "path": full_path, "size": size, "overwrite": overwrite}
@classmethod
async def list_directory(cls, full_path: str, page_num: int, page_size: int, sort_by: str, sort_order: str):
full_path = cls._normalize_path(full_path)
result = await cls.list_virtual_dir(full_path, page_num, page_size, sort_by, sort_order)
return {
"path": full_path,
"entries": result["items"],
"pagination": {
"total": result["total"],
"page": result["page"],
"page_size": result["page_size"],
"pages": result["pages"],
},
}
@classmethod
async def delete(cls, full_path: str):
full_path = cls._normalize_path(full_path)
await cls.delete_path(full_path)
return {"deleted": True, "path": full_path}