feat: add vector database index info query and display functionality

2026-06-03 06:29:56 +08:00 · 2025-09-27 14:01:59 +08:00
parent 7caa602d93
commit 0da64b8d9c
4 changed files with 225 additions and 4 deletions
--- a/services/virtual_fs.py
+++ b/services/virtual_fs.py
@@ -20,6 +20,7 @@ from services.processors.registry import get as get_processor
 from services.tasks import task_service
 from services.logging import LogService
 from services.config import ConfigCenter
+from services.vector_db import VectorDBService


 CROSS_TRANSFER_TEMP_ROOT = Path("data/tmp/cross_transfer")
@@ -508,12 +509,78 @@ async def stream_file(path: str, range_header: str | None):
    return Response(content=data, media_type=mime or "application/octet-stream")


+async def _gather_vector_index(full_path: str, limit: int = 20):
+    """查询与文件相关的索引信息。失败时返回 None。"""
+    vector_db = VectorDBService()
+    try:
+        raw_results = await vector_db.search_by_path("vector_collection", full_path, max(limit * 2, 20))
+    except Exception:
+        return None
+
+    matched = []
+    if raw_results:
+        buckets = raw_results if isinstance(raw_results, list) else [raw_results]
+        for bucket in buckets:
+            if not bucket:
+                continue
+            for record in bucket:
+                entity = dict((record or {}).get("entity") or {})
+                source_path = entity.get("source_path") or entity.get("path") or ""
+                if source_path != full_path:
+                    continue
+                entry = {
+                    "chunk_id": str(entity.get("chunk_id")) if entity.get("chunk_id") is not None else None,
+                    "type": entity.get("type"),
+                    "mime": entity.get("mime"),
+                    "name": entity.get("name"),
+                    "start_offset": entity.get("start_offset"),
+                    "end_offset": entity.get("end_offset"),
+                    "vector_id": entity.get("vector_id"),
+                }
+                text = entity.get("text") or entity.get("description")
+                if text:
+                    preview_limit = 400
+                    entry["preview"] = text[:preview_limit]
+                    entry["preview_truncated"] = len(text) > preview_limit
+                matched.append(entry)
+
+    if not matched:
+        return {"total": 0, "entries": [], "by_type": {}, "has_more": False}
+
+    type_counts: Dict[str, int] = {}
+    for item in matched:
+        key = item.get("type") or "unknown"
+        type_counts[key] = type_counts.get(key, 0) + 1
+
+    has_more = len(matched) > limit
+    return {
+        "total": len(matched),
+        "entries": matched[:limit],
+        "by_type": type_counts,
+        "has_more": has_more,
+        "limit": limit,
+    }
+
+
 async def stat_file(path: str):
    adapter_instance, _, root, rel = await resolve_adapter_and_rel(path)
    stat_func = getattr(adapter_instance, "stat_file", None)
    if not callable(stat_func):
        raise HTTPException(501, detail="Adapter does not implement stat_file")
-    return await stat_func(root, rel)
+    info = await stat_func(root, rel)
+
+    if isinstance(info, dict):
+        info.setdefault("path", path)
+        try:
+            is_dir = bool(info.get("is_dir"))
+        except Exception:
+            is_dir = False
+        if not is_dir:
+            vector_index = await _gather_vector_index(path)
+            if vector_index is not None:
+                info["vector_index"] = vector_index
+
+    return info


 async def copy_path(