From 2fa93a1eeb6994e054b8707fd85e04b723896f98 Mon Sep 17 00:00:00 2001
From: shiyu <im@shiyu.dev>
Date: Sat, 3 Jan 2026 15:12:20 +0800
Subject: [PATCH] feat: add vector and file collection constants, update vector
 index handling

---
 api/routers.py                             |  2 +-
 domain/ai/service.py                       |  2 ++
 domain/processors/builtin/vector_index.py  | 37 +++++++++++++---------
 domain/virtual_fs/listing.py               | 23 ++++++++++----
 domain/virtual_fs/search/search_api.py     |  6 ++--
 domain/virtual_fs/search/search_service.py | 17 ++++++----
 6 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/api/routers.py b/api/routers.py
index e66e85c..5fa3d51 100644
--- a/api/routers.py
+++ b/api/routers.py
@@ -19,8 +19,8 @@ from domain.audit import router as audit
 
 def include_routers(app: FastAPI):
     app.include_router(adapters.router)
-    app.include_router(virtual_fs.router)
     app.include_router(search_api.router)
+    app.include_router(virtual_fs.router)
     app.include_router(auth.router)
     app.include_router(config.router)
     app.include_router(processors.router)
diff --git a/domain/ai/service.py b/domain/ai/service.py
index 5469cf8..9157894 100644
--- a/domain/ai/service.py
+++ b/domain/ai/service.py
@@ -19,6 +19,8 @@ from .vector_providers import (
 )
 
 DEFAULT_VECTOR_DIMENSION = 4096
+VECTOR_COLLECTION_NAME = "vector_collection"
+FILE_COLLECTION_NAME = "file_collection"
 
 OPENAI_EMBEDDING_DIMS = {
     "text-embedding-3-large": 3072,
diff --git a/domain/processors/builtin/vector_index.py b/domain/processors/builtin/vector_index.py
index e0044ad..f5ba2b1 100644
--- a/domain/processors/builtin/vector_index.py
+++ b/domain/processors/builtin/vector_index.py
@@ -9,7 +9,12 @@ from PIL import Image
 
 from ..base import BaseProcessor
 from domain.ai.inference import describe_image_base64, get_text_embedding, provider_service
-from domain.ai.service import VectorDBService, DEFAULT_VECTOR_DIMENSION
+from domain.ai.service import (
+    VectorDBService,
+    DEFAULT_VECTOR_DIMENSION,
+    VECTOR_COLLECTION_NAME,
+    FILE_COLLECTION_NAME,
+)
 
 
 CHUNK_SIZE = 800
@@ -112,18 +117,20 @@ class VectorIndexProcessor:
         action = config.get("action", "create")
         index_type = config.get("index_type", "vector")
         vector_db = VectorDBService()
-        collection_name = "vector_collection"
+        vector_collection = VECTOR_COLLECTION_NAME
+        file_collection = FILE_COLLECTION_NAME
 
         if action == "destroy":
-            await vector_db.delete_vector(collection_name, path)
+            target_collection = file_collection if index_type == "simple" else vector_collection
+            await vector_db.delete_vector(target_collection, path)
             return Response(content=f"文件 {path} 的 {index_type} 索引已销毁", media_type="text/plain")
 
         mime_type = _guess_mime(path)
 
         if index_type == "simple":
-            await vector_db.ensure_collection(collection_name, vector=False)
-            await vector_db.delete_vector(collection_name, path)
-            await vector_db.upsert_vector(collection_name, {
+            await vector_db.ensure_collection(file_collection, vector=False)
+            await vector_db.delete_vector(file_collection, path)
+            await vector_db.upsert_vector(file_collection, {
                 "path": path,
                 "source_path": path,
                 "chunk_id": "filename",
@@ -146,8 +153,8 @@ class VectorIndexProcessor:
             if vector_dim <= 0:
                 vector_dim = DEFAULT_VECTOR_DIMENSION
 
-        await vector_db.ensure_collection(collection_name, vector=True, dim=vector_dim)
-        await vector_db.delete_vector(collection_name, path)
+        await vector_db.ensure_collection(vector_collection, vector=True, dim=vector_dim)
+        await vector_db.delete_vector(vector_collection, path)
 
         if file_ext in ["jpg", "jpeg", "png", "bmp"]:
             processed_bytes, compression = _compress_image_for_embedding(input_bytes)
@@ -155,7 +162,7 @@ class VectorIndexProcessor:
             description = await describe_image_base64(base64_image)
             embedding = await get_text_embedding(description)
             image_mime = "image/jpeg" if compression else mime_type
-            await vector_db.upsert_vector(collection_name, {
+            await vector_db.upsert_vector(vector_collection, {
                 "path": _chunk_key(path, "image"),
                 "source_path": path,
                 "chunk_id": "image",
@@ -177,7 +184,7 @@ class VectorIndexProcessor:
 
             chunks = _chunk_text(text)
             if not chunks:
-                await vector_db.upsert_vector(collection_name, {
+                await vector_db.upsert_vector(vector_collection, {
                     "path": _chunk_key(path, "0"),
                     "source_path": path,
                     "chunk_id": "0",
@@ -194,7 +201,7 @@ class VectorIndexProcessor:
             chunk_count = 0
             for chunk_id, chunk_text, start, end in chunks:
                 embedding = await get_text_embedding(chunk_text)
-                await vector_db.upsert_vector(collection_name, {
+                await vector_db.upsert_vector(vector_collection, {
                     "path": _chunk_key(path, str(chunk_id)),
                     "source_path": path,
                     "chunk_id": str(chunk_id),
@@ -213,15 +220,15 @@ class VectorIndexProcessor:
             return Response(content="文本文件已索引", media_type="text/plain")
 
         # 其他类型暂未支持向量索引，回退为文件名索引
-        await vector_db.delete_vector(collection_name, path)
-        await vector_db.upsert_vector(collection_name, {
-            "path": _chunk_key(path, "fallback"),
+        await vector_db.ensure_collection(file_collection, vector=False)
+        await vector_db.delete_vector(file_collection, path)
+        await vector_db.upsert_vector(file_collection, {
+            "path": path,
             "source_path": path,
             "chunk_id": "filename",
             "mime": mime_type,
             "type": "filename",
             "name": os.path.basename(path),
-            "embedding": [0.0] * vector_dim,
         })
         return Response(content="暂不支持该类型的向量索引，已创建文件名索引", media_type="text/plain")
 
diff --git a/domain/virtual_fs/listing.py b/domain/virtual_fs/listing.py
index 4d3362e..f0b78eb 100644
--- a/domain/virtual_fs/listing.py
+++ b/domain/virtual_fs/listing.py
@@ -4,7 +4,7 @@ from fastapi import HTTPException
 
 from api.response import page
 from domain.adapters.registry import runtime_registry
-from domain.ai.service import VectorDBService
+from domain.ai.service import VectorDBService, VECTOR_COLLECTION_NAME, FILE_COLLECTION_NAME
 from domain.virtual_fs.thumbnail import is_image_filename, is_video_filename
 from models import StorageAdapter
 
@@ -161,13 +161,19 @@ class VirtualFSListingMixin(VirtualFSResolverMixin):
     @classmethod
     async def _gather_vector_index(cls, full_path: str, limit: int = 20):
         vector_db = VectorDBService()
-        try:
-            raw_results = await vector_db.search_by_path("vector_collection", full_path, max(limit * 2, 20))
-        except Exception:
-            return None
-
         matched = []
-        if raw_results:
+        had_success = False
+        fetch_limit = max(limit * 2, 20)
+        for collection_name in (VECTOR_COLLECTION_NAME, FILE_COLLECTION_NAME):
+            try:
+                raw_results = await vector_db.search_by_path(collection_name, full_path, fetch_limit)
+            except Exception:
+                continue
+
+            # The lookup did not raise, so it counts as a success even when empty.
+            had_success = True
+            if not raw_results:
+                continue
             buckets = raw_results if isinstance(raw_results, list) else [raw_results]
             for bucket in buckets:
                 if not bucket:
@@ -193,6 +199,9 @@ class VirtualFSListingMixin(VirtualFSResolverMixin):
                         entry["preview_truncated"] = len(text) > preview_limit
                     matched.append(entry)
 
+        if not had_success:
+            return None
+
         if not matched:
             return {"total": 0, "entries": [], "by_type": {}, "has_more": False}
 
diff --git a/domain/virtual_fs/search/search_api.py b/domain/virtual_fs/search/search_api.py
index f5aa7e9..c53ae8b 100644
--- a/domain/virtual_fs/search/search_api.py
+++ b/domain/virtual_fs/search/search_api.py
@@ -1,5 +1,6 @@
 from fastapi import APIRouter, Depends, Query
 
+from api.response import success
 from domain.auth.service import get_current_active_user
 from domain.auth.types import User
 from domain.virtual_fs.search.search_service import VirtualFSSearchService
@@ -17,10 +18,11 @@ async def search_files(
     user: User = Depends(get_current_active_user),
 ):
     if not q.strip():
-        return {"items": [], "query": q}
+        return success({"items": [], "query": q, "mode": mode})
 
     top_k = max(top_k, 1)
     page = max(page, 1)
     page_size = max(min(page_size, 100), 1)
 
-    return await VirtualFSSearchService.search(q, top_k, mode, page, page_size)
+    data = await VirtualFSSearchService.search(q, top_k, mode, page, page_size)
+    return success(data)
diff --git a/domain/virtual_fs/search/search_service.py b/domain/virtual_fs/search/search_service.py
index 0e8fd47..53be215 100644
--- a/domain/virtual_fs/search/search_service.py
+++ b/domain/virtual_fs/search/search_service.py
@@ -2,7 +2,7 @@ from typing import Any, Dict, List, Tuple
 
 from domain.virtual_fs.types import SearchResultItem
 from domain.ai.inference import get_text_embedding
-from domain.ai.service import VectorDBService
+from domain.ai.service import VectorDBService, VECTOR_COLLECTION_NAME, FILE_COLLECTION_NAME
 
 
 def _normalize_result(raw: Dict[str, Any], source: str, fallback_score: float = 0.0) -> SearchResultItem:
@@ -53,7 +53,7 @@ async def _vector_search(query: str, top_k: int) -> List[SearchResultItem]:
         return []
 
     try:
-        raw_results = await vector_db.search_vectors("vector_collection", embedding, max(top_k, 10))
+        raw_results = await vector_db.search_vectors(VECTOR_COLLECTION_NAME, embedding, max(top_k, 10))
     except Exception:
         return []
 
@@ -68,12 +68,15 @@ async def _filename_search(query: str, page: int, page_size: int) -> Tuple[List[
     vector_db = VectorDBService()
     limit = max(page * page_size + 1, page_size * (page + 2))
     limit = min(limit, 2000)
-    try:
-        raw_results = await vector_db.search_by_path("vector_collection", query, limit)
-    except Exception:
-        return [], False
+    records: List[Dict[str, Any]] = []
+    for collection_name in (FILE_COLLECTION_NAME, VECTOR_COLLECTION_NAME):
+        try:
+            raw_results = await vector_db.search_by_path(collection_name, query, limit)
+        except Exception:
+            continue
+        if raw_results:
+            records.extend(raw_results[0] or [])
 
-    records = raw_results[0] if raw_results else []
     deduped: List[SearchResultItem] = []
     seen_paths: set[str] = set()
     for record in records or []: