feat: Enhance vector database providers with source path handling and improved search functionality

2026-05-06 18:22:44 +08:00 · 2025-09-27 13:34:18 +08:00
parent ee6e570ccb
commit a4af9475ef
10 changed files with 1082 additions and 353 deletions
--- a/api/routes/search.py
+++ b/api/routes/search.py
@@ -1,4 +1,7 @@
+from typing import Any, Dict, List, Tuple
+
 from fastapi import APIRouter, Depends, Query
+
 from schemas.fs import SearchResultItem
 from services.auth import get_current_active_user, User
 from services.ai import get_text_embedding
@@ -6,24 +9,96 @@ from services.vector_db import VectorDBService

 router = APIRouter(prefix="/api/search", tags=["search"])

-async def search_files_by_vector(q: str, top_k: int):
-    embedding = await get_text_embedding(q)
-    vector_db = VectorDBService()
-    results = await vector_db.search_vectors("vector_collection", embedding, top_k)
-    items = [
-        SearchResultItem(id=res["id"], path=res["entity"]["path"], score=res["distance"])
-        for res in results[0]
-    ]
-    return {"items": items, "query": q}

-async def search_files_by_name(q: str, top_k: int):
+def _normalize_result(raw: Dict[str, Any], source: str, fallback_score: float = 0.0) -> SearchResultItem:
+    """Convert one raw provider hit into a ``SearchResultItem``.
+
+    Args:
+        raw: Hit dict; its ``id``, ``distance`` and ``entity`` keys are read.
+        source: Label of the retrieval path that produced the hit. It is
+            stored in the metadata and used as ``source_type`` when the
+            entity carries no ``type``.
+        fallback_score: Score used when the hit has no ``distance``.
+
+    Returns:
+        The normalized item. ``path`` prefers the entity's ``source_path``
+        over its ``path``; when the two differ, the latter is preserved in
+        ``metadata["stored_path"]``.
+    """
+    payload = dict(raw.get("entity") or {})
+    indexed_path = payload.get("path")
+    display_path = payload.get("source_path") or indexed_path or ""
+
+    distance = raw.get("distance")
+    if distance is None:
+        score = fallback_score
+    else:
+        score = float(distance)
+
+    extra = {"retrieval_source": source, "raw_distance": distance}
+    if indexed_path and indexed_path != display_path:
+        extra["stored_path"] = indexed_path
+    vector_ref = payload.get("vector_id")
+    if vector_ref:
+        extra["vector_id"] = vector_ref
+
+    chunk_ref = payload.get("chunk_id")
+    return SearchResultItem(
+        id=str(raw.get("id")),
+        path=display_path,
+        score=score,
+        chunk_id=None if chunk_ref is None else str(chunk_ref),
+        snippet=payload.get("text") or payload.get("description") or payload.get("name"),
+        mime=payload.get("mime"),
+        source_type=payload.get("type") or source,
+        start_offset=payload.get("start_offset"),
+        end_offset=payload.get("end_offset"),
+        metadata=extra,
+    )
+
+
+async def _vector_search(query: str, top_k: int) -> List[SearchResultItem]:
    vector_db = VectorDBService()
-    results = await vector_db.search_by_path("vector_collection", q, top_k)
-    items = [
-        SearchResultItem(id=idx, path=res["entity"]["path"], score=res["distance"])
-        for idx, res in enumerate(results[0])
-    ]
-    return {"items": items, "query": q}
+    try:
+        embedding = await get_text_embedding(query)
+    except Exception:
+        embedding = None
+    if not embedding:
+        return []
+
+    try:
+        raw_results = await vector_db.search_vectors("vector_collection", embedding, max(top_k, 10))
+    except Exception:
+        return []
+
+    results: List[SearchResultItem] = []
+    for bucket in raw_results or []:
+        for record in bucket or []:
+            results.append(_normalize_result(record, "vector"))
+    return results
+
+
+async def _filename_search(query: str, page: int, page_size: int) -> Tuple[List[SearchResultItem], bool]:
+    """Search by file path and return one de-duplicated page of results.
+
+    Args:
+        query: Substring passed to the provider's path search.
+        page: 1-based page number.
+        page_size: Number of items per page.
+
+    Returns:
+        ``(items, has_more)``. A provider failure yields ``([], False)``.
+    """
+    vector_db = VectorDBService()
+    # Over-fetch beyond the requested page so duplicates can be dropped and
+    # "has more" can still be detected; the fetch is capped at 2000 rows.
+    fetch_limit = min(max(page * page_size + 1, page_size * (page + 2)), 2000)
+    try:
+        raw_results = await vector_db.search_by_path("vector_collection", query, fetch_limit)
+    except Exception:
+        return [], False
+
+    first_bucket = raw_results[0] if raw_results else []
+    # Keep the first hit per path; dict insertion order preserves ranking.
+    unique: Dict[str, SearchResultItem] = {}
+    for record in first_bucket or []:
+        candidate = _normalize_result(record, "filename", fallback_score=1.0)
+        alt_path = (candidate.metadata or {}).get("stored_path")
+        unique.setdefault(candidate.path or alt_path or "", candidate)
+    ordered = list(unique.values())
+
+    first = max(page - 1, 0) * page_size
+    last = first + page_size
+    window = ordered[first:last]
+    for rank, entry in enumerate(window, start=first):
+        if entry.metadata is None:
+            entry.metadata = {}
+        entry.metadata.setdefault("retrieval_rank", rank)
+    return window, len(ordered) > last


@router.get("")
@@ -31,11 +106,32 @@ async def search_files(
    q: str = Query(..., description="搜索查询"),
    top_k: int = Query(10, description="返回结果数量"),
    mode: str = Query("vector", description="搜索模式: 'vector' 或 'filename'"),
+    page: int = Query(1, description="分页页码，仅在文件名搜索模式下生效"),
+    page_size: int = Query(10, description="分页大小，仅在文件名搜索模式下生效"),
    user: User = Depends(get_current_active_user),
 ):
+    if not q.strip():
+        return {"items": [], "query": q}
+
+    top_k = max(top_k, 1)
+    page = max(page, 1)
+    page_size = max(min(page_size, 100), 1)
+
    if mode == "vector":
-        return await search_files_by_vector(q, top_k)
+        items = (await _vector_search(q, top_k))[:top_k]
    elif mode == "filename":
-        return await search_files_by_name(q, top_k)
+        items, has_more = await _filename_search(q, page, page_size)
+        return {
+            "items": items,
+            "query": q,
+            "mode": mode,
+            "pagination": {
+                "page": page,
+                "page_size": page_size,
+                "has_more": has_more,
+            },
+        }
    else:
-        return {"items": [], "query": q, "error": "Invalid search mode"}
+        items = (await _vector_search(q, top_k))[:top_k]
+
+    return {"items": items, "query": q, "mode": mode}
--- a/schemas/fs.py
+++ b/schemas/fs.py
@@ -21,6 +21,13 @@ class SearchResultItem(BaseModel):
    id: int | str
    path: str
    score: float
+    chunk_id: Optional[str] = None
+    snippet: Optional[str] = None
+    mime: Optional[str] = None
+    source_type: Optional[str] = None
+    start_offset: Optional[int] = None
+    end_offset: Optional[int] = None
+    metadata: Optional[dict] = None


 class MkdirRequest(BaseModel):
--- a/services/ai.py
+++ b/services/ai.py
@@ -68,3 +68,46 @@ async def get_text_embedding(text: str) -> List[float]:
        resp.raise_for_status()
        result = resp.json()
        return result["data"][0]["embedding"]
+
+
+async def rerank_texts(query: str, documents: List[str]) -> List[float]:
+    """Score ``documents`` against ``query`` with the configured rerank model.
+
+    The call is best-effort: an empty list is returned when there are no
+    documents, when the rerank endpoint is not fully configured, when the
+    request fails, or when the response does not have the expected shape.
+
+    Args:
+        query: The search query.
+        documents: Candidate texts to score.
+
+    Returns:
+        One float per entry of the response's ``results`` list that carries
+        a ``score`` key (``0.0`` when that value is not numeric).
+    """
+    if not documents:
+        return []
+
+    api_url = await ConfigCenter.get("AI_RERANK_API_URL")
+    model = await ConfigCenter.get("AI_RERANK_MODEL")
+    api_key = await ConfigCenter.get("AI_RERANK_API_KEY")
+
+    if not api_url or not model or not api_key:
+        return []
+
+    payload = {
+        "model": model,
+        "query": query,
+        "documents": documents,
+    }
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+
+    async with httpx.AsyncClient() as client:
+        try:
+            resp = await client.post(api_url, headers=headers, json=payload)
+            resp.raise_for_status()
+            data = resp.json()
+        except (httpx.HTTPError, ValueError):
+            # httpx.HTTPError is the common base of transport failures
+            # (connection errors, timeouts) and non-2xx status errors;
+            # ValueError covers a body that is not valid JSON.
+            return []
+
+    results = data.get("results") if isinstance(data, dict) else None
+    if not isinstance(results, list):
+        return []
+
+    scores: List[float] = []
+    for item in results:
+        if not (isinstance(item, dict) and "score" in item):
+            continue
+        try:
+            scores.append(float(item["score"]))
+        except (TypeError, ValueError):
+            scores.append(0.0)
+    return scores
--- a/services/processors/vector_index.py
+++ b/services/processors/vector_index.py
@@ -1,11 +1,95 @@
-from typing import Dict, Any
+from typing import Dict, Any, List, Tuple
 from fastapi.responses import Response
 import base64
+import mimetypes
+import os
+from io import BytesIO
+
 from services.ai import describe_image_base64, get_text_embedding
 from services.vector_db import VectorDBService, DEFAULT_VECTOR_DIMENSION
 from services.logging import LogService
 from services.config import ConfigCenter

+try:  # Pillow is optional but bundled with the project dependencies
+    from PIL import Image
+except ImportError:  # pragma: no cover - fallback when pillow missing
+    Image = None
+
+
+CHUNK_SIZE = 800
+CHUNK_OVERLAP = 200
+MAX_IMAGE_EDGE = 1600
+JPEG_QUALITY = 85
+
+
+def _chunk_text(content: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[Tuple[int, str, int, int]]:
+    """Split text into fixed-size, overlapping windows.
+
+    Args:
+        content: Text to split.
+        chunk_size: Window length in characters; non-positive values fall
+            back to ``CHUNK_SIZE``.
+        overlap: Characters shared by consecutive windows. Negative values
+            are treated as 0; values >= ``chunk_size`` are reduced to a
+            quarter of the window (at least 1).
+
+    Returns:
+        A list of ``(chunk_id, chunk_text, start, end)`` tuples.
+        ``chunk_text`` is whitespace-stripped and whitespace-only windows
+        are skipped; ``start``/``end`` are the offsets of the unstripped
+        window in ``content``.
+    """
+    if chunk_size <= 0:
+        chunk_size = CHUNK_SIZE
+    if overlap < 0:
+        # A negative overlap would make the step larger than the window and
+        # silently skip the text between consecutive chunks.
+        overlap = 0
+    elif overlap >= chunk_size:
+        overlap = max(chunk_size // 4, 1)
+
+    chunks: List[Tuple[int, str, int, int]] = []
+    # With chunk_size == 1 the clamp above leaves overlap == 1; force a
+    # positive step so the loop always advances instead of spinning forever.
+    step = max(chunk_size - overlap, 1)
+    idx = 0
+    start = 0
+    length = len(content)
+
+    while start < length:
+        end = min(length, start + chunk_size)
+        chunk = content[start:end].strip()
+        if chunk:
+            chunks.append((idx, chunk, start, end))
+            idx += 1
+        if end >= length:
+            break
+        start += step
+    return chunks
+
+
+def _guess_mime(path: str) -> str:
+    """Return the MIME type guessed from ``path``, or a generic binary type."""
+    guessed = mimetypes.guess_type(path)[0]
+    if guessed:
+        return guessed
+    return "application/octet-stream"
+
+
+def _chunk_key(path: str, chunk_id: str) -> str:
+    """Build the per-chunk key: the file path followed by ``#chunk=<chunk_id>``."""
+    return "{}#chunk={}".format(path, chunk_id)
+
+
+def _compress_image_for_embedding(input_bytes: bytes) -> Tuple[bytes, Dict[str, Any] | None]:
+    """Re-encode an image as a smaller JPEG before it is sent to the vision model.
+
+    The image is converted to RGB, downscaled so that its longest edge is at
+    most ``MAX_IMAGE_EDGE``, and saved as JPEG at ``JPEG_QUALITY``.
+
+    Returns:
+        ``(compressed_bytes, stats)`` when the re-encoded image is smaller
+        than the input; otherwise ``(input_bytes, None)``. The latter is also
+        returned when Pillow is unavailable or any image operation raises.
+    """
+    if Image is None:
+        return input_bytes, None
+
+    try:
+        with Image.open(BytesIO(input_bytes)) as picture:
+            picture = picture.convert("RGB")
+            src_w, src_h = picture.size
+            longest = max(src_w, src_h)
+            ratio = 1.0
+            if longest > MAX_IMAGE_EDGE:
+                ratio = MAX_IMAGE_EDGE / float(longest)
+                target = (max(int(src_w * ratio), 1), max(int(src_h * ratio), 1))
+                # Use Image.Resampling.LANCZOS when that namespace exists,
+                # otherwise the module-level Image.LANCZOS constant.
+                lanczos = getattr(getattr(Image, "Resampling", Image), "LANCZOS")
+                picture = picture.resize(target, resample=lanczos)
+
+            sink = BytesIO()
+            picture.save(sink, format="JPEG", quality=JPEG_QUALITY, optimize=True)
+            encoded = sink.getvalue()
+
+            # Keep the original bytes unless re-encoding actually saved space.
+            if len(encoded) >= len(input_bytes):
+                return input_bytes, None
+            stats = {
+                "original_bytes": len(input_bytes),
+                "compressed_bytes": len(encoded),
+                "scaled": ratio < 1.0,
+                "width": picture.width,
+                "height": picture.height,
+            }
+            return encoded, stats
+    except Exception:  # pragma: no cover - fall back on any image processing error
+        return input_bytes, None
+

 class VectorIndexProcessor:
    name = "向量索引"
@@ -33,6 +117,7 @@ class VectorIndexProcessor:
        index_type = config.get("index_type", "vector")
        vector_db = VectorDBService()
        collection_name = "vector_collection"
+
        if action == "destroy":
            await vector_db.delete_vector(collection_name, path)
            await LogService.info(
@@ -42,9 +127,19 @@ class VectorIndexProcessor:
            )
            return Response(content=f"文件 {path} 的 {index_type} 索引已销毁", media_type="text/plain")

-        if index_type == 'simple':
+        mime_type = _guess_mime(path)
+
+        if index_type == "simple":
            await vector_db.ensure_collection(collection_name, vector=False)
-            await vector_db.upsert_vector(collection_name, {'path': path})
+            await vector_db.delete_vector(collection_name, path)
+            await vector_db.upsert_vector(collection_name, {
+                "path": path,
+                "source_path": path,
+                "chunk_id": "filename",
+                "mime": mime_type,
+                "type": "filename",
+                "name": os.path.basename(path),
+            })
            await LogService.info(
                "processor:vector_index",
                f"Created simple index for {path}",
@@ -53,24 +148,7 @@ class VectorIndexProcessor:
            return Response(content=f"文件 {path} 的普通索引已创建", media_type="text/plain")

        file_ext = path.split('.')[-1].lower()
-        description = ""
-        embedding = None
-
-        if file_ext in ["jpg", "jpeg", "png", "bmp"]:
-            base64_image = base64.b64encode(input_bytes).decode("utf-8")
-            description = await describe_image_base64(base64_image)
-            embedding = await get_text_embedding(description)
-            log_message = f"Indexed image {path}"
-            response_message = f"图片已索引，描述：{description}"
-        elif file_ext in ["txt", "md"]:
-            text = input_bytes.decode("utf-8")
-            embedding = await get_text_embedding(text)
-            description = text[:100] + "..." if len(text) > 100 else text
-            log_message = f"Indexed text file {path}"
-            response_message = f"文本文件已索引"
-        
-        if embedding is None:
-            return Response(content="不支持的文件类型", status_code=400)
+        details: Dict[str, Any] = {"path": path, "action": "create", "index_type": "vector"}

        raw_dim = await ConfigCenter.get('AI_EMBED_DIM', DEFAULT_VECTOR_DIMENSION)
        try:
@@ -81,15 +159,103 @@ class VectorIndexProcessor:
            vector_dim = DEFAULT_VECTOR_DIMENSION

        await vector_db.ensure_collection(collection_name, vector=True, dim=vector_dim)
-        await vector_db.upsert_vector(
-            collection_name, {'path': path, 'embedding': embedding})
-        
+        await vector_db.delete_vector(collection_name, path)
+
+        if file_ext in ["jpg", "jpeg", "png", "bmp"]:
+            processed_bytes, compression = _compress_image_for_embedding(input_bytes)
+            base64_image = base64.b64encode(processed_bytes).decode("utf-8")
+            description = await describe_image_base64(base64_image)
+            embedding = await get_text_embedding(description)
+            image_mime = "image/jpeg" if compression else mime_type
+            await vector_db.upsert_vector(collection_name, {
+                "path": _chunk_key(path, "image"),
+                "source_path": path,
+                "chunk_id": "image",
+                "embedding": embedding,
+                "text": description,
+                "mime": image_mime,
+                "type": "image",
+            })
+            details["description"] = description
+            if compression:
+                details["image_compression"] = compression
+            await LogService.info(
+                "processor:vector_index",
+                f"Indexed image {path}",
+                details=details,
+            )
+            return Response(content=f"图片已索引，描述：{description}", media_type="text/plain")
+
+        if file_ext in ["txt", "md"]:
+            try:
+                text = input_bytes.decode("utf-8")
+            except UnicodeDecodeError:
+                return Response(content="文本文件解码失败", status_code=400)
+
+            chunks = _chunk_text(text)
+            if not chunks:
+                await vector_db.upsert_vector(collection_name, {
+                    "path": _chunk_key(path, "0"),
+                    "source_path": path,
+                    "chunk_id": "0",
+                    "embedding": await get_text_embedding(text or path),
+                    "text": text,
+                    "mime": mime_type,
+                    "type": "text",
+                    "start_offset": 0,
+                    "end_offset": len(text),
+                })
+                details["chunks"] = 1
+                await LogService.info(
+                    "processor:vector_index",
+                    f"Indexed text file {path}",
+                    details=details,
+                )
+                return Response(content="文本文件已索引", media_type="text/plain")
+
+            chunk_count = 0
+            for chunk_id, chunk_text, start, end in chunks:
+                embedding = await get_text_embedding(chunk_text)
+                await vector_db.upsert_vector(collection_name, {
+                    "path": _chunk_key(path, str(chunk_id)),
+                    "source_path": path,
+                    "chunk_id": str(chunk_id),
+                    "embedding": embedding,
+                    "text": chunk_text,
+                    "mime": mime_type,
+                    "type": "text",
+                    "start_offset": start,
+                    "end_offset": end,
+                })
+                chunk_count += 1
+
+            details["chunks"] = chunk_count
+            sample = chunks[0][1]
+            details["sample"] = sample[:120]
+            await LogService.info(
+                "processor:vector_index",
+                f"Indexed text file {path}",
+                details=details,
+            )
+            return Response(content="文本文件已索引", media_type="text/plain")
+
+        # 其他类型暂未支持向量索引，回退为文件名索引
+        await vector_db.delete_vector(collection_name, path)
+        await vector_db.upsert_vector(collection_name, {
+            "path": _chunk_key(path, "fallback"),
+            "source_path": path,
+            "chunk_id": "filename",
+            "mime": mime_type,
+            "type": "filename",
+            "name": os.path.basename(path),
+            "embedding": [0.0] * vector_dim,
+        })
        await LogService.info(
            "processor:vector_index",
-            log_message,
-            details={"path": path, "description": description, "action": "create", "index_type": "vector"},
+            f"File type fallback to simple index for {path}",
+            details={"path": path, "action": "create", "index_type": "simple", "original_type": file_ext},
        )
-        return Response(content=response_message, media_type="text/plain")
+        return Response(content="暂不支持该类型的向量索引，已创建文件名索引", media_type="text/plain")


 PROCESSOR_TYPE = "vector_index"
--- a/services/vector_db/providers/milvus_lite.py
+++ b/services/vector_db/providers/milvus_lite.py
@@ -50,15 +50,20 @@ class MilvusLiteProvider(BaseVectorProvider):
        client = self._get_client()
        if client.has_collection(collection_name):
            return
+        common_fields = [
+            FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
+            FieldSchema(name="source_path", dtype=DataType.VARCHAR, max_length=512, is_primary=False, auto_id=False),
+        ]
+
        if vector:
            vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
            if vector_dim <= 0:
                vector_dim = 4096
            fields = [
-                FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
+                *common_fields,
                FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
            ]
-            schema = CollectionSchema(fields, description="Image vector collection")
+            schema = CollectionSchema(fields, description="Vector collection", enable_dynamic_field=True)
            client.create_collection(collection_name, schema=schema)
            index_params = MilvusClient.prepare_index_params()
            index_params.add_index(
@@ -70,38 +75,98 @@ class MilvusLiteProvider(BaseVectorProvider):
            )
            client.create_index(collection_name, index_params=index_params)
        else:
-            fields = [
-                FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
-            ]
-            schema = CollectionSchema(fields, description="Simple file index")
+            schema = CollectionSchema(common_fields, description="Simple file index", enable_dynamic_field=True)
            client.create_collection(collection_name, schema=schema)

    def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
-        self._get_client().upsert(collection_name, data)
+        payload = dict(data)
+        payload.setdefault("source_path", payload.get("path"))
+        payload.setdefault("vector_id", payload.get("path"))
+        self._get_client().upsert(collection_name, data=[payload])

    def delete_vector(self, collection_name: str, path: str) -> None:
-        self._get_client().delete(collection_name, ids=[path])
+        """Delete every entity whose ``source_path`` equals ``path``."""
+        client = self._get_client()
+        # Escape backslashes first, then double quotes: an unescaped
+        # backslash in the path would otherwise swallow the following
+        # character (or the closing quote) of the filter's string literal.
+        escaped = path.replace("\\", "\\\\").replace('"', '\\"')
+        client.delete(collection_name, filter=f'source_path == "{escaped}"')

    def search_vectors(self, collection_name: str, query_embedding, top_k: int):
        search_params = {"metric_type": "COSINE"}
-        return self._get_client().search(
+        output_fields = [
+            "path",
+            "source_path",
+            "chunk_id",
+            "mime",
+            "text",
+            "start_offset",
+            "end_offset",
+            "type",
+            "name",
+        ]
+        raw_results = self._get_client().search(
            collection_name,
            data=[query_embedding],
            anns_field="embedding",
            search_params=search_params,
            limit=top_k,
-            output_fields=["path"],
+            output_fields=output_fields,
        )
+        formatted: List[List[Dict[str, Any]]] = []
+        for hits in raw_results:
+            bucket: List[Dict[str, Any]] = []
+            for hit in hits:
+                if hasattr(hit, "entity"):
+                    entity = dict(getattr(hit, "entity", {}) or {})
+                    hit_id = getattr(hit, "id", None)
+                    distance = getattr(hit, "distance", None)
+                elif isinstance(hit, dict):
+                    entity = dict((hit.get("entity") or {}))
+                    hit_id = hit.get("id")
+                    distance = hit.get("distance")
+                else:
+                    entity = {}
+                    hit_id = None
+                    distance = None
+                entity.setdefault("path", entity.get("source_path"))
+                bucket.append({
+                    "id": hit_id,
+                    "distance": distance,
+                    "entity": entity,
+                })
+            formatted.append(bucket)
+        return formatted

    def search_by_path(self, collection_name: str, query_path: str, top_k: int):
-        filter_expr = f"path like '%{query_path}%'" if query_path else "path like '%%'"
+        if query_path:
+            escaped = query_path.replace('"', '\\"')
+            filter_expr = f'source_path like "%{escaped}%"'
+        else:
+            filter_expr = "source_path like '%%'"
        results = self._get_client().query(
            collection_name,
            filter=filter_expr,
            limit=top_k,
-            output_fields=["path"],
+            output_fields=[
+                "path",
+                "source_path",
+                "chunk_id",
+                "mime",
+                "text",
+                "start_offset",
+                "end_offset",
+                "type",
+                "name",
+            ],
        )
-        return [[{"id": r["path"], "distance": 1.0, "entity": {"path": r["path"]}} for r in results]]
+        formatted = []
+        for row in results:
+            entity = dict(row)
+            entity.setdefault("path", entity.get("source_path"))
+            formatted.append({
+                "id": entity.get("path"),
+                "distance": 1.0,
+                "entity": entity,
+            })
+        return [formatted]

    def get_all_stats(self) -> Dict[str, Any]:
        client = self._get_client()
--- a/services/vector_db/providers/milvus_server.py
+++ b/services/vector_db/providers/milvus_server.py
@@ -58,15 +58,19 @@ class MilvusServerProvider(BaseVectorProvider):
        client = self._get_client()
        if client.has_collection(collection_name):
            return
+        common_fields = [
+            FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
+            FieldSchema(name="source_path", dtype=DataType.VARCHAR, max_length=512, is_primary=False, auto_id=False),
+        ]
        if vector:
            vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
            if vector_dim <= 0:
                vector_dim = 4096
            fields = [
-                FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
+                *common_fields,
                FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
            ]
-            schema = CollectionSchema(fields, description="Image vector collection")
+            schema = CollectionSchema(fields, description="Vector collection", enable_dynamic_field=True)
            client.create_collection(collection_name, schema=schema)
            index_params = MilvusClient.prepare_index_params()
            index_params.add_index(
@@ -78,38 +82,98 @@ class MilvusServerProvider(BaseVectorProvider):
            )
            client.create_index(collection_name, index_params=index_params)
        else:
-            fields = [
-                FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
-            ]
-            schema = CollectionSchema(fields, description="Simple file index")
+            schema = CollectionSchema(common_fields, description="Simple file index", enable_dynamic_field=True)
            client.create_collection(collection_name, schema=schema)

    def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
-        self._get_client().upsert(collection_name, data)
+        payload = dict(data)
+        payload.setdefault("source_path", payload.get("path"))
+        payload.setdefault("vector_id", payload.get("path"))
+        self._get_client().upsert(collection_name, data=[payload])

    def delete_vector(self, collection_name: str, path: str) -> None:
-        self._get_client().delete(collection_name, ids=[path])
+        """Delete every entity whose ``source_path`` equals ``path``."""
+        client = self._get_client()
+        # Escape backslashes first, then double quotes: an unescaped
+        # backslash in the path would otherwise swallow the following
+        # character (or the closing quote) of the filter's string literal.
+        escaped = path.replace("\\", "\\\\").replace('"', '\\"')
+        client.delete(collection_name, filter=f'source_path == "{escaped}"')

    def search_vectors(self, collection_name: str, query_embedding, top_k: int):
        search_params = {"metric_type": "COSINE"}
-        return self._get_client().search(
+        output_fields = [
+            "path",
+            "source_path",
+            "chunk_id",
+            "mime",
+            "text",
+            "start_offset",
+            "end_offset",
+            "type",
+            "name",
+        ]
+        raw_results = self._get_client().search(
            collection_name,
            data=[query_embedding],
            anns_field="embedding",
            search_params=search_params,
            limit=top_k,
-            output_fields=["path"],
+            output_fields=output_fields,
        )
+        formatted: List[List[Dict[str, Any]]] = []
+        for hits in raw_results:
+            bucket: List[Dict[str, Any]] = []
+            for hit in hits:
+                if hasattr(hit, "entity"):
+                    entity = dict(getattr(hit, "entity", {}) or {})
+                    hit_id = getattr(hit, "id", None)
+                    distance = getattr(hit, "distance", None)
+                elif isinstance(hit, dict):
+                    entity = dict((hit.get("entity") or {}))
+                    hit_id = hit.get("id")
+                    distance = hit.get("distance")
+                else:
+                    entity = {}
+                    hit_id = None
+                    distance = None
+                entity.setdefault("path", entity.get("source_path"))
+                bucket.append({
+                    "id": hit_id,
+                    "distance": distance,
+                    "entity": entity,
+                })
+            formatted.append(bucket)
+        return formatted

    def search_by_path(self, collection_name: str, query_path: str, top_k: int):
-        filter_expr = f"path like '%{query_path}%'" if query_path else "path like '%%'"
+        if query_path:
+            escaped = query_path.replace('"', '\\"')
+            filter_expr = f'source_path like "%{escaped}%"'
+        else:
+            filter_expr = "source_path like '%%'"
        results = self._get_client().query(
            collection_name,
            filter=filter_expr,
            limit=top_k,
-            output_fields=["path"],
+            output_fields=[
+                "path",
+                "source_path",
+                "chunk_id",
+                "mime",
+                "text",
+                "start_offset",
+                "end_offset",
+                "type",
+                "name",
+            ],
        )
-        return [[{"id": r["path"], "distance": 1.0, "entity": {"path": r["path"]}} for r in results]]
+        formatted = []
+        for row in results:
+            entity = dict(row)
+            entity.setdefault("path", entity.get("source_path"))
+            formatted.append({
+                "id": entity.get("path"),
+                "distance": 1.0,
+                "entity": entity,
+            })
+        return [formatted]

    def get_all_stats(self) -> Dict[str, Any]:
        client = self._get_client()
--- a/services/vector_db/providers/qdrant.py
+++ b/services/vector_db/providers/qdrant.py
@@ -58,29 +58,59 @@ class QdrantProvider(BaseVectorProvider):
        size = dim if vector and isinstance(dim, int) and dim > 0 else 1
        return qmodels.VectorParams(size=size, distance=qmodels.Distance.COSINE)

+    def _ensure_payload_indexes(self, client: QdrantClient, collection_name: str) -> None:
+        """Create keyword payload indexes on ``path`` and ``source_path``.
+
+        An error whose message says the index already exists is ignored;
+        any other error is re-raised to the caller.
+        """
+        duplicate_markers = ("already exists", "index exists")
+        for indexed_field in ("path", "source_path"):
+            try:
+                client.create_payload_index(
+                    collection_name=collection_name,
+                    field_name=indexed_field,
+                    field_schema="keyword",
+                )
+            except Exception as exc:  # pragma: no cover - depends on an external service
+                reason = str(exc).lower()
+                # Only "index already exists" style messages are tolerated.
+                if not any(marker in reason for marker in duplicate_markers):
+                    raise
+
    def ensure_collection(self, collection_name: str, vector: bool, dim: int) -> None:
        client = self._get_client()
        try:
-            if client.collection_exists(collection_name):
-                return
+            exists = client.collection_exists(collection_name)
        except Exception as exc:  # pragma: no cover - 依赖外部服务
            raise RuntimeError(f"Failed to check Qdrant collection '{collection_name}': {exc}") from exc

+        if exists:
+            try:
+                self._ensure_payload_indexes(client, collection_name)
+            except Exception:
+                pass
+            return
+
        vectors_config = self._vector_params(vector, dim)
        try:
            client.create_collection(collection_name=collection_name, vectors_config=vectors_config)
        except Exception as exc:  # pragma: no cover
            if "already exists" in str(exc).lower():
+                try:
+                    self._ensure_payload_indexes(client, collection_name)
+                except Exception:
+                    pass
                return
            raise RuntimeError(f"Failed to create Qdrant collection '{collection_name}': {exc}") from exc

+        try:
+            self._ensure_payload_indexes(client, collection_name)
+        except Exception:
+            pass
+
    @staticmethod
-    def _point_id(path: str) -> str:
-        return str(uuid5(NAMESPACE_URL, path))
+    def _point_id(uid: str) -> str:
+        return str(uuid5(NAMESPACE_URL, uid))

    def _prepare_point(self, data: Dict[str, Any]) -> qmodels.PointStruct:
-        path = data.get("path")
-        if not path:
+        uid = data.get("path")
+        if not uid:
            raise ValueError("Qdrant upsert requires 'path' in data")

        embedding = data.get("embedding")
@@ -89,8 +119,11 @@ class QdrantProvider(BaseVectorProvider):
        else:
            vector = [float(x) for x in embedding]

-        payload = {"path": path}
-        return qmodels.PointStruct(id=self._point_id(path), vector=vector, payload=payload)
+        payload = {k: v for k, v in data.items() if k != "embedding"}
+        payload.setdefault("vector_id", uid)
+        source_path = payload.get("source_path") or payload.get("path")
+        payload["path"] = source_path
+        return qmodels.PointStruct(id=self._point_id(str(uid)), vector=vector, payload=payload)

    def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
        client = self._get_client()
@@ -99,7 +132,12 @@ class QdrantProvider(BaseVectorProvider):

    def delete_vector(self, collection_name: str, path: str) -> None:
        client = self._get_client()
-        selector = qmodels.PointIdsList(points=[self._point_id(path)])
+        condition = qmodels.FieldCondition(
+            key="path",
+            match=qmodels.MatchValue(value=path),
+        )
+        flt = qmodels.Filter(must=[condition])
+        selector = qmodels.FilterSelector(filter=flt)
        client.delete(collection_name=collection_name, points_selector=selector, wait=True)

    def _format_search_results(self, points: Sequence[qmodels.ScoredPoint]):
@@ -107,7 +145,7 @@ class QdrantProvider(BaseVectorProvider):
            {
                "id": point.id,
                "distance": point.score,
-                "entity": {"path": (point.payload or {}).get("path")},
+                "entity": point.payload or {},
            }
            for point in points
        ]
@@ -141,11 +179,11 @@ class QdrantProvider(BaseVectorProvider):
                break

            for record in records:
-                path = (record.payload or {}).get("path")
-                if query_path and path:
-                    if query_path not in path:
-                        continue
-                results.append({"id": record.id, "distance": 1.0, "entity": {"path": path}})
+                payload = record.payload or {}
+                path = payload.get("path")
+                if query_path and path and query_path not in path:
+                    continue
+                results.append({"id": record.id, "distance": 1.0, "entity": payload})
                if len(results) >= top_k:
                    break

--- a/web/src/api/vfs.ts
+++ b/web/src/api/vfs.ts
@@ -21,9 +21,29 @@ export interface DirListing {
 }

 export interface SearchResultItem {
-  id: number;
+  id: string;
  path: string;
  score: number;
+  chunk_id?: string;
+  snippet?: string;
+  mime?: string;
+  source_type?: string;
+  start_offset?: number;
+  end_offset?: number;
+  metadata?: Record<string, any>;
+}
+
+export interface SearchPagination {
+  page: number;
+  page_size: number;
+  has_more: boolean;
+}
+
+export interface SearchResponse {
+  items: SearchResultItem[];
+  query: string;
+  mode?: string;
+  pagination?: SearchPagination;
 }

 export const vfsApi = {
@@ -105,6 +125,20 @@ export const vfsApi = {
      xhr.send(fd);
    });
  },
-  searchFiles: (q: string, top_k: number = 10, mode: 'vector' | 'filename' = 'vector') =>
-    request<{ items: SearchResultItem[]; query: string }>(`/search?q=${encodeURIComponent(q)}&top_k=${top_k}&mode=${mode}`),
+  searchFiles: (
+    q: string,
+    top_k: number = 10,
+    mode: 'vector' | 'filename' = 'vector',
+    page?: number,
+    page_size?: number,
+  ) => {
+    const params = new URLSearchParams({
+      q,
+      top_k: String(top_k),
+      mode,
+    });
+    if (page !== undefined) params.set('page', String(page));
+    if (page_size !== undefined) params.set('page_size', String(page_size));
+    return request<SearchResponse>(`/search?${params.toString()}`);
+  },
 };
--- a/web/src/layout/SearchDialog.tsx
+++ b/web/src/layout/SearchDialog.tsx
@@ -1,128 +1,313 @@
-import { Modal, Input, List, Divider, Spin, Select, Space } from 'antd';
+import { Modal, Input, List, Divider, Spin, Space, Tag, Typography, Empty, Flex, Segmented, Pagination } from 'antd';
 import { SearchOutlined, FileTextOutlined } from '@ant-design/icons';
-import React, { useState } from 'react';
+import React, { useRef, useState } from 'react';
 import { vfsApi, type SearchResultItem } from '../api/vfs';
 import { useI18n } from '../i18n';
 import { useNavigate } from 'react-router';

-
 interface SearchDialogProps {
  open: boolean;
  onClose: () => void;
 }

-const SEARCH_MODES = (t: (k: string)=>string) => [
-  { label: t('Smart Search'), value: 'vector' },
-  { label: t('Name Search'), value: 'filename' },
-];
+type SearchMode = 'vector' | 'filename';
+const PAGE_SIZE = 10;

 const SearchDialog: React.FC<SearchDialogProps> = ({ open, onClose }) => {
  const [search, setSearch] = useState('');
  const [loading, setLoading] = useState(false);
  const [results, setResults] = useState<SearchResultItem[]>([]);
  const [searched, setSearched] = useState(false);
-  const [searchMode, setSearchMode] = useState<'vector' | 'filename'>('vector');
+  const [searchMode, setSearchMode] = useState<SearchMode>('vector');
+  const [page, setPage] = useState(1);
+  const [hasMore, setHasMore] = useState(false);
+  const requestIdRef = useRef(0);
  const { t } = useI18n();
  const navigate = useNavigate();

-  const handleSearch = async () => {
-    if (!search.trim()) return;
+  const renderSourceLabel = (value?: string) => {
+    switch ((value || '').toLowerCase()) {
+      case 'vector':
+        return t('Vector Search');
+      case 'filename':
+        return t('Name Search');
+      case 'text':
+        return t('Text Chunk');
+      case 'image':
+        return t('Image Description');
+      default:
+        return t('Vector Search');
+    }
+  };
+
+  const sourceColor = (value?: string) => {
+    switch ((value || '').toLowerCase()) {
+      case 'vector':
+        return 'blue';
+      case 'filename':
+        return 'green';
+      case 'image':
+        return 'volcano';
+      case 'text':
+        return 'geekblue';
+      default:
+        return 'purple';
+    }
+  };
+
+  const performSearch = async (options?: { page?: number; mode?: SearchMode }) => {
+    const query = search.trim();
+    if (!query) {
+      setSearched(false);
+      setResults([]);
+      setHasMore(false);
+      return;
+    }
+
+    const currentMode = options?.mode ?? searchMode;
+    const targetPage = currentMode === 'filename' ? (options?.page ?? (currentMode === searchMode ? page : 1)) : 1;
+
+    const requestId = requestIdRef.current + 1;
+    requestIdRef.current = requestId;
+
    setLoading(true);
    setSearched(true);
-    try {
-      const res = await vfsApi.searchFiles(search, 10, searchMode);
-      setResults(res.items);
-    } catch (e) {
-      setResults([]);
+    if (currentMode === 'filename') {
+      setPage(targetPage);
+    } else {
+      setPage(1);
+      setHasMore(false);
+    }
+
+    try {
+      const res = await vfsApi.searchFiles(
+        query,
+        currentMode === 'filename' ? PAGE_SIZE : 10,
+        currentMode,
+        currentMode === 'filename' ? targetPage : undefined,
+        currentMode === 'filename' ? PAGE_SIZE : undefined,
+      );
+      if (requestId !== requestIdRef.current) {
+        return;
+      }
+      setResults(res.items);
+      if (currentMode === 'filename') {
+        const pagination = res.pagination;
+        setHasMore(Boolean(pagination?.has_more));
+        if (pagination?.page) {
+          setPage(pagination.page);
+        }
+      } else {
+        setHasMore(false);
+      }
+    } catch (e) {
+      if (requestId !== requestIdRef.current) {
+        return;
+      }
+      setResults([]);
+      if (currentMode === 'filename') {
+        setHasMore(false);
+      }
+    } finally {
+      if (requestId === requestIdRef.current) {
+        setLoading(false);
+      }
    }
-    setLoading(false);
  };

+  const handleSearch = () => {
+    if (!search.trim()) {
+      setResults([]);
+      setSearched(false);
+      setHasMore(false);
+      setPage(1);
+      return;
+    }
+    void performSearch({ page: searchMode === 'filename' ? 1 : undefined });
+  };
+
+  const handleModeChange = (value: string | number) => {
+    const nextMode = value as SearchMode;
+    setHasMore(false);
+    setPage(1);
+    setSearchMode(nextMode);
+    if (search.trim()) {
+      void performSearch({ mode: nextMode, page: nextMode === 'filename' ? 1 : undefined });
+    } else {
+      setResults([]);
+      setSearched(false);
+    }
+  };
+
+  const handleClose = () => {
+    setSearch('');
+    setResults([]);
+    setSearched(false);
+    setSearchMode('vector');
+    setPage(1);
+    setHasMore(false);
+    requestIdRef.current = 0;
+    setLoading(false);
+    onClose();
+  };
+
+  const totalItems = searchMode === 'filename'
+    ? (hasMore ? page * PAGE_SIZE + 1 : (page - 1) * PAGE_SIZE + results.length)
+    : results.length;
+
  return (
    <Modal
      open={open}
-      onCancel={onClose}
+      onCancel={handleClose}
      footer={null}
-      width={600}
+      width={720}
      centered
      title={null}
      closable={false}
+      styles={{
+        body: {
+          padding: '12px 16px 16px',
+          maxHeight: '70vh',
+          overflow: 'hidden',
+          display: 'flex',
+          flexDirection: 'column',
+          gap: 12,
+        },
+      }}
    >
-      <Space.Compact style={{ marginBottom: 0, width: '100%' }}>
-        <Select
-          options={SEARCH_MODES(t)}
-          value={searchMode}
-          onChange={v => setSearchMode(v as 'vector' | 'filename')}
-          style={{
-            width: 120,
-            fontSize: 18,
-            height: 40,
-            lineHeight: '40px',
-            borderTopRightRadius: 0,
-            borderBottomRightRadius: 0,
-            borderRight: 0,
-            verticalAlign: 'top',
-          }}
-          styles={{ popup: { root: { fontSize: 18 } } }}
-          popupMatchSelectWidth={false}
-        />
-        <Input
-          allowClear
-          prefix={<SearchOutlined />}
-          placeholder={t('Search files / tags / types')}
-          value={search}
-          onChange={e => setSearch(e.target.value)}
-          style={{
-            fontSize: 18,
-            height: 40,
-            width: 'calc(100% - 120px)',
-            borderTopLeftRadius: 0,
-            borderBottomLeftRadius: 0,
-            verticalAlign: 'top',
-          }}
-          autoFocus
-          onPressEnter={handleSearch}
-        />
-      </Space.Compact>
-      {searched && (
-        <>
-          <Divider style={{ margin: '12px 0' }}>{t('Search Results')}</Divider>
-          {loading ? (
-            <Spin />
-          ) : (
-            <List
-              itemLayout="horizontal"
-              dataSource={results}
-              locale={{ emptyText: t('No files found') }}
-              renderItem={item => {
-                const fullPath = item.path || '';
-                const trimmed = fullPath.replace(/\/+$/, '');
-                const parts = trimmed.split('/');
-                const filename = parts.pop() || '';
-                const dir = parts.length ? '/' + parts.join('/') : '/';
-                return (
-                  <List.Item>
-                    <List.Item.Meta
-                      avatar={<FileTextOutlined />}
-                      title={
-                        <a
-                          onClick={() => {
-                            navigate(`/files${dir === '/' ? '' : dir}`, { state: { highlight: { name: filename } } });
-                            onClose();
-                          }}
-                        >
-                          {fullPath}
-                        </a>
-                      }
-                      description={`${t('Relevance')}: ${item.score.toFixed(2)}`}
-                    />
-                  </List.Item>
-                );
-              }}
-            />
-          )}
-        </>
-      )}
+      <Flex vertical style={{ gap: 12, flex: 1, minHeight: 0 }}>
+        <Flex align="center" style={{ width: '100%', gap: 12, flexWrap: 'wrap' }}>
+          <Segmented
+            options={[
+              { label: t('Smart Search'), value: 'vector' },
+              { label: t('Name Search'), value: 'filename' },
+            ]}
+            value={searchMode}
+            onChange={handleModeChange}
+            style={{
+              minWidth: 160,
+              height: 40,
+              borderRadius: 20,
+              display: 'flex',
+              alignItems: 'center',
+            }}
+            size="large"
+          />
+          <Input
+            allowClear
+            prefix={<SearchOutlined />}
+            placeholder={t('Search files / tags / types')}
+            value={search}
+            onChange={e => {
+              const value = e.target.value;
+              setSearch(value);
+              if (!value.trim()) {
+                setResults([]);
+                setSearched(false);
+                setHasMore(false);
+                setPage(1);
+                requestIdRef.current += 1;
+                setLoading(false);
+              }
+            }}
+            style={{ fontSize: 18, height: 40, flex: 1, minWidth: 240 }}
+            styles={{
+              input: {
+                borderRadius: 20,
+              },
+            }}
+            autoFocus
+            onPressEnter={handleSearch}
+          />
+        </Flex>
+
+        {!searched ? null : (
+          <Flex vertical style={{ flex: 1, minHeight: 0 }}>
+            <Divider style={{ margin: 0, padding: '0 0 12px' }}>{t('Search Results')}</Divider>
+            {loading ? (
+              <Flex align="center" justify="center" style={{ flex: 1 }}>
+                <Spin />
+              </Flex>
+            ) : results.length === 0 ? (
+              <Flex align="center" justify="center" style={{ flex: 1 }}>
+                <Empty description={t('No files found')} image={Empty.PRESENTED_IMAGE_SIMPLE} />
+              </Flex>
+            ) : (
+              <div style={{ flex: 1, minHeight: 0, display: 'flex', flexDirection: 'column' }}>
+                <div style={{ flex: 1, minHeight: 0, overflowY: 'auto', paddingRight: 6 }}>
+                  <List
+                    itemLayout="horizontal"
+                    dataSource={results}
+                    split={false}
+                    renderItem={item => {
+                      const fullPath = item.path || '';
+                      const trimmed = fullPath.replace(/\/+$/, '');
+                      const parts = trimmed.split('/');
+                      const filename = parts.pop() || '';
+                      const dir = parts.length ? '/' + parts.join('/') : '/';
+                    const snippet = item.snippet || '';
+                    const retrieval = item.metadata?.retrieval_source || item.source_type;
+                    const retrievalLabel = renderSourceLabel(retrieval);
+                    const scoreText = Number.isFinite(item.score) ? item.score.toFixed(2) : '-';
+
+                      return (
+                        <List.Item style={{ padding: '10px 12px', borderRadius: 6, background: '#fafafa', marginBottom: 8 }}>
+                          <List.Item.Meta
+                            avatar={<FileTextOutlined style={{ fontSize: 18, color: '#8c8c8c' }} />}
+                            title={
+                              <a
+                                onClick={() => {
+                                  navigate(`/files${dir === '/' ? '' : dir}`, { state: { highlight: { name: filename } } });
+                                  handleClose();
+                                }}
+                                style={{ fontSize: 16 }}
+                              >
+                                {fullPath}
+                              </a>
+                            }
+                            description={(
+                              <Space direction="vertical" size={6} style={{ width: '100%' }}>
+                                {snippet ? (
+                                  <Typography.Paragraph ellipsis={{ rows: 3 }} style={{ marginBottom: 0 }}>
+                                    {snippet}
+                                  </Typography.Paragraph>
+                                ) : null}
+                                <Space size={10} wrap>
+                                  {retrieval ? (
+                                    <Tag color={sourceColor(retrieval)} style={{ marginRight: 0 }}>
+                                      {retrievalLabel}
+                                    </Tag>
+                                  ) : null}
+                                  <Typography.Text type="secondary">
+                                    {t('Relevance')}: {scoreText}
+                                  </Typography.Text>
+                                </Space>
+                              </Space>
+                            )}
+                          />
+                        </List.Item>
+                      );
+                    }}
+                  />
+                </div>
+                {searchMode === 'filename' && results.length > 0 ? (
+                  <Pagination
+                    current={page}
+                    pageSize={PAGE_SIZE}
+                    total={Math.max(totalItems, 1)}
+                    showSizeChanger={false}
+                    size="small"
+                    style={{ marginTop: 12, textAlign: 'right' }}
+                    onChange={(nextPage) => {
+                      void performSearch({ page: nextPage });
+                    }}
+                  />
+                ) : null}
+              </div>
+            )}
+          </Flex>
+        )}
+      </Flex>
    </Modal>
  );
 };
--- a/web/src/pages/SystemSettingsPage/SystemSettingsPage.tsx
+++ b/web/src/pages/SystemSettingsPage/SystemSettingsPage.tsx
@@ -8,14 +8,23 @@ import { useTheme } from '../../contexts/ThemeContext';
 import '../../styles/settings-tabs.css';
 import { useI18n } from '../../i18n';

-const APP_CONFIG_KEYS: {key: string, label: string, default?: string}[] = [
+const APP_CONFIG_KEYS: { key: string, label: string, default?: string }[] = [
  { key: 'APP_NAME', label: 'App Name' },
  { key: 'APP_LOGO', label: 'Logo URL' },
  { key: 'APP_DOMAIN', label: 'App Domain' },
  { key: 'FILE_DOMAIN', label: 'File Domain' },
 ];

-const VISION_CONFIG_KEYS = [
+interface AiConfigKeyBase {
+  key: string;
+  default?: string | number;
+}
+
+interface AiConfigKeyWithLabel extends AiConfigKeyBase {
+  label: string;
+}
+
+const VISION_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
  { key: 'AI_VISION_API_URL', label: 'Vision API URL' },
  { key: 'AI_VISION_MODEL', label: 'Vision Model', default: 'Qwen/Qwen2.5-VL-32B-Instruct' },
  { key: 'AI_VISION_API_KEY', label: 'Vision API Key' },
@@ -24,13 +33,24 @@ const VISION_CONFIG_KEYS = [
 const DEFAULT_EMBED_DIMENSION = 4096;
 const EMBED_DIM_KEY = 'AI_EMBED_DIM';

-const EMBED_CONFIG_KEYS = [
+const EMBED_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
  { key: 'AI_EMBED_API_URL', label: 'Embedding API URL' },
  { key: 'AI_EMBED_MODEL', label: 'Embedding Model', default: 'Qwen/Qwen3-Embedding-8B' },
  { key: 'AI_EMBED_API_KEY', label: 'Embedding API Key' },
 ];

-const ALL_AI_KEYS = [...VISION_CONFIG_KEYS, ...EMBED_CONFIG_KEYS, { key: EMBED_DIM_KEY, default: DEFAULT_EMBED_DIMENSION }];
+const RERANK_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
+  { key: 'AI_RERANK_API_URL', label: 'Rerank API URL' },
+  { key: 'AI_RERANK_MODEL', label: 'Rerank Model' },
+  { key: 'AI_RERANK_API_KEY', label: 'Rerank API Key' },
+];
+
+const ALL_AI_KEYS: AiConfigKeyBase[] = [
+  ...VISION_CONFIG_KEYS,
+  ...EMBED_CONFIG_KEYS,
+  ...RERANK_CONFIG_KEYS,
+  { key: EMBED_DIM_KEY, default: DEFAULT_EMBED_DIMENSION },
+];

 const formatBytes = (bytes?: number | null) => {
  if (bytes === null || bytes === undefined) return '-';
@@ -194,6 +214,8 @@ export default function SystemSettingsPage() {
    }
  }, [buildProviderConfigValues, message, t, vectorConfigForm, vectorProviders]);

+  const vectorSectionLoading = vectorStatsLoading || vectorConfigLoading;
+
  // 离开“外观设置”时，恢复后端持久化配置（取消未保存的预览）
  useEffect(() => {
    if (activeTab !== 'appearance') {
@@ -303,7 +325,7 @@ export default function SystemSettingsPage() {
                    </Form.Item>
                  </Card>
                  <Card title={t('Advanced')} style={{ marginTop: 24 }}>
-                    <Form.Item name={THEME_KEYS.TOKENS} label={t('Override AntD Tokens (JSON)')} tooltip={t('e.g. {"colorText": "#222"}') }>
+                    <Form.Item name={THEME_KEYS.TOKENS} label={t('Override AntD Tokens (JSON)')} tooltip={t('e.g. {"colorText": "#222"}')}>
                      <Input.TextArea autoSize={{ minRows: 4 }} placeholder='{ "colorText": "#222" }' />
                    </Form.Item>
                    <Form.Item name={THEME_KEYS.CSS} label={t('Custom CSS')}>
@@ -402,6 +424,13 @@ export default function SystemSettingsPage() {
                      <InputNumber min={1} max={32768} style={{ width: '100%' }} />
                    </Form.Item>
                  </Card>
+                  <Card title={t('Rerank Model')} style={{ marginTop: 24 }}>
+                    {RERANK_CONFIG_KEYS.map(({ key, label }) => (
+                      <Form.Item key={key} name={key} label={t(label)}>
+                        <Input size="large" />
+                      </Form.Item>
+                    ))}
+                  </Card>
                  <Form.Item style={{ marginTop: 24 }}>
                    <Button type="primary" htmlType="submit" loading={loading} block>
                      {t('Save')}
@@ -428,178 +457,180 @@ export default function SystemSettingsPage() {
                          {t('Refresh')}
                        </Button>
                      </div>
-                      {vectorMetaError ? (
-                        <Alert type="error" showIcon message={vectorMetaError} />
-                      ) : null}
-                      {vectorStatsLoading && !vectorStats ? (
-                        <Spin />
-                      ) : vectorStats ? (
-                        <Space direction="vertical" size={16} style={{ width: '100%' }}>
-                          <div style={{ display: 'flex', flexWrap: 'wrap', gap: 24 }}>
-                            <div>
-                              <div style={{ color: '#888' }}>{t('Collections')}</div>
-                              <div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.collection_count}</div>
-                            </div>
-                            <div>
-                              <div style={{ color: '#888' }}>{t('Vectors')}</div>
-                              <div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.total_vectors}</div>
-                            </div>
-                            <div>
-                              <div style={{ color: '#888' }}>{t('Database Size')}</div>
-                              <div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.db_file_size_bytes)}</div>
-                            </div>
-                            <div>
-                              <div style={{ color: '#888' }}>{t('Estimated Memory')}</div>
-                              <div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.estimated_total_memory_bytes)}</div>
-                            </div>
-                          </div>
-                          {vectorStats.collections.length ? (
-                            <Space direction="vertical" style={{ width: '100%' }} size={16}>
-                              {vectorStats.collections.map((collection) => (
-                                <div key={collection.name} style={{ border: '1px solid #f0f0f0', borderRadius: 8, padding: 16 }}>
-                                  <Space direction="vertical" size={12} style={{ width: '100%' }}>
-                                    <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 12 }}>
-                                      <strong>{collection.name}</strong>
-                                      <span style={{ color: '#888' }}>
-                                        {collection.is_vector_collection && collection.dimension
-                                          ? `${t('Dimension')}: ${collection.dimension}`
-                                          : t('Non-vector collection')}
-                                      </span>
-                                    </div>
-                                    <div>{t('Vectors')}: {collection.row_count}</div>
-                                    {collection.is_vector_collection ? (
-                                      <div>{t('Estimated memory')}: {formatBytes(collection.estimated_memory_bytes)}</div>
-                                    ) : null}
-                                    {collection.indexes.length ? (
-                                      <Space direction="vertical" size={4} style={{ width: '100%' }}>
-                                        <span>{t('Indexes')}:</span>
-                                        <ul style={{ paddingLeft: 20, margin: 0 }}>
-                                          {collection.indexes.map((index) => (
-                                            <li key={`${collection.name}-${index.index_name || 'default'}`}>
-                                              <span>{index.index_name || t('Unnamed index')}</span>
-                                              <span>{' · '}{index.index_type || '-'}</span>
-                                              <span>{' · '}{index.metric_type || '-'}</span>
-                                              <span>{' · '}{t('Indexed rows')}: {index.indexed_rows}</span>
-                                              <span>{' · '}{t('Pending rows')}: {index.pending_index_rows}</span>
-                                              <span>{' · '}{t('Status')}: {index.state || '-'}</span>
-                                            </li>
-                                          ))}
-                                        </ul>
-                                      </Space>
-                                    ) : null}
-                                  </Space>
+                      {vectorSectionLoading ? (
+                        <div style={{ display: 'flex', justifyContent: 'center', padding: '24px 0' }}>
+                          <Spin />
+                        </div>
+                      ) : (
+                        <>
+                          {vectorMetaError ? (
+                            <Alert type="error" showIcon message={vectorMetaError} />
+                          ) : null}
+                          {vectorStats ? (
+                            <Space direction="vertical" size={16} style={{ width: '100%' }}>
+                              <div style={{ display: 'flex', flexWrap: 'wrap', gap: 24 }}>
+                                <div>
+                                  <div style={{ color: '#888' }}>{t('Collections')}</div>
+                                  <div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.collection_count}</div>
                                </div>
-                              ))}
+                                <div>
+                                  <div style={{ color: '#888' }}>{t('Vectors')}</div>
+                                  <div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.total_vectors}</div>
+                                </div>
+                                <div>
+                                  <div style={{ color: '#888' }}>{t('Database Size')}</div>
+                                  <div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.db_file_size_bytes)}</div>
+                                </div>
+                                <div>
+                                  <div style={{ color: '#888' }}>{t('Estimated Memory')}</div>
+                                  <div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.estimated_total_memory_bytes)}</div>
+                                </div>
+                              </div>
+                              {vectorStats.collections.length ? (
+                                <Space direction="vertical" style={{ width: '100%' }} size={16}>
+                                  {vectorStats.collections.map((collection) => (
+                                    <div key={collection.name} style={{ border: '1px solid #f0f0f0', borderRadius: 8, padding: 16 }}>
+                                      <Space direction="vertical" size={12} style={{ width: '100%' }}>
+                                        <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 12 }}>
+                                          <strong>{collection.name}</strong>
+                                          <span style={{ color: '#888' }}>
+                                            {collection.is_vector_collection && collection.dimension
+                                              ? `${t('Dimension')}: ${collection.dimension}`
+                                              : t('Non-vector collection')}
+                                          </span>
+                                        </div>
+                                        <div>{t('Vectors')}: {collection.row_count}</div>
+                                        {collection.is_vector_collection ? (
+                                          <div>{t('Estimated memory')}: {formatBytes(collection.estimated_memory_bytes)}</div>
+                                        ) : null}
+                                        {collection.indexes.length ? (
+                                          <Space direction="vertical" size={4} style={{ width: '100%' }}>
+                                            <span>{t('Indexes')}:</span>
+                                            <ul style={{ paddingLeft: 20, margin: 0 }}>
+                                              {collection.indexes.map((index) => (
+                                                <li key={`${collection.name}-${index.index_name || 'default'}`}>
+                                                  <span>{index.index_name || t('Unnamed index')}</span>
+                                                  <span>{' · '}{index.index_type || '-'}</span>
+                                                  <span>{' · '}{index.metric_type || '-'}</span>
+                                                  <span>{' · '}{t('Indexed rows')}: {index.indexed_rows}</span>
+                                                  <span>{' · '}{t('Pending rows')}: {index.pending_index_rows}</span>
+                                                  <span>{' · '}{t('Status')}: {index.state || '-'}</span>
+                                                </li>
+                                              ))}
+                                            </ul>
+                                          </Space>
+                                        ) : null}
+                                      </Space>
+                                    </div>
+                                  ))}
+                                </Space>
+                              ) : (
+                                <Empty description={t('No collections')} />
+                              )}
+                              <div style={{ color: '#888' }}>
+                                {t('Estimated memory is calculated as vectors x dimension x 4 bytes (float32).')}
+                              </div>
                            </Space>
+                          ) : vectorStatsError ? (
+                            <div style={{ color: '#ff4d4f' }}>{vectorStatsError}</div>
                          ) : (
                            <Empty description={t('No collections')} />
                          )}
-                          <div style={{ color: '#888' }}>
-                            {t('Estimated memory is calculated as vectors x dimension x 4 bytes (float32).')}
-                          </div>
-                        </Space>
-                      ) : vectorStatsError ? (
-                        <div style={{ color: '#ff4d4f' }}>{vectorStatsError}</div>
-                      ) : (
-                        <Empty description={t('No collections')} />
+                          <Form
+                            layout="vertical"
+                            form={vectorConfigForm}
+                            onFinish={handleVectorConfigSave}
+                            initialValues={{ type: selectedProviderType || undefined, config: {} }}
+                          >
+                            <Form.Item
+                              name="type"
+                              label={t('Database Provider')}
+                              rules={[{ required: true, message: t('Please select a provider') }]}
+                            >
+                              <Select
+                                size="large"
+                                options={vectorProviders.map((provider) => ({
+                                  value: provider.type,
+                                  label: provider.enabled ? provider.label : `${provider.label} (${t('Coming soon')})`,
+                                  disabled: !provider.enabled,
+                                }))}
+                                onChange={handleProviderChange}
+                                loading={vectorConfigLoading && !vectorProviders.length}
+                              />
+                            </Form.Item>
+                            {selectedProvider?.description ? (
+                              <Alert
+                                type="info"
+                                showIcon
+                                message={t(selectedProvider.description)}
+                                style={{ marginBottom: 16 }}
+                              />
+                            ) : null}
+                            {selectedProvider?.config_schema?.map((field) => (
+                              <Form.Item
+                                key={field.key}
+                                name={['config', field.key]}
+                                label={t(field.label)}
+                                rules={field.required ? [{ required: true, message: t('Please input {label}', { label: t(field.label) }) }] : []}
+                              >
+                                {field.type === 'password' ? (
+                                  <Input.Password size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
+                                ) : (
+                                  <Input size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
+                                )}
+                              </Form.Item>
+                            ))}
+                            {selectedProvider && !selectedProvider.enabled ? (
+                              <Alert
+                                type="warning"
+                                showIcon
+                                message={t('This provider is not available yet')}
+                                style={{ marginBottom: 16 }}
+                              />
+                            ) : null}
+                            <Form.Item>
+                              <Space direction="vertical" style={{ width: '100%' }}>
+                                <Button
+                                  type="primary"
+                                  htmlType="submit"
+                                  loading={vectorConfigSaving}
+                                  block
+                                  disabled={!selectedProvider?.enabled}
+                                >
+                                  {t('Save')}
+                                </Button>
+                                <Button
+                                  danger
+                                  htmlType="button"
+                                  block
+                                  onClick={() => {
+                                    Modal.confirm({
+                                      title: t('Confirm clear vector database?'),
+                                      content: t('This will delete all collections irreversibly.'),
+                                      okText: t('Confirm Clear'),
+                                      okType: 'danger',
+                                      cancelText: t('Cancel'),
+                                      onOk: async () => {
+                                        try {
+                                          await vectorDBApi.clearAll();
+                                          message.success(t('Vector database cleared'));
+                                          await fetchVectorStats();
+                                          await fetchVectorMeta();
+                                        } catch (e: any) {
+                                          message.error(e.message || t('Clear failed'));
+                                        }
+                                      },
+                                    });
+                                  }}
+                                >
+                                  {t('Clear Vector DB')}
+                                </Button>
+                              </Space>
+                            </Form.Item>
+                          </Form>
+                        </>
                      )}
                    </Space>
-                    {vectorConfigLoading && !vectorProviders.length ? (
-                      <Spin />
-                    ) : (
-                      <Form
-                        layout="vertical"
-                        form={vectorConfigForm}
-                        onFinish={handleVectorConfigSave}
-                        initialValues={{ type: selectedProviderType || undefined, config: {} }}
-                      >
-                        <Form.Item
-                          name="type"
-                          label={t('Database Provider')}
-                          rules={[{ required: true, message: t('Please select a provider') }]}
-                        >
-                          <Select
-                            size="large"
-                            options={vectorProviders.map((provider) => ({
-                              value: provider.type,
-                              label: provider.enabled ? provider.label : `${provider.label} (${t('Coming soon')})`,
-                              disabled: !provider.enabled,
-                            }))}
-                            onChange={handleProviderChange}
-                            loading={vectorConfigLoading && !vectorProviders.length}
-                          />
-                        </Form.Item>
-                        {selectedProvider?.description ? (
-                          <Alert
-                            type="info"
-                            showIcon
-                            message={t(selectedProvider.description)}
-                            style={{ marginBottom: 16 }}
-                          />
-                        ) : null}
-                        {selectedProvider?.config_schema?.map((field) => (
-                          <Form.Item
-                            key={field.key}
-                            name={['config', field.key]}
-                            label={t(field.label)}
-                            rules={field.required ? [{ required: true, message: t('Please input {label}', { label: t(field.label) }) }] : []}
-                          >
-                            {field.type === 'password' ? (
-                              <Input.Password size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
-                            ) : (
-                              <Input size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
-                            )}
-                          </Form.Item>
-                        ))}
-                        {selectedProvider && !selectedProvider.enabled ? (
-                          <Alert
-                            type="warning"
-                            showIcon
-                            message={t('This provider is not available yet')}
-                            style={{ marginBottom: 16 }}
-                          />
-                        ) : null}
-                        <Form.Item>
-                          <Space direction="vertical" style={{ width: '100%' }}>
-                            <Button
-                              type="primary"
-                              htmlType="submit"
-                              loading={vectorConfigSaving}
-                              block
-                              disabled={!selectedProvider?.enabled}
-                            >
-                              {t('Save')}
-                            </Button>
-                            <Button
-                              danger
-                              htmlType="button"
-                              block
-                              onClick={() => {
-                                Modal.confirm({
-                                  title: t('Confirm clear vector database?'),
-                                  content: t('This will delete all collections irreversibly.'),
-                                  okText: t('Confirm Clear'),
-                                  okType: 'danger',
-                                  cancelText: t('Cancel'),
-                                  onOk: async () => {
-                                    try {
-                                      await vectorDBApi.clearAll();
-                                      message.success(t('Vector database cleared'));
-                                      await fetchVectorStats();
-                                      await fetchVectorMeta();
-                                    } catch (e: any) {
-                                      message.error(e.message || t('Clear failed'));
-                                    }
-                                  },
-                                });
-                              }}
-                            >
-                              {t('Clear Vector DB')}
-                            </Button>
-                          </Space>
-                        </Form.Item>
-                      </Form>
-                    )}
                  </Space>
                </Card>
              ),