feat: Enhance vector database providers with source path handling and improved search functionality

This commit is contained in:
shiyu
2025-09-27 13:34:18 +08:00
parent ee6e570ccb
commit a4af9475ef
10 changed files with 1082 additions and 353 deletions

View File

@@ -1,4 +1,7 @@
from typing import Any, Dict, List, Tuple
from fastapi import APIRouter, Depends, Query from fastapi import APIRouter, Depends, Query
from schemas.fs import SearchResultItem from schemas.fs import SearchResultItem
from services.auth import get_current_active_user, User from services.auth import get_current_active_user, User
from services.ai import get_text_embedding from services.ai import get_text_embedding
@@ -6,24 +9,96 @@ from services.vector_db import VectorDBService
router = APIRouter(prefix="/api/search", tags=["search"]) router = APIRouter(prefix="/api/search", tags=["search"])
async def search_files_by_vector(q: str, top_k: int):
embedding = await get_text_embedding(q)
vector_db = VectorDBService()
results = await vector_db.search_vectors("vector_collection", embedding, top_k)
items = [
SearchResultItem(id=res["id"], path=res["entity"]["path"], score=res["distance"])
for res in results[0]
]
return {"items": items, "query": q}
def _normalize_result(raw: Dict[str, Any], source: str, fallback_score: float = 0.0) -> SearchResultItem:
    """Convert one raw provider hit into a SearchResultItem.

    Prefers ``source_path`` over the stored (possibly chunk-suffixed)
    ``path`` and records provenance details in the item's metadata.
    """
    entity = dict(raw.get("entity") or {})
    stored_path = entity.get("path")
    # The user-facing path: the original file path when available.
    path = entity.get("source_path") or stored_path or ""

    raw_chunk = entity.get("chunk_id")
    raw_distance = raw.get("distance")

    metadata: Dict[str, Any] = {
        "retrieval_source": source,
        "raw_distance": raw_distance,
    }
    if stored_path and stored_path != path:
        metadata["stored_path"] = stored_path
    if entity.get("vector_id"):
        metadata["vector_id"] = entity["vector_id"]

    return SearchResultItem(
        id=str(raw.get("id")),
        path=path,
        score=float(raw_distance) if raw_distance is not None else fallback_score,
        chunk_id=None if raw_chunk is None else str(raw_chunk),
        snippet=entity.get("text") or entity.get("description") or entity.get("name"),
        mime=entity.get("mime"),
        source_type=entity.get("type") or source,
        start_offset=entity.get("start_offset"),
        end_offset=entity.get("end_offset"),
        metadata=metadata,
    )
async def _vector_search(query: str, top_k: int) -> List[SearchResultItem]:
    """Embed *query* and run a vector similarity search.

    Degrades gracefully: any embedding or search failure yields an
    empty result list instead of propagating the error.
    """
    vector_db = VectorDBService()
    try:
        embedding = await get_text_embedding(query)
    except Exception:
        embedding = None
    if not embedding:
        return []
    try:
        # Over-fetch a little so callers can slice down to top_k afterwards.
        raw_results = await vector_db.search_vectors(
            "vector_collection", embedding, max(top_k, 10)
        )
    except Exception:
        return []
    items: List[SearchResultItem] = []
    for bucket in raw_results or []:
        items.extend(_normalize_result(record, "vector") for record in bucket or [])
    return items
async def _filename_search(query: str, page: int, page_size: int) -> Tuple[List[SearchResultItem], bool]:
    """Substring search over indexed paths with page-based slicing.

    Returns ``(items for the requested page, whether more pages exist)``.
    Provider failures degrade to an empty page.
    """
    vector_db = VectorDBService()
    # Fetch enough rows to cover the requested page plus at least one extra
    # record (to detect has_more), capped to keep the provider query bounded.
    fetch_limit = min(max(page * page_size + 1, page_size * (page + 2)), 2000)
    try:
        raw_results = await vector_db.search_by_path("vector_collection", query, fetch_limit)
    except Exception:
        return [], False

    rows = raw_results[0] if raw_results else []
    unique_items: List[SearchResultItem] = []
    seen: set[str] = set()
    for row in rows or []:
        item = _normalize_result(row, "filename", fallback_score=1.0)
        stored = item.metadata.get("stored_path") if item.metadata else None
        dedup_key = item.path or stored or ""
        if dedup_key not in seen:
            seen.add(dedup_key)
            unique_items.append(item)

    start = max(page - 1, 0) * page_size
    end = start + page_size
    page_items = unique_items[start:end]
    for rank, item in enumerate(page_items, start=start):
        if item.metadata is None:
            item.metadata = {}
        item.metadata.setdefault("retrieval_rank", rank)
    return page_items, len(unique_items) > end
@router.get("") @router.get("")
@@ -31,11 +106,32 @@ async def search_files(
q: str = Query(..., description="搜索查询"), q: str = Query(..., description="搜索查询"),
top_k: int = Query(10, description="返回结果数量"), top_k: int = Query(10, description="返回结果数量"),
mode: str = Query("vector", description="搜索模式: 'vector''filename'"), mode: str = Query("vector", description="搜索模式: 'vector''filename'"),
page: int = Query(1, description="分页页码,仅在文件名搜索模式下生效"),
page_size: int = Query(10, description="分页大小,仅在文件名搜索模式下生效"),
user: User = Depends(get_current_active_user), user: User = Depends(get_current_active_user),
): ):
if not q.strip():
return {"items": [], "query": q}
top_k = max(top_k, 1)
page = max(page, 1)
page_size = max(min(page_size, 100), 1)
if mode == "vector": if mode == "vector":
return await search_files_by_vector(q, top_k) items = (await _vector_search(q, top_k))[:top_k]
elif mode == "filename": elif mode == "filename":
return await search_files_by_name(q, top_k) items, has_more = await _filename_search(q, page, page_size)
return {
"items": items,
"query": q,
"mode": mode,
"pagination": {
"page": page,
"page_size": page_size,
"has_more": has_more,
},
}
else: else:
return {"items": [], "query": q, "error": "Invalid search mode"} items = (await _vector_search(q, top_k))[:top_k]
return {"items": items, "query": q, "mode": mode}

View File

@@ -21,6 +21,13 @@ class SearchResultItem(BaseModel):
id: int | str id: int | str
path: str path: str
score: float score: float
chunk_id: Optional[str] = None
snippet: Optional[str] = None
mime: Optional[str] = None
source_type: Optional[str] = None
start_offset: Optional[int] = None
end_offset: Optional[int] = None
metadata: Optional[dict] = None
class MkdirRequest(BaseModel): class MkdirRequest(BaseModel):

View File

@@ -68,3 +68,46 @@ async def get_text_embedding(text: str) -> List[float]:
resp.raise_for_status() resp.raise_for_status()
result = resp.json() result = resp.json()
return result["data"][0]["embedding"] return result["data"][0]["embedding"]
async def rerank_texts(query: str, documents: List[str]) -> List[float]:
    """Call the rerank model and return one score per document.

    Degrades gracefully to an empty list when the rerank endpoint is not
    configured or the HTTP call fails. When scores are returned, the list
    is positionally aligned with *documents*: entries the API omits a
    score for, or reports with an unparsable score, are filled with 0.0.
    """
    if not documents:
        return []
    api_url = await ConfigCenter.get("AI_RERANK_API_URL")
    model = await ConfigCenter.get("AI_RERANK_MODEL")
    api_key = await ConfigCenter.get("AI_RERANK_API_KEY")
    if not api_url or not model or not api_key:
        return []
    payload = {
        "model": model,
        "query": query,
        "documents": documents,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(api_url, headers=headers, json=payload)
            resp.raise_for_status()
        except httpx.HTTPError:
            # httpx.HTTPError covers connection errors and timeouts as well
            # as bad statuses, so a flaky rerank service never breaks search.
            return []
        data = resp.json()
    if isinstance(data, dict):
        results = data.get("results")
        if isinstance(results, list):
            scores: List[float] = []
            for item in results:
                score = 0.0
                if isinstance(item, dict) and "score" in item:
                    try:
                        score = float(item["score"])
                    except (TypeError, ValueError):
                        score = 0.0
                # Always append so scores stay aligned with the input order.
                scores.append(score)
            return scores
    return []

View File

@@ -1,11 +1,95 @@
from typing import Dict, Any from typing import Dict, Any, List, Tuple
from fastapi.responses import Response from fastapi.responses import Response
import base64 import base64
import mimetypes
import os
from io import BytesIO
from services.ai import describe_image_base64, get_text_embedding from services.ai import describe_image_base64, get_text_embedding
from services.vector_db import VectorDBService, DEFAULT_VECTOR_DIMENSION from services.vector_db import VectorDBService, DEFAULT_VECTOR_DIMENSION
from services.logging import LogService from services.logging import LogService
from services.config import ConfigCenter from services.config import ConfigCenter
try: # Pillow is optional but bundled with the project dependencies
from PIL import Image
except ImportError: # pragma: no cover - fallback when pillow missing
Image = None
CHUNK_SIZE = 800
CHUNK_OVERLAP = 200
MAX_IMAGE_EDGE = 1600
JPEG_QUALITY = 85
def _chunk_text(content: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[Tuple[int, str, int, int]]:
"""按固定窗口拆分文本,返回(chunk_id, chunk_text, start, end)。"""
if chunk_size <= 0:
chunk_size = CHUNK_SIZE
if overlap >= chunk_size:
overlap = max(chunk_size // 4, 1)
chunks: List[Tuple[int, str, int, int]] = []
step = chunk_size - overlap
idx = 0
start = 0
length = len(content)
while start < length:
end = min(length, start + chunk_size)
chunk = content[start:end].strip()
if chunk:
chunks.append((idx, chunk, start, end))
idx += 1
if end >= length:
break
start += step
return chunks
def _guess_mime(path: str) -> str:
mime, _ = mimetypes.guess_type(path)
return mime or "application/octet-stream"
def _chunk_key(path: str, chunk_id: str) -> str:
return f"{path}#chunk={chunk_id}"
def _compress_image_for_embedding(input_bytes: bytes) -> Tuple[bytes, Dict[str, Any] | None]:
    """Shrink an image before sending it to the vision model.

    Downscales so the longest edge is at most MAX_IMAGE_EDGE, then
    re-encodes as JPEG. Returns ``(bytes, stats)`` where ``stats`` is None
    whenever the original bytes are kept (Pillow missing, decode failure,
    or the re-encode is not actually smaller).
    """
    if Image is None:
        return input_bytes, None
    try:
        with Image.open(BytesIO(input_bytes)) as img:
            img = img.convert("RGB")
            width, height = img.size
            longest_edge = max(width, height)
            scale = 1.0
            if longest_edge > MAX_IMAGE_EDGE:
                scale = MAX_IMAGE_EDGE / float(longest_edge)
                new_size = (max(int(width * scale), 1), max(int(height * scale), 1))
                # Pillow >= 9.1 exposes Image.Resampling; fall back to the
                # legacy module-level constant on older versions.
                resample_mode = getattr(getattr(Image, "Resampling", Image), "LANCZOS")
                # Only resample when an actual downscale is needed; a 1:1
                # LANCZOS resize is wasted work and can perturb pixels.
                img = img.resize(new_size, resample=resample_mode)
            buffer = BytesIO()
            img.save(buffer, format="JPEG", quality=JPEG_QUALITY, optimize=True)
            compressed = buffer.getvalue()
            if len(compressed) < len(input_bytes):
                return compressed, {
                    "original_bytes": len(input_bytes),
                    "compressed_bytes": len(compressed),
                    "scaled": scale < 1.0,
                    "width": img.width,
                    "height": img.height,
                }
    except Exception:  # pragma: no cover - 任意图像处理异常时回退
        return input_bytes, None
    return input_bytes, None
class VectorIndexProcessor: class VectorIndexProcessor:
name = "向量索引" name = "向量索引"
@@ -33,6 +117,7 @@ class VectorIndexProcessor:
index_type = config.get("index_type", "vector") index_type = config.get("index_type", "vector")
vector_db = VectorDBService() vector_db = VectorDBService()
collection_name = "vector_collection" collection_name = "vector_collection"
if action == "destroy": if action == "destroy":
await vector_db.delete_vector(collection_name, path) await vector_db.delete_vector(collection_name, path)
await LogService.info( await LogService.info(
@@ -42,9 +127,19 @@ class VectorIndexProcessor:
) )
return Response(content=f"文件 {path}{index_type} 索引已销毁", media_type="text/plain") return Response(content=f"文件 {path}{index_type} 索引已销毁", media_type="text/plain")
if index_type == 'simple': mime_type = _guess_mime(path)
if index_type == "simple":
await vector_db.ensure_collection(collection_name, vector=False) await vector_db.ensure_collection(collection_name, vector=False)
await vector_db.upsert_vector(collection_name, {'path': path}) await vector_db.delete_vector(collection_name, path)
await vector_db.upsert_vector(collection_name, {
"path": path,
"source_path": path,
"chunk_id": "filename",
"mime": mime_type,
"type": "filename",
"name": os.path.basename(path),
})
await LogService.info( await LogService.info(
"processor:vector_index", "processor:vector_index",
f"Created simple index for {path}", f"Created simple index for {path}",
@@ -53,24 +148,7 @@ class VectorIndexProcessor:
return Response(content=f"文件 {path} 的普通索引已创建", media_type="text/plain") return Response(content=f"文件 {path} 的普通索引已创建", media_type="text/plain")
file_ext = path.split('.')[-1].lower() file_ext = path.split('.')[-1].lower()
description = "" details: Dict[str, Any] = {"path": path, "action": "create", "index_type": "vector"}
embedding = None
if file_ext in ["jpg", "jpeg", "png", "bmp"]:
base64_image = base64.b64encode(input_bytes).decode("utf-8")
description = await describe_image_base64(base64_image)
embedding = await get_text_embedding(description)
log_message = f"Indexed image {path}"
response_message = f"图片已索引,描述:{description}"
elif file_ext in ["txt", "md"]:
text = input_bytes.decode("utf-8")
embedding = await get_text_embedding(text)
description = text[:100] + "..." if len(text) > 100 else text
log_message = f"Indexed text file {path}"
response_message = f"文本文件已索引"
if embedding is None:
return Response(content="不支持的文件类型", status_code=400)
raw_dim = await ConfigCenter.get('AI_EMBED_DIM', DEFAULT_VECTOR_DIMENSION) raw_dim = await ConfigCenter.get('AI_EMBED_DIM', DEFAULT_VECTOR_DIMENSION)
try: try:
@@ -81,15 +159,103 @@ class VectorIndexProcessor:
vector_dim = DEFAULT_VECTOR_DIMENSION vector_dim = DEFAULT_VECTOR_DIMENSION
await vector_db.ensure_collection(collection_name, vector=True, dim=vector_dim) await vector_db.ensure_collection(collection_name, vector=True, dim=vector_dim)
await vector_db.upsert_vector( await vector_db.delete_vector(collection_name, path)
collection_name, {'path': path, 'embedding': embedding})
if file_ext in ["jpg", "jpeg", "png", "bmp"]:
processed_bytes, compression = _compress_image_for_embedding(input_bytes)
base64_image = base64.b64encode(processed_bytes).decode("utf-8")
description = await describe_image_base64(base64_image)
embedding = await get_text_embedding(description)
image_mime = "image/jpeg" if compression else mime_type
await vector_db.upsert_vector(collection_name, {
"path": _chunk_key(path, "image"),
"source_path": path,
"chunk_id": "image",
"embedding": embedding,
"text": description,
"mime": image_mime,
"type": "image",
})
details["description"] = description
if compression:
details["image_compression"] = compression
await LogService.info(
"processor:vector_index",
f"Indexed image {path}",
details=details,
)
return Response(content=f"图片已索引,描述:{description}", media_type="text/plain")
if file_ext in ["txt", "md"]:
try:
text = input_bytes.decode("utf-8")
except UnicodeDecodeError:
return Response(content="文本文件解码失败", status_code=400)
chunks = _chunk_text(text)
if not chunks:
await vector_db.upsert_vector(collection_name, {
"path": _chunk_key(path, "0"),
"source_path": path,
"chunk_id": "0",
"embedding": await get_text_embedding(text or path),
"text": text,
"mime": mime_type,
"type": "text",
"start_offset": 0,
"end_offset": len(text),
})
details["chunks"] = 1
await LogService.info(
"processor:vector_index",
f"Indexed text file {path}",
details=details,
)
return Response(content="文本文件已索引", media_type="text/plain")
chunk_count = 0
for chunk_id, chunk_text, start, end in chunks:
embedding = await get_text_embedding(chunk_text)
await vector_db.upsert_vector(collection_name, {
"path": _chunk_key(path, str(chunk_id)),
"source_path": path,
"chunk_id": str(chunk_id),
"embedding": embedding,
"text": chunk_text,
"mime": mime_type,
"type": "text",
"start_offset": start,
"end_offset": end,
})
chunk_count += 1
details["chunks"] = chunk_count
sample = chunks[0][1]
details["sample"] = sample[:120]
await LogService.info(
"processor:vector_index",
f"Indexed text file {path}",
details=details,
)
return Response(content="文本文件已索引", media_type="text/plain")
# 其他类型暂未支持向量索引,回退为文件名索引
await vector_db.delete_vector(collection_name, path)
await vector_db.upsert_vector(collection_name, {
"path": _chunk_key(path, "fallback"),
"source_path": path,
"chunk_id": "filename",
"mime": mime_type,
"type": "filename",
"name": os.path.basename(path),
"embedding": [0.0] * vector_dim,
})
await LogService.info( await LogService.info(
"processor:vector_index", "processor:vector_index",
log_message, f"File type fallback to simple index for {path}",
details={"path": path, "description": description, "action": "create", "index_type": "vector"}, details={"path": path, "action": "create", "index_type": "simple", "original_type": file_ext},
) )
return Response(content=response_message, media_type="text/plain") return Response(content="暂不支持该类型的向量索引,已创建文件名索引", media_type="text/plain")
PROCESSOR_TYPE = "vector_index" PROCESSOR_TYPE = "vector_index"

View File

@@ -50,15 +50,20 @@ class MilvusLiteProvider(BaseVectorProvider):
client = self._get_client() client = self._get_client()
if client.has_collection(collection_name): if client.has_collection(collection_name):
return return
common_fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
FieldSchema(name="source_path", dtype=DataType.VARCHAR, max_length=512, is_primary=False, auto_id=False),
]
if vector: if vector:
vector_dim = dim if isinstance(dim, int) and dim > 0 else 0 vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
if vector_dim <= 0: if vector_dim <= 0:
vector_dim = 4096 vector_dim = 4096
fields = [ fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False), *common_fields,
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim), FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
] ]
schema = CollectionSchema(fields, description="Image vector collection") schema = CollectionSchema(fields, description="Vector collection", enable_dynamic_field=True)
client.create_collection(collection_name, schema=schema) client.create_collection(collection_name, schema=schema)
index_params = MilvusClient.prepare_index_params() index_params = MilvusClient.prepare_index_params()
index_params.add_index( index_params.add_index(
@@ -70,38 +75,98 @@ class MilvusLiteProvider(BaseVectorProvider):
) )
client.create_index(collection_name, index_params=index_params) client.create_index(collection_name, index_params=index_params)
else: else:
fields = [ schema = CollectionSchema(common_fields, description="Simple file index", enable_dynamic_field=True)
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
]
schema = CollectionSchema(fields, description="Simple file index")
client.create_collection(collection_name, schema=schema) client.create_collection(collection_name, schema=schema)
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None: def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
self._get_client().upsert(collection_name, data) payload = dict(data)
payload.setdefault("source_path", payload.get("path"))
payload.setdefault("vector_id", payload.get("path"))
self._get_client().upsert(collection_name, data=[payload])
def delete_vector(self, collection_name: str, path: str) -> None: def delete_vector(self, collection_name: str, path: str) -> None:
self._get_client().delete(collection_name, ids=[path]) client = self._get_client()
escaped = path.replace('"', '\\"')
client.delete(collection_name, filter=f'source_path == "{escaped}"')
def search_vectors(self, collection_name: str, query_embedding, top_k: int): def search_vectors(self, collection_name: str, query_embedding, top_k: int):
search_params = {"metric_type": "COSINE"} search_params = {"metric_type": "COSINE"}
return self._get_client().search( output_fields = [
"path",
"source_path",
"chunk_id",
"mime",
"text",
"start_offset",
"end_offset",
"type",
"name",
]
raw_results = self._get_client().search(
collection_name, collection_name,
data=[query_embedding], data=[query_embedding],
anns_field="embedding", anns_field="embedding",
search_params=search_params, search_params=search_params,
limit=top_k, limit=top_k,
output_fields=["path"], output_fields=output_fields,
) )
formatted: List[List[Dict[str, Any]]] = []
for hits in raw_results:
bucket: List[Dict[str, Any]] = []
for hit in hits:
if hasattr(hit, "entity"):
entity = dict(getattr(hit, "entity", {}) or {})
hit_id = getattr(hit, "id", None)
distance = getattr(hit, "distance", None)
elif isinstance(hit, dict):
entity = dict((hit.get("entity") or {}))
hit_id = hit.get("id")
distance = hit.get("distance")
else:
entity = {}
hit_id = None
distance = None
entity.setdefault("path", entity.get("source_path"))
bucket.append({
"id": hit_id,
"distance": distance,
"entity": entity,
})
formatted.append(bucket)
return formatted
def search_by_path(self, collection_name: str, query_path: str, top_k: int): def search_by_path(self, collection_name: str, query_path: str, top_k: int):
filter_expr = f"path like '%{query_path}%'" if query_path else "path like '%%'" if query_path:
escaped = query_path.replace('"', '\\"')
filter_expr = f'source_path like "%{escaped}%"'
else:
filter_expr = "source_path like '%%'"
results = self._get_client().query( results = self._get_client().query(
collection_name, collection_name,
filter=filter_expr, filter=filter_expr,
limit=top_k, limit=top_k,
output_fields=["path"], output_fields=[
"path",
"source_path",
"chunk_id",
"mime",
"text",
"start_offset",
"end_offset",
"type",
"name",
],
) )
return [[{"id": r["path"], "distance": 1.0, "entity": {"path": r["path"]}} for r in results]] formatted = []
for row in results:
entity = dict(row)
entity.setdefault("path", entity.get("source_path"))
formatted.append({
"id": entity.get("path"),
"distance": 1.0,
"entity": entity,
})
return [formatted]
def get_all_stats(self) -> Dict[str, Any]: def get_all_stats(self) -> Dict[str, Any]:
client = self._get_client() client = self._get_client()

View File

@@ -58,15 +58,19 @@ class MilvusServerProvider(BaseVectorProvider):
client = self._get_client() client = self._get_client()
if client.has_collection(collection_name): if client.has_collection(collection_name):
return return
common_fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
FieldSchema(name="source_path", dtype=DataType.VARCHAR, max_length=512, is_primary=False, auto_id=False),
]
if vector: if vector:
vector_dim = dim if isinstance(dim, int) and dim > 0 else 0 vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
if vector_dim <= 0: if vector_dim <= 0:
vector_dim = 4096 vector_dim = 4096
fields = [ fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False), *common_fields,
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim), FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
] ]
schema = CollectionSchema(fields, description="Image vector collection") schema = CollectionSchema(fields, description="Vector collection", enable_dynamic_field=True)
client.create_collection(collection_name, schema=schema) client.create_collection(collection_name, schema=schema)
index_params = MilvusClient.prepare_index_params() index_params = MilvusClient.prepare_index_params()
index_params.add_index( index_params.add_index(
@@ -78,38 +82,98 @@ class MilvusServerProvider(BaseVectorProvider):
) )
client.create_index(collection_name, index_params=index_params) client.create_index(collection_name, index_params=index_params)
else: else:
fields = [ schema = CollectionSchema(common_fields, description="Simple file index", enable_dynamic_field=True)
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
]
schema = CollectionSchema(fields, description="Simple file index")
client.create_collection(collection_name, schema=schema) client.create_collection(collection_name, schema=schema)
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None: def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
self._get_client().upsert(collection_name, data) payload = dict(data)
payload.setdefault("source_path", payload.get("path"))
payload.setdefault("vector_id", payload.get("path"))
self._get_client().upsert(collection_name, data=[payload])
def delete_vector(self, collection_name: str, path: str) -> None: def delete_vector(self, collection_name: str, path: str) -> None:
self._get_client().delete(collection_name, ids=[path]) client = self._get_client()
escaped = path.replace('"', '\\"')
client.delete(collection_name, filter=f'source_path == "{escaped}"')
def search_vectors(self, collection_name: str, query_embedding, top_k: int): def search_vectors(self, collection_name: str, query_embedding, top_k: int):
search_params = {"metric_type": "COSINE"} search_params = {"metric_type": "COSINE"}
return self._get_client().search( output_fields = [
"path",
"source_path",
"chunk_id",
"mime",
"text",
"start_offset",
"end_offset",
"type",
"name",
]
raw_results = self._get_client().search(
collection_name, collection_name,
data=[query_embedding], data=[query_embedding],
anns_field="embedding", anns_field="embedding",
search_params=search_params, search_params=search_params,
limit=top_k, limit=top_k,
output_fields=["path"], output_fields=output_fields,
) )
formatted: List[List[Dict[str, Any]]] = []
for hits in raw_results:
bucket: List[Dict[str, Any]] = []
for hit in hits:
if hasattr(hit, "entity"):
entity = dict(getattr(hit, "entity", {}) or {})
hit_id = getattr(hit, "id", None)
distance = getattr(hit, "distance", None)
elif isinstance(hit, dict):
entity = dict((hit.get("entity") or {}))
hit_id = hit.get("id")
distance = hit.get("distance")
else:
entity = {}
hit_id = None
distance = None
entity.setdefault("path", entity.get("source_path"))
bucket.append({
"id": hit_id,
"distance": distance,
"entity": entity,
})
formatted.append(bucket)
return formatted
def search_by_path(self, collection_name: str, query_path: str, top_k: int): def search_by_path(self, collection_name: str, query_path: str, top_k: int):
filter_expr = f"path like '%{query_path}%'" if query_path else "path like '%%'" if query_path:
escaped = query_path.replace('"', '\\"')
filter_expr = f'source_path like "%{escaped}%"'
else:
filter_expr = "source_path like '%%'"
results = self._get_client().query( results = self._get_client().query(
collection_name, collection_name,
filter=filter_expr, filter=filter_expr,
limit=top_k, limit=top_k,
output_fields=["path"], output_fields=[
"path",
"source_path",
"chunk_id",
"mime",
"text",
"start_offset",
"end_offset",
"type",
"name",
],
) )
return [[{"id": r["path"], "distance": 1.0, "entity": {"path": r["path"]}} for r in results]] formatted = []
for row in results:
entity = dict(row)
entity.setdefault("path", entity.get("source_path"))
formatted.append({
"id": entity.get("path"),
"distance": 1.0,
"entity": entity,
})
return [formatted]
def get_all_stats(self) -> Dict[str, Any]: def get_all_stats(self) -> Dict[str, Any]:
client = self._get_client() client = self._get_client()

View File

@@ -58,29 +58,59 @@ class QdrantProvider(BaseVectorProvider):
size = dim if vector and isinstance(dim, int) and dim > 0 else 1 size = dim if vector and isinstance(dim, int) and dim > 0 else 1
return qmodels.VectorParams(size=size, distance=qmodels.Distance.COSINE) return qmodels.VectorParams(size=size, distance=qmodels.Distance.COSINE)
def _ensure_payload_indexes(self, client: QdrantClient, collection_name: str) -> None:
for field in ("path", "source_path"):
try:
client.create_payload_index(
collection_name=collection_name,
field_name=field,
field_schema="keyword",
)
except Exception as exc: # pragma: no cover - 依赖外部服务
message = str(exc).lower()
if "already exists" in message or "index exists" in message:
continue
# 旧版本 qdrant 可能返回带状态码的异常,这里容忍重复创建
raise
def ensure_collection(self, collection_name: str, vector: bool, dim: int) -> None: def ensure_collection(self, collection_name: str, vector: bool, dim: int) -> None:
client = self._get_client() client = self._get_client()
try: try:
if client.collection_exists(collection_name): exists = client.collection_exists(collection_name)
return
except Exception as exc: # pragma: no cover - 依赖外部服务 except Exception as exc: # pragma: no cover - 依赖外部服务
raise RuntimeError(f"Failed to check Qdrant collection '{collection_name}': {exc}") from exc raise RuntimeError(f"Failed to check Qdrant collection '{collection_name}': {exc}") from exc
if exists:
try:
self._ensure_payload_indexes(client, collection_name)
except Exception:
pass
return
vectors_config = self._vector_params(vector, dim) vectors_config = self._vector_params(vector, dim)
try: try:
client.create_collection(collection_name=collection_name, vectors_config=vectors_config) client.create_collection(collection_name=collection_name, vectors_config=vectors_config)
except Exception as exc: # pragma: no cover except Exception as exc: # pragma: no cover
if "already exists" in str(exc).lower(): if "already exists" in str(exc).lower():
try:
self._ensure_payload_indexes(client, collection_name)
except Exception:
pass
return return
raise RuntimeError(f"Failed to create Qdrant collection '{collection_name}': {exc}") from exc raise RuntimeError(f"Failed to create Qdrant collection '{collection_name}': {exc}") from exc
try:
self._ensure_payload_indexes(client, collection_name)
except Exception:
pass
@staticmethod @staticmethod
def _point_id(path: str) -> str: def _point_id(uid: str) -> str:
return str(uuid5(NAMESPACE_URL, path)) return str(uuid5(NAMESPACE_URL, uid))
def _prepare_point(self, data: Dict[str, Any]) -> qmodels.PointStruct: def _prepare_point(self, data: Dict[str, Any]) -> qmodels.PointStruct:
path = data.get("path") uid = data.get("path")
if not path: if not uid:
raise ValueError("Qdrant upsert requires 'path' in data") raise ValueError("Qdrant upsert requires 'path' in data")
embedding = data.get("embedding") embedding = data.get("embedding")
@@ -89,8 +119,11 @@ class QdrantProvider(BaseVectorProvider):
else: else:
vector = [float(x) for x in embedding] vector = [float(x) for x in embedding]
payload = {"path": path} payload = {k: v for k, v in data.items() if k != "embedding"}
return qmodels.PointStruct(id=self._point_id(path), vector=vector, payload=payload) payload.setdefault("vector_id", uid)
source_path = payload.get("source_path") or payload.get("path")
payload["path"] = source_path
return qmodels.PointStruct(id=self._point_id(str(uid)), vector=vector, payload=payload)
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None: def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
client = self._get_client() client = self._get_client()
@@ -99,7 +132,12 @@ class QdrantProvider(BaseVectorProvider):
def delete_vector(self, collection_name: str, path: str) -> None: def delete_vector(self, collection_name: str, path: str) -> None:
client = self._get_client() client = self._get_client()
selector = qmodels.PointIdsList(points=[self._point_id(path)]) condition = qmodels.FieldCondition(
key="path",
match=qmodels.MatchValue(value=path),
)
flt = qmodels.Filter(must=[condition])
selector = qmodels.FilterSelector(filter=flt)
client.delete(collection_name=collection_name, points_selector=selector, wait=True) client.delete(collection_name=collection_name, points_selector=selector, wait=True)
def _format_search_results(self, points: Sequence[qmodels.ScoredPoint]): def _format_search_results(self, points: Sequence[qmodels.ScoredPoint]):
@@ -107,7 +145,7 @@ class QdrantProvider(BaseVectorProvider):
{ {
"id": point.id, "id": point.id,
"distance": point.score, "distance": point.score,
"entity": {"path": (point.payload or {}).get("path")}, "entity": point.payload or {},
} }
for point in points for point in points
] ]
@@ -141,11 +179,11 @@ class QdrantProvider(BaseVectorProvider):
break break
for record in records: for record in records:
path = (record.payload or {}).get("path") payload = record.payload or {}
if query_path and path: path = payload.get("path")
if query_path not in path: if query_path and path and query_path not in path:
continue continue
results.append({"id": record.id, "distance": 1.0, "entity": {"path": path}}) results.append({"id": record.id, "distance": 1.0, "entity": payload})
if len(results) >= top_k: if len(results) >= top_k:
break break

View File

@@ -21,9 +21,29 @@ export interface DirListing {
} }
export interface SearchResultItem { export interface SearchResultItem {
id: number; id: string;
path: string; path: string;
score: number; score: number;
chunk_id?: string;
snippet?: string;
mime?: string;
source_type?: string;
start_offset?: number;
end_offset?: number;
metadata?: Record<string, any>;
}
export interface SearchPagination {
page: number;
page_size: number;
has_more: boolean;
}
export interface SearchResponse {
items: SearchResultItem[];
query: string;
mode?: string;
pagination?: SearchPagination;
} }
export const vfsApi = { export const vfsApi = {
@@ -105,6 +125,20 @@ export const vfsApi = {
xhr.send(fd); xhr.send(fd);
}); });
}, },
searchFiles: (q: string, top_k: number = 10, mode: 'vector' | 'filename' = 'vector') => searchFiles: (
request<{ items: SearchResultItem[]; query: string }>(`/search?q=${encodeURIComponent(q)}&top_k=${top_k}&mode=${mode}`), q: string,
top_k: number = 10,
mode: 'vector' | 'filename' = 'vector',
page?: number,
page_size?: number,
) => {
const params = new URLSearchParams({
q,
top_k: String(top_k),
mode,
});
if (page !== undefined) params.set('page', String(page));
if (page_size !== undefined) params.set('page_size', String(page_size));
return request<SearchResponse>(`/search?${params.toString()}`);
},
}; };

View File

@@ -1,128 +1,313 @@
import { Modal, Input, List, Divider, Spin, Select, Space } from 'antd'; import { Modal, Input, List, Divider, Spin, Space, Tag, Typography, Empty, Flex, Segmented, Pagination } from 'antd';
import { SearchOutlined, FileTextOutlined } from '@ant-design/icons'; import { SearchOutlined, FileTextOutlined } from '@ant-design/icons';
import React, { useState } from 'react'; import React, { useRef, useState } from 'react';
import { vfsApi, type SearchResultItem } from '../api/vfs'; import { vfsApi, type SearchResultItem } from '../api/vfs';
import { useI18n } from '../i18n'; import { useI18n } from '../i18n';
import { useNavigate } from 'react-router'; import { useNavigate } from 'react-router';
interface SearchDialogProps { interface SearchDialogProps {
open: boolean; open: boolean;
onClose: () => void; onClose: () => void;
} }
const SEARCH_MODES = (t: (k: string)=>string) => [ type SearchMode = 'vector' | 'filename';
{ label: t('Smart Search'), value: 'vector' }, const PAGE_SIZE = 10;
{ label: t('Name Search'), value: 'filename' },
];
const SearchDialog: React.FC<SearchDialogProps> = ({ open, onClose }) => { const SearchDialog: React.FC<SearchDialogProps> = ({ open, onClose }) => {
const [search, setSearch] = useState(''); const [search, setSearch] = useState('');
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [results, setResults] = useState<SearchResultItem[]>([]); const [results, setResults] = useState<SearchResultItem[]>([]);
const [searched, setSearched] = useState(false); const [searched, setSearched] = useState(false);
const [searchMode, setSearchMode] = useState<'vector' | 'filename'>('vector'); const [searchMode, setSearchMode] = useState<SearchMode>('vector');
const [page, setPage] = useState(1);
const [hasMore, setHasMore] = useState(false);
const requestIdRef = useRef(0);
const { t } = useI18n(); const { t } = useI18n();
const navigate = useNavigate(); const navigate = useNavigate();
const handleSearch = async () => { const renderSourceLabel = (value?: string) => {
if (!search.trim()) return; switch ((value || '').toLowerCase()) {
case 'vector':
return t('Vector Search');
case 'filename':
return t('Name Search');
case 'text':
return t('Text Chunk');
case 'image':
return t('Image Description');
default:
return t('Vector Search');
}
};
const sourceColor = (value?: string) => {
switch ((value || '').toLowerCase()) {
case 'vector':
return 'blue';
case 'filename':
return 'green';
case 'image':
return 'volcano';
case 'text':
return 'geekblue';
default:
return 'purple';
}
};
const performSearch = async (options?: { page?: number; mode?: SearchMode }) => {
const query = search.trim();
if (!query) {
setSearched(false);
setResults([]);
setHasMore(false);
return;
}
const currentMode = options?.mode ?? searchMode;
const targetPage = currentMode === 'filename' ? (options?.page ?? (currentMode === searchMode ? page : 1)) : 1;
const requestId = requestIdRef.current + 1;
requestIdRef.current = requestId;
setLoading(true); setLoading(true);
setSearched(true); setSearched(true);
try { if (currentMode === 'filename') {
const res = await vfsApi.searchFiles(search, 10, searchMode); setPage(targetPage);
setResults(res.items); } else {
} catch (e) { setPage(1);
setResults([]); setHasMore(false);
}
try {
const res = await vfsApi.searchFiles(
query,
currentMode === 'filename' ? PAGE_SIZE : 10,
currentMode,
currentMode === 'filename' ? targetPage : undefined,
currentMode === 'filename' ? PAGE_SIZE : undefined,
);
if (requestId !== requestIdRef.current) {
return;
}
setResults(res.items);
if (currentMode === 'filename') {
const pagination = res.pagination;
setHasMore(Boolean(pagination?.has_more));
if (pagination?.page) {
setPage(pagination.page);
}
} else {
setHasMore(false);
}
} catch (e) {
if (requestId !== requestIdRef.current) {
return;
}
setResults([]);
if (currentMode === 'filename') {
setHasMore(false);
}
} finally {
if (requestId === requestIdRef.current) {
setLoading(false);
}
} }
setLoading(false);
}; };
const handleSearch = () => {
if (!search.trim()) {
setResults([]);
setSearched(false);
setHasMore(false);
setPage(1);
return;
}
void performSearch({ page: searchMode === 'filename' ? 1 : undefined });
};
const handleModeChange = (value: string | number) => {
const nextMode = value as SearchMode;
setHasMore(false);
setPage(1);
setSearchMode(nextMode);
if (search.trim()) {
void performSearch({ mode: nextMode, page: nextMode === 'filename' ? 1 : undefined });
} else {
setResults([]);
setSearched(false);
}
};
const handleClose = () => {
setSearch('');
setResults([]);
setSearched(false);
setSearchMode('vector');
setPage(1);
setHasMore(false);
requestIdRef.current = 0;
setLoading(false);
onClose();
};
const totalItems = searchMode === 'filename'
? (hasMore ? page * PAGE_SIZE + 1 : (page - 1) * PAGE_SIZE + results.length)
: results.length;
return ( return (
<Modal <Modal
open={open} open={open}
onCancel={onClose} onCancel={handleClose}
footer={null} footer={null}
width={600} width={720}
centered centered
title={null} title={null}
closable={false} closable={false}
styles={{
body: {
padding: '12px 16px 16px',
maxHeight: '70vh',
overflow: 'hidden',
display: 'flex',
flexDirection: 'column',
gap: 12,
},
}}
> >
<Space.Compact style={{ marginBottom: 0, width: '100%' }}> <Flex vertical style={{ gap: 12, flex: 1, minHeight: 0 }}>
<Select <Flex align="center" style={{ width: '100%', gap: 12, flexWrap: 'wrap' }}>
options={SEARCH_MODES(t)} <Segmented
value={searchMode} options={[
onChange={v => setSearchMode(v as 'vector' | 'filename')} { label: t('Smart Search'), value: 'vector' },
style={{ { label: t('Name Search'), value: 'filename' },
width: 120, ]}
fontSize: 18, value={searchMode}
height: 40, onChange={handleModeChange}
lineHeight: '40px', style={{
borderTopRightRadius: 0, minWidth: 160,
borderBottomRightRadius: 0, height: 40,
borderRight: 0, borderRadius: 20,
verticalAlign: 'top', display: 'flex',
}} alignItems: 'center',
styles={{ popup: { root: { fontSize: 18 } } }} }}
popupMatchSelectWidth={false} size="large"
/> />
<Input <Input
allowClear allowClear
prefix={<SearchOutlined />} prefix={<SearchOutlined />}
placeholder={t('Search files / tags / types')} placeholder={t('Search files / tags / types')}
value={search} value={search}
onChange={e => setSearch(e.target.value)} onChange={e => {
style={{ const value = e.target.value;
fontSize: 18, setSearch(value);
height: 40, if (!value.trim()) {
width: 'calc(100% - 120px)', setResults([]);
borderTopLeftRadius: 0, setSearched(false);
borderBottomLeftRadius: 0, setHasMore(false);
verticalAlign: 'top', setPage(1);
}} requestIdRef.current += 1;
autoFocus setLoading(false);
onPressEnter={handleSearch} }
/> }}
</Space.Compact> style={{ fontSize: 18, height: 40, flex: 1, minWidth: 240 }}
{searched && ( styles={{
<> input: {
<Divider style={{ margin: '12px 0' }}>{t('Search Results')}</Divider> borderRadius: 20,
{loading ? ( },
<Spin /> }}
) : ( autoFocus
<List onPressEnter={handleSearch}
itemLayout="horizontal" />
dataSource={results} </Flex>
locale={{ emptyText: t('No files found') }}
renderItem={item => { {!searched ? null : (
const fullPath = item.path || ''; <Flex vertical style={{ flex: 1, minHeight: 0 }}>
const trimmed = fullPath.replace(/\/+$/, ''); <Divider style={{ margin: 0, padding: '0 0 12px' }}>{t('Search Results')}</Divider>
const parts = trimmed.split('/'); {loading ? (
const filename = parts.pop() || ''; <Flex align="center" justify="center" style={{ flex: 1 }}>
const dir = parts.length ? '/' + parts.join('/') : '/'; <Spin />
return ( </Flex>
<List.Item> ) : results.length === 0 ? (
<List.Item.Meta <Flex align="center" justify="center" style={{ flex: 1 }}>
avatar={<FileTextOutlined />} <Empty description={t('No files found')} image={Empty.PRESENTED_IMAGE_SIMPLE} />
title={ </Flex>
<a ) : (
onClick={() => { <div style={{ flex: 1, minHeight: 0, display: 'flex', flexDirection: 'column' }}>
navigate(`/files${dir === '/' ? '' : dir}`, { state: { highlight: { name: filename } } }); <div style={{ flex: 1, minHeight: 0, overflowY: 'auto', paddingRight: 6 }}>
onClose(); <List
}} itemLayout="horizontal"
> dataSource={results}
{fullPath} split={false}
</a> renderItem={item => {
} const fullPath = item.path || '';
description={`${t('Relevance')}: ${item.score.toFixed(2)}`} const trimmed = fullPath.replace(/\/+$/, '');
/> const parts = trimmed.split('/');
</List.Item> const filename = parts.pop() || '';
); const dir = parts.length ? '/' + parts.join('/') : '/';
}} const snippet = item.snippet || '';
/> const retrieval = item.metadata?.retrieval_source || item.source_type;
)} const retrievalLabel = renderSourceLabel(retrieval);
</> const scoreText = Number.isFinite(item.score) ? item.score.toFixed(2) : '-';
)}
return (
<List.Item style={{ padding: '10px 12px', borderRadius: 6, background: '#fafafa', marginBottom: 8 }}>
<List.Item.Meta
avatar={<FileTextOutlined style={{ fontSize: 18, color: '#8c8c8c' }} />}
title={
<a
onClick={() => {
navigate(`/files${dir === '/' ? '' : dir}`, { state: { highlight: { name: filename } } });
handleClose();
}}
style={{ fontSize: 16 }}
>
{fullPath}
</a>
}
description={(
<Space direction="vertical" size={6} style={{ width: '100%' }}>
{snippet ? (
<Typography.Paragraph ellipsis={{ rows: 3 }} style={{ marginBottom: 0 }}>
{snippet}
</Typography.Paragraph>
) : null}
<Space size={10} wrap>
{retrieval ? (
<Tag color={sourceColor(retrieval)} style={{ marginRight: 0 }}>
{retrievalLabel}
</Tag>
) : null}
<Typography.Text type="secondary">
{t('Relevance')}: {scoreText}
</Typography.Text>
</Space>
</Space>
)}
/>
</List.Item>
);
}}
/>
</div>
{searchMode === 'filename' && results.length > 0 ? (
<Pagination
current={page}
pageSize={PAGE_SIZE}
total={Math.max(totalItems, 1)}
showSizeChanger={false}
size="small"
style={{ marginTop: 12, textAlign: 'right' }}
onChange={(nextPage) => {
void performSearch({ page: nextPage });
}}
/>
) : null}
</div>
)}
</Flex>
)}
</Flex>
</Modal> </Modal>
); );
}; };

View File

@@ -8,14 +8,23 @@ import { useTheme } from '../../contexts/ThemeContext';
import '../../styles/settings-tabs.css'; import '../../styles/settings-tabs.css';
import { useI18n } from '../../i18n'; import { useI18n } from '../../i18n';
const APP_CONFIG_KEYS: {key: string, label: string, default?: string}[] = [ const APP_CONFIG_KEYS: { key: string, label: string, default?: string }[] = [
{ key: 'APP_NAME', label: 'App Name' }, { key: 'APP_NAME', label: 'App Name' },
{ key: 'APP_LOGO', label: 'Logo URL' }, { key: 'APP_LOGO', label: 'Logo URL' },
{ key: 'APP_DOMAIN', label: 'App Domain' }, { key: 'APP_DOMAIN', label: 'App Domain' },
{ key: 'FILE_DOMAIN', label: 'File Domain' }, { key: 'FILE_DOMAIN', label: 'File Domain' },
]; ];
const VISION_CONFIG_KEYS = [ interface AiConfigKeyBase {
key: string;
default?: string | number;
}
interface AiConfigKeyWithLabel extends AiConfigKeyBase {
label: string;
}
const VISION_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
{ key: 'AI_VISION_API_URL', label: 'Vision API URL' }, { key: 'AI_VISION_API_URL', label: 'Vision API URL' },
{ key: 'AI_VISION_MODEL', label: 'Vision Model', default: 'Qwen/Qwen2.5-VL-32B-Instruct' }, { key: 'AI_VISION_MODEL', label: 'Vision Model', default: 'Qwen/Qwen2.5-VL-32B-Instruct' },
{ key: 'AI_VISION_API_KEY', label: 'Vision API Key' }, { key: 'AI_VISION_API_KEY', label: 'Vision API Key' },
@@ -24,13 +33,24 @@ const VISION_CONFIG_KEYS = [
const DEFAULT_EMBED_DIMENSION = 4096; const DEFAULT_EMBED_DIMENSION = 4096;
const EMBED_DIM_KEY = 'AI_EMBED_DIM'; const EMBED_DIM_KEY = 'AI_EMBED_DIM';
const EMBED_CONFIG_KEYS = [ const EMBED_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
{ key: 'AI_EMBED_API_URL', label: 'Embedding API URL' }, { key: 'AI_EMBED_API_URL', label: 'Embedding API URL' },
{ key: 'AI_EMBED_MODEL', label: 'Embedding Model', default: 'Qwen/Qwen3-Embedding-8B' }, { key: 'AI_EMBED_MODEL', label: 'Embedding Model', default: 'Qwen/Qwen3-Embedding-8B' },
{ key: 'AI_EMBED_API_KEY', label: 'Embedding API Key' }, { key: 'AI_EMBED_API_KEY', label: 'Embedding API Key' },
]; ];
const ALL_AI_KEYS = [...VISION_CONFIG_KEYS, ...EMBED_CONFIG_KEYS, { key: EMBED_DIM_KEY, default: DEFAULT_EMBED_DIMENSION }]; const RERANK_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
{ key: 'AI_RERANK_API_URL', label: 'Rerank API URL' },
{ key: 'AI_RERANK_MODEL', label: 'Rerank Model' },
{ key: 'AI_RERANK_API_KEY', label: 'Rerank API Key' },
];
const ALL_AI_KEYS: AiConfigKeyBase[] = [
...VISION_CONFIG_KEYS,
...EMBED_CONFIG_KEYS,
...RERANK_CONFIG_KEYS,
{ key: EMBED_DIM_KEY, default: DEFAULT_EMBED_DIMENSION },
];
const formatBytes = (bytes?: number | null) => { const formatBytes = (bytes?: number | null) => {
if (bytes === null || bytes === undefined) return '-'; if (bytes === null || bytes === undefined) return '-';
@@ -194,6 +214,8 @@ export default function SystemSettingsPage() {
} }
}, [buildProviderConfigValues, message, t, vectorConfigForm, vectorProviders]); }, [buildProviderConfigValues, message, t, vectorConfigForm, vectorProviders]);
const vectorSectionLoading = vectorStatsLoading || vectorConfigLoading;
// 离开“外观设置”时,恢复后端持久化配置(取消未保存的预览) // 离开“外观设置”时,恢复后端持久化配置(取消未保存的预览)
useEffect(() => { useEffect(() => {
if (activeTab !== 'appearance') { if (activeTab !== 'appearance') {
@@ -303,7 +325,7 @@ export default function SystemSettingsPage() {
</Form.Item> </Form.Item>
</Card> </Card>
<Card title={t('Advanced')} style={{ marginTop: 24 }}> <Card title={t('Advanced')} style={{ marginTop: 24 }}>
<Form.Item name={THEME_KEYS.TOKENS} label={t('Override AntD Tokens (JSON)')} tooltip={t('e.g. {"colorText": "#222"}') }> <Form.Item name={THEME_KEYS.TOKENS} label={t('Override AntD Tokens (JSON)')} tooltip={t('e.g. {"colorText": "#222"}')}>
<Input.TextArea autoSize={{ minRows: 4 }} placeholder='{ "colorText": "#222" }' /> <Input.TextArea autoSize={{ minRows: 4 }} placeholder='{ "colorText": "#222" }' />
</Form.Item> </Form.Item>
<Form.Item name={THEME_KEYS.CSS} label={t('Custom CSS')}> <Form.Item name={THEME_KEYS.CSS} label={t('Custom CSS')}>
@@ -402,6 +424,13 @@ export default function SystemSettingsPage() {
<InputNumber min={1} max={32768} style={{ width: '100%' }} /> <InputNumber min={1} max={32768} style={{ width: '100%' }} />
</Form.Item> </Form.Item>
</Card> </Card>
<Card title={t('Rerank Model')} style={{ marginTop: 24 }}>
{RERANK_CONFIG_KEYS.map(({ key, label }) => (
<Form.Item key={key} name={key} label={t(label)}>
<Input size="large" />
</Form.Item>
))}
</Card>
<Form.Item style={{ marginTop: 24 }}> <Form.Item style={{ marginTop: 24 }}>
<Button type="primary" htmlType="submit" loading={loading} block> <Button type="primary" htmlType="submit" loading={loading} block>
{t('Save')} {t('Save')}
@@ -428,178 +457,180 @@ export default function SystemSettingsPage() {
{t('Refresh')} {t('Refresh')}
</Button> </Button>
</div> </div>
{vectorMetaError ? ( {vectorSectionLoading ? (
<Alert type="error" showIcon message={vectorMetaError} /> <div style={{ display: 'flex', justifyContent: 'center', padding: '24px 0' }}>
) : null} <Spin />
{vectorStatsLoading && !vectorStats ? ( </div>
<Spin /> ) : (
) : vectorStats ? ( <>
<Space direction="vertical" size={16} style={{ width: '100%' }}> {vectorMetaError ? (
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 24 }}> <Alert type="error" showIcon message={vectorMetaError} />
<div> ) : null}
<div style={{ color: '#888' }}>{t('Collections')}</div> {vectorStats ? (
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.collection_count}</div> <Space direction="vertical" size={16} style={{ width: '100%' }}>
</div> <div style={{ display: 'flex', flexWrap: 'wrap', gap: 24 }}>
<div> <div>
<div style={{ color: '#888' }}>{t('Vectors')}</div> <div style={{ color: '#888' }}>{t('Collections')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.total_vectors}</div> <div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.collection_count}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Database Size')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.db_file_size_bytes)}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Estimated Memory')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.estimated_total_memory_bytes)}</div>
</div>
</div>
{vectorStats.collections.length ? (
<Space direction="vertical" style={{ width: '100%' }} size={16}>
{vectorStats.collections.map((collection) => (
<div key={collection.name} style={{ border: '1px solid #f0f0f0', borderRadius: 8, padding: 16 }}>
<Space direction="vertical" size={12} style={{ width: '100%' }}>
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 12 }}>
<strong>{collection.name}</strong>
<span style={{ color: '#888' }}>
{collection.is_vector_collection && collection.dimension
? `${t('Dimension')}: ${collection.dimension}`
: t('Non-vector collection')}
</span>
</div>
<div>{t('Vectors')}: {collection.row_count}</div>
{collection.is_vector_collection ? (
<div>{t('Estimated memory')}: {formatBytes(collection.estimated_memory_bytes)}</div>
) : null}
{collection.indexes.length ? (
<Space direction="vertical" size={4} style={{ width: '100%' }}>
<span>{t('Indexes')}:</span>
<ul style={{ paddingLeft: 20, margin: 0 }}>
{collection.indexes.map((index) => (
<li key={`${collection.name}-${index.index_name || 'default'}`}>
<span>{index.index_name || t('Unnamed index')}</span>
<span>{' · '}{index.index_type || '-'}</span>
<span>{' · '}{index.metric_type || '-'}</span>
<span>{' · '}{t('Indexed rows')}: {index.indexed_rows}</span>
<span>{' · '}{t('Pending rows')}: {index.pending_index_rows}</span>
<span>{' · '}{t('Status')}: {index.state || '-'}</span>
</li>
))}
</ul>
</Space>
) : null}
</Space>
</div> </div>
))} <div>
<div style={{ color: '#888' }}>{t('Vectors')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.total_vectors}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Database Size')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.db_file_size_bytes)}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Estimated Memory')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.estimated_total_memory_bytes)}</div>
</div>
</div>
{vectorStats.collections.length ? (
<Space direction="vertical" style={{ width: '100%' }} size={16}>
{vectorStats.collections.map((collection) => (
<div key={collection.name} style={{ border: '1px solid #f0f0f0', borderRadius: 8, padding: 16 }}>
<Space direction="vertical" size={12} style={{ width: '100%' }}>
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 12 }}>
<strong>{collection.name}</strong>
<span style={{ color: '#888' }}>
{collection.is_vector_collection && collection.dimension
? `${t('Dimension')}: ${collection.dimension}`
: t('Non-vector collection')}
</span>
</div>
<div>{t('Vectors')}: {collection.row_count}</div>
{collection.is_vector_collection ? (
<div>{t('Estimated memory')}: {formatBytes(collection.estimated_memory_bytes)}</div>
) : null}
{collection.indexes.length ? (
<Space direction="vertical" size={4} style={{ width: '100%' }}>
<span>{t('Indexes')}:</span>
<ul style={{ paddingLeft: 20, margin: 0 }}>
{collection.indexes.map((index) => (
<li key={`${collection.name}-${index.index_name || 'default'}`}>
<span>{index.index_name || t('Unnamed index')}</span>
<span>{' · '}{index.index_type || '-'}</span>
<span>{' · '}{index.metric_type || '-'}</span>
<span>{' · '}{t('Indexed rows')}: {index.indexed_rows}</span>
<span>{' · '}{t('Pending rows')}: {index.pending_index_rows}</span>
<span>{' · '}{t('Status')}: {index.state || '-'}</span>
</li>
))}
</ul>
</Space>
) : null}
</Space>
</div>
))}
</Space>
) : (
<Empty description={t('No collections')} />
)}
<div style={{ color: '#888' }}>
{t('Estimated memory is calculated as vectors x dimension x 4 bytes (float32).')}
</div>
</Space> </Space>
) : vectorStatsError ? (
<div style={{ color: '#ff4d4f' }}>{vectorStatsError}</div>
) : ( ) : (
<Empty description={t('No collections')} /> <Empty description={t('No collections')} />
)} )}
<div style={{ color: '#888' }}> <Form
{t('Estimated memory is calculated as vectors x dimension x 4 bytes (float32).')} layout="vertical"
</div> form={vectorConfigForm}
</Space> onFinish={handleVectorConfigSave}
) : vectorStatsError ? ( initialValues={{ type: selectedProviderType || undefined, config: {} }}
<div style={{ color: '#ff4d4f' }}>{vectorStatsError}</div> >
) : ( <Form.Item
<Empty description={t('No collections')} /> name="type"
label={t('Database Provider')}
rules={[{ required: true, message: t('Please select a provider') }]}
>
<Select
size="large"
options={vectorProviders.map((provider) => ({
value: provider.type,
label: provider.enabled ? provider.label : `${provider.label} (${t('Coming soon')})`,
disabled: !provider.enabled,
}))}
onChange={handleProviderChange}
loading={vectorConfigLoading && !vectorProviders.length}
/>
</Form.Item>
{selectedProvider?.description ? (
<Alert
type="info"
showIcon
message={t(selectedProvider.description)}
style={{ marginBottom: 16 }}
/>
) : null}
{selectedProvider?.config_schema?.map((field) => (
<Form.Item
key={field.key}
name={['config', field.key]}
label={t(field.label)}
rules={field.required ? [{ required: true, message: t('Please input {label}', { label: t(field.label) }) }] : []}
>
{field.type === 'password' ? (
<Input.Password size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
) : (
<Input size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
)}
</Form.Item>
))}
{selectedProvider && !selectedProvider.enabled ? (
<Alert
type="warning"
showIcon
message={t('This provider is not available yet')}
style={{ marginBottom: 16 }}
/>
) : null}
<Form.Item>
<Space direction="vertical" style={{ width: '100%' }}>
<Button
type="primary"
htmlType="submit"
loading={vectorConfigSaving}
block
disabled={!selectedProvider?.enabled}
>
{t('Save')}
</Button>
<Button
danger
htmlType="button"
block
onClick={() => {
Modal.confirm({
title: t('Confirm clear vector database?'),
content: t('This will delete all collections irreversibly.'),
okText: t('Confirm Clear'),
okType: 'danger',
cancelText: t('Cancel'),
onOk: async () => {
try {
await vectorDBApi.clearAll();
message.success(t('Vector database cleared'));
await fetchVectorStats();
await fetchVectorMeta();
} catch (e: any) {
message.error(e.message || t('Clear failed'));
}
},
});
}}
>
{t('Clear Vector DB')}
</Button>
</Space>
</Form.Item>
</Form>
</>
)} )}
</Space> </Space>
{vectorConfigLoading && !vectorProviders.length ? (
<Spin />
) : (
<Form
layout="vertical"
form={vectorConfigForm}
onFinish={handleVectorConfigSave}
initialValues={{ type: selectedProviderType || undefined, config: {} }}
>
<Form.Item
name="type"
label={t('Database Provider')}
rules={[{ required: true, message: t('Please select a provider') }]}
>
<Select
size="large"
options={vectorProviders.map((provider) => ({
value: provider.type,
label: provider.enabled ? provider.label : `${provider.label} (${t('Coming soon')})`,
disabled: !provider.enabled,
}))}
onChange={handleProviderChange}
loading={vectorConfigLoading && !vectorProviders.length}
/>
</Form.Item>
{selectedProvider?.description ? (
<Alert
type="info"
showIcon
message={t(selectedProvider.description)}
style={{ marginBottom: 16 }}
/>
) : null}
{selectedProvider?.config_schema?.map((field) => (
<Form.Item
key={field.key}
name={['config', field.key]}
label={t(field.label)}
rules={field.required ? [{ required: true, message: t('Please input {label}', { label: t(field.label) }) }] : []}
>
{field.type === 'password' ? (
<Input.Password size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
) : (
<Input size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
)}
</Form.Item>
))}
{selectedProvider && !selectedProvider.enabled ? (
<Alert
type="warning"
showIcon
message={t('This provider is not available yet')}
style={{ marginBottom: 16 }}
/>
) : null}
<Form.Item>
<Space direction="vertical" style={{ width: '100%' }}>
<Button
type="primary"
htmlType="submit"
loading={vectorConfigSaving}
block
disabled={!selectedProvider?.enabled}
>
{t('Save')}
</Button>
<Button
danger
htmlType="button"
block
onClick={() => {
Modal.confirm({
title: t('Confirm clear vector database?'),
content: t('This will delete all collections irreversibly.'),
okText: t('Confirm Clear'),
okType: 'danger',
cancelText: t('Cancel'),
onOk: async () => {
try {
await vectorDBApi.clearAll();
message.success(t('Vector database cleared'));
await fetchVectorStats();
await fetchVectorMeta();
} catch (e: any) {
message.error(e.message || t('Clear failed'));
}
},
});
}}
>
{t('Clear Vector DB')}
</Button>
</Space>
</Form.Item>
</Form>
)}
</Space> </Space>
</Card> </Card>
), ),