feat: Enhance vector database providers with source path handling and improved search functionality

This commit is contained in:
shiyu
2025-09-27 13:34:18 +08:00
parent ee6e570ccb
commit a4af9475ef
10 changed files with 1082 additions and 353 deletions

View File

@@ -1,4 +1,7 @@
from typing import Any, Dict, List, Tuple
from fastapi import APIRouter, Depends, Query
from schemas.fs import SearchResultItem
from services.auth import get_current_active_user, User
from services.ai import get_text_embedding
@@ -6,24 +9,96 @@ from services.vector_db import VectorDBService
router = APIRouter(prefix="/api/search", tags=["search"])
async def search_files_by_vector(q: str, top_k: int):
embedding = await get_text_embedding(q)
vector_db = VectorDBService()
results = await vector_db.search_vectors("vector_collection", embedding, top_k)
items = [
SearchResultItem(id=res["id"], path=res["entity"]["path"], score=res["distance"])
for res in results[0]
]
return {"items": items, "query": q}
async def search_files_by_name(q: str, top_k: int):
def _normalize_result(raw: Dict[str, Any], source: str, fallback_score: float = 0.0) -> SearchResultItem:
entity = dict(raw.get("entity") or {})
source_path = entity.get("source_path")
stored_path = entity.get("path")
path = source_path or stored_path or ""
chunk_id_value = entity.get("chunk_id")
chunk_id = str(chunk_id_value) if chunk_id_value is not None else None
snippet = entity.get("text") or entity.get("description") or entity.get("name")
mime = entity.get("mime")
start_offset = entity.get("start_offset")
end_offset = entity.get("end_offset")
raw_score = raw.get("distance")
score = float(raw_score) if raw_score is not None else fallback_score
metadata = {
"retrieval_source": source,
"raw_distance": raw_score,
}
if stored_path and stored_path != path:
metadata["stored_path"] = stored_path
vector_id = entity.get("vector_id")
if vector_id:
metadata["vector_id"] = vector_id
return SearchResultItem(
id=str(raw.get("id")),
path=path,
score=score,
chunk_id=chunk_id,
snippet=snippet,
mime=mime,
source_type=entity.get("type") or source,
start_offset=start_offset,
end_offset=end_offset,
metadata=metadata,
)
async def _vector_search(query: str, top_k: int) -> List[SearchResultItem]:
vector_db = VectorDBService()
results = await vector_db.search_by_path("vector_collection", q, top_k)
items = [
SearchResultItem(id=idx, path=res["entity"]["path"], score=res["distance"])
for idx, res in enumerate(results[0])
]
return {"items": items, "query": q}
try:
embedding = await get_text_embedding(query)
except Exception:
embedding = None
if not embedding:
return []
try:
raw_results = await vector_db.search_vectors("vector_collection", embedding, max(top_k, 10))
except Exception:
return []
results: List[SearchResultItem] = []
for bucket in raw_results or []:
for record in bucket or []:
results.append(_normalize_result(record, "vector"))
return results
async def _filename_search(query: str, page: int, page_size: int) -> Tuple[List[SearchResultItem], bool]:
vector_db = VectorDBService()
limit = max(page * page_size + 1, page_size * (page + 2))
limit = min(limit, 2000)
try:
raw_results = await vector_db.search_by_path("vector_collection", query, limit)
except Exception:
return [], False
records = raw_results[0] if raw_results else []
deduped: List[SearchResultItem] = []
seen_paths: set[str] = set()
for record in records or []:
item = _normalize_result(record, "filename", fallback_score=1.0)
stored_path = item.metadata.get("stored_path") if item.metadata else None
key = item.path or stored_path or ""
if key in seen_paths:
continue
seen_paths.add(key)
deduped.append(item)
start = max(page - 1, 0) * page_size
end = start + page_size
page_items = deduped[start:end]
for offset, item in enumerate(page_items):
if item.metadata is None:
item.metadata = {}
item.metadata.setdefault("retrieval_rank", start + offset)
has_more = len(deduped) > end
return page_items, has_more
@router.get("")
@@ -31,11 +106,32 @@ async def search_files(
q: str = Query(..., description="搜索查询"),
top_k: int = Query(10, description="返回结果数量"),
mode: str = Query("vector", description="搜索模式: 'vector''filename'"),
page: int = Query(1, description="分页页码,仅在文件名搜索模式下生效"),
page_size: int = Query(10, description="分页大小,仅在文件名搜索模式下生效"),
user: User = Depends(get_current_active_user),
):
if not q.strip():
return {"items": [], "query": q}
top_k = max(top_k, 1)
page = max(page, 1)
page_size = max(min(page_size, 100), 1)
if mode == "vector":
return await search_files_by_vector(q, top_k)
items = (await _vector_search(q, top_k))[:top_k]
elif mode == "filename":
return await search_files_by_name(q, top_k)
items, has_more = await _filename_search(q, page, page_size)
return {
"items": items,
"query": q,
"mode": mode,
"pagination": {
"page": page,
"page_size": page_size,
"has_more": has_more,
},
}
else:
return {"items": [], "query": q, "error": "Invalid search mode"}
items = (await _vector_search(q, top_k))[:top_k]
return {"items": items, "query": q, "mode": mode}

View File

@@ -21,6 +21,13 @@ class SearchResultItem(BaseModel):
id: int | str
path: str
score: float
chunk_id: Optional[str] = None
snippet: Optional[str] = None
mime: Optional[str] = None
source_type: Optional[str] = None
start_offset: Optional[int] = None
end_offset: Optional[int] = None
metadata: Optional[dict] = None
class MkdirRequest(BaseModel):

View File

@@ -68,3 +68,46 @@ async def get_text_embedding(text: str) -> List[float]:
resp.raise_for_status()
result = resp.json()
return result["data"][0]["embedding"]
async def rerank_texts(query: str, documents: List[str]) -> List[float]:
"""调用重排序模型,为一组文档返回得分。未配置时返回空列表。"""
if not documents:
return []
api_url = await ConfigCenter.get("AI_RERANK_API_URL")
model = await ConfigCenter.get("AI_RERANK_MODEL")
api_key = await ConfigCenter.get("AI_RERANK_API_KEY")
if not api_url or not model or not api_key:
return []
payload = {
"model": model,
"query": query,
"documents": documents,
}
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
async with httpx.AsyncClient() as client:
try:
resp = await client.post(api_url, headers=headers, json=payload)
resp.raise_for_status()
except httpx.HTTPStatusError:
return []
data = resp.json()
if isinstance(data, dict):
results = data.get("results")
if isinstance(results, list):
scores = []
for item in results:
if isinstance(item, dict) and "score" in item:
try:
scores.append(float(item["score"]))
except (TypeError, ValueError):
scores.append(0.0)
return scores
return []

View File

@@ -1,11 +1,95 @@
from typing import Dict, Any
from typing import Dict, Any, List, Tuple
from fastapi.responses import Response
import base64
import mimetypes
import os
from io import BytesIO
from services.ai import describe_image_base64, get_text_embedding
from services.vector_db import VectorDBService, DEFAULT_VECTOR_DIMENSION
from services.logging import LogService
from services.config import ConfigCenter
try: # Pillow is optional but bundled with the project dependencies
from PIL import Image
except ImportError: # pragma: no cover - fallback when pillow missing
Image = None
CHUNK_SIZE = 800
CHUNK_OVERLAP = 200
MAX_IMAGE_EDGE = 1600
JPEG_QUALITY = 85
def _chunk_text(content: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[Tuple[int, str, int, int]]:
"""按固定窗口拆分文本,返回(chunk_id, chunk_text, start, end)。"""
if chunk_size <= 0:
chunk_size = CHUNK_SIZE
if overlap >= chunk_size:
overlap = max(chunk_size // 4, 1)
chunks: List[Tuple[int, str, int, int]] = []
step = chunk_size - overlap
idx = 0
start = 0
length = len(content)
while start < length:
end = min(length, start + chunk_size)
chunk = content[start:end].strip()
if chunk:
chunks.append((idx, chunk, start, end))
idx += 1
if end >= length:
break
start += step
return chunks
def _guess_mime(path: str) -> str:
mime, _ = mimetypes.guess_type(path)
return mime or "application/octet-stream"
def _chunk_key(path: str, chunk_id: str) -> str:
return f"{path}#chunk={chunk_id}"
def _compress_image_for_embedding(input_bytes: bytes) -> Tuple[bytes, Dict[str, Any] | None]:
"""压缩图片,降低发送到视觉模型的体积。"""
if Image is None:
return input_bytes, None
try:
with Image.open(BytesIO(input_bytes)) as img:
img = img.convert("RGB")
width, height = img.size
longest_edge = max(width, height)
scale = 1.0
if longest_edge > MAX_IMAGE_EDGE:
scale = MAX_IMAGE_EDGE / float(longest_edge)
new_size = (max(int(width * scale), 1), max(int(height * scale), 1))
resample_mode = getattr(getattr(Image, "Resampling", Image), "LANCZOS")
img = img.resize(new_size, resample=resample_mode)
buffer = BytesIO()
img.save(buffer, format="JPEG", quality=JPEG_QUALITY, optimize=True)
compressed = buffer.getvalue()
if len(compressed) < len(input_bytes):
return compressed, {
"original_bytes": len(input_bytes),
"compressed_bytes": len(compressed),
"scaled": scale < 1.0,
"width": img.width,
"height": img.height,
}
except Exception: # pragma: no cover - 任意图像处理异常时回退
return input_bytes, None
return input_bytes, None
class VectorIndexProcessor:
name = "向量索引"
@@ -33,6 +117,7 @@ class VectorIndexProcessor:
index_type = config.get("index_type", "vector")
vector_db = VectorDBService()
collection_name = "vector_collection"
if action == "destroy":
await vector_db.delete_vector(collection_name, path)
await LogService.info(
@@ -42,9 +127,19 @@ class VectorIndexProcessor:
)
return Response(content=f"文件 {path}{index_type} 索引已销毁", media_type="text/plain")
if index_type == 'simple':
mime_type = _guess_mime(path)
if index_type == "simple":
await vector_db.ensure_collection(collection_name, vector=False)
await vector_db.upsert_vector(collection_name, {'path': path})
await vector_db.delete_vector(collection_name, path)
await vector_db.upsert_vector(collection_name, {
"path": path,
"source_path": path,
"chunk_id": "filename",
"mime": mime_type,
"type": "filename",
"name": os.path.basename(path),
})
await LogService.info(
"processor:vector_index",
f"Created simple index for {path}",
@@ -53,24 +148,7 @@ class VectorIndexProcessor:
return Response(content=f"文件 {path} 的普通索引已创建", media_type="text/plain")
file_ext = path.split('.')[-1].lower()
description = ""
embedding = None
if file_ext in ["jpg", "jpeg", "png", "bmp"]:
base64_image = base64.b64encode(input_bytes).decode("utf-8")
description = await describe_image_base64(base64_image)
embedding = await get_text_embedding(description)
log_message = f"Indexed image {path}"
response_message = f"图片已索引,描述:{description}"
elif file_ext in ["txt", "md"]:
text = input_bytes.decode("utf-8")
embedding = await get_text_embedding(text)
description = text[:100] + "..." if len(text) > 100 else text
log_message = f"Indexed text file {path}"
response_message = f"文本文件已索引"
if embedding is None:
return Response(content="不支持的文件类型", status_code=400)
details: Dict[str, Any] = {"path": path, "action": "create", "index_type": "vector"}
raw_dim = await ConfigCenter.get('AI_EMBED_DIM', DEFAULT_VECTOR_DIMENSION)
try:
@@ -81,15 +159,103 @@ class VectorIndexProcessor:
vector_dim = DEFAULT_VECTOR_DIMENSION
await vector_db.ensure_collection(collection_name, vector=True, dim=vector_dim)
await vector_db.upsert_vector(
collection_name, {'path': path, 'embedding': embedding})
await vector_db.delete_vector(collection_name, path)
if file_ext in ["jpg", "jpeg", "png", "bmp"]:
processed_bytes, compression = _compress_image_for_embedding(input_bytes)
base64_image = base64.b64encode(processed_bytes).decode("utf-8")
description = await describe_image_base64(base64_image)
embedding = await get_text_embedding(description)
image_mime = "image/jpeg" if compression else mime_type
await vector_db.upsert_vector(collection_name, {
"path": _chunk_key(path, "image"),
"source_path": path,
"chunk_id": "image",
"embedding": embedding,
"text": description,
"mime": image_mime,
"type": "image",
})
details["description"] = description
if compression:
details["image_compression"] = compression
await LogService.info(
"processor:vector_index",
f"Indexed image {path}",
details=details,
)
return Response(content=f"图片已索引,描述:{description}", media_type="text/plain")
if file_ext in ["txt", "md"]:
try:
text = input_bytes.decode("utf-8")
except UnicodeDecodeError:
return Response(content="文本文件解码失败", status_code=400)
chunks = _chunk_text(text)
if not chunks:
await vector_db.upsert_vector(collection_name, {
"path": _chunk_key(path, "0"),
"source_path": path,
"chunk_id": "0",
"embedding": await get_text_embedding(text or path),
"text": text,
"mime": mime_type,
"type": "text",
"start_offset": 0,
"end_offset": len(text),
})
details["chunks"] = 1
await LogService.info(
"processor:vector_index",
f"Indexed text file {path}",
details=details,
)
return Response(content="文本文件已索引", media_type="text/plain")
chunk_count = 0
for chunk_id, chunk_text, start, end in chunks:
embedding = await get_text_embedding(chunk_text)
await vector_db.upsert_vector(collection_name, {
"path": _chunk_key(path, str(chunk_id)),
"source_path": path,
"chunk_id": str(chunk_id),
"embedding": embedding,
"text": chunk_text,
"mime": mime_type,
"type": "text",
"start_offset": start,
"end_offset": end,
})
chunk_count += 1
details["chunks"] = chunk_count
sample = chunks[0][1]
details["sample"] = sample[:120]
await LogService.info(
"processor:vector_index",
f"Indexed text file {path}",
details=details,
)
return Response(content="文本文件已索引", media_type="text/plain")
# 其他类型暂未支持向量索引,回退为文件名索引
await vector_db.delete_vector(collection_name, path)
await vector_db.upsert_vector(collection_name, {
"path": _chunk_key(path, "fallback"),
"source_path": path,
"chunk_id": "filename",
"mime": mime_type,
"type": "filename",
"name": os.path.basename(path),
"embedding": [0.0] * vector_dim,
})
await LogService.info(
"processor:vector_index",
log_message,
details={"path": path, "description": description, "action": "create", "index_type": "vector"},
f"File type fallback to simple index for {path}",
details={"path": path, "action": "create", "index_type": "simple", "original_type": file_ext},
)
return Response(content=response_message, media_type="text/plain")
return Response(content="暂不支持该类型的向量索引,已创建文件名索引", media_type="text/plain")
PROCESSOR_TYPE = "vector_index"

View File

@@ -50,15 +50,20 @@ class MilvusLiteProvider(BaseVectorProvider):
client = self._get_client()
if client.has_collection(collection_name):
return
common_fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
FieldSchema(name="source_path", dtype=DataType.VARCHAR, max_length=512, is_primary=False, auto_id=False),
]
if vector:
vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
if vector_dim <= 0:
vector_dim = 4096
fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
*common_fields,
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
]
schema = CollectionSchema(fields, description="Image vector collection")
schema = CollectionSchema(fields, description="Vector collection", enable_dynamic_field=True)
client.create_collection(collection_name, schema=schema)
index_params = MilvusClient.prepare_index_params()
index_params.add_index(
@@ -70,38 +75,98 @@ class MilvusLiteProvider(BaseVectorProvider):
)
client.create_index(collection_name, index_params=index_params)
else:
fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
]
schema = CollectionSchema(fields, description="Simple file index")
schema = CollectionSchema(common_fields, description="Simple file index", enable_dynamic_field=True)
client.create_collection(collection_name, schema=schema)
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
self._get_client().upsert(collection_name, data)
payload = dict(data)
payload.setdefault("source_path", payload.get("path"))
payload.setdefault("vector_id", payload.get("path"))
self._get_client().upsert(collection_name, data=[payload])
def delete_vector(self, collection_name: str, path: str) -> None:
self._get_client().delete(collection_name, ids=[path])
client = self._get_client()
escaped = path.replace('"', '\\"')
client.delete(collection_name, filter=f'source_path == "{escaped}"')
def search_vectors(self, collection_name: str, query_embedding, top_k: int):
search_params = {"metric_type": "COSINE"}
return self._get_client().search(
output_fields = [
"path",
"source_path",
"chunk_id",
"mime",
"text",
"start_offset",
"end_offset",
"type",
"name",
]
raw_results = self._get_client().search(
collection_name,
data=[query_embedding],
anns_field="embedding",
search_params=search_params,
limit=top_k,
output_fields=["path"],
output_fields=output_fields,
)
formatted: List[List[Dict[str, Any]]] = []
for hits in raw_results:
bucket: List[Dict[str, Any]] = []
for hit in hits:
if hasattr(hit, "entity"):
entity = dict(getattr(hit, "entity", {}) or {})
hit_id = getattr(hit, "id", None)
distance = getattr(hit, "distance", None)
elif isinstance(hit, dict):
entity = dict((hit.get("entity") or {}))
hit_id = hit.get("id")
distance = hit.get("distance")
else:
entity = {}
hit_id = None
distance = None
entity.setdefault("path", entity.get("source_path"))
bucket.append({
"id": hit_id,
"distance": distance,
"entity": entity,
})
formatted.append(bucket)
return formatted
def search_by_path(self, collection_name: str, query_path: str, top_k: int):
filter_expr = f"path like '%{query_path}%'" if query_path else "path like '%%'"
if query_path:
escaped = query_path.replace('"', '\\"')
filter_expr = f'source_path like "%{escaped}%"'
else:
filter_expr = "source_path like '%%'"
results = self._get_client().query(
collection_name,
filter=filter_expr,
limit=top_k,
output_fields=["path"],
output_fields=[
"path",
"source_path",
"chunk_id",
"mime",
"text",
"start_offset",
"end_offset",
"type",
"name",
],
)
return [[{"id": r["path"], "distance": 1.0, "entity": {"path": r["path"]}} for r in results]]
formatted = []
for row in results:
entity = dict(row)
entity.setdefault("path", entity.get("source_path"))
formatted.append({
"id": entity.get("path"),
"distance": 1.0,
"entity": entity,
})
return [formatted]
def get_all_stats(self) -> Dict[str, Any]:
client = self._get_client()

View File

@@ -58,15 +58,19 @@ class MilvusServerProvider(BaseVectorProvider):
client = self._get_client()
if client.has_collection(collection_name):
return
common_fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
FieldSchema(name="source_path", dtype=DataType.VARCHAR, max_length=512, is_primary=False, auto_id=False),
]
if vector:
vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
if vector_dim <= 0:
vector_dim = 4096
fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
*common_fields,
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
]
schema = CollectionSchema(fields, description="Image vector collection")
schema = CollectionSchema(fields, description="Vector collection", enable_dynamic_field=True)
client.create_collection(collection_name, schema=schema)
index_params = MilvusClient.prepare_index_params()
index_params.add_index(
@@ -78,38 +82,98 @@ class MilvusServerProvider(BaseVectorProvider):
)
client.create_index(collection_name, index_params=index_params)
else:
fields = [
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
]
schema = CollectionSchema(fields, description="Simple file index")
schema = CollectionSchema(common_fields, description="Simple file index", enable_dynamic_field=True)
client.create_collection(collection_name, schema=schema)
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
self._get_client().upsert(collection_name, data)
payload = dict(data)
payload.setdefault("source_path", payload.get("path"))
payload.setdefault("vector_id", payload.get("path"))
self._get_client().upsert(collection_name, data=[payload])
def delete_vector(self, collection_name: str, path: str) -> None:
self._get_client().delete(collection_name, ids=[path])
client = self._get_client()
escaped = path.replace('"', '\\"')
client.delete(collection_name, filter=f'source_path == "{escaped}"')
def search_vectors(self, collection_name: str, query_embedding, top_k: int):
search_params = {"metric_type": "COSINE"}
return self._get_client().search(
output_fields = [
"path",
"source_path",
"chunk_id",
"mime",
"text",
"start_offset",
"end_offset",
"type",
"name",
]
raw_results = self._get_client().search(
collection_name,
data=[query_embedding],
anns_field="embedding",
search_params=search_params,
limit=top_k,
output_fields=["path"],
output_fields=output_fields,
)
formatted: List[List[Dict[str, Any]]] = []
for hits in raw_results:
bucket: List[Dict[str, Any]] = []
for hit in hits:
if hasattr(hit, "entity"):
entity = dict(getattr(hit, "entity", {}) or {})
hit_id = getattr(hit, "id", None)
distance = getattr(hit, "distance", None)
elif isinstance(hit, dict):
entity = dict((hit.get("entity") or {}))
hit_id = hit.get("id")
distance = hit.get("distance")
else:
entity = {}
hit_id = None
distance = None
entity.setdefault("path", entity.get("source_path"))
bucket.append({
"id": hit_id,
"distance": distance,
"entity": entity,
})
formatted.append(bucket)
return formatted
def search_by_path(self, collection_name: str, query_path: str, top_k: int):
filter_expr = f"path like '%{query_path}%'" if query_path else "path like '%%'"
if query_path:
escaped = query_path.replace('"', '\\"')
filter_expr = f'source_path like "%{escaped}%"'
else:
filter_expr = "source_path like '%%'"
results = self._get_client().query(
collection_name,
filter=filter_expr,
limit=top_k,
output_fields=["path"],
output_fields=[
"path",
"source_path",
"chunk_id",
"mime",
"text",
"start_offset",
"end_offset",
"type",
"name",
],
)
return [[{"id": r["path"], "distance": 1.0, "entity": {"path": r["path"]}} for r in results]]
formatted = []
for row in results:
entity = dict(row)
entity.setdefault("path", entity.get("source_path"))
formatted.append({
"id": entity.get("path"),
"distance": 1.0,
"entity": entity,
})
return [formatted]
def get_all_stats(self) -> Dict[str, Any]:
client = self._get_client()

View File

@@ -58,29 +58,59 @@ class QdrantProvider(BaseVectorProvider):
size = dim if vector and isinstance(dim, int) and dim > 0 else 1
return qmodels.VectorParams(size=size, distance=qmodels.Distance.COSINE)
def _ensure_payload_indexes(self, client: QdrantClient, collection_name: str) -> None:
for field in ("path", "source_path"):
try:
client.create_payload_index(
collection_name=collection_name,
field_name=field,
field_schema="keyword",
)
except Exception as exc: # pragma: no cover - 依赖外部服务
message = str(exc).lower()
if "already exists" in message or "index exists" in message:
continue
# 旧版本 qdrant 可能返回带状态码的异常,这里容忍重复创建
raise
def ensure_collection(self, collection_name: str, vector: bool, dim: int) -> None:
client = self._get_client()
try:
if client.collection_exists(collection_name):
return
exists = client.collection_exists(collection_name)
except Exception as exc: # pragma: no cover - 依赖外部服务
raise RuntimeError(f"Failed to check Qdrant collection '{collection_name}': {exc}") from exc
if exists:
try:
self._ensure_payload_indexes(client, collection_name)
except Exception:
pass
return
vectors_config = self._vector_params(vector, dim)
try:
client.create_collection(collection_name=collection_name, vectors_config=vectors_config)
except Exception as exc: # pragma: no cover
if "already exists" in str(exc).lower():
try:
self._ensure_payload_indexes(client, collection_name)
except Exception:
pass
return
raise RuntimeError(f"Failed to create Qdrant collection '{collection_name}': {exc}") from exc
try:
self._ensure_payload_indexes(client, collection_name)
except Exception:
pass
@staticmethod
def _point_id(path: str) -> str:
return str(uuid5(NAMESPACE_URL, path))
def _point_id(uid: str) -> str:
return str(uuid5(NAMESPACE_URL, uid))
def _prepare_point(self, data: Dict[str, Any]) -> qmodels.PointStruct:
path = data.get("path")
if not path:
uid = data.get("path")
if not uid:
raise ValueError("Qdrant upsert requires 'path' in data")
embedding = data.get("embedding")
@@ -89,8 +119,11 @@ class QdrantProvider(BaseVectorProvider):
else:
vector = [float(x) for x in embedding]
payload = {"path": path}
return qmodels.PointStruct(id=self._point_id(path), vector=vector, payload=payload)
payload = {k: v for k, v in data.items() if k != "embedding"}
payload.setdefault("vector_id", uid)
source_path = payload.get("source_path") or payload.get("path")
payload["path"] = source_path
return qmodels.PointStruct(id=self._point_id(str(uid)), vector=vector, payload=payload)
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
client = self._get_client()
@@ -99,7 +132,12 @@ class QdrantProvider(BaseVectorProvider):
def delete_vector(self, collection_name: str, path: str) -> None:
client = self._get_client()
selector = qmodels.PointIdsList(points=[self._point_id(path)])
condition = qmodels.FieldCondition(
key="path",
match=qmodels.MatchValue(value=path),
)
flt = qmodels.Filter(must=[condition])
selector = qmodels.FilterSelector(filter=flt)
client.delete(collection_name=collection_name, points_selector=selector, wait=True)
def _format_search_results(self, points: Sequence[qmodels.ScoredPoint]):
@@ -107,7 +145,7 @@ class QdrantProvider(BaseVectorProvider):
{
"id": point.id,
"distance": point.score,
"entity": {"path": (point.payload or {}).get("path")},
"entity": point.payload or {},
}
for point in points
]
@@ -141,11 +179,11 @@ class QdrantProvider(BaseVectorProvider):
break
for record in records:
path = (record.payload or {}).get("path")
if query_path and path:
if query_path not in path:
continue
results.append({"id": record.id, "distance": 1.0, "entity": {"path": path}})
payload = record.payload or {}
path = payload.get("path")
if query_path and path and query_path not in path:
continue
results.append({"id": record.id, "distance": 1.0, "entity": payload})
if len(results) >= top_k:
break

View File

@@ -21,9 +21,29 @@ export interface DirListing {
}
export interface SearchResultItem {
id: number;
id: string;
path: string;
score: number;
chunk_id?: string;
snippet?: string;
mime?: string;
source_type?: string;
start_offset?: number;
end_offset?: number;
metadata?: Record<string, any>;
}
export interface SearchPagination {
page: number;
page_size: number;
has_more: boolean;
}
export interface SearchResponse {
items: SearchResultItem[];
query: string;
mode?: string;
pagination?: SearchPagination;
}
export const vfsApi = {
@@ -105,6 +125,20 @@ export const vfsApi = {
xhr.send(fd);
});
},
searchFiles: (q: string, top_k: number = 10, mode: 'vector' | 'filename' = 'vector') =>
request<{ items: SearchResultItem[]; query: string }>(`/search?q=${encodeURIComponent(q)}&top_k=${top_k}&mode=${mode}`),
searchFiles: (
q: string,
top_k: number = 10,
mode: 'vector' | 'filename' = 'vector',
page?: number,
page_size?: number,
) => {
const params = new URLSearchParams({
q,
top_k: String(top_k),
mode,
});
if (page !== undefined) params.set('page', String(page));
if (page_size !== undefined) params.set('page_size', String(page_size));
return request<SearchResponse>(`/search?${params.toString()}`);
},
};

View File

@@ -1,128 +1,313 @@
import { Modal, Input, List, Divider, Spin, Select, Space } from 'antd';
import { Modal, Input, List, Divider, Spin, Space, Tag, Typography, Empty, Flex, Segmented, Pagination } from 'antd';
import { SearchOutlined, FileTextOutlined } from '@ant-design/icons';
import React, { useState } from 'react';
import React, { useRef, useState } from 'react';
import { vfsApi, type SearchResultItem } from '../api/vfs';
import { useI18n } from '../i18n';
import { useNavigate } from 'react-router';
interface SearchDialogProps {
open: boolean;
onClose: () => void;
}
const SEARCH_MODES = (t: (k: string)=>string) => [
{ label: t('Smart Search'), value: 'vector' },
{ label: t('Name Search'), value: 'filename' },
];
type SearchMode = 'vector' | 'filename';
const PAGE_SIZE = 10;
const SearchDialog: React.FC<SearchDialogProps> = ({ open, onClose }) => {
const [search, setSearch] = useState('');
const [loading, setLoading] = useState(false);
const [results, setResults] = useState<SearchResultItem[]>([]);
const [searched, setSearched] = useState(false);
const [searchMode, setSearchMode] = useState<'vector' | 'filename'>('vector');
const [searchMode, setSearchMode] = useState<SearchMode>('vector');
const [page, setPage] = useState(1);
const [hasMore, setHasMore] = useState(false);
const requestIdRef = useRef(0);
const { t } = useI18n();
const navigate = useNavigate();
const handleSearch = async () => {
if (!search.trim()) return;
const renderSourceLabel = (value?: string) => {
switch ((value || '').toLowerCase()) {
case 'vector':
return t('Vector Search');
case 'filename':
return t('Name Search');
case 'text':
return t('Text Chunk');
case 'image':
return t('Image Description');
default:
return t('Vector Search');
}
};
const sourceColor = (value?: string) => {
switch ((value || '').toLowerCase()) {
case 'vector':
return 'blue';
case 'filename':
return 'green';
case 'image':
return 'volcano';
case 'text':
return 'geekblue';
default:
return 'purple';
}
};
const performSearch = async (options?: { page?: number; mode?: SearchMode }) => {
const query = search.trim();
if (!query) {
setSearched(false);
setResults([]);
setHasMore(false);
return;
}
const currentMode = options?.mode ?? searchMode;
const targetPage = currentMode === 'filename' ? (options?.page ?? (currentMode === searchMode ? page : 1)) : 1;
const requestId = requestIdRef.current + 1;
requestIdRef.current = requestId;
setLoading(true);
setSearched(true);
try {
const res = await vfsApi.searchFiles(search, 10, searchMode);
setResults(res.items);
} catch (e) {
setResults([]);
if (currentMode === 'filename') {
setPage(targetPage);
} else {
setPage(1);
setHasMore(false);
}
try {
const res = await vfsApi.searchFiles(
query,
currentMode === 'filename' ? PAGE_SIZE : 10,
currentMode,
currentMode === 'filename' ? targetPage : undefined,
currentMode === 'filename' ? PAGE_SIZE : undefined,
);
if (requestId !== requestIdRef.current) {
return;
}
setResults(res.items);
if (currentMode === 'filename') {
const pagination = res.pagination;
setHasMore(Boolean(pagination?.has_more));
if (pagination?.page) {
setPage(pagination.page);
}
} else {
setHasMore(false);
}
} catch (e) {
if (requestId !== requestIdRef.current) {
return;
}
setResults([]);
if (currentMode === 'filename') {
setHasMore(false);
}
} finally {
if (requestId === requestIdRef.current) {
setLoading(false);
}
}
setLoading(false);
};
const handleSearch = () => {
if (!search.trim()) {
setResults([]);
setSearched(false);
setHasMore(false);
setPage(1);
return;
}
void performSearch({ page: searchMode === 'filename' ? 1 : undefined });
};
const handleModeChange = (value: string | number) => {
const nextMode = value as SearchMode;
setHasMore(false);
setPage(1);
setSearchMode(nextMode);
if (search.trim()) {
void performSearch({ mode: nextMode, page: nextMode === 'filename' ? 1 : undefined });
} else {
setResults([]);
setSearched(false);
}
};
const handleClose = () => {
setSearch('');
setResults([]);
setSearched(false);
setSearchMode('vector');
setPage(1);
setHasMore(false);
requestIdRef.current = 0;
setLoading(false);
onClose();
};
const totalItems = searchMode === 'filename'
? (hasMore ? page * PAGE_SIZE + 1 : (page - 1) * PAGE_SIZE + results.length)
: results.length;
return (
<Modal
open={open}
onCancel={onClose}
onCancel={handleClose}
footer={null}
width={600}
width={720}
centered
title={null}
closable={false}
styles={{
body: {
padding: '12px 16px 16px',
maxHeight: '70vh',
overflow: 'hidden',
display: 'flex',
flexDirection: 'column',
gap: 12,
},
}}
>
<Space.Compact style={{ marginBottom: 0, width: '100%' }}>
<Select
options={SEARCH_MODES(t)}
value={searchMode}
onChange={v => setSearchMode(v as 'vector' | 'filename')}
style={{
width: 120,
fontSize: 18,
height: 40,
lineHeight: '40px',
borderTopRightRadius: 0,
borderBottomRightRadius: 0,
borderRight: 0,
verticalAlign: 'top',
}}
styles={{ popup: { root: { fontSize: 18 } } }}
popupMatchSelectWidth={false}
/>
<Input
allowClear
prefix={<SearchOutlined />}
placeholder={t('Search files / tags / types')}
value={search}
onChange={e => setSearch(e.target.value)}
style={{
fontSize: 18,
height: 40,
width: 'calc(100% - 120px)',
borderTopLeftRadius: 0,
borderBottomLeftRadius: 0,
verticalAlign: 'top',
}}
autoFocus
onPressEnter={handleSearch}
/>
</Space.Compact>
{searched && (
<>
<Divider style={{ margin: '12px 0' }}>{t('Search Results')}</Divider>
{loading ? (
<Spin />
) : (
<List
itemLayout="horizontal"
dataSource={results}
locale={{ emptyText: t('No files found') }}
renderItem={item => {
const fullPath = item.path || '';
const trimmed = fullPath.replace(/\/+$/, '');
const parts = trimmed.split('/');
const filename = parts.pop() || '';
const dir = parts.length ? '/' + parts.join('/') : '/';
return (
<List.Item>
<List.Item.Meta
avatar={<FileTextOutlined />}
title={
<a
onClick={() => {
navigate(`/files${dir === '/' ? '' : dir}`, { state: { highlight: { name: filename } } });
onClose();
}}
>
{fullPath}
</a>
}
description={`${t('Relevance')}: ${item.score.toFixed(2)}`}
/>
</List.Item>
);
}}
/>
)}
</>
)}
<Flex vertical style={{ gap: 12, flex: 1, minHeight: 0 }}>
<Flex align="center" style={{ width: '100%', gap: 12, flexWrap: 'wrap' }}>
<Segmented
options={[
{ label: t('Smart Search'), value: 'vector' },
{ label: t('Name Search'), value: 'filename' },
]}
value={searchMode}
onChange={handleModeChange}
style={{
minWidth: 160,
height: 40,
borderRadius: 20,
display: 'flex',
alignItems: 'center',
}}
size="large"
/>
<Input
allowClear
prefix={<SearchOutlined />}
placeholder={t('Search files / tags / types')}
value={search}
onChange={e => {
const value = e.target.value;
setSearch(value);
if (!value.trim()) {
setResults([]);
setSearched(false);
setHasMore(false);
setPage(1);
requestIdRef.current += 1;
setLoading(false);
}
}}
style={{ fontSize: 18, height: 40, flex: 1, minWidth: 240 }}
styles={{
input: {
borderRadius: 20,
},
}}
autoFocus
onPressEnter={handleSearch}
/>
</Flex>
{!searched ? null : (
<Flex vertical style={{ flex: 1, minHeight: 0 }}>
<Divider style={{ margin: 0, padding: '0 0 12px' }}>{t('Search Results')}</Divider>
{loading ? (
<Flex align="center" justify="center" style={{ flex: 1 }}>
<Spin />
</Flex>
) : results.length === 0 ? (
<Flex align="center" justify="center" style={{ flex: 1 }}>
<Empty description={t('No files found')} image={Empty.PRESENTED_IMAGE_SIMPLE} />
</Flex>
) : (
<div style={{ flex: 1, minHeight: 0, display: 'flex', flexDirection: 'column' }}>
<div style={{ flex: 1, minHeight: 0, overflowY: 'auto', paddingRight: 6 }}>
<List
itemLayout="horizontal"
dataSource={results}
split={false}
renderItem={item => {
const fullPath = item.path || '';
const trimmed = fullPath.replace(/\/+$/, '');
const parts = trimmed.split('/');
const filename = parts.pop() || '';
const dir = parts.length ? '/' + parts.join('/') : '/';
const snippet = item.snippet || '';
const retrieval = item.metadata?.retrieval_source || item.source_type;
const retrievalLabel = renderSourceLabel(retrieval);
const scoreText = Number.isFinite(item.score) ? item.score.toFixed(2) : '-';
return (
<List.Item style={{ padding: '10px 12px', borderRadius: 6, background: '#fafafa', marginBottom: 8 }}>
<List.Item.Meta
avatar={<FileTextOutlined style={{ fontSize: 18, color: '#8c8c8c' }} />}
title={
<a
onClick={() => {
navigate(`/files${dir === '/' ? '' : dir}`, { state: { highlight: { name: filename } } });
handleClose();
}}
style={{ fontSize: 16 }}
>
{fullPath}
</a>
}
description={(
<Space direction="vertical" size={6} style={{ width: '100%' }}>
{snippet ? (
<Typography.Paragraph ellipsis={{ rows: 3 }} style={{ marginBottom: 0 }}>
{snippet}
</Typography.Paragraph>
) : null}
<Space size={10} wrap>
{retrieval ? (
<Tag color={sourceColor(retrieval)} style={{ marginRight: 0 }}>
{retrievalLabel}
</Tag>
) : null}
<Typography.Text type="secondary">
{t('Relevance')}: {scoreText}
</Typography.Text>
</Space>
</Space>
)}
/>
</List.Item>
);
}}
/>
</div>
{searchMode === 'filename' && results.length > 0 ? (
<Pagination
current={page}
pageSize={PAGE_SIZE}
total={Math.max(totalItems, 1)}
showSizeChanger={false}
size="small"
style={{ marginTop: 12, textAlign: 'right' }}
onChange={(nextPage) => {
void performSearch({ page: nextPage });
}}
/>
) : null}
</div>
)}
</Flex>
)}
</Flex>
</Modal>
);
};

View File

@@ -8,14 +8,23 @@ import { useTheme } from '../../contexts/ThemeContext';
import '../../styles/settings-tabs.css';
import { useI18n } from '../../i18n';
const APP_CONFIG_KEYS: {key: string, label: string, default?: string}[] = [
const APP_CONFIG_KEYS: { key: string, label: string, default?: string }[] = [
{ key: 'APP_NAME', label: 'App Name' },
{ key: 'APP_LOGO', label: 'Logo URL' },
{ key: 'APP_DOMAIN', label: 'App Domain' },
{ key: 'FILE_DOMAIN', label: 'File Domain' },
];
const VISION_CONFIG_KEYS = [
interface AiConfigKeyBase {
key: string;
default?: string | number;
}
interface AiConfigKeyWithLabel extends AiConfigKeyBase {
label: string;
}
const VISION_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
{ key: 'AI_VISION_API_URL', label: 'Vision API URL' },
{ key: 'AI_VISION_MODEL', label: 'Vision Model', default: 'Qwen/Qwen2.5-VL-32B-Instruct' },
{ key: 'AI_VISION_API_KEY', label: 'Vision API Key' },
@@ -24,13 +33,24 @@ const VISION_CONFIG_KEYS = [
const DEFAULT_EMBED_DIMENSION = 4096;
const EMBED_DIM_KEY = 'AI_EMBED_DIM';
const EMBED_CONFIG_KEYS = [
const EMBED_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
{ key: 'AI_EMBED_API_URL', label: 'Embedding API URL' },
{ key: 'AI_EMBED_MODEL', label: 'Embedding Model', default: 'Qwen/Qwen3-Embedding-8B' },
{ key: 'AI_EMBED_API_KEY', label: 'Embedding API Key' },
];
const ALL_AI_KEYS = [...VISION_CONFIG_KEYS, ...EMBED_CONFIG_KEYS, { key: EMBED_DIM_KEY, default: DEFAULT_EMBED_DIMENSION }];
const RERANK_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
{ key: 'AI_RERANK_API_URL', label: 'Rerank API URL' },
{ key: 'AI_RERANK_MODEL', label: 'Rerank Model' },
{ key: 'AI_RERANK_API_KEY', label: 'Rerank API Key' },
];
const ALL_AI_KEYS: AiConfigKeyBase[] = [
...VISION_CONFIG_KEYS,
...EMBED_CONFIG_KEYS,
...RERANK_CONFIG_KEYS,
{ key: EMBED_DIM_KEY, default: DEFAULT_EMBED_DIMENSION },
];
const formatBytes = (bytes?: number | null) => {
if (bytes === null || bytes === undefined) return '-';
@@ -194,6 +214,8 @@ export default function SystemSettingsPage() {
}
}, [buildProviderConfigValues, message, t, vectorConfigForm, vectorProviders]);
const vectorSectionLoading = vectorStatsLoading || vectorConfigLoading;
// 离开“外观设置”时,恢复后端持久化配置(取消未保存的预览)
useEffect(() => {
if (activeTab !== 'appearance') {
@@ -303,7 +325,7 @@ export default function SystemSettingsPage() {
</Form.Item>
</Card>
<Card title={t('Advanced')} style={{ marginTop: 24 }}>
<Form.Item name={THEME_KEYS.TOKENS} label={t('Override AntD Tokens (JSON)')} tooltip={t('e.g. {"colorText": "#222"}') }>
<Form.Item name={THEME_KEYS.TOKENS} label={t('Override AntD Tokens (JSON)')} tooltip={t('e.g. {"colorText": "#222"}')}>
<Input.TextArea autoSize={{ minRows: 4 }} placeholder='{ "colorText": "#222" }' />
</Form.Item>
<Form.Item name={THEME_KEYS.CSS} label={t('Custom CSS')}>
@@ -402,6 +424,13 @@ export default function SystemSettingsPage() {
<InputNumber min={1} max={32768} style={{ width: '100%' }} />
</Form.Item>
</Card>
<Card title={t('Rerank Model')} style={{ marginTop: 24 }}>
{RERANK_CONFIG_KEYS.map(({ key, label }) => (
<Form.Item key={key} name={key} label={t(label)}>
<Input size="large" />
</Form.Item>
))}
</Card>
<Form.Item style={{ marginTop: 24 }}>
<Button type="primary" htmlType="submit" loading={loading} block>
{t('Save')}
@@ -428,178 +457,180 @@ export default function SystemSettingsPage() {
{t('Refresh')}
</Button>
</div>
{vectorMetaError ? (
<Alert type="error" showIcon message={vectorMetaError} />
) : null}
{vectorStatsLoading && !vectorStats ? (
<Spin />
) : vectorStats ? (
<Space direction="vertical" size={16} style={{ width: '100%' }}>
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 24 }}>
<div>
<div style={{ color: '#888' }}>{t('Collections')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.collection_count}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Vectors')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.total_vectors}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Database Size')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.db_file_size_bytes)}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Estimated Memory')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.estimated_total_memory_bytes)}</div>
</div>
</div>
{vectorStats.collections.length ? (
<Space direction="vertical" style={{ width: '100%' }} size={16}>
{vectorStats.collections.map((collection) => (
<div key={collection.name} style={{ border: '1px solid #f0f0f0', borderRadius: 8, padding: 16 }}>
<Space direction="vertical" size={12} style={{ width: '100%' }}>
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 12 }}>
<strong>{collection.name}</strong>
<span style={{ color: '#888' }}>
{collection.is_vector_collection && collection.dimension
? `${t('Dimension')}: ${collection.dimension}`
: t('Non-vector collection')}
</span>
</div>
<div>{t('Vectors')}: {collection.row_count}</div>
{collection.is_vector_collection ? (
<div>{t('Estimated memory')}: {formatBytes(collection.estimated_memory_bytes)}</div>
) : null}
{collection.indexes.length ? (
<Space direction="vertical" size={4} style={{ width: '100%' }}>
<span>{t('Indexes')}:</span>
<ul style={{ paddingLeft: 20, margin: 0 }}>
{collection.indexes.map((index) => (
<li key={`${collection.name}-${index.index_name || 'default'}`}>
<span>{index.index_name || t('Unnamed index')}</span>
<span>{' · '}{index.index_type || '-'}</span>
<span>{' · '}{index.metric_type || '-'}</span>
<span>{' · '}{t('Indexed rows')}: {index.indexed_rows}</span>
<span>{' · '}{t('Pending rows')}: {index.pending_index_rows}</span>
<span>{' · '}{t('Status')}: {index.state || '-'}</span>
</li>
))}
</ul>
</Space>
) : null}
</Space>
{vectorSectionLoading ? (
<div style={{ display: 'flex', justifyContent: 'center', padding: '24px 0' }}>
<Spin />
</div>
) : (
<>
{vectorMetaError ? (
<Alert type="error" showIcon message={vectorMetaError} />
) : null}
{vectorStats ? (
<Space direction="vertical" size={16} style={{ width: '100%' }}>
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 24 }}>
<div>
<div style={{ color: '#888' }}>{t('Collections')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.collection_count}</div>
</div>
))}
<div>
<div style={{ color: '#888' }}>{t('Vectors')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.total_vectors}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Database Size')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.db_file_size_bytes)}</div>
</div>
<div>
<div style={{ color: '#888' }}>{t('Estimated Memory')}</div>
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.estimated_total_memory_bytes)}</div>
</div>
</div>
{vectorStats.collections.length ? (
<Space direction="vertical" style={{ width: '100%' }} size={16}>
{vectorStats.collections.map((collection) => (
<div key={collection.name} style={{ border: '1px solid #f0f0f0', borderRadius: 8, padding: 16 }}>
<Space direction="vertical" size={12} style={{ width: '100%' }}>
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 12 }}>
<strong>{collection.name}</strong>
<span style={{ color: '#888' }}>
{collection.is_vector_collection && collection.dimension
? `${t('Dimension')}: ${collection.dimension}`
: t('Non-vector collection')}
</span>
</div>
<div>{t('Vectors')}: {collection.row_count}</div>
{collection.is_vector_collection ? (
<div>{t('Estimated memory')}: {formatBytes(collection.estimated_memory_bytes)}</div>
) : null}
{collection.indexes.length ? (
<Space direction="vertical" size={4} style={{ width: '100%' }}>
<span>{t('Indexes')}:</span>
<ul style={{ paddingLeft: 20, margin: 0 }}>
{collection.indexes.map((index) => (
<li key={`${collection.name}-${index.index_name || 'default'}`}>
<span>{index.index_name || t('Unnamed index')}</span>
<span>{' · '}{index.index_type || '-'}</span>
<span>{' · '}{index.metric_type || '-'}</span>
<span>{' · '}{t('Indexed rows')}: {index.indexed_rows}</span>
<span>{' · '}{t('Pending rows')}: {index.pending_index_rows}</span>
<span>{' · '}{t('Status')}: {index.state || '-'}</span>
</li>
))}
</ul>
</Space>
) : null}
</Space>
</div>
))}
</Space>
) : (
<Empty description={t('No collections')} />
)}
<div style={{ color: '#888' }}>
{t('Estimated memory is calculated as vectors x dimension x 4 bytes (float32).')}
</div>
</Space>
) : vectorStatsError ? (
<div style={{ color: '#ff4d4f' }}>{vectorStatsError}</div>
) : (
<Empty description={t('No collections')} />
)}
<div style={{ color: '#888' }}>
{t('Estimated memory is calculated as vectors x dimension x 4 bytes (float32).')}
</div>
</Space>
) : vectorStatsError ? (
<div style={{ color: '#ff4d4f' }}>{vectorStatsError}</div>
) : (
<Empty description={t('No collections')} />
<Form
layout="vertical"
form={vectorConfigForm}
onFinish={handleVectorConfigSave}
initialValues={{ type: selectedProviderType || undefined, config: {} }}
>
<Form.Item
name="type"
label={t('Database Provider')}
rules={[{ required: true, message: t('Please select a provider') }]}
>
<Select
size="large"
options={vectorProviders.map((provider) => ({
value: provider.type,
label: provider.enabled ? provider.label : `${provider.label} (${t('Coming soon')})`,
disabled: !provider.enabled,
}))}
onChange={handleProviderChange}
loading={vectorConfigLoading && !vectorProviders.length}
/>
</Form.Item>
{selectedProvider?.description ? (
<Alert
type="info"
showIcon
message={t(selectedProvider.description)}
style={{ marginBottom: 16 }}
/>
) : null}
{selectedProvider?.config_schema?.map((field) => (
<Form.Item
key={field.key}
name={['config', field.key]}
label={t(field.label)}
rules={field.required ? [{ required: true, message: t('Please input {label}', { label: t(field.label) }) }] : []}
>
{field.type === 'password' ? (
<Input.Password size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
) : (
<Input size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
)}
</Form.Item>
))}
{selectedProvider && !selectedProvider.enabled ? (
<Alert
type="warning"
showIcon
message={t('This provider is not available yet')}
style={{ marginBottom: 16 }}
/>
) : null}
<Form.Item>
<Space direction="vertical" style={{ width: '100%' }}>
<Button
type="primary"
htmlType="submit"
loading={vectorConfigSaving}
block
disabled={!selectedProvider?.enabled}
>
{t('Save')}
</Button>
<Button
danger
htmlType="button"
block
onClick={() => {
Modal.confirm({
title: t('Confirm clear vector database?'),
content: t('This will delete all collections irreversibly.'),
okText: t('Confirm Clear'),
okType: 'danger',
cancelText: t('Cancel'),
onOk: async () => {
try {
await vectorDBApi.clearAll();
message.success(t('Vector database cleared'));
await fetchVectorStats();
await fetchVectorMeta();
} catch (e: any) {
message.error(e.message || t('Clear failed'));
}
},
});
}}
>
{t('Clear Vector DB')}
</Button>
</Space>
</Form.Item>
</Form>
</>
)}
</Space>
{vectorConfigLoading && !vectorProviders.length ? (
<Spin />
) : (
<Form
layout="vertical"
form={vectorConfigForm}
onFinish={handleVectorConfigSave}
initialValues={{ type: selectedProviderType || undefined, config: {} }}
>
<Form.Item
name="type"
label={t('Database Provider')}
rules={[{ required: true, message: t('Please select a provider') }]}
>
<Select
size="large"
options={vectorProviders.map((provider) => ({
value: provider.type,
label: provider.enabled ? provider.label : `${provider.label} (${t('Coming soon')})`,
disabled: !provider.enabled,
}))}
onChange={handleProviderChange}
loading={vectorConfigLoading && !vectorProviders.length}
/>
</Form.Item>
{selectedProvider?.description ? (
<Alert
type="info"
showIcon
message={t(selectedProvider.description)}
style={{ marginBottom: 16 }}
/>
) : null}
{selectedProvider?.config_schema?.map((field) => (
<Form.Item
key={field.key}
name={['config', field.key]}
label={t(field.label)}
rules={field.required ? [{ required: true, message: t('Please input {label}', { label: t(field.label) }) }] : []}
>
{field.type === 'password' ? (
<Input.Password size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
) : (
<Input size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
)}
</Form.Item>
))}
{selectedProvider && !selectedProvider.enabled ? (
<Alert
type="warning"
showIcon
message={t('This provider is not available yet')}
style={{ marginBottom: 16 }}
/>
) : null}
<Form.Item>
<Space direction="vertical" style={{ width: '100%' }}>
<Button
type="primary"
htmlType="submit"
loading={vectorConfigSaving}
block
disabled={!selectedProvider?.enabled}
>
{t('Save')}
</Button>
<Button
danger
htmlType="button"
block
onClick={() => {
Modal.confirm({
title: t('Confirm clear vector database?'),
content: t('This will delete all collections irreversibly.'),
okText: t('Confirm Clear'),
okType: 'danger',
cancelText: t('Cancel'),
onOk: async () => {
try {
await vectorDBApi.clearAll();
message.success(t('Vector database cleared'));
await fetchVectorStats();
await fetchVectorMeta();
} catch (e: any) {
message.error(e.message || t('Clear failed'));
}
},
});
}}
>
{t('Clear Vector DB')}
</Button>
</Space>
</Form.Item>
</Form>
)}
</Space>
</Card>
),