mirror of
https://github.com/DrizzleTime/Foxel.git
synced 2026-05-07 03:12:40 +08:00
feat: Enhance vector database providers with source path handling and improved search functionality
This commit is contained in:
@@ -1,4 +1,7 @@
|
|||||||
|
from typing import Any, Dict, List, Tuple
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, Query
|
from fastapi import APIRouter, Depends, Query
|
||||||
|
|
||||||
from schemas.fs import SearchResultItem
|
from schemas.fs import SearchResultItem
|
||||||
from services.auth import get_current_active_user, User
|
from services.auth import get_current_active_user, User
|
||||||
from services.ai import get_text_embedding
|
from services.ai import get_text_embedding
|
||||||
@@ -6,24 +9,96 @@ from services.vector_db import VectorDBService
|
|||||||
|
|
||||||
router = APIRouter(prefix="/api/search", tags=["search"])
|
router = APIRouter(prefix="/api/search", tags=["search"])
|
||||||
|
|
||||||
async def search_files_by_vector(q: str, top_k: int):
    """Run a semantic search for ``q`` and return the closest indexed files.

    Args:
        q: Free-text query; it is embedded with ``get_text_embedding``.
        top_k: Maximum number of hits requested from the vector store.

    Returns:
        A dict with ``items`` (list of ``SearchResultItem``) and the echoed
        ``query``.
    """
    embedding = await get_text_embedding(q)
    vector_db = VectorDBService()
    results = await vector_db.search_vectors("vector_collection", embedding, top_k)
    # The store returns one bucket per query vector; tolerate an empty reply
    # instead of raising IndexError on ``results[0]``.
    hits = results[0] if results else []
    items = [
        SearchResultItem(id=res["id"], path=res["entity"]["path"], score=res["distance"])
        for res in hits
    ]
    return {"items": items, "query": q}
|
|
||||||
|
|
||||||
async def search_files_by_name(q: str, top_k: int):
|
def _normalize_result(raw: Dict[str, Any], source: str, fallback_score: float = 0.0) -> SearchResultItem:
    """Convert one provider hit into a ``SearchResultItem``.

    Args:
        raw: Provider record with optional ``id``, ``distance`` and ``entity``
            keys.
        source: Retrieval channel label (the callers in this file pass
            ``"vector"`` or ``"filename"``). It is recorded in the metadata
            and used as ``source_type`` when the entity carries no ``type``.
        fallback_score: Score used when ``distance`` is missing or is not a
            number.

    Returns:
        The normalized search result.
    """
    entity = dict(raw.get("entity") or {})
    stored_path = entity.get("path")
    # ``source_path`` takes precedence over the stored ``path`` value.
    path = entity.get("source_path") or stored_path or ""

    chunk_id_value = entity.get("chunk_id")
    chunk_id = str(chunk_id_value) if chunk_id_value is not None else None

    raw_score = raw.get("distance")
    try:
        score = float(raw_score) if raw_score is not None else fallback_score
    except (TypeError, ValueError):
        # A malformed distance must not fail the whole search response.
        score = fallback_score

    metadata = {
        "retrieval_source": source,
        "raw_distance": raw_score,
    }
    if stored_path and stored_path != path:
        metadata["stored_path"] = stored_path
    vector_id = entity.get("vector_id")
    if vector_id:
        metadata["vector_id"] = vector_id

    # Avoid emitting the literal string "None" when the provider gave no id.
    raw_id = raw.get("id")
    result_id = str(raw_id) if raw_id is not None else (stored_path or path)

    return SearchResultItem(
        id=result_id,
        path=path,
        score=score,
        chunk_id=chunk_id,
        snippet=entity.get("text") or entity.get("description") or entity.get("name"),
        mime=entity.get("mime"),
        source_type=entity.get("type") or source,
        start_offset=entity.get("start_offset"),
        end_offset=entity.get("end_offset"),
        metadata=metadata,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _vector_search(query: str, top_k: int) -> List[SearchResultItem]:
    """Embed ``query`` and return normalized nearest-neighbour hits.

    The search is best effort: if the embedding call or the vector store
    raises, an empty list is returned instead of propagating the error.

    Args:
        query: Free-text query to embed.
        top_k: Requested number of hits. At least 10 candidates are always
            fetched; the caller is expected to trim the list.

    Returns:
        Normalized results in the order the store returned them.
    """
    vector_db = VectorDBService()

    try:
        embedding = await get_text_embedding(query)
    except Exception:
        # Deliberate best effort: treat any embedding failure as "no results".
        embedding = None
    if not embedding:
        return []

    try:
        raw_results = await vector_db.search_vectors("vector_collection", embedding, max(top_k, 10))
    except Exception:
        # Deliberate best effort: a store failure also yields no results.
        return []

    # The store returns one bucket of hits per query vector; flatten them.
    return [
        _normalize_result(record, "vector")
        for bucket in raw_results or []
        for record in bucket or []
    ]
|
||||||
|
|
||||||
|
|
||||||
|
async def _filename_search(query: str, page: int, page_size: int) -> Tuple[List[SearchResultItem], bool]:
    """Search indexed paths by substring and return one page of unique files.

    Several index rows can share one source file, so rows are de-duplicated
    by path before paging. The search is best effort: a store failure yields
    an empty page.

    Args:
        query: Substring to look for in the stored source paths.
        page: 1-based page number.
        page_size: Number of items per page.

    Returns:
        ``(items, has_more)`` where ``has_more`` says whether further unique
        paths exist beyond this page within the fetched window.
    """
    vector_db = VectorDBService()
    # Over-fetch beyond the requested page so that de-duplication still
    # leaves enough rows; capped at 2000, so very deep pages may come back empty.
    limit = max(page * page_size + 1, page_size * (page + 2))
    limit = min(limit, 2000)

    try:
        raw_results = await vector_db.search_by_path("vector_collection", query, limit)
    except Exception:
        # Deliberate best effort: report "nothing found" on store errors.
        return [], False

    records = raw_results[0] if raw_results else []
    deduped: List[SearchResultItem] = []
    seen_paths = set()
    for record in records or []:
        item = _normalize_result(record, "filename", fallback_score=1.0)
        stored_path = item.metadata.get("stored_path") if item.metadata else None
        key = item.path or stored_path or ""
        if key in seen_paths:
            continue
        seen_paths.add(key)
        deduped.append(item)

    start = max(page - 1, 0) * page_size
    end = start + page_size
    page_items = deduped[start:end]
    for offset, item in enumerate(page_items):
        if item.metadata is None:
            item.metadata = {}
        # Position of the item within the de-duplicated result list.
        item.metadata.setdefault("retrieval_rank", start + offset)

    has_more = len(deduped) > end
    return page_items, has_more
|
||||||
|
|
||||||
|
|
||||||
@router.get("")
|
@router.get("")
|
||||||
@@ -31,11 +106,32 @@ async def search_files(
|
|||||||
q: str = Query(..., description="搜索查询"),
|
q: str = Query(..., description="搜索查询"),
|
||||||
top_k: int = Query(10, description="返回结果数量"),
|
top_k: int = Query(10, description="返回结果数量"),
|
||||||
mode: str = Query("vector", description="搜索模式: 'vector' 或 'filename'"),
|
mode: str = Query("vector", description="搜索模式: 'vector' 或 'filename'"),
|
||||||
|
page: int = Query(1, description="分页页码,仅在文件名搜索模式下生效"),
|
||||||
|
page_size: int = Query(10, description="分页大小,仅在文件名搜索模式下生效"),
|
||||||
user: User = Depends(get_current_active_user),
|
user: User = Depends(get_current_active_user),
|
||||||
):
|
):
|
||||||
|
if not q.strip():
|
||||||
|
return {"items": [], "query": q}
|
||||||
|
|
||||||
|
top_k = max(top_k, 1)
|
||||||
|
page = max(page, 1)
|
||||||
|
page_size = max(min(page_size, 100), 1)
|
||||||
|
|
||||||
if mode == "vector":
|
if mode == "vector":
|
||||||
return await search_files_by_vector(q, top_k)
|
items = (await _vector_search(q, top_k))[:top_k]
|
||||||
elif mode == "filename":
|
elif mode == "filename":
|
||||||
return await search_files_by_name(q, top_k)
|
items, has_more = await _filename_search(q, page, page_size)
|
||||||
|
return {
|
||||||
|
"items": items,
|
||||||
|
"query": q,
|
||||||
|
"mode": mode,
|
||||||
|
"pagination": {
|
||||||
|
"page": page,
|
||||||
|
"page_size": page_size,
|
||||||
|
"has_more": has_more,
|
||||||
|
},
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
return {"items": [], "query": q, "error": "Invalid search mode"}
|
items = (await _vector_search(q, top_k))[:top_k]
|
||||||
|
|
||||||
|
return {"items": items, "query": q, "mode": mode}
|
||||||
|
|||||||
@@ -21,6 +21,13 @@ class SearchResultItem(BaseModel):
|
|||||||
id: int | str
|
id: int | str
|
||||||
path: str
|
path: str
|
||||||
score: float
|
score: float
|
||||||
|
chunk_id: Optional[str] = None
|
||||||
|
snippet: Optional[str] = None
|
||||||
|
mime: Optional[str] = None
|
||||||
|
source_type: Optional[str] = None
|
||||||
|
start_offset: Optional[int] = None
|
||||||
|
end_offset: Optional[int] = None
|
||||||
|
metadata: Optional[dict] = None
|
||||||
|
|
||||||
|
|
||||||
class MkdirRequest(BaseModel):
|
class MkdirRequest(BaseModel):
|
||||||
|
|||||||
@@ -68,3 +68,46 @@ async def get_text_embedding(text: str) -> List[float]:
|
|||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
return result["data"][0]["embedding"]
|
return result["data"][0]["embedding"]
|
||||||
|
|
||||||
|
|
||||||
|
async def rerank_texts(query: str, documents: List[str]) -> List[float]:
    """Call the configured rerank model and return one score per result.

    The call is best effort. An empty list is returned when there are no
    documents, when the rerank endpoint is not fully configured, when the
    request fails (HTTP error status, timeout, connection error) or when the
    response is not the expected JSON shape.

    Args:
        query: Query text the documents are ranked against.
        documents: Candidate document texts.

    Returns:
        Scores parsed from the ``results`` list of the response, in response
        order. Entries without a ``score`` key are skipped; a score that
        cannot be converted to float is reported as ``0.0``.
    """
    if not documents:
        return []

    api_url = await ConfigCenter.get("AI_RERANK_API_URL")
    model = await ConfigCenter.get("AI_RERANK_MODEL")
    api_key = await ConfigCenter.get("AI_RERANK_API_KEY")
    if not api_url or not model or not api_key:
        return []

    payload = {
        "model": model,
        "query": query,
        "documents": documents,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(api_url, headers=headers, json=payload)
            resp.raise_for_status()
            data = resp.json()
        except (httpx.HTTPError, ValueError):
            # httpx.HTTPError covers both error statuses and transport
            # failures; ValueError covers a body that is not valid JSON.
            return []

    results = data.get("results") if isinstance(data, dict) else None
    if not isinstance(results, list):
        return []

    scores: List[float] = []
    for item in results:
        if isinstance(item, dict) and "score" in item:
            try:
                scores.append(float(item["score"]))
            except (TypeError, ValueError):
                scores.append(0.0)
    return scores
|
||||||
|
|||||||
@@ -1,11 +1,95 @@
|
|||||||
from typing import Dict, Any
|
from typing import Dict, Any, List, Tuple
|
||||||
from fastapi.responses import Response
|
from fastapi.responses import Response
|
||||||
import base64
|
import base64
|
||||||
|
import mimetypes
|
||||||
|
import os
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
from services.ai import describe_image_base64, get_text_embedding
|
from services.ai import describe_image_base64, get_text_embedding
|
||||||
from services.vector_db import VectorDBService, DEFAULT_VECTOR_DIMENSION
|
from services.vector_db import VectorDBService, DEFAULT_VECTOR_DIMENSION
|
||||||
from services.logging import LogService
|
from services.logging import LogService
|
||||||
from services.config import ConfigCenter
|
from services.config import ConfigCenter
|
||||||
|
|
||||||
|
try: # Pillow is optional but bundled with the project dependencies
|
||||||
|
from PIL import Image
|
||||||
|
except ImportError: # pragma: no cover - fallback when pillow missing
|
||||||
|
Image = None
|
||||||
|
|
||||||
|
|
||||||
|
CHUNK_SIZE = 800
CHUNK_OVERLAP = 200
MAX_IMAGE_EDGE = 1600
JPEG_QUALITY = 85


def _chunk_text(content: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[Tuple[int, str, int, int]]:
    """Split text into fixed-size, overlapping windows.

    Args:
        content: Text to split.
        chunk_size: Window length in characters; a non-positive value falls
            back to ``CHUNK_SIZE``.
        overlap: Characters shared by consecutive windows. A negative value
            is treated as 0; a value of ``chunk_size`` or more is reduced to
            a quarter of ``chunk_size`` (at least 1).

    Returns:
        A list of ``(chunk_id, chunk_text, start, end)`` tuples. ``start`` and
        ``end`` are the window bounds in ``content`` (before stripping);
        windows that contain only whitespace are omitted and do not consume a
        ``chunk_id``.
    """
    if chunk_size <= 0:
        chunk_size = CHUNK_SIZE
    if overlap < 0:
        # A negative overlap would make the step larger than the window and
        # silently drop the text between two windows.
        overlap = 0
    if overlap >= chunk_size:
        overlap = max(chunk_size // 4, 1)

    # Always advance by at least one character; with chunk_size == 1 the
    # adjusted overlap equals the window and the loop would never terminate.
    step = max(chunk_size - overlap, 1)

    chunks: List[Tuple[int, str, int, int]] = []
    idx = 0
    start = 0
    length = len(content)
    while start < length:
        end = min(length, start + chunk_size)
        chunk = content[start:end].strip()
        if chunk:
            chunks.append((idx, chunk, start, end))
            idx += 1
        if end >= length:
            break
        start += step
    return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def _guess_mime(path: str) -> str:
    """Guess the MIME type of ``path`` from its file extension.

    Returns:
        The guessed MIME type, or ``"application/octet-stream"`` when the
        extension is unknown or missing.
    """
    mime, _ = mimetypes.guess_type(path)
    return mime or "application/octet-stream"
|
||||||
|
|
||||||
|
|
||||||
|
def _chunk_key(path: str, chunk_id: str) -> str:
    """Build the per-chunk key for one chunk of a file.

    The key is the file path followed by ``#chunk=<chunk_id>``.
    """
    return f"{path}#chunk={chunk_id}"
|
||||||
|
|
||||||
|
|
||||||
|
def _compress_image_for_embedding(input_bytes: bytes) -> Tuple[bytes, Dict[str, Any] | None]:
    """Shrink an image before it is sent to the vision model.

    The image is converted to RGB, scaled down so that its longest edge is at
    most ``MAX_IMAGE_EDGE`` pixels, and re-encoded as JPEG. The re-encoded
    bytes are used only when they are smaller than the input.

    Args:
        input_bytes: Raw bytes of the source image.

    Returns:
        ``(data, info)``. When compression helped, ``data`` is the JPEG
        payload and ``info`` describes it (original/compressed byte counts,
        whether it was scaled, final width/height). Otherwise the original
        bytes are returned with ``None``; that is also the result when Pillow
        is unavailable, the input is empty, or the image cannot be processed.
    """
    if Image is None or not input_bytes:
        return input_bytes, None

    try:
        with Image.open(BytesIO(input_bytes)) as source:
            img = source.convert("RGB")
            width, height = img.size
            longest_edge = max(width, height)
            scale = 1.0
            if longest_edge > MAX_IMAGE_EDGE:
                scale = MAX_IMAGE_EDGE / float(longest_edge)
                new_size = (max(int(width * scale), 1), max(int(height * scale), 1))
                # Newer Pillow exposes filters under Image.Resampling,
                # older versions directly on the Image module.
                resample_mode = getattr(getattr(Image, "Resampling", Image), "LANCZOS")
                img = img.resize(new_size, resample=resample_mode)

            buffer = BytesIO()
            img.save(buffer, format="JPEG", quality=JPEG_QUALITY, optimize=True)
            compressed = buffer.getvalue()

            if len(compressed) < len(input_bytes):
                return compressed, {
                    "original_bytes": len(input_bytes),
                    "compressed_bytes": len(compressed),
                    "scaled": scale < 1.0,
                    "width": img.width,
                    "height": img.height,
                }
    except Exception:  # pragma: no cover - fall back on any image processing error
        return input_bytes, None

    return input_bytes, None
|
||||||
|
|
||||||
|
|
||||||
class VectorIndexProcessor:
|
class VectorIndexProcessor:
|
||||||
name = "向量索引"
|
name = "向量索引"
|
||||||
@@ -33,6 +117,7 @@ class VectorIndexProcessor:
|
|||||||
index_type = config.get("index_type", "vector")
|
index_type = config.get("index_type", "vector")
|
||||||
vector_db = VectorDBService()
|
vector_db = VectorDBService()
|
||||||
collection_name = "vector_collection"
|
collection_name = "vector_collection"
|
||||||
|
|
||||||
if action == "destroy":
|
if action == "destroy":
|
||||||
await vector_db.delete_vector(collection_name, path)
|
await vector_db.delete_vector(collection_name, path)
|
||||||
await LogService.info(
|
await LogService.info(
|
||||||
@@ -42,9 +127,19 @@ class VectorIndexProcessor:
|
|||||||
)
|
)
|
||||||
return Response(content=f"文件 {path} 的 {index_type} 索引已销毁", media_type="text/plain")
|
return Response(content=f"文件 {path} 的 {index_type} 索引已销毁", media_type="text/plain")
|
||||||
|
|
||||||
if index_type == 'simple':
|
mime_type = _guess_mime(path)
|
||||||
|
|
||||||
|
if index_type == "simple":
|
||||||
await vector_db.ensure_collection(collection_name, vector=False)
|
await vector_db.ensure_collection(collection_name, vector=False)
|
||||||
await vector_db.upsert_vector(collection_name, {'path': path})
|
await vector_db.delete_vector(collection_name, path)
|
||||||
|
await vector_db.upsert_vector(collection_name, {
|
||||||
|
"path": path,
|
||||||
|
"source_path": path,
|
||||||
|
"chunk_id": "filename",
|
||||||
|
"mime": mime_type,
|
||||||
|
"type": "filename",
|
||||||
|
"name": os.path.basename(path),
|
||||||
|
})
|
||||||
await LogService.info(
|
await LogService.info(
|
||||||
"processor:vector_index",
|
"processor:vector_index",
|
||||||
f"Created simple index for {path}",
|
f"Created simple index for {path}",
|
||||||
@@ -53,24 +148,7 @@ class VectorIndexProcessor:
|
|||||||
return Response(content=f"文件 {path} 的普通索引已创建", media_type="text/plain")
|
return Response(content=f"文件 {path} 的普通索引已创建", media_type="text/plain")
|
||||||
|
|
||||||
file_ext = path.split('.')[-1].lower()
|
file_ext = path.split('.')[-1].lower()
|
||||||
description = ""
|
details: Dict[str, Any] = {"path": path, "action": "create", "index_type": "vector"}
|
||||||
embedding = None
|
|
||||||
|
|
||||||
if file_ext in ["jpg", "jpeg", "png", "bmp"]:
|
|
||||||
base64_image = base64.b64encode(input_bytes).decode("utf-8")
|
|
||||||
description = await describe_image_base64(base64_image)
|
|
||||||
embedding = await get_text_embedding(description)
|
|
||||||
log_message = f"Indexed image {path}"
|
|
||||||
response_message = f"图片已索引,描述:{description}"
|
|
||||||
elif file_ext in ["txt", "md"]:
|
|
||||||
text = input_bytes.decode("utf-8")
|
|
||||||
embedding = await get_text_embedding(text)
|
|
||||||
description = text[:100] + "..." if len(text) > 100 else text
|
|
||||||
log_message = f"Indexed text file {path}"
|
|
||||||
response_message = f"文本文件已索引"
|
|
||||||
|
|
||||||
if embedding is None:
|
|
||||||
return Response(content="不支持的文件类型", status_code=400)
|
|
||||||
|
|
||||||
raw_dim = await ConfigCenter.get('AI_EMBED_DIM', DEFAULT_VECTOR_DIMENSION)
|
raw_dim = await ConfigCenter.get('AI_EMBED_DIM', DEFAULT_VECTOR_DIMENSION)
|
||||||
try:
|
try:
|
||||||
@@ -81,15 +159,103 @@ class VectorIndexProcessor:
|
|||||||
vector_dim = DEFAULT_VECTOR_DIMENSION
|
vector_dim = DEFAULT_VECTOR_DIMENSION
|
||||||
|
|
||||||
await vector_db.ensure_collection(collection_name, vector=True, dim=vector_dim)
|
await vector_db.ensure_collection(collection_name, vector=True, dim=vector_dim)
|
||||||
await vector_db.upsert_vector(
|
await vector_db.delete_vector(collection_name, path)
|
||||||
collection_name, {'path': path, 'embedding': embedding})
|
|
||||||
|
if file_ext in ["jpg", "jpeg", "png", "bmp"]:
|
||||||
|
processed_bytes, compression = _compress_image_for_embedding(input_bytes)
|
||||||
|
base64_image = base64.b64encode(processed_bytes).decode("utf-8")
|
||||||
|
description = await describe_image_base64(base64_image)
|
||||||
|
embedding = await get_text_embedding(description)
|
||||||
|
image_mime = "image/jpeg" if compression else mime_type
|
||||||
|
await vector_db.upsert_vector(collection_name, {
|
||||||
|
"path": _chunk_key(path, "image"),
|
||||||
|
"source_path": path,
|
||||||
|
"chunk_id": "image",
|
||||||
|
"embedding": embedding,
|
||||||
|
"text": description,
|
||||||
|
"mime": image_mime,
|
||||||
|
"type": "image",
|
||||||
|
})
|
||||||
|
details["description"] = description
|
||||||
|
if compression:
|
||||||
|
details["image_compression"] = compression
|
||||||
|
await LogService.info(
|
||||||
|
"processor:vector_index",
|
||||||
|
f"Indexed image {path}",
|
||||||
|
details=details,
|
||||||
|
)
|
||||||
|
return Response(content=f"图片已索引,描述:{description}", media_type="text/plain")
|
||||||
|
|
||||||
|
if file_ext in ["txt", "md"]:
|
||||||
|
try:
|
||||||
|
text = input_bytes.decode("utf-8")
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
return Response(content="文本文件解码失败", status_code=400)
|
||||||
|
|
||||||
|
chunks = _chunk_text(text)
|
||||||
|
if not chunks:
|
||||||
|
await vector_db.upsert_vector(collection_name, {
|
||||||
|
"path": _chunk_key(path, "0"),
|
||||||
|
"source_path": path,
|
||||||
|
"chunk_id": "0",
|
||||||
|
"embedding": await get_text_embedding(text or path),
|
||||||
|
"text": text,
|
||||||
|
"mime": mime_type,
|
||||||
|
"type": "text",
|
||||||
|
"start_offset": 0,
|
||||||
|
"end_offset": len(text),
|
||||||
|
})
|
||||||
|
details["chunks"] = 1
|
||||||
|
await LogService.info(
|
||||||
|
"processor:vector_index",
|
||||||
|
f"Indexed text file {path}",
|
||||||
|
details=details,
|
||||||
|
)
|
||||||
|
return Response(content="文本文件已索引", media_type="text/plain")
|
||||||
|
|
||||||
|
chunk_count = 0
|
||||||
|
for chunk_id, chunk_text, start, end in chunks:
|
||||||
|
embedding = await get_text_embedding(chunk_text)
|
||||||
|
await vector_db.upsert_vector(collection_name, {
|
||||||
|
"path": _chunk_key(path, str(chunk_id)),
|
||||||
|
"source_path": path,
|
||||||
|
"chunk_id": str(chunk_id),
|
||||||
|
"embedding": embedding,
|
||||||
|
"text": chunk_text,
|
||||||
|
"mime": mime_type,
|
||||||
|
"type": "text",
|
||||||
|
"start_offset": start,
|
||||||
|
"end_offset": end,
|
||||||
|
})
|
||||||
|
chunk_count += 1
|
||||||
|
|
||||||
|
details["chunks"] = chunk_count
|
||||||
|
sample = chunks[0][1]
|
||||||
|
details["sample"] = sample[:120]
|
||||||
|
await LogService.info(
|
||||||
|
"processor:vector_index",
|
||||||
|
f"Indexed text file {path}",
|
||||||
|
details=details,
|
||||||
|
)
|
||||||
|
return Response(content="文本文件已索引", media_type="text/plain")
|
||||||
|
|
||||||
|
# 其他类型暂未支持向量索引,回退为文件名索引
|
||||||
|
await vector_db.delete_vector(collection_name, path)
|
||||||
|
await vector_db.upsert_vector(collection_name, {
|
||||||
|
"path": _chunk_key(path, "fallback"),
|
||||||
|
"source_path": path,
|
||||||
|
"chunk_id": "filename",
|
||||||
|
"mime": mime_type,
|
||||||
|
"type": "filename",
|
||||||
|
"name": os.path.basename(path),
|
||||||
|
"embedding": [0.0] * vector_dim,
|
||||||
|
})
|
||||||
await LogService.info(
|
await LogService.info(
|
||||||
"processor:vector_index",
|
"processor:vector_index",
|
||||||
log_message,
|
f"File type fallback to simple index for {path}",
|
||||||
details={"path": path, "description": description, "action": "create", "index_type": "vector"},
|
details={"path": path, "action": "create", "index_type": "simple", "original_type": file_ext},
|
||||||
)
|
)
|
||||||
return Response(content=response_message, media_type="text/plain")
|
return Response(content="暂不支持该类型的向量索引,已创建文件名索引", media_type="text/plain")
|
||||||
|
|
||||||
|
|
||||||
PROCESSOR_TYPE = "vector_index"
|
PROCESSOR_TYPE = "vector_index"
|
||||||
|
|||||||
@@ -50,15 +50,20 @@ class MilvusLiteProvider(BaseVectorProvider):
|
|||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
if client.has_collection(collection_name):
|
if client.has_collection(collection_name):
|
||||||
return
|
return
|
||||||
|
common_fields = [
|
||||||
|
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
|
||||||
|
FieldSchema(name="source_path", dtype=DataType.VARCHAR, max_length=512, is_primary=False, auto_id=False),
|
||||||
|
]
|
||||||
|
|
||||||
if vector:
|
if vector:
|
||||||
vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
|
vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
|
||||||
if vector_dim <= 0:
|
if vector_dim <= 0:
|
||||||
vector_dim = 4096
|
vector_dim = 4096
|
||||||
fields = [
|
fields = [
|
||||||
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
|
*common_fields,
|
||||||
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
|
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
|
||||||
]
|
]
|
||||||
schema = CollectionSchema(fields, description="Image vector collection")
|
schema = CollectionSchema(fields, description="Vector collection", enable_dynamic_field=True)
|
||||||
client.create_collection(collection_name, schema=schema)
|
client.create_collection(collection_name, schema=schema)
|
||||||
index_params = MilvusClient.prepare_index_params()
|
index_params = MilvusClient.prepare_index_params()
|
||||||
index_params.add_index(
|
index_params.add_index(
|
||||||
@@ -70,38 +75,98 @@ class MilvusLiteProvider(BaseVectorProvider):
|
|||||||
)
|
)
|
||||||
client.create_index(collection_name, index_params=index_params)
|
client.create_index(collection_name, index_params=index_params)
|
||||||
else:
|
else:
|
||||||
fields = [
|
schema = CollectionSchema(common_fields, description="Simple file index", enable_dynamic_field=True)
|
||||||
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
|
|
||||||
]
|
|
||||||
schema = CollectionSchema(fields, description="Simple file index")
|
|
||||||
client.create_collection(collection_name, schema=schema)
|
client.create_collection(collection_name, schema=schema)
|
||||||
|
|
||||||
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
    """Insert or replace one row, filling in the derived key fields.

    ``source_path`` and ``vector_id`` default to the row's ``path`` when they
    are absent or ``None``. The caller's dict is not modified.

    Args:
        collection_name: Target collection.
        data: Row to store; ``path`` is the primary key.
    """
    payload = dict(data)
    for field in ("source_path", "vector_id"):
        # Also replace an explicit None so the defaulted field is never null.
        if payload.get(field) is None:
            payload[field] = payload.get("path")
    self._get_client().upsert(collection_name, data=[payload])
|
||||||
|
|
||||||
def delete_vector(self, collection_name: str, path: str) -> None:
    """Delete every row whose ``source_path`` equals ``path``.

    Args:
        collection_name: Collection to delete from.
        path: Source file path whose rows should be removed.
    """
    # Escape backslashes before quotes so a path cannot break out of, or
    # alter, the quoted string literal in the filter expression.
    escaped = path.replace("\\", "\\\\").replace('"', '\\"')
    self._get_client().delete(collection_name, filter=f'source_path == "{escaped}"')
|
||||||
|
|
||||||
def search_vectors(self, collection_name: str, query_embedding, top_k: int):
    """Run a cosine similarity search and return plain-dict hits.

    Args:
        collection_name: Collection to search.
        query_embedding: Query vector.
        top_k: Maximum number of hits to return.

    Returns:
        One list per query vector (always a single one here); each hit is
        ``{"id", "distance", "entity"}`` where ``entity`` holds the requested
        output fields and always has a ``path`` key.
    """
    output_fields = [
        "path",
        "source_path",
        "chunk_id",
        "mime",
        "text",
        "start_offset",
        "end_offset",
        "type",
        "name",
        # Written by upsert_vector and read back by the search API layer.
        "vector_id",
    ]
    raw_results = self._get_client().search(
        collection_name,
        data=[query_embedding],
        anns_field="embedding",
        search_params={"metric_type": "COSINE"},
        limit=top_k,
        output_fields=output_fields,
    )

    formatted = []
    for hits in raw_results:
        bucket = []
        for hit in hits:
            # Hits may be objects exposing attributes or plain dicts.
            if hasattr(hit, "entity"):
                entity = dict(getattr(hit, "entity", {}) or {})
                hit_id = getattr(hit, "id", None)
                distance = getattr(hit, "distance", None)
            elif isinstance(hit, dict):
                entity = dict(hit.get("entity") or {})
                hit_id = hit.get("id")
                distance = hit.get("distance")
            else:
                entity = {}
                hit_id = None
                distance = None
            entity.setdefault("path", entity.get("source_path"))
            bucket.append({"id": hit_id, "distance": distance, "entity": entity})
        formatted.append(bucket)
    return formatted
|
||||||
|
|
||||||
def search_by_path(self, collection_name: str, query_path: str, top_k: int):
    """Find rows whose ``source_path`` contains ``query_path``.

    Args:
        collection_name: Collection to query.
        query_path: Substring to look for; an empty value matches every row.
        top_k: Maximum number of rows to return.

    Returns:
        A single-bucket list of ``{"id", "distance", "entity"}`` dicts. The
        ``id`` is the row's ``path`` and ``distance`` is always ``1.0``
        because a substring match carries no similarity score.
    """
    if query_path:
        # Escape backslashes before quotes so the query cannot terminate the
        # quoted literal early.
        # NOTE(review): ``%`` and ``_`` in the query still act as LIKE
        # wildcards — confirm whether they should be escaped as well.
        escaped = query_path.replace("\\", "\\\\").replace('"', '\\"')
        filter_expr = f'source_path like "%{escaped}%"'
    else:
        filter_expr = "source_path like '%%'"

    results = self._get_client().query(
        collection_name,
        filter=filter_expr,
        limit=top_k,
        output_fields=[
            "path",
            "source_path",
            "chunk_id",
            "mime",
            "text",
            "start_offset",
            "end_offset",
            "type",
            "name",
            # Written by upsert_vector and read back by the search API layer.
            "vector_id",
        ],
    )

    formatted = []
    for row in results:
        entity = dict(row)
        entity.setdefault("path", entity.get("source_path"))
        formatted.append({"id": entity.get("path"), "distance": 1.0, "entity": entity})
    return [formatted]
|
||||||
|
|
||||||
def get_all_stats(self) -> Dict[str, Any]:
|
def get_all_stats(self) -> Dict[str, Any]:
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
|
|||||||
@@ -58,15 +58,19 @@ class MilvusServerProvider(BaseVectorProvider):
|
|||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
if client.has_collection(collection_name):
|
if client.has_collection(collection_name):
|
||||||
return
|
return
|
||||||
|
common_fields = [
|
||||||
|
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
|
||||||
|
FieldSchema(name="source_path", dtype=DataType.VARCHAR, max_length=512, is_primary=False, auto_id=False),
|
||||||
|
]
|
||||||
if vector:
|
if vector:
|
||||||
vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
|
vector_dim = dim if isinstance(dim, int) and dim > 0 else 0
|
||||||
if vector_dim <= 0:
|
if vector_dim <= 0:
|
||||||
vector_dim = 4096
|
vector_dim = 4096
|
||||||
fields = [
|
fields = [
|
||||||
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
|
*common_fields,
|
||||||
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
|
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=vector_dim),
|
||||||
]
|
]
|
||||||
schema = CollectionSchema(fields, description="Image vector collection")
|
schema = CollectionSchema(fields, description="Vector collection", enable_dynamic_field=True)
|
||||||
client.create_collection(collection_name, schema=schema)
|
client.create_collection(collection_name, schema=schema)
|
||||||
index_params = MilvusClient.prepare_index_params()
|
index_params = MilvusClient.prepare_index_params()
|
||||||
index_params.add_index(
|
index_params.add_index(
|
||||||
@@ -78,38 +82,98 @@ class MilvusServerProvider(BaseVectorProvider):
|
|||||||
)
|
)
|
||||||
client.create_index(collection_name, index_params=index_params)
|
client.create_index(collection_name, index_params=index_params)
|
||||||
else:
|
else:
|
||||||
fields = [
|
schema = CollectionSchema(common_fields, description="Simple file index", enable_dynamic_field=True)
|
||||||
FieldSchema(name="path", dtype=DataType.VARCHAR, max_length=512, is_primary=True, auto_id=False),
|
|
||||||
]
|
|
||||||
schema = CollectionSchema(fields, description="Simple file index")
|
|
||||||
client.create_collection(collection_name, schema=schema)
|
client.create_collection(collection_name, schema=schema)
|
||||||
|
|
||||||
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
    """Insert or replace one row, filling in the derived key fields.

    ``source_path`` and ``vector_id`` default to the row's ``path`` when they
    are absent or ``None``. The caller's dict is not modified.

    Args:
        collection_name: Target collection.
        data: Row to store; ``path`` is the primary key.
    """
    payload = dict(data)
    for field in ("source_path", "vector_id"):
        # Also replace an explicit None so the defaulted field is never null.
        if payload.get(field) is None:
            payload[field] = payload.get("path")
    self._get_client().upsert(collection_name, data=[payload])
|
||||||
|
|
||||||
def delete_vector(self, collection_name: str, path: str) -> None:
    """Delete every row whose ``source_path`` equals ``path``.

    Args:
        collection_name: Collection to delete from.
        path: Source file path whose rows should be removed.
    """
    # Escape backslashes before quotes so a path cannot break out of, or
    # alter, the quoted string literal in the filter expression.
    escaped = path.replace("\\", "\\\\").replace('"', '\\"')
    self._get_client().delete(collection_name, filter=f'source_path == "{escaped}"')
|
||||||
|
|
||||||
def search_vectors(self, collection_name: str, query_embedding, top_k: int):
    """Run a cosine similarity search and return plain-dict hits.

    Args:
        collection_name: Collection to search.
        query_embedding: Query vector.
        top_k: Maximum number of hits to return.

    Returns:
        One list per query vector (always a single one here); each hit is
        ``{"id", "distance", "entity"}`` where ``entity`` holds the requested
        output fields and always has a ``path`` key.
    """
    output_fields = [
        "path",
        "source_path",
        "chunk_id",
        "mime",
        "text",
        "start_offset",
        "end_offset",
        "type",
        "name",
        # Written by upsert_vector and read back by the search API layer.
        "vector_id",
    ]
    raw_results = self._get_client().search(
        collection_name,
        data=[query_embedding],
        anns_field="embedding",
        search_params={"metric_type": "COSINE"},
        limit=top_k,
        output_fields=output_fields,
    )

    formatted = []
    for hits in raw_results:
        bucket = []
        for hit in hits:
            # Hits may be objects exposing attributes or plain dicts.
            if hasattr(hit, "entity"):
                entity = dict(getattr(hit, "entity", {}) or {})
                hit_id = getattr(hit, "id", None)
                distance = getattr(hit, "distance", None)
            elif isinstance(hit, dict):
                entity = dict(hit.get("entity") or {})
                hit_id = hit.get("id")
                distance = hit.get("distance")
            else:
                entity = {}
                hit_id = None
                distance = None
            entity.setdefault("path", entity.get("source_path"))
            bucket.append({"id": hit_id, "distance": distance, "entity": entity})
        formatted.append(bucket)
    return formatted
|
||||||
|
|
||||||
def search_by_path(self, collection_name: str, query_path: str, top_k: int):
    """Return rows whose ``source_path`` contains ``query_path``.

    Args:
        collection_name: Collection passed through to the client's ``query``.
        query_path: Substring to look for; an empty value matches every row.
        top_k: Maximum number of rows requested (``limit``).

    Returns:
        A single-element list holding the formatted rows. Every row is a dict
        with ``id`` (the resolved path), a constant ``distance`` of ``1.0``
        and ``entity`` (a copy of the returned fields with ``path`` filled in
        from ``source_path`` when it is missing or empty).
    """
    if query_path:
        # Escape backslashes before quotes: otherwise input such as `\"`
        # becomes `\\"`, which closes the string literal early and lets the
        # remainder of the input be parsed as filter syntax.
        # NOTE(review): `%` and `_` inside query_path are presumably still
        # interpreted as LIKE wildcards by the backend -- confirm.
        escaped = query_path.replace("\\", "\\\\").replace('"', '\\"')
        filter_expr = f'source_path like "%{escaped}%"'
    else:
        filter_expr = "source_path like '%%'"

    results = self._get_client().query(
        collection_name,
        filter=filter_expr,
        limit=top_k,
        output_fields=[
            "path",
            "source_path",
            "chunk_id",
            "mime",
            "text",
            "start_offset",
            "end_offset",
            "type",
            "name",
        ],
    )

    formatted = []
    for row in results:
        entity = dict(row)
        # Fall back to source_path when "path" is absent *or* empty;
        # setdefault() only handled the absent case.
        if not entity.get("path"):
            fallback = entity.get("source_path")
            entity["path"] = fallback if fallback is not None else entity.get("path")
        formatted.append({
            "id": entity.get("path"),
            "distance": 1.0,
            "entity": entity,
        })
    return [formatted]
|
||||||
|
|
||||||
def get_all_stats(self) -> Dict[str, Any]:
|
def get_all_stats(self) -> Dict[str, Any]:
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
|
|||||||
@@ -58,29 +58,59 @@ class QdrantProvider(BaseVectorProvider):
|
|||||||
size = dim if vector and isinstance(dim, int) and dim > 0 else 1
|
size = dim if vector and isinstance(dim, int) and dim > 0 else 1
|
||||||
return qmodels.VectorParams(size=size, distance=qmodels.Distance.COSINE)
|
return qmodels.VectorParams(size=size, distance=qmodels.Distance.COSINE)
|
||||||
|
|
||||||
|
def _ensure_payload_indexes(self, client: QdrantClient, collection_name: str) -> None:
    """Create keyword payload indexes on ``path`` and ``source_path``.

    An error whose message contains "already exists" or "index exists" is
    treated as a duplicate creation and ignored; any other error propagates
    to the caller.
    """
    duplicate_markers = ("already exists", "index exists")
    for field_name in ("path", "source_path"):
        try:
            client.create_payload_index(
                collection_name=collection_name,
                field_name=field_name,
                field_schema="keyword",
            )
        except Exception as exc:  # pragma: no cover - depends on an external service
            lowered = str(exc).lower()
            # Older qdrant versions may raise an exception carrying a status
            # code; only the duplicate-creation messages are tolerated here.
            if not any(marker in lowered for marker in duplicate_markers):
                raise
|
||||||
|
|
||||||
def ensure_collection(self, collection_name: str, vector: bool, dim: int) -> None:
    """Make sure the collection exists and then try to create its payload indexes.

    Args:
        collection_name: Name of the collection to check or create.
        vector: Forwarded to ``_vector_params``.
        dim: Forwarded to ``_vector_params``.

    Raises:
        RuntimeError: If the existence check fails, or if creation fails with
            anything other than an "already exists" error.

    Index creation is best-effort: a failure there never aborts the call, but
    it is logged instead of being silently discarded.
    """
    import logging

    client = self._get_client()
    try:
        exists = client.collection_exists(collection_name)
    except Exception as exc:  # pragma: no cover - depends on an external service
        raise RuntimeError(f"Failed to check Qdrant collection '{collection_name}': {exc}") from exc

    if not exists:
        vectors_config = self._vector_params(vector, dim)
        try:
            client.create_collection(collection_name=collection_name, vectors_config=vectors_config)
        except Exception as exc:  # pragma: no cover
            # "already exists" is tolerated (the check above said otherwise,
            # but creation reports the collection is there); fall through to
            # the index step.
            if "already exists" not in str(exc).lower():
                raise RuntimeError(f"Failed to create Qdrant collection '{collection_name}': {exc}") from exc

    # Single best-effort index step shared by all three paths (pre-existing,
    # freshly created, reported as already existing).
    try:
        self._ensure_payload_indexes(client, collection_name)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to ensure payload indexes for Qdrant collection '%s'",
            collection_name,
            exc_info=True,
        )
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _point_id(path: str) -> str:
|
def _point_id(uid: str) -> str:
|
||||||
return str(uuid5(NAMESPACE_URL, path))
|
return str(uuid5(NAMESPACE_URL, uid))
|
||||||
|
|
||||||
def _prepare_point(self, data: Dict[str, Any]) -> qmodels.PointStruct:
|
def _prepare_point(self, data: Dict[str, Any]) -> qmodels.PointStruct:
|
||||||
path = data.get("path")
|
uid = data.get("path")
|
||||||
if not path:
|
if not uid:
|
||||||
raise ValueError("Qdrant upsert requires 'path' in data")
|
raise ValueError("Qdrant upsert requires 'path' in data")
|
||||||
|
|
||||||
embedding = data.get("embedding")
|
embedding = data.get("embedding")
|
||||||
@@ -89,8 +119,11 @@ class QdrantProvider(BaseVectorProvider):
|
|||||||
else:
|
else:
|
||||||
vector = [float(x) for x in embedding]
|
vector = [float(x) for x in embedding]
|
||||||
|
|
||||||
payload = {"path": path}
|
payload = {k: v for k, v in data.items() if k != "embedding"}
|
||||||
return qmodels.PointStruct(id=self._point_id(path), vector=vector, payload=payload)
|
payload.setdefault("vector_id", uid)
|
||||||
|
source_path = payload.get("source_path") or payload.get("path")
|
||||||
|
payload["path"] = source_path
|
||||||
|
return qmodels.PointStruct(id=self._point_id(str(uid)), vector=vector, payload=payload)
|
||||||
|
|
||||||
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
|
def upsert_vector(self, collection_name: str, data: Dict[str, Any]) -> None:
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
@@ -99,7 +132,12 @@ class QdrantProvider(BaseVectorProvider):
|
|||||||
|
|
||||||
def delete_vector(self, collection_name: str, path: str) -> None:
    """Delete every point whose ``path`` payload field equals ``path``.

    The deletion is expressed as a payload filter rather than a list of point
    ids, and the client call is made with ``wait=True``.
    """
    path_filter = qmodels.Filter(
        must=[
            qmodels.FieldCondition(
                key="path",
                match=qmodels.MatchValue(value=path),
            ),
        ],
    )
    self._get_client().delete(
        collection_name=collection_name,
        points_selector=qmodels.FilterSelector(filter=path_filter),
        wait=True,
    )
|
||||||
|
|
||||||
def _format_search_results(self, points: Sequence[qmodels.ScoredPoint]):
|
def _format_search_results(self, points: Sequence[qmodels.ScoredPoint]):
|
||||||
@@ -107,7 +145,7 @@ class QdrantProvider(BaseVectorProvider):
|
|||||||
{
|
{
|
||||||
"id": point.id,
|
"id": point.id,
|
||||||
"distance": point.score,
|
"distance": point.score,
|
||||||
"entity": {"path": (point.payload or {}).get("path")},
|
"entity": point.payload or {},
|
||||||
}
|
}
|
||||||
for point in points
|
for point in points
|
||||||
]
|
]
|
||||||
@@ -141,11 +179,11 @@ class QdrantProvider(BaseVectorProvider):
|
|||||||
break
|
break
|
||||||
|
|
||||||
for record in records:
|
for record in records:
|
||||||
path = (record.payload or {}).get("path")
|
payload = record.payload or {}
|
||||||
if query_path and path:
|
path = payload.get("path")
|
||||||
if query_path not in path:
|
if query_path and path and query_path not in path:
|
||||||
continue
|
continue
|
||||||
results.append({"id": record.id, "distance": 1.0, "entity": {"path": path}})
|
results.append({"id": record.id, "distance": 1.0, "entity": payload})
|
||||||
if len(results) >= top_k:
|
if len(results) >= top_k:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|||||||
@@ -21,9 +21,29 @@ export interface DirListing {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** One entry of the `items` array in a search response. */
export interface SearchResultItem {
  id: string;
  path: string;
  score: number;
  chunk_id?: string;
  snippet?: string;
  mime?: string;
  source_type?: string;
  start_offset?: number;
  end_offset?: number;
  metadata?: Record<string, any>;
}

/** Paging information optionally attached to a search response. */
export interface SearchPagination {
  page: number;
  page_size: number;
  has_more: boolean;
}

/** Response type used by `vfsApi.searchFiles`. */
export interface SearchResponse {
  items: SearchResultItem[];
  query: string;
  mode?: string;
  pagination?: SearchPagination;
}
|
||||||
|
|
||||||
export const vfsApi = {
|
export const vfsApi = {
|
||||||
@@ -105,6 +125,20 @@ export const vfsApi = {
|
|||||||
xhr.send(fd);
|
xhr.send(fd);
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
/**
 * Query the `/search` endpoint.
 *
 * `q`, `top_k` and `mode` are always sent; `page` and `page_size` are only
 * added to the query string when they are not `undefined`.
 */
searchFiles: (
  q: string,
  top_k: number = 10,
  mode: 'vector' | 'filename' = 'vector',
  page?: number,
  page_size?: number,
) => {
  const entries: [string, string][] = [
    ['q', q],
    ['top_k', String(top_k)],
    ['mode', mode],
  ];
  if (page !== undefined) {
    entries.push(['page', String(page)]);
  }
  if (page_size !== undefined) {
    entries.push(['page_size', String(page_size)]);
  }
  const queryString = new URLSearchParams(entries).toString();
  return request<SearchResponse>(`/search?${queryString}`);
},
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,128 +1,313 @@
|
|||||||
import { Modal, Input, List, Divider, Spin, Select, Space } from 'antd';
|
import { Modal, Input, List, Divider, Spin, Space, Tag, Typography, Empty, Flex, Segmented, Pagination } from 'antd';
|
||||||
import { SearchOutlined, FileTextOutlined } from '@ant-design/icons';
|
import { SearchOutlined, FileTextOutlined } from '@ant-design/icons';
|
||||||
import React, { useState } from 'react';
|
import React, { useRef, useState } from 'react';
|
||||||
import { vfsApi, type SearchResultItem } from '../api/vfs';
|
import { vfsApi, type SearchResultItem } from '../api/vfs';
|
||||||
import { useI18n } from '../i18n';
|
import { useI18n } from '../i18n';
|
||||||
import { useNavigate } from 'react-router';
|
import { useNavigate } from 'react-router';
|
||||||
|
|
||||||
|
|
||||||
interface SearchDialogProps {
  open: boolean;
  onClose: () => void;
}

type SearchMode = 'vector' | 'filename';

// Page size used for filename search; vector search always requests a single page.
const PAGE_SIZE = 10;

/**
 * Modal dialog that searches files either by vector similarity or by file name.
 *
 * Filename mode is paginated through `page` / `hasMore`; vector mode always
 * shows a single page. Every request is tagged with a monotonically
 * increasing id (`requestIdRef`) so that a response arriving after a newer
 * request, a cleared input, or a closed dialog is ignored.
 */
const SearchDialog: React.FC<SearchDialogProps> = ({ open, onClose }) => {
  const [search, setSearch] = useState('');
  const [loading, setLoading] = useState(false);
  const [results, setResults] = useState<SearchResultItem[]>([]);
  const [searched, setSearched] = useState(false);
  const [searchMode, setSearchMode] = useState<SearchMode>('vector');
  const [page, setPage] = useState(1);
  const [hasMore, setHasMore] = useState(false);
  const requestIdRef = useRef(0);
  const { t } = useI18n();
  const navigate = useNavigate();

  // Translated label for a result's retrieval source; unknown values fall
  // back to the vector-search label.
  const renderSourceLabel = (value?: string) => {
    switch ((value || '').toLowerCase()) {
      case 'vector':
        return t('Vector Search');
      case 'filename':
        return t('Name Search');
      case 'text':
        return t('Text Chunk');
      case 'image':
        return t('Image Description');
      default:
        return t('Vector Search');
    }
  };

  // Tag colour for a result's retrieval source.
  const sourceColor = (value?: string) => {
    switch ((value || '').toLowerCase()) {
      case 'vector':
        return 'blue';
      case 'filename':
        return 'green';
      case 'image':
        return 'volcano';
      case 'text':
        return 'geekblue';
      default:
        return 'purple';
    }
  };

  // Runs one search request. `options.mode` / `options.page` override the
  // current state, which has not been updated yet when this is called from
  // the same event handler that changes it.
  const performSearch = async (options?: { page?: number; mode?: SearchMode }) => {
    const query = search.trim();
    if (!query) {
      setSearched(false);
      setResults([]);
      setHasMore(false);
      return;
    }

    const currentMode = options?.mode ?? searchMode;
    const targetPage = currentMode === 'filename' ? (options?.page ?? (currentMode === searchMode ? page : 1)) : 1;

    const requestId = requestIdRef.current + 1;
    requestIdRef.current = requestId;

    setLoading(true);
    setSearched(true);
    if (currentMode === 'filename') {
      setPage(targetPage);
    } else {
      setPage(1);
      setHasMore(false);
    }

    try {
      const res = await vfsApi.searchFiles(
        query,
        currentMode === 'filename' ? PAGE_SIZE : 10,
        currentMode,
        currentMode === 'filename' ? targetPage : undefined,
        currentMode === 'filename' ? PAGE_SIZE : undefined,
      );
      // A newer request (or a reset) happened while this one was in flight.
      if (requestId !== requestIdRef.current) {
        return;
      }
      setResults(res.items);
      if (currentMode === 'filename') {
        const pagination = res.pagination;
        setHasMore(Boolean(pagination?.has_more));
        if (pagination?.page) {
          setPage(pagination.page);
        }
      } else {
        setHasMore(false);
      }
    } catch (e) {
      if (requestId !== requestIdRef.current) {
        return;
      }
      setResults([]);
      if (currentMode === 'filename') {
        setHasMore(false);
      }
    } finally {
      // Only the latest request may clear the spinner.
      if (requestId === requestIdRef.current) {
        setLoading(false);
      }
    }
  };

  const handleSearch = () => {
    if (!search.trim()) {
      setResults([]);
      setSearched(false);
      setHasMore(false);
      setPage(1);
      return;
    }
    void performSearch({ page: searchMode === 'filename' ? 1 : undefined });
  };

  const handleModeChange = (value: string | number) => {
    const nextMode = value as SearchMode;
    setHasMore(false);
    setPage(1);
    setSearchMode(nextMode);
    if (search.trim()) {
      void performSearch({ mode: nextMode, page: nextMode === 'filename' ? 1 : undefined });
    } else {
      setResults([]);
      setSearched(false);
    }
  };

  const handleClose = () => {
    setSearch('');
    setResults([]);
    setSearched(false);
    setSearchMode('vector');
    setPage(1);
    setHasMore(false);
    // Invalidate any in-flight request by advancing the counter. Resetting it
    // to 0 would let a request issued before closing share an id with the
    // first request after reopening and slip past the staleness check.
    requestIdRef.current += 1;
    setLoading(false);
    onClose();
  };

  // The backend only reports `has_more`, so the total is synthesised: one
  // item beyond the current page while more results exist, otherwise the
  // exact count of items seen so far.
  const totalItems = searchMode === 'filename'
    ? (hasMore ? page * PAGE_SIZE + 1 : (page - 1) * PAGE_SIZE + results.length)
    : results.length;

  return (
    <Modal
      open={open}
      onCancel={handleClose}
      footer={null}
      width={720}
      centered
      title={null}
      closable={false}
      styles={{
        body: {
          padding: '12px 16px 16px',
          maxHeight: '70vh',
          overflow: 'hidden',
          display: 'flex',
          flexDirection: 'column',
          gap: 12,
        },
      }}
    >
      <Flex vertical style={{ gap: 12, flex: 1, minHeight: 0 }}>
        <Flex align="center" style={{ width: '100%', gap: 12, flexWrap: 'wrap' }}>
          <Segmented
            options={[
              { label: t('Smart Search'), value: 'vector' },
              { label: t('Name Search'), value: 'filename' },
            ]}
            value={searchMode}
            onChange={handleModeChange}
            style={{
              minWidth: 160,
              height: 40,
              borderRadius: 20,
              display: 'flex',
              alignItems: 'center',
            }}
            size="large"
          />
          <Input
            allowClear
            prefix={<SearchOutlined />}
            placeholder={t('Search files / tags / types')}
            value={search}
            onChange={e => {
              const value = e.target.value;
              setSearch(value);
              if (!value.trim()) {
                // Clearing the input resets the view and invalidates any
                // request that is still in flight.
                setResults([]);
                setSearched(false);
                setHasMore(false);
                setPage(1);
                requestIdRef.current += 1;
                setLoading(false);
              }
            }}
            style={{ fontSize: 18, height: 40, flex: 1, minWidth: 240 }}
            styles={{
              input: {
                borderRadius: 20,
              },
            }}
            autoFocus
            onPressEnter={handleSearch}
          />
        </Flex>

        {!searched ? null : (
          <Flex vertical style={{ flex: 1, minHeight: 0 }}>
            <Divider style={{ margin: 0, padding: '0 0 12px' }}>{t('Search Results')}</Divider>
            {loading ? (
              <Flex align="center" justify="center" style={{ flex: 1 }}>
                <Spin />
              </Flex>
            ) : results.length === 0 ? (
              <Flex align="center" justify="center" style={{ flex: 1 }}>
                <Empty description={t('No files found')} image={Empty.PRESENTED_IMAGE_SIMPLE} />
              </Flex>
            ) : (
              <div style={{ flex: 1, minHeight: 0, display: 'flex', flexDirection: 'column' }}>
                <div style={{ flex: 1, minHeight: 0, overflowY: 'auto', paddingRight: 6 }}>
                  <List
                    itemLayout="horizontal"
                    dataSource={results}
                    split={false}
                    renderItem={item => {
                      // Split the path into its directory (navigation target)
                      // and file name (entry to highlight).
                      const fullPath = item.path || '';
                      const trimmed = fullPath.replace(/\/+$/, '');
                      const parts = trimmed.split('/');
                      const filename = parts.pop() || '';
                      const dir = parts.length ? '/' + parts.join('/') : '/';
                      const snippet = item.snippet || '';
                      const retrieval = item.metadata?.retrieval_source || item.source_type;
                      const retrievalLabel = renderSourceLabel(retrieval);
                      const scoreText = Number.isFinite(item.score) ? item.score.toFixed(2) : '-';

                      return (
                        <List.Item style={{ padding: '10px 12px', borderRadius: 6, background: '#fafafa', marginBottom: 8 }}>
                          <List.Item.Meta
                            avatar={<FileTextOutlined style={{ fontSize: 18, color: '#8c8c8c' }} />}
                            title={
                              <a
                                onClick={() => {
                                  navigate(`/files${dir === '/' ? '' : dir}`, { state: { highlight: { name: filename } } });
                                  handleClose();
                                }}
                                style={{ fontSize: 16 }}
                              >
                                {fullPath}
                              </a>
                            }
                            description={(
                              <Space direction="vertical" size={6} style={{ width: '100%' }}>
                                {snippet ? (
                                  <Typography.Paragraph ellipsis={{ rows: 3 }} style={{ marginBottom: 0 }}>
                                    {snippet}
                                  </Typography.Paragraph>
                                ) : null}
                                <Space size={10} wrap>
                                  {retrieval ? (
                                    <Tag color={sourceColor(retrieval)} style={{ marginRight: 0 }}>
                                      {retrievalLabel}
                                    </Tag>
                                  ) : null}
                                  <Typography.Text type="secondary">
                                    {t('Relevance')}: {scoreText}
                                  </Typography.Text>
                                </Space>
                              </Space>
                            )}
                          />
                        </List.Item>
                      );
                    }}
                  />
                </div>
                {searchMode === 'filename' && results.length > 0 ? (
                  <Pagination
                    current={page}
                    pageSize={PAGE_SIZE}
                    total={Math.max(totalItems, 1)}
                    showSizeChanger={false}
                    size="small"
                    style={{ marginTop: 12, textAlign: 'right' }}
                    onChange={(nextPage) => {
                      void performSearch({ page: nextPage });
                    }}
                  />
                ) : null}
              </div>
            )}
          </Flex>
        )}
      </Flex>
    </Modal>
  );
};
|
||||||
|
|||||||
@@ -8,14 +8,23 @@ import { useTheme } from '../../contexts/ThemeContext';
|
|||||||
import '../../styles/settings-tabs.css';
|
import '../../styles/settings-tabs.css';
|
||||||
import { useI18n } from '../../i18n';
|
import { useI18n } from '../../i18n';
|
||||||
|
|
||||||
// Configuration keys paired with the (untranslated) label shown for each;
// `default` is optional and not set for any of these entries.
const APP_CONFIG_KEYS: Array<{ key: string; label: string; default?: string }> = [
  {
    key: 'APP_NAME',
    label: 'App Name',
  },
  {
    key: 'APP_LOGO',
    label: 'Logo URL',
  },
  {
    key: 'APP_DOMAIN',
    label: 'App Domain',
  },
  {
    key: 'FILE_DOMAIN',
    label: 'File Domain',
  },
];
|
||||||
|
|
||||||
const VISION_CONFIG_KEYS = [
|
interface AiConfigKeyBase {
|
||||||
|
key: string;
|
||||||
|
default?: string | number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface AiConfigKeyWithLabel extends AiConfigKeyBase {
|
||||||
|
label: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
const VISION_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
|
||||||
{ key: 'AI_VISION_API_URL', label: 'Vision API URL' },
|
{ key: 'AI_VISION_API_URL', label: 'Vision API URL' },
|
||||||
{ key: 'AI_VISION_MODEL', label: 'Vision Model', default: 'Qwen/Qwen2.5-VL-32B-Instruct' },
|
{ key: 'AI_VISION_MODEL', label: 'Vision Model', default: 'Qwen/Qwen2.5-VL-32B-Instruct' },
|
||||||
{ key: 'AI_VISION_API_KEY', label: 'Vision API Key' },
|
{ key: 'AI_VISION_API_KEY', label: 'Vision API Key' },
|
||||||
@@ -24,13 +33,24 @@ const VISION_CONFIG_KEYS = [
|
|||||||
const DEFAULT_EMBED_DIMENSION = 4096;
|
const DEFAULT_EMBED_DIMENSION = 4096;
|
||||||
const EMBED_DIM_KEY = 'AI_EMBED_DIM';
|
const EMBED_DIM_KEY = 'AI_EMBED_DIM';
|
||||||
|
|
||||||
// Embedding model settings (key + untranslated label, optional default).
const EMBED_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
  {
    key: 'AI_EMBED_API_URL',
    label: 'Embedding API URL',
  },
  {
    key: 'AI_EMBED_MODEL',
    label: 'Embedding Model',
    default: 'Qwen/Qwen3-Embedding-8B',
  },
  {
    key: 'AI_EMBED_API_KEY',
    label: 'Embedding API Key',
  },
];

// Rerank model settings; none of them declares a default.
const RERANK_CONFIG_KEYS: AiConfigKeyWithLabel[] = [
  {
    key: 'AI_RERANK_API_URL',
    label: 'Rerank API URL',
  },
  {
    key: 'AI_RERANK_MODEL',
    label: 'Rerank Model',
  },
  {
    key: 'AI_RERANK_API_KEY',
    label: 'Rerank API Key',
  },
];

// Every AI-related key in one list: vision, embedding, rerank, followed by
// the embedding dimension entry (which has a default but no label).
const ALL_AI_KEYS: AiConfigKeyBase[] = [
  ...VISION_CONFIG_KEYS,
  ...EMBED_CONFIG_KEYS,
  ...RERANK_CONFIG_KEYS,
  {
    key: EMBED_DIM_KEY,
    default: DEFAULT_EMBED_DIMENSION,
  },
];
|
||||||
|
|
||||||
const formatBytes = (bytes?: number | null) => {
|
const formatBytes = (bytes?: number | null) => {
|
||||||
if (bytes === null || bytes === undefined) return '-';
|
if (bytes === null || bytes === undefined) return '-';
|
||||||
@@ -194,6 +214,8 @@ export default function SystemSettingsPage() {
|
|||||||
}
|
}
|
||||||
}, [buildProviderConfigValues, message, t, vectorConfigForm, vectorProviders]);
|
}, [buildProviderConfigValues, message, t, vectorConfigForm, vectorProviders]);
|
||||||
|
|
||||||
|
const vectorSectionLoading = vectorStatsLoading || vectorConfigLoading;
|
||||||
|
|
||||||
// 离开“外观设置”时,恢复后端持久化配置(取消未保存的预览)
|
// 离开“外观设置”时,恢复后端持久化配置(取消未保存的预览)
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (activeTab !== 'appearance') {
|
if (activeTab !== 'appearance') {
|
||||||
@@ -303,7 +325,7 @@ export default function SystemSettingsPage() {
|
|||||||
</Form.Item>
|
</Form.Item>
|
||||||
</Card>
|
</Card>
|
||||||
<Card title={t('Advanced')} style={{ marginTop: 24 }}>
|
<Card title={t('Advanced')} style={{ marginTop: 24 }}>
|
||||||
<Form.Item name={THEME_KEYS.TOKENS} label={t('Override AntD Tokens (JSON)')} tooltip={t('e.g. {"colorText": "#222"}') }>
|
<Form.Item name={THEME_KEYS.TOKENS} label={t('Override AntD Tokens (JSON)')} tooltip={t('e.g. {"colorText": "#222"}')}>
|
||||||
<Input.TextArea autoSize={{ minRows: 4 }} placeholder='{ "colorText": "#222" }' />
|
<Input.TextArea autoSize={{ minRows: 4 }} placeholder='{ "colorText": "#222" }' />
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
<Form.Item name={THEME_KEYS.CSS} label={t('Custom CSS')}>
|
<Form.Item name={THEME_KEYS.CSS} label={t('Custom CSS')}>
|
||||||
@@ -402,6 +424,13 @@ export default function SystemSettingsPage() {
|
|||||||
<InputNumber min={1} max={32768} style={{ width: '100%' }} />
|
<InputNumber min={1} max={32768} style={{ width: '100%' }} />
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
</Card>
|
</Card>
|
||||||
|
<Card title={t('Rerank Model')} style={{ marginTop: 24 }}>
|
||||||
|
{RERANK_CONFIG_KEYS.map(({ key, label }) => (
|
||||||
|
<Form.Item key={key} name={key} label={t(label)}>
|
||||||
|
<Input size="large" />
|
||||||
|
</Form.Item>
|
||||||
|
))}
|
||||||
|
</Card>
|
||||||
<Form.Item style={{ marginTop: 24 }}>
|
<Form.Item style={{ marginTop: 24 }}>
|
||||||
<Button type="primary" htmlType="submit" loading={loading} block>
|
<Button type="primary" htmlType="submit" loading={loading} block>
|
||||||
{t('Save')}
|
{t('Save')}
|
||||||
@@ -428,178 +457,180 @@ export default function SystemSettingsPage() {
|
|||||||
{t('Refresh')}
|
{t('Refresh')}
|
||||||
</Button>
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
{vectorMetaError ? (
|
{vectorSectionLoading ? (
|
||||||
<Alert type="error" showIcon message={vectorMetaError} />
|
<div style={{ display: 'flex', justifyContent: 'center', padding: '24px 0' }}>
|
||||||
) : null}
|
<Spin />
|
||||||
{vectorStatsLoading && !vectorStats ? (
|
</div>
|
||||||
<Spin />
|
) : (
|
||||||
) : vectorStats ? (
|
<>
|
||||||
<Space direction="vertical" size={16} style={{ width: '100%' }}>
|
{vectorMetaError ? (
|
||||||
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 24 }}>
|
<Alert type="error" showIcon message={vectorMetaError} />
|
||||||
<div>
|
) : null}
|
||||||
<div style={{ color: '#888' }}>{t('Collections')}</div>
|
{vectorStats ? (
|
||||||
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.collection_count}</div>
|
<Space direction="vertical" size={16} style={{ width: '100%' }}>
|
||||||
</div>
|
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 24 }}>
|
||||||
<div>
|
<div>
|
||||||
<div style={{ color: '#888' }}>{t('Vectors')}</div>
|
<div style={{ color: '#888' }}>{t('Collections')}</div>
|
||||||
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.total_vectors}</div>
|
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.collection_count}</div>
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<div style={{ color: '#888' }}>{t('Database Size')}</div>
|
|
||||||
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.db_file_size_bytes)}</div>
|
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<div style={{ color: '#888' }}>{t('Estimated Memory')}</div>
|
|
||||||
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.estimated_total_memory_bytes)}</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{vectorStats.collections.length ? (
|
|
||||||
<Space direction="vertical" style={{ width: '100%' }} size={16}>
|
|
||||||
{vectorStats.collections.map((collection) => (
|
|
||||||
<div key={collection.name} style={{ border: '1px solid #f0f0f0', borderRadius: 8, padding: 16 }}>
|
|
||||||
<Space direction="vertical" size={12} style={{ width: '100%' }}>
|
|
||||||
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 12 }}>
|
|
||||||
<strong>{collection.name}</strong>
|
|
||||||
<span style={{ color: '#888' }}>
|
|
||||||
{collection.is_vector_collection && collection.dimension
|
|
||||||
? `${t('Dimension')}: ${collection.dimension}`
|
|
||||||
: t('Non-vector collection')}
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
<div>{t('Vectors')}: {collection.row_count}</div>
|
|
||||||
{collection.is_vector_collection ? (
|
|
||||||
<div>{t('Estimated memory')}: {formatBytes(collection.estimated_memory_bytes)}</div>
|
|
||||||
) : null}
|
|
||||||
{collection.indexes.length ? (
|
|
||||||
<Space direction="vertical" size={4} style={{ width: '100%' }}>
|
|
||||||
<span>{t('Indexes')}:</span>
|
|
||||||
<ul style={{ paddingLeft: 20, margin: 0 }}>
|
|
||||||
{collection.indexes.map((index) => (
|
|
||||||
<li key={`${collection.name}-${index.index_name || 'default'}`}>
|
|
||||||
<span>{index.index_name || t('Unnamed index')}</span>
|
|
||||||
<span>{' · '}{index.index_type || '-'}</span>
|
|
||||||
<span>{' · '}{index.metric_type || '-'}</span>
|
|
||||||
<span>{' · '}{t('Indexed rows')}: {index.indexed_rows}</span>
|
|
||||||
<span>{' · '}{t('Pending rows')}: {index.pending_index_rows}</span>
|
|
||||||
<span>{' · '}{t('Status')}: {index.state || '-'}</span>
|
|
||||||
</li>
|
|
||||||
))}
|
|
||||||
</ul>
|
|
||||||
</Space>
|
|
||||||
) : null}
|
|
||||||
</Space>
|
|
||||||
</div>
|
</div>
|
||||||
))}
|
<div>
|
||||||
|
<div style={{ color: '#888' }}>{t('Vectors')}</div>
|
||||||
|
<div style={{ fontSize: 20, fontWeight: 600 }}>{vectorStats.total_vectors}</div>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<div style={{ color: '#888' }}>{t('Database Size')}</div>
|
||||||
|
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.db_file_size_bytes)}</div>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<div style={{ color: '#888' }}>{t('Estimated Memory')}</div>
|
||||||
|
<div style={{ fontSize: 20, fontWeight: 600 }}>{formatBytes(vectorStats.estimated_total_memory_bytes)}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{vectorStats.collections.length ? (
|
||||||
|
<Space direction="vertical" style={{ width: '100%' }} size={16}>
|
||||||
|
{vectorStats.collections.map((collection) => (
|
||||||
|
<div key={collection.name} style={{ border: '1px solid #f0f0f0', borderRadius: 8, padding: 16 }}>
|
||||||
|
<Space direction="vertical" size={12} style={{ width: '100%' }}>
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 12 }}>
|
||||||
|
<strong>{collection.name}</strong>
|
||||||
|
<span style={{ color: '#888' }}>
|
||||||
|
{collection.is_vector_collection && collection.dimension
|
||||||
|
? `${t('Dimension')}: ${collection.dimension}`
|
||||||
|
: t('Non-vector collection')}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div>{t('Vectors')}: {collection.row_count}</div>
|
||||||
|
{collection.is_vector_collection ? (
|
||||||
|
<div>{t('Estimated memory')}: {formatBytes(collection.estimated_memory_bytes)}</div>
|
||||||
|
) : null}
|
||||||
|
{collection.indexes.length ? (
|
||||||
|
<Space direction="vertical" size={4} style={{ width: '100%' }}>
|
||||||
|
<span>{t('Indexes')}:</span>
|
||||||
|
<ul style={{ paddingLeft: 20, margin: 0 }}>
|
||||||
|
{collection.indexes.map((index) => (
|
||||||
|
<li key={`${collection.name}-${index.index_name || 'default'}`}>
|
||||||
|
<span>{index.index_name || t('Unnamed index')}</span>
|
||||||
|
<span>{' · '}{index.index_type || '-'}</span>
|
||||||
|
<span>{' · '}{index.metric_type || '-'}</span>
|
||||||
|
<span>{' · '}{t('Indexed rows')}: {index.indexed_rows}</span>
|
||||||
|
<span>{' · '}{t('Pending rows')}: {index.pending_index_rows}</span>
|
||||||
|
<span>{' · '}{t('Status')}: {index.state || '-'}</span>
|
||||||
|
</li>
|
||||||
|
))}
|
||||||
|
</ul>
|
||||||
|
</Space>
|
||||||
|
) : null}
|
||||||
|
</Space>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</Space>
|
||||||
|
) : (
|
||||||
|
<Empty description={t('No collections')} />
|
||||||
|
)}
|
||||||
|
<div style={{ color: '#888' }}>
|
||||||
|
{t('Estimated memory is calculated as vectors x dimension x 4 bytes (float32).')}
|
||||||
|
</div>
|
||||||
</Space>
|
</Space>
|
||||||
|
) : vectorStatsError ? (
|
||||||
|
<div style={{ color: '#ff4d4f' }}>{vectorStatsError}</div>
|
||||||
) : (
|
) : (
|
||||||
<Empty description={t('No collections')} />
|
<Empty description={t('No collections')} />
|
||||||
)}
|
)}
|
||||||
<div style={{ color: '#888' }}>
|
<Form
|
||||||
{t('Estimated memory is calculated as vectors x dimension x 4 bytes (float32).')}
|
layout="vertical"
|
||||||
</div>
|
form={vectorConfigForm}
|
||||||
</Space>
|
onFinish={handleVectorConfigSave}
|
||||||
) : vectorStatsError ? (
|
initialValues={{ type: selectedProviderType || undefined, config: {} }}
|
||||||
<div style={{ color: '#ff4d4f' }}>{vectorStatsError}</div>
|
>
|
||||||
) : (
|
<Form.Item
|
||||||
<Empty description={t('No collections')} />
|
name="type"
|
||||||
|
label={t('Database Provider')}
|
||||||
|
rules={[{ required: true, message: t('Please select a provider') }]}
|
||||||
|
>
|
||||||
|
<Select
|
||||||
|
size="large"
|
||||||
|
options={vectorProviders.map((provider) => ({
|
||||||
|
value: provider.type,
|
||||||
|
label: provider.enabled ? provider.label : `${provider.label} (${t('Coming soon')})`,
|
||||||
|
disabled: !provider.enabled,
|
||||||
|
}))}
|
||||||
|
onChange={handleProviderChange}
|
||||||
|
loading={vectorConfigLoading && !vectorProviders.length}
|
||||||
|
/>
|
||||||
|
</Form.Item>
|
||||||
|
{selectedProvider?.description ? (
|
||||||
|
<Alert
|
||||||
|
type="info"
|
||||||
|
showIcon
|
||||||
|
message={t(selectedProvider.description)}
|
||||||
|
style={{ marginBottom: 16 }}
|
||||||
|
/>
|
||||||
|
) : null}
|
||||||
|
{selectedProvider?.config_schema?.map((field) => (
|
||||||
|
<Form.Item
|
||||||
|
key={field.key}
|
||||||
|
name={['config', field.key]}
|
||||||
|
label={t(field.label)}
|
||||||
|
rules={field.required ? [{ required: true, message: t('Please input {label}', { label: t(field.label) }) }] : []}
|
||||||
|
>
|
||||||
|
{field.type === 'password' ? (
|
||||||
|
<Input.Password size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
|
||||||
|
) : (
|
||||||
|
<Input size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
|
||||||
|
)}
|
||||||
|
</Form.Item>
|
||||||
|
))}
|
||||||
|
{selectedProvider && !selectedProvider.enabled ? (
|
||||||
|
<Alert
|
||||||
|
type="warning"
|
||||||
|
showIcon
|
||||||
|
message={t('This provider is not available yet')}
|
||||||
|
style={{ marginBottom: 16 }}
|
||||||
|
/>
|
||||||
|
) : null}
|
||||||
|
<Form.Item>
|
||||||
|
<Space direction="vertical" style={{ width: '100%' }}>
|
||||||
|
<Button
|
||||||
|
type="primary"
|
||||||
|
htmlType="submit"
|
||||||
|
loading={vectorConfigSaving}
|
||||||
|
block
|
||||||
|
disabled={!selectedProvider?.enabled}
|
||||||
|
>
|
||||||
|
{t('Save')}
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
danger
|
||||||
|
htmlType="button"
|
||||||
|
block
|
||||||
|
onClick={() => {
|
||||||
|
Modal.confirm({
|
||||||
|
title: t('Confirm clear vector database?'),
|
||||||
|
content: t('This will delete all collections irreversibly.'),
|
||||||
|
okText: t('Confirm Clear'),
|
||||||
|
okType: 'danger',
|
||||||
|
cancelText: t('Cancel'),
|
||||||
|
onOk: async () => {
|
||||||
|
try {
|
||||||
|
await vectorDBApi.clearAll();
|
||||||
|
message.success(t('Vector database cleared'));
|
||||||
|
await fetchVectorStats();
|
||||||
|
await fetchVectorMeta();
|
||||||
|
} catch (e: any) {
|
||||||
|
message.error(e.message || t('Clear failed'));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{t('Clear Vector DB')}
|
||||||
|
</Button>
|
||||||
|
</Space>
|
||||||
|
</Form.Item>
|
||||||
|
</Form>
|
||||||
|
</>
|
||||||
)}
|
)}
|
||||||
</Space>
|
</Space>
|
||||||
{vectorConfigLoading && !vectorProviders.length ? (
|
|
||||||
<Spin />
|
|
||||||
) : (
|
|
||||||
<Form
|
|
||||||
layout="vertical"
|
|
||||||
form={vectorConfigForm}
|
|
||||||
onFinish={handleVectorConfigSave}
|
|
||||||
initialValues={{ type: selectedProviderType || undefined, config: {} }}
|
|
||||||
>
|
|
||||||
<Form.Item
|
|
||||||
name="type"
|
|
||||||
label={t('Database Provider')}
|
|
||||||
rules={[{ required: true, message: t('Please select a provider') }]}
|
|
||||||
>
|
|
||||||
<Select
|
|
||||||
size="large"
|
|
||||||
options={vectorProviders.map((provider) => ({
|
|
||||||
value: provider.type,
|
|
||||||
label: provider.enabled ? provider.label : `${provider.label} (${t('Coming soon')})`,
|
|
||||||
disabled: !provider.enabled,
|
|
||||||
}))}
|
|
||||||
onChange={handleProviderChange}
|
|
||||||
loading={vectorConfigLoading && !vectorProviders.length}
|
|
||||||
/>
|
|
||||||
</Form.Item>
|
|
||||||
{selectedProvider?.description ? (
|
|
||||||
<Alert
|
|
||||||
type="info"
|
|
||||||
showIcon
|
|
||||||
message={t(selectedProvider.description)}
|
|
||||||
style={{ marginBottom: 16 }}
|
|
||||||
/>
|
|
||||||
) : null}
|
|
||||||
{selectedProvider?.config_schema?.map((field) => (
|
|
||||||
<Form.Item
|
|
||||||
key={field.key}
|
|
||||||
name={['config', field.key]}
|
|
||||||
label={t(field.label)}
|
|
||||||
rules={field.required ? [{ required: true, message: t('Please input {label}', { label: t(field.label) }) }] : []}
|
|
||||||
>
|
|
||||||
{field.type === 'password' ? (
|
|
||||||
<Input.Password size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
|
|
||||||
) : (
|
|
||||||
<Input size="large" placeholder={field.placeholder ? t(field.placeholder) : undefined} />
|
|
||||||
)}
|
|
||||||
</Form.Item>
|
|
||||||
))}
|
|
||||||
{selectedProvider && !selectedProvider.enabled ? (
|
|
||||||
<Alert
|
|
||||||
type="warning"
|
|
||||||
showIcon
|
|
||||||
message={t('This provider is not available yet')}
|
|
||||||
style={{ marginBottom: 16 }}
|
|
||||||
/>
|
|
||||||
) : null}
|
|
||||||
<Form.Item>
|
|
||||||
<Space direction="vertical" style={{ width: '100%' }}>
|
|
||||||
<Button
|
|
||||||
type="primary"
|
|
||||||
htmlType="submit"
|
|
||||||
loading={vectorConfigSaving}
|
|
||||||
block
|
|
||||||
disabled={!selectedProvider?.enabled}
|
|
||||||
>
|
|
||||||
{t('Save')}
|
|
||||||
</Button>
|
|
||||||
<Button
|
|
||||||
danger
|
|
||||||
htmlType="button"
|
|
||||||
block
|
|
||||||
onClick={() => {
|
|
||||||
Modal.confirm({
|
|
||||||
title: t('Confirm clear vector database?'),
|
|
||||||
content: t('This will delete all collections irreversibly.'),
|
|
||||||
okText: t('Confirm Clear'),
|
|
||||||
okType: 'danger',
|
|
||||||
cancelText: t('Cancel'),
|
|
||||||
onOk: async () => {
|
|
||||||
try {
|
|
||||||
await vectorDBApi.clearAll();
|
|
||||||
message.success(t('Vector database cleared'));
|
|
||||||
await fetchVectorStats();
|
|
||||||
await fetchVectorMeta();
|
|
||||||
} catch (e: any) {
|
|
||||||
message.error(e.message || t('Clear failed'));
|
|
||||||
}
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{t('Clear Vector DB')}
|
|
||||||
</Button>
|
|
||||||
</Space>
|
|
||||||
</Form.Item>
|
|
||||||
</Form>
|
|
||||||
)}
|
|
||||||
</Space>
|
</Space>
|
||||||
</Card>
|
</Card>
|
||||||
),
|
),
|
||||||
|
|||||||
Reference in New Issue
Block a user