mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-05-06 20:42:52 +08:00
fix(chat): 按固定配额检索,确保三种来源均被召回
之前各来源各取 n_results 条,再按距离排序取 top-n;markdown 的距离普遍更近,导致 transcript 被挤掉。
改为固定配额:meta 1 条、markdown 2 条、transcript 3 条。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -173,10 +173,10 @@ class VectorStoreManager:
|
||||
})
|
||||
return chunks
|
||||
|
||||
def query(self, task_id: str, query_text: str, n_results: int = 5) -> list[dict]:
|
||||
def query(self, task_id: str, query_text: str, n_results: int = 6) -> list[dict]:
|
||||
"""
|
||||
分别从 markdown 和 transcript 各检索,确保两种来源都被召回,
|
||||
最后按距离排序返回 top-n。
|
||||
按固定配额从各来源检索:meta 1 条、markdown 2 条、transcript 3 条,
|
||||
确保三种来源都被召回。
|
||||
"""
|
||||
col_name = self._collection_name(task_id)
|
||||
try:
|
||||
@@ -187,21 +187,21 @@ class VectorStoreManager:
|
||||
|
||||
all_chunks = []
|
||||
|
||||
# 分别从各来源检索
|
||||
for source_type in ("meta", "markdown", "transcript"):
|
||||
# 每种来源的配额
|
||||
quotas = {"meta": 1, "markdown": 2, "transcript": 3}
|
||||
|
||||
for source_type, quota in quotas.items():
|
||||
try:
|
||||
results = collection.query(
|
||||
query_texts=[query_text],
|
||||
n_results=n_results,
|
||||
n_results=quota,
|
||||
where={"source_type": source_type},
|
||||
)
|
||||
all_chunks.extend(self._parse_results(results))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 按距离排序,取 top-n
|
||||
all_chunks.sort(key=lambda c: c.get("distance", 999))
|
||||
return all_chunks[:n_results]
|
||||
return all_chunks
|
||||
|
||||
def delete_index(self, task_id: str) -> None:
|
||||
"""删除指定任务的向量索引。"""
|
||||
|
||||
Reference in New Issue
Block a user