From 3e9f908d7ba8ad7ebf3c94a5b9cf0eb87c382a31 Mon Sep 17 00:00:00 2001 From: huangjianwu Date: Mon, 23 Mar 2026 15:44:25 +0800 Subject: [PATCH] =?UTF-8?q?fix(chat):=20=E6=8C=89=E5=9B=BA=E5=AE=9A?= =?UTF-8?q?=E9=85=8D=E9=A2=9D=E6=A3=80=E7=B4=A2=EF=BC=8C=E7=A1=AE=E4=BF=9D?= =?UTF-8?q?=E4=B8=89=E7=A7=8D=E6=9D=A5=E6=BA=90=E5=9D=87=E8=A2=AB=E5=8F=AC?= =?UTF-8?q?=E5=9B=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 之前各来源各取 n_results 条再按距离排序取 top-n, markdown 距离普遍更近导致 transcript 被挤掉。 改为固定配额:meta 1 条、markdown 2 条、transcript 3 条。 Co-Authored-By: Claude Opus 4.6 --- backend/app/services/vector_store.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/app/services/vector_store.py b/backend/app/services/vector_store.py index e938c25..464e9f2 100644 --- a/backend/app/services/vector_store.py +++ b/backend/app/services/vector_store.py @@ -173,10 +173,10 @@ class VectorStoreManager: }) return chunks - def query(self, task_id: str, query_text: str, n_results: int = 5) -> list[dict]: + def query(self, task_id: str, query_text: str, n_results: int = 6) -> list[dict]: """ - 分别从 markdown 和 transcript 各检索,确保两种来源都被召回, - 最后按距离排序返回 top-n。 + 按固定配额从各来源检索:meta 1 条、markdown 2 条、transcript 3 条, + 确保三种来源都被召回。 """ col_name = self._collection_name(task_id) try: @@ -187,21 +187,21 @@ class VectorStoreManager: all_chunks = [] - # 分别从各来源检索 - for source_type in ("meta", "markdown", "transcript"): + # 每种来源的配额 + quotas = {"meta": 1, "markdown": 2, "transcript": 3} + + for source_type, quota in quotas.items(): try: results = collection.query( query_texts=[query_text], - n_results=n_results, + n_results=quota, where={"source_type": source_type}, ) all_chunks.extend(self._parse_results(results)) except Exception: pass - # 按距离排序,取 top-n - all_chunks.sort(key=lambda c: c.get("distance", 999)) - return all_chunks[:n_results] + return all_chunks def delete_index(self, task_id: str) -> None: """删除指定任务的向量索引。"""