fix: replace fast jieba dependency

This commit is contained in:
jxxghp
2026-05-23 12:59:33 +08:00
parent 00fc8b2f53
commit 134c441754
6 changed files with 68 additions and 14 deletions

View File

@@ -69,7 +69,7 @@ class QueryTransferHistoryTool(MoviePilotTool):
async with AsyncSessionFactory() as db:
# 处理标题搜索
if title:
# 使用 fast-jieba 分词处理标题
# 使用统一分词封装处理标题,便于替换底层实现
words = jieba_cut(title, HMM=False)
title_search = "%".join(words)
# 查询记录

View File

@@ -1,10 +1,10 @@
"""中文分词工具。"""
from fast_jieba import cut as fast_jieba_cut
from jieba_next import cut as jieba_next_cut
def cut(text: str, HMM: bool = True, cut_all: bool = False) -> list[str]:
"""
Run Chinese word segmentation with fast-jieba, accepting the common jieba.cut parameter names.
Run Chinese word segmentation with jieba-next, accepting the common jieba.cut parameter names.
"""
# NOTE(review): this is a diff view without +/- markers; the fast-jieba
# docstring line and return below look like the removed side -- confirm.
# Here the legacy upper-case HMM flag is mapped to the lower-case `hmm` keyword.
return fast_jieba_cut(text, hmm=HMM, cut_all=cut_all)
# Here HMM is forwarded under the same name and the result is wrapped in
# list(), matching the declared list[str] return type.
return list(jieba_next_cut(text, HMM=HMM, cut_all=cut_all))

44
jieba/__init__.py Normal file
View File

@@ -0,0 +1,44 @@
"""jieba 兼容入口。"""
from collections.abc import Iterator
from typing import Any
import jieba_next as _jieba_next
from jieba_next import cut_for_search as _cut_for_search
from jieba_next import lcut as _lcut
from jieba_next import lcut_for_search as _lcut_for_search
def cut(sentence: str, cut_all: bool = False, HMM: bool = True, use_paddle: bool = False) -> Iterator[str]:
    """
    Compatibility entry point for the legacy ``jieba.cut``.

    Delegates to ``jieba_next.cut``, forwarding ``sentence``, ``cut_all`` and
    ``HMM``. ``use_paddle`` is accepted so that legacy call signatures keep
    working, but it is not forwarded to the underlying implementation.
    """
    segments = _jieba_next.cut(sentence, cut_all=cut_all, HMM=HMM)
    return segments
def lcut(sentence: str, cut_all: bool = False, HMM: bool = True, use_paddle: bool = False) -> list[str]:
    """
    Compatibility entry point for the legacy ``jieba.lcut``.

    Delegates to ``jieba_next.lcut`` so callers keep the list-returning
    calling convention. ``use_paddle`` is accepted for signature
    compatibility only and is not forwarded.
    """
    forwarded = {"cut_all": cut_all, "HMM": HMM}
    return _lcut(sentence, **forwarded)
def cut_for_search(sentence: str, HMM: bool = True) -> Iterator[str]:
    """
    Compatibility entry point for the legacy ``jieba.cut_for_search``.

    Delegates search-mode segmentation to ``jieba_next.cut_for_search``,
    forwarding ``sentence`` and ``HMM`` unchanged.
    """
    search_segments = _cut_for_search(sentence, HMM=HMM)
    return search_segments
def lcut_for_search(sentence: str, HMM: bool = True) -> list[str]:
    """
    Compatibility entry point for the legacy ``jieba.lcut_for_search``.

    Delegates search-mode segmentation to ``jieba_next.lcut_for_search``,
    forwarding ``sentence`` and ``HMM`` unchanged.
    """
    options = {"HMM": HMM}
    return _lcut_for_search(sentence, **options)
def __getattr__(name: str) -> Any:
    """
    Fall back to ``jieba_next`` for attributes this module does not wrap.

    Python calls this module-level hook only when normal lookup on this
    module fails, so the wrappers defined here take precedence. Delegating
    the rest reduces the migration cost for legacy ``jieba`` callers.
    An ``AttributeError`` from ``jieba_next`` propagates unchanged.
    """
    delegated = getattr(_jieba_next, name)
    return delegated

View File

@@ -64,7 +64,7 @@ pywebpush~=2.0.3
aiosqlite~=0.21.0
psycopg2-binary~=2.9.10
asyncpg~=0.30.0
fast-jieba~=0.4.0
jieba-next~=1.0.0rc1
rsa~=4.9
redis~=6.2.0
async_timeout~=5.0.1; python_full_version < "3.11.3"

View File

@@ -1,9 +0,0 @@
from app.utils.jieba import cut
def test_cut_accepts_legacy_hmm_argument():
"""Verify that the compatibility wrapper still supports the legacy jieba.cut HMM parameter name."""
# Call the wrapper with the legacy upper-case HMM keyword.
words = cut("台湾后台测试", HMM=False)
# The segments must concatenate back to the original input text.
assert "".join(words) == "台湾后台测试"
# "后台" must appear as one of the produced segments.
assert "后台" in words

19
tests/test_jieba_utils.py Normal file
View File

@@ -0,0 +1,19 @@
import jieba
from app.utils.jieba import cut
def test_cut_accepts_legacy_hmm_argument():
    """Verify that the compatibility wrapper still supports the legacy jieba.cut HMM parameter name."""
    sample = "台湾后台测试"
    tokens = cut(sample, HMM=False)
    # Segmentation must be lossless: joining the tokens restores the input.
    assert "".join(tokens) == sample
    # "后台" must appear as one of the produced tokens.
    assert "后台" in tokens
def test_legacy_jieba_import_uses_compat_entrypoint():
    """Verify that plugins can still reach the main program's segmentation through the legacy jieba.cut entry point."""
    sample = "台湾后台测试"
    # jieba.cut is annotated as returning an iterator, so materialize it first.
    tokens = [*jieba.cut(sample, HMM=False)]
    # Segmentation must be lossless: joining the tokens restores the input.
    assert "".join(tokens) == sample
    # "后台" must appear as one of the produced tokens.
    assert "后台" in tokens