mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-03 14:39:56 +08:00
fix: replace fast jieba dependency
This commit is contained in:
@@ -69,7 +69,7 @@ class QueryTransferHistoryTool(MoviePilotTool):
|
||||
async with AsyncSessionFactory() as db:
|
||||
# 处理标题搜索
|
||||
if title:
|
||||
# 使用 fast-jieba 分词处理标题。
|
||||
# 使用统一分词封装处理标题,便于替换底层实现。
|
||||
words = jieba_cut(title, HMM=False)
|
||||
title_search = "%".join(words)
|
||||
# 查询记录
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
"""中文分词工具。"""
|
||||
|
||||
from fast_jieba import cut as fast_jieba_cut
|
||||
from jieba_next import cut as jieba_next_cut
|
||||
|
||||
|
||||
def cut(text: str, HMM: bool = True, cut_all: bool = False) -> list[str]:
|
||||
"""
|
||||
使用 fast-jieba 执行中文分词,并兼容 jieba.cut 的常用参数名。
|
||||
使用 jieba-next 执行中文分词,并兼容 jieba.cut 的常用参数名。
|
||||
"""
|
||||
return fast_jieba_cut(text, hmm=HMM, cut_all=cut_all)
|
||||
return list(jieba_next_cut(text, HMM=HMM, cut_all=cut_all))
|
||||
|
||||
44
jieba/__init__.py
Normal file
44
jieba/__init__.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""jieba 兼容入口。"""
|
||||
|
||||
from collections.abc import Iterator
|
||||
from typing import Any
|
||||
|
||||
import jieba_next as _jieba_next
|
||||
from jieba_next import cut_for_search as _cut_for_search
|
||||
from jieba_next import lcut as _lcut
|
||||
from jieba_next import lcut_for_search as _lcut_for_search
|
||||
|
||||
|
||||
def cut(sentence: str, cut_all: bool = False, HMM: bool = True, use_paddle: bool = False) -> Iterator[str]:
    """Legacy ``jieba.cut`` entry point that delegates to ``jieba_next.cut``.

    Args:
        sentence: Text to segment.
        cut_all: Forwarded unchanged as ``cut_all``.
        HMM: Forwarded unchanged as ``HMM``.
        use_paddle: Accepted only so that old call sites keep working; it is
            not passed on to jieba-next.

    Returns:
        Whatever ``jieba_next.cut`` returns for the given arguments.
    """
    # Resolve the delegate at call time, exactly like a direct attribute call.
    segmenter = _jieba_next.cut
    return segmenter(sentence, HMM=HMM, cut_all=cut_all)
|
||||
|
||||
|
||||
def lcut(sentence: str, cut_all: bool = False, HMM: bool = True, use_paddle: bool = False) -> list[str]:
    """Legacy ``jieba.lcut`` entry point, kept for callers that expect a list.

    Args:
        sentence: Text to segment.
        cut_all: Forwarded unchanged as ``cut_all``.
        HMM: Forwarded unchanged as ``HMM``.
        use_paddle: Accepted for signature compatibility; not forwarded.

    Returns:
        The result of jieba-next's ``lcut`` for the given arguments.
    """
    forwarded = {"cut_all": cut_all, "HMM": HMM}
    return _lcut(sentence, **forwarded)
|
||||
|
||||
|
||||
def cut_for_search(sentence: str, HMM: bool = True) -> Iterator[str]:
    """Legacy ``jieba.cut_for_search`` entry point for search-mode segmentation.

    Args:
        sentence: Text to segment.
        HMM: Forwarded unchanged as ``HMM``.

    Returns:
        The result of jieba-next's ``cut_for_search``.
    """
    tokens = _cut_for_search(sentence, HMM=HMM)
    return tokens
|
||||
|
||||
|
||||
def lcut_for_search(sentence: str, HMM: bool = True) -> list[str]:
    """Legacy ``jieba.lcut_for_search`` entry point (search mode, list result).

    Args:
        sentence: Text to segment.
        HMM: Forwarded unchanged as ``HMM``.

    Returns:
        The result of jieba-next's ``lcut_for_search``.
    """
    token_list = _lcut_for_search(sentence, HMM=HMM)
    return token_list
|
||||
|
||||
|
||||
def __getattr__(name: str) -> Any:
    """Fall back to jieba-next for attributes this shim does not define.

    This keeps migration cost low for old call sites that use parts of the
    jieba API not wrapped explicitly here. If jieba-next lacks the attribute
    as well, the ``AttributeError`` raised by ``getattr`` propagates.

    Args:
        name: Attribute name being looked up on this module.

    Returns:
        The attribute of the same name taken from ``jieba_next``.
    """
    backend = _jieba_next
    return getattr(backend, name)
|
||||
@@ -64,7 +64,7 @@ pywebpush~=2.0.3
|
||||
aiosqlite~=0.21.0
|
||||
psycopg2-binary~=2.9.10
|
||||
asyncpg~=0.30.0
|
||||
fast-jieba~=0.4.0
|
||||
jieba-next~=1.0.0rc1
|
||||
rsa~=4.9
|
||||
redis~=6.2.0
|
||||
async_timeout~=5.0.1; python_full_version < "3.11.3"
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
from app.utils.jieba import cut
|
||||
|
||||
|
||||
def test_cut_accepts_legacy_hmm_argument():
    """Check that the compat wrapper still accepts jieba.cut's legacy ``HMM`` keyword."""
    sample = "台湾后台测试"
    tokens = cut(sample, HMM=False)

    # Segmentation must be lossless and keep "后台" as its own token.
    assert "".join(tokens) == sample
    assert "后台" in tokens
|
||||
19
tests/test_jieba_utils.py
Normal file
19
tests/test_jieba_utils.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import jieba
|
||||
|
||||
from app.utils.jieba import cut
|
||||
|
||||
|
||||
def test_cut_accepts_legacy_hmm_argument():
    """Check that the compat wrapper still accepts jieba.cut's legacy ``HMM`` keyword."""
    sample = "台湾后台测试"
    tokens = cut(sample, HMM=False)

    # Segmentation must be lossless and keep "后台" as its own token.
    assert "".join(tokens) == sample
    assert "后台" in tokens
|
||||
|
||||
|
||||
def test_legacy_jieba_import_uses_compat_entrypoint():
    """Check that plugins can still segment text through the legacy ``jieba.cut`` entry point."""
    sample = "台湾后台测试"
    # jieba.cut is annotated as returning an iterator, so materialise it first.
    tokens = list(jieba.cut(sample, HMM=False))

    assert "".join(tokens) == sample
    assert "后台" in tokens
|
||||
Reference in New Issue
Block a user