feat: accelerate RSS parsing with Rust

This commit is contained in:
jxxghp
2026-05-22 21:31:18 +08:00
parent 052e1ca8e4
commit 4de4044a3e
15 changed files with 467 additions and 102 deletions

View File

@@ -99,13 +99,37 @@ def test_rust_indexer_search_url_keeps_existing_query_and_category():
assert "search_field=imdb0049406" in search_url
def test_rust_filesize_parser_matches_site_units():
def test_rust_rss_parser_extracts_common_rss_and_atom_fields():
"""
Rust file-size parsing should cover the units commonly handled by the site parsers.
Rust RSS parsing should cover the core fields of both RSS items and Atom entries.
"""
# NOTE(review): this span looks like a flattened diff hunk -- two `def` lines and
# two docstring lines appear back to back, presumably the old and new versions
# of the test. Confirm against the actual test module before relying on it.
# Expected sizes are binary multiples: 1.5 GB -> 1.5 * 1024**3 bytes and
# 2 TiB -> 2 * 1024**4 bytes; a bare number is expected back unchanged.
assert rust_accel.parse_filesize("1.5 GB") == 1610612736
assert rust_accel.parse_filesize("2 TiB") == 2199023255552
assert rust_accel.parse_filesize("42") == 42
# Fixture: one RSS <item> and one Atom <entry> placed in the same document.
xml_text = """
<rss><channel>
<item>
<title>Example Torrent</title>
<description><![CDATA[Desc]]></description>
<link>https://example.org/details/1</link>
<enclosure url="https://example.org/download/1.torrent" length="1024" />
<pubDate>Tue, 19 May 2026 08:30:00 GMT</pubDate>
<dc:creator>豆瓣用户</dc:creator>
</item>
<entry>
<title>Atom Torrent</title>
<summary>Atom Desc</summary>
<link href="https://example.org/atom/2" />
<updated>2026-05-19T09:30:00Z</updated>
</entry>
</channel></rss>
"""
# The second argument (100) is presumably a cap on the number of parsed
# items -- TODO confirm against the parse_rss_items signature.
items = rust_accel.parse_rss_items(xml_text, 100)
# RSS item: "enclosure" is expected to be the <enclosure> url attribute, "size"
# its length attribute as an int, and "nickname" the <dc:creator> text.
assert items[0]["title"] == "Example Torrent"
assert items[0]["enclosure"] == "https://example.org/download/1.torrent"
assert items[0]["size"] == 1024
assert items[0]["nickname"] == "豆瓣用户"
# Atom entry: there is no <enclosure> element, so "enclosure" is expected to
# equal the href of the entry's <link>.
assert items[1]["title"] == "Atom Torrent"
assert items[1]["enclosure"] == "https://example.org/atom/2"
def test_rust_indexer_page_parser_handles_common_fields():

View File

@@ -16,13 +16,13 @@ def _load_subscribe_chain_class():
module = sys.modules[module_name]
return module, module.SubscribeChain
injected_modules = {}
original_modules = {}
def ensure_module(name: str, module: types.ModuleType):
# NOTE(review): the lines below look like old and new versions of this helper
# interleaved by a flattened diff (an early return on an already-registered
# name next to an unconditional replacement) -- confirm against the real file.
if name in sys.modules:
return sys.modules[name]
"""Temporarily replace a module dependency, recording the original module so it can be restored once loading finishes."""
# Remember the pre-existing sys.modules entry (None when the name was absent)
# only the first time a given name is replaced.
if name not in original_modules:
original_modules[name] = sys.modules.get(name)
# Register the stand-in module under the requested name.
sys.modules[name] = module
injected_modules[name] = module
return module
chain_module = ensure_module("app.chain", types.ModuleType("app.chain"))
@@ -270,9 +270,15 @@ def _load_subscribe_chain_class():
sys.modules[module_name] = module
assert spec and spec.loader
spec.loader.exec_module(module)
module._injected_modules = injected_modules
for injected_name in injected_modules:
sys.modules.pop(injected_name, None)
module._injected_modules = {
name: sys.modules.get(name)
for name in original_modules
}
for injected_name, original_module in original_modules.items():
if original_module is None:
sys.modules.pop(injected_name, None)
else:
sys.modules[injected_name] = original_module
return module, module.SubscribeChain