fix: render indexer jinja fields in rust

This commit is contained in:
jxxghp
2026-05-23 01:02:08 +08:00
parent efdb4d1b28
commit da54f3a302
2 changed files with 94 additions and 34 deletions

View File

@@ -37,8 +37,9 @@ static NUMERIC_FACTOR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\d+\.?\d*)").
static FIELD_EXPR_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"^fields(?:\.([A-Za-z0-9_]+)|\[\s*['"]([^'"]+)['"]\s*\])$"#).unwrap()
});
static JINJA_EXPR_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"\{\{-?\s*(.*?)\s*-?\}\}"#).unwrap());
static FIELD_REF_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"fields(?:\.([A-Za-z0-9_]+)|\[\s*['"]([^'"]+)['"]\s*\])"#).unwrap());
static JINJA_EXPR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\{\{-?\s*(.*?)\s*-?\}\}"#).unwrap());
static JINJA_TAG_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\{%-?\s*(.*?)\s*-?%\}"#).unwrap());
enum RowParseResult {
@@ -371,7 +372,7 @@ fn parse_indexer_row(
Ok(RowParseResult::Item(output.into()))
}
/// 解析标题字段,支持直接 selector 和常见的 title_default/title_optional 模板
/// 解析标题字段,支持直接 selector 和按模板引用字段渲染 title.text
fn parse_title(
py: Python<'_>,
row: ElementRef<'_>,
@@ -384,23 +385,12 @@ fn parse_title(
let mut title = if selector.contains("selector")? {
safe_query(row, &selector)?
} else if let Some(template) = get_optional_string(&selector, "text")? {
let Some(default_selector) = get_field_dict(fields, "title_default")? else {
return Ok(false);
};
let title_default = safe_query(row, &default_selector)?.unwrap_or_default();
let title_optional =
if let Some(optional_selector) = get_field_dict(fields, "title_optional")? {
safe_query(row, &optional_selector)?.unwrap_or_default()
} else {
String::new()
};
let Some(rendered) = render_known_template(
&template,
&[
("title_default", title_default.as_str()),
("title_optional", title_optional.as_str()),
],
) else {
let values = collect_template_field_values(row, fields, &template)?;
let refs: Vec<(&str, &str)> = values
.iter()
.map(|(key, value)| (key.as_str(), value.as_str()))
.collect();
let Some(rendered) = render_known_template(&template, &refs) else {
return Ok(false);
};
Some(rendered)
@@ -414,7 +404,7 @@ fn parse_title(
Ok(true)
}
/// 解析描述字段,支持直接 selector 和常见 description 模板
/// 解析描述字段,支持直接 selector 和按模板引用字段渲染 description.text
fn parse_description(
py: Python<'_>,
row: ElementRef<'_>,
@@ -427,18 +417,7 @@ fn parse_description(
let mut description = if selector.contains("selector")? || selector.contains("selectors")? {
safe_query(row, &selector)?
} else if let Some(template) = get_optional_string(&selector, "text")? {
let mut values = Vec::new();
for key in [
"tags",
"subject",
"description_free_forever",
"description_normal",
] {
if let Some(field_selector) = get_field_dict(fields, key)? {
let value = safe_query(row, &field_selector)?.unwrap_or_default();
values.push((key.to_string(), value));
}
}
let values = collect_template_field_values(row, fields, &template)?;
let refs: Vec<(&str, &str)> = values
.iter()
.map(|(key, value)| (key.as_str(), value.as_str()))
@@ -457,6 +436,33 @@ fn parse_description(
Ok(true)
}
/// Extracts, for the current row, the value of every `fields` entry that the
/// template text refers to, so template rendering is not tied to a fixed set
/// of field names.
///
/// The whole `template` string is scanned with `FIELD_REF_RE`; each distinct
/// referenced name is looked up once, in order of first appearance.
///
/// Returns `(field name, extracted text)` pairs. Names that have no entry in
/// `fields` are left out of the result, and a query that yields nothing is
/// recorded as an empty string.
///
/// # Errors
///
/// Propagates any `PyErr` raised by `get_field_dict` or `safe_query`.
fn collect_template_field_values(
    row: ElementRef<'_>,
    fields: &Bound<'_, PyDict>,
    template: &str,
) -> PyResult<Vec<(String, String)>> {
    // Distinct field names, kept in first-seen order; they borrow from `template`.
    let mut referenced: Vec<&str> = Vec::new();
    let names = FIELD_REF_RE
        .captures_iter(template)
        // Group 1 is the `fields.name` form, group 2 the `fields['name']` form.
        .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
        .map(|found| found.as_str());
    for name in names {
        if !referenced.contains(&name) {
            referenced.push(name);
        }
    }

    let mut resolved = Vec::new();
    for name in referenced {
        // A reference to a field that is not configured contributes nothing.
        let Some(field_selector) = get_field_dict(fields, name)? else {
            continue;
        };
        let text = safe_query(row, &field_selector)?.unwrap_or_default();
        resolved.push((name.to_string(), text));
    }
    Ok(resolved)
}
/// 解析普通文本字段。
fn parse_plain_field(
py: Python<'_>,
@@ -1112,7 +1118,11 @@ fn eval_field_atom(expression: &str, values: &[(&str, &str)]) -> Option<String>
return Some(value);
}
let key = parse_field_key(expression)?;
Some(get_template_value(values, &key).unwrap_or_default().to_string())
Some(
get_template_value(values, &key)
.unwrap_or_default()
.to_string(),
)
}
/// 解析单引号或双引号字符串字面量。

View File

@@ -249,6 +249,56 @@ def test_rust_indexer_page_parser_renders_common_title_template():
assert [item["title"] for item in torrents] == ["Optional Name", "Default Fallback"]
def test_rust_indexer_page_parser_renders_literal_title_template_without_default_field():
    """
    The Rust generic indexer page parser should render the plain-text fallback
    branch of a title template that only references title_optional, even when
    no title_default field is configured.
    """
    # Literal text used by the template's else-branch; also the expected title.
    fallback_title = (
        "For All Mankind S05 2019 2160p ATVP WEB-DL "
        "DDP5.1 Atmos DV H 265-HHWEB [新]"
    )
    title_template = (
        "{% if fields['title_optional'] %}"
        "{{ fields['title_optional'] }}"
        "{% else %}"
        + fallback_title
        + "{% endif %}"
    )
    field_config = {
        "title_optional": {
            "selector": "a.title",
            "attribute": "title",
            "optional": True,
        },
        "title": {"text": title_template},
        "download": {"selector": "a.dl", "attribute": "href"},
    }
    indexer_config = {
        "id": "demo",
        "name": "Demo",
        "domain": "https://example.org/",
        "search": {"paths": [{"path": "torrents.php"}]},
        "torrents": {
            "list": {"selector": "tr.torrent"},
            "fields": field_config,
        },
    }
    site_spider = SiteSpider(indexer=indexer_config)

    # The title attribute is empty, so the template must take its else-branch.
    page_lines = (
        "",
        "<table>",
        '<tr class="torrent">',
        '<td><a class="title" title="" href="/details/1">Ignored</a></td>',
        '<td><a class="dl" href="/download/1">DL</a></td>',
        "</tr>",
        "</table>",
        "",
    )
    html = "\n".join(page_lines)

    parsed = site_spider.parse(html)

    expected = [
        {
            "title": fallback_title,
            "enclosure": "https://example.org/download/1",
        }
    ]
    assert parsed == expected
def test_rust_indexer_page_parser_renders_common_description_templates():
"""
Rust 普通 indexer 页面解析应兼容站点构建项目里的 description 字段模板。