mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-17 05:30:43 +08:00
fix: improve subtitle parsing and matching
This commit is contained in:
@@ -450,7 +450,9 @@ class SiteSpider:
|
||||
return
|
||||
selector = self.fields.get('size', {})
|
||||
item = self._safe_query(torrent, selector)
|
||||
if item:
|
||||
if not item:
|
||||
item = self.__get_default_value(selector)
|
||||
if item is not None and item != "":
|
||||
size_val = item.replace("\n", "").strip()
|
||||
size_val = self.__filter_text(size_val,
|
||||
selector.get('filters'))
|
||||
@@ -492,7 +494,9 @@ class SiteSpider:
|
||||
return
|
||||
selector = self.fields.get('grabs', {})
|
||||
item = self._safe_query(torrent, selector)
|
||||
if item:
|
||||
if not item:
|
||||
item = self.__get_default_value(selector)
|
||||
if item is not None and item != "":
|
||||
grabs_val = item.split("/")[0]
|
||||
grabs_val = grabs_val.replace(",", "")
|
||||
grabs_val = self.__filter_text(grabs_val, selector.get('filters'))
|
||||
@@ -646,6 +650,8 @@ class SiteSpider:
|
||||
return
|
||||
item = self._safe_query(torrent, selector)
|
||||
value = self.__filter_text(item, selector.get('filters'))
|
||||
if not value:
|
||||
value = self.__get_default_value(selector)
|
||||
if value is not None:
|
||||
self.torrents_info[field_name] = value
|
||||
|
||||
@@ -773,6 +779,8 @@ class SiteSpider:
|
||||
):
|
||||
self.__get_subtitle_field(subtitle, field_name)
|
||||
self.__fill_subtitle_ids()
|
||||
if not self.torrents_info.get("title") or not self.torrents_info.get("enclosure"):
|
||||
return {}
|
||||
return self.torrents_info.copy() if self.torrents_info else {}
|
||||
except Exception as err:
|
||||
logger.error("%s 字幕搜索出现错误:%s" % (self.indexername, str(err)))
|
||||
@@ -831,6 +839,9 @@ class SiteSpider:
|
||||
|
||||
@staticmethod
|
||||
def __attribute_or_text(item: Any, selector: Optional[dict]) -> list:
|
||||
"""
|
||||
获取查询结果的属性或文本列表。
|
||||
"""
|
||||
if not selector:
|
||||
return item
|
||||
if not item:
|
||||
@@ -843,6 +854,9 @@ class SiteSpider:
|
||||
|
||||
@staticmethod
|
||||
def __index(items: Optional[list], selector: Optional[dict]) -> Optional[str]:
|
||||
"""
|
||||
按配置下标读取查询结果。
|
||||
"""
|
||||
if not items:
|
||||
return None
|
||||
if selector:
|
||||
@@ -858,6 +872,33 @@ class SiteSpider:
|
||||
item = items[0]
|
||||
return item
|
||||
|
||||
@staticmethod
|
||||
def __fallback_rows(html_doc: Any, selector_text: Optional[str]) -> Optional[list]:
|
||||
"""
|
||||
为 PyQuery 不支持的列表选择器提供等价行集合。
|
||||
"""
|
||||
if selector_text in ("tr:has(td.rowfollow)", "table tr:has(td.rowfollow)"):
|
||||
return [
|
||||
row
|
||||
for row in html_doc("tr")
|
||||
if len(PyQuery(row).children("td.rowfollow")) >= 3
|
||||
and len(PyQuery(row).children("td.rowfollow").find("table")) == 0
|
||||
and len(PyQuery(row).children("td.rowfollow").find('a[href*="downloadsubs.php"]')) > 0
|
||||
]
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def __get_default_value(selector: Optional[dict]) -> Optional[str]:
|
||||
"""
|
||||
读取字段默认值,兼容历史配置里的 defualt_value 拼写。
|
||||
"""
|
||||
if not selector:
|
||||
return None
|
||||
value = selector.get("default_value", selector.get("defualt_value"))
|
||||
if value is None:
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
def parse(self, html_text: str) -> List[dict]:
|
||||
"""
|
||||
解析整个页面
|
||||
@@ -896,8 +937,12 @@ class SiteSpider:
|
||||
html_doc = PyQuery(html_text)
|
||||
# 种子筛选器
|
||||
torrents_selector = self.list.get('selector', '')
|
||||
rows = html_doc(torrents_selector)
|
||||
fallback_rows = self.__fallback_rows(html_doc, torrents_selector)
|
||||
if fallback_rows is not None:
|
||||
rows = fallback_rows
|
||||
# 遍历种子html列表
|
||||
for i, torn in enumerate(html_doc(torrents_selector)):
|
||||
for i, torn in enumerate(rows):
|
||||
if i >= int(self.result_num):
|
||||
break
|
||||
# 创建临时PyQuery对象进行解析
|
||||
|
||||
Reference in New Issue
Block a user