fix etree

2026-06-05 23:49:54 +08:00 · 2025-07-08 11:00:38 +08:00
parent a8aedba6ff
commit d336bcbf1f
17 changed files with 897 additions and 749 deletions
--- a/app/modules/indexer/parser/unit3d.py
+++ b/app/modules/indexer/parser/unit3d.py
@@ -14,21 +14,24 @@ class Unit3dSiteUserInfo(SiteParserBase):
    def _parse_user_base_info(self, html_text: str):
        html_text = self._prepare_html_text(html_text)
        html = etree.HTML(html_text)
+        try:
+            tmps = html.xpath('//a[contains(@href, "/users/") and contains(@href, "settings")]/@href')
+            if tmps:
+                user_name_match = re.search(r"/users/(.+)/settings", tmps[0])
+                if user_name_match and user_name_match.group().strip():
+                    self.username = user_name_match.group(1)
+                    self._torrent_seeding_page = f"/users/{self.username}/active?perPage=100&client=&seeding=include"
+                    self._user_detail_page = f"/users/{self.username}"

-        tmps = html.xpath('//a[contains(@href, "/users/") and contains(@href, "settings")]/@href')
-        if tmps:
-            user_name_match = re.search(r"/users/(.+)/settings", tmps[0])
-            if user_name_match and user_name_match.group().strip():
-                self.username = user_name_match.group(1)
-                self._torrent_seeding_page = f"/users/{self.username}/active?perPage=100&client=&seeding=include"
-                self._user_detail_page = f"/users/{self.username}"
-
-        tmps = html.xpath('//a[contains(@href, "bonus/earnings")]')
-        if tmps:
-            bonus_text = tmps[0].xpath("string(.)")
-            bonus_match = re.search(r"([\d,.]+)", bonus_text)
-            if bonus_match and bonus_match.group(1).strip():
-                self.bonus = StringUtils.str_float(bonus_match.group(1))
+            tmps = html.xpath('//a[contains(@href, "bonus/earnings")]')
+            if tmps:
+                bonus_text = tmps[0].xpath("string(.)")
+                bonus_match = re.search(r"([\d,.]+)", bonus_text)
+                if bonus_match and bonus_match.group(1).strip():
+                    self.bonus = StringUtils.str_float(bonus_match.group(1))
+        finally:
+            if html is not None:
+                del html

    def _parse_site_page(self, html_text: str):
        pass
@@ -40,21 +43,25 @@ class Unit3dSiteUserInfo(SiteParserBase):
        :return:
        """
        html = etree.HTML(html_text)
-        if not StringUtils.is_valid_html_element(html):
-            return None
+        try:
+            if not StringUtils.is_valid_html_element(html):
+                return None

-        # 用户等级
-        user_levels_text = html.xpath('//div[contains(@class, "content")]//span[contains(@class, "badge-user")]/text()')
-        if user_levels_text:
-            self.user_level = user_levels_text[0].strip()
+            # 用户等级
+            user_levels_text = html.xpath('//div[contains(@class, "content")]//span[contains(@class, "badge-user")]/text()')
+            if user_levels_text:
+                self.user_level = user_levels_text[0].strip()

-        # 加入日期
-        join_at_text = html.xpath('//div[contains(@class, "content")]//h4[contains(text(), "注册日期") '
-                                  'or contains(text(), "註冊日期") '
-                                  'or contains(text(), "Registration date")]/text()')
-        if join_at_text:
-            self.join_at = StringUtils.unify_datetime_str(
-                join_at_text[0].replace('注册日期', '').replace('註冊日期', '').replace('Registration date', ''))
+            # 加入日期
+            join_at_text = html.xpath('//div[contains(@class, "content")]//h4[contains(text(), "注册日期") '
+                                      'or contains(text(), "註冊日期") '
+                                      'or contains(text(), "Registration date")]/text()')
+            if join_at_text:
+                self.join_at = StringUtils.unify_datetime_str(
+                    join_at_text[0].replace('注册日期', '').replace('註冊日期', '').replace('Registration date', ''))
+        finally:
+            if html is not None:
+                del html

    def _parse_user_torrent_seeding_info(self, html_text: str, multi_page: Optional[bool] = False) -> Optional[str]:
        """
@@ -64,44 +71,48 @@ class Unit3dSiteUserInfo(SiteParserBase):
        :return: 下页地址
        """
        html = etree.HTML(html_text)
-        if not StringUtils.is_valid_html_element(html):
-            return None
+        try:
+            if not StringUtils.is_valid_html_element(html):
+                return None

-        size_col = 9
-        seeders_col = 2
-        # 搜索size列
-        if html.xpath('//thead//th[contains(@class,"size")]'):
-            size_col = len(html.xpath('//thead//th[contains(@class,"size")][1]/preceding-sibling::th')) + 1
-        # 搜索seeders列
-        if html.xpath('//thead//th[contains(@class,"seeders")]'):
-            seeders_col = len(html.xpath('//thead//th[contains(@class,"seeders")]/preceding-sibling::th')) + 1
+            size_col = 9
+            seeders_col = 2
+            # 搜索size列
+            if html.xpath('//thead//th[contains(@class,"size")]'):
+                size_col = len(html.xpath('//thead//th[contains(@class,"size")][1]/preceding-sibling::th')) + 1
+            # 搜索seeders列
+            if html.xpath('//thead//th[contains(@class,"seeders")]'):
+                seeders_col = len(html.xpath('//thead//th[contains(@class,"seeders")]/preceding-sibling::th')) + 1

-        page_seeding = 0
-        page_seeding_size = 0
-        page_seeding_info = []
-        seeding_sizes = html.xpath(f'//tr[position()]/td[{size_col}]')
-        seeding_seeders = html.xpath(f'//tr[position()]/td[{seeders_col}]')
-        if seeding_sizes and seeding_seeders:
-            page_seeding = len(seeding_sizes)
+            page_seeding = 0
+            page_seeding_size = 0
+            page_seeding_info = []
+            seeding_sizes = html.xpath(f'//tr[position()]/td[{size_col}]')
+            seeding_seeders = html.xpath(f'//tr[position()]/td[{seeders_col}]')
+            if seeding_sizes and seeding_seeders:
+                page_seeding = len(seeding_sizes)

-            for i in range(0, len(seeding_sizes)):
-                size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
-                seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip())
+                for i in range(0, len(seeding_sizes)):
+                    size = StringUtils.num_filesize(seeding_sizes[i].xpath("string(.)").strip())
+                    seeders = StringUtils.str_int(seeding_seeders[i].xpath("string(.)").strip())

-                page_seeding_size += size
-                page_seeding_info.append([seeders, size])
+                    page_seeding_size += size
+                    page_seeding_info.append([seeders, size])

-        self.seeding += page_seeding
-        self.seeding_size += page_seeding_size
-        self.seeding_info.extend(page_seeding_info)
+            self.seeding += page_seeding
+            self.seeding_size += page_seeding_size
+            self.seeding_info.extend(page_seeding_info)

-        # 是否存在下页数据
-        next_page = None
-        next_pages = html.xpath('//ul[@class="pagination"]/li[contains(@class,"active")]/following-sibling::li')
-        if next_pages and len(next_pages) > 1:
-            page_num = next_pages[0].xpath("string(.)").strip()
-            if page_num.isdigit():
-                next_page = f"{self._torrent_seeding_page}&page={page_num}"
+            # 是否存在下页数据
+            next_page = None
+            next_pages = html.xpath('//ul[@class="pagination"]/li[contains(@class,"active")]/following-sibling::li')
+            if next_pages and len(next_pages) > 1:
+                page_num = next_pages[0].xpath("string(.)").strip()
+                if page_num.isdigit():
+                    next_page = f"{self._torrent_seeding_page}&page={page_num}"
+        finally:
+            if html is not None:
+                del html

        return next_page