Merge pull request #3785 from InfinityPacer/feature/cache

fix(cache): enhance tmdb match_web rate-limiting and caching
2026-06-28 19:21:47 +08:00 · 2025-01-22 15:27:08 +08:00
parent 8189de589a 5c7bd95f6b
commit 9afb904d40
2 changed files with 94 additions and 47 deletions
--- a/app/core/cache.py
+++ b/app/core/cache.py
@@ -35,6 +35,17 @@ class CacheBackend(ABC):
        """
        pass

+    @abstractmethod
+    def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool:
+        """
+        判断缓存键是否存在
+
+        :param key: 缓存的键
+        :param region: 缓存的区
+        :return: 存在返回 True，否则返回 False
+        """
+        pass
+
    @abstractmethod
    def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Any:
        """
@@ -130,6 +141,19 @@ class CacheToolsBackend(CacheBackend):
        # 设置缓存值
        region_cache[key] = value

+    def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool:
+        """
+        判断缓存键是否存在
+
+        :param key: 缓存的键
+        :param region: 缓存的区
+        :return: 存在返回 True，否则返回 False
+        """
+        region_cache = self.__get_region_cache(region)
+        if region_cache is None:
+            return False
+        return key in region_cache
+
    def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Any:
        """
        获取缓存的值
@@ -294,6 +318,21 @@ class RedisBackend(CacheBackend):
        except Exception as e:
            logger.error(f"Failed to set key: {key} in region: {region}, error: {e}")

+    def exists(self, key: str, region: str = DEFAULT_CACHE_REGION) -> bool:
+        """
+        判断缓存键是否存在
+
+        :param key: 缓存的键
+        :param region: 缓存的区
+        :return: 存在返回 True，否则返回 False
+        """
+        try:
+            redis_key = self.get_redis_key(region, key)
+            return self.client.exists(redis_key) == 1
+        except Exception as e:
+            logger.error(f"Failed to exists key: {key} region: {region}, error: {e}")
+            return False
+
    def get(self, key: str, region: str = DEFAULT_CACHE_REGION) -> Optional[Any]:
        """
        获取缓存的值
@@ -392,7 +431,7 @@ def cached(region: Optional[str] = None, maxsize: int = 1000, ttl: int = 1800,
    :param maxsize: 缓存的最大条目数，默认值为 1000
    :param ttl: 缓存的存活时间，单位秒，默认值为 1800
    :param skip_none: 跳过 None 缓存，默认为 True
-    :param skip_empty: 跳过空值缓存（如 [], {}, "", set()），默认为 False
+    :param skip_empty: 跳过空值缓存（如 None, [], {}, "", set()），默认为 False
    :return: 装饰器函数
    """

@@ -405,7 +444,7 @@ def cached(region: Optional[str] = None, maxsize: int = 1000, ttl: int = 1800,
        """
        if skip_none and value is None:
            return False
-        # if disable_empty and value in [[], {}, "", set()]:
+        # if skip_empty and value in [None, [], {}, "", set()]:
        if skip_empty and not value:
            return False
        return True
--- a/app/modules/themoviedb/tmdbapi.py
+++ b/app/modules/themoviedb/tmdbapi.py
@@ -8,8 +8,10 @@ from lxml import etree
 from app.core.cache import cached
 from app.core.config import settings
 from app.log import logger
+from app.schemas import APIRateLimitException
 from app.schemas.types import MediaType
 from app.utils.http import RequestUtils
+from app.utils.limit import rate_limit_exponential
 from app.utils.string import StringUtils
 from .tmdbv3api import TMDb, Search, Movie, TV, Season, Episode, Discover, Trending, Person, Collection
 from .tmdbv3api.exceptions import TMDbException
@@ -492,6 +494,7 @@ class TmdbApi:
            return ret_info

    @cached(maxsize=settings.CACHE_CONF["tmdb"], ttl=settings.CACHE_CONF["meta"])
+    @rate_limit_exponential(source="match_tmdb_web", max_wait=1800, enable_logging=True)
    def match_web(self, name: str, mtype: MediaType) -> Optional[dict]:
        """
        搜索TMDB网站，直接抓取结果，结果只有一条时才返回
@@ -504,51 +507,56 @@ class TmdbApi:
            return {}
        logger.info("正在从TheDbMovie网站查询：%s ..." % name)
        tmdb_url = "https://www.themoviedb.org/search?query=%s" % quote(name)
-        res = RequestUtils(timeout=5, ua=settings.USER_AGENT).get_res(url=tmdb_url)
-        if res and res.status_code == 200:
-            html_text = res.text
-            if not html_text:
-                return None
-            try:
-                tmdb_links = []
-                html = etree.HTML(html_text)
-                if mtype == MediaType.TV:
-                    links = html.xpath("//a[@data-id and @data-media-type='tv']/@href")
-                else:
-                    links = html.xpath("//a[@data-id]/@href")
-                for link in links:
-                    if not link or (not link.startswith("/tv") and not link.startswith("/movie")):
-                        continue
-                    if link not in tmdb_links:
-                        tmdb_links.append(link)
-                if len(tmdb_links) == 1:
-                    tmdbinfo = self.get_info(
-                        mtype=MediaType.TV if tmdb_links[0].startswith("/tv") else MediaType.MOVIE,
-                        tmdbid=tmdb_links[0].split("/")[-1])
-                    if tmdbinfo:
-                        if mtype == MediaType.TV and tmdbinfo.get('media_type') != MediaType.TV:
-                            return {}
-                        if tmdbinfo.get('media_type') == MediaType.MOVIE:
-                            logger.info("%s 从WEB识别到 电影：TMDBID=%s, 名称=%s, 上映日期=%s" % (
-                                name,
-                                tmdbinfo.get('id'),
-                                tmdbinfo.get('title'),
-                                tmdbinfo.get('release_date')))
-                        else:
-                            logger.info("%s 从WEB识别到 电视剧：TMDBID=%s, 名称=%s, 首播日期=%s" % (
-                                name,
-                                tmdbinfo.get('id'),
-                                tmdbinfo.get('name'),
-                                tmdbinfo.get('first_air_date')))
-                    return tmdbinfo
-                elif len(tmdb_links) > 1:
-                    logger.info("%s TMDB网站返回数据过多：%s" % (name, len(tmdb_links)))
-                else:
-                    logger.info("%s TMDB网站未查询到媒体信息！" % name)
-            except Exception as err:
-                logger.error(f"从TheDbMovie网站查询出错：{str(err)}")
-                return None
-        return None
+        res = RequestUtils(timeout=5, ua=settings.USER_AGENT, proxies=settings.PROXY).get_res(url=tmdb_url)
+        if res is None:
+            return None
+        if res.status_code == 429:
+            raise APIRateLimitException("触发TheDbMovie网站限流，获取媒体信息失败")
+        if res.status_code != 200:
+            return {}
+        html_text = res.text
+        if not html_text:
+            return {}
+        try:
+            tmdb_links = []
+            html = etree.HTML(html_text)
+            if mtype == MediaType.TV:
+                links = html.xpath("//a[@data-id and @data-media-type='tv']/@href")
+            else:
+                links = html.xpath("//a[@data-id]/@href")
+            for link in links:
+                if not link or (not link.startswith("/tv") and not link.startswith("/movie")):
+                    continue
+                if link not in tmdb_links:
+                    tmdb_links.append(link)
+            if len(tmdb_links) == 1:
+                tmdbinfo = self.get_info(
+                    mtype=MediaType.TV if tmdb_links[0].startswith("/tv") else MediaType.MOVIE,
+                    tmdbid=tmdb_links[0].split("/")[-1])
+                if tmdbinfo:
+                    if mtype == MediaType.TV and tmdbinfo.get('media_type') != MediaType.TV:
+                        return {}
+                    if tmdbinfo.get('media_type') == MediaType.MOVIE:
+                        logger.info("%s 从WEB识别到 电影：TMDBID=%s, 名称=%s, 上映日期=%s" % (
+                            name,
+                            tmdbinfo.get('id'),
+                            tmdbinfo.get('title'),
+                            tmdbinfo.get('release_date')))
+                    else:
+                        logger.info("%s 从WEB识别到 电视剧：TMDBID=%s, 名称=%s, 首播日期=%s" % (
+                            name,
+                            tmdbinfo.get('id'),
+                            tmdbinfo.get('name'),
+                            tmdbinfo.get('first_air_date')))
+                return tmdbinfo
+            elif len(tmdb_links) > 1:
+                logger.info("%s TMDB网站返回数据过多：%s" % (name, len(tmdb_links)))
+            else:
+                logger.info("%s TMDB网站未查询到媒体信息！" % name)
+        except Exception as err:
+            logger.error(f"从TheDbMovie网站查询出错：{str(err)}")
+            return {}
+        return {}

    def get_info(self,
                 mtype: MediaType,