From c02b7eecc94a0723d011929cc0de34e95613db20 Mon Sep 17 00:00:00 2001 From: hotyue <52734432+hotyue@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:35:20 +0000 Subject: [PATCH] =?UTF-8?q?feat(scripts):=20=E5=BC=95=E5=85=A5=20v3.6.4=20?= =?UTF-8?q?=E6=B4=BB=E4=BD=93=E6=96=B0=E9=97=BB=E8=9E=8D=E5=90=88=E5=BC=95?= =?UTF-8?q?=E6=93=8E=EF=BC=8C=E9=87=8D=E6=9E=84=20Action=20=E6=B5=81?= =?UTF-8?q?=E6=B0=B4=E7=BA=BF=E5=AE=9E=E7=8E=B0=E5=86=B7=E7=83=AD=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E5=90=88=E5=B9=B6=E5=8E=9F=E5=AD=90=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/daily_keywords.yml | 21 +++--- scripts/fetch_trust_urls.py | 102 +++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 8 deletions(-) create mode 100644 scripts/fetch_trust_urls.py diff --git a/.github/workflows/daily_keywords.yml b/.github/workflows/daily_keywords.yml index 2644910..4045837 100644 --- a/.github/workflows/daily_keywords.yml +++ b/.github/workflows/daily_keywords.yml @@ -1,8 +1,8 @@ -name: Daily Trends Factory +name: Daily Data Factory on: schedule: - # 每天 UTC 18:00 运行 (北京时间凌晨 02:00) + # 每天 UTC 18:00 (北京时间凌晨 02:00) 执行,赶在节点凌晨3点更新前造好子弹 - cron: '0 18 * * *' workflow_dispatch: @@ -10,7 +10,7 @@ permissions: contents: write jobs: - update-trends: + update-data: runs-on: ubuntu-latest steps: - name: Checkout Code @@ -23,15 +23,20 @@ jobs: with: python-version: '3.10' - - name: Execute Trends Engine + - name: Execute Trends Engine (搜索词库) run: python scripts/fetch_trends.py - - name: Commit and Push + - name: Execute Trust URL Engine (活体新闻流融合) + run: python scripts/fetch_trust_urls.py + + - name: Commit and Push All Data run: | git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" + # 一揽子添加搜索词库和地区白名单的变化 git add data/keywords/ + git add data/regions/ # 防御机制:如果没有新数据,就静默退出,不产生空提交 if git diff --staged --quiet; then @@ -39,6 +44,6 @@ 
jobs: exit 0 fi - # 策略:放弃危险的 amend 强制覆盖,采用带日期的标准安全提交 - git commit -m "chore(data): 🤖 自动机兵:刷新全战区热点词库 [$(date +'%Y-%m-%d')]" - git push origin main \ No newline at end of file + # 策略:将两路数据的更新合并为一个原子提交 + git commit -m "chore(data): 🤖 自动机兵:同步全战区热点词库与活体新闻流 [$(date +'%Y-%m-%d')]" + git push origin main diff --git a/scripts/fetch_trust_urls.py b/scripts/fetch_trust_urls.py new file mode 100644 index 0000000..09a08c1 --- /dev/null +++ b/scripts/fetch_trust_urls.py @@ -0,0 +1,102 @@ +import urllib.request +import xml.etree.ElementTree as ET +import os +import json +import random + +# ================== [路径防弹装甲] ================== +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +PROJECT_ROOT = os.path.dirname(SCRIPT_DIR) +REGIONS_DIR = os.path.join(PROJECT_ROOT, "data", "regions") +# ==================================================== + +HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' +} + +# 全球骨干新闻 RSS 监听矩阵 +RSS_FEEDS = { + "US": ["http://rss.cnn.com/rss/cnn_topstories.rss", "https://feeds.npr.org/1001/rss.xml"], + "UK": ["http://feeds.bbci.co.uk/news/rss.xml"], + "AU": ["https://www.abc.net.au/news/feed/51120/rss.xml"], + "CA": ["https://www.cbc.ca/cmlink/rss-topstories"], + "DE": ["https://www.tagesschau.de/xml/rss2"], + "FR": ["https://www.france24.com/fr/rss"], + "ES": ["https://feeds.elpais.com/mrss-s/pages/ep/site/elpais.com/portada"], + "JP": ["https://news.yahoo.co.jp/rss/topics/top-picks.xml"], + "HK": ["https://rthk.hk/rss/expressnews_cnews.xml"], + "TW": ["https://news.pts.org.tw/xml/news.xml"], + "KR": ["https://www.yonhapnewstv.co.kr/category/news/headline/feed/"], + "SG": ["https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml"], + "NL": ["https://feeds.nos.nl/nosnieuwsalgemeen"], + "VN": ["https://vnexpress.net/rss/tin-moi-nhat.rss"] +} + +def fetch_rss_links(region_code, max_items=15): + """抓取该战区最新的 RSS 新闻链接""" + feeds = 
RSS_FEEDS.get(region_code, []) + if not feeds: + return [] + + links = [] + for url in feeds: + try: + req = urllib.request.Request(url, headers=HEADERS) + with urllib.request.urlopen(req, timeout=10) as response: + xml_data = response.read() + root = ET.fromstring(xml_data) + for item in root.findall('.//item'): + link = item.find('link') + if link is not None and link.text: + clean_link = link.text.strip() + if clean_link.startswith('http'): + links.append(clean_link) + except Exception as e: + print(f"⚠️ [{region_code}] RSS 抓取异常 ({url}): {e}") + + # 去重并截取最新 + return list(dict.fromkeys(links))[:max_items] + +def process_json_file(file_path, region_code): + """融合静态基石与动态新闻""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + trust_mod = data.get("trust_module", {}) + if not trust_mod or "static_urls" not in trust_mod: + return + + static_urls = trust_mod.get("static_urls", []) + + # 抓取今日该战区的活体新闻流 + daily_news_urls = fetch_rss_links(region_code) + + # 战术混合:基石(保证高权重) + 新闻(保证活体动态) + combined_urls = static_urls + daily_news_urls + + # 深度洗牌,打破机械顺序特征 + combined_urls = list(set(combined_urls)) + random.shuffle(combined_urls) + + # 覆写回供 Agent 拉取的 white_urls + trust_mod["white_urls"] = combined_urls + data["trust_module"] = trust_mod + + with open(file_path, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + print(f"✅ [信用融合] {os.path.basename(file_path)}: 骨干 {len(static_urls)} 条 + 活体 {len(daily_news_urls)} 条") + + except Exception as e: + print(f"❌ [处理失败] {file_path}: {e}") + +if __name__ == '__main__': + print("========== 启动 IP-Sentinel 活体新闻流融合引擎 ==========") + for root_dir, _, files in os.walk(REGIONS_DIR): + for file in files: + if file.endswith(".json"): + file_path = os.path.join(root_dir, file) + region_code = os.path.relpath(file_path, REGIONS_DIR).split(os.sep)[0].removesuffix(".json") + process_json_file(file_path, region_code) + print("========== 融合引擎执行完毕 ==========")