mirror of
https://github.com/halfwaystudent/douyin-sparkflow.git
synced 2026-06-27 10:21:24 +08:00
Make friend list refresh resilient
This commit is contained in:
@@ -1,25 +1,27 @@
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
from core.browser import get_browser
|
||||
|
||||
|
||||
CHAT_PAGE_URL = "https://creator.douyin.com/creator-micro/data/following/chat"
|
||||
FRIENDS_TAB_SELECTOR = 'xpath=//*[@id="sub-app"]/div/div/div[1]/div[2]'
|
||||
TARGET_SELECTOR = (
|
||||
'xpath=//*[@id="sub-app"]/div/div[1]/div[2]/div[2]'
|
||||
'//div[contains(@class, "semi-list-item-body semi-list-item-body-flex-start")]'
|
||||
FRIENDS_TAB_SELECTORS = (
|
||||
'xpath=//*[@id="sub-app"]/div/div/div[1]/div[2]',
|
||||
'xpath=//*[@id="sub-app"]//*[self::div or self::span or self::button][contains(normalize-space(.), "朋友私信") and string-length(normalize-space(.)) <= 20]',
|
||||
'xpath=//*[@id="sub-app"]//*[self::div or self::span or self::button][normalize-space()="朋友"]',
|
||||
)
|
||||
SCROLLABLE_FRIENDS_SELECTOR = (
|
||||
'xpath=//*[@id="sub-app"]/div/div[1]/div[2]/div[2]/div/div/div[3]/div/div/div/ul/div'
|
||||
FRIEND_NAME_SELECTOR = 'xpath=//*[@id="sub-app"]//span[contains(@class, "item-header-name-")]'
|
||||
SCROLLABLE_FRIENDS_SELECTORS = (
|
||||
'xpath=//*[@id="sub-app"]/div/div[1]/div[2]/div[2]/div/div/div[3]/div/div/div/ul/div',
|
||||
'xpath=//*[@id="sub-app"]//ul/div',
|
||||
'xpath=//*[@id="sub-app"]//ul',
|
||||
)
|
||||
NO_MORE_SELECTOR = 'xpath=//div[contains(@class, "no-more-tip-ftdJnu")]'
|
||||
LOADING_SELECTOR = 'xpath=//div[contains(@class, "semi-spin")]'
|
||||
FIRST_FRIEND_SELECTOR = (
|
||||
'xpath=//*[@id="sub-app"]/div/div/div[2]/div[2]/div/div/div[1]/div/div/div/ul/div/div/div[1]/li/div'
|
||||
)
|
||||
FRIEND_NAME_SELECTOR = """xpath=.//span[contains(@class, "item-header-name-")]"""
|
||||
LOGIN_MASK_SELECTORS = [".login-mask", ".login-guide-container", ".login-img-code-wrapper"]
|
||||
EMPTY_LIST_KEYWORDS = ("暂无", "没有", "空空", "还没有")
|
||||
LOGIN_KEYWORDS = ("扫码登录", "登录抖音", "请登录", "登录已过期", "重新登录")
|
||||
|
||||
|
||||
def update_collection_progress(new_names_count, no_more_visible, scroll_moved, idle_rounds, stuck_rounds, idle_limit=5, stuck_limit=2):
|
||||
@@ -30,6 +32,10 @@ def update_collection_progress(new_names_count, no_more_visible, scroll_moved, i
|
||||
|
||||
|
||||
async def _ensure_logged_in(page):
|
||||
current_url = page.url or ""
|
||||
if "login" in current_url or "passport" in current_url:
|
||||
raise RuntimeError("账号登录已失效,请重新扫码登录")
|
||||
|
||||
for selector in LOGIN_MASK_SELECTORS:
|
||||
try:
|
||||
locator = page.locator(selector).first
|
||||
@@ -41,12 +47,115 @@ async def _ensure_logged_in(page):
|
||||
continue
|
||||
|
||||
|
||||
async def collect_friend_names(page):
|
||||
await page.wait_for_selector(FRIENDS_TAB_SELECTOR, timeout=30000)
|
||||
await page.locator(FRIENDS_TAB_SELECTOR).click()
|
||||
async def _body_text(page, limit=600):
    """Return the page body's text, whitespace-collapsed and truncated.

    The ``body`` locator's inner text is read with a 2000 ms timeout. Runs of
    whitespace are collapsed to single spaces and the result is cut to at
    most ``limit`` characters.

    Args:
        page: Object exposing ``locator("body")`` (a browser page).
        limit: Maximum number of characters to return.

    Returns:
        The normalized text, or ``""`` if reading the body raised.
    """
    try:
        raw = await page.locator("body").inner_text(timeout=2000)
    except Exception:
        # Best-effort read: callers only use this text for diagnostics.
        return ""
    else:
        collapsed = " ".join(raw.split())
        return collapsed[:limit]
|
||||
|
||||
await page.wait_for_selector(FIRST_FRIEND_SELECTOR, timeout=30000)
|
||||
await page.locator(FIRST_FRIEND_SELECTOR).click()
|
||||
|
||||
async def _page_diagnosis(page):
    """Describe why the friend list did not show up on ``page``.

    Runs ``_ensure_logged_in`` first, then inspects the body text returned
    by ``_body_text``.

    Returns:
        A fixed message when the body text contains an empty-list keyword,
        otherwise a message embedding the current URL and the body text.

    Raises:
        RuntimeError: If the body text contains any login keyword. Whatever
            ``_ensure_logged_in`` raises also propagates.
    """
    await _ensure_logged_in(page)
    snapshot = await _body_text(page)

    def mentions(keywords):
        # True when any keyword occurs as a substring of the body text.
        return any(word in snapshot for word in keywords)

    if mentions(LOGIN_KEYWORDS):
        raise RuntimeError("账号登录已失效或页面要求重新登录,请重新扫码登录")
    if mentions(EMPTY_LIST_KEYWORDS):
        return "页面提示当前没有可读取的朋友私信好友"
    return f"未等到好友列表。当前URL={page.url},页面提示={snapshot or '空'}"
|
||||
|
||||
|
||||
async def _open_friends_tab(page):
    """Click the friends direct-message tab unless friend names already exist.

    Each selector in ``FRIENDS_TAB_SELECTORS`` is tried in order: wait up to
    10 s for its first match to become visible, click it, pause 1.5 s, and
    return. A failing selector is remembered and the next one is tried.

    Args:
        page: Browser page exposing ``locator``.

    Raises:
        RuntimeError: If no selector could be waited for and clicked. The
            last underlying error is attached as ``__cause__``.
    """
    # Friend names are already in the DOM, so there is nothing to click.
    if await page.locator(FRIEND_NAME_SELECTOR).count() > 0:
        return

    last_error = None
    for selector in FRIENDS_TAB_SELECTORS:
        locator = page.locator(selector).first
        try:
            await locator.wait_for(state="visible", timeout=10000)
            await locator.click(timeout=5000)
        except Exception as exc:
            last_error = exc
            continue
        # Pause after a successful click before the caller inspects the page.
        await asyncio.sleep(1.5)
        return

    # NOTE(review): this message contains "登录" and usually a timeout text;
    # callers that classify errors by substring may match those first.
    # Confirm the intended classification.
    raise RuntimeError(
        f"未找到“朋友私信”入口,可能页面结构变化或账号未登录。最后错误:{last_error}"
    ) from last_error
|
||||
|
||||
|
||||
async def _wait_for_friend_name_or_empty(page, timeout_ms=45000):
    """Poll ``page`` until a friend name shows up or the list is reported empty.

    Each round re-checks the login state, looks for friend-name elements,
    then scans the first 300 characters of body text for login and
    empty-list keywords before sleeping.

    Args:
        page: Browser page exposing ``locator``.
        timeout_ms: Polling budget in milliseconds.

    Returns:
        ``True`` when a friend-name element exists and is visible (or its
        visibility check raised); ``False`` when the body text contains an
        empty-list keyword.

    Raises:
        RuntimeError: If the body text contains a login keyword, or if the
            deadline passes; the timeout message comes from
            ``_page_diagnosis``. Errors from ``_ensure_logged_in`` propagate.
    """
    deadline = time.monotonic() + timeout_ms / 1000
    while time.monotonic() < deadline:
        await _ensure_logged_in(page)

        names = page.locator(FRIEND_NAME_SELECTOR)
        if await names.count() > 0:
            try:
                if await names.first.is_visible():
                    return True
            except Exception:
                # The element exists but visibility could not be checked;
                # treat the list as present rather than fail the wait.
                return True

        body_text = await _body_text(page, limit=300)
        # Check login prompts before empty-list hints, matching
        # _page_diagnosis: a login prompt may contain a generic word such as
        # "没有" and must not be reported as an empty friend list.
        if any(keyword in body_text for keyword in LOGIN_KEYWORDS):
            raise RuntimeError("账号登录已失效或页面要求重新登录,请重新扫码登录")
        if any(keyword in body_text for keyword in EMPTY_LIST_KEYWORDS):
            return False

        # Wait a little longer while the loading spinner is visible.
        loading = page.locator(LOADING_SELECTOR).first
        spinner_visible = await loading.count() > 0 and await loading.is_visible()
        await asyncio.sleep(1.5 if spinner_visible else 1)

    diagnosis = await _page_diagnosis(page)
    raise RuntimeError(diagnosis)
|
||||
|
||||
|
||||
async def _collect_visible_friend_names(page):
    """Return the friend names currently matched by ``FRIEND_NAME_SELECTOR``.

    Each element's inner text is stripped of surrounding whitespace; entries
    that are empty after stripping are dropped. Order follows the locator's
    results and duplicates are kept.
    """
    raw_texts = await page.locator(FRIEND_NAME_SELECTOR).all_inner_texts()
    stripped = (text.strip() for text in raw_texts)
    return [label for label in stripped if label]
|
||||
|
||||
|
||||
async def _find_scrollable_friends_element(page):
    """Locate the element used to scroll the friend list.

    First tries every selector in ``SCROLLABLE_FRIENDS_SELECTORS`` (2 s
    timeout each) and returns the first element handle obtained. If none
    resolves, runs a script in the page that walks up from the first
    friend-name span to the nearest ancestor that overflows and has a
    scrolling overflow style, falling back to the document scrolling element.

    Returns:
        An element handle, or ``None`` if the in-page fallback raised.
    """
    for candidate in SCROLLABLE_FRIENDS_SELECTORS:
        try:
            found = await page.locator(candidate).first.element_handle(timeout=2000)
        except Exception:
            # Selector did not resolve in time; try the next one.
            continue
        if found:
            return found

    try:
        js_handle = await page.evaluate_handle(
            """() => {
                const firstName = document.querySelector('#sub-app span[class*="item-header-name-"]');
                let node = firstName;
                while (node && node !== document.body) {
                    const style = window.getComputedStyle(node);
                    const overflow = `${style.overflow} ${style.overflowY}`;
                    if (node.scrollHeight > node.clientHeight + 20 && /(auto|scroll|overlay)/.test(overflow)) {
                        return node;
                    }
                    node = node.parentElement;
                }
                return document.scrollingElement || document.documentElement;
            }"""
        )
        return js_handle.as_element()
    except Exception:
        return None
|
||||
|
||||
|
||||
async def collect_friend_names(page):
|
||||
await _open_friends_tab(page)
|
||||
has_friends = await _wait_for_friend_name_or_empty(page)
|
||||
if not has_friends:
|
||||
return []
|
||||
await asyncio.sleep(2)
|
||||
|
||||
found_names = []
|
||||
@@ -55,13 +164,8 @@ async def collect_friend_names(page):
|
||||
stuck_rounds = 0
|
||||
|
||||
while True:
|
||||
target_elements = await page.locator(TARGET_SELECTOR).all()
|
||||
new_names_count = 0
|
||||
for element in target_elements:
|
||||
try:
|
||||
name = (await element.locator(FRIEND_NAME_SELECTOR).inner_text()).strip()
|
||||
except Exception:
|
||||
continue
|
||||
for name in await _collect_visible_friend_names(page):
|
||||
if not name or name in seen_names:
|
||||
continue
|
||||
seen_names.add(name)
|
||||
@@ -76,11 +180,12 @@ async def collect_friend_names(page):
|
||||
if await loading.count() > 0 and await loading.is_visible():
|
||||
await asyncio.sleep(1.5)
|
||||
|
||||
scrollable_element = await page.locator(SCROLLABLE_FRIENDS_SELECTOR).element_handle()
|
||||
scrollable_element = await _find_scrollable_friends_element(page)
|
||||
if not scrollable_element:
|
||||
if found_names:
|
||||
return found_names
|
||||
raise RuntimeError("未找到好友列表滚动容器")
|
||||
diagnosis = await _page_diagnosis(page)
|
||||
raise RuntimeError(f"未找到好友列表滚动容器;{diagnosis}")
|
||||
|
||||
before_top = await page.evaluate("(element) => element.scrollTop", scrollable_element)
|
||||
await page.evaluate("(element) => element.scrollTop += 800", scrollable_element)
|
||||
|
||||
@@ -9,6 +9,14 @@ from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from core.browser import get_browser
|
||||
from core.friends import (
|
||||
FRIEND_NAME_SELECTOR,
|
||||
LOADING_SELECTOR,
|
||||
NO_MORE_SELECTOR,
|
||||
_find_scrollable_friends_element,
|
||||
_open_friends_tab,
|
||||
_wait_for_friend_name_or_empty,
|
||||
)
|
||||
from core.msg_builder import build_message
|
||||
from core.protocol_dispatch import run_protocol_tasks
|
||||
from utils.config import get_config, get_userData, normalize_unique_id, save_userData
|
||||
@@ -147,6 +155,8 @@ def classify_browser_failure(stage, exc):
|
||||
detail = str(exc or "")
|
||||
lowered = detail.lower()
|
||||
|
||||
if "登录" in detail or "login" in lowered or "passport" in lowered:
|
||||
return "login_expired"
|
||||
if "page crashed" in lowered or "target page, context or browser has been closed" in lowered:
|
||||
return "page_crashed"
|
||||
if "timeout" in lowered:
|
||||
@@ -157,9 +167,11 @@ def classify_browser_failure(stage, exc):
|
||||
if stage == "friend_list":
|
||||
return "friend_list_timeout"
|
||||
return "timeout"
|
||||
if "未找到“朋友私信”入口" in detail:
|
||||
return "friend_tab_not_found"
|
||||
if "unable to locate chat input" in lowered:
|
||||
return "chat_input_not_found"
|
||||
if "could not find the friend list scroll container" in lowered:
|
||||
if "could not find the friend list scroll container" in lowered or "未找到好友列表滚动容器" in detail:
|
||||
return "friend_list_container_missing"
|
||||
if "chat input still contains" in lowered:
|
||||
return "send_unconfirmed"
|
||||
@@ -170,27 +182,43 @@ def classify_browser_failure(stage, exc):
|
||||
return "unknown"
|
||||
|
||||
|
||||
async def _click_friend_entry(name_locator, account_name, target_name):
    """Click the list entry that contains a matched friend-name element.

    Several ancestor selectors relative to ``name_locator`` are tried in
    order. The first one that exists, scrolls into view and accepts a click
    wins. If none works, the name element itself is clicked.

    Args:
        name_locator: Locator for the friend-name element.
        account_name: Account label, used only in the error message.
        target_name: Friend name, used only in the error message.

    Raises:
        RuntimeError: If the fallback click on the name element fails. The
            cause is the last ancestor-click error when there was one,
            otherwise the fallback error itself.
    """
    ancestor_selectors = (
        "xpath=ancestor::li[1]",
        'xpath=ancestor::div[contains(@class, "semi-list-item")][1]',
        'xpath=ancestor::div[contains(@class, "semi-list-item-body")][1]',
        'xpath=ancestor::*[@role="listitem"][1]',
        "xpath=ancestor::div[3]",
    )

    ancestor_failure = None
    for ancestor in ancestor_selectors:
        try:
            row = name_locator.locator(ancestor).first
            if await row.count() > 0:
                await row.scroll_into_view_if_needed(timeout=5000)
                await row.click(timeout=5000)
                return
        except Exception as err:
            # Remember the failure and move on to the next ancestor.
            ancestor_failure = err

    # Last resort: click the name element directly.
    try:
        await name_locator.scroll_into_view_if_needed(timeout=5000)
        await name_locator.click(timeout=5000)
    except Exception as err:
        message = (
            f"Account {account_name} found target friend {target_name}, but could not click the friend entry: {err}"
        )
        raise RuntimeError(message) from (ancestor_failure or err)
|
||||
|
||||
|
||||
async def scroll_and_select_user(page, account_name, targets):
|
||||
friends_tab_selector = 'xpath=//*[@id="sub-app"]/div/div/div[1]/div[2]'
|
||||
target_selector = (
|
||||
'xpath=//*[@id="sub-app"]/div/div[1]/div[2]/div[2]'
|
||||
'//div[contains(@class, "semi-list-item-body semi-list-item-body-flex-start")]'
|
||||
)
|
||||
scrollable_friends_selector = (
|
||||
'xpath=//*[@id="sub-app"]/div/div[1]/div[2]/div[2]/div/div/div[3]/div/div/div/ul/div'
|
||||
)
|
||||
no_more_selector = 'xpath=//div[contains(@class, "no-more-tip-ftdJnu")]'
|
||||
loading_selector = 'xpath=//div[contains(@class, "semi-spin")]'
|
||||
first_friend_selector = (
|
||||
'xpath=//*[@id="sub-app"]/div/div/div[2]/div[2]/div/div/div[1]/div/div/div/ul/div/div/div[1]/li/div'
|
||||
)
|
||||
|
||||
logger.debug("Account %s is opening the friends tab", account_name)
|
||||
await page.wait_for_selector(friends_tab_selector)
|
||||
await page.locator(friends_tab_selector).click()
|
||||
|
||||
await page.wait_for_selector(first_friend_selector)
|
||||
await page.locator(first_friend_selector).click()
|
||||
await _open_friends_tab(page)
|
||||
has_friends = await _wait_for_friend_name_or_empty(page, timeout_ms=45000)
|
||||
if not has_friends:
|
||||
logger.warning("Account %s friend list is empty or unavailable", account_name)
|
||||
return
|
||||
await asyncio.sleep(2)
|
||||
|
||||
normalized_targets = {
|
||||
@@ -204,10 +232,15 @@ async def scroll_and_select_user(page, account_name, targets):
|
||||
last_new_friend_at = scan_started_at
|
||||
max_scan_seconds = 300
|
||||
idle_scan_seconds = 120
|
||||
stuck_rounds = 0
|
||||
|
||||
def missing_target_names():
|
||||
return sorted(normalized_targets[item] for item in remaining_targets)
|
||||
|
||||
if not remaining_targets:
|
||||
logger.info("Account %s has no normalized target friends to scan", account_name)
|
||||
return
|
||||
|
||||
while True:
|
||||
now_monotonic = asyncio.get_running_loop().time()
|
||||
if now_monotonic - scan_started_at > max_scan_seconds:
|
||||
@@ -220,12 +253,11 @@ async def scroll_and_select_user(page, account_name, targets):
|
||||
)
|
||||
return
|
||||
|
||||
target_elements = await page.locator(target_selector).all()
|
||||
name_elements = await page.locator(FRIEND_NAME_SELECTOR).all()
|
||||
|
||||
for element in target_elements:
|
||||
for name_locator in name_elements:
|
||||
try:
|
||||
span = element.locator("""xpath=.//span[contains(@class, "item-header-name-")]""")
|
||||
target_name = await span.inner_text()
|
||||
target_name = (await name_locator.inner_text(timeout=3000)).strip()
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
@@ -241,7 +273,7 @@ async def scroll_and_select_user(page, account_name, targets):
|
||||
|
||||
matched_target_name = normalized_targets.get(normalized_target_name)
|
||||
if matched_target_name:
|
||||
await element.click()
|
||||
await _click_friend_entry(name_locator, account_name, target_name)
|
||||
logger.info("Account %s selected target friend %s", account_name, target_name)
|
||||
if matched_target_name != target_name:
|
||||
logger.info(
|
||||
@@ -258,7 +290,8 @@ async def scroll_and_select_user(page, account_name, targets):
|
||||
return
|
||||
break
|
||||
else:
|
||||
if await page.locator(no_more_selector).count() > 0:
|
||||
no_more = page.locator(NO_MORE_SELECTOR).first
|
||||
if await no_more.count() > 0 and await no_more.is_visible():
|
||||
logger.warning(
|
||||
"Account %s reached the end of the friend list. Missing targets: %s",
|
||||
account_name,
|
||||
@@ -277,16 +310,31 @@ async def scroll_and_select_user(page, account_name, targets):
|
||||
)
|
||||
return
|
||||
|
||||
if await page.locator(loading_selector).count() > 0:
|
||||
loading = page.locator(LOADING_SELECTOR).first
|
||||
if await loading.count() > 0 and await loading.is_visible():
|
||||
logger.debug("Account %s is waiting for more friends to load", account_name)
|
||||
await asyncio.sleep(1.5)
|
||||
|
||||
scrollable_element = await page.locator(scrollable_friends_selector).element_handle()
|
||||
scrollable_element = await _find_scrollable_friends_element(page)
|
||||
if not scrollable_element:
|
||||
raise RuntimeError(f"Account {account_name} could not find the friend list scroll container")
|
||||
|
||||
before_top = await page.evaluate("(element) => element.scrollTop", scrollable_element)
|
||||
await page.evaluate("(element) => element.scrollTop += 800", scrollable_element)
|
||||
await asyncio.sleep(1.5)
|
||||
after_top = await page.evaluate("(element) => element.scrollTop", scrollable_element)
|
||||
if after_top > before_top:
|
||||
stuck_rounds = 0
|
||||
else:
|
||||
stuck_rounds += 1
|
||||
if stuck_rounds >= 3:
|
||||
logger.warning(
|
||||
"Account %s friend list stopped scrolling. Missing targets: %s; scannedFriends=%s",
|
||||
account_name,
|
||||
missing_target_names(),
|
||||
len(found_usernames),
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
def _is_manual_run():
|
||||
|
||||
Reference in New Issue
Block a user