diff --git a/packages/boss-auto-browse-and-chat/chat-handler.mjs b/packages/boss-auto-browse-and-chat/chat-handler.mjs index febdc6c..97d7334 100644 --- a/packages/boss-auto-browse-and-chat/chat-handler.mjs +++ b/packages/boss-auto-browse-and-chat/chat-handler.mjs @@ -66,7 +66,7 @@ export async function viewCandidateDetail (frame, candidateItem, options = {}) { if (getInterceptedData) { const intercepted = getInterceptedData() - const resumeResult = await getResumeData(frame, intercepted) + const resumeResult = await getResumeData(frame, intercepted, { getCapturedText: options.getCapturedText }) if (resumeResult.source === 'api' && resumeResult.data) { resumeSource = 'api' resumeText = typeof resumeResult.data === 'string' ? resumeResult.data : JSON.stringify(resumeResult.data) diff --git a/packages/boss-auto-browse-and-chat/chat-page-processor.mjs b/packages/boss-auto-browse-and-chat/chat-page-processor.mjs index 5d77d37..3cf5963 100644 --- a/packages/boss-auto-browse-and-chat/chat-page-processor.mjs +++ b/packages/boss-auto-browse-and-chat/chat-page-processor.mjs @@ -323,6 +323,7 @@ export default async function startBossChatPageProcess (hooksFromCaller, options page: existingPage, getCapturedText, clearCapturedText, + peekCapturedText, jobId = null, retryCandidate = null, processContext = null @@ -636,7 +637,9 @@ export default async function startBossChatPageProcess (hooksFromCaller, options let stableCount = 0 while (Date.now() < canvasDeadline) { await new Promise(r => setTimeout(r, POLL_INTERVAL_MS)) - const currentCount = await page.evaluate(() => (window.__canvasCapturedText || []).length) + const currentCount = typeof peekCapturedText === 'function' + ? 
await peekCapturedText(page) + : await page.evaluate(() => (window.__canvasCapturedText || []).length) if (currentCount > 0 && currentCount === lastCount) { stableCount++ if (stableCount >= STABLE_POLLS_NEEDED) break diff --git a/packages/boss-auto-browse-and-chat/humanMouse.mjs b/packages/boss-auto-browse-and-chat/humanMouse.mjs index 90ef438..2139643 100644 --- a/packages/boss-auto-browse-and-chat/humanMouse.mjs +++ b/packages/boss-auto-browse-and-chat/humanMouse.mjs @@ -1,111 +1,150 @@ -/** - * 拟人鼠标轨迹封装(招聘端专用) - * - * BOSS 对招聘端鼠标移动轨迹进行埋点,直接 page.click() 或 page.mouse.click(x,y) - * 的"瞬移"方式容易被识别为脚本。本模块封装 ghost-cursor,以贝塞尔曲线生成拟人 - * 移动路径,替换所有在招聘端页面上的点击操作。 - * - * 用法: - * import { createHumanCursor } from './humanMouse.mjs' - * const cursor = await createHumanCursor(page) - * await cursor.click(selector) // 先沿轨迹移动,再点击 - * await cursor.move(selector) // 仅移动,不点击 - */ - -/** - * 为给定 Puppeteer page 创建拟人鼠标 cursor。 - * 内部使用 ghost-cursor;若 ghost-cursor 不可用(如包未安装), - * 则 fallback 到普通 page.click(),并打印警告。 - * - * @param {import('puppeteer').Page} page - Puppeteer 页面实例 - * @returns {Promise<{ - * click: (selectorOrPos: string | {x: number, y: number}) => Promise, - * move: (selectorOrPos: string | {x: number, y: number}) => Promise - * }>} - */ -export async function createHumanCursor (page) { - let ghostCursorCreate - try { - const mod = await import('ghost-cursor') - // ghost-cursor 同时支持 ESM default export 和命名 export - ghostCursorCreate = mod.createCursor ?? 
mod.default?.createCursor - } catch { - ghostCursorCreate = null - } - - if (ghostCursorCreate) { - const cursor = ghostCursorCreate(page) - - /** - * 将 selector 字符串或 ElementHandle 解析成 {x, y} 坐标。 - * ghost-cursor 的 click/move 只接受 string selector 或 ElementHandle, - * 传 {x,y} 坐标对象会被误当 ElementHandle 调 element.remoteObject() 崩溃。 - * 统一在封装层解析成坐标,再用 moveTo({x,y}) + page.mouse.click(x,y) 执行。 - */ - const resolvePos = async (selectorOrPos) => { - if (typeof selectorOrPos === 'string') { - const el = await page.$(selectorOrPos) - if (!el) throw new Error(`[humanMouse] element not found: ${selectorOrPos}`) - const box = await el.boundingBox() - if (!box) throw new Error(`[humanMouse] element has no bounding box: ${selectorOrPos}`) - return { x: box.x + box.width / 2, y: box.y + box.height / 2 } - } - // ElementHandle(有 boundingBox 方法) - if (selectorOrPos && typeof selectorOrPos.boundingBox === 'function') { - const box = await selectorOrPos.boundingBox() - if (!box) throw new Error('[humanMouse] ElementHandle has no bounding box') - return { x: box.x + box.width / 2, y: box.y + box.height / 2 } - } - // 已是 {x, y} 坐标对象 - return selectorOrPos - } - - return { - /** - * 沿拟人轨迹移动到目标后点击。使用 moveTo({x,y}) + page.mouse.click(x,y) - * 规避 ghost-cursor 传坐标/ElementHandle 时调 element.evaluate 的崩溃问题。 - * @param {string | {x: number, y: number} | import('puppeteer').ElementHandle} selectorOrPos - */ - async click (selectorOrPos) { - const pos = await resolvePos(selectorOrPos) - await cursor.moveTo(pos) - await page.mouse.click(pos.x, pos.y) - }, - /** - * 沿拟人轨迹移动到目标(不点击) - * @param {string | {x: number, y: number} | import('puppeteer').ElementHandle} selectorOrPos - */ - async move (selectorOrPos) { - const pos = await resolvePos(selectorOrPos) - await cursor.moveTo(pos) - } - } - } - - // Fallback: ghost-cursor 未安装时退化为普通点击(打印警告) - console.warn('[humanMouse] ghost-cursor 未安装,退化为普通 page.click()。建议安装 ghost-cursor 以规避 BOSS 鼠标轨迹埋点检测。') - return { - async click (selectorOrPos) { - if (typeof 
/**
 * Human-like mouse movement wrapper (recruiter side only).
 *
 * BOSS instruments mouse-movement traces on recruiter pages, so a direct
 * page.click() or page.mouse.click(x, y) "teleport" is easy to flag as a script.
 * This module wraps ghost-cursor (Bezier-curve paths) and is meant to replace
 * every click performed on recruiter pages.
 *
 * Usage:
 *   import { createHumanCursor, randomizeInitialCursorPosition } from './humanMouse.mjs'
 *   const cursor = await createHumanCursor(page)
 *   await randomizeInitialCursorPosition(page)
 *   await cursor.click(selector)   // move along a path, then click
 *   await cursor.move(selector)    // move only, no click
 */

// Single source for the fail-fast error text; both failure paths below use it.
const GHOST_CURSOR_UNAVAILABLE_MESSAGE =
  'GHOST_CURSOR_UNAVAILABLE: ghost-cursor failed to load — refusing to run with bot-like clicks. Reinstall dependencies (pnpm -F @geekgeekrun/boss-auto-browse-and-chat install).'

// Module-level cache: after the first successful preflight the import is not repeated.
let cachedGhostCursorCreate = null

/**
 * Check that ghost-cursor can be loaded and return its createCursor function.
 * Fails loudly instead of silently degrading to plain page.click().
 *
 * @returns {Promise<Function>} ghost-cursor's createCursor
 * @throws {Error} message starts with GHOST_CURSOR_UNAVAILABLE; when the import
 *   itself failed, the underlying error is attached as `cause`.
 */
export async function preflightGhostCursor () {
  if (cachedGhostCursorCreate) return cachedGhostCursorCreate
  let mod
  try {
    mod = await import('ghost-cursor')
  } catch (err) {
    // Keep the root cause (missing package, broken install, ...) for diagnosis.
    throw new Error(GHOST_CURSOR_UNAVAILABLE_MESSAGE, { cause: err })
  }
  // ghost-cursor may expose createCursor as a named export or on the default export.
  const createCursor = mod.createCursor ?? mod.default?.createCursor
  if (typeof createCursor !== 'function') {
    throw new Error(GHOST_CURSOR_UNAVAILABLE_MESSAGE)
  }
  cachedGhostCursorCreate = createCursor
  return createCursor
}

/**
 * Pick a random point inside the central band of a box. With the default
 * centerBiasFraction of 0.3 the point lies within center ±30% on each axis,
 * so clicks do not always land on the exact geometric center.
 *
 * @param {{x: number, y: number, width: number, height: number}} box
 * @param {number} [centerBiasFraction=0.3] half-width of the band as a fraction of
 *   the box size; clamped to [0, 0.5] so the point can never fall outside the box
 * @returns {{x: number, y: number}}
 */
function randomizePointInBox (box, centerBiasFraction = 0.3) {
  const spread = Math.min(0.5, Math.max(0, Number(centerBiasFraction) || 0))
  const xFrac = 0.5 - spread + Math.random() * (2 * spread)
  const yFrac = 0.5 - spread + Math.random() * (2 * spread)
  return {
    x: box.x + box.width * xFrac,
    y: box.y + box.height * yFrac
  }
}

/**
 * Resolve a selector string, an ElementHandle-like object, or an explicit
 * {x, y} object into page coordinates.
 * Selector / handle targets get a randomized point inside the element's box;
 * an explicit {x, y} is returned untouched (the caller already chose the spot).
 *
 * @param {import('puppeteer').Page} page
 * @param {string | {x: number, y: number} | import('puppeteer').ElementHandle} target
 * @returns {Promise<{x: number, y: number}>}
 * @throws {Error} when the element is missing, has no bounding box, or the target is invalid
 */
async function resolveTargetPoint (page, target) {
  if (typeof target === 'string') {
    const el = await page.$(target)
    if (!el) throw new Error(`[humanMouse] element not found: ${target}`)
    try {
      const box = await el.boundingBox()
      if (!box) throw new Error(`[humanMouse] element has no bounding box: ${target}`)
      return randomizePointInBox(box)
    } finally {
      // This handle was created here, so release it here; otherwise every click
      // leaves one more live handle behind in a long-running session.
      try {
        await el.dispose?.()
      } catch {
        // Best effort: a failed dispose must not mask the real outcome above.
      }
    }
  }
  // ElementHandle-like (has a boundingBox method); owned by the caller, so not disposed.
  if (target && typeof target.boundingBox === 'function') {
    const box = await target.boundingBox()
    if (!box) throw new Error('[humanMouse] ElementHandle has no bounding box')
    return randomizePointInBox(box)
  }
  // Explicit coordinates: validate instead of handing NaN/undefined to ghost-cursor.
  if (!target || !Number.isFinite(target.x) || !Number.isFinite(target.y)) {
    throw new Error('[humanMouse] invalid target: expected a selector, an ElementHandle or {x, y}')
  }
  return target
}

/**
 * Create a human-like cursor for a Puppeteer page.
 * ghost-cursor is a hard dependency: if it cannot be loaded this rejects
 * (see preflightGhostCursor) rather than falling back to plain clicks.
 *
 * Targets are always resolved to coordinates first and then driven through
 * cursor.moveTo({x, y}) + page.mouse.click(x, y); per the original author's note,
 * passing coordinates or handles straight to ghost-cursor's click/move crashes.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @returns {Promise<{
 *   click: (selectorOrPos: string | {x: number, y: number}) => Promise<void>,
 *   move: (selectorOrPos: string | {x: number, y: number}) => Promise<void>
 * }>}
 */
export async function createHumanCursor (page) {
  const ghostCursorCreate = await preflightGhostCursor()
  const cursor = ghostCursorCreate(page)

  return {
    /**
     * Move along a human-like path to the target, then click it.
     * With probability ~0.5 a nearby "overshoot" point (±30px x, ±15px y, kept
     * inside the viewport) is visited first to mimic hesitation near the target.
     * @param {string | {x: number, y: number} | import('puppeteer').ElementHandle} selectorOrPos
     */
    async click (selectorOrPos) {
      const pos = await resolveTargetPoint(page, selectorOrPos)
      if (Math.random() < 0.5) {
        const vp = page.viewport() || { width: 1280, height: 720 }
        const overshoot = {
          x: Math.max(1, Math.min(vp.width - 1, pos.x + (Math.random() * 60 - 30))),
          y: Math.max(1, Math.min(vp.height - 1, pos.y + (Math.random() * 30 - 15)))
        }
        await cursor.moveTo(overshoot)
      }
      await cursor.moveTo(pos)
      await page.mouse.click(pos.x, pos.y)
    },
    /**
     * Move along a human-like path to the target without clicking.
     * @param {string | {x: number, y: number} | import('puppeteer').ElementHandle} selectorOrPos
     */
    async move (selectorOrPos) {
      const pos = await resolveTargetPoint(page, selectorOrPos)
      await cursor.moveTo(pos)
    }
  }
}

/**
 * Move the mouse to a random position inside the viewport so a session does not
 * start from (0, 0). createHumanCursor does not call this; the integrator does.
 *
 * Keeps a 200px horizontal / 100px vertical margin when the viewport is large
 * enough (>= 400x200); for smaller viewports the margin shrinks to half the
 * dimension so the point always stays inside the viewport.
 *
 * @param {import('puppeteer').Page} page
 * @returns {Promise<void>}
 */
export async function randomizeInitialCursorPosition (page) {
  const viewport = page.viewport() || { width: 1280, height: 720 }
  const marginX = Math.min(200, Math.floor(viewport.width / 2))
  const marginY = Math.min(100, Math.floor(viewport.height / 2))
  // Never let the random span go negative (that pushed points outside small viewports).
  const spanX = Math.max(0, viewport.width - 400)
  const spanY = Math.max(0, viewport.height - 200)
  const x = marginX + Math.floor(Math.random() * spanX)
  const y = marginY + Math.floor(Math.random() * spanY)
  await page.mouse.move(x, y, { steps: 5 + Math.floor(Math.random() * 10) })
}
puppeteer.launch(launchOpts) const page = (await browser.pages())[0] + await randomizeInitialCursorPosition(page).catch(() => {}) const bossCookies = readStorageFile('boss-cookies.json') const bossLocalStorage = readStorageFile('boss-local-storage.json') - if (Array.isArray(bossCookies) && bossCookies.length > 0) { + // persistProfile=true 时 profile 已持久化 cookies,跳过注入避免用过期文件覆盖有效 session + if (!launchOpts.userDataDir && Array.isArray(bossCookies) && bossCookies.length > 0) { await page.setCookie(...bossCookies) } await setDomainLocalStorage(browser, localStoragePageUrl, bossLocalStorage || {}) @@ -251,20 +253,12 @@ export default async function startBossAutoBrowse (hooksFromCaller, opts = {}) { } else { await hooks.beforeBrowserLaunch?.promise?.() - const headlessEnv = process.env.HEADLESS - const headless = headlessEnv === '1' - logDebug('[boss-auto-browse] 即将启动浏览器', { headless, HEADLESS_env: headlessEnv ?? null }) - browser = await puppeteer.launch({ - headless, - ignoreHTTPSErrors: true, - protocolTimeout: 120000, - defaultViewport: { - width: 1440, - height: 900 - 140 - } - }) + const launchOpts = await buildRecruiterLaunchOptions() + logDebug('[boss-auto-browse] 即将启动浏览器', { headless: launchOpts.headless, persistProfile: !!launchOpts.userDataDir }) + browser = await puppeteer.launch(launchOpts) page = (await browser.pages())[0] + await randomizeInitialCursorPosition(page).catch(() => {}) await hooks.afterBrowserLaunch?.promise?.() } @@ -276,7 +270,9 @@ export default async function startBossAutoBrowse (hooksFromCaller, opts = {}) { // 直接导航到推荐牛人页(注入 Cookie / localStorage 后 goto;复用浏览器时若已在推荐页可跳过 goto) // ----------------------------------------------------------------------- await hooks.beforeNavigateToRecommend?.promise?.() - if (Array.isArray(bossCookies) && bossCookies.length > 0) { + // persistProfile=true 时 profile 已持久化 cookies,跳过注入避免用过期文件覆盖有效 session + const persistProfile = (readConfigFile('boss-recruiter.json') || {})?.advanced?.persistProfile === true + if 
/**
 * Launch-option builder for the recruiter-side browser.
 *
 * Opt-in switches are read from the `advanced` section of boss-recruiter.json:
 *   { "advanced": { "persistProfile": false } }
 * persistProfile persists the Chromium profile across launches (better
 * anti-detection, but BOSS cannot run in system Chrome at the same time).
 */

// Candidate viewport sizes; one is chosen per profile directory (see pickViewportForPath).
const VIEWPORT_POOL = [
  { width: 1366, height: 768 },
  { width: 1440, height: 760 },
  { width: 1536, height: 864 },
  { width: 1600, height: 900 },
  { width: 1680, height: 900 }
]

// Viewport used when no persistent profile is configured.
const DEFAULT_VIEWPORT = { width: 1440, height: 760 }

/**
 * Deterministically map a seed string to one entry of VIEWPORT_POOL:
 * the first four bytes of the seed's MD5 digest, read as a signed big-endian
 * integer, absolute value, modulo the pool size.
 *
 * @param {string} seed
 * @returns {{width: number, height: number}} a fresh object on every call
 */
function pickViewportForPath(seed) {
  const hashHead = crypto.createHash('md5').update(seed).digest().readInt32BE(0)
  const { width, height } = VIEWPORT_POOL[Math.abs(hashHead) % VIEWPORT_POOL.length]
  return { width, height }
}

/**
 * Build the puppeteer.launch() options object for the recruiter side.
 * Reads `advanced.persistProfile` from boss-recruiter.json; headless mode is on
 * only when the HEADLESS environment variable equals '1'.
 *
 * @param {object} [overrides] - shallow-merged over the computed options (e.g. { headless: false })
 * @returns {Promise<object>}
 */
export async function buildRecruiterLaunchOptions(overrides = {}) {
  const persistProfile = readConfigFile('boss-recruiter.json')?.advanced?.persistProfile === true
  const headless = process.env.HEADLESS === '1'

  // Only a persistent profile gets a directory (created on demand) and a seed-derived viewport.
  const profileDir = persistProfile ? path.join(storageFilePath, 'boss-chrome-profile') : undefined
  if (profileDir) fs.mkdirSync(profileDir, { recursive: true })
  const defaultViewport = profileDir ? pickViewportForPath(profileDir) : { ...DEFAULT_VIEWPORT }

  return {
    headless,
    ignoreHTTPSErrors: true,
    protocolTimeout: 120000,
    defaultViewport,
    args: ['--lang=zh-CN', '--disable-blink-features=AutomationControlled'],
    ...(profileDir ? { userDataDir: profileDir } : {}),
    ...overrides
  }
}
window.__canvasCapturedText。 + * 在页面上通过 evaluateOnNewDocument 注入 Canvas fillText hook,将绘制文字收集到主页面随机命名的 marker 属性上。 * * 实现原理: * - evaluateOnNewDocument 会在主页面和每一个 iframe 中各执行一次。 * - 在线简历 iframe 带有 sandbox 属性且不含 allow-same-origin,主页面无法访问其 contentWindow, * 因此必须在 iframe 自身的执行上下文内直接 hook CanvasRenderingContext2D.prototype.fillText。 * - iframe 内 hook 到的文字通过 window.top.postMessage 批量发回主页面(同 origin 或跨 origin 均可用)。 - * - 主页面监听 message 事件并累积到 window.__canvasCapturedText。 + * - 主页面监听 message 事件并累积到随机命名的 window 属性。 + * + * 反检测:marker 属性名(capturedTextProp / messageKey / hookedFlag)每次调用本函数时随机生成, + * 不同 session 不同;同时通过 laodeng.registerFakeNativeSource 让 fillText 包装函数的 toString 返回原生外观。 * * @param {import('puppeteer').Page} page - Puppeteer 页面实例(必须在 page.goto 之前调用) - * @returns {Promise<{ getCapturedText: (page: import('puppeteer').Page) => Promise> }>} + * @returns {Promise<{ getCapturedText: (page: import('puppeteer').Page) => Promise>, clearCapturedText: (page: import('puppeteer').Page) => Promise, peekCapturedText: (page: import('puppeteer').Page) => Promise }>} */ export async function setupCanvasTextHook (page) { - // 转发浏览器内部 [canvasHook] 日志到 Node 侧,便于调试 - page.on('console', (msg) => { - const text = msg.text() - if (text.startsWith('[canvasHook]')) { - console.log('[canvasHook-browser]', text) - } - }) + const markerSuffix = Math.random().toString(36).slice(2, 10) + Date.now().toString(36).slice(-4) + const capturedTextProp = '__cct_' + markerSuffix + const messageKey = '__mk_' + markerSuffix + const hookedFlag = '_h_' + markerSuffix - await page.evaluateOnNewDocument(() => { - // 此脚本在每个 frame(主页面 + 所有 iframe)中各执行一次。 - // 策略: - // 主页面 → 初始化收集数组,监听来自 iframe 的 postMessage - // iframe → 直接 hook 当前窗口的 fillText,批量 postMessage 到 window.top - - const isTopFrame = (window === window.top) - - if (isTopFrame) { - window.__canvasCapturedText = [] - window.addEventListener('message', (evt) => { - if (evt.data && evt.data.__bossCanvasHook && Array.isArray(evt.data.__bossCanvasHook)) { - 
if (!window.__canvasCapturedText) window.__canvasCapturedText = [] - for (const item of evt.data.__bossCanvasHook) { - window.__canvasCapturedText.push(item) - } - console.log('[canvasHook] main received ' + evt.data.__bossCanvasHook.length + ' items, total ' + window.__canvasCapturedText.length) - } - }) - console.log('[canvasHook] main: message listener set') - } - - // 在当前 window(无论是主页面还是 iframe)上 hook fillText - try { - const proto = window.CanvasRenderingContext2D?.prototype - if (!proto) { console.log('[canvasHook] CanvasRenderingContext2D.prototype not found'); return } - if (proto._bossHooked) { console.log('[canvasHook] already hooked, skip'); return } - proto._bossHooked = true - - const origFillText = proto.fillText - if (typeof origFillText !== 'function') { console.log('[canvasHook] fillText is not a function'); return } - - // 批量缓冲,用 setTimeout(0) 在一个事件循环 tick 后统一发送(WASM 会在同一个同步调用栈内连续 fillText) - const captured = [] - let flushScheduled = false - const flush = () => { - flushScheduled = false - if (captured.length === 0) return - const items = captured.splice(0) - if (isTopFrame) { - if (!window.__canvasCapturedText) window.__canvasCapturedText = [] - for (const item of items) window.__canvasCapturedText.push(item) - console.log('[canvasHook] main fillText wrote ' + items.length + ' items') - } else { - try { - window.top.postMessage({ __bossCanvasHook: items }, '*') - console.log('[canvasHook] iframe postMessage sent ' + items.length + ' items') - } catch (e) { - console.log('[canvasHook] postMessage failed: ' + e.message) - } - } - } - const scheduleFlush = () => { - if (!flushScheduled) { flushScheduled = true; setTimeout(flush, 0) } + // 转发浏览器内部 [canvasHook] 日志到 Node 侧(仅 debug 模式) + if (CANVAS_HOOK_DEBUG) { + page.on('console', (msg) => { + const text = msg.text() + if (text.startsWith('[canvasHook]')) { + console.log('[canvasHook-browser]', text) } + }) + } - Object.defineProperty(proto, 'fillText', { - value: new Proxy(origFillText, { - apply 
(target, thisArg, args) { - const [text, x, y] = args - if (typeof text === 'string' && text.trim()) { - captured.push({ text, x: Number(x) || 0, y: Number(y) || 0 }) - scheduleFlush() + // 注册 fillText 包装的伪原生 toString(依赖 laodeng 已被 puppeteer.use 装载) + try { + const laodengMod = await import('@geekgeekrun/puppeteer-extra-plugin-laodeng') + const registerFakeNativeSource = + laodengMod.registerFakeNativeSource ?? laodengMod.default?.registerFakeNativeSource + if (typeof registerFakeNativeSource === 'function') { + await registerFakeNativeSource( + page, + 'CanvasRenderingContext2D.prototype.fillText', + 'function fillText() { [native code] }' + ) + } + } catch (e) { + // non-fatal: hook still works, just one more detectable surface + } + + await page.evaluateOnNewDocument( + (capturedTextProp, messageKey, hookedFlag, DEBUG) => { + // 此脚本在每个 frame(主页面 + 所有 iframe)中各执行一次。 + const isTopFrame = (window === window.top) + + if (isTopFrame) { + window[capturedTextProp] = [] + window.addEventListener('message', (evt) => { + if (evt.data && evt.data[messageKey] && Array.isArray(evt.data[messageKey])) { + if (!window[capturedTextProp]) window[capturedTextProp] = [] + for (const item of evt.data[messageKey]) { + window[capturedTextProp].push(item) } - return Reflect.apply(target, thisArg, args) + if (DEBUG) console.log('[canvasHook] main received ' + evt.data[messageKey].length + ' items, total ' + window[capturedTextProp].length) } - }), - writable: true, - configurable: true - }) - console.log('[canvasHook] fillText hook installed, isTopFrame=' + isTopFrame + ' href=' + window.location.href) - } catch (e) { - console.log('[canvasHook] hook install error: ' + e.message) - } - }) + }) + if (DEBUG) console.log('[canvasHook] main: message listener set') + } + + // 在当前 window(无论是主页面还是 iframe)上 hook fillText + try { + const proto = window.CanvasRenderingContext2D?.prototype + if (!proto) { if (DEBUG) console.log('[canvasHook] CanvasRenderingContext2D.prototype not found'); return } 
+ if (proto[hookedFlag]) { if (DEBUG) console.log('[canvasHook] already hooked, skip'); return } + proto[hookedFlag] = true + + const origFillText = proto.fillText + if (typeof origFillText !== 'function') { if (DEBUG) console.log('[canvasHook] fillText is not a function'); return } + + const captured = [] + let flushScheduled = false + const flush = () => { + flushScheduled = false + if (captured.length === 0) return + const items = captured.splice(0) + if (isTopFrame) { + if (!window[capturedTextProp]) window[capturedTextProp] = [] + for (const item of items) window[capturedTextProp].push(item) + if (DEBUG) console.log('[canvasHook] main fillText wrote ' + items.length + ' items') + } else { + try { + const payload = {} + payload[messageKey] = items + window.top.postMessage(payload, '*') + if (DEBUG) console.log('[canvasHook] iframe postMessage sent ' + items.length + ' items') + } catch (e) { + if (DEBUG) console.log('[canvasHook] postMessage failed: ' + e.message) + } + } + } + const scheduleFlush = () => { + if (!flushScheduled) { flushScheduled = true; setTimeout(flush, 0) } + } + + Object.defineProperty(proto, 'fillText', { + value: new Proxy(origFillText, { + apply (target, thisArg, args) { + const [text, x, y] = args + if (typeof text === 'string' && text.trim()) { + captured.push({ text, x: Number(x) || 0, y: Number(y) || 0 }) + scheduleFlush() + } + return Reflect.apply(target, thisArg, args) + } + }), + writable: true, + configurable: true + }) + if (DEBUG) console.log('[canvasHook] fillText hook installed, isTopFrame=' + isTopFrame + ' href=' + window.location.href) + } catch (e) { + if (DEBUG) console.log('[canvasHook] hook install error: ' + e.message) + } + }, + capturedTextProp, + messageKey, + hookedFlag, + CANVAS_HOOK_DEBUG + ) /** * 从主页面读取当前收集的 Canvas 文字并清空。 @@ -245,14 +276,13 @@ export async function setupCanvasTextHook (page) { * @returns {Promise>} */ async function getCapturedText (p) { - // 给浏览器 150ms 处理待发送的 setTimeout(0)/postMessage 队列 
await p.evaluate(() => new Promise(resolve => setTimeout(resolve, 150))) - const result = await p.evaluate(() => { - const arr = window.__canvasCapturedText || [] + const result = await p.evaluate((prop) => { + const arr = window[prop] || [] const copy = arr.map(({ text, x, y }) => ({ text, x, y })) - window.__canvasCapturedText = [] + window[prop] = [] return copy - }) + }, capturedTextProp) return result } @@ -261,10 +291,20 @@ export async function setupCanvasTextHook (page) { * @param {import('puppeteer').Page} p - 同一页面实例 */ async function clearCapturedText (p) { - await p.evaluate(() => { window.__canvasCapturedText = [] }) + await p.evaluate((prop) => { window[prop] = [] }, capturedTextProp) } - return { getCapturedText, clearCapturedText } + /** + * Peek at how many canvas text items have been captured so far, without consuming them. + * Used for "stable count" polling to detect when Canvas rendering has finished. + * @param {import('puppeteer').Page} p + * @returns {Promise} + */ + async function peekCapturedText (p) { + return p.evaluate((prop) => (window[prop] || []).length, capturedTextProp) + } + + return { getCapturedText, clearCapturedText, peekCapturedText } } // --------------------------------------------------------------------------- @@ -316,13 +356,15 @@ export function extractResumeText (capturedTextArray) { // --------------------------------------------------------------------------- /** - * 优先从拦截的 API 数据中取简历,若无则从页面 window.__canvasCapturedText 中提取(需先调用 setupCanvasTextHook)。 + * 优先从拦截的 API 数据中取简历,若无则从页面 Canvas hook 中提取(需先调用 setupCanvasTextHook)。 * * @param {import('puppeteer').Page} page - Puppeteer 页面实例 * @param {Map} interceptedData - setupNetworkInterceptor 返回的 getInterceptedData() 的结果 + * @param {{ getCapturedText?: (page: import('puppeteer').Page) => Promise> }} [opts] + * opts.getCapturedText — setupCanvasTextHook 返回的同名函数(支持随机 marker 名);不传时降级读 window.__canvasCapturedText(旧行为,仅向后兼容) * @returns {Promise<{ source: 'api' | 'canvas', data: 
unknown }>} source 为 'api' 时 data 为 API 响应对象;为 'canvas' 时为 extractResumeText 的结果(字符串数组) */ -export async function getResumeData (page, interceptedData) { +export async function getResumeData (page, interceptedData, opts = {}) { if (interceptedData && interceptedData.size > 0) { const firstEntry = interceptedData.entries().next() if (!firstEntry.done) { @@ -330,12 +372,21 @@ export async function getResumeData (page, interceptedData) { return { source: 'api', data: { path, ...(typeof data === 'object' && data !== null ? data : { value: data }) } } } } - const captured = await page.evaluate(() => { - const arr = window.__canvasCapturedText || [] - const copy = arr.map(({ text, x, y }) => ({ text, x, y })) - window.__canvasCapturedText = [] - return copy - }) + + // Canvas fallback: use getCapturedText closure if provided (supports randomized marker names) + // Fall back to legacy window.__canvasCapturedText for callers that don't yet pass it + const getCapturedTextFn = opts.getCapturedText + let captured + if (typeof getCapturedTextFn === 'function') { + captured = await getCapturedTextFn(page) + } else { + captured = await page.evaluate(() => { + const arr = window.__canvasCapturedText || [] + const copy = arr.map(({ text, x, y }) => ({ text, x, y })) + window.__canvasCapturedText = [] + return copy + }) + } const lines = extractResumeText(captured) return { source: 'canvas', data: lines } } diff --git a/packages/boss-auto-browse-and-chat/risk-detector.mjs b/packages/boss-auto-browse-and-chat/risk-detector.mjs new file mode 100644 index 0000000..c96311d --- /dev/null +++ b/packages/boss-auto-browse-and-chat/risk-detector.mjs @@ -0,0 +1,108 @@ +import { sleep } from '@geekgeekrun/utils/sleep.mjs' + +/** + * Detect whether the page is currently showing BOSS security verification + * (CAPTCHA / slider / 安全验证 / etc). + * + * Multi-signal: URL match + DOM elements + body text fallback. 
// URL fragments that indicate a verification / risk-control page.
const RISK_URL_PATTERN = /verify|captcha|security.?check|safe\b|\/safe\/|安全验证/

/**
 * Core risk-control probe. Checks, in order: the page URL, known verification
 * DOM elements, then a body-text fallback (elements first, because body text can
 * false-positive on candidate descriptions that mention 验证).
 *
 * Unlike detectRiskControl, this lets page.url()/page.evaluate() errors propagate,
 * so callers can tell "no verification" apart from "page could not be inspected".
 *
 * @param {import('puppeteer').Page} page
 * @returns {Promise<boolean>}
 */
async function probeRiskControl(page) {
  if (RISK_URL_PATTERN.test(page.url())) return true
  // The callback runs in the browser, so it must not reference anything from this module.
  return page.evaluate(() => {
    const verifySelectors = [
      '#nc_mask',
      '.verify-container',
      '.captcha-wrap',
      '.nc-container',
      '[class*="verify"][class*="wrap"]',
      '[class*="captcha"]',
      '.geetest_panel',
      '.geetest_box',
      '[id^="__yidun"]',
      'iframe[src*="captcha"]',
      'iframe[src*="verify"]',
      '.boss-popup__wrapper.dialog-verify'
    ]
    if (verifySelectors.some((selector) => document.querySelector(selector))) return true
    const bodyText = document.body?.innerText || ''
    return /请完成.{0,10}验证|安全验证|滑动.{0,6}滑块|人机验证|完成验证后继续|异常.{0,6}操作|操作过于频繁|请稍后再试.*继续|存在风险.*操作/.test(
      bodyText
    )
  })
}

/**
 * Render the wait budget for the timeout log line.
 *
 * @param {number} timeoutMs
 * @returns {string} e.g. "5 分钟" for 300000, "30 秒" for 30000
 */
function formatWaitBudget(timeoutMs) {
  if (timeoutMs >= 60000) return `${Number((timeoutMs / 60000).toFixed(1))} 分钟`
  return `${Math.ceil(timeoutMs / 1000)} 秒`
}

/**
 * Detect whether the page is currently showing BOSS security verification
 * (CAPTCHA / slider / 安全验证 / etc).
 * Never throws: if the page cannot be inspected this resolves to false.
 *
 * @param {import('puppeteer').Page} page
 * @returns {Promise<boolean>}
 */
export async function detectRiskControl(page) {
  try {
    return await probeRiskControl(page)
  } catch {
    return false
  }
}

/**
 * Block until the user manually completes verification, or until the timeout.
 * Sleeps pollIntervalMs between probes and tries to show one Electron desktop
 * notification on entry (ignored when Electron's Notification is unavailable).
 *
 * A probe that throws (for example while the page is navigating) is treated as
 * "state unknown" and the wait continues. Only a successful probe that reports no
 * verification ends the wait; previously a navigation error ended it early.
 *
 * @param {import('puppeteer').Page} page
 * @param {object} [opts]
 * @param {string} [opts.expectedUrlPrefix] - if provided, verification only counts as done once the url starts with this prefix
 * @param {number} [opts.timeoutMs=300000] - default 5 min
 * @param {number} [opts.pollIntervalMs=2000] - delay between probes
 * @param {(msg: string) => void} [opts.log] - optional logger
 * @returns {Promise<boolean>} true if completed, false if timed out
 */
export async function waitForRiskControlCompletion(page, opts = {}) {
  const { expectedUrlPrefix, timeoutMs = 300000, pollIntervalMs = 2000, log } = opts
  const logFn = typeof log === 'function' ? log : () => {}

  logFn('⚠️ 检测到 BOSS 安全验证...')

  try {
    const { Notification } = await import('electron')
    new Notification({
      title: 'GeekGeekRun - 需要人工验证',
      body: '检测到 BOSS 直聘安全验证,请在浏览器窗口中完成验证,完成后程序将自动继续。'
    }).show()
  } catch {
    // Notification is optional; outside Electron there is nothing to show.
  }

  const deadline = Date.now() + timeoutMs
  while (Date.now() < deadline) {
    await sleep(pollIntervalMs)
    try {
      const stillVerifying = await probeRiskControl(page)
      const onExpectedPage = !expectedUrlPrefix || page.url().startsWith(expectedUrlPrefix)
      if (!stillVerifying && onExpectedPage) {
        logFn('✅ 安全验证已完成')
        return true
      }
    } catch {
      // The page is probably mid-navigation; its state is unknown, so keep waiting.
    }
  }
  logFn(`验证等待超时(${formatWaitBudget(timeoutMs)})`)
  return false
}

/**
 * Convenience: detect and, if positive, wait for completion.
 *
 * @param {import('puppeteer').Page} page
 * @param {object} [opts] - same as waitForRiskControlCompletion
 * @returns {Promise<'no-risk'|'completed'|'timed-out'>}
 */
export async function checkpointRiskControl(page, opts = {}) {
  const detected = await detectRiskControl(page)
  if (!detected) return 'no-risk'
  const completed = await waitForRiskControlCompletion(page, opts)
  return completed ? 'completed' : 'timed-out'
}
"function fillText() { [native code] }" + */ +async function registerFakeNativeSource(page, accessorPath, fakeNativeSource) { + await page.evaluateOnNewDocument( + function (path, src) { + try { + if (!window.__laodengExtraNativeSources) window.__laodengExtraNativeSources = new Map(); + window.__laodengExtraNativeSources.set(path, src); + } catch (_) {} + }, + accessorPath, + fakeNativeSource + ); +} + +const pluginFactory = function (pluginConfig) { return new Plugin(pluginConfig); }; +pluginFactory.registerFakeNativeSource = registerFakeNativeSource; +module.exports = pluginFactory; diff --git a/packages/ui/src/main/flow/BOSS_CHAT_PAGE_MAIN/index.ts b/packages/ui/src/main/flow/BOSS_CHAT_PAGE_MAIN/index.ts index 047c03b..119a273 100644 --- a/packages/ui/src/main/flow/BOSS_CHAT_PAGE_MAIN/index.ts +++ b/packages/ui/src/main/flow/BOSS_CHAT_PAGE_MAIN/index.ts @@ -140,7 +140,7 @@ const runChatPage = async () => { log('正在动态 import boss package...') type BossAutoBrowseModule = { startBossChatPageProcess: (hooks: any, options?: { - browser?: any; page?: any; getCapturedText?: any; clearCapturedText?: any; + browser?: any; page?: any; getCapturedText?: any; clearCapturedText?: any; peekCapturedText?: any; jobId?: string | null; retryCandidate?: { encryptGeekId: string; geekName: string; jobTitle: string } | null; processContext?: { currentCandidate: any } | null; @@ -207,6 +207,7 @@ const runChatPage = async () => { let page: any = null let getCapturedText: any = null let clearCapturedText: any = null + let peekCapturedText: any = null // processContext 提升到循环外,catch 块中可读取被中断的候选人 const processContext: { currentCandidate: any } = { currentCandidate: null } @@ -230,13 +231,12 @@ const runChatPage = async () => { log('启动浏览器...') await hooks.beforeBrowserLaunch?.promise?.() - const headless = process.env.HEADLESS === '1' - browser = await puppeteer.launch({ - headless, - ignoreHTTPSErrors: true, - protocolTimeout: 120000, - defaultViewport: { width: 1440, height: 900 - 140 } - 
}) + const { buildRecruiterLaunchOptions } = (await import( + '@geekgeekrun/boss-auto-browse-and-chat/launch-options.mjs' + )) as any + const launchOpts = await buildRecruiterLaunchOptions() + log(`使用 launch options:persistProfile=${!!launchOpts.userDataDir}`) + browser = await puppeteer.launch(launchOpts) await hooks.afterBrowserLaunch?.promise?.() @@ -248,7 +248,15 @@ const runChatPage = async () => { const canvasHooks = await setupCanvasTextHook(page) getCapturedText = canvasHooks.getCapturedText clearCapturedText = canvasHooks.clearCapturedText - if (Array.isArray(bossCookies) && bossCookies.length > 0) { + peekCapturedText = canvasHooks.peekCapturedText + + const { randomizeInitialCursorPosition } = (await import( + '@geekgeekrun/boss-auto-browse-and-chat/humanMouse.mjs' + )) as any + await randomizeInitialCursorPosition(page).catch(() => {}) + + // persistProfile=true 时 profile 已持久化 cookies,跳过注入避免用过期文件覆盖有效 session + if (!launchOpts.userDataDir && Array.isArray(bossCookies) && bossCookies.length > 0) { await page.setCookie(...bossCookies) } await setDomainLocalStorage(browser, localStoragePageUrl, bossLocalStorage || {}) @@ -287,7 +295,7 @@ const runChatPage = async () => { const jname = job.jobName ?? 
job.name log(`开始处理职位 ${jid}(${jname})的沟通页...`) processContext.currentCandidate = null - await startBossChatPageProcess(hooks, { browser, page, getCapturedText, clearCapturedText, jobId: jid, processContext }) + await startBossChatPageProcess(hooks, { browser, page, getCapturedText, clearCapturedText, peekCapturedText, jobId: jid, processContext }) log(`职位 ${jid} 沟通页处理完成`) } } else { @@ -296,7 +304,7 @@ const runChatPage = async () => { } else { log('未配置职位队列,开始执行 startBossChatPageProcess(处理所有未读)...') processContext.currentCandidate = null - await startBossChatPageProcess(hooks, { browser, page, getCapturedText, clearCapturedText, processContext }) + await startBossChatPageProcess(hooks, { browser, page, getCapturedText, clearCapturedText, peekCapturedText, processContext }) } log('startBossChatPageProcess 完成') @@ -318,6 +326,7 @@ const runChatPage = async () => { page = null getCapturedText = null clearCapturedText = null + peekCapturedText = null const rerunMs = cfg?.chatPage?.rerunIntervalMs ?? 
rerunInterval log(`下次运行将在 ${rerunMs}ms 后开始`) await sleep(rerunMs) @@ -346,7 +355,7 @@ const runChatPage = async () => { log(`🔄 正在重试被验证中断的候选人:${interruptedCandidate.geekName}...`) try { await startBossChatPageProcess(hooks, { - browser, page, getCapturedText, clearCapturedText, + browser, page, getCapturedText, clearCapturedText, peekCapturedText, retryCandidate: interruptedCandidate, processContext: { currentCandidate: null } }) @@ -369,6 +378,7 @@ const runChatPage = async () => { page = null getCapturedText = null clearCapturedText = null + peekCapturedText = null } if (err instanceof Error) { if (err.message.includes('LOGIN_STATUS_INVALID')) { diff --git a/packages/ui/src/main/flow/OPEN_SETTING_WINDOW/ipc/index.ts b/packages/ui/src/main/flow/OPEN_SETTING_WINDOW/ipc/index.ts index dac6742..28602e9 100644 --- a/packages/ui/src/main/flow/OPEN_SETTING_WINDOW/ipc/index.ts +++ b/packages/ui/src/main/flow/OPEN_SETTING_WINDOW/ipc/index.ts @@ -991,6 +991,12 @@ export default function initIpc() { ...payload.recommendPage } } + if (payload.advanced && typeof payload.advanced === 'object') { + bossRecruiterConfig.advanced = bossRecruiterConfig.advanced || {} + if (typeof payload.advanced.persistProfile === 'boolean') { + bossRecruiterConfig.advanced.persistProfile = payload.advanced.persistProfile + } + } const candidateFilterConfig = readBossConfigFile('candidate-filter.json') || {} if (hasOwn(payload, 'expectCityList')) { diff --git a/packages/ui/src/renderer/src/page/MainLayout/BossAutoBrowseAndChat/index.vue b/packages/ui/src/renderer/src/page/MainLayout/BossAutoBrowseAndChat/index.vue index 3229f92..a1913f2 100644 --- a/packages/ui/src/renderer/src/page/MainLayout/BossAutoBrowseAndChat/index.vue +++ b/packages/ui/src/renderer/src/page/MainLayout/BossAutoBrowseAndChat/index.vue @@ -90,6 +90,22 @@ + + +
高级反检测(实验性)
+
+ + + 持久化浏览器 profile(更难被识别为新设备) + +
+ 启用后 BOSS 看到的是「老设备」而非「每次都是新设备」,能显著降低人工验证触发率。
+ 副作用:bot 运行期间不能在系统 Chrome 同时登录 BOSS(会被挤掉);profile 文件夹长期会占用 1-2GB 磁盘空间。
+ 路径:~/.geekgeekrun/storage/boss-chrome-profile/ +
+
+
+
仅保存配置 @@ -167,7 +183,8 @@ const formContent = reactive({ recommendSkipViewedCandidates: false, recommendRerunIntervalMs: 3000, recommendDelayBetweenNotInterestedMs: [800, 2500] as [number, number], - recommendKeepBrowserOpenAfterRun: false + recommendKeepBrowserOpenAfterRun: false, + advancedPersistProfile: false }) onMounted(async () => { @@ -194,6 +211,9 @@ onMounted(async () => { : [800, 2500] formContent.recommendKeepBrowserOpenAfterRun = recommendPage.keepBrowserOpenAfterRun ?? false + + const advanced = recruiterConfig.advanced ?? {} + formContent.advancedPersistProfile = advanced.persistProfile ?? false } catch (err) { console.error(err) } @@ -213,6 +233,9 @@ const doSave = async () => { rerunIntervalMs: formContent.recommendRerunIntervalMs, delayBetweenNotInterestedMs: formContent.recommendDelayBetweenNotInterestedMs, keepBrowserOpenAfterRun: formContent.recommendKeepBrowserOpenAfterRun + }, + advanced: { + persistProfile: formContent.advancedPersistProfile } } await ipcRenderer.invoke('save-boss-recruiter-config', JSON.stringify(payload))