From e70c99f205a0d6841a85feaacdd99c4b2bfcb4ba Mon Sep 17 00:00:00 2001 From: cnlimiter Date: Sun, 15 Mar 2026 02:43:00 +0800 Subject: [PATCH] =?UTF-8?q?feat(core):=20=E5=AE=9E=E7=8E=B0=E4=BA=86=20Out?= =?UTF-8?q?look=20=E9=82=AE=E7=AE=B1=E9=AA=8C=E8=AF=81=E7=A0=81=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E6=94=B9=E8=BF=9B=E6=96=B9=E6=A1=88=20=20=201.=20?= =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E9=82=AE=E4=BB=B6=E8=AF=86=E5=88=AB=E9=80=BB?= =?UTF-8?q?=E8=BE=91=20(=5Fis=5Fopenai=5Fverification=5Fmail)=20=20=20=20?= =?UTF-8?q?=20-=20=E4=B8=A5=E6=A0=BC=E9=AA=8C=E8=AF=81=E5=8F=91=E4=BB=B6?= =?UTF-8?q?=E4=BA=BA=E5=BF=85=E9=A1=BB=E6=98=AF=20OpenAI=20=20=20=20=20-?= =?UTF-8?q?=20=E9=AA=8C=E8=AF=81=E4=B8=BB=E9=A2=98/=E6=AD=A3=E6=96=87?= =?UTF-8?q?=E5=8C=85=E5=90=AB=E9=AA=8C=E8=AF=81=E5=85=B3=E9=94=AE=E8=AF=8D?= =?UTF-8?q?=20=20=20=20=20-=20=E9=AA=8C=E8=AF=81=E6=94=B6=E4=BB=B6?= =?UTF-8?q?=E4=BA=BA=E5=8C=B9=E9=85=8D=E7=9B=AE=E6=A0=87=E9=82=AE=E7=AE=B1?= =?UTF-8?q?=20=20=202.=20=E9=82=AE=E4=BB=B6=E6=97=B6=E9=97=B4=E6=88=B3?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=20=20=20=20=20-=20=E5=9F=BA=E4=BA=8E=20otp?= =?UTF-8?q?=5Fsent=5Fat=20=E8=BF=87=E6=BB=A4=E5=8F=91=E9=80=81=E5=89=8D?= =?UTF-8?q?=E7=9A=84=E6=97=A7=E9=82=AE=E4=BB=B6=20=20=20=20=20-=20?= =?UTF-8?q?=E9=A2=84=E7=95=99=2060=20=E7=A7=92=E6=97=B6=E9=92=9F=E5=81=8F?= =?UTF-8?q?=E5=B7=AE=E5=AE=B9=E5=BF=8D=20=20=203.=20=E9=AA=8C=E8=AF=81?= =?UTF-8?q?=E7=A0=81=E6=8F=90=E5=8F=96=E4=BC=98=E5=8C=96=20(=5Fextract=5Fc?= =?UTF-8?q?ode=5Ffrom=5Fmail)=20=20=20=20=20-=20=E4=BC=98=E5=85=88?= =?UTF-8?q?=E4=BB=8E=E4=B8=BB=E9=A2=98=E6=8F=90=E5=8F=96=206=20=E4=BD=8D?= =?UTF-8?q?=E6=95=B0=E5=AD=97=20=20=20=20=20-=20=E8=AF=AD=E4=B9=89?= =?UTF-8?q?=E6=AD=A3=E5=88=99=E5=8C=B9=E9=85=8D=EF=BC=88"code=20is",=20"?= =?UTF-8?q?=E9=AA=8C=E8=AF=81=E7=A0=81"=EF=BC=89=20=20=20=20=20-=20?= =?UTF-8?q?=E5=85=9C=E5=BA=95=E4=BB=BB=E6=84=8F=206=20=E4=BD=8D=E6=95=B0?= =?UTF-8?q?=E5=AD=97=20=20=204.=20=E9=AA=8C=E8=AF=81=E7=A0=81=E5=8E=BB?= =?UTF-8?q?=E9=87=8D=E6=9C=BA=E5=88=B6=20=20=20=20=20-=20=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=20=5Fused=5Fcodes=20=E5=AE=9E=E4=BE=8B=E5=8F=98?= =?UTF-8?q?=E9=87=8F=20=20=20=20=20-=20=E9=81=BF=E5=85=8D=E9=87=8D?= =?UTF-8?q?=E5=A4=8D=E4=BD=BF=E7=94=A8=E5=90=8C=E4=B8=80=E9=AA=8C=E8=AF=81?= =?UTF-8?q?=E7=A0=81=20=20=205.=20=E6=B8=90=E8=BF=9B=E5=BC=8F=E9=82=AE?= =?UTF-8?q?=E4=BB=B6=E6=A3=80=E6=9F=A5=20=20=20=20=20-=20=E5=89=8D=203=20?= =?UTF-8?q?=E6=AC=A1=E8=BD=AE=E8=AF=A2=E5=8F=AA=E6=A3=80=E6=9F=A5=E6=9C=AA?= =?UTF-8?q?=E8=AF=BB=E9=82=AE=E4=BB=B6=20=20=20=20=20-=20=E4=B9=8B?= =?UTF-8?q?=E5=90=8E=E6=A3=80=E6=9F=A5=E6=89=80=E6=9C=89=E9=82=AE=E4=BB=B6?= =?UTF-8?q?=EF=BC=88=E9=81=BF=E5=85=8D=E5=B7=B2=E8=AF=BB=E9=82=AE=E4=BB=B6?= =?UTF-8?q?=E8=A2=AB=E5=BF=BD=E7=95=A5=EF=BC=89=20=20=206.=20=E5=8F=AF?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=E8=B6=85=E6=97=B6=E6=97=B6=E9=97=B4=20=20=20?= =?UTF-8?q?=20=20-=20=E6=96=B0=E5=A2=9E=E9=85=8D=E7=BD=AE=E9=A1=B9=20email?= =?UTF-8?q?=5Fcode=5Ftimeout=EF=BC=88=E9=BB=98=E8=AE=A4=20120=20=E7=A7=92?= =?UTF-8?q?=EF=BC=89=20=20=20=20=20-=20=E6=96=B0=E5=A2=9E=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=E9=A1=B9=20email=5Fcode=5Fpoll=5Finterval=EF=BC=88?= =?UTF-8?q?=E9=BB=98=E8=AE=A4=203=20=E7=A7=92=EF=BC=89=20=20=207.=20?= =?UTF-8?q?=E8=AF=A6=E7=BB=86=E6=97=B6=E9=97=B4=E6=88=B3=E6=97=A5=E5=BF=97?= =?UTF-8?q?=20=20=20=20=20-=20=E8=AE=B0=E5=BD=95=20IMAP=20=E8=BF=9E?= =?UTF-8?q?=E6=8E=A5=E8=80=97=E6=97=B6=20=20=20=20=20-=20=E8=AE=B0?= =?UTF-8?q?=E5=BD=95=E9=82=AE=E4=BB=B6=E6=90=9C=E7=B4=A2=E8=80=97=E6=97=B6?= =?UTF-8?q?=20=20=20=20=20-=20=E8=AE=B0=E5=BD=95=E6=80=BB=E8=80=97?= =?UTF-8?q?=E6=97=B6=E5=92=8C=E8=BD=AE=E8=AF=A2=E6=AC=A1=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/constants.py | 23 ++++ src/config/settings.py | 4 + src/core/register.py | 7 +- src/services/base.py | 4 +- src/services/custom_domain.py | 4 +- src/services/outlook.py | 194 ++++++++++++++++++++++++++++------ src/services/tempmail.py | 4 +- 7 files changed, 202 insertions(+), 38 deletions(-) diff --git a/src/config/constants.py b/src/config/constants.py index 109b433..8f97e16 100644 --- a/src/config/constants.py +++ b/src/config/constants.py @@ -118,6 +118,29 @@ OTP_WAIT_TIMEOUT = 120 # 秒 OTP_POLL_INTERVAL = 3 # 秒 OTP_MAX_ATTEMPTS = 40 # 最大轮询次数 +# 验证码提取正则(增强版) +# 简单匹配:任意 6 位数字 +OTP_CODE_SIMPLE_PATTERN = r"(? bool: """发送验证码""" try: + # 记录发送时间戳 + self._otp_sent_at = time.time() + response = self.session.get( OPENAI_API_ENDPOINTS["send_otp"], headers={ @@ -371,7 +375,8 @@ class RegistrationEngine: email=self.email, email_id=email_id, timeout=120, - pattern=OTP_CODE_PATTERN + pattern=OTP_CODE_PATTERN, + otp_sent_at=self._otp_sent_at, ) if code: diff --git a/src/services/base.py b/src/services/base.py index 92d98af..aba923f 100644 --- a/src/services/base.py +++ b/src/services/base.py @@ -81,7 +81,8 @@ class BaseEmailService(abc.ABC): email: str, email_id: str = None, timeout: int = 120, - pattern: str = r"(? Optional[str]: """ 获取验证码 @@ -91,6 +92,7 @@ class BaseEmailService(abc.ABC): email_id: 邮箱服务中的 ID(如果需要) timeout: 超时时间(秒) pattern: 验证码正则表达式 + otp_sent_at: OTP 发送时间戳,用于过滤旧邮件 Returns: 验证码字符串,如果超时或未找到返回 None diff --git a/src/services/custom_domain.py b/src/services/custom_domain.py index 5c70f11..db96b8a 100644 --- a/src/services/custom_domain.py +++ b/src/services/custom_domain.py @@ -235,7 +235,8 @@ class CustomDomainEmailService(BaseEmailService): email: str, email_id: str = None, timeout: int = 120, - pattern: str = OTP_CODE_PATTERN + pattern: str = OTP_CODE_PATTERN, + otp_sent_at: Optional[float] = None, ) -> Optional[str]: """ 从自定义域名邮箱获取验证码 @@ -245,6 +246,7 @@ class CustomDomainEmailService(BaseEmailService): email_id: 邮箱 ID(如果不提供,从缓存中查找) timeout: 超时时间(秒) pattern: 验证码正则表达式 + otp_sent_at: OTP 发送时间戳(自定义域名服务暂不使用此参数) Returns: 验证码字符串,如果超时或未找到返回 None diff --git a/src/services/outlook.py b/src/services/outlook.py index 72e593d..5bbdfdf 100644 --- a/src/services/outlook.py +++ b/src/services/outlook.py @@ -21,7 +21,14 @@ from email.utils import parsedate_to_datetime from urllib.error import HTTPError from .base import BaseEmailService, EmailServiceError, EmailServiceType -from ..config.constants import OTP_CODE_PATTERN +from ..config.constants import ( + OTP_CODE_PATTERN, + OTP_CODE_SIMPLE_PATTERN, + OTP_CODE_SEMANTIC_PATTERN, + OPENAI_EMAIL_SENDERS, + OPENAI_VERIFICATION_KEYWORDS, +) +from ..config import get_settings logger = logging.getLogger(__name__) @@ -397,6 +404,9 @@ class OutlookService(BaseEmailService): # IMAP 连接限制(防止限流) self._imap_semaphore = threading.Semaphore(5) + # 验证码去重机制:email -> set of used codes + self._used_codes: Dict[str, set] = {} + def create_email(self, config: Dict[str, Any] = None) -> Dict[str, Any]: """ 选择可用的 Outlook 账户 @@ -436,8 +446,9 @@ class OutlookService(BaseEmailService): self, email: str, email_id: str = None, - timeout: int = 120, - pattern: str = OTP_CODE_PATTERN + timeout: int = None, + pattern: str = OTP_CODE_PATTERN, + otp_sent_at: Optional[float] = None, ) -> Optional[str]: """ 从 Outlook 邮箱获取验证码 @@ -445,8 +456,9 @@ class OutlookService(BaseEmailService): Args: email: 邮箱地址 email_id: 未使用(对于 Outlook,email 就是标识) - timeout: 超时时间(秒) + timeout: 超时时间(秒),默认使用配置值 pattern: 验证码正则表达式 + otp_sent_at: OTP 发送时间戳,用于过滤旧邮件 Returns: 验证码字符串,如果超时或未找到返回 None @@ -462,21 +474,33 @@ class OutlookService(BaseEmailService): self.update_status(False, EmailServiceError(f"未找到邮箱对应的账户: {email}")) return None - logger.info(f"正在从 Outlook 邮箱 {email} 获取验证码...") + # 使用配置的超时时间 + settings = get_settings() + actual_timeout = timeout or settings.email_code_timeout + poll_interval = settings.email_code_poll_interval + + logger.info(f"[{email}] 开始获取验证码,超时 {actual_timeout}s,OTP发送时间: {otp_sent_at}") + + # 初始化验证码去重集合 + if email not in self._used_codes: + self._used_codes[email] = set() + used_codes = self._used_codes[email] + + # 计算最小时间戳(留出 60 秒时钟偏差) + min_timestamp = (otp_sent_at - 60) if otp_sent_at else 0 start_time = time.time() - last_check_time = 0 - check_count = 0 + poll_count = 0 - while time.time() - start_time < timeout: - check_count += 1 + while time.time() - start_time < actual_timeout: + poll_count += 1 + loop_start = time.time() - # 控制检查频率 - if time.time() - last_check_time < 3: - time.sleep(1) - continue + # 渐进式邮件检查:前 3 次只检查未读,之后检查全部 + only_unseen = poll_count <= 3 try: + connect_start = time.time() with self._imap_semaphore: with OutlookIMAPClient( account, @@ -484,38 +508,49 @@ class OutlookService(BaseEmailService): port=self.config["imap_port"], timeout=10 ) as client: - emails = client.get_recent_emails(count=10, only_unseen=True) + connect_elapsed = time.time() - connect_start + logger.debug(f"[{email}] IMAP 连接耗时 {connect_elapsed:.2f}s") + + # 搜索邮件 + search_start = time.time() + emails = client.get_recent_emails(count=15, only_unseen=only_unseen) + search_elapsed = time.time() - search_start + logger.debug(f"[{email}] 搜索到 {len(emails)} 封邮件(未读={only_unseen}),耗时 {search_elapsed:.2f}s") for mail in emails: - # 检查是否是 OpenAI 相关邮件 - if not self._is_oai_mail(mail): + # 时间戳过滤 + mail_ts = mail.get("date_timestamp", 0) + if min_timestamp > 0 and mail_ts > 0 and mail_ts < min_timestamp: + logger.debug(f"[{email}] 跳过旧邮件: {mail.get('subject', '')[:50]}") + continue + + # 检查是否是 OpenAI 验证邮件 + if not self._is_openai_verification_mail(mail, email): continue # 提取验证码 - content = f"{mail.get('from', '')} {mail.get('subject', '')} {mail.get('body', '')}" - match = re.search(pattern, content) - if match: - code = match.group(1) - logger.info(f"从 Outlook 邮箱 {email} 找到验证码: {code}") - - # 可选:标记邮件为已读(避免重复获取) - # 注意:这需要修改 IMAP 客户端的实现 + code = self._extract_code_from_mail(mail, pattern) + if code: + # 去重检查 + if code in used_codes: + logger.debug(f"[{email}] 跳过已使用的验证码: {code}") + continue + used_codes.add(code) + elapsed = int(time.time() - start_time) + logger.info(f"[{email}] 找到验证码: {code},总耗时 {elapsed}s,轮询 {poll_count} 次") self.update_status(True) return code - last_check_time = time.time() - - if check_count % 5 == 0: - logger.debug(f"检查 {email} 的验证码,已检查 {check_count} 次") - except Exception as e: - logger.warning(f"检查 Outlook 邮箱 {email} 时出错: {e}") - last_check_time = time.time() + loop_elapsed = time.time() - loop_start + logger.warning(f"[{email}] 检查出错: {e},循环耗时 {loop_elapsed:.2f}s") - time.sleep(3) + # 等待下次轮询 + time.sleep(poll_interval) - logger.warning(f"等待验证码超时: {email}") + elapsed = int(time.time() - start_time) + logger.warning(f"[{email}] 验证码超时 ({actual_timeout}s),共轮询 {poll_count} 次") return None def list_emails(self, **kwargs) -> List[Dict[str, Any]]: @@ -574,11 +609,102 @@ class OutlookService(BaseEmailService): return False def _is_oai_mail(self, mail: Dict[str, Any]) -> bool: - """判断是否为 OpenAI 相关邮件""" + """判断是否为 OpenAI 相关邮件(旧方法,保留兼容)""" combined = f"{mail.get('from', '')} {mail.get('subject', '')} {mail.get('body', '')}".lower() keywords = ["openai", "chatgpt", "verification", "验证码", "code"] return any(keyword in combined for keyword in keywords) + def _is_openai_verification_mail( + self, + mail: Dict[str, Any], + target_email: str = None + ) -> bool: + """ + 严格判断是否为 OpenAI 验证邮件 + + Args: + mail: 邮件信息字典 + target_email: 目标邮箱地址(用于验证收件人) + + Returns: + 是否为 OpenAI 验证邮件 + """ + sender = mail.get("from", "").lower() + + # 1. 发件人必须是 OpenAI + valid_senders = OPENAI_EMAIL_SENDERS + if not any(s in sender for s in valid_senders): + logger.debug(f"邮件发件人非 OpenAI: {sender}") + return False + + # 2. 主题或正文包含验证关键词 + subject = mail.get("subject", "").lower() + body = mail.get("body", "").lower() + verification_keywords = OPENAI_VERIFICATION_KEYWORDS + combined = f"{subject} {body}" + if not any(kw in combined for kw in verification_keywords): + logger.debug(f"邮件未包含验证关键词: {subject[:50]}") + return False + + # 3. 验证收件人(可选) + if target_email: + recipients = f"{mail.get('to', '')} {mail.get('delivered_to', '')} {mail.get('x_original_to', '')}".lower() + if target_email.lower() not in recipients: + logger.debug(f"邮件收件人不匹配: {recipients[:50]}") + return False + + logger.debug(f"识别为 OpenAI 验证邮件: {subject[:50]}") + return True + + def _extract_code_from_mail( + self, + mail: Dict[str, Any], + fallback_pattern: str = OTP_CODE_PATTERN + ) -> Optional[str]: + """ + 从邮件中提取验证码 + + 优先级: + 1. 从主题提取(6位数字) + 2. 从正文用语义正则提取(如 "code is 123456") + 3. 兜底:任意 6 位数字 + + Args: + mail: 邮件信息字典 + fallback_pattern: 兜底正则表达式 + + Returns: + 验证码字符串,如果未找到返回 None + """ + # 编译正则 + re_simple = re.compile(OTP_CODE_SIMPLE_PATTERN) + re_semantic = re.compile(OTP_CODE_SEMANTIC_PATTERN, re.IGNORECASE) + + # 1. 主题优先 + subject = mail.get("subject", "") + match = re_simple.search(subject) + if match: + code = match.group(1) + logger.debug(f"从主题提取验证码: {code}") + return code + + # 2. 正文语义匹配 + body = mail.get("body", "") + match = re_semantic.search(body) + if match: + code = match.group(1) + logger.debug(f"从正文语义提取验证码: {code}") + return code + + # 3. 兜底:任意 6 位数字 + match = re_simple.search(body) + if match: + code = match.group(1) + logger.debug(f"从正文兜底提取验证码: {code}") + return code + + return None + def get_account_stats(self) -> Dict[str, Any]: """获取账户统计信息""" total = len(self.accounts) diff --git a/src/services/tempmail.py b/src/services/tempmail.py index b117e95..48d30f8 100644 --- a/src/services/tempmail.py +++ b/src/services/tempmail.py @@ -123,7 +123,8 @@ class TempmailService(BaseEmailService): email: str, email_id: str = None, timeout: int = 120, - pattern: str = OTP_CODE_PATTERN + pattern: str = OTP_CODE_PATTERN, + otp_sent_at: Optional[float] = None, ) -> Optional[str]: """ 从 Tempmail.lol 获取验证码 @@ -133,6 +134,7 @@ class TempmailService(BaseEmailService): email_id: 邮箱 token(如果不提供,从缓存中查找) timeout: 超时时间(秒) pattern: 验证码正则表达式 + otp_sent_at: OTP 发送时间戳(Tempmail 服务暂不使用此参数) Returns: 验证码字符串,如果超时或未找到返回 None