mirror of
https://github.com/dreamhunter2333/cloudflare_temp_email.git
synced 2026-06-06 16:14:57 +08:00
feat: add AI email extraction with Cloudflare Workers AI
Add AI-powered email content extraction feature using Cloudflare Workers AI to automatically identify and extract important information from emails including verification codes, authentication links, service links, and subscription links. Features: - AI extraction with priority-based logic (auth_code > auth_link > service_link > subscription_link > other_link) - Admin allowlist configuration with wildcard support (*@example.com) - Frontend display in both email list (compact) and detail view (full mode) - Bilingual documentation (Chinese/English) - Database migration: add metadata field to raw_mails (v0.0.3 -> v0.0.4) Technical highlights: - Proper regex escaping for wildcard pattern matching - Content truncation to avoid AI token limits - Error handling that won't affect email receiving - JSON schema validation for AI responses - Type-safe TypeScript implementation - Vue I18n support with special character escaping References: - Inspired by Alle Project: https://github.com/bestruirui/Alle - Uses Cloudflare Workers AI JSON Mode 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -11,26 +11,26 @@
|
||||
"build": "wrangler deploy --dry-run --outdir dist --minify"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@cloudflare/workers-types": "^4.20251111.0",
|
||||
"@cloudflare/workers-types": "^4.20251205.0",
|
||||
"@eslint/js": "9.18.0",
|
||||
"@simplewebauthn/types": "10.0.0",
|
||||
"@types/node": "^22.19.1",
|
||||
"eslint": "9.18.0",
|
||||
"globals": "^15.15.0",
|
||||
"typescript-eslint": "^8.46.4",
|
||||
"wrangler": "^4.47.0"
|
||||
"typescript-eslint": "^8.48.1",
|
||||
"wrangler": "^4.53.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-s3": "3.888.0",
|
||||
"@aws-sdk/s3-request-presigner": "3.888.0",
|
||||
"@simplewebauthn/server": "10.0.1",
|
||||
"hono": "^4.10.5",
|
||||
"hono": "^4.10.7",
|
||||
"jsonpath-plus": "^10.3.0",
|
||||
"mimetext": "^3.0.27",
|
||||
"postal-mime": "^2.6.0",
|
||||
"postal-mime": "^2.6.1",
|
||||
"resend": "^4.8.0",
|
||||
"telegraf": "4.16.3",
|
||||
"worker-mailer": "^1.2.0"
|
||||
"worker-mailer": "^1.2.1"
|
||||
},
|
||||
"pnpm": {
|
||||
"patchedDependencies": {
|
||||
|
||||
833
worker/pnpm-lock.yaml
generated
833
worker/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
27
worker/src/admin_api/ai_extract_settings.ts
Normal file
27
worker/src/admin_api/ai_extract_settings.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import { Context } from "hono";
|
||||
import { CONSTANTS } from "../constants";
|
||||
import { getJsonSetting, saveSetting } from "../utils";
|
||||
|
||||
/**
 * Admin-managed options for AI email extraction.
 * Persisted as JSON under CONSTANTS.AI_EXTRACT_SETTINGS_KEY.
 */
export type AiExtractSettings = {
    /** Turns the allowlist check on; the check only applies when `allowList` is non-empty. */
    enableAllowList: boolean;
    /** Recipient addresses to allow: exact addresses or patterns containing `*` wildcards. */
    allowList: Array<string>;
};
|
||||
|
||||
/**
 * GET handler: responds with the stored AI extraction settings as JSON.
 * When nothing usable is stored, responds with a disabled, empty allowlist.
 */
async function getAiExtractSettings(c: Context<HonoCustomType>): Promise<Response> {
    const fallback: AiExtractSettings = { enableAllowList: false, allowList: [] };
    const stored = await getJsonSetting<AiExtractSettings>(c, CONSTANTS.AI_EXTRACT_SETTINGS_KEY);
    return c.json(stored || fallback);
}
|
||||
|
||||
/**
 * POST handler: validates and persists the AI extraction settings.
 *
 * The request body must be a JSON object with a boolean `enableAllowList`
 * and an `allowList` array of strings. Any other shape is rejected with
 * HTTP 400 instead of being stored, because the email pipeline later calls
 * string/array methods on these values and a malformed record would make
 * every extraction fail.
 *
 * @param c - Hono request context
 * @returns `{ success: true }` on success, or a 400 text response on invalid input
 */
async function saveAiExtractSettings(c: Context<HonoCustomType>): Promise<Response> {
    const payload: unknown = await c.req.json();
    if (typeof payload !== "object" || payload === null) {
        return c.text("Invalid AI extract settings", 400);
    }

    const { enableAllowList, allowList } = payload as Record<string, unknown>;
    if (
        typeof enableAllowList !== "boolean"
        || !Array.isArray(allowList)
        || !allowList.every((item: unknown) => typeof item === "string")
    ) {
        return c.text("Invalid AI extract settings", 400);
    }

    // Store only the known fields. Patterns are trimmed and blank entries are
    // dropped: an entry with stray whitespace could never match an address.
    const settings: AiExtractSettings = {
        enableAllowList,
        allowList: (allowList as string[])
            .map(pattern => pattern.trim())
            .filter(pattern => pattern.length > 0),
    };

    await saveSetting(c, CONSTANTS.AI_EXTRACT_SETTINGS_KEY, JSON.stringify(settings));
    return c.json({ success: true })
}
|
||||
|
||||
// Route handlers for the AI extraction settings endpoints.
export default { getAiExtractSettings, saveAiExtractSettings };
|
||||
@@ -9,6 +9,7 @@ CREATE TABLE IF NOT EXISTS raw_mails (
|
||||
source TEXT,
|
||||
address TEXT,
|
||||
raw TEXT,
|
||||
metadata TEXT,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
@@ -142,8 +143,11 @@ export default {
|
||||
const query = `ALTER TABLE address ADD password TEXT;`
|
||||
await c.env.DB.exec(query);
|
||||
}
|
||||
if (version == "v0.0.3") {
|
||||
// migration from v0.0.3 to v0.0.4
|
||||
await c.env.DB.exec(`ALTER TABLE raw_mails ADD COLUMN metadata TEXT;`);
|
||||
}
|
||||
if (version != CONSTANTS.DB_VERSION) {
|
||||
// TODO: Perform migration logic here
|
||||
// remove all \r and \n characters from the query string
|
||||
// split by ; and join with a ;\n
|
||||
const query = DB_INIT_QUERIES.replace(/[\r\n]/g, "")
|
||||
|
||||
@@ -15,6 +15,7 @@ import admin_mail_api from './admin_mail_api'
|
||||
import { sendMailbyAdmin } from './send_mail'
|
||||
import db_api from './db_api'
|
||||
import ip_blacklist_settings from './ip_blacklist_settings'
|
||||
import ai_extract_settings from './ai_extract_settings'
|
||||
import { EmailRuleSettings } from '../models'
|
||||
|
||||
export const api = new Hono<HonoCustomType>()
|
||||
@@ -377,3 +378,7 @@ api.post('admin/db_migration', db_api.migrate);
|
||||
// IP blacklist settings
|
||||
api.get("/admin/ip_blacklist/settings", ip_blacklist_settings.getIpBlacklistSettings);
|
||||
api.post("/admin/ip_blacklist/settings", ip_blacklist_settings.saveIpBlacklistSettings);
|
||||
|
||||
// AI extract settings
|
||||
api.get("/admin/ai_extract/settings", ai_extract_settings.getAiExtractSettings);
|
||||
api.post("/admin/ai_extract/settings", ai_extract_settings.saveAiExtractSettings);
|
||||
|
||||
@@ -3,7 +3,7 @@ export const CONSTANTS = {
|
||||
|
||||
// DB Version
|
||||
DB_VERSION_KEY: 'db_version',
|
||||
DB_VERSION: "v0.0.3",
|
||||
DB_VERSION: "v0.0.4",
|
||||
|
||||
// DB settings
|
||||
ADDRESS_BLOCK_LIST_KEY: 'address_block_list',
|
||||
@@ -16,6 +16,7 @@ export const CONSTANTS = {
|
||||
EMAIL_RULE_SETTINGS_KEY: 'email_rule_settings',
|
||||
ROLE_ADDRESS_CONFIG_KEY: 'role_address_config',
|
||||
IP_BLACKLIST_SETTINGS_KEY: 'ip_blacklist_settings',
|
||||
AI_EXTRACT_SETTINGS_KEY: 'ai_extract_settings',
|
||||
|
||||
// KV
|
||||
TG_KV_PREFIX: "temp-mail-telegram",
|
||||
|
||||
235
worker/src/email/ai_extract.ts
Normal file
235
worker/src/email/ai_extract.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
/**
|
||||
* AI Email Extraction Module
|
||||
*
|
||||
* This module provides email content analysis using Cloudflare Workers AI.
|
||||
* It extracts important information like verification codes, authentication links,
|
||||
* service links, and subscription management links from email content.
|
||||
*/
|
||||
|
||||
import { commonParseMail } from "../common";
|
||||
import { getBooleanValue, getJsonSetting } from "../utils";
|
||||
import { CONSTANTS } from "../constants";
|
||||
import { Context } from "hono";
|
||||
import type { AiExtractSettings } from "../admin_api/ai_extract_settings";
|
||||
|
||||
// System prompt sent to the model: defines the extraction priorities and the JSON reply shape
|
||||
const PROMPT = `
|
||||
You are an expert email analyzer. Your task is to first UNDERSTAND the email content, then EXTRACT the most relevant information based on priority.
|
||||
|
||||
# Step 1: UNDERSTAND the Email
|
||||
Read the entire email carefully and determine its:
|
||||
- Overall purpose (verification, marketing, notification, etc.)
|
||||
- Key context and situation
|
||||
- What the sender wants the recipient to do
|
||||
- Any security-sensitive content
|
||||
|
||||
# Step 2: EXTRACT Based on Priority
|
||||
After understanding, extract the most important item according to this priority order:
|
||||
|
||||
**Priority 1: auth_code (Authentication Code)**
|
||||
- Numeric or alphanumeric codes used for login verification
|
||||
- Keywords: verification code, OTP, security code, confirmation code, auth code, 验证码, 校验码
|
||||
- Extract ONLY the code itself (remove spaces, hyphens, etc.)
|
||||
- Example: "123456" from "Your verification code is 123-456"
|
||||
|
||||
**Priority 2: auth_link (Authentication Link)**
|
||||
- Links used for login, email verification, account activation, or password reset
|
||||
- Keywords: verify, confirm, activate, login, signin, signup, reset, 验证, 激活, 登录
|
||||
- Must be a real, complete URL (http:// or https://)
|
||||
- Never fabricate or infer links that don't exist in the content
|
||||
- Example: "https://example.com/verify?token=abc123"
|
||||
|
||||
**Priority 3: service_link (Service Link)**
|
||||
- Links related to specific services or actions
|
||||
- Keywords: commit, pull request, issue, repository, deployment, GitHub, GitLab, code review
|
||||
- Real URLs for technical or service-related notifications
|
||||
- Example: GitHub commit link, deployment notification link
|
||||
|
||||
**Priority 4: subscription_link (Subscription Management Link)**
|
||||
- Links for managing email subscriptions, typically unsubscribe
|
||||
- Keywords: unsubscribe, opt-out, manage preferences, 退订, 取消订阅
|
||||
- Usually found at the bottom of marketing emails
|
||||
- Real URLs for subscription control
|
||||
|
||||
**Priority 5: other_link (Other Valuable Link)**
|
||||
- Any other link that might be useful or important
|
||||
- Only extract if no higher-priority items exist
|
||||
- Must be a real, complete URL from the content
|
||||
|
||||
**Priority 6: none**
|
||||
- No relevant codes, links, or valuable content found
|
||||
- Email appears to be plain text or irrelevant
|
||||
|
||||
# Special Case: Markdown Link Format
|
||||
If the extracted content is in markdown link format [text](url):
|
||||
|
||||
- Extract the text inside the brackets as result_text
|
||||
- When brackets are empty, analyze the email context and language
|
||||
- Generate a concise, meaningful description (2-5 words) for result_text
|
||||
- Match the email's language (Chinese → Chinese description, English → English)
|
||||
|
||||
# Critical Rules
|
||||
1. **Understand First**: Always analyze the email's purpose before extracting
|
||||
2. **Single Selection**: Choose ONLY ONE type based on the highest priority match
|
||||
3. **Real Data Only**: Never invent, guess, or fabricate content
|
||||
4. **Complete URLs**: Links must be full, valid URLs as they appear in the email
|
||||
5. **Clean Extraction**: Return only the raw extracted content, no extra text
|
||||
|
||||
# Output Format (JSON only)
|
||||
{
|
||||
"type": "auth_code|auth_link|service_link|subscription_link|other_link|none",
|
||||
"result": "the extracted code/link OR empty string",
|
||||
"result_text": "the display text from markdown-format links."
|
||||
}
|
||||
|
||||
IMPORTANT: Return ONLY the JSON, no explanations or additional text.
|
||||
`;
|
||||
|
||||
/**
 * Validate a decoded model reply and convert it into an ExtractResult.
 *
 * The JSON schema passed to the model is only a request; the reply is still
 * untrusted data, so every field is checked before it is used.
 *
 * @param payload - The decoded `response` value returned by the model
 * @returns A well-formed ExtractResult
 * @throws Error when the payload is not an object, `type` is not one of the
 *         known values, or `result` is missing for a type other than 'none'
 */
function toExtractResult(payload: unknown): ExtractResult {
    if (typeof payload !== 'object' || payload === null) {
        throw new Error('Unexpected response format from Cloudflare AI');
    }

    const { type, result, result_text } = payload as Record<string, unknown>;
    const knownTypes: readonly string[] = [
        'auth_code', 'auth_link', 'service_link', 'subscription_link', 'other_link', 'none',
    ];
    if (typeof type !== 'string' || !knownTypes.includes(type)) {
        throw new Error('Unexpected response format from Cloudflare AI');
    }

    let extracted: string;
    if (typeof result === 'string') {
        extracted = result;
    } else if (typeof result === 'number' && Number.isFinite(result)) {
        // Purely numeric verification codes may come back as JSON numbers.
        extracted = String(result);
    } else if (type === 'none') {
        extracted = '';
    } else {
        throw new Error('Unexpected response format from Cloudflare AI');
    }

    return {
        type: type as ExtractResult['type'],
        result: extracted,
        // The display text is optional in practice; fall back to an empty string.
        result_text: typeof result_text === 'string' ? result_text : '',
    };
}

/**
 * Extract important information from email content using Cloudflare Workers AI.
 *
 * Sends PROMPT as the system message and `content` as the user message,
 * requesting a JSON reply constrained by a schema, then validates the reply.
 *
 * @param content - The email content to analyze (plain text or HTML)
 * @param env - Cloudflare Workers environment bindings; `env.AI` must be bound.
 *              `env.AI_EXTRACT_MODEL` overrides the default model when set.
 * @returns The validated extraction result
 * @throws SyntaxError when the model returns a string that is not valid JSON
 * @throws Error when the reply does not have the expected shape
 */
async function extractWithCloudflareAI(
    content: string,
    env: Bindings
): Promise<ExtractResult> {
    // Get the AI model name from environment variable or use default
    const modelName = env.AI_EXTRACT_MODEL || '@cf/meta/llama-3.1-8b-instruct';

    const result = await env.AI.run(modelName as keyof AiModels, {
        messages: [
            { role: 'system', content: PROMPT },
            { role: 'user', content },
        ],
        response_format: {
            type: 'json_schema',
            json_schema: {
                type: 'object',
                properties: {
                    type: {
                        type: 'string',
                        enum: ['auth_code', 'auth_link', 'service_link', 'subscription_link', 'other_link', 'none']
                    },
                    result: { type: 'string' },
                    result_text: { type: 'string' },
                },
                required: ['type', 'result', 'result_text'],
            },
        },
        stream: false,
    });

    // @ts-expect-error result.response
    const response: unknown = result.response;

    // The reply may arrive either as a JSON string or as an already-decoded object.
    if (typeof response === 'string') {
        return toExtractResult(JSON.parse(response));
    }
    return toExtractResult(response);
}
|
||||
|
||||
/**
 * Check whether `address` matches at least one allowlist entry.
 * Entries containing `*` are treated as wildcard patterns anchored at both
 * ends; all other entries must match the address exactly.
 */
function isAddressAllowed(address: string, allowList: readonly string[]): boolean {
    return allowList.some(pattern => {
        // Stored settings are plain JSON; ignore entries that are not strings.
        if (typeof pattern !== 'string') {
            return false;
        }
        if (!pattern.includes('*')) {
            return address === pattern;
        }
        // Escape special regex characters except *, then expand * to ".*"
        const escapedPattern = pattern
            .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
            .replace(/\*/g, '.*');
        return new RegExp('^' + escapedPattern + '$').test(address);
    });
}

/** Return true only for strings that parse as absolute http:// or https:// URLs. */
function isHttpUrl(value: string): boolean {
    try {
        const { protocol } = new URL(value);
        return protocol === 'http:' || protocol === 'https:';
    } catch {
        return false;
    }
}

/**
 * Main extraction function.
 * Checks whether AI extraction is enabled and allowed for the recipient,
 * runs the model on the email content, and stores the result in the
 * `metadata` column of the matching raw_mails row.
 *
 * Never throws: every failure is logged and swallowed so that extraction
 * problems cannot break email receiving.
 *
 * @param parsedEmailContext - The parsed email context
 * @param env - Cloudflare Workers environment bindings
 * @param message_id - The email message ID; extraction is skipped when it is missing
 * @param address - The recipient email address
 * @returns Promise<void>
 */
export async function extractEmailInfo(
    parsedEmailContext: ParsedEmailContext,
    env: Bindings,
    message_id: string | null,
    address: string
): Promise<void> {
    try {
        // Check if AI extraction is enabled via environment variable
        if (!getBooleanValue(env.ENABLE_AI_EMAIL_EXTRACT)) {
            return;
        }

        // Ensure AI binding is available
        if (!env.AI) {
            console.error('AI binding not available');
            return;
        }

        // The result is written with "WHERE message_id = ?". A NULL id can never
        // match a row, so bail out before spending an AI call on it.
        if (!message_id) {
            console.log(`AI extraction skipped for ${address}: missing message_id`);
            return;
        }

        // Check allowlist if enabled
        const aiSettings = await getJsonSetting<AiExtractSettings>(
            { env: env } as Context<HonoCustomType>,
            CONSTANTS.AI_EXTRACT_SETTINGS_KEY
        );
        const allowList = aiSettings?.allowList;
        if (aiSettings?.enableAllowList && Array.isArray(allowList) && allowList.length > 0) {
            if (!isAddressAllowed(address, allowList)) {
                console.log(`AI extraction skipped for ${address}: not in allowlist`);
                return;
            }
        }

        // Parse email to get content, preferring the plain-text part
        const parsedEmail = await commonParseMail(parsedEmailContext);
        const emailContent = parsedEmail?.text || parsedEmail?.html || "";
        if (!emailContent) {
            return;
        }

        // Truncate content if too long (max 4000 characters to avoid token limits)
        const truncatedContent = emailContent.length > 4000
            ? emailContent.substring(0, 4000) + '...[truncated]'
            : emailContent;

        const result = await extractWithCloudflareAI(truncatedContent, env);

        // Nothing useful found
        if (result.type === 'none' || !result.result) {
            return;
        }

        // The email body is untrusted input and the prompt requires links to be
        // complete http(s) URLs; refuse to store anything else as a link.
        const isLinkType = typeof result.type === 'string' && result.type.endsWith('_link');
        if (isLinkType && !isHttpUrl(result.result)) {
            console.log(`AI extraction discarded for ${message_id}: ${result.type} is not an http(s) URL`);
            return;
        }

        const metadata = JSON.stringify({
            ai_extract: result,
            extracted_at: new Date().toISOString()
        });

        // Update the raw_mails record with metadata
        // NOTE(review): this matches on message_id only; if several rows can share
        // one message_id they are all updated. Confirm whether it should also filter by address.
        await env.DB.prepare(
            `UPDATE raw_mails SET metadata = ? WHERE message_id = ?`
        ).bind(metadata, message_id).run();

        console.log(`AI extraction completed for ${message_id}: ${result.type}`);
    } catch (e) {
        console.error('AI email extraction error:', e);
    }
}
|
||||
|
||||
/** Categories the model may assign to the extracted item, highest priority first; 'none' means nothing was found. */
type ExtractKind =
    | 'auth_code'
    | 'auth_link'
    | 'service_link'
    | 'subscription_link'
    | 'other_link'
    | 'none';

/**
 * Shape of one AI extraction result.
 */
export type ExtractResult = {
    /** Which category the extracted item belongs to. */
    type: ExtractKind;
    /** The extracted code or link, or an empty string. */
    result: string;
    /** Display text for the item (taken from markdown-format links). */
    result_text: string;
};
|
||||
@@ -7,6 +7,7 @@ import { isBlocked } from "./black_list";
|
||||
import { triggerWebhook, triggerAnotherWorker, commonParseMail } from "../common";
|
||||
import { check_if_junk_mail } from "./check_junk";
|
||||
import { remove_attachment_if_need } from "./check_attachment";
|
||||
import { extractEmailInfo } from "./ai_extract";
|
||||
import { EmailRuleSettings } from "../models";
|
||||
import { CONSTANTS } from "../constants";
|
||||
|
||||
@@ -155,6 +156,9 @@ async function email(message: ForwardableEmailMessage, env: Bindings, ctx: Execu
|
||||
|
||||
// auto reply email
|
||||
await auto_reply(message, env);
|
||||
|
||||
// AI email content extraction
|
||||
await extractEmailInfo(parsedEmailContext, env, message_id, message.to);
|
||||
}
|
||||
|
||||
export { email }
|
||||
|
||||
5
worker/src/types.d.ts
vendored
5
worker/src/types.d.ts
vendored
@@ -11,6 +11,7 @@ type Bindings = {
|
||||
RATE_LIMITER: any
|
||||
SEND_MAIL: any
|
||||
ASSETS: Fetcher
|
||||
AI: Ai
|
||||
|
||||
// config
|
||||
DEFAULT_LANG: string | undefined
|
||||
@@ -86,6 +87,10 @@ type Bindings = {
|
||||
|
||||
// webhook config
|
||||
FRONTEND_URL: string | undefined
|
||||
|
||||
// AI extraction config
|
||||
ENABLE_AI_EMAIL_EXTRACT: string | boolean | undefined
|
||||
AI_EXTRACT_MODEL: string | undefined
|
||||
}
|
||||
|
||||
type JwtPayload = {
|
||||
|
||||
@@ -107,6 +107,10 @@ ENABLE_AUTO_REPLY = false
|
||||
# REMOVE_EXCEED_SIZE_ATTACHMENT = true
|
||||
# remove all attachments; the mail may be missing some information due to parsing
|
||||
# REMOVE_ALL_ATTACHMENT = true
|
||||
# AI email extraction, automatically extract verification codes, auth links, etc.
|
||||
# ENABLE_AI_EMAIL_EXTRACT = true
|
||||
# AI model name, choose from https://developers.cloudflare.com/workers-ai/models/#text-generation
|
||||
# AI_EXTRACT_MODEL = "@cf/meta/llama-3.1-8b-instruct"
|
||||
# Call another worker to process email
|
||||
# ENABLE_ANOTHER_WORKER = false
|
||||
# ANOTHER_WORKER_LIST = """
|
||||
@@ -127,6 +131,10 @@ binding = "DB"
|
||||
database_name = "xxx"
|
||||
database_id = "xxx"
|
||||
|
||||
# Workers AI binding (required for AI email extraction)
|
||||
# [ai]
|
||||
# binding = "AI"
|
||||
|
||||
# kv config for send email verification code
|
||||
# [[kv_namespaces]]
|
||||
# binding = "KV"
|
||||
|
||||
Reference in New Issue
Block a user