Files
cloudflare_temp_email/smtp_proxy_server/imap_mailbox.py
Dream Hunter 03965f3612 fix(imap): fix mojibake in nested emails, empty headers, and date handling (#909)
* fix(imap): fix mojibake in nested emails, empty headers, and date handling

- Add line-by-line mojibake fix fallback for complex emails with mixed content
- Apply empty header cleanup globally to fix nested message/rfc822 parts
- Add locale-independent date formatting (format_imap_date, format_rfc2822_date)
- Fill missing Date header from created_at field
- Fix getSubPart for non-multipart messages
- Accept CREATE requests from clients (e.g. Gmail creating Drafts)
- Strip whitespace from IMAP password
- Use MIMEText instead of MIMEMultipart for sent mail generation
- Keep body in original CTE encoding for correct BODYSTRUCTURE
- Update CHANGELOG (zh/en)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* docs: consolidate IMAP changelog entries into single line

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 20:52:18 +08:00

361 lines
11 KiB
Python

import bisect
import logging
import time
from collections import OrderedDict
from twisted.internet import defer
from twisted.mail import imap4
from zope.interface import implementer
from config import settings
from imap_http_client import BackendClient
from imap_message import SimpleMessage
from parse_email import generate_email_model, parse_email, clean_raw_headers, fix_mojibake
_logger = logging.getLogger(__name__)
_logger.setLevel(logging.INFO)
# Use process start time as UIDVALIDITY so clients resync after restart
_UID_VALIDITY = int(time.time())
class MessageCache:
"""LRU cache for parsed email messages, keyed by backend id (=UID)."""
def __init__(self, max_size: int = 500):
self._cache: OrderedDict[int, SimpleMessage] = OrderedDict()
self._max_size = max_size
def get(self, uid: int):
if uid in self._cache:
self._cache.move_to_end(uid)
return self._cache[uid]
return None
def put(self, uid: int, message: SimpleMessage):
if uid in self._cache:
self._cache.move_to_end(uid)
self._cache[uid] = message
else:
if len(self._cache) >= self._max_size:
self._cache.popitem(last=False)
self._cache[uid] = message
def __contains__(self, uid: int) -> bool:
return uid in self._cache
def __len__(self) -> int:
return len(self._cache)
@implementer(imap4.IMailboxInfo, imap4.IMailbox, imap4.ISearchableMailbox)
class SimpleMailbox:
def __init__(self, name: str, client: BackendClient):
self.name = name
self._client = client
self.listeners = []
self.addListener = self.listeners.append
self.removeListener = self.listeners.remove
self._message_count = 0
self._uid_index: list[int] = []
self._flags: dict[int, set[str]] = {}
self._cache = MessageCache(max_size=settings.imap_cache_size)
self._uid_index_built = False
def getFlags(self):
return [r"\Seen", r"\Answered", r"\Flagged", r"\Deleted", r"\Draft"]
def getUIDValidity(self):
return _UID_VALIDITY
def getMessageCount(self):
return self._message_count
def getRecentCount(self):
return 0
def getUnseenCount(self):
return 0
def isWriteable(self):
return 1
def destroy(self):
pass
def getHierarchicalDelimiter(self):
return "/"
@defer.inlineCallbacks
def requestStatus(self, names):
if not self._uid_index_built:
yield self._build_uid_index()
else:
count = yield self._refresh_count()
if count != self._message_count:
self._message_count = count
yield self._build_uid_index()
r = {}
if "MESSAGES" in names:
r["MESSAGES"] = self._message_count
if "RECENT" in names:
r["RECENT"] = self.getRecentCount()
if "UIDNEXT" in names:
r["UIDNEXT"] = self.getUIDNext()
if "UIDVALIDITY" in names:
r["UIDVALIDITY"] = self.getUIDValidity()
if "UNSEEN" in names:
r["UNSEEN"] = self.getUnseenCount()
return r
def _refresh_count(self) -> defer.Deferred:
return self._client.get_message_count(self.name)
@defer.inlineCallbacks
def _build_uid_index(self):
"""Build UID index by fetching all message IDs from backend."""
count = yield self._client.get_message_count(self.name)
self._message_count = count
_logger.info("Building UID index for %s: count=%d", self.name, count)
if count == 0:
self._uid_index = []
self._uid_index_built = True
return
uid_set = set()
batch_size = 100
offset = 0
while offset < count:
limit = min(batch_size, count - offset)
results, _ = yield self._client.get_messages(
self.name, limit, offset
)
for item in results:
item_id = item.get("id")
if item_id is not None and item_id not in uid_set:
uid_set.add(item_id)
_logger.info(
"UID index batch: offset=%d limit=%d got=%d total_uids=%d",
offset, limit, len(results), len(uid_set),
)
offset += limit
self._uid_index = sorted(uid_set)
self._uid_index_built = True
_logger.info(
"UID index built for %s: %d UIDs, range=%s..%s",
self.name, len(self._uid_index),
self._uid_index[0] if self._uid_index else "N/A",
self._uid_index[-1] if self._uid_index else "N/A",
)
def _seq_to_uid(self, seq: int) -> int | None:
"""Convert 1-based sequence number to UID."""
if 1 <= seq <= len(self._uid_index):
return self._uid_index[seq - 1]
return None
def _uid_to_seq(self, uid: int) -> int | None:
"""Convert UID to 1-based sequence number."""
idx = bisect.bisect_left(self._uid_index, uid)
if idx < len(self._uid_index) and self._uid_index[idx] == uid:
return idx + 1
return None
def _resolve_message_set(self, messages, uid: bool) -> list[int]:
"""Resolve an IMAP MessageSet to a list of UIDs."""
result_uids = []
if not self._uid_index:
return result_uids
max_uid = self._uid_index[-1]
max_seq = len(self._uid_index)
_logger.info(
"Resolving message_set: uid=%s ranges=%s max_uid=%d max_seq=%d",
uid, list(messages.ranges), max_uid, max_seq,
)
for start, end in messages.ranges:
if uid:
actual_end = end if end is not None else max_uid
for u in self._uid_index:
if start <= u <= actual_end:
result_uids.append(u)
else:
actual_end = end if end is not None else max_seq
actual_start = max(start, 1)
actual_end = min(actual_end, max_seq)
for seq in range(actual_start, actual_end + 1):
u = self._seq_to_uid(seq)
if u is not None:
result_uids.append(u)
return result_uids
@defer.inlineCallbacks
def _fetch_and_cache_messages(self, uids: list[int]):
"""Fetch uncached messages from backend in batches."""
uncached = [u for u in uids if u not in self._cache]
if not uncached:
return
uncached_set = set(uncached)
id_to_data = {}
batch_size = 50
total = self._message_count
_logger.info(
"Fetching %d uncached messages (total=%d) for %s",
len(uncached), total, self.name,
)
if total == 0:
return
fetched_ids = set()
offset = 0
while offset < total and len(fetched_ids) < len(uncached):
limit = min(batch_size, total - offset)
results, _ = yield self._client.get_messages(
self.name, limit, offset
)
for item in results:
item_id = item.get("id")
if item_id in uncached_set and item_id not in fetched_ids:
id_to_data[item_id] = item
fetched_ids.add(item_id)
if len(fetched_ids) >= len(uncached):
break
offset += limit
_logger.info(
"Fetched %d/%d messages for %s",
len(id_to_data), len(uncached), self.name,
)
for uid_val in uncached:
if uid_val in id_to_data:
item = id_to_data[uid_val]
try:
if self.name == "INBOX":
raw = item.get("raw", "")
raw = fix_mojibake(raw)
raw = clean_raw_headers(raw)
email_model = parse_email(raw)
elif self.name == "SENT":
email_model, raw = generate_email_model(item)
else:
continue
if uid_val not in self._flags:
self._flags[uid_val] = {r"\Seen"}
flags = self._flags[uid_val]
msg = SimpleMessage(
uid_val, email_model, flags=flags, raw=raw,
created_at=item.get("created_at"),
)
self._cache.put(uid_val, msg)
except Exception as e:
_logger.error(f"Failed to parse message uid={uid_val}: {e}")
@defer.inlineCallbacks
def fetch(self, messages, uid):
if not self._uid_index_built:
yield self._build_uid_index()
else:
count = yield self._refresh_count()
if count != self._message_count:
self._message_count = count
yield self._build_uid_index()
target_uids = self._resolve_message_set(messages, uid)
_logger.info(
"FETCH: uid=%s target_uids=%d message_set=%s",
uid, len(target_uids),
target_uids[:5] if len(target_uids) > 5 else target_uids,
)
if not target_uids:
return []
yield self._fetch_and_cache_messages(target_uids)
result = []
for u in target_uids:
cached = self._cache.get(u)
if cached is not None:
flags = self._flags.get(u, set())
cached._flags = flags
seq = self._uid_to_seq(u)
if seq is not None:
result.append((seq, cached))
return result
def getUID(self, message):
return message
@defer.inlineCallbacks
def store(self, messages, flags, mode, uid):
if not self._uid_index_built:
yield self._build_uid_index()
if not self._uid_index:
return {}
target_uids = self._resolve_message_set(messages, uid)
result = {}
for u in target_uids:
current_flags = self._flags.get(u, set())
if mode == 1: # +FLAGS
current_flags = current_flags | set(flags)
elif mode == -1: # -FLAGS
current_flags = current_flags - set(flags)
elif mode == 0: # FLAGS (replace)
current_flags = set(flags)
self._flags[u] = current_flags
seq = self._uid_to_seq(u)
if seq is not None:
result[seq] = current_flags
return result
@defer.inlineCallbacks
def search(self, query, uid):
if not self._uid_index_built:
yield self._build_uid_index()
results = []
for term in query:
if isinstance(term, str) and term.upper() == "ALL":
if uid:
results = list(self._uid_index)
else:
results = list(range(1, len(self._uid_index) + 1))
break
if not results:
if uid:
results = list(self._uid_index)
else:
results = list(range(1, len(self._uid_index) + 1))
return results
def getUIDNext(self):
if self._uid_index:
return self._uid_index[-1] + 1
return 1
def expunge(self):
return defer.succeed([])