fetch_requests.py 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. imap_curator_export.py
  5. ----------------------
  6. Fetch emails via IMAP, find Steam curator key requests since a chosen date,
  7. and export results to an Excel file.
  8. Features:
  9. - Connectivity test (--test) that does NOT read/write state.
  10. - Stateful resume via UID bookmarking (JSON). Next run scans only newer UIDs.
  11. - Extracts curator/social links and requested key counts (defaults to 2 if not found).
  12. - Filters OUT reply/forward messages (replies to you), detected via headers and subject prefixes.
  13. - Outputs Excel with requested column order:
  14. 1) No. (auto-increment)
  15. 2) Mailbox Key (UID of the message)
  16. 3) Requested Key Count
  17. 4) Date (Beijing time, "YYYY-MM-DD HH:MM")
  18. 5) Curator/Name
  19. 6) Email
  20. 7) Subject
  21. 8) Curator/Social Links
  22. 9) Body (preview)
  23. - Prints a summary after export:
  24. * Total keys requested
  25. * Keys with social links present
  26. * Keys without social links
  27. USAGE
  28. -----
  29. 1) Install deps (ideally in a virtualenv):
  30. pip install -r requirements.txt
  31. 2) Set environment variables (examples for macOS/Linux bash/zsh):
  32. export IMAP_HOST="imap.gmail.com" # e.g. imap.gmail.com, outlook.office365.com
  33. export IMAP_PORT="993" # usually 993
  34. export EMAIL_USER="your_email@example.com"
  35. export EMAIL_PASS="your_app_password" # For Gmail/Outlook, use an App Password or OAuth token string
  36. export MAILBOX="INBOX" # optional, default INBOX
  37. # Start date used ONLY for the first run (no state yet). IMAP format DD-Mon-YYYY.
  38. export START_DATE="18-Oct-2025"
  39. # Optional: where to store/read bookmark state per mailbox
  40. export STATE_FILE="curator_state.json"
  41. 3) Run a connectivity test (no state read/write):
  42. python fetch_requests.py --test
  43. 4) Run extraction:
  44. python fetch_requests.py
  45. 5) Reset bookmark (force full scan from START_DATE next time):
  46. python fetch_requests.py --reset-state
  47. Output:
  48. ./curator_requests.xlsx
  49. """
  50. import os
  51. import re
  52. import sys
  53. import json
  54. import imaplib
  55. import email
  56. import argparse
  57. from email.header import decode_header, make_header
  58. from email.policy import default as default_policy
  59. from email.utils import parsedate_to_datetime
  60. from datetime import datetime, timezone
  61. from zoneinfo import ZoneInfo
  62. from typing import List, Dict, Any, Optional
  63. # Third-party
  64. from bs4 import BeautifulSoup # for HTML -> text
  65. from openpyxl import load_workbook
  66. import pandas as pd
  67. import re
  68. CURATOR_LINK_PATTERNS = [
  69. r"store\.steampowered\.com/curator/",
  70. r"steamcommunity\.com/groups/",
  71. r"steamcommunity\.com/id/",
  72. r"x\.com/|twitter\.com/",
  73. r"youtube\.com/|youtu\.be/",
  74. r"twitch\.tv/",
  75. r"discord\.gg/|discord\.com/invite/",
  76. r"facebook\.com/",
  77. r"instagram\.com/",
  78. r"tiktok\.com/",
  79. r"bilibili\.com/",
  80. r"weibo\.com/",
  81. ]
  82. # Heuristics for detecting a request for Steam keys
  83. KEY_REQUEST_PATTERNS = [
  84. r"\b(\d{1,3})\s*(?:keys?|key codes?|steam\s*keys?)\b",
  85. r"(?:需要|申请|索取|来点|要)\s*(\d{1,3})\s*(?:个)?\s*(?:key|激活码|序列号|钥匙)",
  86. r"\bup to\s*(\d{1,3})\s*keys?\b",
  87. r"\brequest(?:ing)?\s*(\d{1,3})\s*keys?\b",
  88. r"\b(\d{1,3})\s*x\s*keys?\b",
  89. r"\b(\d{1,3})-(\d{1,3})\s*keys?\b",
  90. r"\b(\d{1,3})\s*(?:steam\s*)?(?:activation\s+(?:codes?|keys?))\b",
  91. ]
  92. CURATOR_KEYWORDS = [
  93. "curator", "steam curator", "reviewer",
  94. "鉴赏家", "评测", "媒体", "KOL", "influencer", "press",
  95. "key", "keys", "激活码", "序列号", "steam key",
  96. ]
  97. # Subject prefixes indicating replies/forwards (EN/ZH)
  98. REPLY_FWD_PREFIX = re.compile(
  99. r"^\s*(re(\[\d+\])?|回复|答复|答覆|转发|fw|fwd)\s*[::]\s*",
  100. re.IGNORECASE
  101. )
  102. DEFAULT_CONFIG_PATH = os.path.join("configs", "settings.json")
  103. def load_settings(config_path: Optional[str]) -> Dict[str, Any]:
  104. path = config_path or DEFAULT_CONFIG_PATH
  105. if not os.path.isabs(path):
  106. path = os.path.abspath(path)
  107. if not os.path.exists(path):
  108. print(f"ERROR: settings file not found: {path}", file=sys.stderr)
  109. sys.exit(2)
  110. try:
  111. with open(path, "r", encoding="utf-8") as f:
  112. data = json.load(f)
  113. except Exception as e:
  114. print(f"ERROR: failed to read settings.json: {e}", file=sys.stderr)
  115. sys.exit(2)
  116. files = data.get("files", {})
  117. for key in ["excel", "state", "template", "keys", "log"]:
  118. if key in files and not os.path.isabs(files[key]):
  119. files[key] = os.path.abspath(files[key])
  120. data["files"] = files
  121. return data
  122. def ensure_parent_dir(path: str) -> None:
  123. directory = os.path.dirname(path)
  124. if directory:
  125. os.makedirs(directory, exist_ok=True)
  126. def extract_name_from_body(body: str) -> str:
  127. """
  128. 从正文中根据常见短语提取名字:
  129. 例如 "My name is Alex", "I am John", "This is Lily"。
  130. """
  131. if not body:
  132. return ""
  133. text = body.strip().replace("\r", "").replace("\n", " ")
  134. # 常见英语自我介绍
  135. patterns = [
  136. r"\bmy name is ([A-Z][a-z]+(?: [A-Z][a-z]+)?)",
  137. r"\bi am ([A-Z][a-z]+(?: [A-Z][a-z]+)?)",
  138. r"\bthis is ([A-Z][a-z]+(?: [A-Z][a-z]+)?)",
  139. ]
  140. for pat in patterns:
  141. m = re.search(pat, text, flags=re.IGNORECASE)
  142. if m:
  143. # 返回匹配到的名字(首字母大写格式化)
  144. name = m.group(1).strip()
  145. return name.title()
  146. # 末尾签名行的简易提取(如 “Thanks, Alex”)
  147. m = re.search(r"(?:regards|thanks|cheers)[,:\s]+([A-Z][a-z]+(?: [A-Z][a-z]+)?)", text, flags=re.IGNORECASE)
  148. if m:
  149. return m.group(1).strip().title()
  150. return ""
  151. def norm_email(addr: str) -> str:
  152. return (addr or "").strip().lower()
  153. def chunked(seq, n):
  154. for i in range(0, len(seq), n):
  155. yield seq[i:i+n]
  156. def mark_seen_batch(M, mailbox: str, uids: list[str], batch_size: int = 500) -> int:
  157. """
  158. 以批为单位,把同一 mailbox 中的多封邮件标记为已读。
  159. 返回成功提交 STORE 的 UID 总数(不逐一校验)。
  160. """
  161. if not uids:
  162. return 0
  163. # 切换到可写
  164. typ, _ = M.select(mailbox, readonly=False)
  165. if typ != "OK":
  166. print(f"[WARN] cannot select {mailbox} in write mode; skip mark-read")
  167. return 0
  168. total = 0
  169. for batch in chunked(uids, batch_size):
  170. # UID 序列(逗号拼接)
  171. seqset = ",".join(str(u.decode() if isinstance(u, (bytes, bytearray)) else u) for u in batch)
  172. # 先静默,减少服务器返回
  173. typ, _ = M.uid("STORE", seqset, "+FLAGS.SILENT", r"(\Seen)")
  174. if typ != "OK":
  175. # 兼容某些服务器
  176. typ, _ = M.uid("STORE", seqset, "+FLAGS", r"(\Seen)")
  177. if typ == "OK":
  178. total += len(batch)
  179. else:
  180. print(f"[WARN] batch STORE failed for {len(batch)} UIDs (len={len(seqset)})")
  181. return total
  182. def imap_login(conf: Dict[str, Any]) -> imaplib.IMAP4_SSL:
  183. host = conf.get("host")
  184. port = int(conf.get("port", 993))
  185. user = conf.get("user")
  186. pwd = conf.get("pass")
  187. if not (host and user and pwd):
  188. print("ERROR: Missing IMAP configuration in settings.json", file=sys.stderr)
  189. sys.exit(1)
  190. M = imaplib.IMAP4_SSL(host, port)
  191. M.login(user, pwd)
  192. return M
  193. def select_mailbox(M: imaplib.IMAP4_SSL, mailbox: str) -> str:
  194. typ, data = M.select(mailbox, readonly=True)
  195. if typ != "OK":
  196. raise RuntimeError(f"Cannot select mailbox {mailbox}: {typ} {data}")
  197. return mailbox
  198. def imap_date_string(start_date: str) -> str:
  199. try:
  200. datetime.strptime(start_date, "%d-%b-%Y")
  201. except Exception:
  202. print("WARNING: START_DATE is not in 'DD-Mon-YYYY' (e.g., 18-Oct-2025). Using 18-Oct-2025 by default.", file=sys.stderr)
  203. start_date = "18-Oct-2025"
  204. return start_date
  205. def load_state(path: str) -> Dict[str, Any]:
  206. if not os.path.exists(path):
  207. return {"_path": path, "mailboxes": {}}
  208. try:
  209. with open(path, "r", encoding="utf-8") as f:
  210. data = json.load(f)
  211. data["_path"] = path
  212. if "mailboxes" not in data or not isinstance(data["mailboxes"], dict):
  213. data["mailboxes"] = {}
  214. return data
  215. except Exception:
  216. return {"_path": path, "mailboxes": {}}
  217. def save_state(state: Dict[str, Any]) -> None:
  218. path = state.get("_path")
  219. if not path:
  220. return
  221. ensure_parent_dir(path)
  222. tmp = path + ".tmp"
  223. with open(tmp, "w", encoding="utf-8") as f:
  224. json.dump({k: v for k, v in state.items() if k != "_path"}, f, ensure_ascii=False, indent=2)
  225. os.replace(tmp, path)
  226. def get_mailbox_key(host: str, user: str, mailbox: str) -> str:
  227. return f"{host}|{user}|{mailbox}"
  228. def uid_search(M: imaplib.IMAP4_SSL, criterion: str) -> List[bytes]:
  229. typ, data = M.uid("search", None, criterion)
  230. if typ != "OK":
  231. raise RuntimeError(f"UID SEARCH failed: {typ} {data}")
  232. return data[0].split()
  233. def search_initial(M: imaplib.IMAP4_SSL, since_date: str) -> List[bytes]:
  234. typ, data = M.search(None, f'(SINCE "{since_date}")')
  235. if typ != "OK":
  236. raise RuntimeError(f"SEARCH failed: {typ} {data}")
  237. ids = data[0].split()
  238. uids: List[bytes] = []
  239. if not ids:
  240. return uids
  241. for i in range(0, len(ids), 500):
  242. batch = ids[i:i+500]
  243. typ2, data2 = M.fetch(b",".join(batch), "(UID)")
  244. if typ2 != "OK":
  245. continue
  246. for part in data2:
  247. if not isinstance(part, tuple):
  248. continue
  249. header = part[0].decode("utf-8", "ignore")
  250. m = re.search(r"UID\s+(\d+)", header)
  251. if m:
  252. uids.append(m.group(1).encode())
  253. return uids
  254. def decode_str(s: Optional[str]) -> str:
  255. if not s:
  256. return ""
  257. try:
  258. return str(make_header(decode_header(s)))
  259. except Exception:
  260. return s
  261. def get_address(msg: email.message.Message) -> (str, str):
  262. from_raw = msg.get("From", "")
  263. name = email.utils.parseaddr(from_raw)[0]
  264. addr = email.utils.parseaddr(from_raw)[1]
  265. return decode_str(name).strip(), addr.strip()
  266. def get_subject(msg: email.message.Message) -> str:
  267. return decode_str(msg.get("Subject", "")).strip()
  268. def get_payload_text(msg: email.message.Message) -> (str, str):
  269. plain_parts = []
  270. html_parts = []
  271. if msg.is_multipart():
  272. for part in msg.walk():
  273. ctype = part.get_content_type()
  274. disp = str(part.get_content_disposition() or "").lower()
  275. if disp == "attachment":
  276. continue
  277. try:
  278. payload = part.get_payload(decode=True)
  279. except Exception:
  280. payload = None
  281. if payload is None:
  282. continue
  283. charset = part.get_content_charset() or "utf-8"
  284. try:
  285. text = payload.decode(charset, errors="replace")
  286. except Exception:
  287. text = payload.decode("utf-8", errors="replace")
  288. if ctype == "text/plain":
  289. plain_parts.append(text)
  290. elif ctype == "text/html":
  291. html_parts.append(text)
  292. else:
  293. payload = msg.get_payload(decode=True) or b""
  294. charset = msg.get_content_charset() or "utf-8"
  295. try:
  296. text = payload.decode(charset, errors="replace")
  297. except Exception:
  298. text = payload.decode("utf-8", errors="replace")
  299. if msg.get_content_type() == "text/html":
  300. html_parts.append(text)
  301. else:
  302. plain_parts.append(text)
  303. return ("\n".join(plain_parts).strip(), "\n".join(html_parts).strip())
  304. def html_to_text(html: str) -> str:
  305. if not html:
  306. return ""
  307. soup = BeautifulSoup(html, "html.parser")
  308. for tag in soup(["script", "style"]):
  309. tag.decompose()
  310. return soup.get_text(separator="\n").strip()
  311. def extract_links(text: str) -> List[str]:
  312. if not text:
  313. return []
  314. urls = re.findall(r"https?://[^\s<>()\"\']+", text, flags=re.IGNORECASE)
  315. seen = set()
  316. out = []
  317. for u in urls:
  318. if u not in seen:
  319. seen.add(u)
  320. out.append(u)
  321. return out
  322. def filter_curator_links(urls: List[str]) -> List[str]:
  323. if not urls:
  324. return []
  325. combined = "|".join(CURATOR_LINK_PATTERNS)
  326. pat = re.compile(combined, re.IGNORECASE)
  327. return [u for u in urls if pat.search(u)]
  328. def detect_key_count(text: str) -> Optional[int]:
  329. if not text:
  330. return None
  331. best = None
  332. for pat in KEY_REQUEST_PATTERNS:
  333. m = re.search(pat, text, flags=re.IGNORECASE)
  334. if m:
  335. if m.lastindex and m.lastindex >= 2 and m.group(1) and m.group(2):
  336. try:
  337. a = int(m.group(1))
  338. b = int(m.group(2))
  339. best = max(a, b)
  340. break
  341. except Exception:
  342. continue
  343. else:
  344. nums = [int(g) for g in m.groups() if g and g.isdigit()]
  345. if nums:
  346. best = max(nums)
  347. break
  348. return best
  349. def looks_like_curator_request(subject: str, body_text: str) -> bool:
  350. blob = f"{subject}\n{body_text}".lower()
  351. return any(k.lower() in blob for k in CURATOR_KEYWORDS)
  352. def fetch_by_uid(M: imaplib.IMAP4_SSL, uid: bytes) -> email.message.Message:
  353. typ, data = M.uid("fetch", uid, "(RFC822)")
  354. if typ != "OK" or not data or not isinstance(data[0], tuple):
  355. raise RuntimeError(f"UID FETCH failed for {uid!r}: {typ} {data}")
  356. raw = data[0][1]
  357. msg = email.message_from_bytes(raw, policy=default_policy)
  358. return msg
  359. def parse_msg_date_bj(msg: email.message.Message) -> str:
  360. raw = msg.get("Date") or ""
  361. try:
  362. dt = parsedate_to_datetime(raw)
  363. if dt is None:
  364. raise ValueError("parsedate_to_datetime returned None")
  365. if dt.tzinfo is None:
  366. dt = dt.replace(tzinfo=timezone.utc)
  367. dt_cst = dt.astimezone(ZoneInfo("Asia/Shanghai"))
  368. return dt_cst.strftime("%Y-%m-%d %H:%M")
  369. except Exception:
  370. return ""
  371. def is_reply_or_forward(msg: email.message.Message, subject: str) -> bool:
  372. # Header-based
  373. if msg.get("In-Reply-To") or msg.get("References"):
  374. return True
  375. # Subject-based
  376. if REPLY_FWD_PREFIX.search(subject):
  377. return True
  378. return False
  379. def fetch_and_parse(M: imaplib.IMAP4_SSL, uid: bytes) -> Dict[str, Any]:
  380. msg = fetch_by_uid(M, uid)
  381. name, addr = get_address(msg)
  382. subject = get_subject(msg)
  383. date_local = parse_msg_date_bj(msg)
  384. reply_flag = is_reply_or_forward(msg, subject)
  385. message_id = (msg.get("Message-ID") or "").strip()
  386. plain, html = get_payload_text(msg)
  387. merged_text = plain.strip()
  388. if html and (not merged_text or len(merged_text) < 20):
  389. merged_text = html_to_text(html)
  390. links_all = extract_links(plain + "\n" + html)
  391. curator_links = filter_curator_links(links_all)
  392. key_count = detect_key_count(merged_text)
  393. if key_count is None:
  394. key_count = 2 # default when not specified
  395. return {
  396. "uid": uid.decode() if isinstance(uid, (bytes, bytearray)) else str(uid),
  397. "from_name": name or "",
  398. "from_email": addr or "",
  399. "subject": subject,
  400. "message_id": message_id,
  401. "date_local": date_local,
  402. "body_preview": (merged_text[:3000] + ("..." if len(merged_text) > 3000 else "")),
  403. "curator_links": curator_links,
  404. "key_count": int(key_count),
  405. "is_reply": reply_flag,
  406. }
  407. def connectivity_test(settings: Dict[str, Any]) -> int:
  408. try:
  409. imap_conf = settings.get("imap", {})
  410. mailbox_name = imap_conf.get("mailbox", "INBOX")
  411. M = imap_login(imap_conf)
  412. try:
  413. mailbox = select_mailbox(M, mailbox_name)
  414. uids = uid_search(M, "ALL")
  415. count = len(uids)
  416. if count == 0:
  417. print(f"[TEST] Connected to {mailbox}, but it has no messages.")
  418. return 0
  419. latest_uid = uids[-1]
  420. msg = fetch_by_uid(M, latest_uid)
  421. subject = get_subject(msg)
  422. from_name, from_addr = get_address(msg)
  423. date_local = parse_msg_date_bj(msg)
  424. print("[TEST] IMAP OK.")
  425. print(f"[TEST] Mailbox: {mailbox}")
  426. print(f"[TEST] Total messages: {count}")
  427. print(f"[TEST] Latest UID: {latest_uid.decode()}")
  428. print(f"[TEST] Latest From: {from_name} <{from_addr}>")
  429. print(f"[TEST] Latest Subject: {subject}")
  430. print(f"[TEST] Latest Date (BJ): {date_local}")
  431. return 0
  432. finally:
  433. try:
  434. M.logout()
  435. except Exception:
  436. pass
  437. except Exception as e:
  438. print(f"[TEST] IMAP failed: {e}", file=sys.stderr)
  439. return 2
  440. def run_export(settings: Dict[str, Any], reset_state: bool = False, mark_read: bool = False) -> int:
  441. files_conf = settings.get("files", {})
  442. state_path = os.path.abspath(files_conf.get("state", os.path.join("config", "curator_state.json")))
  443. out_path = os.path.abspath(files_conf.get("excel", os.path.join("records", "curator_requests.xlsx")))
  444. start_date = imap_date_string(settings.get("start_date", "18-Oct-2025"))
  445. imap_conf = settings.get("imap", {})
  446. mailbox_name = imap_conf.get("mailbox", "INBOX")
  447. host = imap_conf.get("host", "")
  448. user = imap_conf.get("user", "")
  449. min_uid = int(settings.get("min_uid", 125))
  450. M = imap_login(imap_conf)
  451. state = load_state(state_path)
  452. try:
  453. mailbox = select_mailbox(M, mailbox_name)
  454. mailbox_key = get_mailbox_key(host, user, mailbox)
  455. if reset_state and mailbox_key in state["mailboxes"]:
  456. del state["mailboxes"][mailbox_key]
  457. save_state(state)
  458. print(f"[STATE] Reset bookmark for {mailbox_key}")
  459. last_uid = None
  460. if mailbox_key in state["mailboxes"]:
  461. last_uid = state["mailboxes"][mailbox_key].get("last_uid")
  462. if last_uid:
  463. criterion = f"(UID {int(last_uid)+1}:*)"
  464. uids = uid_search(M, criterion)
  465. print(f"[SCAN] Using bookmark last_uid={last_uid}; new UIDs found: {len(uids)}")
  466. else:
  467. uids = search_initial(M, start_date)
  468. print(f"[SCAN] Initial run since {start_date}; candidate UIDs: {len(uids)}")
  469. rows: List[Dict[str, Any]] = []
  470. all_uids_for_mark: list[str] = []
  471. max_seen_uid = int(last_uid) if last_uid else 0
  472. row_no = 1
  473. best_by_email: dict[str, dict] = {}
  474. dup_skipped = 0
  475. interrupted = False
  476. try:
  477. for i, uid in enumerate(uids, 1):
  478. try:
  479. uid_int = int(uid)
  480. if uid_int < min_uid:
  481. continue
  482. rec = fetch_and_parse(M, uid)
  483. if uid_int > max_seen_uid:
  484. max_seen_uid = uid_int
  485. # Exclude replies/forwards
  486. if rec["is_reply"]:
  487. continue
  488. has_links = False
  489. if looks_like_curator_request(rec["subject"], rec["body_preview"]):
  490. has_links = bool(rec["curator_links"])
  491. curator_email = norm_email(rec["from_email"])
  492. dedup_key = curator_email if curator_email else f"uid:{rec['uid']}"
  493. # 首选:邮件头中的名字
  494. if rec["from_name"]:
  495. curator_name = rec["from_name"].strip()
  496. else:
  497. # 尝试从正文里提取名字
  498. extracted = extract_name_from_body(rec["body_preview"])
  499. if extracted:
  500. curator_name = extracted
  501. else:
  502. curator_name = "Curator"
  503. candidate = {
  504. "uid_int": int(uid),
  505. "record": {
  506. "Mailbox Key": rec["uid"], # UID
  507. "Requested Key Count": rec["key_count"],
  508. "Date": rec["date_local"],
  509. "Curator/Name": curator_name,
  510. "Email": rec["from_email"],
  511. "Subject": rec["subject"],
  512. "Curator/Social Links": ", ".join(rec["curator_links"]) if has_links else "",
  513. "Body (preview)": rec["body_preview"],
  514. "Original Message-ID": rec.get("message_id", ""),
  515. "_has_links": has_links,
  516. }
  517. }
  518. all_uids_for_mark.append(int(uid))
  519. prev = best_by_email.get(dedup_key)
  520. if prev is None or candidate["uid_int"] > prev["uid_int"]:
  521. best_by_email[dedup_key] = candidate
  522. else:
  523. dup_skipped += 1
  524. if i % 10 == 0:
  525. pct = (i / len(uids)) * 100
  526. print(f" Processed {pct:.1f}% ({i}/{len(uids)})")
  527. except Exception as e:
  528. print(f"[WARN] Failed to parse UID {uid!r}: {e}", file=sys.stderr)
  529. continue
  530. except KeyboardInterrupt:
  531. interrupted = True
  532. print("\n[INTERRUPTED] Stopping scan early. Partial results will be written.")
  533. if mark_read:
  534. done = mark_seen_batch(M, mailbox, all_uids_for_mark, batch_size=500)
  535. print(f"[INFO] Marked {done} message(s) as read in batches.")
  536. rows = []
  537. row_no = 1
  538. selected_uids_for_mark = []
  539. for _, v in best_by_email.items():
  540. rec = v["record"]
  541. rows.append({
  542. "No.": row_no,
  543. "Mailbox Key": rec["Mailbox Key"],
  544. "Requested Key Count": rec["Requested Key Count"],
  545. "Date": rec["Date"],
  546. "Curator/Name": rec["Curator/Name"],
  547. "Email": rec["Email"],
  548. "Subject": rec["Subject"],
  549. "Curator/Social Links": rec["Curator/Social Links"],
  550. "Body (preview)": rec["Body (preview)"],
  551. "Original Message-ID": rec["Original Message-ID"],
  552. "_has_links": rec["_has_links"],
  553. })
  554. selected_uids_for_mark.append(rec["Mailbox Key"])
  555. row_no += 1
  556. # Save bookmark even if no rows matched, so daily runs skip already-seen messages
  557. state["mailboxes"][mailbox_key] = {"last_uid": str(max_seen_uid)}
  558. save_state(state)
  559. print(f"[STATE] Updated last_uid={max_seen_uid} for {mailbox_key}")
  560. columns = [
  561. "No.", "Mailbox Key", "Requested Key Count", "Date", "Curator/Name",
  562. "Email", "Subject", "Curator/Social Links", "Body (preview)", "Original Message-ID"
  563. ]
  564. if not rows:
  565. print("No curator key requests matched the filters.")
  566. df = pd.DataFrame(columns=columns)
  567. total_keys = with_links = without_links = 0
  568. else:
  569. df = pd.DataFrame(rows)
  570. df["Requested Key Count"] = pd.to_numeric(df["Requested Key Count"], errors="coerce").fillna(0).astype(int)
  571. total_keys = int(df["Requested Key Count"].sum())
  572. with_links = int(df.loc[df["_has_links"], "Requested Key Count"].sum()) if "_has_links" in df.columns else 0
  573. without_links = total_keys - with_links
  574. # drop helper
  575. if "_has_links" in df.columns:
  576. df = df.drop(columns=["_has_links"])
  577. for col in columns:
  578. if col not in df.columns:
  579. df[col] = ""
  580. df = df[columns]
  581. ensure_parent_dir(out_path)
  582. date_tag = datetime.now(ZoneInfo("Asia/Shanghai")).strftime("%Y%m%d")
  583. sheet_prefix = f"curtor_{date_tag}"
  584. existing_sheets = set()
  585. if os.path.exists(out_path):
  586. try:
  587. wb = load_workbook(out_path, read_only=True)
  588. existing_sheets = set(wb.sheetnames)
  589. wb.close()
  590. except Exception:
  591. existing_sheets = set()
  592. suffix = 1
  593. while True:
  594. sheet_name = f"{sheet_prefix}{suffix:02d}"
  595. if sheet_name not in existing_sheets:
  596. break
  597. suffix += 1
  598. writer_args: Dict[str, Any] = {"engine": "openpyxl"}
  599. if os.path.exists(out_path):
  600. writer_args["mode"] = "a"
  601. with pd.ExcelWriter(out_path, **writer_args) as writer:
  602. df.to_excel(writer, sheet_name=sheet_name, index=False)
  603. book = writer.book
  604. if sheet_name in book.sheetnames and book.sheetnames[0] != sheet_name:
  605. ws = book[sheet_name]
  606. sheets = book._sheets # type: ignore[attr-defined]
  607. sheets.insert(0, sheets.pop(sheets.index(ws)))
  608. # Summary printout
  609. print("\n=== SUMMARY ===")
  610. print(f"Total requested keys: {total_keys}")
  611. print(f"With social links: {with_links}")
  612. print(f"Without social links: {without_links}")
  613. if interrupted:
  614. print("[INTERRUPTED] Export stopped early; only the processed portion is included.")
  615. print(f"\nExported {len(df)} row(s) to {out_path}")
  616. print("\nDRY-RUN 命令:")
  617. print(f" python send_keys.py \\")
  618. print(f" --excel {os.path.basename(out_path)} \\")
  619. print(f" --sheet {sheet_name} \\")
  620. print(f" --template \"email_template.html\" \\")
  621. print(f" --keys steam_key.txt \\")
  622. print(f" --out send_log.xlsx \\")
  623. print(f" --subject \"Steam Keys for Such A Guy - \" \\")
  624. print(f" --dry-run")
  625. print("\nTEST 命令:")
  626. print(f" python send_keys.py \\")
  627. print(f" --excel {os.path.basename(out_path)} \\")
  628. print(f" --sheet {sheet_name} \\")
  629. print(f" --template \"email_template.html\" \\")
  630. print(f" --keys steam_key.txt \\")
  631. print(f" --out send_log.xlsx \\")
  632. print(f" --subject \"Steam Keys for Such A Guy -\" \\")
  633. print(f" --test \\")
  634. print(f" --no-consume --no-sentemail")
  635. print("\n正式执行命令:")
  636. print(f" python send_keys.py \\")
  637. print(f" --excel {os.path.basename(out_path)} \\")
  638. print(f" --sheet {sheet_name} \\")
  639. print(f" --template \"email_template.html\" \\")
  640. print(f" --keys steam_key.txt \\")
  641. print(f" --out send_log.xlsx \\")
  642. print(f" --subject \"Steam Keys for Such A Guy -\" \\")
  643. print(f" --mark-answered")
  644. return 0
  645. finally:
  646. try:
  647. M.logout()
  648. except Exception:
  649. pass
  650. def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
  651. p = argparse.ArgumentParser(description="Export Steam curator key requests to Excel via IMAP.")
  652. p.add_argument("--config", help="Path to settings.json (default: configs/settings.json).")
  653. p.add_argument("--test", action="store_true", help="Run a quick IMAP connectivity test and exit (does not read/write state).")
  654. p.add_argument("--reset-state", action="store_true", help="Reset stored UID bookmark before running.")
  655. p.add_argument("--mark-read", action="store_true", help="After exporting, mark those emails as read on the IMAP server.")
  656. return p.parse_args(argv)
  657. def main(argv: Optional[List[str]] = None) -> int:
  658. args = parse_args(argv)
  659. config_path = args.config or os.environ.get("SETTINGS_FILE") or DEFAULT_CONFIG_PATH
  660. settings = load_settings(config_path)
  661. if args.test:
  662. return connectivity_test(settings) # no state read/write
  663. return run_export(settings=settings, reset_state=args.reset_state, mark_read=args.mark_read)
  664. if __name__ == "__main__":
  665. raise SystemExit(main())