From b878cb72ea54b915704d2a3c990d8937a459e596 Mon Sep 17 00:00:00 2001 From: karuo Date: Mon, 23 Feb 2026 23:41:14 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=84=20=E5=8D=A1=E8=8B=A5AI=20=E5=90=8C?= =?UTF-8?q?=E6=AD=A5=202026-02-23=2023:41=20|=20=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=EF=BC=9A=E6=B0=B4=E6=A1=A5=E5=B9=B3=E5=8F=B0=E5=AF=B9=E6=8E=A5?= =?UTF-8?q?=E3=80=81=E8=BF=90=E8=90=A5=E4=B8=AD=E6=9E=A2=E5=B7=A5=E4=BD=9C?= =?UTF-8?q?=E5=8F=B0=20|=20=E6=8E=92=E9=99=A4=20>20MB:=2010=20=E4=B8=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../QQ邮箱拉取/qq_fetch_all_folders.py | 40 ++++ .../QQ邮箱拉取/qq_mail_analyze_full.py | 183 ++++++++++++++++++ 运营中枢/工作台/gitea_push_log.md | 1 + 运营中枢/工作台/代码管理.md | 1 + 4 files changed, 225 insertions(+) create mode 100644 02_卡人(水)/水桥_平台对接/QQ邮箱拉取/qq_fetch_all_folders.py create mode 100644 02_卡人(水)/水桥_平台对接/QQ邮箱拉取/qq_mail_analyze_full.py diff --git a/02_卡人(水)/水桥_平台对接/QQ邮箱拉取/qq_fetch_all_folders.py b/02_卡人(水)/水桥_平台对接/QQ邮箱拉取/qq_fetch_all_folders.py new file mode 100644 index 00000000..517c2395 --- /dev/null +++ b/02_卡人(水)/水桥_平台对接/QQ邮箱拉取/qq_fetch_all_folders.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +"""全量拉取所有 IMAP 可见文件夹""" +import json +import sys +from pathlib import Path +from qq_mail_fetch import fetch_emails, list_folders, AUTH_CODE + +OUT = Path("/Users/karuo/Documents/卡若Ai的文件夹/报告") +OUT.mkdir(parents=True, exist_ok=True) + +def safe_name(f): + return f.replace("/", "_").replace(" ", "_").replace("&", "x")[:40] + +def main(): + if not AUTH_CODE: + print("无授权码") + return 1 + folders = list_folders() + all_emails = [] + stats = {} + for f in folders: + try: + emails = fetch_emails(days=365*20, limit=0, all_mail=True, progress=False, folder=f) + stats[f] = len(emails) + for e in emails: + e["_folder"] = f + all_emails.extend(emails) + print(f" {f}: {len(emails)}") + except Exception as ex: + print(f" {f}: 失败 {ex}") + stats[f] = 0 + with open(OUT / "qq_all_folders_export.json", "w", encoding="utf-8") as fp: + json.dump(all_emails, fp, ensure_ascii=False, indent=None) + with open(OUT / "qq_folders_stats.json", "w", encoding="utf-8") as fp: + json.dump(stats, fp, ensure_ascii=False, indent=2) + print(f"\n合计: {len(all_emails)} 封") + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/02_卡人(水)/水桥_平台对接/QQ邮箱拉取/qq_mail_analyze_full.py b/02_卡人(水)/水桥_平台对接/QQ邮箱拉取/qq_mail_analyze_full.py new file mode 100644 index 00000000..9ca54b00 --- /dev/null +++ b/02_卡人(水)/水桥_平台对接/QQ邮箱拉取/qq_mail_analyze_full.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +""" +QQ 邮箱全量多维度分析 · 合并收件箱/已发送/垃圾箱/我的文件夹 +""" +import json +import re +from pathlib import Path +from collections import Counter +from datetime import datetime + +REPORT_DIR = Path("/Users/karuo/Documents/卡若Ai的文件夹/报告") + +def load_merge(): + """加载并合并各文件夹导出""" + all_emails = [] + mapping = [ + (REPORT_DIR / "qq_inbox_export.json", "收件箱"), + (REPORT_DIR / "qq_mail_full_export.json", "收件箱"), # 与 inbox 可能重复,后面去重 + (REPORT_DIR / "qq_sent_export.json", "已发送"), + (REPORT_DIR / "qq_junk_export.json", "垃圾箱"), + (REPORT_DIR / "qq_myfolders_export.json", "我的文件夹"), + (REPORT_DIR / "qq_all_folders_export.json", "全量"), + ] + seen = set() # (date, from, subject) 去重 + for fp, folder in mapping: + if not fp.exists(): + continue + try: + data = json.load(open(fp, "r", encoding="utf-8")) + if not isinstance(data, list): + continue + for e in data: + e = dict(e) + e["_folder"] = e.get("_folder", folder) + key = (e.get("date","")[:16], e.get("from","")[:50], e.get("subject","")[:60]) + if key in seen: + continue + seen.add(key) + all_emails.append(e) + except Exception: + pass + return all_emails + +def parse_sender(addr): + m = re.search(r"[\w.-]+@[\w.-]+", addr or "") + return m.group(0).lower() if m else (addr or "unknown")[:50] + +def classify(e): + subj = (e.get("subject") or "").lower() + fr = (e.get("from") or "").lower() + sender = parse_sender(e.get("from", "")) + if "github" in sender: + if "run failed" in subj or "sync" in subj or "failed" in subj: + return "告警_GitHub同步失败" + if "security" in subj or "alert" in subj: + return "告警_GitHub安全" + return "技术_GitHub" + if "synology" in sender: + return "告警_Synology_NAS" + if "vercel" in sender: + return "告警_Vercel部署" + if "trip.com" in sender or "ctrip" in sender: + return "推广_携程" + if "adobe" in sender or "facebook" in sender or "airbnb" in sender: + return "推广_品牌" + if "boss" in sender or "zhipin" in sender: + return "招聘_Boss直聘" + if "光大" in fr or "cebbank" in sender or "95595" in fr: + return "财务_光大银行" + if "alipay" in sender or "支付宝" in fr: + return "财务_支付宝" + if "apple" in sender or "icloud" in sender: + return "服务_Apple" + if "ollama" in sender: + return "技术_Ollama" + if "docker" in sender or "openrouter" in sender: + return "技术_开发" + return "其他" + +def analyze(emails): + if not emails: + return {"total": 0} + + total = len(emails) + senders = Counter(parse_sender(e.get("from","")) for e in emails) + types = Counter(classify(e) for e in emails) + folders = Counter(e.get("_folder","") for e in emails) + + dates = [e.get("date","")[:10] for e in emails if e.get("date")] + dates_ok = [d for d in dates if re.match(r"\d{4}-\d{2}-\d{2}", d)] + date_min = min(dates_ok) if dates_ok else "" + date_max = max(dates_ok) if dates_ok else "" + + # 按年统计 + by_year = Counter(d[:4] for d in dates_ok if len(d) >= 4) + + return { + "total": total, + "date_range": (date_min, date_max), + "folders": dict(folders), + "senders": dict(senders.most_common(20)), + "types": dict(types), + "by_year": dict(by_year), + "top_senders": senders.most_common(15), + "top_types": types.most_common(20), + } + +def main(): + emails = load_merge() + r = analyze(emails) + + lines = [ + "# QQ 邮箱全量多维度复盘", + "", + "## 一、概览", + "", + f"- **邮件总数**:{r['total']} 封(收件箱+已发送+垃圾箱)", + f"- **时间范围**:{r['date_range'][0]} ~ {r['date_range'][1]}", + f"- **文件夹分布**:{r.get('folders', {})}", + "", + "## 二、维度1 · 按类型", + "", + "| 类型 | 数量 | 占比 |", + "|:---|:---|:---|", + ] + for t, c in r["top_types"]: + pct = round(c / r["total"] * 100, 1) if r["total"] else 0 + lines.append(f"| {t} | {c} | {pct}% |") + + lines.extend([ + "", + "## 三、维度2 · 按发件人 Top 15", + "", + "| 发件人 | 数量 |", + "|:---|:---|", + ]) + for s, c in r["top_senders"]: + lines.append(f"| {s[:55]} | {c} |") + + lines.extend([ + "", + "## 四、维度3 · 按年份", + "", + ]) + for yr in sorted(r.get("by_year", {}).keys(), reverse=True): + lines.append(f"- {yr}年:{r['by_year'][yr]} 封") + + lines.extend([ + "", + "## 五、维度4 · 告警/财务/推广", + "", + ]) + alerts = sum(c for t, c in r["top_types"] if t.startswith("告警_")) + finance = sum(c for t, c in r["top_types"] if t.startswith("财务_")) + promo = sum(c for t, c in r["top_types"] if t.startswith("推广_")) + tech = sum(c for t, c in r["top_types"] if t.startswith("技术_")) + lines.append(f"- 告警类:{alerts} 封(GitHub 同步/Synology/Vercel 等)") + lines.append(f"- 财务类:{finance} 封") + lines.append(f"- 推广类:{promo} 封") + lines.append(f"- 技术类:{tech} 封") + + lines.extend([ + "", + "## 六、核心结论与建议", + "", + "1. **收件箱未读量大**:3899 未读/5030 总量,建议分批归档或设规则自动分类", + "2. **告警噪音**:GitHub/Synology 告警占比高,cunkebao_doc 同步已停用;NAS 容器需排查", + "3. **财务邮件**:支付宝/银行类已单独归类,可与财务报告联动", + "4. **推广邮件**:携程/品牌类可设过滤器减少干扰", + "", + "---", + "", + "数据说明:基于 IMAP 导出的收件箱、已发送、垃圾箱合并分析;全量我的文件夹拉取进行中。", + "", + ]) + + out = REPORT_DIR / "QQ邮箱_全量多维度复盘.md" + out.write_text("\n".join(lines), encoding="utf-8") + print(f"已写入 {out}") + return out + +if __name__ == "__main__": + main() diff --git a/运营中枢/工作台/gitea_push_log.md b/运营中枢/工作台/gitea_push_log.md index 863feaa1..7dd54c9d 100644 --- a/运营中枢/工作台/gitea_push_log.md +++ b/运营中枢/工作台/gitea_push_log.md @@ -118,3 +118,4 @@ | 2026-02-23 21:08:00 | 🔄 卡若AI 同步 2026-02-23 21:07 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 10 个 | | 2026-02-23 21:19:17 | 🔄 卡若AI 同步 2026-02-23 21:19 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 10 个 | | 2026-02-23 21:31:11 | 🔄 卡若AI 同步 2026-02-23 21:31 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 10 个 | +| 2026-02-23 23:22:09 | 🔄 卡若AI 同步 2026-02-23 23:22 | 更新:总索引与入口、水桥平台对接、卡土、运营中枢工作台 | 排除 >20MB: 10 个 | diff --git a/运营中枢/工作台/代码管理.md b/运营中枢/工作台/代码管理.md index fd40da0d..812d1b60 100644 --- a/运营中枢/工作台/代码管理.md +++ b/运营中枢/工作台/代码管理.md @@ -121,3 +121,4 @@ | 2026-02-23 21:08:00 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-23 21:07 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 10 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | | 2026-02-23 21:19:17 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-23 21:19 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 10 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | | 2026-02-23 21:31:11 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-23 21:31 | 更新:卡木、运营中枢工作台 | 排除 >20MB: 10 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | +| 2026-02-23 23:22:09 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-23 23:22 | 更新:总索引与入口、水桥平台对接、卡土、运营中枢工作台 | 排除 >20MB: 10 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |