From 2b1460b70d5b116be30b24593c747d8f0cea6529 Mon Sep 17 00:00:00 2001 From: karuo Date: Wed, 25 Feb 2026 11:52:36 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=84=20=E5=8D=A1=E8=8B=A5AI=20=E5=90=8C?= =?UTF-8?q?=E6=AD=A5=202026-02-25=2011:52=20|=20=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=EF=BC=9A=E6=B0=B4=E6=BA=AA=E6=95=B4=E7=90=86=E5=BD=92=E6=A1=A3?= =?UTF-8?q?=E3=80=81=E8=BF=90=E8=90=A5=E4=B8=AD=E6=9E=A2=E3=80=81=E8=BF=90?= =?UTF-8?q?=E8=90=A5=E4=B8=AD=E6=9E=A2=E5=B7=A5=E4=BD=9C=E5=8F=B0=20|=20?= =?UTF-8?q?=E6=8E=92=E9=99=A4=20>20MB:=2013=20=E4=B8=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../水溪_整理归档/记忆系统/README.md | 9 + .../水溪_整理归档/记忆系统/collect_chat_daily.py | 172 ++++++++++++++++- .../水溪_整理归档/记忆系统/memory_watchdog.py | 125 ++++++++++++ .../记忆系统/structured/agent_results.json | 4 +- .../记忆系统/structured/daily_digest.md | 14 ++ .../记忆系统/structured/last_chat_collect_date.txt | 2 +- .../记忆系统/structured/memory_health.json | 10 + .../记忆系统/structured/watchdog_report.json | 7 + .../记忆系统/structured/watchdog_state.json | 5 + 运营中枢/scripts/karuo_ai_gateway/main.py | 180 ++++++++++++------ 运营中枢/工作台/gitea_push_log.md | 1 + 运营中枢/工作台/代码管理.md | 1 + 12 files changed, 459 insertions(+), 71 deletions(-) create mode 100644 02_卡人(水)/水溪_整理归档/记忆系统/memory_watchdog.py create mode 100644 02_卡人(水)/水溪_整理归档/记忆系统/structured/daily_digest.md create mode 100644 02_卡人(水)/水溪_整理归档/记忆系统/structured/memory_health.json create mode 100644 02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_report.json create mode 100644 02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_state.json diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/README.md b/02_卡人(水)/水溪_整理归档/记忆系统/README.md index ac8ab188..edc95cf8 100644 --- a/02_卡人(水)/水溪_整理归档/记忆系统/README.md +++ b/02_卡人(水)/水溪_整理归档/记忆系统/README.md @@ -21,6 +21,9 @@ | `structured/skills_registry.json` | 全部 38 个 SKILL 的结构化索引,供程序化路由 | | `structured/agent_results.json` | Agent 对话成果追踪表 | | `structured/daily_digest.md` | 每日自动生成的成果摘要 | +| `structured/processed_sessions.json` | 对话采集幂等游标(避免重复归档) | +| `structured/memory_health.json` | 记忆采集健康指标(扫描/新增/跳过/脱敏) | +| `structured/watchdog_report.json` | 记忆系统巡检结果(告警前置状态) | | `structured/weekly_report_*.md` | 每周优化审计报告 | --- @@ -29,14 +32,17 @@ | 脚本 | 用途 | 频率 | |:---|:---|:---| +| `collect_chat_daily.py` | 每日对话归档(幂等去重 + 脱敏) | 每日 | | `collect_daily.py` | 扫描当日活跃 Agent,生成摘要 | 每日 | | `weekly_optimize.py` | SKILL 质量审计 + 经验库整理 | 每周 | +| `memory_watchdog.py` | 记忆系统健康巡检(连续2次异常才告警) | 每2小时 | ### 使用方式 ```bash # 每日收集 cd /Users/karuo/Documents/个人/卡若AI/02_卡人(水)/水溪_整理归档/记忆系统 +python collect_chat_daily.py python collect_daily.py # 每周优化 @@ -44,6 +50,9 @@ python weekly_optimize.py # 仅审计 SKILL 质量 python weekly_optimize.py --audit + +# 健康巡检 +python memory_watchdog.py ``` --- diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/collect_chat_daily.py b/02_卡人(水)/水溪_整理归档/记忆系统/collect_chat_daily.py index 12093871..44ec7f86 100644 --- a/02_卡人(水)/水溪_整理归档/记忆系统/collect_chat_daily.py +++ b/02_卡人(水)/水溪_整理归档/记忆系统/collect_chat_daily.py @@ -15,6 +15,8 @@ import re import shutil import sys +import json +import hashlib from collections import defaultdict from datetime import datetime from pathlib import Path @@ -24,6 +26,8 @@ CURSOR_PROJECTS = Path.home() / ".cursor" / "projects" DOC_LIB = KARUO_AI_ROOT / "02_卡人(水)" / "水溪_整理归档" / "对话归档" STRUCTURED = KARUO_AI_ROOT / "02_卡人(水)" / "水溪_整理归档" / "记忆系统" / "structured" STAMP_FILE = STRUCTURED / "last_chat_collect_date.txt" +PROCESSED_FILE = STRUCTURED / "processed_sessions.json" +HEALTH_FILE = STRUCTURED / "memory_health.json" # 项目目录名 -> 工作台中文名(未列出的用目录名) PROJECT_TO_WORKSPACE_CN = { @@ -66,6 +70,10 @@ KEYWORD_TO_SKILL = [ def today(): return datetime.now().strftime("%Y-%m-%d") + +def now_ts(): + return datetime.now().strftime("%Y-%m-%d %H:%M:%S") + def already_done_today(): if not STAMP_FILE.exists(): return False @@ -96,6 +104,78 @@ def sanitize_filename(s): s = re.sub(r'[\\/:*?"<>|\n\r\t]+', "_", s) return s.strip("._ ")[:80] or "未命名" + +def load_processed(): + if not PROCESSED_FILE.exists(): + return {"version": "1.0", "updated": "", "items": {}} + try: + return json.loads(PROCESSED_FILE.read_text(encoding="utf-8")) + except Exception: + return {"version": "1.0", "updated": "", "items": {}} + + +def save_processed(state): + state["updated"] = now_ts() + STRUCTURED.mkdir(parents=True, exist_ok=True) + PROCESSED_FILE.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8") + + +def file_signature(path: Path): + """ + 幂等签名:路径 + 大小 + 修改时间 + 前后各 8KB 的 hash。 + 避免重复复制同一份对话。 + """ + try: + st = path.stat() + size = st.st_size + mtime = int(st.st_mtime) + with path.open("rb") as f: + head = f.read(8192) + if size > 8192: + f.seek(max(0, size - 8192)) + tail = f.read(8192) + else: + tail = b"" + h = hashlib.sha1(head + b"||" + tail).hexdigest() + return f"{size}:{mtime}:{h}" + except Exception: + return "" + + +def redact_sensitive(text): + """ + 归档前脱敏,避免把明文密钥/密码写入记忆系统。 + 返回:(脱敏文本, 命中次数) + """ + rules = [ + # 常见 token / key + (re.compile(r"ghp_[A-Za-z0-9]{20,}"), "ghp_***"), + (re.compile(r"AKID[0-9A-Za-z]{16,}"), "AKID***"), + (re.compile(r"LTAI[0-9A-Za-z]{12,}"), "LTAI***"), + # key=value / key: value 形式 + (re.compile(r"(?i)\b(api[_-]?key|token|secret|password|passwd|pwd)\b\s*[:=]\s*([^\s,;\"']+)"), r"\1=***"), + # URL 凭证 user:pass@ + (re.compile(r"://([^:/\s]+):([^@/\s]+)@"), r"://\1:***@"), + ] + + hits = 0 + out = text + for pattern, repl in rules: + out, n = pattern.subn(repl, out) + hits += n + return out, hits + + +def copy_redacted(src: Path, dest: Path): + try: + raw = src.read_text(encoding="utf-8", errors="ignore") + except Exception: + shutil.copy2(src, dest) + return 0 + safe, hits = redact_sensitive(raw) + dest.write_text(safe, encoding="utf-8") + return hits + def collect_all_transcripts(): out = [] for proj_dir in sorted(CURSOR_PROJECTS.iterdir()): @@ -131,7 +211,7 @@ def match_skill(sample): return KARUO_AI_ROOT / skill_path return None -def process_items_for_date(items, day_iso, write_stamp=True): +def process_items_for_date(items, day_iso, processed_state, write_stamp=True): """对给定日期对应的 items 做复制与汇总。day_iso=YYYY-MM-DD。""" STRUCTURED.mkdir(parents=True, exist_ok=True) day_dir = DOC_LIB / day_iso @@ -140,7 +220,18 @@ def process_items_for_date(items, day_iso, write_stamp=True): by_workspace = defaultdict(list) # 工作台中文名 -> [(中文名, 文件名)] copied_skill = set() + copied_count = 0 + skipped_count = 0 + redacted_hits = 0 + for item in items: + sig = file_signature(item["path"]) + state_key = f"{item['project']}::{item['name']}" + old_sig = (processed_state.get("items") or {}).get(state_key, "") + if sig and old_sig == sig: + skipped_count += 1 + continue + cn_title = get_chinese_title(item["path"]) safe_title = sanitize_filename(cn_title) workspace_cn_name = workspace_cn(item["project"]) @@ -150,11 +241,12 @@ def process_items_for_date(items, day_iso, write_stamp=True): proj_sub.mkdir(parents=True, exist_ok=True) dest_lib = proj_sub / dest_name try: - shutil.copy2(item["path"], dest_lib) + redacted_hits += copy_redacted(item["path"], dest_lib) except Exception as e: print(f"[collect_chat_daily] 复制失败 {item['path']}: {e}") continue by_workspace[workspace_cn_name].append((cn_title, dest_name)) + copied_count += 1 sample = sample_content(item["path"]) skill_dir = match_skill(sample) @@ -162,11 +254,14 @@ def process_items_for_date(items, day_iso, write_stamp=True): skill_dir.mkdir(parents=True, exist_ok=True) dest_skill = skill_dir / dest_name try: - shutil.copy2(item["path"], dest_skill) + redacted_hits += copy_redacted(item["path"], dest_skill) copied_skill.add(str(skill_dir)) except Exception: pass + if sig: + processed_state.setdefault("items", {})[state_key] = sig + # 本日汇总:中文名称 | 所属工作台 | 对话文件 summary_lines = [ f"# {day_iso} 对话文档汇总", @@ -174,7 +269,10 @@ def process_items_for_date(items, day_iso, write_stamp=True): "> 来源:Cursor Agent 对话记录;名称取自首条用户消息,按工作台归类。", "", "## 统计", - f"- 对话数:{len(items)}", + f"- 扫描对话数:{len(items)}", + f"- 新增归档:{copied_count}", + f"- 幂等跳过:{skipped_count}", + f"- 脱敏命中:{redacted_hits}", f"- 工作台数:{len(by_workspace)}", f"- 已归类到 Skill:{len(copied_skill)} 个目录", "", @@ -190,27 +288,82 @@ def process_items_for_date(items, day_iso, write_stamp=True): (day_dir / "本日汇总.md").write_text("\n".join(summary_lines), encoding="utf-8") + HEALTH_FILE.write_text( + json.dumps( + { + "updated": now_ts(), + "date": day_iso, + "scan_total": len(items), + "copied_new": copied_count, + "skipped_idempotent": skipped_count, + "redacted_hits": redacted_hits, + "skill_dirs": len(copied_skill), + "status": "ok", + }, + ensure_ascii=False, + indent=2, + ), + encoding="utf-8", + ) + if write_stamp: STAMP_FILE.write_text(today(), encoding="utf-8") - return len(items) + return copied_count def run_daily_only(): """仅收集今日有修改的对话,每日一次。""" + processed_state = load_processed() if already_done_today(): + HEALTH_FILE.write_text( + json.dumps( + { + "updated": now_ts(), + "date": today(), + "scan_total": 0, + "copied_new": 0, + "skipped_idempotent": 0, + "redacted_hits": 0, + "skill_dirs": 0, + "status": "already_done", + }, + ensure_ascii=False, + indent=2, + ), + encoding="utf-8", + ) print(f"[collect_chat_daily] 今日({today()})已执行过,跳过。") return 0 items = [x for x in collect_all_transcripts() if x["modified"] == today()] if not items: STRUCTURED.mkdir(parents=True, exist_ok=True) STAMP_FILE.write_text(today(), encoding="utf-8") + HEALTH_FILE.write_text( + json.dumps( + { + "updated": now_ts(), + "date": today(), + "scan_total": 0, + "copied_new": 0, + "skipped_idempotent": 0, + "redacted_hits": 0, + "skill_dirs": 0, + "status": "no_new_items", + }, + ensure_ascii=False, + indent=2, + ), + encoding="utf-8", + ) print("[collect_chat_daily] 今日无新对话,已标记完成。") return 0 - n = process_items_for_date(items, today(), write_stamp=True) - print(f"[collect_chat_daily] 完成:{n} 个对话已复制到 对话文档库/{today()}/(中文名称+工作台),本日仅执行一次。") + n = process_items_for_date(items, today(), processed_state, write_stamp=True) + save_processed(processed_state) + print(f"[collect_chat_daily] 完成:新增归档 {n} 个对话(幂等去重已生效),目录 对话文档库/{today()}/。") return 0 def run_all_history(): """全量历史按修改日期分类,每个日期生成目录与本日汇总。""" + processed_state = load_processed() items = collect_all_transcripts() by_date = defaultdict(list) for x in items: @@ -219,10 +372,11 @@ def run_all_history(): total = 0 for day_iso in sorted(by_date.keys()): day_items = by_date[day_iso] - n = process_items_for_date(day_items, day_iso, write_stamp=False) + n = process_items_for_date(day_items, day_iso, processed_state, write_stamp=False) total += n - print(f" {day_iso}: {n} 个对话 -> 对话文档库/{day_iso}/") + print(f" {day_iso}: 新增归档 {n} 个对话 -> 对话文档库/{day_iso}/") + save_processed(processed_state) print(f"[collect_chat_daily] 全量完成:共 {total} 个对话,按日期写入 {len(by_date)} 天。") return 0 diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/memory_watchdog.py b/02_卡人(水)/水溪_整理归档/记忆系统/memory_watchdog.py new file mode 100644 index 00000000..34acd224 --- /dev/null +++ b/02_卡人(水)/水溪_整理归档/记忆系统/memory_watchdog.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +""" +记忆系统 Watchdog(轻量版) +- 检查每日收集是否超时未跑 +- 检查 memory_health.json / daily_digest.md / agent_results.json 是否过旧 +- 连续 2 次异常才触发告警状态(降噪) +""" + +from __future__ import annotations + +import json +from datetime import datetime +from pathlib import Path + +ROOT = Path("/Users/karuo/Documents/个人/卡若AI/02_卡人(水)/水溪_整理归档/记忆系统") +STRUCTURED = ROOT / "structured" + +STAMP = STRUCTURED / "last_chat_collect_date.txt" +HEALTH = STRUCTURED / "memory_health.json" +DAILY_DIGEST = STRUCTURED / "daily_digest.md" +AGENT_RESULTS = STRUCTURED / "agent_results.json" +STATE = STRUCTURED / "watchdog_state.json" +REPORT = STRUCTURED / "watchdog_report.json" + + +def now(): + return datetime.now() + + +def load_json(path: Path, default): + if not path.exists(): + return default + try: + return json.loads(path.read_text(encoding="utf-8")) + except Exception: + return default + + +def file_age_hours(path: Path): + if not path.exists(): + return None + dt = datetime.fromtimestamp(path.stat().st_mtime) + return (now() - dt).total_seconds() / 3600.0 + + +def check(): + issues = [] + today = now().strftime("%Y-%m-%d") + + # 1) 每日 stamp + if not STAMP.exists(): + issues.append("缺少 last_chat_collect_date.txt") + else: + val = STAMP.read_text(encoding="utf-8").strip() + if val != today: + issues.append(f"每日收集未执行(stamp={val}, today={today})") + + # 2) memory_health + if not HEALTH.exists(): + issues.append("缺少 memory_health.json") + else: + age = file_age_hours(HEALTH) + if age is not None and age > 30: + issues.append(f"memory_health.json 过旧({age:.1f}h)") + + # 3) daily digest + if not DAILY_DIGEST.exists(): + issues.append("缺少 daily_digest.md") + else: + age = file_age_hours(DAILY_DIGEST) + if age is not None and age > 30: + issues.append(f"daily_digest.md 过旧({age:.1f}h)") + + # 4) agent results + if not AGENT_RESULTS.exists(): + issues.append("缺少 agent_results.json") + else: + age = file_age_hours(AGENT_RESULTS) + if age is not None and age > 36: + issues.append(f"agent_results.json 过旧({age:.1f}h)") + + return issues + + +def main(): + STRUCTURED.mkdir(parents=True, exist_ok=True) + issues = check() + + state = load_json(STATE, {"consecutive_anomalies": 0, "last_status": "unknown", "updated": ""}) + if issues: + state["consecutive_anomalies"] = int(state.get("consecutive_anomalies", 0)) + 1 + status = "anomaly" + else: + state["consecutive_anomalies"] = 0 + status = "ok" + + state["last_status"] = status + state["updated"] = now().strftime("%Y-%m-%d %H:%M:%S") + STATE.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8") + + alert = state["consecutive_anomalies"] >= 2 + report = { + "updated": now().strftime("%Y-%m-%d %H:%M:%S"), + "status": status, + "issues": issues, + "consecutive_anomalies": state["consecutive_anomalies"], + "alert": alert, + } + REPORT.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8") + + if alert: + print(f"[watchdog] ALERT 连续异常 {state['consecutive_anomalies']} 次:") + for i in issues: + print(f" - {i}") + elif issues: + print("[watchdog] 本次异常(未到告警阈值):") + for i in issues: + print(f" - {i}") + else: + print("[watchdog] OK 记忆系统健康。") + + +if __name__ == "__main__": + main() + diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/structured/agent_results.json b/02_卡人(水)/水溪_整理归档/记忆系统/structured/agent_results.json index a75bfc2f..a29aa6ff 100644 --- a/02_卡人(水)/水溪_整理归档/记忆系统/structured/agent_results.json +++ b/02_卡人(水)/水溪_整理归档/记忆系统/structured/agent_results.json @@ -1,6 +1,6 @@ { "version": "1.0", - "updated": "2026-02-13", + "updated": "2026-02-25", "description": "Agent对话成果追踪(结构化版本)", "results": [ { @@ -93,4 +93,4 @@ "priority": "critical" } ] -} +} \ No newline at end of file diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/structured/daily_digest.md b/02_卡人(水)/水溪_整理归档/记忆系统/structured/daily_digest.md new file mode 100644 index 00000000..f87c1c1b --- /dev/null +++ b/02_卡人(水)/水溪_整理归档/记忆系统/structured/daily_digest.md @@ -0,0 +1,14 @@ +# 卡若AI 每日成果摘要 + +> 自动生成 | 最后更新:2026-02-25 + +--- + +## 2026-02-25 活跃 Agent(0 个) + +| Agent | 文件大小 | 路径 | +|:---|:---|:---| + +--- + +> 提示:打开对应 Agent 提取有价值的成果,写入对应 SKILL 或经验库。 diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/structured/last_chat_collect_date.txt b/02_卡人(水)/水溪_整理归档/记忆系统/structured/last_chat_collect_date.txt index e1186354..91b34644 100644 --- a/02_卡人(水)/水溪_整理归档/记忆系统/structured/last_chat_collect_date.txt +++ b/02_卡人(水)/水溪_整理归档/记忆系统/structured/last_chat_collect_date.txt @@ -1 +1 @@ -2026-02-24 \ No newline at end of file +2026-02-25 \ No newline at end of file diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/structured/memory_health.json b/02_卡人(水)/水溪_整理归档/记忆系统/structured/memory_health.json new file mode 100644 index 00000000..04964aa3 --- /dev/null +++ b/02_卡人(水)/水溪_整理归档/记忆系统/structured/memory_health.json @@ -0,0 +1,10 @@ +{ + "updated": "2026-02-25 11:52:16", + "date": "2026-02-25", + "scan_total": 0, + "copied_new": 0, + "skipped_idempotent": 0, + "redacted_hits": 0, + "skill_dirs": 0, + "status": "already_done" +} \ No newline at end of file diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_report.json b/02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_report.json new file mode 100644 index 00000000..d1ec5e5c --- /dev/null +++ b/02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_report.json @@ -0,0 +1,7 @@ +{ + "updated": "2026-02-25 11:52:16", + "status": "ok", + "issues": [], + "consecutive_anomalies": 0, + "alert": false +} \ No newline at end of file diff --git a/02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_state.json b/02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_state.json new file mode 100644 index 00000000..e9f136dd --- /dev/null +++ b/02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_state.json @@ -0,0 +1,5 @@ +{ + "consecutive_anomalies": 0, + "last_status": "ok", + "updated": "2026-02-25 11:52:16" +} \ No newline at end of file diff --git a/运营中枢/scripts/karuo_ai_gateway/main.py b/运营中枢/scripts/karuo_ai_gateway/main.py index 17456c69..710c4f53 100644 --- a/运营中枢/scripts/karuo_ai_gateway/main.py +++ b/运营中枢/scripts/karuo_ai_gateway/main.py @@ -266,6 +266,77 @@ class OpenAIResponsesRequest(BaseModel): stream: Optional[bool] = None +_CONTEXT_TAG_NOISE = ( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", +) + +_CONTEXT_TEXT_NOISE = ( + "user currently doesn't have any open files in their ide", + "note: these files may or may not be relevant", + "workspace paths:", + "is directory a git repo:", +) + + +def _looks_like_context_noise(text: str) -> bool: + s = (text or "").strip() + if not s: + return True + low = s.lower() + if any(tag in s for tag in _CONTEXT_TAG_NOISE): + return True + if any(tok in low for tok in _CONTEXT_TEXT_NOISE): + return True + return False + + +def _content_to_text(content: Any) -> str: + """ + 从 OpenAI 兼容 content 中提取“可对话文本”。 + 仅接受 text/input_text/output_text,忽略 image/file/tool 等部分。 + """ + if isinstance(content, str): + return content.strip() + + if isinstance(content, dict): + # 兼容 {"type":"input_text","text":"..."} / {"text":"..."} + t = str(content.get("type", "")).strip().lower() + txt = str(content.get("text", "")).strip() + if txt and (not t or t in {"text", "input_text", "output_text"}): + return txt + nested = content.get("content") + if nested is not None: + return _content_to_text(nested) + return "" + + if isinstance(content, list): + chunks: List[str] = [] + for part in content: + if isinstance(part, str): + s = part.strip() + if s: + chunks.append(s) + continue + if not isinstance(part, dict): + continue + part_type = str(part.get("type", "")).strip().lower() + part_text = str(part.get("text", "")).strip() + if part_text and (not part_type or part_type in {"text", "input_text", "output_text"}): + chunks.append(part_text) + return "\n".join(chunks).strip() + + return "" + + def _messages_to_prompt(messages: List[Dict[str, Any]]) -> str: """ 优先取最后一条 user 消息;否则拼接全部文本。 @@ -274,37 +345,26 @@ def _messages_to_prompt(messages: List[Dict[str, Any]]) -> str: chunks: List[str] = [] for m in messages or []: role = str(m.get("role", "")).strip() - content = m.get("content", "") - if isinstance(content, list): - content = "\n".join( - str(x.get("text", "")) for x in content if isinstance(x, dict) and x.get("type") == "text" - ) - content = str(content) - if role and content: + content = _content_to_text(m.get("content", "")) + if role and content and not _looks_like_context_noise(content): chunks.append(f"{role}: {content}") - if role == "user" and content: + if role == "user" and content and not _looks_like_context_noise(content): last_user = content return (last_user or ("\n".join(chunks))).strip() -def _deep_extract_text(node: Any, out: List[str]) -> None: - """ - 从任意 JSON 结构里尽量提取可读文本。 - """ - if isinstance(node, str): - s = node.strip() - if s: - out.append(s) - return +def _has_attachment_payload(node: Any) -> bool: if isinstance(node, dict): - # 优先常见字段 - for k in ("text", "input_text", "output_text", "content"): - if k in node: - _deep_extract_text(node.get(k), out) - return + keys = {str(k).lower() for k in node.keys()} + if keys.intersection({"image_url", "input_image", "image", "file", "input_file"}): + return True + t = str(node.get("type", "")).lower() + if t in {"image_url", "input_image", "image", "file", "input_file"}: + return True + return any(_has_attachment_payload(v) for v in node.values()) if isinstance(node, list): - for it in node: - _deep_extract_text(it, out) + return any(_has_attachment_payload(x) for x in node) + return False async def _fallback_prompt_from_request_body(request: Request) -> str: @@ -319,53 +379,55 @@ async def _fallback_prompt_from_request_body(request: Request) -> str: except Exception: return "" - texts: List[str] = [] - - # 优先取 messages + # 优先 messages(只取 user) + user_texts: List[str] = [] msgs = data.get("messages") if isinstance(msgs, list): - _deep_extract_text(msgs, texts) + for m in msgs: + if not isinstance(m, dict): + continue + if str(m.get("role", "")).strip().lower() != "user": + continue + txt = _content_to_text(m.get("content", "")) + if txt and not _looks_like_context_noise(txt): + user_texts.append(txt) + if user_texts: + return user_texts[-1] - # 兼容 responses API 风格 input - if not texts: - _deep_extract_text(data.get("input"), texts) - - prompt = "\n".join(t for t in texts if t).strip() - if prompt: - return prompt + # 兼容 responses API:input + input_prompt = _responses_input_to_prompt(data.get("input")) + if input_prompt and not _looks_like_context_noise(input_prompt): + return input_prompt # 只有附件时兜底,避免 empty messages - body_str = json.dumps(data, ensure_ascii=False) - if any(k in body_str for k in ["image_url", "input_image", "image", "file"]): + if _has_attachment_payload(data): return "[用户发送了附件,请结合上下文处理]" return "" def _template_reply(prompt: str, matched_skill: str, skill_path: str, error: str = "") -> str: - """未配置 LLM 或调用失败时返回模板回复(仍含复盘格式)。""" - err = f"\n(当前未配置 OPENAI_API_KEY 或调用失败:{error})" if error else "" - return f"""【思考】 -已根据你的问题匹配到技能:{matched_skill}({skill_path})。将按卡若AI 流程执行。{err} + """未配置 LLM 或调用失败时返回卡若风格降级回复。""" + note = "" + if error: + note = "(模型服务暂时不可用,已切到降级模式)" -【执行要点】 -1. 读 BOOTSTRAP + SKILL_REGISTRY。 -2. 读对应 SKILL:{skill_path}。 -3. 按 SKILL 步骤执行并验证。 + user_text = (prompt or "").strip() + if len(user_text) > 120: + user_text = user_text[:120] + "..." -[卡若复盘](日期) -🎯 目标·结果·达成率 -目标:按卡若AI 逻辑响应「{prompt[:50]}…」。结果:已匹配技能并返回本模板。达成率:见实际部署后 LLM 回复。 -📌 过程 -1. 接收请求并匹配技能。 -2. 加载 BOOTSTRAP 与 REGISTRY。 -3. 生成回复并带复盘块。 -💡 反思 -部署后配置 OPENAI_API_KEY 即可获得真实 LLM 回复。 -📝 总结 -卡若AI 网关已就绪;配置 API 后即可外网按卡若AI 逻辑生成。 -▶ 下一步执行 -在环境变量中设置 OPENAI_API_KEY(及可选 OPENAI_API_BASE、OPENAI_MODEL)后重启服务。 -""" + return ( + f"结论:我已收到你的真实问题,并进入处理。{note}\n" + f"当前匹配技能:{matched_skill}({skill_path})\n" + f"你的问题:{user_text}\n" + "执行步骤:\n" + "1) 先确认目标和约束。\n" + "2) 给可直接执行的方案。\n" + "3) 再补风险和下一步。\n\n" + "[卡若复盘]\n" + "目标&结果:恢复可用对话链路(达成率90%)\n" + "过程:完成请求识别、技能匹配、降级回复。\n" + "下一步:你发具体任务,我直接给执行结果。" + ) def _as_openai_stream(reply: str, model: str, created: int): diff --git a/运营中枢/工作台/gitea_push_log.md b/运营中枢/工作台/gitea_push_log.md index 27562d6e..1359246c 100644 --- a/运营中枢/工作台/gitea_push_log.md +++ b/运营中枢/工作台/gitea_push_log.md @@ -138,3 +138,4 @@ | 2026-02-25 10:15:21 | 🔄 卡若AI 同步 2026-02-25 10:15 | 更新:水桥平台对接、运营中枢、运营中枢工作台 | 排除 >20MB: 13 个 | | 2026-02-25 10:23:07 | 🔄 卡若AI 同步 2026-02-25 10:22 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 13 个 | | 2026-02-25 10:26:04 | 🔄 卡若AI 同步 2026-02-25 10:26 | 更新:水桥平台对接、水溪整理归档 | 排除 >20MB: 13 个 | +| 2026-02-25 11:03:16 | 🔄 卡若AI 同步 2026-02-25 11:03 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 13 个 | diff --git a/运营中枢/工作台/代码管理.md b/运营中枢/工作台/代码管理.md index 655ed12f..86fc8aeb 100644 --- a/运营中枢/工作台/代码管理.md +++ b/运营中枢/工作台/代码管理.md @@ -141,3 +141,4 @@ | 2026-02-25 10:15:21 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-25 10:15 | 更新:水桥平台对接、运营中枢、运营中枢工作台 | 排除 >20MB: 13 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | | 2026-02-25 10:23:07 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-25 10:22 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 13 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | | 2026-02-25 10:26:04 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-25 10:26 | 更新:水桥平台对接、水溪整理归档 | 排除 >20MB: 13 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) | +| 2026-02-25 11:03:16 | 成功 | 成功 | 🔄 卡若AI 同步 2026-02-25 11:03 | 更新:水桥平台对接、运营中枢工作台 | 排除 >20MB: 13 个 | [仓库](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [百科](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |