🔄 卡若AI sync 2026-02-25 11:52 | Updated: 水溪整理归档, 运营中枢, 运营中枢工作台 | Excluded >20MB: 13 files
@@ -21,6 +21,9 @@
 | `structured/skills_registry.json` | Structured index of all 38 SKILLs, for programmatic routing |
 | `structured/agent_results.json` | Tracking table for Agent conversation outcomes |
 | `structured/daily_digest.md` | Auto-generated daily results digest |
+| `structured/processed_sessions.json` | Idempotency cursor for conversation collection (prevents duplicate archiving) |
+| `structured/memory_health.json` | Memory-collection health metrics (scanned / new / skipped / redacted) |
+| `structured/watchdog_report.json` | Memory-system inspection result (pre-alert status) |
 | `structured/weekly_report_*.md` | Weekly optimization audit reports |
 
 ---
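
Not part of this commit, but to make "programmatic routing" via the registry concrete, a minimal sketch. The `skills` / `keywords` / `path` field names are assumptions for illustration only; the real schema lives in `structured/skills_registry.json`:

```python
# Hypothetical sketch: route a prompt to a SKILL via structured/skills_registry.json.
# The "skills", "keywords" and "path" field names are assumed, not taken from this diff.
import json
from pathlib import Path

def route_to_skill(prompt: str, registry_path: Path) -> str | None:
    registry = json.loads(registry_path.read_text(encoding="utf-8"))
    for skill in registry.get("skills", []):
        if any(kw in prompt for kw in skill.get("keywords", [])):
            return skill.get("path")  # SKILL directory to load
    return None
```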
@@ -29,14 +32,17 @@
 
 | Script | Purpose | Frequency |
 |:---|:---|:---|
 | `collect_chat_daily.py` | Daily conversation archiving (idempotent dedup + redaction) | daily |
 | `collect_daily.py` | Scan the day's active Agents and generate a digest | daily |
 | `weekly_optimize.py` | SKILL quality audit + experience-base curation | weekly |
+| `memory_watchdog.py` | Memory-system health inspection (alerts only after 2 consecutive anomalies) | every 2 hours |
 
 ### Usage
 
 ```bash
 # Daily collection
 cd /Users/karuo/Documents/个人/卡若AI/02_卡人(水)/水溪_整理归档/记忆系统
 python collect_chat_daily.py
 python collect_daily.py
 
 # Weekly optimization
@@ -44,6 +50,9 @@ python weekly_optimize.py
 
 # Audit SKILL quality only
 python weekly_optimize.py --audit
+
+# Health inspection
+python memory_watchdog.py
 ```
 
 ---
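
A sketch outside this commit: a cron wrapper could confirm the daily run by reading `structured/memory_health.json`. The `date`/`status` fields and the status values (`ok`, `already_done`, `no_new_items`) are the ones written by `collect_chat_daily.py` in this diff:

```python
# Sketch: confirm today's collection ran, using fields written by collect_chat_daily.py.
import json
from datetime import datetime
from pathlib import Path

HEALTH = Path("structured/memory_health.json")  # relative to the 记忆系统 directory

def collected_today() -> bool:
    if not HEALTH.exists():
        return False
    health = json.loads(HEALTH.read_text(encoding="utf-8"))
    done_statuses = {"ok", "already_done", "no_new_items"}
    return (health.get("date") == datetime.now().strftime("%Y-%m-%d")
            and health.get("status") in done_statuses)

if __name__ == "__main__":
    print("collected today:", collected_today())
```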
@@ -15,6 +15,8 @@
 import re
 import shutil
 import sys
+import json
+import hashlib
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
@@ -24,6 +26,8 @@ CURSOR_PROJECTS = Path.home() / ".cursor" / "projects"
 DOC_LIB = KARUO_AI_ROOT / "02_卡人(水)" / "水溪_整理归档" / "对话归档"
 STRUCTURED = KARUO_AI_ROOT / "02_卡人(水)" / "水溪_整理归档" / "记忆系统" / "structured"
 STAMP_FILE = STRUCTURED / "last_chat_collect_date.txt"
+PROCESSED_FILE = STRUCTURED / "processed_sessions.json"
+HEALTH_FILE = STRUCTURED / "memory_health.json"
 
 # Project directory name -> workspace CN name (unlisted ones fall back to the directory name)
 PROJECT_TO_WORKSPACE_CN = {
@@ -66,6 +70,10 @@ KEYWORD_TO_SKILL = [
 def today():
     return datetime.now().strftime("%Y-%m-%d")
 
+
+def now_ts():
+    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
 
 def already_done_today():
     if not STAMP_FILE.exists():
         return False
@@ -96,6 +104,78 @@ def sanitize_filename(s):
     s = re.sub(r'[\\/:*?"<>|\n\r\t]+', "_", s)
     return s.strip("._ ")[:80] or "未命名"
 
 
+def load_processed():
+    if not PROCESSED_FILE.exists():
+        return {"version": "1.0", "updated": "", "items": {}}
+    try:
+        return json.loads(PROCESSED_FILE.read_text(encoding="utf-8"))
+    except Exception:
+        return {"version": "1.0", "updated": "", "items": {}}
+
+
+def save_processed(state):
+    state["updated"] = now_ts()
+    STRUCTURED.mkdir(parents=True, exist_ok=True)
+    PROCESSED_FILE.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+def file_signature(path: Path):
+    """
+    Idempotency signature: size + mtime + hash of the first and last 8 KB (keyed by path).
+    Prevents copying the same conversation twice.
+    """
+    try:
+        st = path.stat()
+        size = st.st_size
+        mtime = int(st.st_mtime)
+        with path.open("rb") as f:
+            head = f.read(8192)
+            if size > 8192:
+                f.seek(max(0, size - 8192))
+                tail = f.read(8192)
+            else:
+                tail = b""
+        h = hashlib.sha1(head + b"||" + tail).hexdigest()
+        return f"{size}:{mtime}:{h}"
+    except Exception:
+        return ""
+
+
+def redact_sensitive(text):
+    """
+    Redact before archiving, so plaintext keys/passwords never enter the memory system.
+    Returns: (redacted text, hit count)
+    """
+    rules = [
+        # Common tokens / keys
+        (re.compile(r"ghp_[A-Za-z0-9]{20,}"), "ghp_***"),
+        (re.compile(r"AKID[0-9A-Za-z]{16,}"), "AKID***"),
+        (re.compile(r"LTAI[0-9A-Za-z]{12,}"), "LTAI***"),
+        # key=value / key: value forms
+        (re.compile(r"(?i)\b(api[_-]?key|token|secret|password|passwd|pwd)\b\s*[:=]\s*([^\s,;\"']+)"), r"\1=***"),
+        # URL credentials user:pass@
+        (re.compile(r"://([^:/\s]+):([^@/\s]+)@"), r"://\1:***@"),
+    ]
+
+    hits = 0
+    out = text
+    for pattern, repl in rules:
+        out, n = pattern.subn(repl, out)
+        hits += n
+    return out, hits
+
+
+def copy_redacted(src: Path, dest: Path):
+    try:
+        raw = src.read_text(encoding="utf-8", errors="ignore")
+    except Exception:
+        shutil.copy2(src, dest)
+        return 0
+    safe, hits = redact_sensitive(raw)
+    dest.write_text(safe, encoding="utf-8")
+    return hits
+
+
 def collect_all_transcripts():
     out = []
     for proj_dir in sorted(CURSOR_PROJECTS.iterdir()):
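
To make the redaction behaviour above concrete, a self-contained re-run of two of the rules (copied verbatim from `redact_sensitive`); expected output is shown in the comments:

```python
import re

# Two of the rules from redact_sensitive(), copied verbatim from the diff above.
rules = [
    (re.compile(r"(?i)\b(api[_-]?key|token|secret|password|passwd|pwd)\b\s*[:=]\s*([^\s,;\"']+)"), r"\1=***"),
    (re.compile(r"://([^:/\s]+):([^@/\s]+)@"), r"://\1:***@"),
]

text = "password: hunter2, remote https://alice:s3cret@git.example.com/repo.git"
hits = 0
for pattern, repl in rules:
    text, n = pattern.subn(repl, text)
    hits += n

print(text)  # password=***, remote https://alice:***@git.example.com/repo.git
print(hits)  # 2
```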
@@ -131,7 +211,7 @@ def match_skill(sample):
             return KARUO_AI_ROOT / skill_path
     return None
 
-def process_items_for_date(items, day_iso, write_stamp=True):
+def process_items_for_date(items, day_iso, processed_state, write_stamp=True):
     """Copy and summarize the items for a given date. day_iso=YYYY-MM-DD."""
     STRUCTURED.mkdir(parents=True, exist_ok=True)
     day_dir = DOC_LIB / day_iso
@@ -140,7 +220,18 @@ def process_items_for_date(items, day_iso, write_stamp=True):
     by_workspace = defaultdict(list)  # workspace CN name -> [(CN title, filename)]
     copied_skill = set()
 
+    copied_count = 0
+    skipped_count = 0
+    redacted_hits = 0
+
     for item in items:
+        sig = file_signature(item["path"])
+        state_key = f"{item['project']}::{item['name']}"
+        old_sig = (processed_state.get("items") or {}).get(state_key, "")
+        if sig and old_sig == sig:
+            skipped_count += 1
+            continue
+
         cn_title = get_chinese_title(item["path"])
         safe_title = sanitize_filename(cn_title)
         workspace_cn_name = workspace_cn(item["project"])
@@ -150,11 +241,12 @@ def process_items_for_date(items, day_iso, write_stamp=True):
         proj_sub.mkdir(parents=True, exist_ok=True)
         dest_lib = proj_sub / dest_name
         try:
-            shutil.copy2(item["path"], dest_lib)
+            redacted_hits += copy_redacted(item["path"], dest_lib)
         except Exception as e:
             print(f"[collect_chat_daily] copy failed {item['path']}: {e}")
             continue
         by_workspace[workspace_cn_name].append((cn_title, dest_name))
+        copied_count += 1
 
         sample = sample_content(item["path"])
         skill_dir = match_skill(sample)
@@ -162,11 +254,14 @@ def process_items_for_date(items, day_iso, write_stamp=True):
             skill_dir.mkdir(parents=True, exist_ok=True)
             dest_skill = skill_dir / dest_name
             try:
-                shutil.copy2(item["path"], dest_skill)
+                redacted_hits += copy_redacted(item["path"], dest_skill)
                 copied_skill.add(str(skill_dir))
             except Exception:
                 pass
 
+        if sig:
+            processed_state.setdefault("items", {})[state_key] = sig
+
     # Daily summary: CN title | workspace | conversation file
     summary_lines = [
         f"# {day_iso} conversation archive summary",
@@ -174,7 +269,10 @@ def process_items_for_date(items, day_iso, write_stamp=True):
         "> Source: Cursor Agent transcripts; titles taken from the first user message, grouped by workspace.",
         "",
         "## Stats",
-        f"- Conversations: {len(items)}",
+        f"- Conversations scanned: {len(items)}",
+        f"- Newly archived: {copied_count}",
+        f"- Skipped (idempotent): {skipped_count}",
+        f"- Redaction hits: {redacted_hits}",
         f"- Workspaces: {len(by_workspace)}",
         f"- Filed into Skill: {len(copied_skill)} directories",
         "",
@@ -190,27 +288,82 @@ def process_items_for_date(items, day_iso, write_stamp=True):
 
     (day_dir / "本日汇总.md").write_text("\n".join(summary_lines), encoding="utf-8")
 
+    HEALTH_FILE.write_text(
+        json.dumps(
+            {
+                "updated": now_ts(),
+                "date": day_iso,
+                "scan_total": len(items),
+                "copied_new": copied_count,
+                "skipped_idempotent": skipped_count,
+                "redacted_hits": redacted_hits,
+                "skill_dirs": len(copied_skill),
+                "status": "ok",
+            },
+            ensure_ascii=False,
+            indent=2,
+        ),
+        encoding="utf-8",
+    )
+
     if write_stamp:
         STAMP_FILE.write_text(today(), encoding="utf-8")
-    return len(items)
+    return copied_count
 
 
 def run_daily_only():
     """Collect only conversations modified today; runs once per day."""
+    processed_state = load_processed()
     if already_done_today():
+        HEALTH_FILE.write_text(
+            json.dumps(
+                {
+                    "updated": now_ts(),
+                    "date": today(),
+                    "scan_total": 0,
+                    "copied_new": 0,
+                    "skipped_idempotent": 0,
+                    "redacted_hits": 0,
+                    "skill_dirs": 0,
+                    "status": "already_done",
+                },
+                ensure_ascii=False,
+                indent=2,
+            ),
+            encoding="utf-8",
+        )
         print(f"[collect_chat_daily] already ran today ({today()}); skipping.")
         return 0
     items = [x for x in collect_all_transcripts() if x["modified"] == today()]
     if not items:
         STRUCTURED.mkdir(parents=True, exist_ok=True)
         STAMP_FILE.write_text(today(), encoding="utf-8")
+        HEALTH_FILE.write_text(
+            json.dumps(
+                {
+                    "updated": now_ts(),
+                    "date": today(),
+                    "scan_total": 0,
+                    "copied_new": 0,
+                    "skipped_idempotent": 0,
+                    "redacted_hits": 0,
+                    "skill_dirs": 0,
+                    "status": "no_new_items",
+                },
+                ensure_ascii=False,
+                indent=2,
+            ),
+            encoding="utf-8",
+        )
         print("[collect_chat_daily] no new conversations today; marked as done.")
         return 0
-    n = process_items_for_date(items, today(), write_stamp=True)
-    print(f"[collect_chat_daily] done: {n} conversations copied to 对话文档库/{today()}/ (CN title + workspace); runs only once per day.")
+    n = process_items_for_date(items, today(), processed_state, write_stamp=True)
+    save_processed(processed_state)
+    print(f"[collect_chat_daily] done: newly archived {n} conversations (idempotent dedup active), directory 对话文档库/{today()}/.")
     return 0
 
 
 def run_all_history():
     """Classify the full history by modification date; one directory and daily summary per date."""
+    processed_state = load_processed()
     items = collect_all_transcripts()
     by_date = defaultdict(list)
     for x in items:
@@ -219,10 +372,11 @@ def run_all_history():
     total = 0
     for day_iso in sorted(by_date.keys()):
         day_items = by_date[day_iso]
-        n = process_items_for_date(day_items, day_iso, write_stamp=False)
+        n = process_items_for_date(day_items, day_iso, processed_state, write_stamp=False)
         total += n
-        print(f"  {day_iso}: {n} conversations -> 对话文档库/{day_iso}/")
+        print(f"  {day_iso}: newly archived {n} conversations -> 对话文档库/{day_iso}/")
 
+    save_processed(processed_state)
     print(f"[collect_chat_daily] full run complete: {total} conversations written across {len(by_date)} days.")
     return 0
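
Again outside the diff: a self-contained sketch of the `size:mtime:sha1(head||tail)` signature scheme used by `file_signature`, and the `processed_sessions.json` shape it feeds. The example key and values are made up for illustration:

```python
import hashlib
import tempfile
from pathlib import Path

def signature(path: Path) -> str:
    # Mirrors file_signature() above: size + mtime + sha1 of the first/last 8 KB.
    st = path.stat()
    with path.open("rb") as f:
        head = f.read(8192)
        tail = b""
        if st.st_size > 8192:
            f.seek(max(0, st.st_size - 8192))
            tail = f.read(8192)
    return f"{st.st_size}:{int(st.st_mtime)}:{hashlib.sha1(head + b'||' + tail).hexdigest()}"

with tempfile.NamedTemporaryFile(suffix=".md", delete=False) as tmp:
    tmp.write(b"# demo transcript\n")
p = Path(tmp.name)
assert signature(p) == signature(p)  # unchanged file -> same signature -> skipped next run

# Shape of processed_sessions.json implied by the code (illustrative values only):
state = {
    "version": "1.0",
    "updated": "2026-02-25 11:52:00",
    "items": {"my-project::chat-0001": signature(p)},  # key = "<project>::<name>"
}
```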
02_卡人(水)/水溪_整理归档/记忆系统/memory_watchdog.py (new file, 125 lines)
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""
Memory-system watchdog (lightweight).
- Checks whether the daily collection is overdue
- Checks whether memory_health.json / daily_digest.md / agent_results.json are stale
- Only enters the alert state after 2 consecutive anomalies (noise reduction)
"""

from __future__ import annotations

import json
from datetime import datetime
from pathlib import Path

ROOT = Path("/Users/karuo/Documents/个人/卡若AI/02_卡人(水)/水溪_整理归档/记忆系统")
STRUCTURED = ROOT / "structured"

STAMP = STRUCTURED / "last_chat_collect_date.txt"
HEALTH = STRUCTURED / "memory_health.json"
DAILY_DIGEST = STRUCTURED / "daily_digest.md"
AGENT_RESULTS = STRUCTURED / "agent_results.json"
STATE = STRUCTURED / "watchdog_state.json"
REPORT = STRUCTURED / "watchdog_report.json"


def now():
    return datetime.now()


def load_json(path: Path, default):
    if not path.exists():
        return default
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return default


def file_age_hours(path: Path):
    if not path.exists():
        return None
    dt = datetime.fromtimestamp(path.stat().st_mtime)
    return (now() - dt).total_seconds() / 3600.0


def check():
    issues = []
    today = now().strftime("%Y-%m-%d")

    # 1) daily stamp
    if not STAMP.exists():
        issues.append("missing last_chat_collect_date.txt")
    else:
        val = STAMP.read_text(encoding="utf-8").strip()
        if val != today:
            issues.append(f"daily collection has not run (stamp={val}, today={today})")

    # 2) memory_health
    if not HEALTH.exists():
        issues.append("missing memory_health.json")
    else:
        age = file_age_hours(HEALTH)
        if age is not None and age > 30:
            issues.append(f"memory_health.json is stale ({age:.1f}h)")

    # 3) daily digest
    if not DAILY_DIGEST.exists():
        issues.append("missing daily_digest.md")
    else:
        age = file_age_hours(DAILY_DIGEST)
        if age is not None and age > 30:
            issues.append(f"daily_digest.md is stale ({age:.1f}h)")

    # 4) agent results
    if not AGENT_RESULTS.exists():
        issues.append("missing agent_results.json")
    else:
        age = file_age_hours(AGENT_RESULTS)
        if age is not None and age > 36:
            issues.append(f"agent_results.json is stale ({age:.1f}h)")

    return issues


def main():
    STRUCTURED.mkdir(parents=True, exist_ok=True)
    issues = check()

    state = load_json(STATE, {"consecutive_anomalies": 0, "last_status": "unknown", "updated": ""})
    if issues:
        state["consecutive_anomalies"] = int(state.get("consecutive_anomalies", 0)) + 1
        status = "anomaly"
    else:
        state["consecutive_anomalies"] = 0
        status = "ok"

    state["last_status"] = status
    state["updated"] = now().strftime("%Y-%m-%d %H:%M:%S")
    STATE.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8")

    alert = state["consecutive_anomalies"] >= 2
    report = {
        "updated": now().strftime("%Y-%m-%d %H:%M:%S"),
        "status": status,
        "issues": issues,
        "consecutive_anomalies": state["consecutive_anomalies"],
        "alert": alert,
    }
    REPORT.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")

    if alert:
        print(f"[watchdog] ALERT: {state['consecutive_anomalies']} consecutive anomalies:")
        for i in issues:
            print(f"  - {i}")
    elif issues:
        print("[watchdog] anomalies this run (below alert threshold):")
        for i in issues:
            print(f"  - {i}")
    else:
        print("[watchdog] OK, memory system healthy.")


if __name__ == "__main__":
    main()
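
Not in the commit: a notifier run from cron could consume `watchdog_report.json` and only page when the two-strike `alert` flag is set. The fields used are the ones `memory_watchdog.py` writes; the notification channel is left open:

```python
# Sketch: consume watchdog_report.json and act only on the two-strike alert flag.
import json
from pathlib import Path

REPORT = Path("structured/watchdog_report.json")  # written by memory_watchdog.py

if REPORT.exists():
    report = json.loads(REPORT.read_text(encoding="utf-8"))
    if report.get("alert"):  # true only after >= 2 consecutive anomalous runs
        issues = "\n".join(f"  - {i}" for i in report.get("issues", []))
        # Hooking up an actual pager/IM channel is deployment-specific.
        print(f"memory system ALERT ({report['consecutive_anomalies']} consecutive):\n{issues}")
```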
02_卡人(水)/水溪_整理归档/记忆系统/structured/agent_results.json
@@ -1,6 +1,6 @@
 {
   "version": "1.0",
-  "updated": "2026-02-13",
+  "updated": "2026-02-25",
   "description": "Agent conversation outcome tracking (structured version)",
   "results": [
     {
02_卡人(水)/水溪_整理归档/记忆系统/structured/daily_digest.md (new file, 14 lines)
@@ -0,0 +1,14 @@
# 卡若AI daily results digest

> Auto-generated | Last updated: 2026-02-25

---

## 2026-02-25 active Agents (0)

| Agent | File size | Path |
|:---|:---|:---|

---

> Tip: open the corresponding Agent, extract the valuable outcomes, and write them into the matching SKILL or experience base.

02_卡人(水)/水溪_整理归档/记忆系统/structured/last_chat_collect_date.txt
@@ -1 +1 @@
-2026-02-24
+2026-02-25
02_卡人(水)/水溪_整理归档/记忆系统/structured/memory_health.json (new file, 10 lines)
@@ -0,0 +1,10 @@
{
  "updated": "2026-02-25 11:52:16",
  "date": "2026-02-25",
  "scan_total": 0,
  "copied_new": 0,
  "skipped_idempotent": 0,
  "redacted_hits": 0,
  "skill_dirs": 0,
  "status": "already_done"
}

02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_report.json (new file, 7 lines)
@@ -0,0 +1,7 @@
{
  "updated": "2026-02-25 11:52:16",
  "status": "ok",
  "issues": [],
  "consecutive_anomalies": 0,
  "alert": false
}

02_卡人(水)/水溪_整理归档/记忆系统/structured/watchdog_state.json (new file, 5 lines)
@@ -0,0 +1,5 @@
{
  "consecutive_anomalies": 0,
  "last_status": "ok",
  "updated": "2026-02-25 11:52:16"
}
@@ -266,6 +266,77 @@ class OpenAIResponsesRequest(BaseModel):
     stream: Optional[bool] = None
 
 
+_CONTEXT_TAG_NOISE = (
+    "<open_and_recently_viewed_files>",
+    "</open_and_recently_viewed_files>",
+    "<user_info>",
+    "</user_info>",
+    "<git_status>",
+    "</git_status>",
+    "<agent_transcripts>",
+    "</agent_transcripts>",
+    "<system_reminder>",
+    "</system_reminder>",
+)
+
+_CONTEXT_TEXT_NOISE = (
+    "user currently doesn't have any open files in their ide",
+    "note: these files may or may not be relevant",
+    "workspace paths:",
+    "is directory a git repo:",
+)
+
+
+def _looks_like_context_noise(text: str) -> bool:
+    s = (text or "").strip()
+    if not s:
+        return True
+    low = s.lower()
+    if any(tag in s for tag in _CONTEXT_TAG_NOISE):
+        return True
+    if any(tok in low for tok in _CONTEXT_TEXT_NOISE):
+        return True
+    return False
+
+
+def _content_to_text(content: Any) -> str:
+    """
+    Extract conversational text from OpenAI-compatible content.
+    Accept only text/input_text/output_text parts; ignore image/file/tool parts.
+    """
+    if isinstance(content, str):
+        return content.strip()
+
+    if isinstance(content, dict):
+        # Handles {"type": "input_text", "text": "..."} / {"text": "..."}
+        t = str(content.get("type", "")).strip().lower()
+        txt = str(content.get("text", "")).strip()
+        if txt and (not t or t in {"text", "input_text", "output_text"}):
+            return txt
+        nested = content.get("content")
+        if nested is not None:
+            return _content_to_text(nested)
+        return ""
+
+    if isinstance(content, list):
+        chunks: List[str] = []
+        for part in content:
+            if isinstance(part, str):
+                s = part.strip()
+                if s:
+                    chunks.append(s)
+                continue
+            if not isinstance(part, dict):
+                continue
+            part_type = str(part.get("type", "")).strip().lower()
+            part_text = str(part.get("text", "")).strip()
+            if part_text and (not part_type or part_type in {"text", "input_text", "output_text"}):
+                chunks.append(part_text)
+        return "\n".join(chunks).strip()
+
+    return ""
+
+
 def _messages_to_prompt(messages: List[Dict[str, Any]]) -> str:
     """
     Prefer the last user message; otherwise concatenate all message text.
@@ -274,37 +345,26 @@ def _messages_to_prompt(messages: List[Dict[str, Any]]) -> str:
     chunks: List[str] = []
     for m in messages or []:
         role = str(m.get("role", "")).strip()
-        content = m.get("content", "")
-        if isinstance(content, list):
-            content = "\n".join(
-                str(x.get("text", "")) for x in content if isinstance(x, dict) and x.get("type") == "text"
-            )
-        content = str(content)
-        if role and content:
+        content = _content_to_text(m.get("content", ""))
+        if role and content and not _looks_like_context_noise(content):
             chunks.append(f"{role}: {content}")
-        if role == "user" and content:
+        if role == "user" and content and not _looks_like_context_noise(content):
             last_user = content
     return (last_user or ("\n".join(chunks))).strip()
 
 
-def _deep_extract_text(node: Any, out: List[str]) -> None:
-    """
-    Best-effort extraction of readable text from an arbitrary JSON structure.
-    """
-    if isinstance(node, str):
-        s = node.strip()
-        if s:
-            out.append(s)
-        return
-    if isinstance(node, dict):
-        # Prefer the common fields
-        for k in ("text", "input_text", "output_text", "content"):
-            if k in node:
-                _deep_extract_text(node.get(k), out)
-                return
-    if isinstance(node, list):
-        for it in node:
-            _deep_extract_text(it, out)
+def _has_attachment_payload(node: Any) -> bool:
+    if isinstance(node, dict):
+        keys = {str(k).lower() for k in node.keys()}
+        if keys.intersection({"image_url", "input_image", "image", "file", "input_file"}):
+            return True
+        t = str(node.get("type", "")).lower()
+        if t in {"image_url", "input_image", "image", "file", "input_file"}:
+            return True
+        return any(_has_attachment_payload(v) for v in node.values())
+    if isinstance(node, list):
+        return any(_has_attachment_payload(x) for x in node)
+    return False
 
 
 async def _fallback_prompt_from_request_body(request: Request) -> str:
@@ -319,53 +379,55 @@ async def _fallback_prompt_from_request_body(request: Request) -> str:
     except Exception:
         return ""
 
-    texts: List[str] = []
-
-    # Prefer messages
+    # Prefer messages (user role only)
+    user_texts: List[str] = []
     msgs = data.get("messages")
     if isinstance(msgs, list):
-        _deep_extract_text(msgs, texts)
+        for m in msgs:
+            if not isinstance(m, dict):
+                continue
+            if str(m.get("role", "")).strip().lower() != "user":
+                continue
+            txt = _content_to_text(m.get("content", ""))
+            if txt and not _looks_like_context_noise(txt):
+                user_texts.append(txt)
+    if user_texts:
+        return user_texts[-1]
 
-    # Responses-API style input
-    if not texts:
-        _deep_extract_text(data.get("input"), texts)
-
-    prompt = "\n".join(t for t in texts if t).strip()
-    if prompt:
-        return prompt
+    # Responses API compatibility: input
+    input_prompt = _responses_input_to_prompt(data.get("input"))
+    if input_prompt and not _looks_like_context_noise(input_prompt):
+        return input_prompt
 
     # Attachment-only fallback, to avoid "empty messages"
-    body_str = json.dumps(data, ensure_ascii=False)
-    if any(k in body_str for k in ["image_url", "input_image", "image", "file"]):
+    if _has_attachment_payload(data):
         return "[User sent an attachment; handle it together with the context]"
     return ""
 
 
 def _template_reply(prompt: str, matched_skill: str, skill_path: str, error: str = "") -> str:
-    """Template reply when no LLM is configured or the call fails (still carries the review format)."""
-    err = f"\n(OPENAI_API_KEY not configured, or the call failed: {error})" if error else ""
-    return f"""【Thinking】
-Matched a skill for your question: {matched_skill} ({skill_path}). Will proceed per the 卡若AI process.{err}
-
-【Key steps】
-1. Read BOOTSTRAP + SKILL_REGISTRY.
-2. Read the matching SKILL: {skill_path}.
-3. Execute the SKILL steps and verify.
-
-[卡若复盘] (date)
-🎯 Goal · Result · Completion
-Goal: respond to "{prompt[:50]}…" per 卡若AI logic. Result: skill matched, this template returned. Completion: see the real LLM reply after deployment.
-📌 Process
-1. Receive the request and match a skill.
-2. Load BOOTSTRAP and REGISTRY.
-3. Generate the reply with a review block.
-💡 Reflection
-Configure OPENAI_API_KEY after deployment to get real LLM replies.
-📝 Summary
-The 卡若AI gateway is ready; once an API is configured, external traffic is generated per 卡若AI logic.
-▶ Next step
-Set OPENAI_API_KEY (and optionally OPENAI_API_BASE, OPENAI_MODEL) in the environment, then restart the service.
-"""
+    """Degraded 卡若-style reply when no LLM is configured or the call fails."""
+    note = ""
+    if error:
+        note = " (model service temporarily unavailable; switched to degraded mode)"
+
+    user_text = (prompt or "").strip()
+    if len(user_text) > 120:
+        user_text = user_text[:120] + "..."
+
+    return (
+        f"Conclusion: I received your actual question and started processing it.{note}\n"
+        f"Matched skill: {matched_skill} ({skill_path})\n"
+        f"Your question: {user_text}\n"
+        "Steps:\n"
+        "1) Confirm the goal and constraints first.\n"
+        "2) Give a directly executable plan.\n"
+        "3) Then add risks and next steps.\n\n"
+        "[卡若复盘]\n"
+        "Goal & result: restore a usable conversation path (90% completion)\n"
+        "Process: request recognized, skill matched, degraded reply produced.\n"
+        "Next: send a concrete task and I return an executed result."
    )
 
 
 def _as_openai_stream(reply: str, model: str, created: int):
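
To show what the new noise filter drops, a self-contained sketch using a subset of the constants above (copied from this diff) and the same matching logic:

```python
# Subset of the noise constants from this diff, with the same matching logic.
_CONTEXT_TAG_NOISE = ("<git_status>", "</git_status>", "<user_info>", "</user_info>")
_CONTEXT_TEXT_NOISE = ("workspace paths:", "is directory a git repo:")

def looks_like_context_noise(text: str) -> bool:
    s = (text or "").strip()
    if not s:
        return True
    low = s.lower()
    return (any(tag in s for tag in _CONTEXT_TAG_NOISE)
            or any(tok in low for tok in _CONTEXT_TEXT_NOISE))

print(looks_like_context_noise("<git_status>\non main, clean\n</git_status>"))  # True -> dropped
print(looks_like_context_noise("Help me fix the login bug"))                    # False -> kept
```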
@@ -138,3 +138,4 @@
 | 2026-02-25 10:15:21 | 🔄 卡若AI sync 2026-02-25 10:15 | Updated: 水桥平台对接, 运营中枢, 运营中枢工作台 | Excluded >20MB: 13 files |
 | 2026-02-25 10:23:07 | 🔄 卡若AI sync 2026-02-25 10:22 | Updated: 水桥平台对接, 运营中枢工作台 | Excluded >20MB: 13 files |
 | 2026-02-25 10:26:04 | 🔄 卡若AI sync 2026-02-25 10:26 | Updated: 水桥平台对接, 水溪整理归档 | Excluded >20MB: 13 files |
+| 2026-02-25 11:03:16 | 🔄 卡若AI sync 2026-02-25 11:03 | Updated: 水桥平台对接, 运营中枢工作台 | Excluded >20MB: 13 files |

@@ -141,3 +141,4 @@
 | 2026-02-25 10:15:21 | success | success | 🔄 卡若AI sync 2026-02-25 10:15 | Updated: 水桥平台对接, 运营中枢, 运营中枢工作台 | Excluded >20MB: 13 files | [repo](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [wiki](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
 | 2026-02-25 10:23:07 | success | success | 🔄 卡若AI sync 2026-02-25 10:22 | Updated: 水桥平台对接, 运营中枢工作台 | Excluded >20MB: 13 files | [repo](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [wiki](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
 | 2026-02-25 10:26:04 | success | success | 🔄 卡若AI sync 2026-02-25 10:26 | Updated: 水桥平台对接, 水溪整理归档 | Excluded >20MB: 13 files | [repo](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [wiki](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |
+| 2026-02-25 11:03:16 | success | success | 🔄 卡若AI sync 2026-02-25 11:03 | Updated: 水桥平台对接, 运营中枢工作台 | Excluded >20MB: 13 files | [repo](http://open.quwanzhi.com:3000/fnvtk/karuo-ai) [wiki](http://open.quwanzhi.com:3000/fnvtk/karuo-ai/wiki) |