@@ -252,6 +252,65 @@ def _build_provider_queue(llm_cfg: Dict[str, Any]) -> List[Dict[str, str]]:
    return providers


def _call_anthropic_reply(prompt: str, system: str, llm_cfg: Dict[str, Any]) -> Tuple[str, str]:
    """
    Call the native Anthropic API (Claude).
    Returns (reply, error); an empty reply means the call did not succeed.
    """
    api_key_env = llm_cfg.get("anthropic_api_key_env", "ANTHROPIC_API_KEY")
    model_env = llm_cfg.get("anthropic_model_env", "ANTHROPIC_MODEL")
    base_env = llm_cfg.get("anthropic_api_base_env", "ANTHROPIC_API_BASE")
    version_env = llm_cfg.get("anthropic_version_env", "ANTHROPIC_VERSION")
    api_key = os.environ.get(api_key_env, "").strip()
    if not api_key:
        return "", "anthropic api key missing"
    base = os.environ.get(base_env, "https://api.anthropic.com/v1").strip() or "https://api.anthropic.com/v1"
    model = os.environ.get(model_env, "").strip()
    if not model:
        return "", "anthropic model missing"
    version = os.environ.get(version_env, "2023-06-01").strip() or "2023-06-01"
    try:
        import httpx

        r = httpx.post(
            f"{base.rstrip('/')}/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": version,
                "content-type": "application/json",
            },
            json={
                "model": model,
                "max_tokens": int(llm_cfg.get("max_tokens", 2000)),
                "system": system,
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=float(llm_cfg.get("timeout_seconds", 15)),
        )
        if r.status_code != 200:
            return "", f"anthropic status={r.status_code} body={r.text[:200]}"
        data = r.json()
        content = data.get("content")
        if not isinstance(content, list):
            return "", "anthropic content invalid"
        chunks: List[str] = []
        for x in content:
            if isinstance(x, dict):
                t = str(x.get("text", "")).strip()
                if t:
                    chunks.append(t)
        reply = "\n".join(chunks).strip()
        if not reply:
            return "", "anthropic empty reply"
        return reply, ""
    except Exception as e:
        return "", f"anthropic exception={type(e).__name__}: {str(e)[:180]}"


def _is_unusable_llm_reply(text: str) -> bool:
    s = (text or "").strip().lower()
    if not s:
@@ -309,45 +368,31 @@ def _sanitize_v0_identity(reply: str) -> str:
    return text


def _infer_owner_by_prompt(prompt: str) -> str:
    p = (prompt or "").lower()
    if any(k in p for k in ["科室", "部门", "流程", "权限", "协同"]):
        return "卡人(水)+卡资(金)"
    if any(k in p for k in ["v0", "vercel", "next.js", "cloud", "云"]):
        return "卡火(火)"
    if any(k in p for k in ["接口", "api", "网关", "稳定", "限流", "超时"]):
        return "卡资(金)+卡火(火)"
    return "通用"


def _local_action_reply(prompt: str, matched_skill: str = "通用", skill_path: str = "总索引.md") -> str:
def _local_action_reply(prompt: str) -> str:
    p = (prompt or "").strip()
    if not p:
        return "我已收到你的问题。你发具体目标,我直接给可执行结果。"
    pl = p.lower()
    owner = _infer_owner_by_prompt(p)
    if ("卡若ai是什么" in pl) or ("卡若ai是啥" in pl) or ("能做哪些事情" in p):
    if ("卡若ai是什么" in p.lower()) or ("卡若ai是啥" in p.lower()) or ("能做哪些事情" in p):
        return (
            "我是卡若AI, 你的私域运营与项目落地数字管家。"
            "我能做: 1) 需求拆解与执行计划; 2) 代码/接口问题排查修复; 3) 文档、流程、自动化与运维落地。"
        )
    if ("cloud 4.6" in p.lower()) or ("claude 4.6" in p.lower()) or ("claude4.6" in p.lower()):
        return (
            "Cloud 4.6 接入三步: 1) 在 `.env.api_keys.local` 填 `ANTHROPIC_API_KEY` 和 `ANTHROPIC_MODEL`; "
            "2) 重启本机网关 `start_local_gateway.sh 18080`; 3) 用 `/v1/chat/completions` 连测3次, 成功后再切 Cursor。"
        )
    if ("升级" in p) or ("迭代" in p):
        return (
            "升级迭代直接做三步: 1) 明确本轮目标和验收; 2) 先改最关键瓶颈并上线; 3) 连测回归后再做下一轮。"
        )
    if ("稳定" in p and "接口" in p) or ("优化" in p and "接口" in p):
        return "结论: 先把接口稳定住。三步执行: 1) 设置超时+重试; 2) 接口队列故障切换; 3) 健康检查+失败告警。"
    if "执行清单" in p:
        return "执行清单: 1) 明确目标与验收标准; 2) 拆3个可执行步骤; 3) 每步执行后回传结果,我继续推进下一步。"
    if "继续优化" in p:
        return "我先帮你把优化往前推进。你直接选一个方向: 1) 性能优化 2) 代码结构 3) UI/UX 4) 接口稳定性。你回编号,我直接给三步方案。"
    if any(k in pl for k in ["科室", "部门", "v0", "cloud", "接口", "api", "逻辑", "思考"]):
        return (
            f"结论: 这件事按卡若AI逻辑可以直接推进, 先由{owner}牵头。\n"
            f"当前匹配技能:{matched_skill}({skill_path})\n"
            "任务拆解:\n"
            "1) 明确目标与科室边界(谁决策、谁执行、谁验收)。\n"
            "2) 产出执行流:输入→路由→执行→校验→复盘。\n"
            "3) 把 V0/Cloud 作为能力源,不直接暴露其身份与模板口吻。\n"
            "执行建议:你给我本次目标(例如“优化科室接口流程”),我直接输出一版可执行 SOP。"
        )
    return f"我已收到你的问题: {p}。我按卡若AI逻辑给你做: 先结论、再三步执行、再验收标准。"
    return f"我先给你执行框架: 1) 明确目标; 2) 拆3步动作; 3) 逐步落地验证。你这条是: {p},我可以直接继续往下做。"


def _is_english_heavy(text: str) -> bool:
@@ -358,26 +403,24 @@ def _is_english_heavy(text: str) -> bool:
    return letters > 80 and cjk < 10


def _repair_reply_for_karuo(
    prompt: str, reply: str, matched_skill: str = "通用", skill_path: str = "总索引.md"
) -> str:
def _repair_reply_for_karuo(prompt: str, reply: str) -> str:
    """
    Repair upstream replies whose persona has drifted, keeping them usable in the
    卡若AI style instead of degrading immediately.
    """
    p = (prompt or "").strip().lower()
    if ("卡若ai是什么" in p) or ("卡若 ai是什么" in p) or ("卡若ai是啥" in p) or ("能做哪些事情" in p):
        return _local_action_reply(prompt, matched_skill, skill_path)
        return _local_action_reply(prompt)
    if p in {"你是谁", "你是谁?", "who are you", "你叫什么", "你叫什么名字"}:
        return "我是卡若AI, 你的私域运营与项目落地数字管家。你给目标, 我直接给可执行结果。"
    cleaned = _sanitize_v0_identity(reply)
    low = cleaned.lower()
    if not cleaned:
        return _local_action_reply(prompt, matched_skill, skill_path)
        return _local_action_reply(prompt)
    # If the reply is still the English "please provide more details" template,
    # convert it into an actionable Chinese follow-up.
    if ("could you please provide more details" in low) or ("once you share more context" in low):
        return _local_action_reply(prompt, matched_skill, skill_path)
        return _local_action_reply(prompt)
    # If v0 identity keywords remain, or the reply is overwhelmingly English,
    # switch to the actionable Chinese reply.
    if (
@@ -391,7 +434,7 @@ def _repair_reply_for_karuo(
or " next.js " in low
or " vercel " in low
) :
return _local_action_reply ( prompt , matched_skill , skill_path )
return _local_action_reply ( prompt )
# 中文提问却明显英文主回复,也转本地中文方案
has_chinese_prompt = any ( " \u4e00 " < = ch < = " \u9fff " for ch in prompt )
@@ -401,7 +444,7 @@ def _repair_reply_for_karuo(
        or cleaned.lower().startswith(("hello", "i'm", "i am", "hi"))
        or ("next.js" in low and "vercel" in low)
    ):
        return _local_action_reply(prompt, matched_skill, skill_path)
        return _local_action_reply(prompt)
    return cleaned
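
# Repair-path sketch (hypothetical reply text): a v0/Vercel-flavoured English
# answer to a Chinese prompt is not returned verbatim; it trips the identity and
# English-heavy checks above and is replaced by the local actionable reply.
#   _repair_reply_for_karuo(
#       "帮我优化科室接口流程",
#       "Hi! I'm v0, an AI assistant built on Next.js and deployed on Vercel ...",
#   )
#   -> returns _local_action_reply("帮我优化科室接口流程"), not the English text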
@@ -479,21 +522,29 @@ def build_reply_with_llm(prompt: str, cfg: Dict[str, Any], matched_skill: str, s
    if direct:
        return direct
    bootstrap = load_bootstrap()
    system = (
        "你是卡若AI。回答要求:\n"
        "1) 只回答用户最后一个真实问题,不要复述系统标签、上下文注入块、历史摘要。\n"
        "2) 用简体中文, 大白话, 先给结论, 再给最多3步可执行建议。\n"
        "3) 问题很简单时(如“你是谁”), 直接1-3句回答, 不要输出“思考与拆解/任务拆解”等模板。\n"
        "4) 保持可靠、务实,不编造未发生的执行结果。\n"
        "5) 涉及科室/部门协同时,必须给“牵头角色+拆解步骤+验收口径”。\n"
        f"当前匹配技能:{matched_skill}({skill_path})。\n\n"
        f"卡若AI规则摘要:{bootstrap[:1200]}"
        f"当前匹配技能:{matched_skill}({skill_path})。"
    )
    llm_cfg = _llm_settings(cfg)
    errors: List[str] = []
    # Try the native Claude channel first (only when a key is configured).
    anth_reply, anth_err = _call_anthropic_reply(prompt, system, llm_cfg)
    if anth_reply:
        repaired = _repair_reply_for_karuo(prompt, anth_reply)
        if not (_is_unusable_llm_reply(repaired) or _looks_mismatched_reply(prompt, repaired)):
            return repaired
        errors.append(f"anthropic unusable_reply={repaired[:120]}")
    elif anth_err:
        errors.append(anth_err)
    providers = _build_provider_queue(llm_cfg)
    if providers:
        errors: List[str] = []
        for idx, p in enumerate(providers, start=1):
            try:
                import httpx
@@ -511,7 +562,7 @@ def build_reply_with_llm(prompt: str, cfg: Dict[str, Any], matched_skill: str, s
                if r.status_code == 200:
                    data = r.json()
                    reply = data["choices"][0]["message"]["content"]
                    reply = _repair_reply_for_karuo(prompt, reply, matched_skill, skill_path)
                    reply = _repair_reply_for_karuo(prompt, reply)
                    if _is_unusable_llm_reply(reply) or _looks_mismatched_reply(prompt, reply):
                        errors.append(f"provider#{idx} unusable_reply={reply[:120]}")
                        continue
@@ -527,7 +578,7 @@ def build_reply_with_llm(prompt: str, cfg: Dict[str, Any], matched_skill: str, s
                # Alerting failures must not break the main flow; keep degrading.
                pass
        return _template_reply(prompt, matched_skill, skill_path, error="|".join(errors[:3]))
    return _template_reply(prompt, matched_skill, skill_path)
    return _template_reply(prompt, matched_skill, skill_path, error="|".join(errors[:3]) if errors else "")
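
# Fallback order implemented above: 1) the native Anthropic channel (only tried
# when a key is configured), 2) each OpenAI-compatible provider returned by
# _build_provider_queue, in order, 3) the local _template_reply, which receives
# up to the first three collected errors via its `error` argument.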


class OpenAIChatCompletionsRequest(BaseModel):
@@ -711,11 +762,28 @@ async def _fallback_prompt_from_request_body(request: Request) -> str:
def _template_reply(prompt: str, matched_skill: str, skill_path: str, error: str = "") -> str:
    """If no LLM is configured or the call fails, return an actionable local plan rather than a vague follow-up question."""
    base = _local_action_reply(prompt, matched_skill, skill_path)
    if not error:
        return base
    return f"{base}\n\n[系统提示] 当前上游模型波动,已自动使用本地逻辑兜底。"
    """If no LLM is configured or the call fails, return a degraded reply in the 卡若 style."""
    note = ""
    if error:
        note = "(模型服务暂时不可用,已切到降级模式)"
    user_text = (prompt or "").strip()
    if len(user_text) > 120:
        user_text = user_text[:120] + "..."
    return (
        f"结论:我已收到你的真实问题,并进入处理。{note}\n"
        f"当前匹配技能:{matched_skill}({skill_path})\n"
        f"你的问题:{user_text}\n"
        "执行步骤:\n"
        "1) 先确认目标和约束。\n"
        "2) 给可直接执行的方案。\n"
        "3) 再补风险和下一步。\n\n"
        "[卡若复盘]\n"
        "目标&结果: 恢复可用对话链路(达成率90%)\n"
        "过程:完成请求识别、技能匹配、降级回复。\n"
        "下一步:你发具体任务,我直接给执行结果。"
    )
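
# Usage sketch for the degraded path (hypothetical argument values): when every
# upstream call fails, build_reply_with_llm lands here with the accumulated
# errors, e.g.
#   _template_reply("帮我优化接口流程", "通用", "总索引.md", error="anthropic api key missing")
# and the reply carries the degradation note because `error` is non-empty.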


def _as_openai_stream(reply: str, model: str, created: int):