#!/usr/bin/env python3 """ 增强 KR.分布式矩阵IP_已扫描 表 ================================ 从 MongoDB 中已导入的基础扫描数据出发,增强为完整文档: - 关联用户链(从分布式矩阵IP源表) - SSH 难易度评估 - 登录建议(SSH/RDP/VNC/Telnet/Web 命令) - 部署评估 - 快捷登录命令 直接操作 MongoDB,不需要加载 298MB JSON。 """ import pymongo import sys from datetime import datetime from collections import defaultdict MONGO_URI = 'mongodb://admin:admin123@localhost:27017/?authSource=admin' # SSH 难易度标签 DIFFICULTY_LABELS = { 1: "极易(默认密码/老设备/Telnet)", 2: "较易(弱密码/嵌入式/VNC)", 3: "中等(标准服务器)", 4: "较难(新版本SSH/密钥优先)", 5: "极难(仅密钥认证/防火墙)", } # 常见默认凭证 DEFAULT_CREDS = { "Linux/BSD": [ ("root", "root"), ("root", "admin"), ("root", "123456"), ("root", "password"), ("admin", "admin"), ("ubuntu", "ubuntu"), ], "Ubuntu Linux": [("ubuntu", "ubuntu"), ("root", "root"), ("root", "admin123")], "Debian Linux": [("root", "root"), ("root", "admin")], "CentOS/RHEL": [("root", "root"), ("centos", "centos"), ("root", "admin123")], "Windows": [("Administrator", "admin123"), ("Administrator", "123456"), ("admin", "admin")], "嵌入式/路由器": [("admin", "admin"), ("root", "admin"), ("root", "root")], "MikroTik路由器": [("admin", ""), ("admin", "admin")], "Unknown": [("root", "root"), ("admin", "admin"), ("root", "123456")], } SSH_SIGNATURES = { "OpenSSH": "Linux/BSD", "dropbear": "嵌入式/路由器", "ROSSSH": "MikroTik路由器", "Cisco": "Cisco设备", "libssh": "自定义SSH", "WeOnlyDo": "Windows SSH", "SSH-2.0-Go": "Go应用", } def assess_ssh(banner, open_ports_set): """评估SSH难度""" if not banner: return 5, "极难(无SSH banner)", [] difficulty = 3 notes = [] bl = banner.lower() if any(v in bl for v in ["openssh_4.", "openssh_5.", "ssh-2.0-openssh_4", "ssh-2.0-openssh_5"]): difficulty -= 1; notes.append("老版本SSH") elif any(v in bl for v in ["openssh_8.", "openssh_9.", "ssh-2.0-openssh_8", "ssh-2.0-openssh_9"]): difficulty += 1; notes.append("新版本SSH(安全性高)") if "dropbear" in bl: difficulty -= 1; notes.append("嵌入式设备(可能默认密码)") if any(v in bl for v in ["cisco", "rosssh"]): difficulty -= 1; notes.append("网络设备(可能默认凭证)") if 5900 in open_ports_set: difficulty -= 1; notes.append("有VNC(通常密码简单)") if 23 in open_ports_set: difficulty -= 1; notes.append("有Telnet(明文)") if 3389 in open_ports_set: notes.append("有RDP远程桌面") if 8888 in open_ports_set: notes.append("有宝塔面板") difficulty = max(1, min(5, difficulty)) return difficulty, DIFFICULTY_LABELS.get(difficulty, ""), notes def guess_os(banner): """从SSH banner推测OS""" if not banner: return "Unknown" bl = banner.lower() for sig, os_type in SSH_SIGNATURES.items(): if sig.lower() in bl: if "ubuntu" in bl: return "Ubuntu Linux" if "debian" in bl: return "Debian Linux" if any(v in bl for v in ["centos", "el7", "el8"]): return "CentOS/RHEL" return os_type if "ubuntu" in bl: return "Ubuntu Linux" if "debian" in bl: return "Debian Linux" if any(v in bl for v in ["centos", "el7", "el8"]): return "CentOS/RHEL" if "openssh" in bl: return "Linux/BSD" return "Unknown" def main(): client = pymongo.MongoClient(MONGO_URI) db = client['KR'] # 源表(基础扫描数据,已由 kr_full_scan.py 写入) scan_coll = db['分布式矩阵IP_已扫描'] # 用户链源表 matrix_coll = db['分布式矩阵IP'] existing_count = scan_coll.count_documents({}) print(f"分布式矩阵IP_已扫描 现有: {existing_count:,} 条") print(f"分布式矩阵IP 源表: {matrix_coll.estimated_document_count():,} 条") if existing_count == 0: print("错误: 目标表为空,请先运行 kr_full_scan.py 进行扫描") sys.exit(1) # ===== 第一步: 构建 IP -> 用户映射的索引 ===== print("\n[1/4] 构建 IP → 用户索引(从分布式矩阵IP表)...") # 获取所有已扫描的 IP scanned_ips = set() for doc in scan_coll.find({}, {"ip": 1}): scanned_ips.add(doc["ip"]) print(f" 已扫描 IP: {len(scanned_ips):,}") # 构建索引: IP -> 用户列表 ip_users = defaultdict(list) total_matched = 0 batch_count = 0 # 分批查询(避免超大 $or 查询) ip_list = list(scanned_ips) batch_size = 500 for i in range(0, len(ip_list), batch_size): batch = ip_list[i:i + batch_size] query = {"$or": [ {"ip": {"$in": batch}}, {"ip_reg": {"$in": batch}}, {"ip_last": {"$in": batch}}, ]} for user in matrix_coll.find(query, { "_id": 0, "username": 1, "email": 1, "password": 1, "salt": 1, "phone": 1, "qq": 1, "region": 1, "country": 1, "province": 1, "city": 1, "source_db": 1, "source_col": 1, "reg_time": 1, "last_active_time": 1, "R_score": 1, "F_score": 1, "M_score": 1, "RFM_total": 1, "value_level": 1, "user_type": 1, "extra": 1, "ip": 1, "ip_reg": 1, "ip_last": 1, }): # 关联到所有匹配的 IP for field in ["ip", "ip_reg", "ip_last"]: ip_val = user.get(field, "") if ip_val and ip_val in scanned_ips: ip_users[ip_val].append(user) total_matched += 1 batch_count += 1 if batch_count % 50 == 0: progress = min(i + batch_size, len(ip_list)) print(f" 索引构建: {progress:,}/{len(ip_list):,} ({progress/len(ip_list)*100:.0f}%) | 匹配用户: {total_matched:,}") print(f" 索引完成: {len(ip_users):,} 个IP有关联用户, 总匹配: {total_matched:,}") # ===== 第二步: 增强每条记录 ===== print(f"\n[2/4] 增强 {existing_count:,} 条记录...") # 创建新的增强集合 enhanced_coll_name = "分布式矩阵IP_已扫描_v2" enhanced_coll = db[enhanced_coll_name] enhanced_coll.delete_many({}) batch_docs = [] processed = 0 for doc in scan_coll.find(): ip = doc.get("ip", "") open_ports = doc.get("open_ports", {}) # 端口集合(统一为 int) port_ints = set() for p in open_ports.keys(): try: port_ints.add(int(p)) except: pass # SSH 信息 ssh_open = 22 in port_ints or 2222 in port_ints ssh_port = 22 if 22 in port_ints else (2222 if 2222 in port_ints else None) ssh_banner = "" if ssh_port: ssh_banner = open_ports.get(str(ssh_port), {}).get("banner", "") os_guess = guess_os(ssh_banner) if ssh_banner else doc.get("os_guess", "Unknown") # SSH 难度 diff_val, diff_label, diff_notes = assess_ssh(ssh_banner, port_ints) if ssh_open else (5, "极难(无SSH)", ["SSH端口未开放"]) diff_stars = "★" * diff_val + "☆" * (5 - diff_val) # 远程方法 remote_methods = [] if ssh_port: remote_methods.append(f"SSH:{ssh_port}") if 3389 in port_ints: remote_methods.append("RDP:3389") if 5900 in port_ints: remote_methods.append("VNC:5900") if 23 in port_ints: remote_methods.append("Telnet:23") if 8888 in port_ints: remote_methods.append("BaoTa:8888") # 用户链 users = ip_users.get(ip, []) users_sorted = sorted(users, key=lambda x: x.get("last_active_time") or "", reverse=True) primary = users_sorted[0] if users_sorted else {} # 用户摘要 users_summary = [] for u in users_sorted[:20]: users_summary.append({ "username": u.get("username", ""), "email": u.get("email", ""), "password_hash": u.get("password", ""), "salt": u.get("salt", ""), "phone": u.get("phone", ""), "qq": u.get("qq", ""), "source_col": u.get("source_col", ""), "reg_time": u.get("reg_time", ""), "last_active": u.get("last_active_time", ""), "value_level": u.get("value_level", ""), "RFM_total": u.get("RFM_total", 0), }) # 登录建议 login_ssh = None if ssh_port: creds = [{"u": u, "p": p} for u, p in DEFAULT_CREDS.get(os_guess, DEFAULT_CREDS["Unknown"])] # 追加数据库用户凭证 for u in users_sorted[:5]: un = u.get("username", "") pw = u.get("password", "") if un and len(un) <= 32: creds.append({"u": un, "p": f"hash:{pw[:16]}" if pw else "", "from_db": True}) login_ssh = { "port": ssh_port, "cmd": f"ssh root@{ip} -p {ssh_port}", "cmd_sshpass": f"sshpass -p 'PASSWORD' ssh -o StrictHostKeyChecking=no root@{ip} -p {ssh_port}", "creds": creds, } login_rdp = {"port": 3389, "cmd": f"open rdp://{ip}", "cmd_rdesktop": f"rdesktop {ip}:3389"} if 3389 in port_ints else None login_vnc = {"port": 5900, "cmd": f"open vnc://{ip}", "common_pw": ["", "123456", "password"]} if 5900 in port_ints else None login_telnet = {"port": 23, "cmd": f"telnet {ip} 23"} if 23 in port_ints else None web_urls = {} if 80 in port_ints: web_urls["http"] = f"http://{ip}" if 443 in port_ints: web_urls["https"] = f"https://{ip}" if 8888 in port_ints: web_urls["baota"] = f"http://{ip}:8888" # 部署评分 deploy_score = 0 if ssh_open: deploy_score += 50 if os_guess in ("Ubuntu Linux", "Debian Linux", "CentOS/RHEL", "Linux/BSD"): deploy_score += 30 elif os_guess == "Unknown" and ssh_open: deploy_score += 15 if 80 in port_ints or 443 in port_ints: deploy_score += 10 if 3389 in port_ints: deploy_score -= 10 deploy_notes_parts = [] if ssh_open: deploy_notes_parts.append("SSH可达") if os_guess != "Unknown": deploy_notes_parts.append(f"{os_guess}") if 8888 in port_ints: deploy_notes_parts.append("有宝塔面板") if 3389 in port_ints: deploy_notes_parts.append("Windows RDP") # 服务器类型 server_types = [] if ssh_open: server_types.append("SSH可达") if 3389 in port_ints: server_types.append("Windows Server") if 5900 in port_ints: server_types.append("VNC远程桌面") if 23 in port_ints: server_types.append("Telnet") if 80 in port_ints or 443 in port_ints: server_types.append("Web服务器") if 8888 in port_ints: server_types.append("宝塔面板") if not server_types: server_types.append("其他服务") # 构建完整文档 enhanced_doc = { # === 基本标识 === "ip": ip, "source_col": doc.get("source_col", ""), "sources": list(set(u.get("source_col", "") for u in users)) if users else [doc.get("source_col", "")], # === 端口扫描 === "scan_time": doc.get("scan_time", ""), "port_count": len(port_ints), "open_ports": open_ports, "open_port_list": sorted(port_ints), # === 端口快捷标记 === "ssh_open": ssh_open, "ssh_port": ssh_port, "ssh_banner": ssh_banner, "rdp_open": 3389 in port_ints, "vnc_open": 5900 in port_ints, "telnet_open": 23 in port_ints, "http_open": 80 in port_ints, "https_open": 443 in port_ints, "baota_open": 8888 in port_ints, # === 服务器分类 === "server_types": server_types, "os_guess": os_guess, "ssh_version": ssh_banner[:80] if ssh_banner else "", # === 远程登录分析 === "remote_methods": remote_methods, "remote_method_count": len(remote_methods), "ssh_difficulty": diff_val, "ssh_difficulty_stars": diff_stars, "ssh_difficulty_label": diff_label, "ssh_notes": diff_notes, # === 快捷登录命令(核心:直接复制使用)=== "quick_ssh": f"ssh root@{ip} -p {ssh_port}" if ssh_port else "", "quick_rdp": f"open rdp://{ip}" if 3389 in port_ints else "", "quick_vnc": f"open vnc://{ip}" if 5900 in port_ints else "", "quick_telnet": f"telnet {ip}" if 23 in port_ints else "", "quick_web": f"http://{ip}" if 80 in port_ints else (f"https://{ip}" if 443 in port_ints else ""), "quick_baota": f"http://{ip}:8888" if 8888 in port_ints else "", # === 登录凭证建议 === "login_ssh": login_ssh, "login_rdp": login_rdp, "login_vnc": login_vnc, "login_telnet": login_telnet, "login_web": web_urls if web_urls else None, # === 部署评估 === "deploy_score": deploy_score, "deploy_ready": deploy_score >= 50, "deploy_notes": "; ".join(deploy_notes_parts), # === 用户链 === "user_count": len(users), "users": users_summary, "primary_user": { "username": primary.get("username", ""), "email": primary.get("email", ""), "password_hash": primary.get("password", ""), "salt": primary.get("salt", ""), "phone": primary.get("phone", ""), "qq": primary.get("qq", ""), "region": primary.get("region", ""), "province": primary.get("province", ""), "city": primary.get("city", ""), "value_level": primary.get("value_level", ""), "user_type": primary.get("user_type", ""), "source_col": primary.get("source_col", ""), } if primary else {}, # === 元数据 === "enhanced_at": datetime.now().isoformat(), } batch_docs.append(enhanced_doc) processed += 1 if len(batch_docs) >= 5000: enhanced_coll.insert_many(batch_docs, ordered=False) print(f" [{processed/existing_count*100:5.1f}%] {processed:,}/{existing_count:,} | 有用户链: {sum(1 for d in batch_docs if d['user_count'] > 0)}") batch_docs = [] if batch_docs: enhanced_coll.insert_many(batch_docs, ordered=False) print(f" [100.0%] {processed:,}/{existing_count:,} 全部完成") # ===== 第三步: 替换原表 ===== print(f"\n[3/4] 替换原表...") # 删除旧表 db.drop_collection("分布式矩阵IP_已扫描") # 重命名新表 enhanced_coll.rename("分布式矩阵IP_已扫描") target = db["分布式矩阵IP_已扫描"] print(f" 已替换! 新表: {target.count_documents({}):,} 条") # ===== 第四步: 创建索引 ===== print(f"\n[4/4] 创建索引...") for idx in [ [("ip", 1)], [("ssh_open", 1)], [("rdp_open", 1)], [("vnc_open", 1)], [("telnet_open", 1)], [("baota_open", 1)], [("ssh_difficulty", 1)], [("deploy_score", -1)], [("deploy_ready", 1)], [("os_guess", 1)], [("user_count", -1)], [("port_count", -1)], [("source_col", 1)], [("ssh_open", 1), ("ssh_difficulty", 1)], [("deploy_ready", 1), ("deploy_score", -1)], [("ssh_open", 1), ("deploy_score", -1)], ]: target.create_index(idx) print(f" 16 个索引已创建") # ===== 统计输出 ===== total = target.count_documents({}) print(f"\n{'='*60}") print(f"KR.分布式矩阵IP_已扫描 增强完成!") print(f"{'='*60}") print(f"总记录: {total:,}") print(f"SSH可达: {target.count_documents({'ssh_open': True}):,}") print(f"RDP可达: {target.count_documents({'rdp_open': True}):,}") print(f"VNC可达: {target.count_documents({'vnc_open': True}):,}") print(f"Telnet: {target.count_documents({'telnet_open': True}):,}") print(f"宝塔面板: {target.count_documents({'baota_open': True}):,}") print(f"可部署: {target.count_documents({'deploy_ready': True}):,}") print(f"有用户链: {target.count_documents({'user_count': {'$gt': 0}}):,}") print(f"\nSSH难度分布:") for d in range(1, 6): c = target.count_documents({"ssh_open": True, "ssh_difficulty": d}) print(f" {d}★ {DIFFICULTY_LABELS.get(d, '')}: {c:,}") print(f"\nOS分布 (SSH可达):") pipe = [ {"$match": {"ssh_open": True}}, {"$group": {"_id": "$os_guess", "count": {"$sum": 1}}}, {"$sort": {"count": -1}} ] for r in target.aggregate(pipe): print(f" {r['_id']}: {r['count']:,}") print(f"\n来源分布:") pipe = [ {"$unwind": "$sources"}, {"$group": {"_id": "$sources", "total": {"$sum": 1}, "ssh": {"$sum": {"$cond": ["$ssh_open", 1, 0]}}}}, {"$sort": {"total": -1}} ] for r in target.aggregate(pipe): print(f" {r['_id']}: {r['total']:,} (SSH: {r['ssh']:,})") # 显示几条样例 print(f"\n样例 (SSH难度最低前5):") for doc in target.find({"ssh_open": True}).sort("ssh_difficulty", 1).limit(5): print(f" {doc['ip']}:{doc.get('ssh_port', 22)} | {doc['os_guess']} | {doc['ssh_difficulty_stars']} | 用户:{doc['user_count']} | {doc.get('quick_ssh', '')}") print(f"\n完成!") if __name__ == "__main__": main()