Files
suanli-juzhen/01_扫描模块/scripts/enhance_scan_table.py

449 lines
18 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
增强 KR.分布式矩阵IP_已扫描
================================
MongoDB 中已导入的基础扫描数据出发增强为完整文档
- 关联用户链从分布式矩阵IP源表
- SSH 难易度评估
- 登录建议SSH/RDP/VNC/Telnet/Web 命令
- 部署评估
- 快捷登录命令
直接操作 MongoDB不需要加载 298MB JSON
"""
import pymongo
import sys
from datetime import datetime
from collections import defaultdict
MONGO_URI = 'mongodb://admin:admin123@localhost:27017/?authSource=admin'
# SSH 难易度标签
DIFFICULTY_LABELS = {
1: "极易(默认密码/老设备/Telnet)",
2: "较易(弱密码/嵌入式/VNC)",
3: "中等(标准服务器)",
4: "较难(新版本SSH/密钥优先)",
5: "极难(仅密钥认证/防火墙)",
}
# 常见默认凭证
DEFAULT_CREDS = {
"Linux/BSD": [
("root", "root"), ("root", "admin"), ("root", "123456"),
("root", "password"), ("admin", "admin"), ("ubuntu", "ubuntu"),
],
"Ubuntu Linux": [("ubuntu", "ubuntu"), ("root", "root"), ("root", "admin123")],
"Debian Linux": [("root", "root"), ("root", "admin")],
"CentOS/RHEL": [("root", "root"), ("centos", "centos"), ("root", "admin123")],
"Windows": [("Administrator", "admin123"), ("Administrator", "123456"), ("admin", "admin")],
"嵌入式/路由器": [("admin", "admin"), ("root", "admin"), ("root", "root")],
"MikroTik路由器": [("admin", ""), ("admin", "admin")],
"Unknown": [("root", "root"), ("admin", "admin"), ("root", "123456")],
}
SSH_SIGNATURES = {
"OpenSSH": "Linux/BSD", "dropbear": "嵌入式/路由器",
"ROSSSH": "MikroTik路由器", "Cisco": "Cisco设备",
"libssh": "自定义SSH", "WeOnlyDo": "Windows SSH",
"SSH-2.0-Go": "Go应用",
}
def assess_ssh(banner, open_ports_set):
"""评估SSH难度"""
if not banner:
return 5, "极难(无SSH banner)", []
difficulty = 3
notes = []
bl = banner.lower()
if any(v in bl for v in ["openssh_4.", "openssh_5.", "ssh-2.0-openssh_4", "ssh-2.0-openssh_5"]):
difficulty -= 1; notes.append("老版本SSH")
elif any(v in bl for v in ["openssh_8.", "openssh_9.", "ssh-2.0-openssh_8", "ssh-2.0-openssh_9"]):
difficulty += 1; notes.append("新版本SSH(安全性高)")
if "dropbear" in bl:
difficulty -= 1; notes.append("嵌入式设备(可能默认密码)")
if any(v in bl for v in ["cisco", "rosssh"]):
difficulty -= 1; notes.append("网络设备(可能默认凭证)")
if 5900 in open_ports_set:
difficulty -= 1; notes.append("有VNC(通常密码简单)")
if 23 in open_ports_set:
difficulty -= 1; notes.append("有Telnet(明文)")
if 3389 in open_ports_set:
notes.append("有RDP远程桌面")
if 8888 in open_ports_set:
notes.append("有宝塔面板")
difficulty = max(1, min(5, difficulty))
return difficulty, DIFFICULTY_LABELS.get(difficulty, ""), notes
def guess_os(banner):
"""从SSH banner推测OS"""
if not banner:
return "Unknown"
bl = banner.lower()
for sig, os_type in SSH_SIGNATURES.items():
if sig.lower() in bl:
if "ubuntu" in bl: return "Ubuntu Linux"
if "debian" in bl: return "Debian Linux"
if any(v in bl for v in ["centos", "el7", "el8"]): return "CentOS/RHEL"
return os_type
if "ubuntu" in bl: return "Ubuntu Linux"
if "debian" in bl: return "Debian Linux"
if any(v in bl for v in ["centos", "el7", "el8"]): return "CentOS/RHEL"
if "openssh" in bl: return "Linux/BSD"
return "Unknown"
def main():
client = pymongo.MongoClient(MONGO_URI)
db = client['KR']
# 源表(基础扫描数据,已由 kr_full_scan.py 写入)
scan_coll = db['分布式矩阵IP_已扫描']
# 用户链源表
matrix_coll = db['分布式矩阵IP']
existing_count = scan_coll.count_documents({})
print(f"分布式矩阵IP_已扫描 现有: {existing_count:,}")
print(f"分布式矩阵IP 源表: {matrix_coll.estimated_document_count():,}")
if existing_count == 0:
print("错误: 目标表为空,请先运行 kr_full_scan.py 进行扫描")
sys.exit(1)
# ===== 第一步: 构建 IP -> 用户映射的索引 =====
print("\n[1/4] 构建 IP → 用户索引从分布式矩阵IP表...")
# 获取所有已扫描的 IP
scanned_ips = set()
for doc in scan_coll.find({}, {"ip": 1}):
scanned_ips.add(doc["ip"])
print(f" 已扫描 IP: {len(scanned_ips):,}")
# 构建索引: IP -> 用户列表
ip_users = defaultdict(list)
total_matched = 0
batch_count = 0
# 分批查询(避免超大 $or 查询)
ip_list = list(scanned_ips)
batch_size = 500
for i in range(0, len(ip_list), batch_size):
batch = ip_list[i:i + batch_size]
query = {"$or": [
{"ip": {"$in": batch}},
{"ip_reg": {"$in": batch}},
{"ip_last": {"$in": batch}},
]}
for user in matrix_coll.find(query, {
"_id": 0, "username": 1, "email": 1, "password": 1, "salt": 1,
"phone": 1, "qq": 1, "region": 1, "country": 1, "province": 1, "city": 1,
"source_db": 1, "source_col": 1, "reg_time": 1, "last_active_time": 1,
"R_score": 1, "F_score": 1, "M_score": 1, "RFM_total": 1,
"value_level": 1, "user_type": 1, "extra": 1,
"ip": 1, "ip_reg": 1, "ip_last": 1,
}):
# 关联到所有匹配的 IP
for field in ["ip", "ip_reg", "ip_last"]:
ip_val = user.get(field, "")
if ip_val and ip_val in scanned_ips:
ip_users[ip_val].append(user)
total_matched += 1
batch_count += 1
if batch_count % 50 == 0:
progress = min(i + batch_size, len(ip_list))
print(f" 索引构建: {progress:,}/{len(ip_list):,} ({progress/len(ip_list)*100:.0f}%) | 匹配用户: {total_matched:,}")
print(f" 索引完成: {len(ip_users):,} 个IP有关联用户, 总匹配: {total_matched:,}")
# ===== 第二步: 增强每条记录 =====
print(f"\n[2/4] 增强 {existing_count:,} 条记录...")
# 创建新的增强集合
enhanced_coll_name = "分布式矩阵IP_已扫描_v2"
enhanced_coll = db[enhanced_coll_name]
enhanced_coll.delete_many({})
batch_docs = []
processed = 0
for doc in scan_coll.find():
ip = doc.get("ip", "")
open_ports = doc.get("open_ports", {})
# 端口集合(统一为 int
port_ints = set()
for p in open_ports.keys():
try: port_ints.add(int(p))
except: pass
# SSH 信息
ssh_open = 22 in port_ints or 2222 in port_ints
ssh_port = 22 if 22 in port_ints else (2222 if 2222 in port_ints else None)
ssh_banner = ""
if ssh_port:
ssh_banner = open_ports.get(str(ssh_port), {}).get("banner", "")
os_guess = guess_os(ssh_banner) if ssh_banner else doc.get("os_guess", "Unknown")
# SSH 难度
diff_val, diff_label, diff_notes = assess_ssh(ssh_banner, port_ints) if ssh_open else (5, "极难(无SSH)", ["SSH端口未开放"])
diff_stars = "" * diff_val + "" * (5 - diff_val)
# 远程方法
remote_methods = []
if ssh_port: remote_methods.append(f"SSH:{ssh_port}")
if 3389 in port_ints: remote_methods.append("RDP:3389")
if 5900 in port_ints: remote_methods.append("VNC:5900")
if 23 in port_ints: remote_methods.append("Telnet:23")
if 8888 in port_ints: remote_methods.append("BaoTa:8888")
# 用户链
users = ip_users.get(ip, [])
users_sorted = sorted(users, key=lambda x: x.get("last_active_time") or "", reverse=True)
primary = users_sorted[0] if users_sorted else {}
# 用户摘要
users_summary = []
for u in users_sorted[:20]:
users_summary.append({
"username": u.get("username", ""),
"email": u.get("email", ""),
"password_hash": u.get("password", ""),
"salt": u.get("salt", ""),
"phone": u.get("phone", ""),
"qq": u.get("qq", ""),
"source_col": u.get("source_col", ""),
"reg_time": u.get("reg_time", ""),
"last_active": u.get("last_active_time", ""),
"value_level": u.get("value_level", ""),
"RFM_total": u.get("RFM_total", 0),
})
# 登录建议
login_ssh = None
if ssh_port:
creds = [{"u": u, "p": p} for u, p in DEFAULT_CREDS.get(os_guess, DEFAULT_CREDS["Unknown"])]
# 追加数据库用户凭证
for u in users_sorted[:5]:
un = u.get("username", "")
pw = u.get("password", "")
if un and len(un) <= 32:
creds.append({"u": un, "p": f"hash:{pw[:16]}" if pw else "", "from_db": True})
login_ssh = {
"port": ssh_port,
"cmd": f"ssh root@{ip} -p {ssh_port}",
"cmd_sshpass": f"sshpass -p 'PASSWORD' ssh -o StrictHostKeyChecking=no root@{ip} -p {ssh_port}",
"creds": creds,
}
login_rdp = {"port": 3389, "cmd": f"open rdp://{ip}", "cmd_rdesktop": f"rdesktop {ip}:3389"} if 3389 in port_ints else None
login_vnc = {"port": 5900, "cmd": f"open vnc://{ip}", "common_pw": ["", "123456", "password"]} if 5900 in port_ints else None
login_telnet = {"port": 23, "cmd": f"telnet {ip} 23"} if 23 in port_ints else None
web_urls = {}
if 80 in port_ints: web_urls["http"] = f"http://{ip}"
if 443 in port_ints: web_urls["https"] = f"https://{ip}"
if 8888 in port_ints: web_urls["baota"] = f"http://{ip}:8888"
# 部署评分
deploy_score = 0
if ssh_open: deploy_score += 50
if os_guess in ("Ubuntu Linux", "Debian Linux", "CentOS/RHEL", "Linux/BSD"): deploy_score += 30
elif os_guess == "Unknown" and ssh_open: deploy_score += 15
if 80 in port_ints or 443 in port_ints: deploy_score += 10
if 3389 in port_ints: deploy_score -= 10
deploy_notes_parts = []
if ssh_open: deploy_notes_parts.append("SSH可达")
if os_guess != "Unknown": deploy_notes_parts.append(f"{os_guess}")
if 8888 in port_ints: deploy_notes_parts.append("有宝塔面板")
if 3389 in port_ints: deploy_notes_parts.append("Windows RDP")
# 服务器类型
server_types = []
if ssh_open: server_types.append("SSH可达")
if 3389 in port_ints: server_types.append("Windows Server")
if 5900 in port_ints: server_types.append("VNC远程桌面")
if 23 in port_ints: server_types.append("Telnet")
if 80 in port_ints or 443 in port_ints: server_types.append("Web服务器")
if 8888 in port_ints: server_types.append("宝塔面板")
if not server_types: server_types.append("其他服务")
# 构建完整文档
enhanced_doc = {
# === 基本标识 ===
"ip": ip,
"source_col": doc.get("source_col", ""),
"sources": list(set(u.get("source_col", "") for u in users)) if users else [doc.get("source_col", "")],
# === 端口扫描 ===
"scan_time": doc.get("scan_time", ""),
"port_count": len(port_ints),
"open_ports": open_ports,
"open_port_list": sorted(port_ints),
# === 端口快捷标记 ===
"ssh_open": ssh_open,
"ssh_port": ssh_port,
"ssh_banner": ssh_banner,
"rdp_open": 3389 in port_ints,
"vnc_open": 5900 in port_ints,
"telnet_open": 23 in port_ints,
"http_open": 80 in port_ints,
"https_open": 443 in port_ints,
"baota_open": 8888 in port_ints,
# === 服务器分类 ===
"server_types": server_types,
"os_guess": os_guess,
"ssh_version": ssh_banner[:80] if ssh_banner else "",
# === 远程登录分析 ===
"remote_methods": remote_methods,
"remote_method_count": len(remote_methods),
"ssh_difficulty": diff_val,
"ssh_difficulty_stars": diff_stars,
"ssh_difficulty_label": diff_label,
"ssh_notes": diff_notes,
# === 快捷登录命令(核心:直接复制使用)===
"quick_ssh": f"ssh root@{ip} -p {ssh_port}" if ssh_port else "",
"quick_rdp": f"open rdp://{ip}" if 3389 in port_ints else "",
"quick_vnc": f"open vnc://{ip}" if 5900 in port_ints else "",
"quick_telnet": f"telnet {ip}" if 23 in port_ints else "",
"quick_web": f"http://{ip}" if 80 in port_ints else (f"https://{ip}" if 443 in port_ints else ""),
"quick_baota": f"http://{ip}:8888" if 8888 in port_ints else "",
# === 登录凭证建议 ===
"login_ssh": login_ssh,
"login_rdp": login_rdp,
"login_vnc": login_vnc,
"login_telnet": login_telnet,
"login_web": web_urls if web_urls else None,
# === 部署评估 ===
"deploy_score": deploy_score,
"deploy_ready": deploy_score >= 50,
"deploy_notes": "; ".join(deploy_notes_parts),
# === 用户链 ===
"user_count": len(users),
"users": users_summary,
"primary_user": {
"username": primary.get("username", ""),
"email": primary.get("email", ""),
"password_hash": primary.get("password", ""),
"salt": primary.get("salt", ""),
"phone": primary.get("phone", ""),
"qq": primary.get("qq", ""),
"region": primary.get("region", ""),
"province": primary.get("province", ""),
"city": primary.get("city", ""),
"value_level": primary.get("value_level", ""),
"user_type": primary.get("user_type", ""),
"source_col": primary.get("source_col", ""),
} if primary else {},
# === 元数据 ===
"enhanced_at": datetime.now().isoformat(),
}
batch_docs.append(enhanced_doc)
processed += 1
if len(batch_docs) >= 5000:
enhanced_coll.insert_many(batch_docs, ordered=False)
print(f" [{processed/existing_count*100:5.1f}%] {processed:,}/{existing_count:,} | 有用户链: {sum(1 for d in batch_docs if d['user_count'] > 0)}")
batch_docs = []
if batch_docs:
enhanced_coll.insert_many(batch_docs, ordered=False)
print(f" [100.0%] {processed:,}/{existing_count:,} 全部完成")
# ===== 第三步: 替换原表 =====
print(f"\n[3/4] 替换原表...")
# 删除旧表
db.drop_collection("分布式矩阵IP_已扫描")
# 重命名新表
enhanced_coll.rename("分布式矩阵IP_已扫描")
target = db["分布式矩阵IP_已扫描"]
print(f" 已替换! 新表: {target.count_documents({}):,}")
# ===== 第四步: 创建索引 =====
print(f"\n[4/4] 创建索引...")
for idx in [
[("ip", 1)],
[("ssh_open", 1)],
[("rdp_open", 1)],
[("vnc_open", 1)],
[("telnet_open", 1)],
[("baota_open", 1)],
[("ssh_difficulty", 1)],
[("deploy_score", -1)],
[("deploy_ready", 1)],
[("os_guess", 1)],
[("user_count", -1)],
[("port_count", -1)],
[("source_col", 1)],
[("ssh_open", 1), ("ssh_difficulty", 1)],
[("deploy_ready", 1), ("deploy_score", -1)],
[("ssh_open", 1), ("deploy_score", -1)],
]:
target.create_index(idx)
print(f" 16 个索引已创建")
# ===== 统计输出 =====
total = target.count_documents({})
print(f"\n{'='*60}")
print(f"KR.分布式矩阵IP_已扫描 增强完成!")
print(f"{'='*60}")
print(f"总记录: {total:,}")
print(f"SSH可达: {target.count_documents({'ssh_open': True}):,}")
print(f"RDP可达: {target.count_documents({'rdp_open': True}):,}")
print(f"VNC可达: {target.count_documents({'vnc_open': True}):,}")
print(f"Telnet: {target.count_documents({'telnet_open': True}):,}")
print(f"宝塔面板: {target.count_documents({'baota_open': True}):,}")
print(f"可部署: {target.count_documents({'deploy_ready': True}):,}")
print(f"有用户链: {target.count_documents({'user_count': {'$gt': 0}}):,}")
print(f"\nSSH难度分布:")
for d in range(1, 6):
c = target.count_documents({"ssh_open": True, "ssh_difficulty": d})
print(f" {d}{DIFFICULTY_LABELS.get(d, '')}: {c:,}")
print(f"\nOS分布 (SSH可达):")
pipe = [
{"$match": {"ssh_open": True}},
{"$group": {"_id": "$os_guess", "count": {"$sum": 1}}},
{"$sort": {"count": -1}}
]
for r in target.aggregate(pipe):
print(f" {r['_id']}: {r['count']:,}")
print(f"\n来源分布:")
pipe = [
{"$unwind": "$sources"},
{"$group": {"_id": "$sources", "total": {"$sum": 1}, "ssh": {"$sum": {"$cond": ["$ssh_open", 1, 0]}}}},
{"$sort": {"total": -1}}
]
for r in target.aggregate(pipe):
print(f" {r['_id']}: {r['total']:,} (SSH: {r['ssh']:,})")
# 显示几条样例
print(f"\n样例 (SSH难度最低前5):")
for doc in target.find({"ssh_open": True}).sort("ssh_difficulty", 1).limit(5):
print(f" {doc['ip']}:{doc.get('ssh_port', 22)} | {doc['os_guess']} | {doc['ssh_difficulty_stars']} | 用户:{doc['user_count']} | {doc.get('quick_ssh', '')}")
print(f"\n完成!")
if __name__ == "__main__":
main()