#!/usr/bin/env python3 """ KR 数据库 IP 全量异步扫描器(优化版) ====================================== - 420万+ 公网 IP,扫描 8 个关键端口(远程登录 + 服务识别) - asyncio 超高并发(8000+ 并发连接) - Banner 抓取 + SSH 难易度评估 - 服务器类型智能分类 + 远程登录可行性评估 - 来源追踪(小米/房产网/老坑爹/卡塔卡银行等) - 结果写入 MongoDB _已扫描 新表 用法: python3 kr_full_scan.py [--concurrency 8000] [--timeout 2] """ import asyncio import json import time import sys import os import argparse import ipaddress from datetime import datetime from collections import defaultdict # ========== 配置 ========== # 关键扫描端口(远程登录 + 服务识别) SCAN_PORTS = { 22: "SSH", 2222: "SSH-Alt", 23: "Telnet", 80: "HTTP", 443: "HTTPS", 3389: "RDP", 5900: "VNC", 8888: "BaoTa", } # SSH Banner 特征 SSH_SIGNATURES = { "OpenSSH": "Linux/BSD", "dropbear": "嵌入式/路由器", "ROSSSH": "MikroTik路由器", "Cisco": "Cisco设备", "libssh": "自定义SSH", "Serv-U": "Windows FTP/SSH", "WeOnlyDo": "Windows SSH", "SSH-2.0-Go": "Go应用", } # SSH 难易度评估 def assess_ssh_difficulty(ssh_banner, open_ports): """评估 SSH 登录难易度(1-5星,1最容易)""" difficulty = 3 # 默认中等 auth_methods = "unknown" notes = [] if not ssh_banner: return {"difficulty": 5, "auth_methods": "无SSH", "notes": ["SSH端口未开放"]} banner_lower = ssh_banner.lower() # 老版本 SSH = 容易(可能有漏洞) if "openssh_4." in banner_lower or "openssh_5." in banner_lower: difficulty -= 1 notes.append("老版本SSH(可能有已知漏洞)") elif "openssh_6." in banner_lower: notes.append("较老版本SSH") elif "openssh_9." in banner_lower or "openssh_8." in banner_lower: difficulty += 1 notes.append("新版本SSH(安全性较高)") # dropbear = 嵌入式,通常弱密码 if "dropbear" in banner_lower: difficulty -= 1 notes.append("嵌入式设备(可能默认密码)") # Cisco/MikroTik = 网络设备,有默认密码 if "cisco" in banner_lower or "rosssh" in banner_lower: difficulty -= 1 notes.append("网络设备(可能有默认凭证)") # 有宝塔面板 = 可能有Web管理入口 if 8888 in open_ports: notes.append("有宝塔面板(可通过Web管理)") # 有 RDP = Windows,可尝试RDP if 3389 in open_ports: notes.append("有RDP(可尝试远程桌面)") # 有 VNC = 可能弱密码 if 5900 in open_ports: difficulty -= 1 notes.append("有VNC(通常密码简单)") # 有 Telnet = 不安全,容易 if 23 in open_ports: difficulty -= 1 notes.append("有Telnet(明文传输,易攻)") # 限制范围 difficulty = max(1, min(5, difficulty)) stars = "★" * difficulty + "☆" * (5 - difficulty) return { "difficulty": difficulty, "difficulty_stars": stars, "notes": notes, } def classify_server(ports_data): """根据开放端口和 Banner 判断服务器类型 + SSH难易度""" open_ports = set(ports_data.keys()) banners = {p: d.get("banner", "") for p, d in ports_data.items()} server_type = [] os_guess = "Unknown" ssh_version = "" remote_methods = [] # SSH 分析 if 22 in open_ports or 2222 in open_ports: ssh_port = 22 if 22 in open_ports else 2222 banner = banners.get(ssh_port, "") ssh_version = banner for sig, os_type in SSH_SIGNATURES.items(): if sig.lower() in banner.lower(): os_guess = os_type break if "ubuntu" in banner.lower(): os_guess = "Ubuntu Linux" elif "debian" in banner.lower(): os_guess = "Debian Linux" elif "centos" in banner.lower() or "el7" in banner.lower() or "el8" in banner.lower(): os_guess = "CentOS/RHEL" remote_methods.append(f"SSH:{ssh_port}") server_type.append("SSH可达") if 3389 in open_ports: server_type.append("Windows Server") os_guess = "Windows" remote_methods.append("RDP:3389") if 5900 in open_ports: server_type.append("VNC远程桌面") remote_methods.append("VNC:5900") if 23 in open_ports: server_type.append("Telnet") remote_methods.append("Telnet:23") if 80 in open_ports or 443 in open_ports: server_type.append("Web服务器") if 8888 in open_ports: server_type.append("宝塔面板") remote_methods.append("BaoTa:8888") # SSH 难易度评估 ssh_banner = "" if 22 in open_ports: ssh_banner = banners.get(22, "") elif 2222 in open_ports: ssh_banner = banners.get(2222, "") ssh_assessment = assess_ssh_difficulty(ssh_banner, open_ports) # 部署评分 deploy_score = 0 if 22 in open_ports or 2222 in open_ports: deploy_score += 50 if os_guess in ("Ubuntu Linux", "Debian Linux", "CentOS/RHEL", "Linux/BSD"): deploy_score += 30 elif os_guess == "Unknown" and (22 in open_ports): deploy_score += 15 if 80 in open_ports or 443 in open_ports: deploy_score += 10 if 3389 in open_ports: deploy_score -= 10 return { "server_types": server_type if server_type else ["未知服务"], "os_guess": os_guess, "ssh_version": ssh_version, "remote_methods": remote_methods, "ssh_difficulty": ssh_assessment, "deploy_score": deploy_score, "deploy_ready": deploy_score >= 50, } # ========== 异步扫描核心 ========== class AsyncPortScanner: def __init__(self, concurrency=8000, timeout=2, banner_timeout=1): self.concurrency = concurrency self.timeout = timeout self.banner_timeout = banner_timeout self.semaphore = None self.total_ips = 0 self.scanned_ips = 0 self.total_open = 0 self.ips_with_open = 0 self.results = {} self.start_time = None self.lock = asyncio.Lock() async def scan_port(self, ip, port): try: async with self.semaphore: reader, writer = await asyncio.wait_for( asyncio.open_connection(ip, port), timeout=self.timeout ) banner = "" try: if port in (80, 8888): writer.write(f"HEAD / HTTP/1.0\r\nHost: {ip}\r\n\r\n".encode()) await writer.drain() data = await asyncio.wait_for(reader.read(512), timeout=self.banner_timeout) banner = data.decode("utf-8", errors="replace").strip()[:200] except: pass writer.close() try: await writer.wait_closed() except: pass return (port, True, banner) except: return (port, False, "") async def scan_ip(self, ip): tasks = [self.scan_port(ip, port) for port in SCAN_PORTS.keys()] results = await asyncio.gather(*tasks, return_exceptions=True) open_ports = {} for result in results: if isinstance(result, Exception): continue port, is_open, banner = result if is_open: open_ports[port] = {"service": SCAN_PORTS[port], "banner": banner, "open": True} async with self.lock: self.scanned_ips += 1 if open_ports: self.ips_with_open += 1 self.total_open += len(open_ports) analysis = classify_server(open_ports) self.results[ip] = { "ip": ip, "open_ports": open_ports, "port_count": len(open_ports), "analysis": analysis, "scan_time": datetime.now().isoformat(), } if self.scanned_ips % 5000 == 0 or (open_ports and self.scanned_ips % 100 == 0): elapsed = time.time() - self.start_time rate = self.scanned_ips / elapsed if elapsed > 0 else 0 remaining = (self.total_ips - self.scanned_ips) / rate if rate > 0 else 0 progress = self.scanned_ips / self.total_ips * 100 status = f"[{progress:5.1f}%] {self.scanned_ips:,}/{self.total_ips:,} | " status += f"发现 {self.ips_with_open:,} 有端口IP ({self.total_open:,} 端口) | " status += f"{rate:.0f} IP/s | 剩余 {remaining/60:.0f}min" print(status, flush=True) async def run(self, ip_list): self.semaphore = asyncio.Semaphore(self.concurrency) self.total_ips = len(ip_list) self.start_time = time.time() print(f"{'=' * 70}") print(f"KR 数据库 IP 全量扫描器 启动") print(f"{'=' * 70}") print(f"目标IP数: {self.total_ips:,}") print(f"扫描端口: {len(SCAN_PORTS)} 个 ({', '.join(f'{p}({n})' for p, n in sorted(SCAN_PORTS.items()))})") print(f"并发: {self.concurrency} | 超时: {self.timeout}s | Banner: {self.banner_timeout}s") print(f"总连接: {self.total_ips * len(SCAN_PORTS):,}") print(f"开始: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print(f"{'=' * 70}") batch_size = 10000 for i in range(0, len(ip_list), batch_size): batch = ip_list[i:i + batch_size] tasks = [self.scan_ip(ip) for ip in batch] await asyncio.gather(*tasks, return_exceptions=True) elapsed = time.time() - self.start_time print(f"\n{'=' * 70}") print(f"扫描完成! 耗时 {elapsed:.0f}s ({elapsed/60:.1f}min, {elapsed/3600:.1f}h)") print(f"扫描IP: {self.scanned_ips:,} | 有端口: {self.ips_with_open:,} ({self.ips_with_open/max(1,self.scanned_ips)*100:.1f}%)") print(f"总端口: {self.total_open:,} | 速率: {self.scanned_ips/elapsed:.0f} IP/s") print(f"{'=' * 70}") return self.results # ========== MongoDB 导入 ========== def import_to_mongodb(results, ip_source_map, source_db_name="KR"): """将扫描结果导入 MongoDB _已扫描 表""" import pymongo client = pymongo.MongoClient('mongodb://admin:admin123@localhost:27017/?authSource=admin') db = client[source_db_name] # 按来源分组 source_results = defaultdict(list) for ip, data in results.items(): sources = ip_source_map.get(ip, ["unknown"]) for source in sources: doc = { "ip": ip, "source_col": source, "scan_time": data["scan_time"], "port_count": data["port_count"], "open_ports": {str(k): v for k, v in data["open_ports"].items()}, "server_types": data["analysis"]["server_types"], "os_guess": data["analysis"]["os_guess"], "ssh_version": data["analysis"]["ssh_version"], "remote_methods": data["analysis"]["remote_methods"], "ssh_difficulty": data["analysis"]["ssh_difficulty"]["difficulty"], "ssh_difficulty_stars": data["analysis"]["ssh_difficulty"].get("difficulty_stars", ""), "ssh_notes": data["analysis"]["ssh_difficulty"]["notes"], "deploy_score": data["analysis"]["deploy_score"], "deploy_ready": data["analysis"]["deploy_ready"], } source_results[source].append(doc) # 写入各来源的 _已扫描 表 for source, docs in source_results.items(): coll_name = f"{source}_已扫描" coll = db[coll_name] if docs: # 清空旧数据 coll.delete_many({}) coll.insert_many(docs, ordered=False) print(f" {source_db_name}.{coll_name}: 写入 {len(docs):,} 条") # 写入总表 all_docs = [] for docs in source_results.values(): all_docs.extend(docs) total_coll = db["分布式矩阵IP_已扫描"] total_coll.delete_many({}) if all_docs: total_coll.insert_many(all_docs, ordered=False) print(f" {source_db_name}.分布式矩阵IP_已扫描: 写入 {len(all_docs):,} 条") # 创建索引 total_coll.create_index("ip") total_coll.create_index("source_col") total_coll.create_index("ssh_difficulty") total_coll.create_index("deploy_score") return len(all_docs) def import_mumayi_results(mumayi_json_path, source_db_name="KR"): """导入木蚂蚁已扫描结果到统一格式""" import pymongo with open(mumayi_json_path, 'r') as f: data = json.load(f) results = data.get("results", {}) if not results: print("木蚂蚁结果为空") return 0 client = pymongo.MongoClient('mongodb://admin:admin123@localhost:27017/?authSource=admin') db = client[source_db_name] docs = [] for ip, scan_data in results.items(): doc = { "ip": ip, "source_col": "木蚂蚁munayi_com", "scan_time": scan_data.get("scan_time", ""), "port_count": scan_data.get("port_count", 0), "open_ports": {str(k): v for k, v in scan_data.get("open_ports", {}).items()}, "server_types": scan_data.get("analysis", {}).get("server_types", []), "os_guess": scan_data.get("analysis", {}).get("os_guess", "Unknown"), "ssh_version": scan_data.get("analysis", {}).get("ssh_version", ""), "remote_methods": [], "ssh_difficulty": 5, "ssh_difficulty_stars": "☆☆☆☆☆", "ssh_notes": [], "deploy_score": scan_data.get("analysis", {}).get("deploy_score", 0), "deploy_ready": scan_data.get("analysis", {}).get("deploy_ready", False), } # 补充远程方法和SSH难易度 open_ports = set(int(p) for p in scan_data.get("open_ports", {}).keys()) if 22 in open_ports or 2222 in open_ports: ssh_port = 22 if 22 in open_ports else 2222 doc["remote_methods"].append(f"SSH:{ssh_port}") ssh_banner = scan_data.get("open_ports", {}).get(str(ssh_port), {}).get("banner", "") assessment = assess_ssh_difficulty(ssh_banner, open_ports) doc["ssh_difficulty"] = assessment["difficulty"] doc["ssh_difficulty_stars"] = assessment.get("difficulty_stars", "") doc["ssh_notes"] = assessment["notes"] if 3389 in open_ports: doc["remote_methods"].append("RDP:3389") if 5900 in open_ports: doc["remote_methods"].append("VNC:5900") if 23 in open_ports: doc["remote_methods"].append("Telnet:23") if 8888 in open_ports: doc["remote_methods"].append("BaoTa:8888") docs.append(doc) coll = db["木蚂蚁munayi_com_已扫描"] coll.delete_many({}) if docs: coll.insert_many(docs, ordered=False) coll.create_index("ip") coll.create_index("ssh_difficulty") # 也追加到总表 total_coll = db["分布式矩阵IP_已扫描"] total_coll.insert_many(docs, ordered=False) print(f" 木蚂蚁已扫描结果: 写入 {len(docs):,} 条") return len(docs) # ========== 报告生成 ========== def generate_report(results, ip_source_map, output_dir, total_ips): os.makedirs(output_dir, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # JSON json_path = os.path.join(output_dir, f"kr_scan_results_{timestamp}.json") with open(json_path, "w", encoding="utf-8") as f: json.dump({"scan_info": {"timestamp": datetime.now().isoformat(), "total_ips": total_ips, "found": len(results)}, "results": results}, f, ensure_ascii=False, indent=2) # 统计 port_stats = defaultdict(int) type_stats = defaultdict(int) source_stats = defaultdict(lambda: {"total": 0, "ssh": 0, "rdp": 0, "vnc": 0}) ssh_difficulty_dist = defaultdict(int) for ip, data in results.items(): for port in data["open_ports"]: port_stats[port] += 1 for t in data["analysis"]["server_types"]: type_stats[t] += 1 diff = data["analysis"]["ssh_difficulty"]["difficulty"] ssh_difficulty_dist[diff] += 1 sources = ip_source_map.get(ip, ["unknown"]) for src in sources: source_stats[src]["total"] += 1 if 22 in data["open_ports"] or 2222 in data["open_ports"]: source_stats[src]["ssh"] += 1 if 3389 in data["open_ports"]: source_stats[src]["rdp"] += 1 if 5900 in data["open_ports"]: source_stats[src]["vnc"] += 1 # Markdown md_path = os.path.join(output_dir, f"kr_全量扫描报告_{timestamp}.md") with open(md_path, "w", encoding="utf-8") as f: f.write(f"# KR 数据库 IP 全量扫描报告\n\n") f.write(f"> 扫描时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") f.write(f"> 数据来源: KR.分布式矩阵IP(除木蚂蚁外所有来源)\n\n---\n\n") f.write(f"## 一、总览\n\n| 指标 | 数值 |\n|:---|:---|\n") f.write(f"| 目标 IP | {total_ips:,} |\n") f.write(f"| 有端口 IP | **{len(results):,}** ({len(results)/total_ips*100:.1f}%) |\n") ssh_count = sum(1 for d in results.values() if 22 in d["open_ports"] or 2222 in d["open_ports"]) rdp_count = sum(1 for d in results.values() if 3389 in d["open_ports"]) vnc_count = sum(1 for d in results.values() if 5900 in d["open_ports"]) f.write(f"| SSH 可达 | **{ssh_count:,}** |\n") f.write(f"| RDP 可达 | **{rdp_count:,}** |\n") f.write(f"| VNC 可达 | **{vnc_count:,}** |\n\n") f.write(f"## 二、各来源扫描统计\n\n") f.write(f"| 来源 | 有端口IP | SSH | RDP | VNC |\n|:---|:---|:---|:---|:---|\n") for src, stats in sorted(source_stats.items(), key=lambda x: -x[1]["total"]): f.write(f"| {src} | {stats['total']:,} | {stats['ssh']:,} | {stats['rdp']:,} | {stats['vnc']:,} |\n") f.write(f"\n") f.write(f"## 三、SSH 登录难易度分布\n\n") f.write(f"| 难度 | 说明 | 数量 |\n|:---|:---|:---|\n") diff_labels = {1: "★☆☆☆☆ 极易(默认密码/老设备)", 2: "★★☆☆☆ 较易(弱密码/嵌入式)", 3: "★★★☆☆ 中等(标准服务器)", 4: "★★★★☆ 较难(新版本SSH)", 5: "★★★★★ 极难(密钥认证)"} for d in sorted(ssh_difficulty_dist.keys()): f.write(f"| {diff_labels.get(d, str(d))} | | {ssh_difficulty_dist[d]:,} |\n") f.write(f"\n") f.write(f"## 四、端口统计\n\n| 端口 | 服务 | 数量 |\n|:---|:---|:---|\n") for port, count in sorted(port_stats.items(), key=lambda x: -x[1]): f.write(f"| {port} | {SCAN_PORTS.get(port, '?')} | {count:,} |\n") f.write(f"\n") # SSH 可达列表(前200) ssh_ips = [(ip, data) for ip, data in results.items() if 22 in data["open_ports"] or 2222 in data["open_ports"]] ssh_ips.sort(key=lambda x: x[1]["analysis"]["ssh_difficulty"]["difficulty"]) f.write(f"## 五、SSH 可达 IP(按难易度排序,共 {len(ssh_ips)} 个)\n\n") f.write(f"| # | IP | 来源 | SSH端口 | SSH版本 | OS | 难度 | 其他远程 | 说明 |\n") f.write(f"|:---|:---|:---|:---|:---|:---|:---|:---|:---|\n") for i, (ip, data) in enumerate(ssh_ips[:500], 1): sources = ip_source_map.get(ip, ["?"]) src = sources[0] if len(sources) == 1 else f"{sources[0]}+{len(sources)-1}" ssh_port = 22 if 22 in data["open_ports"] else 2222 ssh_ver = data["open_ports"].get(ssh_port, {}).get("banner", "")[:50] os_g = data["analysis"]["os_guess"] diff = data["analysis"]["ssh_difficulty"] stars = diff.get("difficulty_stars", "") remote = ", ".join(m for m in data["analysis"]["remote_methods"] if "SSH" not in m) notes = "; ".join(diff["notes"][:2]) f.write(f"| {i} | `{ip}` | {src} | {ssh_port} | {ssh_ver} | {os_g} | {stars} | {remote} | {notes} |\n") if len(ssh_ips) > 500: f.write(f"| ... | 共 {len(ssh_ips)} 个 | | | | | | | |\n") # SSH 列表文件 ssh_path = os.path.join(output_dir, f"kr_ssh_ips_{timestamp}.txt") with open(ssh_path, "w") as f: for ip, data in ssh_ips: ssh_port = 22 if 22 in data["open_ports"] else 2222 f.write(f"{ip}:{ssh_port}\n") print(f"\n报告: {md_path}") print(f"JSON: {json_path}") print(f"SSH列表: {ssh_path} ({len(ssh_ips)} 个)") return json_path, md_path, ssh_path # ========== 主入口 ========== def main(): parser = argparse.ArgumentParser(description="KR数据库IP全量扫描器") parser.add_argument("--input", "-i", default="/tmp/kr_new_ips.txt") parser.add_argument("--source-map", default="/tmp/kr_ip_source_map.json") parser.add_argument("--concurrency", "-c", type=int, default=8000) parser.add_argument("--timeout", "-t", type=float, default=2) parser.add_argument("--banner-timeout", type=float, default=1) parser.add_argument("--output", "-o", default="/Users/karuo/Documents/1、金:项目/3、自营项目/分布式算力矩阵/01_扫描模块/references") parser.add_argument("--limit", type=int, default=0) parser.add_argument("--skip-mongodb", action="store_true", help="跳过MongoDB导入") parser.add_argument("--mumayi-json", default="", help="木蚂蚁扫描结果JSON路径") args = parser.parse_args() # 读取 IP with open(args.input, "r") as f: ip_list = [line.strip() for line in f if line.strip()] # 读取来源映射 with open(args.source_map, "r") as f: ip_source_map = json.load(f) if args.limit > 0: ip_list = ip_list[:args.limit] print(f"加载 {len(ip_list):,} 个IP, 来源映射 {len(ip_source_map):,} 条") # 提升文件描述符 try: import resource soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) target = min(args.concurrency * 2 + 2000, hard) resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard)) print(f"文件描述符: {soft} → {target}") except: pass # 执行扫描 scanner = AsyncPortScanner( concurrency=args.concurrency, timeout=args.timeout, banner_timeout=args.banner_timeout, ) results = asyncio.run(scanner.run(ip_list)) if results: # 生成报告 json_path, md_path, ssh_path = generate_report(results, ip_source_map, args.output, len(ip_list)) # 导入MongoDB if not args.skip_mongodb: print("\n导入 MongoDB...") count = import_to_mongodb(results, ip_source_map) print(f"MongoDB 导入完成: {count:,} 条") # 导入木蚂蚁结果 if args.mumayi_json and os.path.exists(args.mumayi_json): print("\n导入木蚂蚁已扫描结果...") mumayi_count = import_mumayi_results(args.mumayi_json) print(f"木蚂蚁导入: {mumayi_count:,} 条") else: print("\n未发现任何开放端口") if __name__ == "__main__": main()