#!/usr/bin/env python3
"""
Distributed compute matrix - deep verification scanner v2.0
===========================================================

Problem addressed:
    A scanner that only performs a TCP connect reports many false positives
    (CGNAT, honeypots, middleboxes). This version adds a service
    verification layer so that a port is only marked as verified after a
    protocol-level exchange succeeded.

Verification strategy:
    SSH           -> must receive a banner starting with "SSH-"
    HTTP          -> must receive an "HTTP/" response
    HTTPS         -> must receive a TLS handshake or alert record
    RDP           -> must receive a TPKT (0x03) reply to a connection request
    VNC           -> must receive the "RFB " protocol header
    Telnet        -> must receive an IAC command (0xff) or a readable prompt
    BaoTa         -> HTTP response containing BaoTa panel markers
    SSH-Alt(2222) -> same as SSH

Pipeline:
    Phase 1: TCP connect sweep (high concurrency, coarse filter)
    Phase 2: protocol verification (medium concurrency, fine filter)
    Phase 3: quality score + honeypot detection + classification

Usage:
    python3 verified_scan.py --input /tmp/target_ips.txt --concurrency 5000
    python3 verified_scan.py --mongo-source KR --collection 分布式矩阵IP_已扫描 --reverify

NOTE(review): the copy of this file that was reviewed had lost its line
breaks and two spans of text. Sections rebuilt from context are marked with
"NOTE(review)" below and must be compared against version control.
"""
import argparse
import asyncio
import json
import os
import struct
import sys
import time
from collections import Counter, defaultdict
from datetime import datetime

# ========== Configuration ==========

SCAN_PORTS = {
    22: "SSH",
    2222: "SSH-Alt",
    23: "Telnet",
    80: "HTTP",
    443: "HTTPS",
    3389: "RDP",
    5900: "VNC",
    8888: "BaoTa",
}

# Validator used for each port; resolved to ProtocolValidator.validate_<name>.
PORT_VALIDATORS = {
    22: "ssh",
    2222: "ssh",
    23: "telnet",
    80: "http",
    443: "https",
    3389: "rdp",
    5900: "vnc",
    8888: "http_baota",
}

SSH_SIGNATURES = {
    "OpenSSH": "Linux/BSD",
    "dropbear": "嵌入式/路由器",
    "ROSSSH": "MikroTik路由器",
    "Cisco": "Cisco设备",
    "Comware": "H3C交换机",
    "HUAWEI": "华为设备",
    "RGOS": "锐捷设备",
    "NTOS": "网御设备",
    "libssh": "自定义SSH",
    "Serv-U": "Windows FTP/SSH",
    "WeOnlyDo": "Windows SSH",
    "SSH-2.0-Go": "Go应用",
    "SSH-2.0--": "隐藏版本",
}

OWN_IPS = {
    "42.194.232.22",
    "42.194.245.239",
    "43.139.27.93",
    "140.245.37.56",
    "119.233.228.177",
}

# Connection string for MongoDB. The default matches the previously
# hard-coded value; set the MONGO_URI environment variable to override it
# instead of keeping credentials in source.
MONGO_URI = os.environ.get(
    "MONGO_URI",
    "mongodb://admin:admin123@localhost:27017/?authSource=admin",
)


def _build_client_hello():
    """Return a minimal TLS ClientHello record used to probe port 443.

    NOTE(review): the original probe bytes were lost from the reviewed copy;
    this is a reconstruction. Only the response checks in
    ``validate_https`` survived.
    """
    cipher_suites = struct.pack(
        "!4H",
        0xC02F,  # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
        0xC030,  # TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384
        0x009C,  # TLS_RSA_WITH_AES_128_GCM_SHA256
        0x002F,  # TLS_RSA_WITH_AES_128_CBC_SHA
    )
    body = (
        b"\x03\x03"  # client_version: TLS 1.2
        + os.urandom(32)  # random
        + b"\x00"  # empty session id
        + struct.pack("!H", len(cipher_suites))
        + cipher_suites
        + b"\x01\x00"  # one compression method: null
    )
    # Handshake header: type 1 (ClientHello) + 24-bit length.
    handshake = b"\x01" + struct.pack("!I", len(body))[1:] + body
    # Record header: type 0x16 (handshake), record version TLS 1.0.
    return b"\x16\x03\x01" + struct.pack("!H", len(handshake)) + handshake


# ========== Protocol validators ==========

class ProtocolValidator:
    """Protocol-level checks confirming which service answers on a port.

    Every validator takes ``(reader, writer, ip, port, timeout)`` and returns
    a ``(verified, banner, detail)`` tuple. Validators never raise: timeouts
    and other errors are reported through ``detail``.
    """

    @staticmethod
    async def validate_ssh(reader, writer, ip, port, timeout=3):
        """SSH: a banner starting with 'SSH-' must arrive within *timeout*."""
        try:
            data = await asyncio.wait_for(reader.read(256), timeout=timeout)
            banner = data.decode("utf-8", errors="replace").strip()
            # "Exceeded ..." is accepted alongside a regular SSH banner.
            if banner.startswith(("SSH-", "Exceeded")):
                return True, banner[:200], "ssh_verified"
            # Banner mentions SSH but does not start with the protocol id.
            if "ssh" in banner.lower():
                return True, banner[:200], "ssh_partial"
            return False, banner[:100], "not_ssh"
        except asyncio.TimeoutError:
            return False, "", "ssh_timeout"
        except Exception as e:
            return False, "", f"ssh_error:{type(e).__name__}"

    @staticmethod
    async def validate_http(reader, writer, ip, port, timeout=3):
        """HTTP: send a HEAD request; the reply must start with 'HTTP/'."""
        try:
            request = (
                f"HEAD / HTTP/1.1\r\nHost: {ip}\r\nConnection: close\r\n"
                f"User-Agent: Mozilla/5.0\r\n\r\n"
            )
            writer.write(request.encode())
            await writer.drain()
            data = await asyncio.wait_for(reader.read(1024), timeout=timeout)
            response = data.decode("utf-8", errors="replace").strip()
            if response.startswith("HTTP/"):
                # Status code is the second whitespace-separated token.
                parts = response.split(None, 2)
                status_code = int(parts[1]) if len(parts) >= 2 else 0
                # First Server header, if any.
                server = ""
                for line in response.split("\r\n"):
                    if line.lower().startswith("server:"):
                        server = line.split(":", 1)[1].strip()[:100]
                        break
                return True, f"HTTP {status_code} | {server}", "http_verified"
            # NOTE(review): the original had one more branch here (it began
            # with `if "<`) that was lost from the reviewed copy. Following
            # the module's stated policy, anything without an HTTP status
            # line is rejected.
            return False, response[:50], "not_http"
        except asyncio.TimeoutError:
            return False, "", "http_timeout"
        except Exception as e:
            return False, "", f"http_error:{type(e).__name__}"

    @staticmethod
    async def validate_https(reader, writer, ip, port, timeout=3):
        """HTTPS: send a ClientHello; the reply must be a TLS record.

        NOTE(review): everything before the response checks is reconstructed.
        """
        try:
            writer.write(_build_client_hello())
            await writer.drain()
            data = await asyncio.wait_for(reader.read(256), timeout=timeout)
            if len(data) >= 5 and data[0] == 0x16:  # TLS handshake record
                return True, "TLS/SSL service", "https_verified"
            if len(data) >= 5 and data[0] == 0x15:  # TLS alert (still TLS)
                return True, "TLS/SSL (alert)", "https_alert"
            return False, f"non-tls({len(data)}b)", "not_https"
        except asyncio.TimeoutError:
            return False, "", "https_timeout"
        except Exception as e:
            return False, "", f"https_error:{type(e).__name__}"

    @staticmethod
    async def validate_rdp(reader, writer, ip, port, timeout=3):
        """RDP: send a connection request and check for a TPKT reply."""
        try:
            x224_request = bytes([
                0x0e,                    # X.224 length indicator = 14
                0xe0,                    # CR (Connection Request)
                0x00, 0x00,              # dst-ref
                0x00, 0x00,              # src-ref
                0x00,                    # class 0
                0x01,                    # RDP Negotiation Request
                0x00,                    # flags
                0x08, 0x00, 0x00, 0x00,  # length = 8
                0x00, 0x00, 0x00, 0x00,  # requested protocols (standard RDP)
            ])
            # TPKT header: version 3, reserved 0, total length incl. header.
            packet = (
                bytes([0x03, 0x00])
                + struct.pack("!H", len(x224_request) + 4)
                + x224_request
            )
            writer.write(packet)
            await writer.drain()
            data = await asyncio.wait_for(reader.read(256), timeout=timeout)
            if len(data) >= 4 and data[0] == 0x03:  # TPKT header
                return True, "RDP service", "rdp_verified"
            return False, f"non-rdp({len(data)}b)", "not_rdp"
        except asyncio.TimeoutError:
            return False, "", "rdp_timeout"
        except Exception as e:
            return False, "", f"rdp_error:{type(e).__name__}"

    @staticmethod
    async def validate_vnc(reader, writer, ip, port, timeout=3):
        """VNC: the server must open with an 'RFB ' protocol version."""
        try:
            data = await asyncio.wait_for(reader.read(256), timeout=timeout)
            text = data.decode("utf-8", errors="replace").strip()
            if text.startswith("RFB "):
                return True, text[:50], "vnc_verified"
            return False, text[:50], "not_vnc"
        except asyncio.TimeoutError:
            return False, "", "vnc_timeout"
        except Exception as e:
            return False, "", f"vnc_error:{type(e).__name__}"

    @staticmethod
    async def validate_telnet(reader, writer, ip, port, timeout=3):
        """Telnet: expect an IAC command (0xFF) or a readable login prompt."""
        try:
            data = await asyncio.wait_for(reader.read(512), timeout=timeout)
            if data and data[0] == 0xff:  # IAC command
                return True, "Telnet IAC", "telnet_verified"
            text = data.decode("utf-8", errors="replace").strip()
            lowered = text.lower()
            telnet_keywords = [
                "login", "username", "password", "welcome", "user name",
                "press enter", "cisco", "mikrotik", "huawei", "h3c", "zte",
                "console",
            ]
            if any(kw in lowered for kw in telnet_keywords):
                return True, text[:100], "telnet_prompt"
            if len(text) > 5:
                # Some data came back; treated as a possible telnet service.
                return True, text[:100], "telnet_data"
            return False, text[:50], "not_telnet"
        except asyncio.TimeoutError:
            return False, "", "telnet_timeout"
        except Exception as e:
            return False, "", f"telnet_error:{type(e).__name__}"

    @staticmethod
    async def validate_http_baota(reader, writer, ip, port, timeout=3):
        """BaoTa panel: HTTP response containing BaoTa markers.

        A valid HTTP response without BaoTa markers is still reported as
        verified, with detail ``http_not_baota``.
        """
        try:
            # The Host header follows the probed port (previously fixed 8888).
            request = (
                f"GET / HTTP/1.1\r\nHost: {ip}:{port}\r\nConnection: close\r\n"
                f"User-Agent: Mozilla/5.0\r\n\r\n"
            )
            writer.write(request.encode())
            await writer.drain()
            data = await asyncio.wait_for(reader.read(4096), timeout=timeout)
            response = data.decode("utf-8", errors="replace")
            # Markers that identify a BaoTa panel.
            baota_signs = ["宝塔", "bt.cn", "btpanel", "baota", "aapanel",
                           "安全入口", "/login", "BTPanel"]
            if response.startswith("HTTP/"):
                lowered = response.lower()
                if any(sign.lower() in lowered for sign in baota_signs):
                    return True, "BaoTa Panel", "baota_verified"
                # Valid HTTP, but not a BaoTa panel.
                parts = response.split(None, 2)
                status = int(parts[1]) if len(parts) >= 2 else 0
                return True, f"HTTP {status} (non-BaoTa)", "http_not_baota"
            # NOTE(review): the original had one more branch here (it began
            # with `if "<`) and the except clauses; both were lost from the
            # reviewed copy and are reconstructed conservatively.
            return False, response[:50], "not_http"
        except asyncio.TimeoutError:
            return False, "", "baota_timeout"
        except Exception as e:
            return False, "", f"baota_error:{type(e).__name__}"


# ========== Assessment ==========

def assess_ssh(banner, verified_ports):
    """Classify an SSH endpoint from its banner.

    NOTE(review): the original implementation was lost from the reviewed
    copy. Only the OS guess (driven by SSH_SIGNATURES) is reconstructed; the
    difficulty and deploy scores stay at the defaults that
    ``import_to_mongodb`` uses, so ``deploy_ready`` is always False until
    the real scoring is restored. TODO: restore from version control.

    Args:
        banner: SSH banner text, or "" when no SSH port was verified.
        verified_ports: set of verified port numbers (unused by this
            reconstruction).

    Returns:
        dict with keys ``difficulty``, ``difficulty_stars``, ``os_guess``,
        ``notes`` and ``deploy_score``.
    """
    os_guess = "Unknown"
    if banner:
        for signature, label in SSH_SIGNATURES.items():
            if signature in banner:
                os_guess = label
                break
    return {
        "difficulty": 5,
        "difficulty_stars": "",
        "os_guess": os_guess,
        "notes": [],
        "deploy_score": 0,
    }


def detect_honeypot(verified_ports, all_tcp_open):
    """Score how strongly an IP looks like a honeypot / CGNAT artefact.

    NOTE(review): the signature and the two initialisers were lost from the
    reviewed copy and are reconstructed from the call site and the body.

    Args:
        verified_ports: mapping of port -> port info for verified ports.
        all_tcp_open: iterable of ports that accepted a TCP connection.

    Returns:
        ``(is_honeypot, score, reasons)``; a score of 60 or more is
        classified as a honeypot.
    """
    score = 0
    reasons = []
    verified_set = set(verified_ports.keys())
    tcp_set = set(all_tcp_open)

    # 1. Many TCP-open ports but at most one passes verification.
    if len(tcp_set) >= 5 and len(verified_set) <= 1:
        score += 50
        reasons.append(f"TCP全通({len(tcp_set)}端口)但验证仅{len(verified_set)}个通过")

    # 2. All eight scanned ports open (CGNAT / honeypot signature).
    cgnat_combo = {22, 23, 80, 443, 2222, 3389, 5900, 8888}
    if tcp_set >= cgnat_combo:
        score += 30
        reasons.append("8端口全开(CGNAT/蜜罐特征)")

    # 3. Every remote-access port open, fewer than two of them verified.
    remote_ports = {22, 23, 3389, 5900}
    if tcp_set >= remote_ports and len(verified_set & remote_ports) < 2:
        score += 20
        reasons.append("所有远程端口TCP通但验证不过")

    # 4. No usable banner on any verified port.
    has_any_banner = any(
        v.get("banner", "") for v in verified_ports.values() if v.get("verified")
    )
    if len(tcp_set) >= 5 and not has_any_banner:
        score += 20
        reasons.append("大量端口无任何banner")

    is_honeypot = score >= 60
    return is_honeypot, score, reasons


# ========== Deep verification scanner ==========

class VerifiedScanner:
    """Two-phase scan: TCP connect sweep, then protocol verification."""

    def __init__(self, concurrency=5000, tcp_timeout=2, verify_timeout=3,
                 banner_timeout=2, verify_concurrency=2000):
        self.concurrency = concurrency  # Phase 1 TCP concurrency
        self.tcp_timeout = tcp_timeout
        self.verify_timeout = verify_timeout  # Phase 2 connect timeout
        self.banner_timeout = banner_timeout
        self.verify_concurrency = verify_concurrency  # Phase 2 concurrency
        self.total_ips = 0
        self.scanned_ips = 0
        self.tcp_open_count = 0
        self.verified_count = 0
        self.honeypot_count = 0
        self.results = {}
        self.start_time = None
        self.phase = 1
        self.lock = asyncio.Lock()

    # --- Phase 1: TCP connect ---

    async def tcp_check(self, ip, port, semaphore):
        """Plain TCP connect check; no banner is read.

        Returns ``(port, is_open)``. Connection failures and timeouts yield
        ``False``; task cancellation is allowed to propagate.
        """
        try:
            async with semaphore:
                _, writer = await asyncio.wait_for(
                    asyncio.open_connection(ip, port), timeout=self.tcp_timeout
                )
                writer.close()
                try:
                    await writer.wait_closed()
                except Exception:
                    # Best effort: the peer may already have reset the link.
                    pass
                return (port, True)
        except Exception:
            return (port, False)

    async def phase1_scan_ip(self, ip, semaphore):
        """Phase 1: TCP sweep of every port in SCAN_PORTS for one IP."""
        tasks = [self.tcp_check(ip, port, semaphore) for port in SCAN_PORTS]
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)
        tcp_open = [
            outcome[0]
            for outcome in outcomes
            if not isinstance(outcome, BaseException) and outcome[1]
        ]
        async with self.lock:
            self.scanned_ips += 1
            if tcp_open:
                self.tcp_open_count += 1
                self.results[ip] = {"ip": ip, "tcp_open": tcp_open, "verified": {}}
            if self.scanned_ips % 10000 == 0:
                self._print_progress()

    # --- Phase 2: protocol verification ---

    async def verify_port(self, ip, port, semaphore):
        """Phase 2: protocol-level verification of one ip:port.

        Returns ``(port, verified, banner, detail)``.
        """
        validator_name = PORT_VALIDATORS.get(port)
        if not validator_name:
            return port, False, "", "no_validator"
        try:
            async with semaphore:
                reader, writer = await asyncio.wait_for(
                    asyncio.open_connection(ip, port), timeout=self.verify_timeout
                )
                try:
                    validator = getattr(ProtocolValidator, f"validate_{validator_name}")
                    verified, banner, detail = await validator(
                        reader, writer, ip, port, timeout=self.banner_timeout
                    )
                    return port, verified, banner, detail
                finally:
                    writer.close()
                    try:
                        await writer.wait_closed()
                    except Exception:
                        # Best effort: ignore errors while tearing down.
                        pass
        except asyncio.TimeoutError:
            return port, False, "", "connect_timeout"
        except ConnectionRefusedError:
            return port, False, "", "connect_refused"
        except Exception as e:
            return port, False, "", f"connect_error:{type(e).__name__}"

    async def phase2_verify_ip(self, ip, semaphore):
        """Phase 2: verify every TCP-open port of one IP and summarise it."""
        ip_data = self.results.get(ip)
        if not ip_data:
            return
        tcp_open = ip_data["tcp_open"]
        tasks = [self.verify_port(ip, port, semaphore) for port in tcp_open]
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)

        verified_ports = {}
        for outcome in outcomes:
            if isinstance(outcome, BaseException):
                continue
            port, is_verified, banner, detail = outcome
            port_info = {
                "service": SCAN_PORTS.get(port, "unknown"),
                "tcp_open": True,
                "verified": is_verified,
                "banner": banner,
                "verify_detail": detail,
            }
            if is_verified:
                verified_ports[port] = port_info
            ip_data["verified"][port] = port_info

        # Honeypot detection works on the verified subset.
        is_honeypot, hp_score, hp_reasons = detect_honeypot(verified_ports, tcp_open)

        # SSH assessment: the first verified SSH port wins (22 before 2222).
        ssh_banner = ""
        ssh_port = None
        for candidate in (22, 2222):
            if candidate in verified_ports:
                ssh_banner = verified_ports[candidate]["banner"]
                ssh_port = candidate
                break
        ssh_info = assess_ssh(ssh_banner, set(verified_ports))

        verified_list = sorted(verified_ports)
        verified_count = len(verified_list)

        # BaoTa detection: detail starts with "baota" only on a real match.
        baota_detail = ip_data["verified"].get(8888, {}).get("verify_detail", "")
        baota_ok = baota_detail.startswith("baota")

        # Quality is computed before the summary fields are written.
        quality = self._calc_quality({
            "tcp_open": tcp_open,
            "verified_count": verified_count,
            "is_honeypot": is_honeypot,
        })

        ip_data.update({
            "verified_port_list": verified_list,
            "verified_count": verified_count,
            "tcp_open_count": len(tcp_open),
            "is_honeypot": is_honeypot,
            "honeypot_score": hp_score,
            "honeypot_reasons": hp_reasons,
            "ssh_open": ssh_port is not None,
            "ssh_port": ssh_port,
            "ssh_banner": ssh_banner,
            "ssh_difficulty": ssh_info["difficulty"],
            "ssh_difficulty_stars": ssh_info["difficulty_stars"],
            "os_guess": ssh_info["os_guess"],
            "ssh_notes": ssh_info["notes"],
            "deploy_score": ssh_info["deploy_score"],
            "deploy_ready": ssh_info["deploy_score"] >= 50 and not is_honeypot,
            "rdp_verified": 3389 in verified_ports,
            "vnc_verified": 5900 in verified_ports,
            "telnet_verified": 23 in verified_ports,
            "http_verified": 80 in verified_ports,
            "https_verified": 443 in verified_ports,
            "baota_verified": baota_ok,
            "scan_time": datetime.now().isoformat(),
            "connection_quality": quality,
        })

        async with self.lock:
            self.verified_count += 1
            if is_honeypot:
                self.honeypot_count += 1
            if self.verified_count % 5000 == 0:
                self._print_progress()

    def _calc_quality(self, ip_data):
        """Connection quality score in the range 0-100.

        The score is the percentage of TCP-open ports that passed
        verification, minus 50 (floored at 0) for honeypots.
        """
        tcp_open = len(ip_data.get("tcp_open", []))
        if tcp_open == 0:
            return 0
        verified = ip_data.get("verified_count", 0)
        quality = int(verified / tcp_open * 100)
        if ip_data.get("is_honeypot"):
            quality = max(0, quality - 50)
        return quality

    # --- Main execution ---

    async def run(self, ip_list):
        """Run both phases over *ip_list* and return the results mapping."""
        self.total_ips = len(ip_list)
        self.start_time = time.time()
        rule = "=" * 70
        ports_desc = ", ".join(f"{p}({n})" for p, n in sorted(SCAN_PORTS.items()))

        print(rule)
        print("分布式算力矩阵 — 深度验证扫描器 v2.0")
        print(rule)
        print(f"目标IP: {self.total_ips:,}")
        print(f"扫描端口: {len(SCAN_PORTS)} ({ports_desc})")
        print(f"Phase1 并发: {self.concurrency} | TCP超时: {self.tcp_timeout}s")
        print(f"Phase2 并发: {self.verify_concurrency} | 验证超时: {self.verify_timeout}s")
        print(f"开始: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(rule)

        # Batches bound the number of pending tasks held in memory.
        batch_size = 10000

        # === Phase 1: TCP connect ===
        self.phase = 1
        print(f"\n[Phase 1] TCP Connect 快速扫描 ({self.total_ips:,} IPs × {len(SCAN_PORTS)} ports)...")
        semaphore1 = asyncio.Semaphore(self.concurrency)
        for start in range(0, len(ip_list), batch_size):
            batch = ip_list[start:start + batch_size]
            await asyncio.gather(
                *(self.phase1_scan_ip(ip, semaphore1) for ip in batch),
                return_exceptions=True,
            )

        phase1_time = time.time() - self.start_time
        print(f"\n[Phase 1 完成] {phase1_time:.0f}s | "
              f"TCP开放: {self.tcp_open_count:,}/{self.total_ips:,} "
              f"({self.tcp_open_count/max(1,self.total_ips)*100:.1f}%)")

        # === Phase 2: protocol verification ===
        self.phase = 2
        ips_to_verify = [ip for ip, d in self.results.items() if d.get("tcp_open")]
        print(f"\n[Phase 2] 协议验证 ({len(ips_to_verify):,} IPs)...")
        semaphore2 = asyncio.Semaphore(self.verify_concurrency)
        for start in range(0, len(ips_to_verify), batch_size):
            batch = ips_to_verify[start:start + batch_size]
            await asyncio.gather(
                *(self.phase2_verify_ip(ip, semaphore2) for ip in batch),
                return_exceptions=True,
            )

        total_time = time.time() - self.start_time

        # === Summary ===
        real_open = sum(
            1 for d in self.results.values()
            if d.get("verified_count", 0) > 0 and not d.get("is_honeypot")
        )
        ssh_real = sum(
            1 for d in self.results.values()
            if d.get("ssh_open") and not d.get("is_honeypot")
        )

        print(f"\n{rule}")
        print(f"扫描完成! 总耗时 {total_time:.0f}s ({total_time/60:.1f}min)")
        print(rule)
        print(f"TCP开放IP: {self.tcp_open_count:,}")
        print(f"验证通过IP: {real_open:,} (去蜜罐后)")
        print(f"蜜罐/CGNAT: {self.honeypot_count:,}")
        print(f"真实SSH: {ssh_real:,}")
        print(rule)
        return self.results

    def _print_progress(self):
        """Print one progress line for the current phase."""
        elapsed = time.time() - self.start_time
        if self.phase == 1:
            rate = self.scanned_ips / max(1, elapsed)
            remaining = (self.total_ips - self.scanned_ips) / max(1, rate)
            pct = self.scanned_ips / max(1, self.total_ips) * 100
            print(f" [P1 {pct:5.1f}%] {self.scanned_ips:,}/{self.total_ips:,} | "
                  f"TCP开放: {self.tcp_open_count:,} | "
                  f"{rate:.0f} IP/s | 剩余 {remaining/60:.0f}min", flush=True)
        else:
            total_verify = sum(1 for d in self.results.values() if d.get("tcp_open"))
            pct = self.verified_count / max(1, total_verify) * 100
            print(f" [P2 {pct:5.1f}%] {self.verified_count:,}/{total_verify:,} | "
                  f"蜜罐: {self.honeypot_count:,}", flush=True)


# ========== MongoDB import / export ==========

def build_mongo_doc(ip, data, source="unknown"):
    """Build the MongoDB document for one scanned IP.

    Port numbers used as keys of ``port_details`` are converted to strings
    because BSON documents only accept string keys.
    """
    tcp_open = data.get("tcp_open", [])
    return {
        "ip": ip,
        "source_col": source,
        "scan_time": data.get("scan_time", datetime.now().isoformat()),
        # TCP layer
        "tcp_open_ports": sorted(tcp_open),
        "tcp_open_count": len(tcp_open),
        # Verification layer
        "verified_ports": sorted(data.get("verified_port_list", [])),
        "verified_count": data.get("verified_count", 0),
        "port_details": {
            str(port): info for port, info in data.get("verified", {}).items()
        },
        # Honeypot
        "is_honeypot": data.get("is_honeypot", False),
        "honeypot_score": data.get("honeypot_score", 0),
        "honeypot_reasons": data.get("honeypot_reasons", []),
        # SSH
        "ssh_open": data.get("ssh_open", False),
        "ssh_port": data.get("ssh_port"),
        "ssh_banner": data.get("ssh_banner", ""),
        "ssh_difficulty": data.get("ssh_difficulty", 5),
        "ssh_difficulty_stars": data.get("ssh_difficulty_stars", ""),
        "os_guess": data.get("os_guess", "Unknown"),
        "ssh_notes": data.get("ssh_notes", []),
        # Other remote-access services
        "rdp_verified": data.get("rdp_verified", False),
        "vnc_verified": data.get("vnc_verified", False),
        "telnet_verified": data.get("telnet_verified", False),
        "http_verified": data.get("http_verified", False),
        "https_verified": data.get("https_verified", False),
        "baota_verified": data.get("baota_verified", False),
        # Scores
        "deploy_score": data.get("deploy_score", 0),
        "deploy_ready": data.get("deploy_ready", False),
        "connection_quality": data.get("connection_quality", 0),
    }


def import_to_mongodb(results, ip_source_map=None, db_name="KR",
                      collection_name="分布式矩阵IP_已验证"):
    """Write verification results to MongoDB and return the document count.

    WARNING: when there is at least one document to write, the target
    collection is emptied first (``delete_many({})``).
    """
    import pymongo

    docs = []
    for ip, data in results.items():
        if data.get("verified_count", 0) == 0 and not data.get("tcp_open"):
            continue  # nothing open on this IP
        source = "unknown"
        if ip_source_map:
            sources = ip_source_map.get(ip, ["unknown"])
            if isinstance(sources, list):
                source = sources[0] if sources else "unknown"
            else:
                source = sources
        docs.append(build_mongo_doc(ip, data, source))

    client = pymongo.MongoClient(MONGO_URI)
    try:
        coll = client[db_name][collection_name]
        if docs:
            coll.delete_many({})
            # Insert in batches to keep each request small.
            batch = 5000
            for start in range(0, len(docs), batch):
                coll.insert_many(docs[start:start + batch], ordered=False)
            for field in ("ip", "ssh_open", "deploy_score", "is_honeypot",
                          "connection_quality", "source_col"):
                coll.create_index(field)
    finally:
        client.close()

    print(f"MongoDB {db_name}.{collection_name}: 写入 {len(docs):,} 条")
    return len(docs)


def load_from_mongodb_existing(db_name="KR", collection_name="分布式矩阵IP_已扫描"):
    """Load the IP list of an existing collection for re-verification.

    Returns ``(ip_list, ip_source_map)``; duplicate IPs keep their first
    occurrence.
    """
    import pymongo

    client = pymongo.MongoClient(MONGO_URI)
    try:
        coll = client[db_name][collection_name]
        ip_list = []
        ip_source_map = {}
        for doc in coll.find({}, {"ip": 1, "source_col": 1}):
            ip = doc["ip"]
            if ip not in ip_source_map:
                ip_list.append(ip)
                ip_source_map[ip] = doc.get("source_col", "unknown")
    finally:
        client.close()
    return ip_list, ip_source_map


# ========== Report ==========

def _md_cell(text):
    """Make *text* safe for a single Markdown table cell."""
    return str(text).replace("\r", " ").replace("\n", " ").replace("|", "\\|")


def generate_report(results, output_dir, total_ips):
    """Write the Markdown report, the SSH list and the JSON dump.

    Returns the path of the Markdown report.
    """
    os.makedirs(output_dir, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")

    entries = list(results.values())
    stats = {
        "total_scanned": total_ips,
        "tcp_open": sum(1 for d in entries if d.get("tcp_open")),
        "verified": sum(1 for d in entries if d.get("verified_count", 0) > 0),
        "honeypot": sum(1 for d in entries if d.get("is_honeypot")),
        "ssh_real": sum(1 for d in entries
                        if d.get("ssh_open") and not d.get("is_honeypot")),
        "deploy_ready": sum(1 for d in entries if d.get("deploy_ready")),
    }

    # Real SSH endpoints (honeypots removed), best deploy score first.
    ssh_ips = [(ip, d) for ip, d in results.items()
               if d.get("ssh_open") and not d.get("is_honeypot")]
    ssh_ips.sort(key=lambda item: -item[1].get("deploy_score", 0))

    # Markdown
    md_path = os.path.join(output_dir, f"深度验证扫描报告_{ts}.md")
    with open(md_path, "w", encoding="utf-8") as f:
        f.write("# 深度验证扫描报告\n\n")
        f.write(f"> 时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("> 扫描器: verified_scan.py v2.0(两阶段:TCP+协议验证)\n\n")
        f.write("## 总览\n\n| 指标 | 数值 |\n|:---|:---|\n")
        f.write(f"| 扫描总IP | {stats['total_scanned']:,} |\n")
        f.write(f"| TCP连接成功 | {stats['tcp_open']:,} |\n")
        f.write(f"| **协议验证通过** | **{stats['verified']:,}** |\n")
        f.write(f"| 蜜罐/CGNAT排除 | {stats['honeypot']:,} |\n")
        f.write(f"| **真实SSH** | **{stats['ssh_real']:,}** |\n")
        f.write(f"| **可部署节点** | **{stats['deploy_ready']:,}** |\n\n")

        # False-positive rate of the plain TCP sweep.
        if stats['tcp_open'] > 0:
            rejected = stats['tcp_open'] - stats['verified']
            false_positive = rejected / stats['tcp_open'] * 100
            f.write(f"**误报率**: {false_positive:.1f}% "
                    f"({rejected:,} TCP假阳性被排除)\n\n")

        f.write(f"## 真实SSH IP ({len(ssh_ips)} 个)\n\n")
        f.write("| # | IP | 端口 | OS | Banner | 难度 | 质量 | 部署分 |\n")
        f.write("|:---|:---|:---|:---|:---|:---|:---|:---|\n")
        for i, (ip, d) in enumerate(ssh_ips[:200], 1):
            f.write(f"| {i} | `{ip}` | {d.get('ssh_port',22)} "
                    f"| {_md_cell(d.get('os_guess','-'))} "
                    f"| {_md_cell(d.get('ssh_banner','')[:50])} "
                    f"| {d.get('ssh_difficulty_stars','-')} "
                    f"| {d.get('connection_quality',0)} "
                    f"| {d.get('deploy_score',0)} |\n")
        if len(ssh_ips) > 200:
            f.write(f"| ... | 共{len(ssh_ips)}个 | | | | | | |\n")

    # Plain ip:port list of real SSH endpoints.
    ssh_path = os.path.join(output_dir, f"verified_ssh_ips_{ts}.txt")
    with open(ssh_path, "w") as f:
        for ip, d in ssh_ips:
            f.write(f"{ip}:{d.get('ssh_port', 22)}\n")

    # JSON dump of verified IPs, without the per-port detail mapping.
    json_path = os.path.join(output_dir, f"verified_scan_{ts}.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump({"stats": stats, "results": {
            ip: {k: v for k, v in d.items() if k != "verified"}
            for ip, d in results.items() if d.get("verified_count", 0) > 0
        }}, f, ensure_ascii=False, indent=2)

    print(f"\n报告: {md_path}")
    print(f"SSH列表: {ssh_path} ({len(ssh_ips)} 个)")
    print(f"JSON: {json_path}")
    return md_path


# ========== Entry point ==========

def _raise_fd_limit(wanted):
    """Raise the soft RLIMIT_NOFILE towards *wanted*; never lower it."""
    try:
        import resource
    except ImportError:
        return  # platform without the resource module
    try:
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        target = wanted if hard == resource.RLIM_INFINITY else min(wanted, hard)
        if soft == resource.RLIM_INFINITY or target <= soft:
            return  # already high enough
        resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
    except (ValueError, OSError):
        return  # best effort: keep the current limit
    print(f"文件描述符: {soft} → {target}")


def main():
    """Parse arguments, load IPs, run the scan and export the results."""
    parser = argparse.ArgumentParser(description="分布式算力矩阵 深度验证扫描器 v2.0")
    parser.add_argument("--input", "-i", help="IP列表文件路径")
    parser.add_argument("--mongo-source", help="从MongoDB加载IP (数据库名)")
    parser.add_argument("--collection", default="分布式矩阵IP_已扫描",
                        help="MongoDB集合名 (默认: 分布式矩阵IP_已扫描)")
    parser.add_argument("--reverify", action="store_true", help="对已扫描表做二次协议验证")
    parser.add_argument("--concurrency", "-c", type=int, default=5000,
                        help="Phase1 TCP并发 (默认5000)")
    parser.add_argument("--verify-concurrency", type=int, default=2000,
                        help="Phase2 验证并发 (默认2000)")
    parser.add_argument("--tcp-timeout", type=float, default=2, help="TCP连接超时 (默认2s)")
    parser.add_argument("--verify-timeout", type=float, default=3, help="协议验证超时 (默认3s)")
    parser.add_argument("--banner-timeout", type=float, default=2, help="Banner读取超时 (默认2s)")
    parser.add_argument("--output", "-o",
                        default="/Users/karuo/Documents/1、金:项目/3、自营项目/分布式算力矩阵/01_扫描模块/references")
    parser.add_argument("--limit", type=int, default=0)
    parser.add_argument("--skip-mongodb", action="store_true")
    parser.add_argument("--source-map", help="IP→来源JSON映射文件")
    args = parser.parse_args()

    # Load the IP list.
    ip_source_map = {}
    if args.reverify or args.mongo_source:
        db_name = args.mongo_source or "KR"
        print(f"从 MongoDB {db_name}.{args.collection} 加载IP...")
        ip_list, ip_source_map = load_from_mongodb_existing(db_name, args.collection)
        print(f" 加载 {len(ip_list):,} 个IP")
    elif args.input:
        with open(args.input, "r") as f:
            ip_list = [line.strip() for line in f if line.strip()]
        if args.source_map:
            with open(args.source_map, "r") as f:
                ip_source_map = json.load(f)
    else:
        print("错误: 需要 --input 或 --mongo-source 或 --reverify")
        sys.exit(1)

    # Never scan our own hosts.
    ip_list = [ip for ip in ip_list if ip not in OWN_IPS]
    if args.limit > 0:
        ip_list = ip_list[:args.limit]
    print(f"待扫描: {len(ip_list):,} IPs")

    # Each in-flight connection needs a descriptor; size for the larger phase.
    _raise_fd_limit(max(args.concurrency, args.verify_concurrency) * 2 + 2000)

    scanner = VerifiedScanner(
        concurrency=args.concurrency,
        tcp_timeout=args.tcp_timeout,
        verify_timeout=args.verify_timeout,
        banner_timeout=args.banner_timeout,
        verify_concurrency=args.verify_concurrency,
    )
    results = asyncio.run(scanner.run(ip_list))

    if results:
        generate_report(results, args.output, len(ip_list))
        if not args.skip_mongodb:
            print("\n导入 MongoDB...")
            count = import_to_mongodb(results, ip_source_map)
            print(f"完成: {count:,} 条")
    else:
        print("无结果")


if __name__ == "__main__":
    main()