Files
suanli-juzhen/01_扫描模块/scripts/verified_scan.py
卡若 048cc32afc 🎯 初始提交:分布式算力矩阵 v1.0
- 6 大模块:扫描/账号管理/节点部署/暴力破解/算力调度/监控运维
- SKILL 总控 + 子模块 SKILL
- 排除大文件(>5MB)与敏感凭证

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-15 22:46:54 +08:00

907 lines
36 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
分布式算力矩阵 — 深度验证扫描器 v2.0
========================================
解决的核心问题:
当前扫描器只做 TCP connect导致大量 CGNAT/蜜罐/中间件误报。
本版增加「服务验证层」,确保每个标记为 open 的端口都经过协议握手验证。
验证策略:
SSH → 必须收到 "SSH-" 开头的 banner
HTTP → 必须收到 "HTTP/" 响应
RDP → 必须收到 RDP Negotiation Response (0x03)
VNC → 必须收到 "RFB " 协议头
Telnet → 必须收到 IAC 命令 (0xff) 或可打印提示
BaoTa → HTTP 响应中含宝塔特征
SSH-Alt(2222) → 同 SSH
流水线:
Phase 1: TCP Connect 快速扫描(高并发,粗筛)
Phase 2: 协议验证(中并发,精筛)
Phase 3: 质量评分 + 蜜罐检测 + 分类
用法:
python3 verified_scan.py --input /tmp/target_ips.txt --concurrency 5000
python3 verified_scan.py --mongo-source KR --collection 分布式矩阵IP_已扫描 --reverify
"""
import asyncio
import json
import time
import sys
import os
import struct
import argparse
from datetime import datetime
from collections import defaultdict, Counter
# ========== 配置 ==========
SCAN_PORTS = {
22: "SSH",
2222: "SSH-Alt",
23: "Telnet",
80: "HTTP",
443: "HTTPS",
3389: "RDP",
5900: "VNC",
8888: "BaoTa",
}
# 每个端口的验证规则
PORT_VALIDATORS = {
22: "ssh",
2222: "ssh",
23: "telnet",
80: "http",
443: "https",
3389: "rdp",
5900: "vnc",
8888: "http_baota",
}
SSH_SIGNATURES = {
"OpenSSH": "Linux/BSD",
"dropbear": "嵌入式/路由器",
"ROSSSH": "MikroTik路由器",
"Cisco": "Cisco设备",
"Comware": "H3C交换机",
"HUAWEI": "华为设备",
"RGOS": "锐捷设备",
"NTOS": "网御设备",
"libssh": "自定义SSH",
"Serv-U": "Windows FTP/SSH",
"WeOnlyDo": "Windows SSH",
"SSH-2.0-Go": "Go应用",
"SSH-2.0--": "隐藏版本",
}
OWN_IPS = {
"42.194.232.22", "42.194.245.239", "43.139.27.93",
"140.245.37.56", "119.233.228.177",
}
# ========== 协议验证器 ==========
class ProtocolValidator:
"""协议层验证 — 确保端口上真正运行着对应的服务"""
@staticmethod
async def validate_ssh(reader, writer, ip, port, timeout=3):
"""SSH: 必须在 timeout 内收到 'SSH-' 开头的 banner"""
try:
data = await asyncio.wait_for(reader.read(256), timeout=timeout)
banner = data.decode("utf-8", errors="replace").strip()
if banner.startswith("SSH-") or banner.startswith("Exceeded"):
return True, banner[:200], "ssh_verified"
# 某些 SSH 服务器发送前需要等一下
if "ssh" in banner.lower() or "SSH" in banner:
return True, banner[:200], "ssh_partial"
return False, banner[:100], "not_ssh"
except asyncio.TimeoutError:
return False, "", "ssh_timeout"
except Exception as e:
return False, "", f"ssh_error:{type(e).__name__}"
@staticmethod
async def validate_http(reader, writer, ip, port, timeout=3):
"""HTTP: 发 HEAD 请求,必须收到 'HTTP/' 响应"""
try:
request = f"HEAD / HTTP/1.1\r\nHost: {ip}\r\nConnection: close\r\nUser-Agent: Mozilla/5.0\r\n\r\n"
writer.write(request.encode())
await writer.drain()
data = await asyncio.wait_for(reader.read(1024), timeout=timeout)
response = data.decode("utf-8", errors="replace").strip()
if response.startswith("HTTP/"):
# 提取状态码
parts = response.split(None, 2)
status_code = int(parts[1]) if len(parts) >= 2 else 0
# 提取 Server header
server = ""
for line in response.split("\r\n"):
if line.lower().startswith("server:"):
server = line.split(":", 1)[1].strip()[:100]
break
return True, f"HTTP {status_code} | {server}", "http_verified"
if "<html" in response.lower() or "<HTML" in response:
return True, "HTML response (no HTTP header)", "http_html_only"
return False, response[:80], "not_http"
except asyncio.TimeoutError:
return False, "", "http_timeout"
except Exception as e:
return False, "", f"http_error:{type(e).__name__}"
@staticmethod
async def validate_https(reader, writer, ip, port, timeout=3):
"""HTTPS: 尝试 TLS ClientHello看是否有 ServerHello 回复"""
try:
# 简化: 发送 TLS ClientHello 的最小包
# TLS record: ContentType=22(Handshake), Version=0x0301(TLS1.0)
client_hello = bytes([
0x16, 0x03, 0x01, 0x00, 0xc8, # TLS record header
0x01, 0x00, 0x00, 0xc4, # Handshake: ClientHello
0x03, 0x03, # Version TLS 1.2
]) + os.urandom(32) + bytes([ # Random
0x00, # Session ID length = 0
0x00, 0x04, # Cipher suites length
0x00, 0x2f, # TLS_RSA_WITH_AES_128_CBC_SHA
0x00, 0xff, # TLS_EMPTY_RENEGOTIATION_INFO
0x01, 0x00, # Compression: null
0x00, 0x97, # Extensions length
]) + bytes(0x97) # Extensions padding
writer.write(client_hello)
await writer.drain()
data = await asyncio.wait_for(reader.read(256), timeout=timeout)
if len(data) >= 5 and data[0] == 0x16: # TLS Handshake
return True, "TLS/SSL service", "https_verified"
if len(data) >= 5 and data[0] == 0x15: # TLS Alert (still TLS)
return True, "TLS/SSL (alert)", "https_alert"
return False, f"non-tls({len(data)}b)", "not_https"
except asyncio.TimeoutError:
return False, "", "https_timeout"
except Exception as e:
return False, "", f"https_error:{type(e).__name__}"
@staticmethod
async def validate_rdp(reader, writer, ip, port, timeout=3):
"""RDP: 发送 Connection Request, 检查是否有 Confirm 回复"""
try:
# X.224 Connection Request
rdp_neg = bytes([
0x03, 0x00, 0x00, 0x13, # TPKT: version=3, length=19
0x0e, # X.224: length=14
0xe0, # CR (Connection Request)
0x00, 0x00, # dst-ref
0x00, 0x00, # src-ref
0x00, # class 0
0x01, # RDP Negotiation Request
0x00, # flags
0x08, 0x00, 0x00, 0x00, # length=8
0x00, 0x00, 0x00, 0x00, # requested protocols (standard RDP)
])
# 修正长度
rdp_neg = bytes([0x03, 0x00, 0x00, len(rdp_neg)]) + rdp_neg[4:]
writer.write(rdp_neg)
await writer.drain()
data = await asyncio.wait_for(reader.read(256), timeout=timeout)
if len(data) >= 4 and data[0] == 0x03: # TPKT header
return True, "RDP service", "rdp_verified"
return False, f"non-rdp({len(data)}b)", "not_rdp"
except asyncio.TimeoutError:
return False, "", "rdp_timeout"
except Exception as e:
return False, "", f"rdp_error:{type(e).__name__}"
@staticmethod
async def validate_vnc(reader, writer, ip, port, timeout=3):
"""VNC: 必须收到 'RFB ' 开头的协议版本"""
try:
data = await asyncio.wait_for(reader.read(256), timeout=timeout)
text = data.decode("utf-8", errors="replace").strip()
if text.startswith("RFB "):
return True, text[:50], "vnc_verified"
return False, text[:50], "not_vnc"
except asyncio.TimeoutError:
return False, "", "vnc_timeout"
except Exception as e:
return False, "", f"vnc_error:{type(e).__name__}"
@staticmethod
async def validate_telnet(reader, writer, ip, port, timeout=3):
"""Telnet: 必须收到 IAC 命令(0xFF) 或可读的登录提示"""
try:
data = await asyncio.wait_for(reader.read(512), timeout=timeout)
if data and data[0] == 0xff: # IAC command
return True, "Telnet IAC", "telnet_verified"
text = data.decode("utf-8", errors="replace").strip()
telnet_keywords = ["login", "username", "password", "welcome",
"user name", "press enter", "cisco", "mikrotik",
"huawei", "h3c", "zte", "console"]
if any(kw in text.lower() for kw in telnet_keywords):
return True, text[:100], "telnet_prompt"
if len(text) > 5: # 有一些数据回来,可能是 telnet
return True, text[:100], "telnet_data"
return False, text[:50], "not_telnet"
except asyncio.TimeoutError:
return False, "", "telnet_timeout"
except Exception as e:
return False, "", f"telnet_error:{type(e).__name__}"
@staticmethod
async def validate_http_baota(reader, writer, ip, port, timeout=3):
"""宝塔面板: HTTP 响应中含宝塔特征"""
try:
request = f"GET / HTTP/1.1\r\nHost: {ip}:8888\r\nConnection: close\r\nUser-Agent: Mozilla/5.0\r\n\r\n"
writer.write(request.encode())
await writer.drain()
data = await asyncio.wait_for(reader.read(4096), timeout=timeout)
response = data.decode("utf-8", errors="replace")
# 宝塔面板特征
baota_signs = ["宝塔", "bt.cn", "btpanel", "baota", "aapanel",
"安全入口", "/login", "BTPanel"]
if response.startswith("HTTP/"):
for sign in baota_signs:
if sign.lower() in response.lower():
return True, f"BaoTa Panel", "baota_verified"
# 不是宝塔但是有效HTTP
parts = response.split(None, 2)
status = int(parts[1]) if len(parts) >= 2 else 0
return True, f"HTTP {status} (non-BaoTa)", "http_not_baota"
if "<html" in response.lower():
return True, "HTML response", "http_html"
return False, response[:80], "not_http"
except asyncio.TimeoutError:
return False, "", "baota_timeout"
except Exception as e:
return False, "", f"baota_error:{type(e).__name__}"
# ========== SSH 分析 ==========
def assess_ssh(ssh_banner, open_ports):
"""SSH 综合评估: 难度 + OS + 部署建议"""
if not ssh_banner:
return {"difficulty": 5, "os_guess": "Unknown", "notes": ["无SSH"],
"deploy_score": 0, "ssh_version": ""}
banner_lower = ssh_banner.lower()
difficulty = 3
notes = []
os_guess = "Unknown"
# OS识别
for sig, os_type in SSH_SIGNATURES.items():
if sig.lower() in banner_lower:
os_guess = os_type
break
if "ubuntu" in banner_lower:
os_guess = "Ubuntu Linux"
elif "debian" in banner_lower:
os_guess = "Debian Linux"
elif "centos" in banner_lower or "el7" in banner_lower or "el8" in banner_lower:
os_guess = "CentOS/RHEL"
# 版本分析
if "openssh_4." in banner_lower or "openssh_5." in banner_lower:
difficulty -= 1
notes.append("极老版本(可能有漏洞)")
elif "openssh_6." in banner_lower or "openssh_7.4" in banner_lower:
notes.append("较老版本")
elif "openssh_9." in banner_lower:
difficulty += 1
notes.append("新版本(安全性高)")
if "dropbear" in banner_lower:
difficulty -= 1
notes.append("嵌入式(可能默认密码)")
if "cisco" in banner_lower or "comware" in banner_lower or "huawei" in banner_lower:
difficulty -= 1
notes.append("网络设备(默认凭证)")
if 23 in open_ports:
difficulty -= 1
notes.append("有Telnet")
if 5900 in open_ports:
difficulty -= 1
notes.append("有VNC")
difficulty = max(1, min(5, difficulty))
# 部署评分
deploy_score = 50 # SSH可达基础分
if os_guess in ("Ubuntu Linux", "Debian Linux", "CentOS/RHEL", "Linux/BSD"):
deploy_score += 30
elif os_guess == "Unknown":
deploy_score += 15
if os_guess in ("H3C交换机", "华为设备", "Cisco设备", "锐捷设备", "嵌入式/路由器", "MikroTik路由器", "网御设备"):
deploy_score -= 40 # 网络设备不适合部署
if 80 in open_ports or 443 in open_ports:
deploy_score += 10
if 3389 in open_ports:
deploy_score -= 10 # Windows
return {
"difficulty": difficulty,
"difficulty_stars": "" * difficulty + "" * (5 - difficulty),
"os_guess": os_guess,
"ssh_version": ssh_banner[:200],
"notes": notes,
"deploy_score": deploy_score,
}
# ========== 蜜罐检测 ==========
def detect_honeypot(verified_ports, all_tcp_open):
"""蜜罐检测: 返回 (is_honeypot, confidence, reason)"""
reasons = []
score = 0 # 0-100, >=60 判定蜜罐
verified_set = set(verified_ports.keys())
tcp_set = set(all_tcp_open)
# 1. TCP连接数 vs 验证通过数比例
if len(tcp_set) >= 5 and len(verified_set) <= 1:
score += 50
reasons.append(f"TCP全通({len(tcp_set)}端口)但验证仅{len(verified_set)}个通过")
# 2. 8端口全开特征 (CGNAT/蜜罐)
cgnat_combo = {22, 23, 80, 443, 2222, 3389, 5900, 8888}
if tcp_set >= cgnat_combo:
score += 30
reasons.append("8端口全开(CGNAT/蜜罐特征)")
# 3. 所有远程方式都开放
remote_ports = {22, 23, 3389, 5900}
if tcp_set >= remote_ports and len(verified_set & remote_ports) < 2:
score += 20
reasons.append("所有远程端口TCP通但验证不过")
# 4. 没有任何有效banner
has_any_banner = any(
v.get("banner", "") for v in verified_ports.values() if v.get("verified")
)
if len(tcp_set) >= 5 and not has_any_banner:
score += 20
reasons.append("大量端口无任何banner")
is_honeypot = score >= 60
return is_honeypot, score, reasons
# ========== 深度验证扫描器 ==========
class VerifiedScanner:
"""两阶段扫描: TCP快筛 → 协议验证"""
def __init__(self, concurrency=5000, tcp_timeout=2, verify_timeout=3,
banner_timeout=2, verify_concurrency=2000):
self.concurrency = concurrency # Phase1 TCP并发
self.tcp_timeout = tcp_timeout
self.verify_timeout = verify_timeout # Phase2 验证超时
self.banner_timeout = banner_timeout
self.verify_concurrency = verify_concurrency # Phase2 并发
self.total_ips = 0
self.scanned_ips = 0
self.tcp_open_count = 0
self.verified_count = 0
self.honeypot_count = 0
self.results = {}
self.start_time = None
self.phase = 1
self.lock = asyncio.Lock()
# --- Phase 1: TCP Connect ---
async def tcp_check(self, ip, port, semaphore):
"""纯TCP连接检查不读banner"""
try:
async with semaphore:
_, writer = await asyncio.wait_for(
asyncio.open_connection(ip, port),
timeout=self.tcp_timeout
)
writer.close()
try:
await writer.wait_closed()
except:
pass
return (port, True)
except:
return (port, False)
async def phase1_scan_ip(self, ip, semaphore):
"""Phase1: 对一个IP做TCP快扫"""
tasks = [self.tcp_check(ip, port, semaphore) for port in SCAN_PORTS]
results = await asyncio.gather(*tasks, return_exceptions=True)
tcp_open = []
for r in results:
if isinstance(r, Exception):
continue
port, is_open = r
if is_open:
tcp_open.append(port)
async with self.lock:
self.scanned_ips += 1
if tcp_open:
self.tcp_open_count += 1
self.results[ip] = {"ip": ip, "tcp_open": tcp_open, "verified": {}}
if self.scanned_ips % 10000 == 0:
self._print_progress()
# --- Phase 2: 协议验证 ---
async def verify_port(self, ip, port, semaphore):
"""Phase2: 对一个 IP:Port 做协议层验证"""
validator_name = PORT_VALIDATORS.get(port)
if not validator_name:
return port, False, "", "no_validator"
try:
async with semaphore:
reader, writer = await asyncio.wait_for(
asyncio.open_connection(ip, port),
timeout=self.verify_timeout
)
try:
validator = getattr(ProtocolValidator, f"validate_{validator_name}")
verified, banner, detail = await validator(reader, writer, ip, port,
timeout=self.banner_timeout)
return port, verified, banner, detail
finally:
writer.close()
try:
await writer.wait_closed()
except:
pass
except asyncio.TimeoutError:
return port, False, "", "connect_timeout"
except ConnectionRefusedError:
return port, False, "", "connect_refused"
except Exception as e:
return port, False, "", f"connect_error:{type(e).__name__}"
async def phase2_verify_ip(self, ip, semaphore):
"""Phase2: 对一个IP的所有TCP开放端口做验证"""
ip_data = self.results.get(ip)
if not ip_data:
return
tcp_open = ip_data["tcp_open"]
tasks = [self.verify_port(ip, port, semaphore) for port in tcp_open]
results = await asyncio.gather(*tasks, return_exceptions=True)
verified_ports = {}
for r in results:
if isinstance(r, Exception):
continue
port, is_verified, banner, detail = r
port_info = {
"service": SCAN_PORTS.get(port, "unknown"),
"tcp_open": True,
"verified": is_verified,
"banner": banner,
"verify_detail": detail,
}
if is_verified:
verified_ports[port] = port_info
ip_data["verified"][port] = port_info
# 蜜罐检测
is_honeypot, hp_score, hp_reasons = detect_honeypot(
{p: v for p, v in ip_data["verified"].items() if v["verified"]},
tcp_open
)
# SSH评估
ssh_banner = ""
ssh_port = None
for p in [22, 2222]:
if p in verified_ports and verified_ports[p]["verified"]:
ssh_banner = verified_ports[p]["banner"]
ssh_port = p
break
ssh_info = assess_ssh(ssh_banner, set(verified_ports.keys()))
# 汇总
verified_list = [p for p, v in ip_data["verified"].items() if v["verified"]]
verified_count = len(verified_list)
# 宝塔检测
baota_detail = ip_data["verified"].get(8888, {}).get("verify_detail", "")
baota_ok = baota_detail.startswith("baota")
# 连接质量 (先计算,再写入)
tcp_cnt = len(tcp_open)
quality = int(verified_count / max(1, tcp_cnt) * 100)
if is_honeypot:
quality = max(0, quality - 50)
ip_data.update({
"verified_port_list": sorted(verified_list),
"verified_count": verified_count,
"tcp_open_count": tcp_cnt,
"is_honeypot": is_honeypot,
"honeypot_score": hp_score,
"honeypot_reasons": hp_reasons,
"ssh_open": ssh_port is not None,
"ssh_port": ssh_port,
"ssh_banner": ssh_banner,
"ssh_difficulty": ssh_info["difficulty"],
"ssh_difficulty_stars": ssh_info["difficulty_stars"],
"os_guess": ssh_info["os_guess"],
"ssh_notes": ssh_info["notes"],
"deploy_score": ssh_info["deploy_score"],
"deploy_ready": ssh_info["deploy_score"] >= 50 and not is_honeypot,
"rdp_verified": 3389 in verified_ports,
"vnc_verified": 5900 in verified_ports,
"telnet_verified": 23 in verified_ports,
"http_verified": 80 in verified_ports,
"https_verified": 443 in verified_ports,
"baota_verified": baota_ok,
"scan_time": datetime.now().isoformat(),
"connection_quality": quality,
})
async with self.lock:
self.verified_count += 1
if is_honeypot:
self.honeypot_count += 1
if self.verified_count % 5000 == 0:
self._print_progress()
def _calc_quality(self, ip_data):
"""连接质量评分 0-100"""
tcp_open = len(ip_data.get("tcp_open", []))
verified = ip_data.get("verified_count", 0)
if tcp_open == 0:
return 0
ratio = verified / tcp_open
# 全部验证通过 = 100
# 部分通过 = 按比例
# TCP全开但验证0个 = 0
quality = int(ratio * 100)
# 蜜罐扣分
if ip_data.get("is_honeypot"):
quality = max(0, quality - 50)
return quality
# --- 主执行 ---
async def run(self, ip_list):
"""执行两阶段扫描"""
self.total_ips = len(ip_list)
self.start_time = time.time()
print(f"{'='*70}")
print(f"分布式算力矩阵 — 深度验证扫描器 v2.0")
print(f"{'='*70}")
print(f"目标IP: {self.total_ips:,}")
print(f"扫描端口: {len(SCAN_PORTS)} ({', '.join(f'{p}({n})' for p,n in sorted(SCAN_PORTS.items()))})")
print(f"Phase1 并发: {self.concurrency} | TCP超时: {self.tcp_timeout}s")
print(f"Phase2 并发: {self.verify_concurrency} | 验证超时: {self.verify_timeout}s")
print(f"开始: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"{'='*70}")
# === Phase 1: TCP Connect ===
self.phase = 1
print(f"\n[Phase 1] TCP Connect 快速扫描 ({self.total_ips:,} IPs × {len(SCAN_PORTS)} ports)...")
semaphore1 = asyncio.Semaphore(self.concurrency)
batch_size = 10000
for i in range(0, len(ip_list), batch_size):
batch = ip_list[i:i + batch_size]
tasks = [self.phase1_scan_ip(ip, semaphore1) for ip in batch]
await asyncio.gather(*tasks, return_exceptions=True)
phase1_time = time.time() - self.start_time
print(f"\n[Phase 1 完成] {phase1_time:.0f}s | "
f"TCP开放: {self.tcp_open_count:,}/{self.total_ips:,} "
f"({self.tcp_open_count/max(1,self.total_ips)*100:.1f}%)")
# === Phase 2: 协议验证 ===
self.phase = 2
ips_to_verify = [ip for ip, d in self.results.items() if d.get("tcp_open")]
print(f"\n[Phase 2] 协议验证 ({len(ips_to_verify):,} IPs)...")
semaphore2 = asyncio.Semaphore(self.verify_concurrency)
for i in range(0, len(ips_to_verify), batch_size):
batch = ips_to_verify[i:i + batch_size]
tasks = [self.phase2_verify_ip(ip, semaphore2) for ip in batch]
await asyncio.gather(*tasks, return_exceptions=True)
total_time = time.time() - self.start_time
# === 统计 ===
real_open = sum(1 for d in self.results.values()
if d.get("verified_count", 0) > 0 and not d.get("is_honeypot"))
ssh_real = sum(1 for d in self.results.values()
if d.get("ssh_open") and not d.get("is_honeypot"))
print(f"\n{'='*70}")
print(f"扫描完成! 总耗时 {total_time:.0f}s ({total_time/60:.1f}min)")
print(f"{'='*70}")
print(f"TCP开放IP: {self.tcp_open_count:,}")
print(f"验证通过IP: {real_open:,} (去蜜罐后)")
print(f"蜜罐/CGNAT: {self.honeypot_count:,}")
print(f"真实SSH: {ssh_real:,}")
print(f"{'='*70}")
return self.results
def _print_progress(self):
elapsed = time.time() - self.start_time
if self.phase == 1:
rate = self.scanned_ips / max(1, elapsed)
remaining = (self.total_ips - self.scanned_ips) / max(1, rate)
pct = self.scanned_ips / max(1, self.total_ips) * 100
print(f" [P1 {pct:5.1f}%] {self.scanned_ips:,}/{self.total_ips:,} | "
f"TCP开放: {self.tcp_open_count:,} | "
f"{rate:.0f} IP/s | 剩余 {remaining/60:.0f}min", flush=True)
else:
total_verify = len([ip for ip in self.results if self.results[ip].get("tcp_open")])
pct = self.verified_count / max(1, total_verify) * 100
print(f" [P2 {pct:5.1f}%] {self.verified_count:,}/{total_verify:,} | "
f"蜜罐: {self.honeypot_count:,}", flush=True)
# ========== MongoDB 导入/导出 ==========
def import_to_mongodb(results, ip_source_map=None, db_name="KR", collection_name="分布式矩阵IP_已验证"):
"""将验证结果导入 MongoDB"""
import pymongo
client = pymongo.MongoClient('mongodb://admin:admin123@localhost:27017/?authSource=admin')
db = client[db_name]
coll = db[collection_name]
docs = []
for ip, data in results.items():
if data.get("verified_count", 0) == 0 and not data.get("tcp_open"):
continue # 完全没有端口的跳过
source = "unknown"
if ip_source_map:
sources = ip_source_map.get(ip, ["unknown"])
source = sources[0] if isinstance(sources, list) else sources
doc = {
"ip": ip,
"source_col": source,
"scan_time": data.get("scan_time", datetime.now().isoformat()),
# TCP层
"tcp_open_ports": sorted(data.get("tcp_open", [])),
"tcp_open_count": len(data.get("tcp_open", [])),
# 验证层
"verified_ports": sorted(data.get("verified_port_list", [])),
"verified_count": data.get("verified_count", 0),
"port_details": data.get("verified", {}),
# 蜜罐
"is_honeypot": data.get("is_honeypot", False),
"honeypot_score": data.get("honeypot_score", 0),
"honeypot_reasons": data.get("honeypot_reasons", []),
# SSH
"ssh_open": data.get("ssh_open", False),
"ssh_port": data.get("ssh_port"),
"ssh_banner": data.get("ssh_banner", ""),
"ssh_difficulty": data.get("ssh_difficulty", 5),
"ssh_difficulty_stars": data.get("ssh_difficulty_stars", ""),
"os_guess": data.get("os_guess", "Unknown"),
"ssh_notes": data.get("ssh_notes", []),
# 其他远程
"rdp_verified": data.get("rdp_verified", False),
"vnc_verified": data.get("vnc_verified", False),
"telnet_verified": data.get("telnet_verified", False),
"http_verified": data.get("http_verified", False),
"https_verified": data.get("https_verified", False),
"baota_verified": data.get("baota_verified", False),
# 评分
"deploy_score": data.get("deploy_score", 0),
"deploy_ready": data.get("deploy_ready", False),
"connection_quality": data.get("connection_quality", 0),
}
docs.append(doc)
if docs:
coll.delete_many({})
# 分批写入
batch = 5000
for i in range(0, len(docs), batch):
coll.insert_many(docs[i:i+batch], ordered=False)
# 索引
coll.create_index("ip")
coll.create_index("ssh_open")
coll.create_index("deploy_score")
coll.create_index("is_honeypot")
coll.create_index("connection_quality")
coll.create_index("source_col")
print(f"MongoDB {db_name}.{collection_name}: 写入 {len(docs):,}")
return len(docs)
def load_from_mongodb_existing(db_name="KR", collection_name="分布式矩阵IP_已扫描"):
"""从已有的 _已扫描 表加载IP列表用于二次验证"""
import pymongo
client = pymongo.MongoClient('mongodb://admin:admin123@localhost:27017/?authSource=admin')
db = client[db_name]
coll = db[collection_name]
ip_list = []
ip_source_map = {}
for doc in coll.find({}, {"ip": 1, "source_col": 1}):
ip = doc["ip"]
if ip not in ip_source_map:
ip_list.append(ip)
ip_source_map[ip] = doc.get("source_col", "unknown")
return ip_list, ip_source_map
# ========== 报告 ==========
def generate_report(results, output_dir, total_ips):
"""生成验证扫描报告"""
os.makedirs(output_dir, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
# 统计
stats = {
"total_scanned": total_ips,
"tcp_open": sum(1 for d in results.values() if d.get("tcp_open")),
"verified": sum(1 for d in results.values() if d.get("verified_count", 0) > 0),
"honeypot": sum(1 for d in results.values() if d.get("is_honeypot")),
"ssh_real": sum(1 for d in results.values()
if d.get("ssh_open") and not d.get("is_honeypot")),
"deploy_ready": sum(1 for d in results.values() if d.get("deploy_ready")),
}
# Markdown
md_path = os.path.join(output_dir, f"深度验证扫描报告_{ts}.md")
with open(md_path, "w", encoding="utf-8") as f:
f.write(f"# 深度验证扫描报告\n\n")
f.write(f"> 时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
f.write(f"> 扫描器: verified_scan.py v2.0(两阶段:TCP+协议验证)\n\n")
f.write(f"## 总览\n\n| 指标 | 数值 |\n|:---|:---|\n")
f.write(f"| 扫描总IP | {stats['total_scanned']:,} |\n")
f.write(f"| TCP连接成功 | {stats['tcp_open']:,} |\n")
f.write(f"| **协议验证通过** | **{stats['verified']:,}** |\n")
f.write(f"| 蜜罐/CGNAT排除 | {stats['honeypot']:,} |\n")
f.write(f"| **真实SSH** | **{stats['ssh_real']:,}** |\n")
f.write(f"| **可部署节点** | **{stats['deploy_ready']:,}** |\n\n")
# 误报率
if stats['tcp_open'] > 0:
false_positive = (stats['tcp_open'] - stats['verified']) / stats['tcp_open'] * 100
f.write(f"**误报率**: {false_positive:.1f}% "
f"({stats['tcp_open'] - stats['verified']:,} TCP假阳性被排除)\n\n")
# SSH列表(去蜜罐)
ssh_ips = [(ip, d) for ip, d in results.items()
if d.get("ssh_open") and not d.get("is_honeypot")]
ssh_ips.sort(key=lambda x: -x[1].get("deploy_score", 0))
f.write(f"## 真实SSH IP ({len(ssh_ips)} 个)\n\n")
f.write(f"| # | IP | 端口 | OS | Banner | 难度 | 质量 | 部署分 |\n")
f.write(f"|:---|:---|:---|:---|:---|:---|:---|:---|\n")
for i, (ip, d) in enumerate(ssh_ips[:200], 1):
f.write(f"| {i} | `{ip}` | {d.get('ssh_port',22)} "
f"| {d.get('os_guess','-')} "
f"| {d.get('ssh_banner','')[:50]} "
f"| {d.get('ssh_difficulty_stars','-')} "
f"| {d.get('connection_quality',0)} "
f"| {d.get('deploy_score',0)} |\n")
if len(ssh_ips) > 200:
f.write(f"| ... | 共{len(ssh_ips)}个 | | | | | | |\n")
# SSH列表文件
ssh_path = os.path.join(output_dir, f"verified_ssh_ips_{ts}.txt")
with open(ssh_path, "w") as f:
for ip, d in ssh_ips:
f.write(f"{ip}:{d.get('ssh_port', 22)}\n")
# JSON
json_path = os.path.join(output_dir, f"verified_scan_{ts}.json")
with open(json_path, "w", encoding="utf-8") as f:
json.dump({"stats": stats, "results": {
ip: {k: v for k, v in d.items() if k != "verified"}
for ip, d in results.items() if d.get("verified_count", 0) > 0
}}, f, ensure_ascii=False, indent=2)
print(f"\n报告: {md_path}")
print(f"SSH列表: {ssh_path} ({len(ssh_ips)} 个)")
print(f"JSON: {json_path}")
return md_path
# ========== 主入口 ==========
def main():
parser = argparse.ArgumentParser(description="分布式算力矩阵 深度验证扫描器 v2.0")
parser.add_argument("--input", "-i", help="IP列表文件路径")
parser.add_argument("--mongo-source", help="从MongoDB加载IP (数据库名)")
parser.add_argument("--collection", default="分布式矩阵IP_已扫描",
help="MongoDB集合名 (默认: 分布式矩阵IP_已扫描)")
parser.add_argument("--reverify", action="store_true",
help="对已扫描表做二次协议验证")
parser.add_argument("--concurrency", "-c", type=int, default=5000,
help="Phase1 TCP并发 (默认5000)")
parser.add_argument("--verify-concurrency", type=int, default=2000,
help="Phase2 验证并发 (默认2000)")
parser.add_argument("--tcp-timeout", type=float, default=2,
help="TCP连接超时 (默认2s)")
parser.add_argument("--verify-timeout", type=float, default=3,
help="协议验证超时 (默认3s)")
parser.add_argument("--banner-timeout", type=float, default=2,
help="Banner读取超时 (默认2s)")
parser.add_argument("--output", "-o",
default="/Users/karuo/Documents/1、金项目/3、自营项目/分布式算力矩阵/01_扫描模块/references")
parser.add_argument("--limit", type=int, default=0)
parser.add_argument("--skip-mongodb", action="store_true")
parser.add_argument("--source-map", help="IP→来源JSON映射文件")
args = parser.parse_args()
# 加载 IP
ip_source_map = {}
if args.reverify or args.mongo_source:
db_name = args.mongo_source or "KR"
print(f"从 MongoDB {db_name}.{args.collection} 加载IP...")
ip_list, ip_source_map = load_from_mongodb_existing(db_name, args.collection)
print(f" 加载 {len(ip_list):,} 个IP")
elif args.input:
with open(args.input, "r") as f:
ip_list = [l.strip() for l in f if l.strip()]
if args.source_map:
with open(args.source_map, "r") as f:
ip_source_map = json.load(f)
else:
print("错误: 需要 --input 或 --mongo-source 或 --reverify")
sys.exit(1)
# 排除自有IP
ip_list = [ip for ip in ip_list if ip not in OWN_IPS]
if args.limit > 0:
ip_list = ip_list[:args.limit]
print(f"待扫描: {len(ip_list):,} IPs")
# 提升文件描述符
try:
import resource
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
target = min(args.concurrency * 2 + 2000, hard)
resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
print(f"文件描述符: {soft}{target}")
except:
pass
# 执行
scanner = VerifiedScanner(
concurrency=args.concurrency,
tcp_timeout=args.tcp_timeout,
verify_timeout=args.verify_timeout,
banner_timeout=args.banner_timeout,
verify_concurrency=args.verify_concurrency,
)
results = asyncio.run(scanner.run(ip_list))
if results:
generate_report(results, args.output, len(ip_list))
if not args.skip_mongodb:
print("\n导入 MongoDB...")
count = import_to_mongodb(results, ip_source_map)
print(f"完成: {count:,}")
else:
print("无结果")
if __name__ == "__main__":
main()