Files
suanli-juzhen/01_扫描模块/scripts/mumayi_full_scan.py

502 lines
20 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
木蚂蚁 IP 全量异步端口扫描器
===============================
- 117,632 个公网 IP扫描 15 个关键端口
- asyncio 高并发可调至 3000+ 并发连接
- Banner 抓取 + 服务指纹识别
- 服务器类型智能分类
- 实时进度输出 + JSON/Markdown 报告
用法: python3 mumayi_full_scan.py [--concurrency 3000] [--timeout 3]
"""
import asyncio
import json
import time
import sys
import os
import argparse
import ipaddress
from datetime import datetime
from collections import defaultdict
# ========== 配置 ==========
# 扫描端口及服务名称
SCAN_PORTS = {
22: "SSH",
21: "FTP",
23: "Telnet",
80: "HTTP",
443: "HTTPS",
2222: "SSH-Alt",
3306: "MySQL",
3389: "RDP",
5900: "VNC",
6379: "Redis",
8080: "HTTP-Proxy",
8443: "HTTPS-Alt",
8888: "BaoTa",
9200: "Elasticsearch",
27017: "MongoDB",
}
# 自有设备排除列表
OWN_INFRASTRUCTURE = {
"42.194.232.22", # 小型宝塔
"42.194.245.239", # 存客宝
"43.139.27.93", # kr宝塔
"140.245.37.56", # Oracle VPS
"119.233.228.177", # 家宽出口
}
# SSH Banner 特征(用于服务器类型分析)
SSH_SIGNATURES = {
"OpenSSH": "Linux/BSD",
"dropbear": "嵌入式/路由器",
"ROSSSH": "MikroTik路由器",
"Cisco": "Cisco设备",
"libssh": "自定义SSH",
"Serv-U": "Windows FTP/SSH",
"WeOnlyDo": "Windows SSH",
"SSH-2.0-Go": "Go应用",
}
# 服务器类型分类规则
def classify_server(ports_data):
"""根据开放端口和 Banner 判断服务器类型"""
open_ports = set(ports_data.keys())
banners = {p: d.get("banner", "") for p, d in ports_data.items()}
server_type = []
os_guess = "Unknown"
ssh_version = ""
# SSH 分析
if 22 in open_ports or 2222 in open_ports:
ssh_port = 22 if 22 in open_ports else 2222
banner = banners.get(ssh_port, "")
ssh_version = banner
for sig, os_type in SSH_SIGNATURES.items():
if sig.lower() in banner.lower():
os_guess = os_type
break
if "ubuntu" in banner.lower():
os_guess = "Ubuntu Linux"
elif "debian" in banner.lower():
os_guess = "Debian Linux"
elif "centos" in banner.lower() or "el7" in banner.lower() or "el8" in banner.lower():
os_guess = "CentOS/RHEL"
# 端口组合判断服务器类型
if 3389 in open_ports:
server_type.append("Windows Server")
os_guess = "Windows"
if (80 in open_ports or 443 in open_ports) and (8080 in open_ports or 8443 in open_ports):
server_type.append("Web应用服务器")
elif 80 in open_ports or 443 in open_ports:
server_type.append("Web服务器")
if 3306 in open_ports:
server_type.append("MySQL数据库")
if 27017 in open_ports:
server_type.append("MongoDB数据库")
if 6379 in open_ports:
server_type.append("Redis缓存")
if 9200 in open_ports:
server_type.append("Elasticsearch")
if 8888 in open_ports:
server_type.append("宝塔面板")
if 5900 in open_ports:
server_type.append("VNC远程桌面")
if 21 in open_ports:
server_type.append("FTP服务")
if 23 in open_ports:
server_type.append("Telnet(不安全)")
if 22 in open_ports or 2222 in open_ports:
server_type.append("SSH可达")
# 部署适合度评估
deploy_score = 0
if 22 in open_ports or 2222 in open_ports:
deploy_score += 50 # SSH 可连
if os_guess in ("Ubuntu Linux", "Debian Linux", "CentOS/RHEL", "Linux/BSD"):
deploy_score += 30 # Linux 系统
elif os_guess == "Unknown" and (22 in open_ports):
deploy_score += 15 # 可能是 Linux
if 80 in open_ports or 443 in open_ports:
deploy_score += 10 # 有 Web 服务 = 带宽可能好
if 3389 in open_ports:
deploy_score -= 10 # Windows 不太适合部署
return {
"server_types": server_type if server_type else ["未知服务"],
"os_guess": os_guess,
"ssh_version": ssh_version,
"deploy_score": deploy_score,
"deploy_ready": deploy_score >= 50,
}
# ========== 异步扫描核心 ==========
class AsyncPortScanner:
def __init__(self, concurrency=3000, timeout=3, banner_timeout=2):
self.concurrency = concurrency
self.timeout = timeout
self.banner_timeout = banner_timeout
self.semaphore = None
# 统计
self.total_ips = 0
self.scanned_ips = 0
self.total_open = 0
self.ips_with_open = 0
self.results = {}
self.start_time = None
self.lock = asyncio.Lock()
async def scan_port(self, ip, port):
"""扫描单个 IP:Port返回 (port, is_open, banner)"""
try:
async with self.semaphore:
reader, writer = await asyncio.wait_for(
asyncio.open_connection(ip, port),
timeout=self.timeout
)
# 尝试读取 banner
banner = ""
try:
# 对 HTTP 发送请求头
if port in (80, 8080, 8443, 8888, 9200):
writer.write(f"HEAD / HTTP/1.0\r\nHost: {ip}\r\n\r\n".encode())
await writer.drain()
data = await asyncio.wait_for(
reader.read(1024),
timeout=self.banner_timeout
)
banner = data.decode("utf-8", errors="replace").strip()[:256]
except:
pass
writer.close()
try:
await writer.wait_closed()
except:
pass
return (port, True, banner)
except:
return (port, False, "")
async def scan_ip(self, ip):
"""扫描单个 IP 的所有端口"""
tasks = [self.scan_port(ip, port) for port in SCAN_PORTS.keys()]
results = await asyncio.gather(*tasks, return_exceptions=True)
open_ports = {}
for result in results:
if isinstance(result, Exception):
continue
port, is_open, banner = result
if is_open:
open_ports[port] = {
"service": SCAN_PORTS[port],
"banner": banner,
"open": True,
}
# 更新统计
async with self.lock:
self.scanned_ips += 1
if open_ports:
self.ips_with_open += 1
self.total_open += len(open_ports)
# 分析服务器类型
analysis = classify_server(open_ports)
self.results[ip] = {
"ip": ip,
"open_ports": open_ports,
"port_count": len(open_ports),
"analysis": analysis,
"scan_time": datetime.now().isoformat(),
}
# 进度输出(每 500 个 IP 或每个有结果的 IP
if self.scanned_ips % 500 == 0 or open_ports:
elapsed = time.time() - self.start_time
rate = self.scanned_ips / elapsed if elapsed > 0 else 0
remaining = (self.total_ips - self.scanned_ips) / rate if rate > 0 else 0
progress = self.scanned_ips / self.total_ips * 100
status = f"\r[{progress:5.1f}%] {self.scanned_ips}/{self.total_ips} | "
status += f"发现 {self.ips_with_open} 个有端口IP ({self.total_open} 端口) | "
status += f"速率 {rate:.0f} IP/s | 剩余 {remaining:.0f}s"
if open_ports:
port_list = ','.join(str(p) for p in sorted(open_ports.keys()))
status += f"\n{ip} → [{port_list}] {analysis['server_types']}"
print(status, flush=True)
async def run(self, ip_list):
"""执行全量扫描"""
self.semaphore = asyncio.Semaphore(self.concurrency)
self.total_ips = len(ip_list)
self.start_time = time.time()
print(f"=" * 70)
print(f"木蚂蚁 IP 全量扫描器 启动")
print(f"=" * 70)
print(f"目标IP数: {self.total_ips:,}")
print(f"扫描端口: {len(SCAN_PORTS)} 个 ({', '.join(f'{p}({n})' for p, n in sorted(SCAN_PORTS.items()))})")
print(f"并发连接: {self.concurrency}")
print(f"连接超时: {self.timeout}s / Banner超时: {self.banner_timeout}s")
print(f"总扫描量: {self.total_ips * len(SCAN_PORTS):,} 次连接")
print(f"预估耗时: {self.total_ips * len(SCAN_PORTS) / self.concurrency * self.timeout / 60:.0f}-{self.total_ips * len(SCAN_PORTS) / self.concurrency * self.timeout / 30:.0f} 分钟")
print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"=" * 70)
print()
# 分批扫描(每批 5000 个 IP避免内存过大
batch_size = 5000
for i in range(0, len(ip_list), batch_size):
batch = ip_list[i:i + batch_size]
tasks = [self.scan_ip(ip) for ip in batch]
await asyncio.gather(*tasks, return_exceptions=True)
elapsed = time.time() - self.start_time
print(f"\n\n{'=' * 70}")
print(f"扫描完成!")
print(f"{'=' * 70}")
print(f"总耗时: {elapsed:.1f}s ({elapsed/60:.1f}分钟)")
print(f"扫描IP: {self.scanned_ips:,}")
print(f"有开放端口: {self.ips_with_open:,} ({self.ips_with_open/self.scanned_ips*100:.1f}%)")
print(f"总开放端口: {self.total_open:,}")
print(f"平均速率: {self.scanned_ips/elapsed:.0f} IP/s")
print(f"{'=' * 70}")
return self.results
# ========== 报告生成 ==========
def generate_reports(results, output_dir, total_ips):
"""生成 JSON + Markdown 报告"""
os.makedirs(output_dir, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# 1. JSON 完整结果
json_path = os.path.join(output_dir, f"mumayi_scan_results_{timestamp}.json")
with open(json_path, "w", encoding="utf-8") as f:
json.dump({
"scan_info": {
"timestamp": datetime.now().isoformat(),
"total_target_ips": total_ips,
"ips_with_open_ports": len(results),
"scan_ports": SCAN_PORTS,
},
"results": results,
}, f, ensure_ascii=False, indent=2)
# 2. 统计分析
port_stats = defaultdict(int)
type_stats = defaultdict(int)
os_stats = defaultdict(int)
ssh_ips = []
deploy_ready = []
for ip, data in sorted(results.items(), key=lambda x: -x[1]["port_count"]):
for port in data["open_ports"]:
port_stats[port] += 1
for t in data["analysis"]["server_types"]:
type_stats[t] += 1
os_stats[data["analysis"]["os_guess"]] += 1
if 22 in data["open_ports"] or 2222 in data["open_ports"]:
ssh_ips.append(data)
if data["analysis"]["deploy_ready"]:
deploy_ready.append(data)
# 3. Markdown 报告
md_path = os.path.join(output_dir, f"mumayi_扫描报告_{timestamp}.md")
with open(md_path, "w", encoding="utf-8") as f:
f.write(f"# 木蚂蚁 IP 全量扫描报告\n\n")
f.write(f"> 扫描时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
f.write(f"> 数据来源: 木蚂蚁用户RFM评估 CSV (regip + lastip)\n")
f.write(f"> 扫描器: mumayi_full_scan.py (asyncio 高并发)\n\n")
f.write(f"---\n\n")
# 概览
f.write(f"## 一、扫描概览\n\n")
f.write(f"| 指标 | 数值 |\n|:---|:---|\n")
f.write(f"| 目标 IP 总数 | {total_ips:,} |\n")
f.write(f"| 有开放端口的 IP | **{len(results):,}** ({len(results)/total_ips*100:.1f}%) |\n")
f.write(f"| SSH 可达 IP | **{len(ssh_ips):,}** |\n")
f.write(f"| 可部署节点 | **{len(deploy_ready):,}** |\n")
f.write(f"| 扫描端口数 | {len(SCAN_PORTS)} |\n\n")
# 端口统计
f.write(f"## 二、端口开放统计\n\n")
f.write(f"| 端口 | 服务 | 发现数量 | 占比 |\n|:---|:---|:---|:---|\n")
for port, count in sorted(port_stats.items(), key=lambda x: -x[1]):
f.write(f"| {port} | {SCAN_PORTS.get(port, '?')} | {count} | {count/len(results)*100:.1f}% |\n")
f.write(f"\n")
# 服务器类型分布
f.write(f"## 三、服务器类型分布\n\n")
f.write(f"| 类型 | 数量 |\n|:---|:---|\n")
for t, count in sorted(type_stats.items(), key=lambda x: -x[1]):
f.write(f"| {t} | {count} |\n")
f.write(f"\n")
# OS 猜测分布
f.write(f"## 四、操作系统分布\n\n")
f.write(f"| OS | 数量 |\n|:---|:---|\n")
for os_name, count in sorted(os_stats.items(), key=lambda x: -x[1]):
f.write(f"| {os_name} | {count} |\n")
f.write(f"\n")
# SSH 可达 IP 列表(目标列表)
f.write(f"## 五、SSH 可达 IP 列表({len(ssh_ips)} 个)\n\n")
f.write(f"> 这些 IP 开放了 SSH 端口,是部署分布式算力的首选目标\n\n")
f.write(f"| # | IP | SSH端口 | SSH版本 | OS猜测 | 其他端口 | 部署评分 | 服务器类型 |\n")
f.write(f"|:---|:---|:---|:---|:---|:---|:---|:---|\n")
for i, data in enumerate(sorted(ssh_ips, key=lambda x: -x["analysis"]["deploy_score"]), 1):
ip = data["ip"]
ssh_port = 22 if 22 in data["open_ports"] else 2222
ssh_ver = data["open_ports"].get(ssh_port, {}).get("banner", "")[:60]
os_g = data["analysis"]["os_guess"]
other = ",".join(str(p) for p in sorted(data["open_ports"].keys()) if p not in (22, 2222))
score = data["analysis"]["deploy_score"]
types = ", ".join(data["analysis"]["server_types"][:3])
f.write(f"| {i} | `{ip}` | {ssh_port} | {ssh_ver} | {os_g} | {other} | {score} | {types} |\n")
f.write(f"\n")
# 可部署节点
f.write(f"## 六、可部署节点评估({len(deploy_ready)} 个)\n\n")
f.write(f"> 部署评分 >= 50 的节点,优先用于分布式算力部署\n\n")
if deploy_ready:
f.write(f"| # | IP | 评分 | OS | 开放端口 | SSH版本 |\n")
f.write(f"|:---|:---|:---|:---|:---|:---|\n")
for i, data in enumerate(sorted(deploy_ready, key=lambda x: -x["analysis"]["deploy_score"]), 1):
ip = data["ip"]
score = data["analysis"]["deploy_score"]
os_g = data["analysis"]["os_guess"]
ports = ",".join(str(p) for p in sorted(data["open_ports"].keys()))
ssh_ver = data["analysis"].get("ssh_version", "")[:50]
f.write(f"| {i} | `{ip}` | {score} | {os_g} | {ports} | {ssh_ver} |\n")
else:
f.write(f"暂无满足条件的节点\n")
f.write(f"\n")
# 高价值目标(多端口开放)
multi_port = [d for d in results.values() if d["port_count"] >= 3]
f.write(f"## 七、高价值目标3+ 端口开放,{len(multi_port)} 个)\n\n")
if multi_port:
f.write(f"| # | IP | 开放端口数 | 端口列表 | 服务器类型 |\n")
f.write(f"|:---|:---|:---|:---|:---|\n")
for i, data in enumerate(sorted(multi_port, key=lambda x: -x["port_count"]), 1):
ip = data["ip"]
pc = data["port_count"]
ports = ", ".join(f"{p}({SCAN_PORTS.get(p,'?')})" for p in sorted(data["open_ports"].keys()))
types = ", ".join(data["analysis"]["server_types"][:3])
f.write(f"| {i} | `{ip}` | {pc} | {ports} | {types} |\n")
if i >= 100:
f.write(f"| ... | 共 {len(multi_port)} 个 | | | |\n")
break
f.write(f"\n")
# 下一步操作建议
f.write(f"## 八、下一步操作建议\n\n")
f.write(f"### 8.1 SSH 登录测试\n")
f.write(f"{len(ssh_ips)} 个 SSH 可达 IP 进行凭证测试:\n")
f.write(f"- 使用 MongoDB 中的木蚂蚁用户凭证(需 MD5 反查)\n")
f.write(f"- 常用默认凭证root/root, admin/admin 等)\n")
f.write(f"- 弱密码字典\n\n")
f.write(f"### 8.2 部署优先级\n")
f.write(f"1. 部署评分 >= 80 的 Linux 服务器(优先)\n")
f.write(f"2. 部署评分 50-79 的服务器(次选)\n")
f.write(f"3. Windows Server需 WinRM/RDP 方式)\n\n")
f.write(f"### 8.3 安全提醒\n")
f.write(f"- 仅操作授权范围内的服务器\n")
f.write(f"- 部署前确认服务器所有权\n")
f.write(f"- 使用密钥认证替代密码\n")
# 4. SSH IP 快速列表(供后续脚本使用)
ssh_list_path = os.path.join(output_dir, f"ssh_reachable_ips_{timestamp}.txt")
with open(ssh_list_path, "w") as f:
for data in sorted(ssh_ips, key=lambda x: -x["analysis"]["deploy_score"]):
ssh_port = 22 if 22 in data["open_ports"] else 2222
f.write(f"{data['ip']}:{ssh_port}\n")
print(f"\n报告已生成:")
print(f" JSON 完整结果: {json_path}")
print(f" Markdown 报告: {md_path}")
print(f" SSH IP 列表: {ssh_list_path}")
return json_path, md_path, ssh_list_path
# ========== 主入口 ==========
def main():
parser = argparse.ArgumentParser(description="木蚂蚁 IP 全量异步端口扫描器")
parser.add_argument("--input", "-i", default="/tmp/mumayi_all_ips.txt",
help="IP列表文件路径")
parser.add_argument("--concurrency", "-c", type=int, default=3000,
help="并发连接数 (默认 3000)")
parser.add_argument("--timeout", "-t", type=float, default=3,
help="连接超时秒数 (默认 3)")
parser.add_argument("--banner-timeout", type=float, default=2,
help="Banner读取超时 (默认 2)")
parser.add_argument("--output", "-o",
default="/Users/karuo/Documents/1、金项目/3、自营项目/分布式算力矩阵/01_扫描模块/references",
help="输出目录")
parser.add_argument("--limit", type=int, default=0,
help="限制扫描IP数 (0=全部)")
args = parser.parse_args()
# 读取 IP 列表
with open(args.input, "r") as f:
ip_list = [line.strip() for line in f if line.strip()]
# 过滤自有设备
ip_list = [ip for ip in ip_list if ip not in OWN_INFRASTRUCTURE]
if args.limit > 0:
ip_list = ip_list[:args.limit]
print(f"加载 {len(ip_list):,} 个目标 IP")
# 提升系统文件描述符限制
try:
import resource
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
target = min(args.concurrency * 2 + 1000, hard)
resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
print(f"文件描述符限制: {soft}{target}")
except:
print("警告: 无法提升文件描述符限制")
# 执行扫描
scanner = AsyncPortScanner(
concurrency=args.concurrency,
timeout=args.timeout,
banner_timeout=args.banner_timeout,
)
results = asyncio.run(scanner.run(ip_list))
# 生成报告
if results:
generate_reports(results, args.output, len(ip_list))
else:
print("\n未发现任何开放端口的 IP")
if __name__ == "__main__":
main()