#!/usr/bin/env python3 """ 木蚂蚁 IP 全量异步端口扫描器 =============================== - 117,632 个公网 IP,扫描 15 个关键端口 - asyncio 高并发(可调至 3000+ 并发连接) - Banner 抓取 + 服务指纹识别 - 服务器类型智能分类 - 实时进度输出 + JSON/Markdown 报告 用法: python3 mumayi_full_scan.py [--concurrency 3000] [--timeout 3] """ import asyncio import json import time import sys import os import argparse import ipaddress from datetime import datetime from collections import defaultdict # ========== 配置 ========== # 扫描端口及服务名称 SCAN_PORTS = { 22: "SSH", 21: "FTP", 23: "Telnet", 80: "HTTP", 443: "HTTPS", 2222: "SSH-Alt", 3306: "MySQL", 3389: "RDP", 5900: "VNC", 6379: "Redis", 8080: "HTTP-Proxy", 8443: "HTTPS-Alt", 8888: "BaoTa", 9200: "Elasticsearch", 27017: "MongoDB", } # 自有设备排除列表 OWN_INFRASTRUCTURE = { "42.194.232.22", # 小型宝塔 "42.194.245.239", # 存客宝 "43.139.27.93", # kr宝塔 "140.245.37.56", # Oracle VPS "119.233.228.177", # 家宽出口 } # SSH Banner 特征(用于服务器类型分析) SSH_SIGNATURES = { "OpenSSH": "Linux/BSD", "dropbear": "嵌入式/路由器", "ROSSSH": "MikroTik路由器", "Cisco": "Cisco设备", "libssh": "自定义SSH", "Serv-U": "Windows FTP/SSH", "WeOnlyDo": "Windows SSH", "SSH-2.0-Go": "Go应用", } # 服务器类型分类规则 def classify_server(ports_data): """根据开放端口和 Banner 判断服务器类型""" open_ports = set(ports_data.keys()) banners = {p: d.get("banner", "") for p, d in ports_data.items()} server_type = [] os_guess = "Unknown" ssh_version = "" # SSH 分析 if 22 in open_ports or 2222 in open_ports: ssh_port = 22 if 22 in open_ports else 2222 banner = banners.get(ssh_port, "") ssh_version = banner for sig, os_type in SSH_SIGNATURES.items(): if sig.lower() in banner.lower(): os_guess = os_type break if "ubuntu" in banner.lower(): os_guess = "Ubuntu Linux" elif "debian" in banner.lower(): os_guess = "Debian Linux" elif "centos" in banner.lower() or "el7" in banner.lower() or "el8" in banner.lower(): os_guess = "CentOS/RHEL" # 端口组合判断服务器类型 if 3389 in open_ports: server_type.append("Windows Server") os_guess = "Windows" if (80 in open_ports or 443 in open_ports) and (8080 in open_ports or 8443 in open_ports): server_type.append("Web应用服务器") elif 80 in open_ports or 443 in open_ports: server_type.append("Web服务器") if 3306 in open_ports: server_type.append("MySQL数据库") if 27017 in open_ports: server_type.append("MongoDB数据库") if 6379 in open_ports: server_type.append("Redis缓存") if 9200 in open_ports: server_type.append("Elasticsearch") if 8888 in open_ports: server_type.append("宝塔面板") if 5900 in open_ports: server_type.append("VNC远程桌面") if 21 in open_ports: server_type.append("FTP服务") if 23 in open_ports: server_type.append("Telnet(不安全)") if 22 in open_ports or 2222 in open_ports: server_type.append("SSH可达") # 部署适合度评估 deploy_score = 0 if 22 in open_ports or 2222 in open_ports: deploy_score += 50 # SSH 可连 if os_guess in ("Ubuntu Linux", "Debian Linux", "CentOS/RHEL", "Linux/BSD"): deploy_score += 30 # Linux 系统 elif os_guess == "Unknown" and (22 in open_ports): deploy_score += 15 # 可能是 Linux if 80 in open_ports or 443 in open_ports: deploy_score += 10 # 有 Web 服务 = 带宽可能好 if 3389 in open_ports: deploy_score -= 10 # Windows 不太适合部署 return { "server_types": server_type if server_type else ["未知服务"], "os_guess": os_guess, "ssh_version": ssh_version, "deploy_score": deploy_score, "deploy_ready": deploy_score >= 50, } # ========== 异步扫描核心 ========== class AsyncPortScanner: def __init__(self, concurrency=3000, timeout=3, banner_timeout=2): self.concurrency = concurrency self.timeout = timeout self.banner_timeout = banner_timeout self.semaphore = None # 统计 self.total_ips = 0 self.scanned_ips = 0 self.total_open = 0 self.ips_with_open = 0 self.results = {} self.start_time = None self.lock = asyncio.Lock() async def scan_port(self, ip, port): """扫描单个 IP:Port,返回 (port, is_open, banner)""" try: async with self.semaphore: reader, writer = await asyncio.wait_for( asyncio.open_connection(ip, port), timeout=self.timeout ) # 尝试读取 banner banner = "" try: # 对 HTTP 发送请求头 if port in (80, 8080, 8443, 8888, 9200): writer.write(f"HEAD / HTTP/1.0\r\nHost: {ip}\r\n\r\n".encode()) await writer.drain() data = await asyncio.wait_for( reader.read(1024), timeout=self.banner_timeout ) banner = data.decode("utf-8", errors="replace").strip()[:256] except: pass writer.close() try: await writer.wait_closed() except: pass return (port, True, banner) except: return (port, False, "") async def scan_ip(self, ip): """扫描单个 IP 的所有端口""" tasks = [self.scan_port(ip, port) for port in SCAN_PORTS.keys()] results = await asyncio.gather(*tasks, return_exceptions=True) open_ports = {} for result in results: if isinstance(result, Exception): continue port, is_open, banner = result if is_open: open_ports[port] = { "service": SCAN_PORTS[port], "banner": banner, "open": True, } # 更新统计 async with self.lock: self.scanned_ips += 1 if open_ports: self.ips_with_open += 1 self.total_open += len(open_ports) # 分析服务器类型 analysis = classify_server(open_ports) self.results[ip] = { "ip": ip, "open_ports": open_ports, "port_count": len(open_ports), "analysis": analysis, "scan_time": datetime.now().isoformat(), } # 进度输出(每 500 个 IP 或每个有结果的 IP) if self.scanned_ips % 500 == 0 or open_ports: elapsed = time.time() - self.start_time rate = self.scanned_ips / elapsed if elapsed > 0 else 0 remaining = (self.total_ips - self.scanned_ips) / rate if rate > 0 else 0 progress = self.scanned_ips / self.total_ips * 100 status = f"\r[{progress:5.1f}%] {self.scanned_ips}/{self.total_ips} | " status += f"发现 {self.ips_with_open} 个有端口IP ({self.total_open} 端口) | " status += f"速率 {rate:.0f} IP/s | 剩余 {remaining:.0f}s" if open_ports: port_list = ','.join(str(p) for p in sorted(open_ports.keys())) status += f"\n ✓ {ip} → [{port_list}] {analysis['server_types']}" print(status, flush=True) async def run(self, ip_list): """执行全量扫描""" self.semaphore = asyncio.Semaphore(self.concurrency) self.total_ips = len(ip_list) self.start_time = time.time() print(f"=" * 70) print(f"木蚂蚁 IP 全量扫描器 启动") print(f"=" * 70) print(f"目标IP数: {self.total_ips:,}") print(f"扫描端口: {len(SCAN_PORTS)} 个 ({', '.join(f'{p}({n})' for p, n in sorted(SCAN_PORTS.items()))})") print(f"并发连接: {self.concurrency}") print(f"连接超时: {self.timeout}s / Banner超时: {self.banner_timeout}s") print(f"总扫描量: {self.total_ips * len(SCAN_PORTS):,} 次连接") print(f"预估耗时: {self.total_ips * len(SCAN_PORTS) / self.concurrency * self.timeout / 60:.0f}-{self.total_ips * len(SCAN_PORTS) / self.concurrency * self.timeout / 30:.0f} 分钟") print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print(f"=" * 70) print() # 分批扫描(每批 5000 个 IP,避免内存过大) batch_size = 5000 for i in range(0, len(ip_list), batch_size): batch = ip_list[i:i + batch_size] tasks = [self.scan_ip(ip) for ip in batch] await asyncio.gather(*tasks, return_exceptions=True) elapsed = time.time() - self.start_time print(f"\n\n{'=' * 70}") print(f"扫描完成!") print(f"{'=' * 70}") print(f"总耗时: {elapsed:.1f}s ({elapsed/60:.1f}分钟)") print(f"扫描IP: {self.scanned_ips:,}") print(f"有开放端口: {self.ips_with_open:,} ({self.ips_with_open/self.scanned_ips*100:.1f}%)") print(f"总开放端口: {self.total_open:,}") print(f"平均速率: {self.scanned_ips/elapsed:.0f} IP/s") print(f"{'=' * 70}") return self.results # ========== 报告生成 ========== def generate_reports(results, output_dir, total_ips): """生成 JSON + Markdown 报告""" os.makedirs(output_dir, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # 1. JSON 完整结果 json_path = os.path.join(output_dir, f"mumayi_scan_results_{timestamp}.json") with open(json_path, "w", encoding="utf-8") as f: json.dump({ "scan_info": { "timestamp": datetime.now().isoformat(), "total_target_ips": total_ips, "ips_with_open_ports": len(results), "scan_ports": SCAN_PORTS, }, "results": results, }, f, ensure_ascii=False, indent=2) # 2. 统计分析 port_stats = defaultdict(int) type_stats = defaultdict(int) os_stats = defaultdict(int) ssh_ips = [] deploy_ready = [] for ip, data in sorted(results.items(), key=lambda x: -x[1]["port_count"]): for port in data["open_ports"]: port_stats[port] += 1 for t in data["analysis"]["server_types"]: type_stats[t] += 1 os_stats[data["analysis"]["os_guess"]] += 1 if 22 in data["open_ports"] or 2222 in data["open_ports"]: ssh_ips.append(data) if data["analysis"]["deploy_ready"]: deploy_ready.append(data) # 3. Markdown 报告 md_path = os.path.join(output_dir, f"mumayi_扫描报告_{timestamp}.md") with open(md_path, "w", encoding="utf-8") as f: f.write(f"# 木蚂蚁 IP 全量扫描报告\n\n") f.write(f"> 扫描时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") f.write(f"> 数据来源: 木蚂蚁用户RFM评估 CSV (regip + lastip)\n") f.write(f"> 扫描器: mumayi_full_scan.py (asyncio 高并发)\n\n") f.write(f"---\n\n") # 概览 f.write(f"## 一、扫描概览\n\n") f.write(f"| 指标 | 数值 |\n|:---|:---|\n") f.write(f"| 目标 IP 总数 | {total_ips:,} |\n") f.write(f"| 有开放端口的 IP | **{len(results):,}** ({len(results)/total_ips*100:.1f}%) |\n") f.write(f"| SSH 可达 IP | **{len(ssh_ips):,}** |\n") f.write(f"| 可部署节点 | **{len(deploy_ready):,}** |\n") f.write(f"| 扫描端口数 | {len(SCAN_PORTS)} |\n\n") # 端口统计 f.write(f"## 二、端口开放统计\n\n") f.write(f"| 端口 | 服务 | 发现数量 | 占比 |\n|:---|:---|:---|:---|\n") for port, count in sorted(port_stats.items(), key=lambda x: -x[1]): f.write(f"| {port} | {SCAN_PORTS.get(port, '?')} | {count} | {count/len(results)*100:.1f}% |\n") f.write(f"\n") # 服务器类型分布 f.write(f"## 三、服务器类型分布\n\n") f.write(f"| 类型 | 数量 |\n|:---|:---|\n") for t, count in sorted(type_stats.items(), key=lambda x: -x[1]): f.write(f"| {t} | {count} |\n") f.write(f"\n") # OS 猜测分布 f.write(f"## 四、操作系统分布\n\n") f.write(f"| OS | 数量 |\n|:---|:---|\n") for os_name, count in sorted(os_stats.items(), key=lambda x: -x[1]): f.write(f"| {os_name} | {count} |\n") f.write(f"\n") # SSH 可达 IP 列表(目标列表) f.write(f"## 五、SSH 可达 IP 列表({len(ssh_ips)} 个)\n\n") f.write(f"> 这些 IP 开放了 SSH 端口,是部署分布式算力的首选目标\n\n") f.write(f"| # | IP | SSH端口 | SSH版本 | OS猜测 | 其他端口 | 部署评分 | 服务器类型 |\n") f.write(f"|:---|:---|:---|:---|:---|:---|:---|:---|\n") for i, data in enumerate(sorted(ssh_ips, key=lambda x: -x["analysis"]["deploy_score"]), 1): ip = data["ip"] ssh_port = 22 if 22 in data["open_ports"] else 2222 ssh_ver = data["open_ports"].get(ssh_port, {}).get("banner", "")[:60] os_g = data["analysis"]["os_guess"] other = ",".join(str(p) for p in sorted(data["open_ports"].keys()) if p not in (22, 2222)) score = data["analysis"]["deploy_score"] types = ", ".join(data["analysis"]["server_types"][:3]) f.write(f"| {i} | `{ip}` | {ssh_port} | {ssh_ver} | {os_g} | {other} | {score} | {types} |\n") f.write(f"\n") # 可部署节点 f.write(f"## 六、可部署节点评估({len(deploy_ready)} 个)\n\n") f.write(f"> 部署评分 >= 50 的节点,优先用于分布式算力部署\n\n") if deploy_ready: f.write(f"| # | IP | 评分 | OS | 开放端口 | SSH版本 |\n") f.write(f"|:---|:---|:---|:---|:---|:---|\n") for i, data in enumerate(sorted(deploy_ready, key=lambda x: -x["analysis"]["deploy_score"]), 1): ip = data["ip"] score = data["analysis"]["deploy_score"] os_g = data["analysis"]["os_guess"] ports = ",".join(str(p) for p in sorted(data["open_ports"].keys())) ssh_ver = data["analysis"].get("ssh_version", "")[:50] f.write(f"| {i} | `{ip}` | {score} | {os_g} | {ports} | {ssh_ver} |\n") else: f.write(f"暂无满足条件的节点\n") f.write(f"\n") # 高价值目标(多端口开放) multi_port = [d for d in results.values() if d["port_count"] >= 3] f.write(f"## 七、高价值目标(3+ 端口开放,{len(multi_port)} 个)\n\n") if multi_port: f.write(f"| # | IP | 开放端口数 | 端口列表 | 服务器类型 |\n") f.write(f"|:---|:---|:---|:---|:---|\n") for i, data in enumerate(sorted(multi_port, key=lambda x: -x["port_count"]), 1): ip = data["ip"] pc = data["port_count"] ports = ", ".join(f"{p}({SCAN_PORTS.get(p,'?')})" for p in sorted(data["open_ports"].keys())) types = ", ".join(data["analysis"]["server_types"][:3]) f.write(f"| {i} | `{ip}` | {pc} | {ports} | {types} |\n") if i >= 100: f.write(f"| ... | 共 {len(multi_port)} 个 | | | |\n") break f.write(f"\n") # 下一步操作建议 f.write(f"## 八、下一步操作建议\n\n") f.write(f"### 8.1 SSH 登录测试\n") f.write(f"对 {len(ssh_ips)} 个 SSH 可达 IP 进行凭证测试:\n") f.write(f"- 使用 MongoDB 中的木蚂蚁用户凭证(需 MD5 反查)\n") f.write(f"- 常用默认凭证(root/root, admin/admin 等)\n") f.write(f"- 弱密码字典\n\n") f.write(f"### 8.2 部署优先级\n") f.write(f"1. 部署评分 >= 80 的 Linux 服务器(优先)\n") f.write(f"2. 部署评分 50-79 的服务器(次选)\n") f.write(f"3. Windows Server(需 WinRM/RDP 方式)\n\n") f.write(f"### 8.3 安全提醒\n") f.write(f"- 仅操作授权范围内的服务器\n") f.write(f"- 部署前确认服务器所有权\n") f.write(f"- 使用密钥认证替代密码\n") # 4. SSH IP 快速列表(供后续脚本使用) ssh_list_path = os.path.join(output_dir, f"ssh_reachable_ips_{timestamp}.txt") with open(ssh_list_path, "w") as f: for data in sorted(ssh_ips, key=lambda x: -x["analysis"]["deploy_score"]): ssh_port = 22 if 22 in data["open_ports"] else 2222 f.write(f"{data['ip']}:{ssh_port}\n") print(f"\n报告已生成:") print(f" JSON 完整结果: {json_path}") print(f" Markdown 报告: {md_path}") print(f" SSH IP 列表: {ssh_list_path}") return json_path, md_path, ssh_list_path # ========== 主入口 ========== def main(): parser = argparse.ArgumentParser(description="木蚂蚁 IP 全量异步端口扫描器") parser.add_argument("--input", "-i", default="/tmp/mumayi_all_ips.txt", help="IP列表文件路径") parser.add_argument("--concurrency", "-c", type=int, default=3000, help="并发连接数 (默认 3000)") parser.add_argument("--timeout", "-t", type=float, default=3, help="连接超时秒数 (默认 3)") parser.add_argument("--banner-timeout", type=float, default=2, help="Banner读取超时 (默认 2)") parser.add_argument("--output", "-o", default="/Users/karuo/Documents/1、金:项目/3、自营项目/分布式算力矩阵/01_扫描模块/references", help="输出目录") parser.add_argument("--limit", type=int, default=0, help="限制扫描IP数 (0=全部)") args = parser.parse_args() # 读取 IP 列表 with open(args.input, "r") as f: ip_list = [line.strip() for line in f if line.strip()] # 过滤自有设备 ip_list = [ip for ip in ip_list if ip not in OWN_INFRASTRUCTURE] if args.limit > 0: ip_list = ip_list[:args.limit] print(f"加载 {len(ip_list):,} 个目标 IP") # 提升系统文件描述符限制 try: import resource soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) target = min(args.concurrency * 2 + 1000, hard) resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard)) print(f"文件描述符限制: {soft} → {target}") except: print("警告: 无法提升文件描述符限制") # 执行扫描 scanner = AsyncPortScanner( concurrency=args.concurrency, timeout=args.timeout, banner_timeout=args.banner_timeout, ) results = asyncio.run(scanner.run(ip_list)) # 生成报告 if results: generate_reports(results, args.output, len(ip_list)) else: print("\n未发现任何开放端口的 IP") if __name__ == "__main__": main()