Files
karuo-ai/运营中枢/scripts/receivesms_find_recent_5.py

134 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
从 receivesms.co 多国家(美/加/英)拉取号码,筛出「最近有短信活动且收件页可正常打开」的 5 个,
并做可用性校验(能打开收件页、页面含短信结构)。
"""
import re
import time
import urllib.request
BASE = "https://www.receivesms.co"

# One entry per supported country, in the order they are scanned:
#   (country code, listing-page URL, inbox path prefix, listing regex)
# Every listing regex captures group(1) = the numeric inbox id and
# group(2) = the phone number as displayed on the listing page.
CONFIGS = [
    (
        "US",
        f"{BASE}/us-phone-numbers/us/",
        "us-phone-number",
        re.compile(
            r'href="/us-phone-number/(\d+)/"[^>]*>.*?<strong>(\+1[\d\s\-]+)</strong>',
            flags=re.DOTALL,
        ),
    ),
    (
        "CA",
        f"{BASE}/canadian-phone-numbers/ca/",
        "ca-phone-number",
        re.compile(
            r'href="/ca-phone-number/(\d+)/"[^>]*>.*?<strong>(\+1[\d\s\-]+)</strong>',
            flags=re.DOTALL,
        ),
    ),
    (
        "GB",
        f"{BASE}/british-phone-numbers/gb/",
        "gb-phone-number",
        re.compile(
            r'href="/gb-phone-number/(\d+)/"[^>]*>.*?<strong>(\+44[\d\s]+)</strong>',
            flags=re.DOTALL,
        ),
    ),
]
def fetch(url):
    """Download *url* and return the response body as text.

    The request is sent with a desktop-browser ``User-Agent`` header and a
    25-second timeout. The body is decoded as UTF-8, with undecodable bytes
    replaced instead of raising. Errors raised by ``urllib`` while opening
    or reading the URL propagate to the caller.
    """
    browser_headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"}
    request = urllib.request.Request(url, headers=browser_headers)
    with urllib.request.urlopen(request, timeout=25) as response:
        raw_body = response.read()
    return raw_body.decode("utf-8", errors="replace")
def parse_list(html, regex):
    """Extract ``(inbox_id, phone_number)`` pairs from a listing page.

    Args:
        html: Markup of a country listing page.
        regex: Compiled pattern whose group(1) is the inbox id and whose
            group(2) is the phone number as displayed.

    Returns:
        A list of ``(inbox_id, number)`` tuples in document order. Each
        number has every whitespace character and every dash removed;
        matches whose cleaned number does not start with "+" are dropped.
    """
    pairs = []
    for match in regex.finditer(html):
        # The listing patterns accept any ``\s`` inside the number, so the
        # capture may contain tabs, newlines or non-breaking spaces from the
        # markup. str.split() with no argument splits on every whitespace
        # run, which removes all of them rather than only ASCII spaces.
        number = "".join(match.group(2).split()).replace("-", "")
        if number.startswith("+"):
            pairs.append((match.group(1), number))
    return pairs
# Length in seconds of each unit that may appear in a relative-age phrase.
# Months and years are approximations; they only need to exceed one day.
_UNIT_SECONDS = {
    "second": 1,
    "minute": 60,
    "hour": 3600,
    "day": 86400,
    "week": 7 * 86400,
    "month": 30 * 86400,
    "year": 365 * 86400,
}
# One "<count> <unit>" part of a phrase; the count may be digits, "a"/"an",
# or missing altogether (then group 1 is the empty string).
_AGE_PART_RE = re.compile(
    r"(?:\b(\d+|an?)\s*)?(second|minute|hour|day|week|month|year)"
)
_RECENT_WINDOW_SECONDS = 24 * 3600


def is_recent(activity_text):
    """Return True when *activity_text* describes an age of at most 24 hours.

    The text is expected to be a relative-age phrase such as
    "5 minutes ago", "an hour ago" or "1 day 3 hours ago". Every
    ``<count> <unit>`` part is converted to seconds and summed, so composite
    phrases and large counts of small units ("3000 minutes ago") are judged
    by their total age rather than by whichever unit word appears in them.

    Args:
        activity_text: The phrase to classify; may be empty or None.

    Returns:
        True if the phrase contains "ago", names at least one time unit and
        the summed age is at most 24 hours ("1 day ago" therefore still
        counts); False otherwise. A unit with "a"/"an" or with no count is
        treated as one unit.
    """
    if not activity_text:
        return False
    text = activity_text.strip().lower()
    if "ago" not in text:
        return False
    parts = _AGE_PART_RE.findall(text)
    if not parts:
        return False
    age_seconds = 0
    for count, unit in parts:
        quantity = int(count) if count.isdigit() else 1
        age_seconds += quantity * _UNIT_SECONDS[unit]
    return age_seconds <= _RECENT_WINDOW_SECONDS
def inbox_usable(html):
    """Return True when *html* has the markup of a working inbox page.

    The page qualifies only if it contains both the ``class="entry-body"``
    and the ``class="sms"`` attribute strings.
    """
    required_markers = ('class="entry-body"', 'class="sms"')
    return all(marker in html for marker in required_markers)
# Number of phone numbers the report aims to list.
_TARGET_COUNT = 5
# Pause (seconds) after every inbox request so the site is not hammered.
_REQUEST_DELAY = 0.3
# Captures the text that follows the "Last activity:" label on an inbox page.
_LAST_ACTIVITY_RE = re.compile(r"Last activity:\s*([^<]+)<")


def _load_pairs(country, list_url, regex, list_cache):
    """Return one country's (id, number) pairs, downloading its list at most once.

    A failed download is reported and yields an empty list; it is not
    cached, so a later pass tries the listing page again.
    """
    if country not in list_cache:
        try:
            list_cache[country] = parse_list(fetch(list_url), regex)
        except Exception as e:  # best-effort: one bad country must not abort the run
            print(f" {country} 列表失败: {e}", flush=True)
            return []
    return list_cache[country]


def _probe_inbox(url, inbox_cache):
    """Return ``(usable, last_activity)`` for one inbox page.

    ``last_activity`` is None when the page has no "Last activity" label.
    Pages that were downloaded are cached so the second pass does not
    request them again; a failed download is not cached and is retried.
    """
    if url not in inbox_cache:
        try:
            page = fetch(url)
        except Exception:
            # Deliberately quiet: an unreachable inbox is simply not a candidate.
            return False, None
        finally:
            time.sleep(_REQUEST_DELAY)
        if inbox_usable(page):
            label = _LAST_ACTIVITY_RE.search(page)
            inbox_cache[url] = (True, label.group(1).strip() if label else None)
        else:
            inbox_cache[url] = (False, None)
    return inbox_cache[url]


def _scan(per_country, *, recent_only, seen, results, list_cache, inbox_cache):
    """Append usable numbers to *results* until the target count is reached.

    Looks at the first *per_country* numbers of every country in CONFIGS.
    With ``recent_only`` set, a number is accepted only when ``is_recent``
    approves its last-activity text; otherwise any usable inbox is accepted.
    """
    for country, list_url, path_prefix, regex in CONFIGS:
        if len(results) >= _TARGET_COUNT:
            return
        pairs = _load_pairs(country, list_url, regex, list_cache)
        for mid, num in pairs[:per_country]:
            if len(results) >= _TARGET_COUNT:
                return
            if num in seen:
                continue
            url = f"{BASE}/{path_prefix}/{mid}/"
            usable, last = _probe_inbox(url, inbox_cache)
            if not usable:
                continue
            if recent_only and not is_recent(last):
                continue
            if last is None:
                last = "N/A"
            seen.add(num)
            results.append((country, num, last, url))
            print(
                f" [{len(results)}/{_TARGET_COUNT}] {country} {num} Last: {last}",
                flush=True,
            )


def main():
    """Find up to five usable receivesms.co numbers and print a report.

    Pass 1 checks the first 12 numbers of each country and keeps those whose
    inbox page is usable and whose last activity is recent. If fewer than
    five were found, pass 2 checks the first 20 numbers of each country and
    accepts any usable inbox. The report is one tab-separated line per
    number: country, number, last activity, inbox URL.
    """
    print("正在从 receivesms.co 多国家拉取号码并校验…", flush=True)
    seen = set()
    results = []
    # Shared by both passes so no page is downloaded twice.
    list_cache = {}
    inbox_cache = {}

    # Pass 1: prefer numbers with recent activity.
    _scan(12, recent_only=True, seen=seen, results=results,
          list_cache=list_cache, inbox_cache=inbox_cache)
    # Pass 2: top up with any number whose inbox page is usable.
    _scan(20, recent_only=False, seen=seen, results=results,
          list_cache=list_cache, inbox_cache=inbox_cache)

    # Report how many were actually found instead of always claiming five.
    print(f"\n--- 可用 {len(results)} 个号码(已测收件页可打开)---")
    for country, num, last, url in results[:_TARGET_COUNT]:
        print(f"{country}\t{num}\t{last}\t{url}")
    print("\n使用:打开对应 URL 即可查看/刷新该号码收到的短信。")


if __name__ == "__main__":
    main()