Compare commits

...

17 Commits

74 changed files with 10393 additions and 56 deletions

2
.gitignore vendored
View File

@ -50,7 +50,7 @@ __pycache__/
# Distribution / packaging
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/

View File

@ -2,6 +2,20 @@
## 使用方式
```shell
# WEB 模式,只启动 web 控制台,不启动任何引擎
python main.py --web
# 等价于
python main.py --web-only
# 启动 web 的时候启动引擎
python main.py --web --crawl --evidence --report wap,pc,site
# 这几个选项可以任意组合,例如只启动采集引擎和证据收集引擎
python main.py --web --crawl --evidence
```
```shell
# 采集模式:采集指定关键字的 URL 列表,直接存入数据库
python main.py --crawl www.yunzhiju.net

View File

@ -1 +1 @@
from .app import *
from .app import MainApp

View File

@ -1,7 +1,9 @@
import argparse
import asyncio
import sys
import os
import time
import signal
from app.engines.report_engine import Reporter
@ -14,6 +16,8 @@ from .models.base import connect_db, create_database
from loguru import logger
import sqlalchemy.exc
from .web.web import WebApp
class MainApp:
"""主应用"""
@ -23,6 +27,11 @@ class MainApp:
self.config: AppConfig = None
self.db_engine = None
# 所有的engine
self.crawl_engine = None
self.evidence_engine = None
self.report_engine = None
def parse_args(self):
"""解析命令行参数"""
parser = argparse.ArgumentParser(description="Baidu Reporter")
@ -36,7 +45,10 @@ class MainApp:
)
parser.add_argument(
"--crawl", help="采集模式,根据域名批量采集 SURL多个域名可使用英文逗号分割也可通过 --crawl-file 传入文件",
"--crawl",
nargs="?",
const="",
help="采集模式,根据域名批量采集 SURL多个域名可使用英文逗号分割也可通过 --crawl-file 传入文件",
)
parser.add_argument(
"--crawl-file", help="目标域名文件,批量传入待采集的域名,每行一个"
@ -57,7 +69,10 @@ class MainApp:
# 添加 web 服务器参数
parser.add_argument(
"--web", action="store_true", help="启动 web 服务器,启动后将忽略其他选项"
"--web", action="store_true", help="启动 web 服务器"
)
parser.add_argument(
"--web-only", action="store_true", help="启动 web 服务器,但是不启动引擎"
)
parser.add_argument(
@ -82,7 +97,7 @@ class MainApp:
if invalid_modes:
parser.error(f'无效的运行模式: {", ".join(invalid_modes)}')
args.report = reports
else:
elif args.report is not None:
args.report = ["pc", "site", "wap"]
# 检查输入的文件是否存在
@ -97,6 +112,10 @@ class MainApp:
def start_cli(self):
"""开启 CLI 模式"""
# 注册 ctrl+c 处理程序,正常结束所有的 engine
signal.signal(signal.SIGINT, self.exit_handler)
if self.args.crawl or self.args.crawl_file:
crawl = CrawlEngine()
crawl.cli_start(self.args.crawl, self.args.crawl_file)
@ -116,7 +135,31 @@ class MainApp:
def start_web(self):
"""开启 Web 模式"""
pass
# 注册 ctrl+c 处理程序,正常结束所有的 engine
signal.signal(signal.SIGINT, self.exit_handler)
# 启动所有的 engine
if self.args.crawl is not None:
self.crawl_engine = CrawlEngine()
self.crawl_engine.start()
logger.info("crawl 启动")
if self.args.evidence:
self.evidence_engine = EvidenceEngine()
self.evidence_engine.start()
logger.info("evidence 启动")
if self.args.report:
self.report_engine = Reporter(self.args.report)
self.report_engine.start()
logger.info("report 启动")
# 启动 web 页面
web_app = WebApp()
asyncio.run(web_app.start())
logger.info("web stop.")
def run(self):
"""运行应用"""
@ -151,9 +194,28 @@ class MainApp:
sys.exit(1)
# 如果指定了 --web 参数,启动 web 服务器,忽略其他选项
if self.args.web:
if self.args.web or self.args.web_only:
logger.info("启动 Web 模式")
return self.start_web()
else:
logger.info("启动 CLI 模式")
return self.start_cli()
def exit_handler(self, signum, frame):
    """SIGINT handler: shut down every running engine in turn.

    Each engine is asked to stop and then joined, so the process only
    finishes after in-flight work has completed.
    """
    logger.debug("CTRL+C called.")
    if self.crawl_engine:
        self.crawl_engine.stop()
        # NOTE(review): crawl uses cli_wait() while the other engines use
        # wait() — confirm both join their worker threads equivalently.
        self.crawl_engine.cli_wait()
        logger.info("crawl 退出")
    if self.evidence_engine:
        self.evidence_engine.stop()
        self.evidence_engine.wait()
        logger.info("evidence 退出")
    if self.report_engine:
        self.report_engine.stop()
        self.report_engine.wait()
        logger.info("report 退出")

View File

View File

@ -0,0 +1,8 @@
import enum
class ApiCode(enum.Enum):
    """Response codes carried in the web layer's API result envelope."""
    OK = 20000             # request succeeded
    PARAM_ERROR = 30000    # invalid request parameters
    DB_ERROR = 40000       # database operation failed
    RUNTIME_ERROR = 50000  # unexpected runtime failure

9
app/constants/domain.py Normal file
View File

@ -0,0 +1,9 @@
import enum
class DomainStatus(enum.Enum):
    """Lifecycle states of a domain's crawl task."""
    READY = 1     # returned to after a crawl finishes; newly added domains also default to this
    QUEUEING = 2  # queued: pushed onto the task queue but not yet picked up
    CRAWLING = 3  # crawl in progress
    PAUSE = 999   # crawling paused

View File

@ -1,15 +1,21 @@
import queue
import re
import threading
import time
import execjs
import requests
from DrissionPage.errors import ElementNotFoundError
from loguru import logger
from sqlmodel import Session, select
from sqlmodel import Session, select, or_, and_
from app.config.config import AppCtx
from app.constants.domain import DomainStatus
from app.models.domain import DomainModel
from app.models.report_urls import ReportUrlModel
from app.utils.common import get_proxies
from app.utils.dp import DPEngine
from app.utils.ydm_verify import YdmVerify
class CrawlEngine:
@ -27,7 +33,7 @@ class CrawlEngine:
# 线程池
self.pool: list[threading.Thread] = []
self.worker_count = 2
self.worker_count = 1
# 工作队列
self.target_queue = queue.Queue(1024)
@ -35,8 +41,6 @@ class CrawlEngine:
# 创建一个浏览器
self.dp_engine = DPEngine()
self.database = AppCtx.g_db_engine
def cli_start(self, target_domains: str, target_domain_filepath: str):
"""CLI 模式启动
target_domains: 英文逗号分割的字符串
@ -90,7 +94,7 @@ class CrawlEngine:
continue
# 存入数据库
with Session(self.database) as session:
with Session(AppCtx.g_db_engine) as session:
self.save_surl(session, domain, surl)
except queue.Empty:
# 队列空了等1秒再取一次
@ -119,7 +123,7 @@ class CrawlEngine:
# 检查在数据库中是否有重复的
for domain in domains:
with Session(self.database) as session:
with Session(AppCtx.g_db_engine) as session:
stmt = select(DomainModel).where(DomainModel.domain == domain)
result = session.exec(stmt).first()
if not result:
@ -148,29 +152,42 @@ class CrawlEngine:
def worker(self):
"""真正的工作函数后续以Web模式启动的时候走这个"""
logger.info("crawl worker start!")
while self.worker_status == 1:
while self.worker_status:
# 检查数据库,从中获取需要爬取的域名
current_timestamp = int(time.time())
with Session(AppCtx.g_db_engine) as session:
stmt = select(DomainModel).where(
DomainModel.latest_crawl_time + DomainModel.crawl_interval <= current_timestamp
or_(
DomainModel.status == 2, # 条件1: status = 2
and_(
DomainModel.latest_crawl_time + DomainModel.crawl_interval * 60 <= current_timestamp, # 条件2
DomainModel.status == 1 # 条件2
)
)
)
domains = session.exec(stmt).all()
for domain_model in domains:
# 采集前修改状态
domain_model.status = DomainStatus.CRAWLING.value
session.add(domain_model)
session.commit()
# 采集
surl_set = self.crawl(domain_model.domain)
# 存储
if surl_set:
self.save_surl(session, domain_model, surl_set)
self.save_surl(session, domain_model.domain, surl_set)
domain_model.latest_crawl_time = int(time.time())
domain_model.status = DomainStatus.READY.value
session.add(domain_model)
session.commit()
self.ev.wait(60)
self.ev.wait(10)
logger.info("crawl worker stop!")
@ -182,8 +199,8 @@ class CrawlEngine:
try:
# 初始数据
end_time = int(time.time())
start_time = end_time - 3600 * 24 * 30 # 获取最近一个月的数据
# end_time = int(time.time())
# start_time = end_time - 3600 * 24 * 30 # 获取最近一个月的数据
# 依次每一页处理
max_page = 10 # 最大页码数量0表示不限制最大数量
@ -216,6 +233,26 @@ class CrawlEngine:
# f"https://www.baidu.com/s?wd=site%3A{domain}&gpc=stf%3D{start_time}%2C{end_time}%7Cstftype%3D1&pn={(current_page - 1) * 10}")
# tab.get(f"https://www.baidu.com/s?wd=site%3A{domain}&pn={(current_page - 1) * 10}")
# 检查一下当前的URL是不是跳到验证码的页面
if "//wappass.baidu.com/static/captcha/tuxing_v2.html" in tab.url:
logger.warning("触发验证码了,尝试识别")
idx = 0
while idx < 3:
idx += 1
logger.debug(f"开始第{idx}次识别...")
captcha_result = self.verify_captcha(tab.url)
if not captcha_result:
tab.refresh()
continue
else:
tab.get(captcha_result)
break
else:
logger.error("验证码打码失败放弃本次采集等待3分钟后继续")
self.ev.wait(180)
break
# 终止条件
if current_page > max_page and max_page:
logger.debug(f"{threading.current_thread().name} 达到指定页码,退出")
@ -231,12 +268,19 @@ class CrawlEngine:
tab.wait.eles_loaded("@id=content_left")
results = tab.ele("@id=content_left").eles("@class:result")
# temp = [result.attr("mu") for result in results if result.attr("mu") is not None]
# logger.debug(f"{len(results)=}")
for result in results:
# logger.debug(f"{result=}")
surl = result.attr("mu")
if not surl:
continue
logger.debug(f"{threading.current_thread().name} 找到 URL : {surl}")
# 添加结果的时候,也检查一下抓到的 surl 是否和目标域名有关
if domain not in surl:
logger.debug(f"{threading.current_thread().name} URL {surl} 与目标域名 {domain} 无关,跳过")
else:
surl_set.add(surl)
logger.debug(f"{threading.current_thread().name} 找到 {surl}")
# 翻页的时候等一下,别太快了
self.ev.wait(0.3)
@ -261,6 +305,12 @@ class CrawlEngine:
def save_surl(session: Session, domain: str, surl_set: set[str]):
"""保存采集到的URL"""
for surl in surl_set:
# 简单的判断一下 surl 中是否包含目标域名
if domain not in surl:
logger.debug(f"跳过保存 {surl} 因为与目标域名 {domain} 不符合")
continue
# 先检查是否存在
stmt = select(ReportUrlModel).where(ReportUrlModel.surl == surl)
exist = session.exec(stmt).first()
@ -283,3 +333,170 @@ class CrawlEngine:
)
session.add(example)
session.commit()
# def captcha_listener(self):
# for pkg in self.tab.listen.steps():
# if "/cap/init" in pkg.url:
# self.captcha_data["init"] = pkg.response.body
# if "/cap/style" in pkg.url:
# self.captcha_data["style"] = pkg.response.body
# self.captcha_data["referer"] = pkg.request.headers.get("Referer")
# logger.debug(f"触发验证码的 referer: {self.captcha_data["referer"]}")
#
# self.captcha_data["cookie"] = pkg.request.headers.get("Cookie")
# logger.debug(f"触发验证码的 cookie: {self.captcha_data['cookie']}")
# if "/cap/log" in pkg.url:
# self.captcha_data["log"] = pkg.response.body
def verify_captcha(self, current_url: str):
    """Try to solve Baidu's rotation captcha and return the redirect URL.

    The flow differs slightly from pc_reporter's, so it is implemented
    separately here: /cap/init -> /cap/style -> download image -> YDM
    recognition -> /cap/log -> /cap/c. Returns the decoded redirect URL on
    success, or None on any failure.
    """
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7,zh-TW;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Referer': current_url,
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0",
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua_wap': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        'sec-ch-ua_wap-mobile': '?0',
        'sec-ch-ua_wap-platform': '"Windows"',
        # "Cookie": self.captcha_data["cookie"],
    }
    # Resolve the AS / TK values via /cap/init
    ts = time.time()
    ts1 = int(ts)
    ts2 = int(ts * 1000)
    response = requests.post(
        "https://passport.baidu.com/cap/init",
        data={
            "_": ts2,
            # refresh the timestamp embedded in the referring URL
            "refer": re.sub(r'timestamp=\d+', f'timestamp={ts1}', current_url),
            "ak": "c27bbc89afca0463650ac9bde68ebe06",
            "ver": "2",
            "scene": "",
            "ds": "",
            "tk": "",
            "as": "",
            "reinit": 0
        },
        headers=headers,
        proxies=get_proxies()
    ).json()
    as_value = response["data"]["as"]
    tk_value = response["data"]["tk"]
    # Fetch the captcha style (backstr + image link) via /cap/style
    response = requests.post(
        "https://passport.baidu.com/cap/style",
        data={
            "_": int(time.time() * 1000),
            "refer": re.sub(r'timestamp=\d+', f'timestamp={ts1}', current_url),
            "ak": "c27bbc89afca0463650ac9bde68ebe06",
            "tk": tk_value,
            "scene": "",
            "isios": "0",
            "type": "spin",
            "ver": "2"
        },
        headers=headers,
        proxies=get_proxies()
    )
    response = response.json()
    backstr = response["data"]["backstr"]
    captcha_link = response["data"]["captchalist"][0]["source"]["back"]["path"]
    logger.debug(f"{backstr=}, {captcha_link=}")
    # Download the captcha image to disk for the recognizer
    image_response = requests.get(captcha_link, headers=headers, proxies=get_proxies())
    with open("captcha.png", "wb") as f:
        f.write(image_response.content)
    logger.debug("download captcha.png")
    # Recognize the captcha rotation via the YDM service
    ydm = YdmVerify()
    with open("captcha.png", "rb") as fp:
        picture = fp.read()
    slide_distance = ydm.rotate(picture)
    logger.debug(f"{slide_distance=}")
    if not slide_distance:
        logger.error("识别验证码失败")
        return None
    # Convert the recognized angle into the 0-1 ratio Baidu expects
    rotate_angle_rate = round(slide_distance / 360, 2)
    logger.debug(f"{rotate_angle_rate=}")
    if not rotate_angle_rate:
        logger.debug("识别验证码失败")
        return None
    # Build and submit the captcha answer to /cap/log
    time_log = str(int(time.time() * 1000))
    with open("./js/mkd_v2_link_submit.js", 'r', encoding='utf-8') as f:
        ds_js = f.read()
    # "fs" payload is produced by Baidu's obfuscated JS, executed via execjs
    fs = execjs.compile(ds_js).call('getFs2', backstr, rotate_angle_rate, as_value)
    data = {
        "_": time_log,
        "refer": current_url,
        "ak": "c27bbc89afca0463650ac9bde68ebe06",
        "as": as_value,
        "scene": "",
        "tk": tk_value,
        "ver": "2",
        "cv": "submit",
        "typeid": "spin-0",
        "fuid": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnfaJTn/Ne60I9LwR04t6XmGEimjy3MrXEpSuItnI4KD0FJKzTbw1AN69fBnzR2FuvMmmQZ+1zgJ72wdcVU+mcQxiE2ir0+TEYgjPJt1Qa3K1mLi+P4IWJeag2lvxB4yJ/GgLbz7OSojK1zRbqBESR5Pdk2R9IA3lxxOVzA+Iw1TWLSgWjlFVG9Xmh1+20oPSbrzvDjYtVPmZ+9/6evcXmhcO1Y58MgLozKnaQIaLfWRPAn9I0uOqAMff6fuUeWcH1mRYoTw2Nhr4J4agZi377iM/izL6cVCGRy2F8c0VpEvM5FjnYxYstXg/9EfB3EVmKAfzNRIeToJ5YV9twMcgdmlV1Uhhp5FAe6gNJIUptp7EMAaXYKm11G+JVPszQFdp9AJLcm4YSsYUXkaPI2Tl66J246cmjWQDTahAOINR5rXR5r/7VVI1RMZ8gb40q7az7vCK56XLooKT5a+rsFrf5Zu0yyCiiagElhrTEOtNdBJJq8eHwEHuFBni9ahSwpC7lbKkUwaKH69tf0DFV7hJROiLETSFloIVkHdy3+I2JUr1LsplAz0hMkWt/tE4tXVUV7QcTDTZWS/2mCoS/GV3N9awQ6iM6hs/BWjlgnEa1+5iP7WSc7RJ34FaE5PsyGXyoCWdXwNRGSZPSvVtB/Ea6w5FKazXcZ/j40FJv+iLGBn3nkkgHlne61I8I7KhtQgIkmBMJIjPMkS/L051MeqdGScsKYTJuSucgI5c3+79eVH+y2TvbOTuuHv1uGxwXFb2atIU1ZYPbmmXculmizKcKI/s44qf8uM8iBZLGkKeVyL74aPyLkg7Gk359g98BIGN/ZzJR/h+Y6AyFx+HlMoYJnS06dVmqFbvlCtSdGylKQ5f8eWtxPkJGqOFtWjIVteQYMsH/AaSJonqw+WLiZvGjYfm9p0alEyujapoTy77HzDcUoU1wUSXa5xS/Z6hXEr2OnLi0LdPVcGjz8lpLcdVeSfm9p0alEyujapoTy77HzDWf5PERRSTFqLd9BTUHLyY4Ji3EQLGQPaM1aeHxG1bJZH0s1Si/KwzTaTYzu6ziQiqwcr2kaYUiH+fMOxn69/BhNJVMhpQkhprc1KZuJRvXjppq0gKweencPxgS/jd0rjw==",
        # "fuid": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGm51EODDlnqgz44AdUN5VVLGEimjy3MrXEpSuItnI4KD4X6JLdk9kt5JRR+RlJ66q1+4kQEivhwAoCrm3oUNdYdi+yNJadLMQy5pqjmiW757BJsVwXkGdF24AsEQ3K5XBbh9EHAWDOg2T1ejpq0s2eFy9ar/j566XqWDobGoNNfmfpaEhZpob9le2b5QIEdiQcF+6iOKqU/r67N8lf+wxW6FCMUN0p4SXVVUMsKNJv2TwEq3+MvKTlPBjfdM81CMPq4LkPV+7TROLMG0V6r0A++zkWOdjFiy1eD/0R8HcRWYsUPXjDqADgs+Xs31pnSHeup+HBavJhpxl858h16cMtKQmxzisHOxsE/KMoDNYYE7ucLE22Bi0Ojbor7y6SXfVj7+B4iuZO+f7FUDWABtt/WWQqHKVfXMaw5WUmKnfSR5wwQa+N01amx6X+p+x97kkGmoNOSwxWgGvuezNFuiJQdt51yrWaL9Re9fZveXFsIu/gzGjL50VLcWv2NICayyI8BE9m62pdBPySuv4pVqQ9Sl1uTC//wIcO7QL9nm+0N6JgtCkSAWOZCh7Lr0XP6QztjlyD3bkwYJ4FTiNanaDaDJMNOONUIptCYaHTS+UC6IlHE1MUFHThGQXNkGIX8AdBh0GvEV9dnyTGKy8XFjCQiSGk66HDxtjKMU4HPNa0dtuC6f3Qc1BA80dVENIrm5fvupUvtUx+t4D1r3M6jRrNCFDmi5MpkOxe5k51gshb/lV68JOKfsQeXT2p7EM9kdbZAphQDW3ajjXdDRh/L4vMDrWe1PKtUcuW/fWn+hZVZzw+X5dQWsFNhWzqaDLLTRZQpSBdWrMIHd5mkoSCb/UJmNfWI9UswFst29h1Heb04lgaYXvleBbteLbUi5NoCAChP5oZfoCeoKKuvUEAPXXTPVjO0TTi0sVqFSdG+GFyi03wlrm3wCRN8QsWhT10pXJL0RhcLTagDnxauF9flnVwiWaq+daLSn0MEazavBACRErAMWXEI9EFQPGJKv0Ijpq+0VDw8xeJloxMf4I+yn8oxuqFuBSz8I0Kfe0QZwk5OQW6lRvv5iBU4fcPzWWTZ9FnzQ2GA5eh8aiV0nDOGmtfhiYNjbs2NxP0acAgApNd0ew==",
        "fs": fs
    }
    response = requests.post(
        "https://passport.baidu.com/cap/log",
        headers=headers,
        data=data,
        proxies=get_proxies(),
    ).json()
    try:
        result = {
            "ds": response["data"]["ds"],
            "op": response["data"]["op"],
            "tk": response["data"]["tk"]
        }
    except KeyError:
        logger.error(f"验证码没转成功, response: {response=}")
        time.sleep(1)
        return None
    logger.debug(f"{result=}")
    # op == 1 means the captcha answer was accepted
    if result["op"] != 1:
        logger.error(f"op != 1, 重试")
        return None
    # Call /cap/c to obtain the URL to redirect back to
    response = requests.post(
        "https://passport.baidu.com/cap/c?ak=c27bbc89afca0463650ac9bde68ebe06",
        headers=headers,
        json={
            "tk": result["tk"],
            "ds": result["ds"],
            "qrsign": "",
            "refer": current_url
        },
        proxies=get_proxies()
    )
    data = response.json()
    if data["data"].get("f"):
        logger.error(f"验证码失败: {data['data'].get('f')}")
        return None
    if data["data"].get("s"):
        logger.debug("验证成功URL" + data["data"].get("s").get("url"))
        url = data["data"].get("s").get("url")
        # The URL arrives with \uXXXX escapes; decode them into a usable URL
        url = url.encode("utf-8").decode("unicode-escape")
        logger.success("解码后的URL" + url)
        return url
    # Neither failure ("f") nor success ("s") payload present: implicit None

View File

@ -59,8 +59,8 @@ class EvidenceEngine:
logger.debug(f"开始获取 {target['surl']} 的举报数据")
self.get_screenshot_and_report_link(target)
# 每分钟跑一次
self.ev.wait(60)
# 每10秒跑一次
self.ev.wait(10)
def stop(self):
"""结束线程"""
@ -69,6 +69,9 @@ class EvidenceEngine:
self.dp_engine.close()
self.wap_dp_engine.close()
def wait(self):
    """Block until the worker thread has exited."""
    self.worker_thread.join()
def get_surl_from_db(self):
"""从数据库中获取数据"""
result: list = []
@ -87,7 +90,20 @@ class EvidenceEngine:
# Part1 获取证据截图
logger.debug(f"开始获取 {surl} 在百度搜索中的截图")
img_path, tab = self.get_screenshot(target)
img_path, tab, has_result = self.get_screenshot(target)
if not has_result:
# 如果没有搜到结果,直接把 has_evidence 标记为 true 就行了
with Session(self.database) as session:
stmt = select(ReportUrlModel).where(ReportUrlModel.id == target["id"])
model: ReportUrlModel = session.exec(stmt).first()
if not model:
logger.error(f"{target['id']} 记录不存在,跳过...")
return None
# 更新数据
model.has_evidence = True
session.add(model)
session.commit()
return None
if not img_path:
return None
@ -134,7 +150,7 @@ class EvidenceEngine:
except Exception as e:
logger.error(f"获取证据截图和举报链接失败: {e}")
def get_screenshot(self, target: dict) -> tuple[str | None, MixTab]:
def get_screenshot(self, target: dict) -> tuple[str | None, MixTab, bool]:
"""获取搜索页面的截图,返回 img_path """
search_keyword = target["surl"].lstrip("https://").lstrip("http://")
tab = self.dp_engine.browser.new_tab()
@ -144,12 +160,13 @@ class EvidenceEngine:
if "未找到相关结果" in tab.html:
logger.info(f"没有关于 {search_keyword} 的数据")
return None, tab
return None, tab, False
# 图片的存储路径
# 截完图先不要关闭 tab别的地方还要用
img_path = f"./imgs/{target['domain']}/{md5(target['surl'])}.png"
return self.do_screenshot(tab, img_path)
img_path, tab = self.do_screenshot(tab, img_path)
return img_path, tab, True
def get_wap_screenshot(self, target: dict) -> tuple[str | None, MixTab]:
"""用 wap dp 再截一张 surl 本身的图"""

View File

@ -33,6 +33,7 @@ class Reporter:
def wait(self):
self.worker_thread.join()
# noinspection DuplicatedCode
def cli_start(self):
for mode in self.mode:
if mode == "pc":
@ -46,20 +47,33 @@ class Reporter:
continue
def stop(self):
    """Stop every configured sub-reporter, then stop the worker loop."""
    for mode in self.mode:
        if mode == "pc":
            self.reporters["pc"].stop()
        elif mode == "wap":
            self.reporters["wap"].stop()
        elif mode == "site":
            self.reporters["site"].stop()
        else:
            logger.error(f"参数错误: {mode}")
            continue
    # Clear the run flag and wake the worker so it can exit promptly.
    self.status = 0
    self.ev.set()
# noinspection DuplicatedCode
def worker(self):
while self.status:
for mode in self.mode:
if mode == "pc":
if mode == "pc" and self.status:
self.reporters["pc"].run()
elif mode == "wap":
elif mode == "wap" and self.status:
self.reporters["wap"].run()
elif mode == "site":
elif mode == "site" and self.status:
self.reporters["site"].run()
else:
logger.error(f"参数错误: {mode}")
continue
self.ev.wait(60)
self.ev.wait(10)

View File

@ -1,5 +1,6 @@
from abc import ABC, abstractmethod
class BaseReporter(ABC):
"""所有 reporter 的基类"""
@ -7,3 +8,7 @@ class BaseReporter(ABC):
def run(self):
"""运行 reporter子类必须实现此方法"""
pass
def stop(self):
"""控制结束"""
pass

View File

@ -1,5 +1,6 @@
import os.path
import random
import threading
import time
from urllib.parse import urlparse, parse_qs
@ -19,6 +20,8 @@ from ...utils.ydm_verify import YdmVerify
class PcReporter(BaseReporter):
def __init__(self):
self.engine_name = "PC_REPORTER"
self.status = 1
self.ev = threading.Event()
self.database = AppCtx.g_db_engine
self.upload_pic_url = "http://jubao.baidu.com/jubao/accu/upload"
@ -45,14 +48,23 @@ class PcReporter(BaseReporter):
"Cookie": "",
}
def stop(self):
    """Request a graceful shutdown: clear the run flag and wake any pending wait."""
    self.status = 0
    self.ev.set()
    logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出")
def run(self):
with Session(self.database) as session:
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_one == False)
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_one == False).where(ReportUrlModel.has_evidence == True)
rows: list[ReportUrlModel] = session.exec(stmt).all()
logger.info(f"[{self.engine_name}] 共计 {len(rows)} 条记录需要举报")
for row in rows:
if not self.status:
break
# 选个 cookie
report_cookie = random.choice(get_all_cookies())
self.headers["Cookie"] = report_cookie
@ -66,8 +78,8 @@ class PcReporter(BaseReporter):
domain = row.domain
# timestamp_s = {int(time.time() * 1000)} # 这里为啥要用 set ?
timestamp_s = int(time.time() * 1000)
# referer = f"https://jubao.baidu.com/jubao/accu/?surl={surl}token={token}&title={title}&q={q}&has_gw=0&has_v=0&_t8={timestamp_s}"
referer = "https://jubao.baidu.com/"
referer = f"https://jubao.baidu.com/jubao/accu/?surl={surl}token={token}&title={title}&q={q}&has_gw=0&has_v=0&_t8={timestamp_s}"
# referer = "https://jubao.baidu.com/"
logger.debug(f"referer: {referer}, type of referer: {type(referer)}")
self.headers["Referer"] = referer
@ -120,6 +132,8 @@ class PcReporter(BaseReporter):
retry += 1
self.ev.wait(5)
def do_report(self, ds, tk, surl, token, title, q, upload=''):
try:
phone = generate_random_phone_number()
@ -201,20 +215,26 @@ class PcReporter(BaseReporter):
# 获取 as、tk 值
try:
get_as_tk = self.post_init(surl, token, title, q, timestamp_s)
# logger.debug(f"{get_as_tk=}")
get_as = get_as_tk['as']
get_tk = get_as_tk['tk']
# 获取验证码图片下载链接、backstr
get_style_result = self.get_style(get_tk, surl, token, title, q, timestamp_s)
# logger.debug(f"{get_style_result=}")
get_backstr = get_style_result['backstr']
pic_download_link = get_style_result['captcha']
# 下载验证码图片
self.download_captcha(pic_download_link)
rotate_angle_rate = self.get_rotate_angle_rate()
logger.debug(f"{rotate_angle_rate=}")
# key = self.get_key(get_as)
if not rotate_angle_rate:
return {'op': 3}
get_ds_tk = self.post_log(get_as, get_tk, get_backstr, rotate_angle_rate)
logger.debug(f"{get_ds_tk=}")
log_ds = get_ds_tk['ds']
log_tk = get_ds_tk['tk']
log_op = get_ds_tk['op']
@ -225,7 +245,7 @@ class PcReporter(BaseReporter):
}
return result
except Exception as e:
logger.error(f'{e}')
logger.exception(f'{e}')
return {'op': 3}
def post_init(self, surl, token, title, q, timestamp_s):
@ -303,9 +323,13 @@ class PcReporter(BaseReporter):
with open('./captcha/captcha.png', 'rb') as p:
picture = p.read()
slide_distance = identify_distance.rotate(image=picture)
logger.debug(f"{slide_distance=}")
if not slide_distance:
return None
# 旋转角度为
# logger.info('rotate angle: ' + str(slide_distance))
rotate_angle_rate = round(slide_distance / 360, 2)
logger.debug(f"{rotate_angle_rate=}")
# logger.info('rotate angle rate: ' + str(rotate_angle_rate))
return rotate_angle_rate

View File

@ -1,6 +1,7 @@
import os.path
import random
import re
import threading
import time
import requests
@ -19,6 +20,8 @@ from ...utils.ua import random_ua
class SiteReporter(BaseReporter):
def __init__(self):
self.engine_name = "SITE_REPORTER"
self.status = 1
self.ev = threading.Event()
self.upload_pic_url = "https://help.baidu.com/api/mpic"
self.report_url = "https://help.baidu.com/jubaosubmit"
@ -42,15 +45,25 @@ class SiteReporter(BaseReporter):
self.token_pattern = r'name="submit_token" value="(.*?)"'
def stop(self):
    """Request a graceful shutdown: clear the run flag and wake any pending wait."""
    # logger.debug(f"{self.engine_name} stop called.")
    self.status = 0
    self.ev.set()
    logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出")
def run(self):
"""实现 PC 端的举报逻辑"""
with Session(self.database) as session:
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_site == False)
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_site == False).where(ReportUrlModel.has_evidence == True)
rows: list[ReportUrlModel] = session.exec(stmt).all()
logger.info(f"[{self.engine_name}] 共计 {len(rows)} 条需要举报")
for row in rows:
if not self.status:
break
# 生成举报需要的基础数据
surl = row.surl
q = row.q
@ -78,7 +91,7 @@ class SiteReporter(BaseReporter):
session.commit()
# 等待5秒继续举报
time.sleep(5)
self.ev.wait(5)
def upload_pic(self, img_path: str):
try:

View File

@ -2,6 +2,7 @@ import base64
import json
import os.path
import random
import threading
import time
import requests
@ -20,6 +21,8 @@ class WapReporter(BaseReporter):
def __init__(self):
self.engine_name = "WAP_REPORTER"
self.status = 1
self.ev = threading.Event()
self.report_url = "https://ufosdk.baidu.com/api?m=Client&a=postMsg"
self.request = requests.session()
@ -40,16 +43,27 @@ class WapReporter(BaseReporter):
self.database = AppCtx.g_db_engine
self.all_cookies = get_all_cookies()
def stop(self):
    """Request a graceful shutdown: clear the run flag and wake any pending wait."""
    self.status = 0
    self.ev.set()
    logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出")
def run(self):
"""实现 WAP 端的举报逻辑"""
with Session(self.database) as session:
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_wap == False)
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_wap == False).where(
ReportUrlModel.has_evidence == True)
rows: list[ReportUrlModel] = session.exec(stmt).all()
logger.debug(f"[{self.engine_name}] 共找到 {len(rows)} 条待举报记录")
for row in rows:
if not self.status:
break
self.ev.wait(1)
# 选个 cookie
report_cookie = random.choice(get_all_cookies())
report_site_cookie = GenCookie.run(report_cookie)
@ -74,7 +88,7 @@ class WapReporter(BaseReporter):
session.add(row)
session.commit()
time.sleep(5)
self.ev.wait(5)
def get_user_info(self):
try:
@ -82,9 +96,10 @@ class WapReporter(BaseReporter):
# wapUserAgent = random.choice(self.wapUserAgent)
response = self.request.get(
"https://ufosdk.baidu.com/api?m=Web&a=getUserInfo&appid=293852",
headers=self.headers, proxies=self.proxies, allow_redirects=False, timeout=10, verify=False
headers=self.headers, proxies=self.proxies, allow_redirects=False, timeout=10
)
json_data = response.json()
logger.debug(f"{self.engine_name} get_user_info response: {json_data}")
uid = json_data['result']['uid']
un = json_data['result']['un']
userinfo["uid"] = uid
@ -140,11 +155,14 @@ class WapReporter(BaseReporter):
proxies=self.proxies,
allow_redirects=False,
timeout=10,
verify=False
)
# logger.debug(req.json())
logger.debug(response.json())
if response.json()['errno'] == 0:
data = response.json()
logger.debug(data)
if data['errno'] == 0:
logger.success(f"[{self.engine_name}] {fb_url} 举报成功")
return True
if "请勿重复提交" in data["errmsg"]:
logger.success(f"[{self.engine_name}] {fb_url} 重复提交,标记为成功")
return True
return False

View File

@ -26,13 +26,16 @@ def update_updated_at(mapper, connection, target):
target.updated_at = get_timestamp()
# noinspection PyUnresolvedReferences
def connect_db(config: AppConfig):
"""连接数据库"""
# 导入所有模型,为了自动创建数据表
from .domain import DomainModel
from .report_urls import ReportUrlModel
dsn = f"mysql+pymysql://{config.database.user}:{config.database.password}@{config.database.host}:{config.database.port}/{config.database.database}"
engine = create_engine(dsn, echo=False)
engine = create_engine(dsn, echo=False, pool_size=4, max_overflow=10, pool_recycle=60, pool_pre_ping=True)
SQLModel.metadata.create_all(engine)
AppCtx.g_db_engine = engine

View File

@ -12,7 +12,7 @@ class DomainModel(BaseModel, table=True):
# 域名
domain: str = Field(alias="domain", default="", sa_type=VARCHAR(1024))
# 爬取状态,TODO先空着后续有任务控制之后用这个字段表示这个域名的任务状态
# 爬取状态,@see constants.DomainStatus
status: int = Field(alias="status", default=0)
# 爬取间隔默认间隔为1周

View File

@ -1,6 +1,8 @@
import hashlib
import random
from app.config.config import AppCtx
def md5(s: str) -> str:
m = hashlib.md5()
@ -9,17 +11,24 @@ def md5(s: str) -> str:
def get_proxies():
username = "t14131310374591"
password = "qg6xwmrq"
tunnel = "d432.kdltps.com:15818"
proxies = {
"http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
"https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
}
# username = "t14131310374591"
# password = "qg6xwmrq"
# tunnel = "d432.kdltps.com:15818"
# proxies = {
# "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
# "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
# }
# proxies = {
# "http": "http://127.0.0.1:8080",
# "https": "http://127.0.0.1:8080"
# }
proxy = AppCtx.g_app_config.chrome.proxy
proxies = {
"http": proxy,
"https": proxy
}
return proxies

View File

@ -2,10 +2,12 @@ import base64
import json
import requests
from loguru import logger
class YdmVerify(object):
_custom_url = "https://www.jfbym.com/api/YmServer/customApi"
_token = "HhUGwpI6AtQGoux36i1ZpsDv7hwGSbr1hQ0RX-HXSZE"
_token = "2HNCDBee_JFmXAZZanQm9I7x1sqQln9BggF1xaGtMX0"
_headers = {
'Content-Type': 'application/json'
}
@ -17,4 +19,11 @@ class YdmVerify(object):
"type": "90009"
}
resp = requests.post(self._custom_url, headers=self._headers, data=json.dumps(payload))
logger.debug(f"{resp.json()=}")
response_data = resp.json()
if response_data.get("code") == 10002:
logger.error(f'{response_data.get("msg")}')
return None
return resp.json()['data']['data']

0
app/web/__init__.py Normal file
View File

View File

View File

@ -0,0 +1,116 @@
from typing import Annotated
from fastapi import APIRouter, UploadFile, Form, Query
from app.constants.api_result import ApiCode
from app.constants.domain import DomainStatus
from app.web.request.domain_request import AddDomainRequest, DeleteDomainRequest, UpdateDomainRequest, \
GetDomainListRequest, CrawlNowRequest, ToggleDomainRequest
from app.web.results import ApiResult
from app.web.service.domain_service import DomainService
router = APIRouter(prefix="/api/domain", tags=["域名管理"])
@router.get("/v1/list")
def get_all_domains(request: Annotated[GetDomainListRequest, Query()]):
    """List domains, optionally filtered by domain and status (omit the
    filters to return everything), paginated via page/size."""
    return DomainService.get_list(request.page, request.size, request.domain, request.status)
@router.post("/v1/add")
def add_domains(request: AddDomainRequest):
    """Add domains, silently skipping any that already exist."""
    lookup = DomainService.get_by_domains(request.domains)
    if not lookup.success:
        return lookup
    known = {record.domain for record in lookup.data}
    pending = [d for d in request.domains if d not in known]
    if not pending:
        # Everything was already present; report zero additions.
        return ApiResult.ok(0)
    return DomainService.add_domains(request.crawl_interval, request.crawl_now, pending)
@router.post("/v1/import")
def import_domains(
    # When a file and form fields are submitted together, a single form model
    # cannot be used — each field must be declared individually.
    file: UploadFile,
    crawl_interval: int = Form(),
    crawl_now: bool = Form(),
):
    """Add domains from an uploaded file, one domain per line.

    Fixes over the previous version: blank lines are skipped (they used to be
    stored as empty-string domains) and duplicate lines within the file are
    collapsed. If a single file can be very large, move this into a
    background task later.
    """
    domains: list[str] = []
    seen: set[str] = set()
    for raw_line in file.file:
        domain = raw_line.strip().decode("UTF-8")
        # Skip empty lines and in-file duplicates.
        if not domain or domain in seen:
            continue
        seen.add(domain)
        domains.append(domain)
    # Drop domains that already exist in the database.
    result = DomainService.get_by_domains(domains)
    if not result.success:
        return result
    existed_domains = [item.domain for item in result.data]
    new_domains = [x for x in domains if x not in existed_domains]
    # Add the remainder and return the service result.
    return DomainService.add_domains(crawl_interval, crawl_now, new_domains)
# noinspection DuplicatedCode
@router.post("/v1/update")
def update_domain(request: UpdateDomainRequest):
    """Update domain settings (mainly the crawl interval) for a batch of ids."""
    # Every id must refer to an existing domain before anything is written.
    found = DomainService.get_by_ids(request.domain_ids)
    if not found.success:
        return found
    known_ids = {row.id for row in found.data}
    for domain_id in request.domain_ids:
        if domain_id not in known_ids:
            return ApiResult.error(ApiCode.PARAM_ERROR.value, f"域名 ID {domain_id} 不存在")
    return DomainService.update_domain_interval(request.domain_ids, request.crawl_interval)
# noinspection DuplicatedCode
@router.post("/v1/delete")
def delete_domain(request: DeleteDomainRequest):
    """Delete domains in batch; every id must exist or nothing is deleted."""
    found = DomainService.get_by_ids(request.domain_ids)
    if not found.success:
        return found
    # Reject the whole request as soon as one unknown id shows up.
    known_ids = {row.id for row in found.data}
    for domain_id in request.domain_ids:
        if domain_id not in known_ids:
            return ApiResult.error(ApiCode.PARAM_ERROR.value, f"域名 ID {domain_id} 不存在")
    return DomainService.delete_domains(request.domain_ids, request.remove_surl)
@router.post("/v1/crawl")
def crawl_now(request: CrawlNowRequest):
    """Crawl immediately: implemented by setting status to QUEUEING (2) so
    the crawl worker picks these domains up on its next pass."""
    result = DomainService.update_domain_status(request.domain_ids, DomainStatus.QUEUEING.value)
    return result
@router.post("/v1/toggle")
def toggle_domain(request: ToggleDomainRequest):
    """Pause crawling for the given domains.

    NOTE(review): despite the "toggle" name this always sets PAUSE; resuming
    presumably happens elsewhere — confirm the intended semantics.
    """
    return DomainService.update_domain_status(request.domain_ids, DomainStatus.PAUSE.value)

View File

@ -0,0 +1,85 @@
from typing import Annotated
from fastapi import APIRouter, Query
from loguru import logger
from app.web.request.report_request import AddUrlsRequest, CollectEvidenceRequest, ReportRequest, GetUrlListRequest
from app.web.service.domain_service import DomainService
from app.web.service.report_service import ReportURLService
router = APIRouter(prefix="/api/urls", tags=["URL管理"])
@router.get("/v1/list")
def get_all_urls(request: Annotated[GetUrlListRequest, Query()]):
    """List report URLs, optionally filtered by domain / surl / report and evidence flags, with paging.

    Omitted filters return the full data set (paged).
    """
    logger.debug(f"{request=}")
    # Keyword arguments make the long parameter list self-describing.
    return ReportURLService.get_list(
        domain=request.domain,
        surl=request.surl,
        is_report_by_one=request.is_report_by_one,
        is_report_by_site=request.is_report_by_site,
        is_report_by_wap=request.is_report_by_wap,
        has_evidence=request.has_evidence,
        page=request.page,
        size=request.size,
    )
@router.post("/v1/add")
def add_urls(request: AddUrlsRequest):
    """
    Manually add URLs in batch.  Payload shape:
        [{"domain": "", "surl": ""}, {"domain": "", "surl": ""}, ...]
    Domains that do not exist yet are created first (default interval, crawl now),
    then the URLs are attached to their domain ids.
    """
    # Resolve which of the referenced domains already exist.
    input_domains = [item.domain for item in request.urls]
    result = DomainService.get_by_domains(input_domains)
    if not result.success:
        return result
    # BUG FIX: result.data holds DomainModel rows, not strings, so the original
    # `x not in result.data` never matched and every domain was re-created on
    # each call.  Compare against the row's .domain instead, and dedupe while
    # preserving input order.
    existed_domains = {item.domain for item in result.data}
    new_domains = list(dict.fromkeys(x for x in input_domains if x not in existed_domains))
    if new_domains:
        result = DomainService.add_domains(1440, True, new_domains)
        if not result.success:
            return result
    # Re-query so freshly created domains have ids as well.
    result = DomainService.get_by_domains(input_domains)
    if not result.success:
        return result
    # Map domain name -> id and create the URL rows.
    domain_map: dict[str, int] = {x.domain: x.id for x in result.data}
    return ReportURLService.add_urls(domain_map, request.urls)
@router.post("/v1/evidence")
def collect_evidence(request: CollectEvidenceRequest):
    """Force evidence collection for the given URL ids, including already-collected ones.

    TODO: a task queue would be cleaner; for now resetting the DB flag is enough —
    the evidence engine picks the rows up on its next pass.
    """
    return ReportURLService.batch_update_evidence_flag(request.ids)
@router.post("/v1/report")
def report(request: ReportRequest):
    """Queue the given URL ids for reporting on the selected channels (one/site/wap).

    Implemented by flipping DB flags; the report engine handles the rows on its
    own schedule.
    """
    logger.debug(f"{request=}")
    return ReportURLService.batch_update_report_flag(
        ids=request.ids,
        report_by_one=request.report_by_one,
        report_by_site=request.report_by_site,
        report_by_wap=request.report_by_wap,
    )

View File

@ -0,0 +1,10 @@
from fastapi import APIRouter
router = APIRouter(tags=["健康检查"])
@router.get("/status")
async def status():
    """Liveness probe endpoint."""
    return {"status": "ok"}

View File

View File

@ -0,0 +1,48 @@
from pydantic import BaseModel, Field
class GetDomainListRequest(BaseModel):
    """Query parameters for listing domains."""
    # Paging (1-based page index).
    page: int = Field(default=1, gt=0)
    size: int = Field(default=50, gt=0)
    # Optional filters; empty string / 0 mean "no filter".
    domain: str = ""
    status: int = 0
class AddDomainRequest(BaseModel):
    """Request body for adding domains to the database."""
    crawl_interval: int  # minutes between crawls
    crawl_now: bool = True  # schedule an immediate first crawl
    domains: list[str]
class ImportDomainFormRequest(BaseModel):
    """Form fields accompanying a file-based domain import."""
    crawl_interval: int  # minutes between crawls
    crawl_now: bool = True  # schedule an immediate first crawl
class DeleteDomainRequest(BaseModel):
    """Request body for deleting domains."""
    domain_ids: list[int]
    remove_surl: bool = False  # also delete the domains' collected URLs
class UpdateDomainRequest(BaseModel):
    """Request body for updating domains (batch crawl-interval change)."""
    domain_ids: list[int]
    crawl_interval: int  # new interval in minutes
class CrawlNowRequest(BaseModel):
    """Request body for triggering an immediate crawl."""
    domain_ids: list[int]
class ToggleDomainRequest(BaseModel):
    """Request body for pausing the crawl of specific domains."""
    domain_ids: list[int]

View File

@ -0,0 +1,38 @@
from typing import Optional
from pydantic import BaseModel, Field
class GetUrlListRequest(BaseModel):
    """Query parameters for listing report URLs."""
    # Substring filters; empty string means "no filter".
    domain: str = ""
    surl: str = ""
    # Flag filters: 2 means "no filter", 0/1 filter on the column value.
    is_report_by_one: Optional[int] = 2
    is_report_by_site: Optional[int] = 2
    is_report_by_wap: Optional[int] = 2
    has_evidence: Optional[int] = 2
    # Paging (1-based page index).
    page: int = Field(default=1, gt=0)
    size: int = Field(default=50, gt=0)
class AddUrlItem(BaseModel):
    """One domain/surl pair to insert."""
    domain: str
    surl: str
class AddUrlsRequest(BaseModel):
    """Request body for manually adding URLs."""
    urls: list[AddUrlItem]
class CollectEvidenceRequest(BaseModel):
    """Request body for manually triggering evidence collection."""
    ids: list[int]
class ReportRequest(BaseModel):
    """Request body for manually reporting URLs on the selected channels."""
    # NOTE(review): original docstring was a copy-paste of CollectEvidenceRequest's.
    ids: list[int]
    report_by_one: bool
    report_by_site: bool
    report_by_wap: bool

24
app/web/results.py Normal file
View File

@ -0,0 +1,24 @@
from dataclasses import dataclass
from typing import Any, Generic
from typing_extensions import TypeVar
from app.constants.api_result import ApiCode
T = TypeVar("T")
@dataclass
class ApiResult(Generic[T]):
    """Uniform API response envelope used by the service and controller layers."""
    code: int  # business status code, see ApiCode
    message: str  # human-readable description ("ok" on success)
    success: bool  # convenience flag so callers need not compare codes
    data: T | None = None  # payload, absent on failure
    @staticmethod
    def ok(data: T | None = None) -> 'ApiResult[T]':
        """Build a success result wrapping *data*."""
        return ApiResult(code=ApiCode.OK.value, message="ok", success=True, data=data)
    @staticmethod
    def error(code: int, message: str) -> 'ApiResult[None]':
        """Build a failure result carrying *code* and *message*, with no payload."""
        return ApiResult(code=code, message=message, success=False, data=None)

View File

View File

@ -0,0 +1,141 @@
import time
from typing import Iterable, Optional
from loguru import logger
from sqlalchemy import delete, func, update
from sqlmodel import Session, select
from app.config.config import AppCtx
from app.constants.api_result import ApiCode
from app.constants.domain import DomainStatus
from app.models.domain import DomainModel
from app.models.report_urls import ReportUrlModel
from app.web.results import ApiResult
class DomainService:
    """DB-backed service for domain rows: listing, lookup, creation, deletion and flag updates."""
    @classmethod
    def get_list(cls, page: int, page_size: int, domain: str, status: int) -> ApiResult:
        """Return one page of domains plus the total count matching the filters.

        Empty *domain* / zero *status* mean "no filter" for that field.
        """
        with Session(AppCtx.g_db_engine) as session:
            stmt = select(DomainModel)
            stmt_total = select(func.count(DomainModel.id))
            if domain:
                stmt = stmt.where(DomainModel.domain.like(f"%{domain}%"))
                stmt_total = stmt_total.where(DomainModel.domain.like(f"%{domain}%"))
            if status:
                stmt = stmt.where(DomainModel.status == status)
                stmt_total = stmt_total.where(DomainModel.status == status)
            # Paging (page is 1-based).
            stmt = stmt.offset((page - 1) * page_size).limit(page_size)
            try:
                # Page of domain rows.
                rows = session.exec(stmt).all()
                # Total count under the same filters, for the client-side pager.
                total = session.exec(stmt_total).first()
                logger.debug(f"{total=}")
                return ApiResult.ok({"total": total, "rows": rows})
            except Exception as e:
                session.rollback()
                logger.exception(f"查询域名列表失败,错误:{e}")
                return ApiResult.error(ApiCode.DB_ERROR.value, f"查询域名列表失败,错误:{e}")
    @classmethod
    def get_by_domains(cls, domains: list[str]) -> ApiResult[list[DomainModel]]:
        """Look up domain rows by name; names with no row are simply absent from the result."""
        with Session(AppCtx.g_db_engine) as session:
            stmt = select(DomainModel).where(DomainModel.domain.in_(domains))
            try:
                rows = session.exec(stmt).all()
                return ApiResult.ok(rows)
            except Exception as e:
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, f"查询域名失败,错误:{e}")
    @classmethod
    def get_by_ids(cls, domain_ids: list[int]) -> ApiResult[list[DomainModel]]:
        """Look up domain rows by primary key; ids with no row are simply absent from the result."""
        with Session(AppCtx.g_db_engine) as session:
            stmt = select(DomainModel).where(DomainModel.id.in_(domain_ids))
            try:
                rows = session.exec(stmt).all()
                return ApiResult.ok(rows)
            except Exception as e:
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, f"查询域名失败,错误:{e}")
    @classmethod
    def add_domains(cls, interval: int, crawl_now: bool, domains: Iterable[str]) -> ApiResult[Optional[int]]:
        """Insert domains in batch; returns the number inserted.

        ``crawl_now`` sets latest_crawl_time to 0 so the crawl engine treats the
        domain as immediately due; otherwise the first crawl happens one
        ``interval`` from now.
        """
        with Session(AppCtx.g_db_engine) as session:
            new_domains = [
                DomainModel(
                    domain=x,
                    status=DomainStatus.READY.value,
                    crawl_interval=interval,
                    latest_crawl_time=0 if crawl_now else int(time.time())
                ) for x in domains
            ]
            session.add_all(new_domains)
            try:
                session.commit()
                return ApiResult.ok(len(new_domains))
            except Exception as e:
                logger.error(f"添加域名到数据库失败,错误:{e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, f"添加域名失败,错误:{e}")
    @classmethod
    def delete_domains(cls, domain_ids: list[int], remove_surl: bool = False) -> ApiResult[Optional[int]]:
        """Delete domains in batch; ``remove_surl`` also deletes their report_url rows."""
        with Session(AppCtx.g_db_engine) as session:
            stmt = delete(DomainModel).where(DomainModel.id.in_(domain_ids))
            try:
                session.exec(stmt)
                # Optionally cascade to the collected URLs of those domains.
                if remove_surl:
                    stmt = delete(ReportUrlModel).where(ReportUrlModel.domain_id.in_(domain_ids))
                    session.exec(stmt)
                session.commit()
                return ApiResult.ok(len(domain_ids))
            except Exception as e:
                logger.error(f"删除域名失败,错误:{e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, f"删除域名失败,错误:{e}")
    @classmethod
    def update_domain_interval(cls, domain_ids: list[int], interval: int) -> ApiResult[Optional[int]]:
        """Set the crawl interval (minutes) for the given domain ids in one UPDATE."""
        with Session(AppCtx.g_db_engine) as session:
            stmt = update(DomainModel).where(DomainModel.id.in_(domain_ids)).values(crawl_interval=interval)
            try:
                session.exec(stmt)
                session.commit()
                return ApiResult.ok(len(domain_ids))
            except Exception as e:
                logger.error(f"更新域名 interval 失败,错误:{e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, f"更新域名 interval 失败,错误:{e}")
    @classmethod
    def update_domain_status(cls, domain_ids: list[int], status: int) -> ApiResult[Optional[int]]:
        """Set the status flag (see DomainStatus) for the given domain ids in one UPDATE."""
        with Session(AppCtx.g_db_engine) as session:
            stmt = update(DomainModel).where(DomainModel.id.in_(domain_ids)).values(status=status)
            try:
                session.exec(stmt)
                session.commit()
                return ApiResult.ok(len(domain_ids))
            except Exception as e:
                logger.error(f"更新域名 status 失败,错误:{e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, f"更新域名 status 失败,错误:{e}")

View File

@ -0,0 +1,119 @@
from typing import Optional
from loguru import logger
from sqlalchemy import update, func
from sqlmodel import Session, select
from app.config.config import AppCtx
from app.constants.api_result import ApiCode
from app.models.report_urls import ReportUrlModel
from app.web.request.report_request import AddUrlItem
from app.web.results import ApiResult
class ReportURLService:
    """DB-backed service for report-URL rows: listing, creation and flag updates."""
    @classmethod
    def get_list(
        cls, domain: str, surl: str, is_report_by_one: Optional[int], is_report_by_site: Optional[int],
        is_report_by_wap: Optional[int], has_evidence: Optional[int], page: int, size: int
    ) -> ApiResult:
        """Return one page of report URLs plus the total count matching the filters.

        Flag filters use 2 (or None) to mean "no filter"; 0/1 filter on the
        actual column value.
        """
        with Session(AppCtx.g_db_engine) as session:
            stmt = select(ReportUrlModel)
            total_stmt = select(func.count(ReportUrlModel.id))
            if domain:
                stmt = stmt.where(ReportUrlModel.domain.like(f"%{domain}%"))
                total_stmt = total_stmt.where(ReportUrlModel.domain.like(f"%{domain}%"))
            if surl:
                stmt = stmt.where(ReportUrlModel.surl.like(f"%{surl}%"))
                total_stmt = total_stmt.where(ReportUrlModel.surl.like(f"%{surl}%"))
            # BUG FIX: these filters used truthiness ("if flag and flag != 2"),
            # which silently discarded the legitimate filter value 0, making it
            # impossible to filter for "not reported" / "no evidence" rows.
            if is_report_by_one is not None and is_report_by_one != 2:
                stmt = stmt.where(ReportUrlModel.is_report_by_one == is_report_by_one)
                total_stmt = total_stmt.where(ReportUrlModel.is_report_by_one == is_report_by_one)
            if is_report_by_site is not None and is_report_by_site != 2:
                stmt = stmt.where(ReportUrlModel.is_report_by_site == is_report_by_site)
                total_stmt = total_stmt.where(ReportUrlModel.is_report_by_site == is_report_by_site)
            if is_report_by_wap is not None and is_report_by_wap != 2:
                stmt = stmt.where(ReportUrlModel.is_report_by_wap == is_report_by_wap)
                total_stmt = total_stmt.where(ReportUrlModel.is_report_by_wap == is_report_by_wap)
            if has_evidence is not None and has_evidence != 2:
                stmt = stmt.where(ReportUrlModel.has_evidence == has_evidence)
                total_stmt = total_stmt.where(ReportUrlModel.has_evidence == has_evidence)
            # Paging (page is 1-based).
            stmt = stmt.offset((page - 1) * size).limit(size)
            try:
                total = session.exec(total_stmt).first()
                urls = session.exec(stmt).all()
                return ApiResult.ok({
                    "total": total,
                    "data": urls,
                })
            except Exception as e:
                logger.error(f"获取URL列表失败: {e}")
                return ApiResult.error(ApiCode.DB_ERROR.value, str(e))
    @classmethod
    def add_urls(cls, domain_map: dict[str, int], urls: list[AddUrlItem]) -> ApiResult[Optional[int]]:
        """Insert URL rows in batch; every item's domain must be present in *domain_map*."""
        if not urls:
            return ApiResult.ok(0)
        models = []
        for url in urls:
            domain_id = domain_map.get(url.domain, None)
            if not domain_id:
                return ApiResult.error(ApiCode.PARAM_ERROR.value, f"域名 {url.domain} 不存在")
            models.append(ReportUrlModel(
                domain_id=domain_id,
                domain=url.domain,
                surl=url.surl,
            ))
        with Session(AppCtx.g_db_engine) as session:
            try:
                session.add_all(models)
                session.commit()
                return ApiResult.ok(len(models))
            except Exception as e:
                logger.error(f"添加URL失败: {e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, str(e))
    @classmethod
    def batch_update_evidence_flag(cls, url_ids: list[int]) -> ApiResult[Optional[int]]:
        """Clear has_evidence for the given ids so the evidence engine re-collects them."""
        with Session(AppCtx.g_db_engine) as session:
            try:
                stmt = update(ReportUrlModel).where(ReportUrlModel.id.in_(url_ids)).values(has_evidence=False)
                session.exec(stmt)
                session.commit()
                return ApiResult.ok(len(url_ids))
            except Exception as e:
                logger.error(f"批量更新URL的has_evidence字段失败: {e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, str(e))
    @classmethod
    def batch_update_report_flag(
        cls, ids: list[int], report_by_one: bool, report_by_site: bool, report_by_wap: bool
    ) -> ApiResult[Optional[int]]:
        """Clear the selected report flags so the report engine re-reports those URLs."""
        with Session(AppCtx.g_db_engine) as session:
            try:
                # Chained .values() calls merge, so only the selected flags are reset.
                stmt = update(ReportUrlModel).where(ReportUrlModel.id.in_(ids))
                if report_by_wap:
                    stmt = stmt.values(is_report_by_wap=False)
                if report_by_site:
                    stmt = stmt.values(is_report_by_site=False)
                if report_by_one:
                    stmt = stmt.values(is_report_by_one=False)
                logger.debug(f"{str(stmt)=}")
                session.exec(stmt)
                session.commit()
                return ApiResult.ok(len(ids))
            except Exception as e:
                # FIX: message previously said "has_evidence" (copy-paste from above).
                logger.error(f"批量更新URL的举报标记失败: {e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, str(e))

43
app/web/web.py Normal file
View File

@ -0,0 +1,43 @@
import os
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, Response
from loguru import logger
from starlette.staticfiles import StaticFiles
from starlette.types import Scope
from .controller.domain import router as domain_router
from .controller.report import router as report_router
from .controller.status import router as status_router
class SPAStaticFiles(StaticFiles):
    """StaticFiles variant that serves the SPA shell for client-side routes.

    Without this, a hard refresh on a vue-router path would 404 because no
    matching file exists on disk.
    """
    async def get_response(self, path: str, scope: Scope) -> Response:
        # Known front-end routes: hand back index.html and let vue-router take over.
        if path in ("domain", "url"):
            return FileResponse(os.path.join(self.directory, "index.html"))
        return await super().get_response(path, scope)
class WebApp:
    """Web console: wires the API routers and the SPA static mount, then runs uvicorn."""
    def __init__(self):
        # FIX: previously __init__ stored a bare FastAPI() that start() ignored
        # (it built its own).  Both now share the same fully configured factory.
        self.app = self._build_app()
    @staticmethod
    def _build_app() -> FastAPI:
        """Create the FastAPI application with all routers and the SPA mount."""
        app = FastAPI()
        # API routers first ...
        app.include_router(status_router)
        app.include_router(report_router)
        app.include_router(domain_router)
        # ... then the static catch-all, so API routes take precedence.
        app.mount("/", SPAStaticFiles(directory="fe/dist", html=True), name="static")
        return app
    @staticmethod
    async def start():
        """Run the uvicorn server; blocks until shutdown."""
        app = WebApp._build_app()
        # TODO: host/port are hard-coded for now; read them from the config file.
        cfg = uvicorn.Config(app, host="127.0.0.1", port=3000)
        server = uvicorn.Server(cfg)
        await server.serve()

View File

@ -15,5 +15,5 @@ database = "baidu_reporter"
# chrome 配置
[chrome]
proxy = "http://127.0.0.1:8080"
proxy = "http://127.0.0.1:7890"
browser_path = "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe"

9
fe/.editorconfig Normal file
View File

@ -0,0 +1,9 @@
[*.{js,jsx,mjs,cjs,ts,tsx,mts,cts,vue,css,scss,sass,less,styl}]
charset = utf-8
indent_size = 2
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
end_of_line = lf
max_line_length = 100

1
fe/.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
* text=auto eol=lf

30
fe/.gitignore vendored Normal file
View File

@ -0,0 +1,30 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
.DS_Store
dist-ssr
coverage
*.local
/cypress/videos/
/cypress/screenshots/
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
*.tsbuildinfo

6
fe/.prettierrc.json Normal file
View File

@ -0,0 +1,6 @@
{
"$schema": "https://json.schemastore.org/prettierrc",
"semi": false,
"singleQuote": true,
"printWidth": 100
}

8
fe/.vscode/extensions.json vendored Normal file
View File

@ -0,0 +1,8 @@
{
"recommendations": [
"Vue.volar",
"dbaeumer.vscode-eslint",
"EditorConfig.EditorConfig",
"esbenp.prettier-vscode"
]
}

39
fe/README.md Normal file
View File

@ -0,0 +1,39 @@
# fe
This template should help get you started developing with Vue 3 in Vite.
## Recommended IDE Setup
[VSCode](https://code.visualstudio.com/) + [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) (and disable Vetur).
## Type Support for `.vue` Imports in TS
TypeScript cannot handle type information for `.vue` imports by default, so we replace the `tsc` CLI with `vue-tsc` for type checking. In editors, we need [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) to make the TypeScript language service aware of `.vue` types.
## Customize configuration
See [Vite Configuration Reference](https://vite.dev/config/).
## Project Setup
```sh
pnpm install
```
### Compile and Hot-Reload for Development
```sh
pnpm dev
```
### Type-Check, Compile and Minify for Production
```sh
pnpm build
```
### Lint with [ESLint](https://eslint.org/)
```sh
pnpm lint
```

75
fe/auto-imports.d.ts vendored Normal file
View File

@ -0,0 +1,75 @@
/* eslint-disable */
/* prettier-ignore */
// @ts-nocheck
// noinspection JSUnusedGlobalSymbols
// Generated by unplugin-auto-import
// biome-ignore lint: disable
export {}
declare global {
const EffectScope: typeof import('vue')['EffectScope']
const computed: typeof import('vue')['computed']
const createApp: typeof import('vue')['createApp']
const customRef: typeof import('vue')['customRef']
const defineAsyncComponent: typeof import('vue')['defineAsyncComponent']
const defineComponent: typeof import('vue')['defineComponent']
const effectScope: typeof import('vue')['effectScope']
const getCurrentInstance: typeof import('vue')['getCurrentInstance']
const getCurrentScope: typeof import('vue')['getCurrentScope']
const h: typeof import('vue')['h']
const inject: typeof import('vue')['inject']
const isProxy: typeof import('vue')['isProxy']
const isReactive: typeof import('vue')['isReactive']
const isReadonly: typeof import('vue')['isReadonly']
const isRef: typeof import('vue')['isRef']
const markRaw: typeof import('vue')['markRaw']
const nextTick: typeof import('vue')['nextTick']
const onActivated: typeof import('vue')['onActivated']
const onBeforeMount: typeof import('vue')['onBeforeMount']
const onBeforeUnmount: typeof import('vue')['onBeforeUnmount']
const onBeforeUpdate: typeof import('vue')['onBeforeUpdate']
const onDeactivated: typeof import('vue')['onDeactivated']
const onErrorCaptured: typeof import('vue')['onErrorCaptured']
const onMounted: typeof import('vue')['onMounted']
const onRenderTracked: typeof import('vue')['onRenderTracked']
const onRenderTriggered: typeof import('vue')['onRenderTriggered']
const onScopeDispose: typeof import('vue')['onScopeDispose']
const onServerPrefetch: typeof import('vue')['onServerPrefetch']
const onUnmounted: typeof import('vue')['onUnmounted']
const onUpdated: typeof import('vue')['onUpdated']
const onWatcherCleanup: typeof import('vue')['onWatcherCleanup']
const provide: typeof import('vue')['provide']
const reactive: typeof import('vue')['reactive']
const readonly: typeof import('vue')['readonly']
const ref: typeof import('vue')['ref']
const resolveComponent: typeof import('vue')['resolveComponent']
const shallowReactive: typeof import('vue')['shallowReactive']
const shallowReadonly: typeof import('vue')['shallowReadonly']
const shallowRef: typeof import('vue')['shallowRef']
const toRaw: typeof import('vue')['toRaw']
const toRef: typeof import('vue')['toRef']
const toRefs: typeof import('vue')['toRefs']
const toValue: typeof import('vue')['toValue']
const triggerRef: typeof import('vue')['triggerRef']
const unref: typeof import('vue')['unref']
const useAttrs: typeof import('vue')['useAttrs']
const useCssModule: typeof import('vue')['useCssModule']
const useCssVars: typeof import('vue')['useCssVars']
const useDialog: typeof import('naive-ui')['useDialog']
const useId: typeof import('vue')['useId']
const useLoadingBar: typeof import('naive-ui')['useLoadingBar']
const useMessage: typeof import('naive-ui')['useMessage']
const useModel: typeof import('vue')['useModel']
const useNotification: typeof import('naive-ui')['useNotification']
const useSlots: typeof import('vue')['useSlots']
const useTemplateRef: typeof import('vue')['useTemplateRef']
const watch: typeof import('vue')['watch']
const watchEffect: typeof import('vue')['watchEffect']
const watchPostEffect: typeof import('vue')['watchPostEffect']
const watchSyncEffect: typeof import('vue')['watchSyncEffect']
}
// for type re-export
declare global {
// @ts-ignore
export type { Component, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue'
import('vue')
}

35
fe/components.d.ts vendored Normal file
View File

@ -0,0 +1,35 @@
/* eslint-disable */
// @ts-nocheck
// Generated by unplugin-vue-components
// Read more: https://github.com/vuejs/core/pull/3399
// biome-ignore lint: disable
export {}
/* prettier-ignore */
declare module 'vue' {
export interface GlobalComponents {
AddDomainDialog: typeof import('./src/components/AddDomainDialog.vue')['default']
EditDomainDialog: typeof import('./src/components/EditDomainDialog.vue')['default']
ImportDomainDialog: typeof import('./src/components/ImportDomainDialog.vue')['default']
NButton: typeof import('naive-ui')['NButton']
NCard: typeof import('naive-ui')['NCard']
NCheckbox: typeof import('naive-ui')['NCheckbox']
NDataTable: typeof import('naive-ui')['NDataTable']
NDialogProvider: typeof import('naive-ui')['NDialogProvider']
NDropdown: typeof import('naive-ui')['NDropdown']
NForm: typeof import('naive-ui')['NForm']
NFormItem: typeof import('naive-ui')['NFormItem']
NFormItemGi: typeof import('naive-ui')['NFormItemGi']
NGrid: typeof import('naive-ui')['NGrid']
NInput: typeof import('naive-ui')['NInput']
NInputNumber: typeof import('naive-ui')['NInputNumber']
NModal: typeof import('naive-ui')['NModal']
NPagination: typeof import('naive-ui')['NPagination']
NSelect: typeof import('naive-ui')['NSelect']
NSpace: typeof import('naive-ui')['NSpace']
NTag: typeof import('naive-ui')['NTag']
NTooltip: typeof import('naive-ui')['NTooltip']
RouterLink: typeof import('vue-router')['RouterLink']
RouterView: typeof import('vue-router')['RouterView']
}
}

483
fe/dist/assets/DomainManager-2SUOMVR8.js vendored Normal file

File diff suppressed because one or more lines are too long

1525
fe/dist/assets/FormItem-DHmVxm6n.js vendored Normal file

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

1
fe/dist/assets/UrlManager-DG8i4_QJ.js vendored Normal file

File diff suppressed because one or more lines are too long

1
fe/dist/assets/index-Ca0u0JBQ.css vendored Normal file

File diff suppressed because one or more lines are too long

1161
fe/dist/assets/index-DnpM0Ntg.js vendored Normal file

File diff suppressed because one or more lines are too long

BIN
fe/dist/favicon.ico vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

14
fe/dist/index.html vendored Normal file
View File

@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="">
<head>
<meta charset="UTF-8">
<link rel="icon" href="/favicon.ico">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Baidu Reporter</title>
<script type="module" crossorigin src="/assets/index-DnpM0Ntg.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-Ca0u0JBQ.css">
</head>
<body>
<div id="app"></div>
</body>
</html>

1
fe/env.d.ts vendored Normal file
View File

@ -0,0 +1 @@
/// <reference types="vite/client" />

22
fe/eslint.config.ts Normal file
View File

@ -0,0 +1,22 @@
import { globalIgnores } from 'eslint/config'
import { defineConfigWithVueTs, vueTsConfigs } from '@vue/eslint-config-typescript'
import pluginVue from 'eslint-plugin-vue'
import skipFormatting from '@vue/eslint-config-prettier/skip-formatting'
// To allow more languages other than `ts` in `.vue` files, uncomment the following lines:
// import { configureVueProject } from '@vue/eslint-config-typescript'
// configureVueProject({ scriptLangs: ['ts', 'tsx'] })
// More info at https://github.com/vuejs/eslint-config-typescript/#advanced-setup
// Flat ESLint config: lint TS/Vue sources, skip build output, and defer all
// formatting concerns to Prettier (skipFormatting must stay last).
export default defineConfigWithVueTs(
  {
    name: 'app/files-to-lint',
    files: ['**/*.{ts,mts,tsx,vue}'],
  },
  globalIgnores(['**/dist/**', '**/dist-ssr/**', '**/coverage/**']),
  pluginVue.configs['flat/essential'],
  vueTsConfigs.recommended,
  skipFormatting,
)

13
fe/index.html Normal file
View File

@ -0,0 +1,13 @@
<!DOCTYPE html>
<html lang="">
<head>
<meta charset="UTF-8">
<link rel="icon" href="/favicon.ico">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Baidu Reporter</title>
</head>
<body>
<div id="app"></div>
<script type="module" src="/src/main.ts"></script>
</body>
</html>

46
fe/package.json Normal file
View File

@ -0,0 +1,46 @@
{
"name": "fe",
"version": "0.0.0",
"private": true,
"type": "module",
"scripts": {
"dev": "vite",
"build": "run-p type-check \"build-only {@}\" --",
"preview": "vite preview",
"build-only": "vite build",
"type-check": "vue-tsc --build",
"lint": "eslint . --fix",
"format": "prettier --write src/"
},
"dependencies": {
"@tailwindcss/vite": "^4.1.1",
"axios": "^1.8.4",
"pinia": "^3.0.1",
"tailwindcss": "^4.1.1",
"vue": "^3.5.13",
"vue-router": "^4.5.0"
},
"devDependencies": {
"@tsconfig/node22": "^22.0.1",
"@types/node": "^22.13.14",
"@vicons/ionicons5": "^0.13.0",
"@vitejs/plugin-vue": "^5.2.3",
"@vitejs/plugin-vue-jsx": "^4.1.2",
"@vue/eslint-config-prettier": "^10.2.0",
"@vue/eslint-config-typescript": "^14.5.0",
"@vue/tsconfig": "^0.7.0",
"eslint": "^9.22.0",
"eslint-plugin-vue": "~10.0.0",
"jiti": "^2.4.2",
"naive-ui": "^2.41.0",
"npm-run-all2": "^7.0.2",
"prettier": "3.5.3",
"typescript": "~5.8.0",
"unplugin-auto-import": "^19.1.2",
"unplugin-vue-components": "^28.4.1",
"vfonts": "^0.0.3",
"vite": "^6.2.4",
"vite-plugin-vue-devtools": "^7.7.2",
"vue-tsc": "^2.2.8"
}
}

4097
fe/pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff

BIN
fe/public/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

72
fe/src/App.vue Normal file
View File

@ -0,0 +1,72 @@
<script setup lang="tsx">
import { RouterLink, RouterView } from 'vue-router'
import {
  NLayout,
  NMessageProvider,
  NConfigProvider,
  NLayoutHeader,
  NLayoutSider,
  NMenu,
} from 'naive-ui'
import { type MenuOption } from 'naive-ui'
import { List, ColorWand } from '@vicons/ionicons5'
// TODO: hook up real navigation side effects if needed.
// FIX: the original logged the literal string 'handleUpdateValue: v'
// instead of the selected menu key.
const handleUpdateValue = (v: string) => {
  console.log('handleUpdateValue:', v)
}
// Sidebar menu: labels render RouterLinks so naive-ui handles the navigation.
const menuOpts: MenuOption[] = [
  {
    label: () => <RouterLink to={{ name: 'domain-manager' }}>域名管理</RouterLink>,
    key: 'rule-manager',
    icon: () => <List />,
  },
  {
    label: () => <RouterLink to={{ name: 'url-manager' }}>URL 管理</RouterLink>,
    key: 'rule-sniff',
    icon: () => <ColorWand />,
  },
]
</script>
<template>
  <n-config-provider>
    <n-dialog-provider>
      <n-message-provider>
        <n-layout class="h-screen">
          <!-- header -->
          <n-layout-header
            class="h-16 p-5"
            style="background-color: oklch(62.3% 0.214 259.815); color: white"
            bordered
          >
            <span class="font-bold text-xl">BAIDU Reporter</span>
          </n-layout-header>
          <n-layout position="absolute" has-sider style="top: 64px">
            <!-- sidebar -->
            <n-layout-sider
              width="8%"
              show-trigger
              show-collapsed-content
              :collapsed-width="64"
              content-style="padding: 8px; text-align:center;"
              :native-scrollbar="false"
              bordered
              collapse-mode="width"
            >
              <n-menu :indent="24" :options="menuOpts" @update:value="handleUpdateValue" />
            </n-layout-sider>
            <!-- content -->
            <n-layout content-style="padding: 16px;" :native-scrollbar="false">
              <router-view />
            </n-layout>
          </n-layout>
        </n-layout>
      </n-message-provider>
    </n-dialog-provider>
  </n-config-provider>
</template>

View File

@ -0,0 +1,141 @@
<script setup lang="ts">
import { ref, inject } from 'vue'
import {
  NModal,
  NForm,
  NFormItem,
  NInputNumber,
  NCheckbox,
  NInput,
  NButton,
  NButtonGroup,
  useMessage,
} from 'naive-ui'
import type { FormRules } from 'naive-ui'
import type { AxiosInstance } from 'axios'
// Dialog for manually adding domains (newline- or comma-separated list).
// v-model:show controls visibility; emits 'success' after a successful add.
const model = defineModel<boolean>('show', { required: true })
const emit = defineEmits(['success'])
const axios = inject('axios') as AxiosInstance
const message = useMessage()
const interval = ref<number>(1440) // crawl interval in minutes, default 1440 (one day)
const startImmediately = ref(true)
const domains = ref('')
const formRef = ref<InstanceType<typeof NForm> | null>(null)
const rules: FormRules = {
  interval: [
    {
      required: true,
      type: 'number',
      message: '请输入采集间隔',
      trigger: ['blur', 'change'],
    },
    {
      type: 'number',
      min: 1,
      message: '采集间隔必须大于0',
      trigger: ['blur', 'change'],
    },
  ],
  domains: [
    {
      required: true,
      message: '请输入域名',
      trigger: ['blur', 'change'],
    },
    {
      validator: (_, value: string) => {
        if (!value.trim()) return true // empty input is handled by the "required" rule above
        const domainList = value
          .split(/[\n,]/)
          .map((d) => d.trim())
          .filter((d) => d)
        if (domainList.length === 0) return false
        return true
      },
      message: '域名格式不正确',
      trigger: ['blur', 'change'],
    },
  ],
}
const handleConfirm = async () => {
  try {
    await formRef.value?.validate()
  } catch (errors) {
    return
  }
  try {
    // Parse the textarea into a clean list of domains.
    const domainList = domains.value
      .split(/[\n,]/) // split on newline or comma
      .map((domain) => domain.trim()) // strip surrounding whitespace
      .filter((domain) => domain) // drop empty entries
    const response = await axios.post('/api/domain/v1/add', {
      domains: domainList,
      crawl_interval: interval.value,
      crawl_now: startImmediately.value,
    })
    if (response.data.code === 20000) {
      message.success('添加成功')
      emit('success')
      handleClose()
    } else {
      message.error(`添加失败:${response.data.message}`)
    }
  } catch (error) {
    console.error('添加失败', error)
    message.error(`添加失败:${error}`)
  }
}
const handleClose = () => {
  // Reset the form to its defaults before hiding the dialog.
  interval.value = 1440
  startImmediately.value = true
  domains.value = ''
  formRef.value?.restoreValidation()
  model.value = false
}
</script>
<template>
  <n-modal
    v-model:show="model"
    preset="card"
    title="手动添加"
    :mask-closable="false"
    style="width: 600px"
  >
    <n-form size="small" ref="formRef" :model="{ interval, domains }" :rules="rules">
      <n-form-item path="interval" label="采集间隔(分钟)">
        <n-input-number v-model:value="interval" :min="1" />
      </n-form-item>
      <n-form-item path="domains" label="域名列表">
        <n-input
          v-model:value="domains"
          type="textarea"
          :rows="10"
          placeholder="请输入域名,支持换行或英文逗号分隔"
        />
      </n-form-item>
      <n-form-item label="采集选项">
        <n-checkbox v-model:checked="startImmediately"> 立即开始采集 </n-checkbox>
      </n-form-item>
    </n-form>
    <template #action>
      <n-button-group size="small">
        <n-button type="primary" @click="handleConfirm">确认</n-button>
        <n-button @click="handleClose">关闭</n-button>
      </n-button-group>
    </template>
  </n-modal>
</template>

View File

@ -0,0 +1,115 @@
<script setup lang="ts">
import { ref, defineProps, defineEmits, inject, defineModel, computed, watch } from 'vue'
import { useMessage } from 'naive-ui'
import type { AxiosInstance } from 'axios'
import type { DataTableRowKey } from 'naive-ui'
// Dialog for editing the crawl interval of one or many selected domains.
const show = defineModel<boolean>('show')
const props = defineProps<{
  // Ids of the selected domains (single or batch edit).
  domainIds: DataTableRowKey[] | null
}>()
const emit = defineEmits<{
  (e: 'success'): void
  (e: 'close'): void
}>()
const axios = inject('axios') as AxiosInstance
const message = useMessage()
// Form state.
const crawlInterval = ref<number | null>(null)
const loading = ref(false)
// Title reflects whether this is a single edit or a batch edit.
const dialogTitle = computed(() => {
  const count = props.domainIds?.length || 0
  if (count > 1) {
    return `批量修改 ${count} 个域名的采集间隔`
  } else if (count === 1) {
    return '修改域名采集间隔'
  }
  return '修改采集间隔' // fallback when nothing is selected
})
// Clear the previous input every time the dialog is (re)opened.
watch(show, (newShow) => {
  if (newShow) {
    crawlInterval.value = null // reset to empty
  }
})
const handleSubmit = async () => {
  if (crawlInterval.value === null || crawlInterval.value < 1) {
    message.error('请输入有效的采集间隔大于等于1的整数')
    return
  }
  if (!props.domainIds || props.domainIds.length === 0) {
    message.error('没有指定要修改的域名')
    return
  }
  try {
    loading.value = true
    const response = (
      await axios.post('/api/domain/v1/update', {
        domain_ids: props.domainIds, // ids of the domains to update
        crawl_interval: crawlInterval.value,
      })
    ).data
    if (response.code !== 20000) {
      message.error(`更新失败:${response.message}`)
      return
    }
    message.success('更新成功')
    emit('success')
    show.value = false // close on success
  } catch (error) {
    console.error('更新失败', error)
    message.error(`更新失败:${error}`)
  } finally {
    loading.value = false
  }
}
const handleClose = () => {
  show.value = false
  emit('close') // let the parent react to a manual close
}
</script>
<template>
  <n-modal
    v-model:show="show"
    preset="dialog"
    :title="dialogTitle"
    :loading="loading"
    @close="handleClose"
  >
    <!-- Batch-edit hint -->
    <div v-if="(domainIds?.length || 0) > 1" class="mb-4 text-orange-500">
      你正在批量修改 {{ domainIds?.length }} 个域名的采集间隔
    </div>
    <n-form>
      <n-form-item label="采集间隔(分钟)" required>
        <n-input-number
          v-model:value="crawlInterval"
          :min="1"
          :step="1"
          style="width: 100%"
          placeholder="请输入采集间隔"
        />
      </n-form-item>
    </n-form>
    <template #action>
      <n-button @click="handleClose">取消</n-button>
      <n-button type="primary" @click="handleSubmit" :loading="loading">确定</n-button>
    </template>
  </n-modal>
</template>

View File

@ -0,0 +1,144 @@
<script setup lang="ts">
import { ref, inject } from 'vue'
import {
NModal,
NForm,
NFormItem,
NInputNumber,
NCheckbox,
NUpload,
NButton,
NButtonGroup,
useMessage,
} from 'naive-ui'
import type { FormRules } from 'naive-ui'
import type { AxiosInstance } from 'axios'
import type { UploadFileInfo } from 'naive-ui'
const model = defineModel<boolean>('show', { required: true })
const emit = defineEmits(['success'])
const axios = inject('axios') as AxiosInstance
const message = useMessage()
const interval = ref(1440) // 11440
const startImmediately = ref(true)
const fileList = ref<UploadFileInfo[]>([])
const formRef = ref<InstanceType<typeof NForm> | null>(null)
const rules: FormRules = {
interval: [
{
type: 'number',
required: true,
message: '请输入采集间隔',
trigger: ['blur', 'change'],
},
{
type: 'number',
min: 1,
message: '采集间隔必须大于0',
trigger: ['blur', 'change'],
},
],
fileList: [
{
type: 'array',
required: true,
message: '请选择文件',
trigger: ['change'],
},
{
validator: (_, value: UploadFileInfo[]) => {
if (!value || value.length === 0) return false
return !!value[0].file
},
message: '文件无效',
trigger: ['change'],
},
],
}
// Validate the form, then upload the chosen file plus options as multipart form data.
const handleConfirm = async () => {
  try {
    await formRef.value?.validate()
  } catch (errors) {
    // Validation messages are rendered by the form items; just abort.
    return
  }
  try {
    const formData = new FormData()
    const file = fileList.value[0].file
    if (!file) {
      message.error('文件无效')
      return
    }
    formData.append('file', file)
    formData.append('crawl_interval', interval.value.toString())
    formData.append('crawl_now', startImmediately.value.toString())
    const response = await axios.post('/api/domain/v1/import', formData, {
      headers: {
        'Content-Type': 'multipart/form-data',
      },
    })
    // Backend convention in this project: code 20000 means success.
    if (response.data.code === 20000) {
      message.success('导入成功')
      emit('success')
      handleClose()
    } else {
      message.error(`导入失败:${response.data.message}`)
    }
  } catch (error) {
    console.error('导入失败', error)
    message.error(`导入失败:${error}`)
  }
}
// Reset all fields to their defaults, clear validation state and hide the dialog.
const handleClose = () => {
  interval.value = 1440 // default: once per day (24h * 60min)
  startImmediately.value = true
  fileList.value = []
  formRef.value?.restoreValidation()
  model.value = false
}
</script>
<template>
<n-modal
v-model:show="model"
preset="card"
title="通过文件导入"
:mask-closable="false"
style="width: 600px"
>
<n-form
size="small"
ref="formRef"
:model="{ interval, fileList }"
:rules="rules"
label-placement="left"
label-width="200"
>
<n-form-item path="interval" label="采集间隔(分钟)">
<n-input-number v-model:value="interval" :min="1" />
</n-form-item>
<n-form-item path="fileList" label="选择文件">
<n-upload v-model:file-list="fileList" :max="1" accept=".txt,.csv">
<n-button>选择文件</n-button>
</n-upload>
</n-form-item>
<n-form-item label="采集选项">
<n-checkbox v-model:checked="startImmediately"> 立即开始采集 </n-checkbox>
</n-form-item>
</n-form>
<template #action>
<n-button-group size="small">
<n-button type="primary" @click="handleConfirm">确认</n-button>
<n-button @click="handleClose">关闭</n-button>
</n-button-group>
</template>
</n-modal>
</template>

1
fe/src/main.css Normal file
View File

@ -0,0 +1 @@
@import 'tailwindcss';

26
fe/src/main.ts Normal file
View File

@ -0,0 +1,26 @@
import './main.css'
import { createApp } from 'vue'
import { createPinia } from 'pinia'
import App from './App.vue'
import router from './router'
import axios from 'axios'
import 'vfonts/Lato.css'
import 'vfonts/IBMPlexMono.css'
const app = createApp(App)
app.use(createPinia())
app.use(router)
const axiosInstance = axios.create({
withCredentials: true,
timeout: 9000,
timeoutErrorMessage: 'E_NETWORK_TIMEOUT',
})
app.provide('axios', axiosInstance)
app.mount('#app')

23
fe/src/router/index.ts Normal file
View File

@ -0,0 +1,23 @@
import { createRouter, createWebHistory } from 'vue-router'
const router = createRouter({
history: createWebHistory(import.meta.env.BASE_URL),
routes: [
{
path: '/',
redirect: '/domain',
},
{
path: '/domain',
name: 'domain-manager',
component: () => import('../views/DomainManager.vue'),
},
{
path: '/url',
name: 'url-manager',
component: () => import('../views/UrlManager.vue'),
},
],
})
export default router

12
fe/src/stores/counter.ts Normal file
View File

@ -0,0 +1,12 @@
import { ref, computed } from 'vue'
import { defineStore } from 'pinia'
/** Simple demo counter store (Pinia setup-store style). */
export const useCounterStore = defineStore('counter', () => {
  // Reactive counter state.
  const count = ref(0)
  // Derived value: always twice the current count.
  const doubleCount = computed(() => count.value * 2)
  // Bump the counter by one.
  const increment = () => {
    count.value += 1
  }
  return { count, doubleCount, increment }
})

5
fe/src/utils/common.ts Normal file
View File

@ -0,0 +1,5 @@
/** Format a unix timestamp (seconds) as a locale-dependent date-time string. */
function convertTimestampToDate(timestamp: number): string {
  const milliseconds = timestamp * 1000
  return new Date(milliseconds).toLocaleString()
}
export { convertTimestampToDate }

View File

@ -0,0 +1,430 @@
<script setup lang="tsx">
import { onMounted, inject, ref, computed } from 'vue'
import { useRoute, useRouter } from 'vue-router'
import type { AxiosInstance } from 'axios'
import { type DataTableColumn, type DataTableRowKey, useMessage, useDialog } from 'naive-ui'
import ImportDomainDialog from '../components/ImportDomainDialog.vue'
import AddDomainDialog from '../components/AddDomainDialog.vue'
import EditDomainDialog from '../components/EditDomainDialog.vue'
import { convertTimestampToDate } from '@/utils/common'
const axios = inject('axios') as AxiosInstance
const message = useMessage()
const dialog = useDialog()
const route = useRoute()
const router = useRouter()
//
const filterForm = ref({
domain: '',
status: null as number | null,
})
//
const statusOptions = [
{ label: '全部', value: null },
{ label: 'READY', value: 1 },
{ label: 'QUEUE', value: 2 },
{ label: 'CRAWLING', value: 3 },
{ label: 'PAUSE', value: 999 },
]
const showImportDialog = ref(false)
const showAddDialog = ref(false)
const showEditDialog = ref(false)
// ID
const editingDomainIds = ref<DataTableRowKey[] | null>(null)
// Key
const checkedRowKeys = ref<DataTableRowKey[]>([])
//
const pagination = ref({
page: 1,
pageSize: 50,
itemCount: 0,
showSizePicker: true,
pageSizes: [10, 20, 50, 100, 200, 500, 1000],
onChange: (page: number) => {
pagination.value.page = page
updateUrlParams()
getDomainList()
},
onUpdatePageSize: (pageSize: number) => {
pagination.value.pageSize = pageSize
pagination.value.page = 1
updateUrlParams()
getDomainList()
},
})
// URL
const updateUrlParams = () => {
router.push({
query: {
page: pagination.value.page,
size: pagination.value.pageSize,
domain: filterForm.value.domain || undefined,
status: filterForm.value.status || undefined,
},
})
}
//
const initPagination = () => {
const page = Number(route.query.page) || 1
const size = Number(route.query.size) || 50
const domain = route.query.domain as string || ''
const status = route.query.status ? Number(route.query.status) : null
pagination.value.page = page
pagination.value.pageSize = size
filterForm.value.domain = domain
filterForm.value.status = status
}
// Table column definitions for the domain list.
// Fix: status 2 label was misspelled 'QUENE'; corrected to 'QUEUE' to match
// the statusOptions filter labels used above.
const columns: Array<DataTableColumn> = [
  {
    type: 'selection',
  },
  {
    title: '#',
    key: 'id',
  },
  {
    title: '域名',
    key: 'domain',
  },
  {
    title: '状态',
    key: 'status',
    // Map the numeric status code to a label and an n-tag color.
    render: (row) => {
      let statusText = ''
      let statusType = ''
      switch (row.status) {
        case 1:
          statusText = 'READY'
          statusType = 'success'
          break
        case 2:
          statusText = 'QUEUE'
          statusType = 'warning'
          break
        case 3:
          statusText = 'CRAWLING'
          statusType = 'info'
          break
        case 999:
          statusText = 'PAUSE'
          statusType = 'error'
          break
        default:
          statusText = 'UNKNOWN'
          statusType = 'error'
      }
      return <n-tag type={statusType}>{statusText}</n-tag>
    },
  },
  {
    title: '采集间隔 (分钟)',
    key: 'crawl_interval',
    // Tooltip shows the interval converted from minutes to days.
    render: (row) => (
      <n-tooltip>
        {{
          trigger: () => <span>{row.crawl_interval}</span>,
          default: () => `${(row.crawl_interval as number) / 60 / 24}`,
        }}
      </n-tooltip>
    ),
  },
  {
    title: '最近采集时间',
    key: 'latest_crawl_time',
    render: (row) => convertTimestampToDate(row.latest_crawl_time as number),
  },
  {
    title: '操作',
    key: 'action',
    // Per-row actions: edit interval, crawl now, delete.
    render: (row) => (
      <div class="flex gap-2">
        <n-button size="small" type="primary" onClick={() => handleEdit(row)}>
          编辑
        </n-button>
        <n-button size="small" type="info" onClick={() => handleSingleCrawl(row)}>
          立即采集
        </n-button>
        <n-button size="small" type="error" onClick={() => handleDelete(row)}>
          删除
        </n-button>
      </div>
    ),
  },
]
const domains = ref([])
//
const hasSelectedRows = computed(() => checkedRowKeys.value.length > 0)
//
const handleCheck = (rowKeys: DataTableRowKey[]) => {
checkedRowKeys.value = rowKeys
}
/** Batch-delete the checked domains after confirmation; optionally drop their SURLs too. */
const handleBatchDelete = () => {
  if (!hasSelectedRows.value) {
    message.warning('请至少选择一个域名')
    return
  }
  // Checkbox state rendered inside the confirm-dialog content.
  const removeSurl = ref(false)
  dialog.warning({
    title: '确认批量删除',
    content: () => (
      <div>
        <div class="mb-2">确定要删除选中的 {checkedRowKeys.value.length} 个域名吗</div>
        <n-checkbox v-model:checked={removeSurl.value}>同时删除所有关联的 SURL</n-checkbox>
      </div>
    ),
    positiveText: '确定',
    negativeText: '取消',
    onPositiveClick: async () => {
      try {
        const response = (
          await axios.post('/api/domain/v1/delete', {
            domain_ids: checkedRowKeys.value,
            remove_surl: removeSurl.value,
          })
        ).data
        // Backend convention in this project: code 20000 means success.
        if (response.code !== 20000) {
          message.error(`批量删除域名失败,错误:${response.message}`)
          return
        }
        message.success('批量删除成功')
        checkedRowKeys.value = [] // clear the selection after deletion
        getDomainList()
      } catch (error) {
        console.error('批量删除域名失败', error)
        message.error(`批量删除域名失败,错误:${error}`)
      }
    },
  })
}
/** 批量修改采集间隔 */
const handleBatchEdit = () => {
if (!hasSelectedRows.value) {
message.warning('请至少选择一个域名')
return
}
editingDomainIds.value = [...checkedRowKeys.value] // ID ID
showEditDialog.value = true
}
/** 批量立即采集 */
const handleBatchCrawl = async () => {
if (!hasSelectedRows.value) {
message.warning('请至少选择一个域名')
return
}
try {
// TODO: /api/domain/v1/crawl
const response = (
await axios.post('/api/domain/v1/crawl', {
domain_ids: checkedRowKeys.value,
})
).data
if (response.code !== 20000) {
message.error(`批量触发采集失败:${response.message}`)
return
}
message.success('批量触发采集成功,已加入队列')
checkedRowKeys.value = [] //
getDomainList() //
} catch (error) {
console.error('批量触发采集失败', error)
message.error(`批量触发采集失败:${error}`)
}
}
/** 单个立即采集 */
const handleSingleCrawl = async (row: any) => {
try {
// ID
const response = (
await axios.post('/api/domain/v1/crawl', {
domain_ids: [row.id],
})
).data
if (response.code !== 20000) {
message.error(`触发采集失败:${response.message}`)
return
}
message.success(`域名 ${row.domain} 已加入采集队列`)
getDomainList() //
} catch (error) {
console.error('触发采集失败', error)
message.error(`触发采集失败:${error}`)
}
}
/** 删除域名 */
const handleDelete = async (row: any) => {
const removeSurl = ref(false)
dialog.warning({
title: '确认删除',
content: () => (
<div>
<div class="mb-2">确定要删除域名 {row.domain} </div>
<n-checkbox v-model:checked={removeSurl.value}>同时删除所有关联的 SURL</n-checkbox>
</div>
),
positiveText: '确定',
negativeText: '取消',
onPositiveClick: async () => {
try {
const response = (
await axios.post('/api/domain/v1/delete', {
domain_ids: [row.id],
remove_surl: removeSurl.value,
})
).data
if (response.code !== 20000) {
message.error(`删除域名失败,错误:${response.message}`)
return
}
message.success('删除成功')
// checkedRowKeys
const index = checkedRowKeys.value.findIndex((key) => key === row.id)
if (index > -1) {
checkedRowKeys.value.splice(index, 1)
}
getDomainList()
} catch (error) {
console.error('删除域名失败', error)
message.error(`删除域名失败,错误:${error}`)
}
},
})
}
/** Fetch the domain list for the current page/size and filter form, updating table + pagination. */
const getDomainList = async () => {
  try {
    const response = (
      await axios.get('/api/domain/v1/list', {
        params: {
          page: pagination.value.page,
          size: pagination.value.pageSize,
          // Omit empty filters entirely instead of sending blank values.
          domain: filterForm.value.domain || undefined,
          status: filterForm.value.status || undefined,
        },
      })
    ).data
    // Backend convention in this project: code 20000 means success.
    if (response.code !== 20000) {
      message.error(`获取域名列表失败,错误:${response.message}`)
      return
    }
    domains.value = response.data.rows
    pagination.value.itemCount = response.data.total
  } catch (error) {
    console.error('获取域名列表失败', error)
    message.error(`获取域名列表失败,错误:${error}`)
  }
}
const handleImportSuccess = () => {
getDomainList()
}
const handleAddSuccess = () => {
getDomainList()
}
/** 编辑域名 */
const handleEdit = (row: any) => {
editingDomainIds.value = [row.id] // ID ID
showEditDialog.value = true
}
const handleEditSuccess = () => {
getDomainList()
const editedCount = editingDomainIds.value?.length || 0
editingDomainIds.value = null // ID
//
if (editedCount > 1) {
checkedRowKeys.value = []
}
}
//
const resetFilter = () => {
filterForm.value = {
domain: '',
status: null,
}
pagination.value.page = 1
updateUrlParams()
getDomainList()
}
//
const applyFilter = () => {
pagination.value.page = 1
updateUrlParams()
getDomainList()
}
onMounted(async () => {
initPagination()
await getDomainList()
})
</script>
<template>
<div class="text-2xl pb-4">域名管理</div>
<div class="flex gap-2 mb-4">
<n-button type="primary" @click="showImportDialog = true">通过文件导入</n-button>
<n-button type="primary" @click="showAddDialog = true">手动添加</n-button>
<n-button type="error" @click="handleBatchDelete" :disabled="!hasSelectedRows">批量删除</n-button>
<n-button type="warning" @click="handleBatchEdit" :disabled="!hasSelectedRows">修改间隔</n-button>
<n-button type="info" @click="handleBatchCrawl" :disabled="!hasSelectedRows">立即采集</n-button>
</div>
<!-- 筛选表单 -->
<n-form inline :model="filterForm" class="mb-4 p-4 bg-gray-50 rounded-lg">
<n-form-item label="域名" path="domain">
<n-input v-model:value="filterForm.domain" placeholder="请输入域名" clearable @keydown.enter="applyFilter" />
</n-form-item>
<n-form-item label="状态" path="status">
<n-select v-model:value="filterForm.status" :options="statusOptions" placeholder="请选择状态" style="width: 200px" />
</n-form-item>
<n-form-item>
<n-button type="primary" @click="applyFilter">筛选</n-button>
<n-button class="ml-2" @click="resetFilter">重置</n-button>
</n-form-item>
</n-form>
<n-data-table :columns="columns" :data="domains" :row-key="(row: any) => row.id" :checked-row-keys="checkedRowKeys"
@update:checked-row-keys="handleCheck" size="small" />
<div class="flex justify-center mt-4">
<n-pagination v-model:page="pagination.page" :page-size="pagination.pageSize" :item-count="pagination.itemCount"
:show-size-picker="pagination.showSizePicker" :page-sizes="pagination.pageSizes"
@update:page-size="pagination.onUpdatePageSize" @update:page="pagination.onChange" />
</div>
<ImportDomainDialog v-model:show="showImportDialog" @success="handleImportSuccess" />
<AddDomainDialog v-model:show="showAddDialog" @success="handleAddSuccess" />
<EditDomainDialog v-model:show="showEditDialog" :domain-ids="editingDomainIds" @success="handleEditSuccess"
@close="editingDomainIds = null" />
</template>

379
fe/src/views/UrlManager.vue Normal file
View File

@ -0,0 +1,379 @@
<script setup lang="tsx">
import { onMounted, inject, ref, computed } from 'vue'
import { useRoute, useRouter } from 'vue-router'
import type { AxiosInstance } from 'axios'
import { type DataTableColumn, type DataTableRowKey, useMessage, useDialog, NDropdown } from 'naive-ui'
import { convertTimestampToDate } from '@/utils/common'
const axios = inject('axios') as AxiosInstance
const message = useMessage()
const dialog = useDialog()
const route = useRoute()
const router = useRouter()
//
const searchForm = ref({
domain: '',
surl: '',
is_report_by_one: 2,
is_report_by_site: 2,
is_report_by_wap: 2,
has_evidence: 2,
})
//
const options = [
{ label: '全部', value: 2 },
{ label: '是', value: 1 },
{ label: '否', value: 0 },
]
//
const pagination = ref({
page: 1,
pageSize: 50,
itemCount: 0,
showSizePicker: true,
pageSizes: [10, 20, 50, 100, 200, 500, 1000],
onChange: (page: number) => {
pagination.value.page = page
updateUrlParams()
getUrlList()
},
onUpdatePageSize: (pageSize: number) => {
pagination.value.pageSize = pageSize
pagination.value.page = 1
updateUrlParams()
getUrlList()
},
})
// URL
const updateUrlParams = () => {
router.push({
query: {
page: pagination.value.page,
size: pagination.value.pageSize,
},
})
}
//
const initPagination = () => {
const page = Number(route.query.page) || 1
const size = Number(route.query.size) || 50
pagination.value.page = page
pagination.value.pageSize = size
}
// Key
const checkedRowKeys = ref<DataTableRowKey[]>([])
//
const reportOptions = [
{
label: '全部渠道',
key: 'all',
},
{
label: 'PC渠道',
key: 'pc',
},
{
label: 'SITE渠道',
key: 'site',
},
{
label: 'WAP渠道',
key: 'wap',
},
]
const columns: Array<DataTableColumn> = [
{
type: 'selection',
},
{
title: '#',
key: 'id',
minWidth: 60,
},
{
title: '域名',
key: 'domain',
minWidth: 200,
},
{
title: 'SURL',
key: 'surl',
minWidth: 100,
},
{
title: 'Q',
key: 'q',
minWidth: 100,
},
{
title: 'Token',
key: 'token',
minWidth: 100,
},
{
title: '已通过PC举报',
key: 'is_report_by_one',
render: (row) => (
<n-tag type={row.is_report_by_one ? 'success' : 'default'}>
{row.is_report_by_one ? '是' : '否'}
</n-tag>
),
},
{
title: '已通过site举报',
key: 'is_report_by_site',
render: (row) => (
<n-tag type={row.is_report_by_site ? 'success' : 'default'}>
{row.is_report_by_site ? '是' : '否'}
</n-tag>
),
},
{
title: '已通过WAP举报',
key: 'is_report_by_wap',
render: (row) => (
<n-tag type={row.is_report_by_wap ? 'success' : 'default'}>
{row.is_report_by_wap ? '是' : '否'}
</n-tag>
),
},
{
title: '已收集证据',
key: 'has_evidence',
render: (row) => (
<n-tag type={row.has_evidence ? 'success' : 'default'}>
{row.has_evidence ? '是' : '否'}
</n-tag>
),
},
{
title: '操作',
key: 'action',
render: (row) => {
return (
<div class="flex gap-2">
<n-dropdown
trigger="click"
options={reportOptions}
onSelect={(key: string) => handleSingleReport(row, key)}
>
<n-button size="small" type="primary">举报</n-button>
</n-dropdown>
<n-button
size="small"
type="info"
onClick={() => handleSingleCollectEvidence(row)}
>
收集证据
</n-button>
</div>
)
},
},
]
const urls = ref([])
// URL
// Fetch the URL list using the current search filters + pagination.
// Fix: removed a leftover debug console.log of response.data.total.
const getUrlList = async () => {
  try {
    const response = (
      await axios.get('/api/urls/v1/list', {
        params: {
          ...searchForm.value,
          page: pagination.value.page,
          size: pagination.value.pageSize,
        },
      })
    ).data
    // Backend convention in this project: code 20000 means success.
    if (response.code !== 20000) {
      message.error(`获取URL列表失败${response.message}`)
      return
    }
    urls.value = response.data.data
    pagination.value.itemCount = response.data.total
  } catch (error) {
    console.error('获取URL列表失败', error)
    message.error(`获取URL列表失败${error}`)
  }
}
//
const handleSearch = () => {
pagination.value.page = 1
getUrlList()
}
//
const handleReset = () => {
searchForm.value = {
domain: '',
surl: '',
is_report_by_one: 2,
is_report_by_site: 2,
is_report_by_wap: 2,
has_evidence: 2,
}
handleSearch()
}
//
// Report the given URL ids via the chosen channel option: 'all' | 'pc' | 'site' | 'wap'.
const handleReport = async (ids: number[], option: string) => {
  // Guard: every selected URL must already have evidence collected.
  const selectedUrls = urls.value.filter((url: any) => ids.includes(url.id))
  const hasNoEvidence = selectedUrls.some((url: any) => !url.has_evidence)
  if (hasNoEvidence) {
    message.warning('请先收集证据后再进行举报')
    return
  }
  try {
    const response = (await axios.post('/api/urls/v1/report', {
      ids,
      // 'all' enables every channel; otherwise only the matching one is set.
      report_by_one: option === 'all' || option === 'pc',
      report_by_site: option === 'all' || option === 'site',
      report_by_wap: option === 'all' || option === 'wap',
    })).data
    // Backend convention in this project: code 20000 means success.
    if (response.code !== 20000) {
      message.error(`举报失败:${response.message}`)
      return
    }
    message.success('操作成功已修改SURL状态等待引擎调度')
    getUrlList()
  } catch (error) {
    console.error('举报失败', error)
    message.error(`举报失败:${error}`)
  }
}
//
const handleCollectEvidence = async (ids: number[]) => {
try {
const response = (await axios.post('/api/urls/v1/evidence', {
ids,
})).data
if (response.code !== 20000) {
message.error(`收集证据失败:${response.message}`)
return
}
message.success('操作成功已修改SURL状态等待引擎调度')
getUrlList()
} catch (error) {
console.error('收集证据失败', error)
message.error(`收集证据失败:${error}`)
}
}
// URL
const handleSingleReport = (row: any, option: string) => {
handleReport([row.id], option)
}
// URL
const handleSingleCollectEvidence = (row: any) => {
handleCollectEvidence([row.id])
}
//
const handleBatchReport = (option: string) => {
if (checkedRowKeys.value.length === 0) {
message.warning('请至少选择一个URL')
return
}
handleReport(checkedRowKeys.value as number[], option)
}
//
const handleBatchCollectEvidence = () => {
if (checkedRowKeys.value.length === 0) {
message.warning('请至少选择一个URL')
return
}
handleCollectEvidence(checkedRowKeys.value as number[])
}
//
const hasSelectedRows = computed(() => checkedRowKeys.value.length > 0)
onMounted(() => {
initPagination()
getUrlList()
})
</script>
<template>
<div class="p-4">
<h1 class="text-2xl mb-4">URL管理</h1>
<!-- 搜索表单 -->
<n-card class="mb-4">
<n-form :model="searchForm" label-placement="left" label-width="auto" require-mark-placement="right-hanging">
<n-grid :cols="24" :x-gap="24">
<n-form-item-gi :span="8" label="域名">
<n-input v-model:value="searchForm.domain" placeholder="请输入域名" @keyup.enter="handleSearch" />
</n-form-item-gi>
<n-form-item-gi :span="8" label="SURL">
<n-input v-model:value="searchForm.surl" placeholder="请输入SURL" @keyup.enter="handleSearch" />
</n-form-item-gi>
<n-form-item-gi :span="8" label="是否已通过PC举报">
<n-select v-model:value="searchForm.is_report_by_one" :options="options" placeholder="请选择" />
</n-form-item-gi>
<n-form-item-gi :span="8" label="是否已通过site举报">
<n-select v-model:value="searchForm.is_report_by_site" :options="options" placeholder="请选择" />
</n-form-item-gi>
<n-form-item-gi :span="8" label="是否已通过WAP举报">
<n-select v-model:value="searchForm.is_report_by_wap" :options="options" placeholder="请选择" />
</n-form-item-gi>
<n-form-item-gi :span="8" label="是否已收集证据">
<n-select v-model:value="searchForm.has_evidence" :options="options" placeholder="请选择" />
</n-form-item-gi>
</n-grid>
<div class="flex justify-end gap-2 mt-4">
<n-button @click="handleReset">重置</n-button>
<n-button type="primary" @click="handleSearch">搜索</n-button>
</div>
</n-form>
</n-card>
<!-- 数据表格 -->
<n-card>
<div class="mb-4" v-if="hasSelectedRows">
<n-space>
<n-dropdown trigger="click" :options="reportOptions" @select="handleBatchReport">
<n-button type="primary">批量举报 ({{ checkedRowKeys.length }})</n-button>
</n-dropdown>
<n-button type="info" @click="handleBatchCollectEvidence">
批量收集证据 ({{ checkedRowKeys.length }})
</n-button>
</n-space>
</div>
<n-data-table :columns="columns" :data="urls" :bordered="false" :row-key="(row: any) => row.id"
:checked-row-keys="checkedRowKeys" @update:checked-row-keys="checkedRowKeys = $event" />
<div class="flex justify-center mt-4">
<n-pagination v-model:page="pagination.page" :item-count="pagination.itemCount" :page-size="pagination.pageSize"
:show-size-picker="pagination.showSizePicker" :page-sizes="pagination.pageSizes"
:on-update:page-size="pagination.onUpdatePageSize" :on-change="pagination.onChange" />
</div>
</n-card>
</div>
</template>
<style scoped></style>

12
fe/tsconfig.app.json Normal file
View File

@ -0,0 +1,12 @@
{
"extends": "@vue/tsconfig/tsconfig.dom.json",
"include": ["env.d.ts", "src/**/*", "src/**/*.vue"],
"exclude": ["src/**/__tests__/*"],
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
"paths": {
"@/*": ["./src/*"]
}
}
}

11
fe/tsconfig.json Normal file
View File

@ -0,0 +1,11 @@
{
"files": [],
"references": [
{
"path": "./tsconfig.node.json"
},
{
"path": "./tsconfig.app.json"
}
]
}

19
fe/tsconfig.node.json Normal file
View File

@ -0,0 +1,19 @@
{
"extends": "@tsconfig/node22/tsconfig.json",
"include": [
"vite.config.*",
"vitest.config.*",
"cypress.config.*",
"nightwatch.conf.*",
"playwright.config.*",
"eslint.config.*"
],
"compilerOptions": {
"noEmit": true,
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
"module": "ESNext",
"moduleResolution": "Bundler",
"types": ["node"]
}
}

50
fe/vite.config.ts Normal file
View File

@ -0,0 +1,50 @@
import {fileURLToPath, URL} from 'node:url'
import {defineConfig} from 'vite'
import vue from '@vitejs/plugin-vue'
import vueJsx from '@vitejs/plugin-vue-jsx'
import vueDevTools from 'vite-plugin-vue-devtools'
import tailwindcss from '@tailwindcss/vite'
import AutoImport from 'unplugin-auto-import/vite'
import Components from 'unplugin-vue-components/vite'
import {NaiveUiResolver} from 'unplugin-vue-components/resolvers'
// https://vite.dev/config/
export default defineConfig({
plugins: [
tailwindcss(),
vue(),
vueJsx(),
vueDevTools(),
AutoImport({
imports: [
'vue',
{
'naive-ui': [
'useDialog',
'useMessage',
'useNotification',
'useLoadingBar'
]
}
]
}),
Components({
resolvers: [NaiveUiResolver()]
})
],
resolve: {
alias: {
'@': fileURLToPath(new URL('./src', import.meta.url))
},
},
server: {
proxy: {
'/api': {
target: 'http://localhost:3000',
changeOrigin: true,
}
}
}
})

View File

@ -34,13 +34,17 @@ tab = browser.new_tab(f"https://www.baidu.com/s?wd={keyword}")
# week_btn_el = tab.ele('t:li@@text()= 一月内 ')
# week_btn_el.click(by_js=True)
# tab.wait(2)
print(f"{tab.url=}")
print("2222")
tab.get("https://www.163.com/")
print(f"{tab.url=}")
# tab.ele(".content_none")
# tab.wait.eles_loaded(["#container", ".content_none", "#content_left"], any_one=True)
print("未找到相关结果" in tab.html)
print("1111")
# if "未找到相关结果" in tab.html:
# print("未找到相关结果")
# else:

272
tests/test_dp3.py Normal file
View File

@ -0,0 +1,272 @@
import random
import re
import sys
import threading
import time
from enum import verify
from pathlib import Path
import execjs
import requests
from DrissionPage import Chromium, ChromiumOptions
from loguru import logger
from app.utils.common import get_all_cookies
from app.utils.ydm_verify import YdmVerify
# Browser configuration for the DrissionPage-driven captcha test session.
chrome_opts = ChromiumOptions()
chrome_opts.mute(True)  # mute audio
chrome_opts.no_imgs(False)  # do not block image loading
chrome_opts.set_argument("--disable-gpu")
chrome_opts.set_argument('--ignore-certificate-errors')
chrome_opts.set_argument("--proxy-server=http://127.0.0.1:7890")  # route through local proxy
# chrome_opts.incognito(True)
chrome_opts.set_browser_path(r"C:\Program Files\Google\Chrome\Application\chrome.exe")
chrome_opts.auto_port()  # pick a free debugging port automatically
browser = Chromium(addr_or_opts=chrome_opts)
# tab = browser.new_tab()
# tab.listen.start([
# "passport.baidu.com/cap/init",
# "passport.baidu.com/cap/style",
# ])
# tab.get("https://wappass.baidu.com/static/captcha/tuxing_v2.html?&logid=10332554090053311096&ak=c27bbc89afca0463650ac9bde68ebe06&backurl=https%3A%2F%2Fwww.baidu.com%2Fs%3Fwd%3Dsite%253Altxbbs.com%26pn%3D50%26oq%3Dsite%253Altxbbs.com%26ct%3D2097152%26ie%3Dutf-8%26si%3Dltxbbs.com%26fenlei%3D256%26rsv_idx%3D1%26rsv_pq%3D99cae74f0003cd72%26rsv_t%3Dab2dk%252Fq4PohUCmoLbyMlEMrGJszk983ojkNLk%252FUiZGJ4ZLpwvZ46PtQUufk%26gpc%3Dstf%253D1741437499%252C1744115898%257Cstftype%253D1%26tfflag%3D1%26topic_pn%3D%26rsv_page%3D1&ext=x9G9QDmMXq%2FNo87gjGO0P1dyBXu4PagAZrreQL6%2Bticsr0rrDszYO2sAbAnT1vLIUgqUK9LXd1cIlztrhMwiv3XfcB99Y5gyF0c0ETsDFDls5CsGNJQRLPawcntn2ndVLHHLl46IaoOp8l%2FC1xtOHwMQi85PCzAojcSf2wQ76KRxVau99LtSYCIfwtv7By0w&signature=f2fbb1b81926e247835f69195661a06b&timestamp=1744115910")
# for pkg in tab.listen.steps():
# print(f"{pkg.url=}")
# print(f"{pkg.response.raw_body=}")
# current_path = Path(__file__).resolve()
# print(current_path)
# current_dir = current_path.parent.parent
# print(current_dir)
# js_path = current_dir.joinpath("./js/mkd_v2_link_submit.js")
# print(js_path.exists())
# with open("./js/mkd_v2_link_submit.js", "r", encoding="utf-8") as f:
# ds_js = f.read()
#
proxy_str = "http://127.0.0.1:7890"
headers = {
'Accept': 'application/json, text/javascript, */*; q=0.01',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7,zh-TW;q=0.6',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
# 'Origin': 'https://jubao.baidu.com',
'Pragma': 'no-cache',
'Referer': "https://wappass.baidu.com/",
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0",
'X-Requested-With': 'XMLHttpRequest',
'sec-ch-ua_wap': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
'sec-ch-ua_wap-mobile': '?0',
'sec-ch-ua_wap-platform': '"Windows"',
"Cookie": "BDUSS=ldlSDMwdkg5VmlrbE5TZFdHUHVhWEFCTVNqcGtKZHhXeTNaTHFGZHY4Y3F5LVJiQVFBQUFBJCQAAAAAAAAAAAEAAADj3ycY1tC5zNXywO4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACo-vVsqPr1bS; PHPSESSID=f364o6o7tpsag92pd67630p870; lastIdentity=PassUserIdentity; BAIDUID=5C7396A6BE9E28B769E6E9815A1B8D5E:FG=1; BAIDUID_BFESS=5C7396A6BE9E28B769E6E9815A1B8D5E:FG=1; BDUSS_BFESS=ldlSDMwdkg5VmlrbE5TZFdHUHVhWEFCTVNqcGtKZHhXeTNaTHFGZHY4Y3F5LVJiQVFBQUFBJCQAAAAAAAAAAAEAAADj3ycY1tC5zNXywO4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACo-vVsqPr1bS",
}
tab = browser.new_tab()
tab.listen.start(r"/cap/(init|style|log)", is_regex=True)
tab.get("https://www.baidu.com")
captcha_data = {}
def listener():
    """Background listener: capture Baidu captcha /cap/init, /cap/style and
    /cap/log responses from the tab's network traffic into ``captcha_data``.

    Fix: the original interpolated ``captcha_data["referer"]`` inside an
    f-string delimited by double quotes, which is a SyntaxError on
    Python < 3.12 (nested same-type quotes); single quotes are used instead,
    matching the adjacent cookie log line.
    """
    for pkg in tab.listen.steps():
        if "/cap/init" in pkg.url:
            captcha_data["init"] = pkg.response.body
        if "/cap/style" in pkg.url:
            captcha_data["style"] = pkg.response.body
            # The style request carries the Referer/Cookie we must replay later.
            captcha_data["referer"] = pkg.request.headers.get("Referer")
            logger.debug(f"正确的 referer: {captcha_data['referer']}")
            captcha_data["cookie"] = pkg.request.headers.get("Cookie")
            logger.debug(f"cookie: {captcha_data['cookie']}")
        if "/cap/log" in pkg.url:
            captcha_data["log"] = pkg.response.body
thread = threading.Thread(target=listener, daemon=True)
thread.start()
def verify_captcha(current_url: str):
headers["Referer"] = captcha_data["referer"]
headers["Cookie"] = captcha_data["cookie"]
# 解出AS / TK
as_value = captcha_data["init"]["data"]["as"]
tk_value = captcha_data["init"]["data"]["tk"]
# logger.debug(f"{as_value=}, {tk_value=}")
# ts = time.time()
# ts1 = int(ts)
# ts2 = int(ts * 1000)
# response = requests.post(
# "https://passport.baidu.com/cap/init",
# data={
# "_": ts2,
# "refer": re.sub(r'timestamp=\d+', f'timestamp={ts1}', captcha_data["referer"]),
# "ak": "c27bbc89afca0463650ac9bde68ebe06",
# "ver": "2",
# "scene": "",
# "ds": "",
# "tk": "",
# "as": "",
# "reinit": 0
# },
# headers=headers,
# proxies={
# "http": proxy_str, "https": proxy_str
# }
# ).json()
# as_value = response["data"]["as"]
# tk_value = response["data"]["tk"]
logger.debug(f"{as_value=}, {tk_value=}")
# 解出 style
backstr = captcha_data["style"]["data"]["backstr"]
captcha_link = captcha_data["style"]["data"]["captchalist"][0]["source"]["back"]["path"]
# response = requests.post(
# "https://passport.baidu.com/cap/style",
# data={
# "_": int(time.time() * 1000),
# "refer": re.sub(r'timestamp=\d+', f'timestamp={ts1}', captcha_data["referer"]),
# "ak": "c27bbc89afca0463650ac9bde68ebe06",
# "tk": tk_value,
# "scene": "",
# "isios": "0",
# "type": "spin",
# "ver": "2"
# },
# headers=headers,
# proxies={
# "http": proxy_str, "https": proxy_str
# }
# )
# logger.debug(f"{response.content=}")
# response = response.json()
# backstr = response["data"]["backstr"]
# captcha_link = response["data"]["captchalist"][0]["source"]["back"]["path"]
logger.debug(f"{backstr=}, {captcha_link=}")
# 下载验证码图片
image_response = requests.get(captcha_link, headers=headers)
with open("captcha.png", "wb") as f:
f.write(image_response.content)
logger.debug("download captcha.png")
# 识别验证码
ydm = YdmVerify()
with open("captcha.png", "rb") as fp:
picture = fp.read()
slide_distance = ydm.rotate(picture)
logger.debug(f"{slide_distance=}")
if not slide_distance:
logger.error("识别验证码失败")
return None
rotate_angle_rate = round(slide_distance / 360, 2)
logger.debug(f"{rotate_angle_rate=}")
if not rotate_angle_rate:
logger.debug("识别验证码失败")
return None
# 发送验证码请求
time_log = str(int(time.time() * 1000))
with open("./js/mkd_v2_link_submit.js", 'r', encoding='utf-8') as f:
ds_js = f.read()
fs = execjs.compile(ds_js).call('getFs2', backstr, rotate_angle_rate, as_value)
data = {
"_": time_log,
"refer": captcha_data["referer"],
# "refer": "https://aigc.baidu.com/works",
# "ak": self.get_ak(),
"ak": "c27bbc89afca0463650ac9bde68ebe06", # c27bbc89afca0463650ac9bde68ebe06
# "ak": "76AKmP4xDQjB3vAIPef3KxOlJZWCpw64", # c27bbc89afca0463650ac9bde68ebe06
"as": as_value,
"scene": "",
"tk": tk_value,
"ver": "2",
"cv": "submit",
"typeid": "spin-0",
# fuid 短时间不会变, 指纹, 不同浏览器不一样
# "Edge": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnCfjdYr2J6wEsox+bQtrTEGEimjy3MrXEpSuItnI4KDyOhCNLIvGcJ9TrqWJqhR97vnz96e18U/ntNdoDIMLzy/6P9rOWdIYWmTQAeeG69438PcpN++VzDmPtrURexo5YYWpVFkRs9k5n0AC3djzGRuXr1+yVZXtGGofFzxBmdr9HtaANtMMPysO2XXACLNUNkLLWJN9fLc3OAWce48tpeQQ2ufd7knx4Oo6OM0PpOVDwQcezbGX85VEmymh7f7M5kIyVl+w5yn2AY4BmLrEWEsyw9SzzW8eHQ5zYIUjiw9hXi7OMMoCL+ptOvZYbHZs0R5qLHmmDCW1M8MMX5yyJF0BV1dQvKslKnAJwZu4XCbsXKn3UXosU1U30/poiN2VeXkBPeo8+Xj/4BIoC2I7WZ6zkFa/Uwd5SvC91kvff2a/Z4OwyTQNM7ES9HmRhChdWg0SJ2xEs1aiXAit16RiTlf82esJH+X/j52G7R3ErwQeJT3QoDv64R2702+8NbGIjf1ZOfxhUCpmJqV4jeHSaHRmnKgJZsK91XhhrdJKXdsbt3phIOpxGLupULr2K+v1DNdId8/HuE0776+tTpUl7shVCeM/XWrdkhru42pifhiujnDhIblsLt8grnj5/GRqcD6ZPAXqJW3lLc0/ub9jXgvXK/EczRgKl+7/tTBkPTCrUVtajA0luHLQOrVsXuN1v0/PR3i09SuFzZJkJBKE3M6rYvPttK9NQiBxhxYWDhX82uQu2XK8+8oU3gxCIaJwsQmX/It0kaZ45PZHFqtD40uOX0sXuThvUin4N4RSI2G9d7jPkj5hbBFquQKM4S+tDJ34jmplOTrqqKT7PPVfrdgd4OkK13pEy86BsJ8M0gKXgtivUgM8Bjl1m/pkg0SuDyntWLdrmMxcZYvgySvSSwQ2Qtm8EkKHIMyR/XgfHnpX5vadGpRMro2qaE8u+x8w1gJHIRKib2u6Q1JtQiZE1Rde/vRx8xKfg6uYR37n0BvfgJE5+KbeuwCyAvJRGUA2fpt0VClIfV0m2PRG7bvH00OODKY6cFi7NgWAK6Jc1G4Ugkfp7W8I0ZYwNpTTxVoxIIBF37aBhyiPWPAOeYXBqA",
# Chrome: "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49IZbsw3/U3NYEqD0LjhKzgMn8fIES5OyXlgwN5I+F8wHowpWWfXkQJw8/9AsO5Q2VOvnc2JlHGIlGS8Vq2z4OA80lVLon08EG3PPxkVZGm39fDi2exK9NDrZB+tNLX6ISxE5PzBgXpCOJ6oP9F1B0OBWaCMD/m01n8FhdDNCvP8EO5cetU79+pgL+ECRdtN6V4VElGJE0mxV4+4Zq4Jf/Xe/q8CkoTNf7Ti1glGYmN32UM9dg0uX+VzET/mmTRe4Dt+MuVHSzsI/bKCjPbpaOqfM8UsxDJUG9hyrGZ8QHa1kC04aTxkkTxI275dv3+ijS1zkWOdjFiy1eD/0R8HcRWYp2smk9EmXBkIAHL4H0gC9lQtdjey37/kyl4JA9Fp4zjuVO0arsD8MrGy1divU++B1KdawGqXpnbOcHZ3CctNGrpgmswaScc6DNWb34jFj0X3tdRE0uuHuqiYa5BClFS2V0TCorKi4CobgR419xWaX8IKLJiaNNLOShWdZdlQO2DXXVxcinzKHqUvWTYx45jsiUVlY78AHQGol6CJLQQ8Q797MShlazvdSwPXgJP5z0uMJp9L+3x/Y2GGhW5sit55sFuMXafALTYf69FCUw5+nVIRs150a4+KK+tA0Eu7Itiu3dM2pflKYWwPE6SDZznyejQ08vd+HpXRB/zhfSUcIYlT5gFEiMIA6SXZCo/XT7vC8D3gHdN+yr46XdVol/WkjFQof0JQH/Vhjj5C1xcAyNxq/VVBT01vdKk6zo6c08e84FEVMLd0m3XWtjFOYu7wRI7lldw2pSxyGnWvA4aiYWcWvvKNJtqB8wHqc5RPr9KRzhbxJnTM5K1vTx4xT/1ZUR3pU7nQKZo/4kP9XycIr/Jg3XMRSnqCBUJlagKAFPt2HF0LdsSk4WWcldb97Ar584nVGbSjPXEUVH0VgbUEm+dADzPoLP+NPMYOyhwgfADiqWaXyKT4UNESYXsPBkdGk6mLCaNSEQsDN1G2677Se3qjzDcyXBnEmHEFptRbmyJzKJ73veHPqfFYtsHO9jH0XnhYk8zKdRuqQ7dnuNIDwxm3UCPo22uFI0ZcgPvQm01s+8jYiMEFJDVra9jWyWTdMpMuhT3p2yYLf70CvUwIkw=",
# fuid.length = 1280, length 235 变化
# FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnCfjdYr2J6wEsox+bQtrTEGEimjy3MrXEpSuItnI4KDyOhCNLIvGcJ9TrqWJqhR97
"fuid": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnfaJTn/Ne60I9LwR04t6XmGEimjy3MrXEpSuItnI4KD0FJKzTbw1AN69fBnzR2FuvMmmQZ+1zgJ72wdcVU+mcQxiE2ir0+TEYgjPJt1Qa3K1mLi+P4IWJeag2lvxB4yJ/GgLbz7OSojK1zRbqBESR5Pdk2R9IA3lxxOVzA+Iw1TWLSgWjlFVG9Xmh1+20oPSbrzvDjYtVPmZ+9/6evcXmhcO1Y58MgLozKnaQIaLfWRPAn9I0uOqAMff6fuUeWcH1mRYoTw2Nhr4J4agZi377iM/izL6cVCGRy2F8c0VpEvM5FjnYxYstXg/9EfB3EVmKAfzNRIeToJ5YV9twMcgdmlV1Uhhp5FAe6gNJIUptp7EMAaXYKm11G+JVPszQFdp9AJLcm4YSsYUXkaPI2Tl66J246cmjWQDTahAOINR5rXR5r/7VVI1RMZ8gb40q7az7vCK56XLooKT5a+rsFrf5Zu0yyCiiagElhrTEOtNdBJJq8eHwEHuFBni9ahSwpC7lbKkUwaKH69tf0DFV7hJROiLETSFloIVkHdy3+I2JUr1LsplAz0hMkWt/tE4tXVUV7QcTDTZWS/2mCoS/GV3N9awQ6iM6hs/BWjlgnEa1+5iP7WSc7RJ34FaE5PsyGXyoCWdXwNRGSZPSvVtB/Ea6w5FKazXcZ/j40FJv+iLGBn3nkkgHlne61I8I7KhtQgIkmBMJIjPMkS/L051MeqdGScsKYTJuSucgI5c3+79eVH+y2TvbOTuuHv1uGxwXFb2atIU1ZYPbmmXculmizKcKI/s44qf8uM8iBZLGkKeVyL74aPyLkg7Gk359g98BIGN/ZzJR/h+Y6AyFx+HlMoYJnS06dVmqFbvlCtSdGylKQ5f8eWtxPkJGqOFtWjIVteQYMsH/AaSJonqw+WLiZvGjYfm9p0alEyujapoTy77HzDcUoU1wUSXa5xS/Z6hXEr2OnLi0LdPVcGjz8lpLcdVeSfm9p0alEyujapoTy77HzDWf5PERRSTFqLd9BTUHLyY4Ji3EQLGQPaM1aeHxG1bJZH0s1Si/KwzTaTYzu6ziQiqwcr2kaYUiH+fMOxn69/BhNJVMhpQkhprc1KZuJRvXjppq0gKweencPxgS/jd0rjw==",
"fs": fs
}
# logger.info(data)
response = requests.post(
"https://passport.baidu.com/cap/log",
headers=headers,
data=data,
proxies={"http": proxy_str, "https": proxy_str},
).json()
try:
result = {
"ds": response["data"]["ds"],
"op": response["data"]["op"],
"tk": response["data"]["tk"]
}
except KeyError:
logger.error(f"验证码没转成功, response: {response=}")
time.sleep(1)
return None
logger.debug(f"{result=}")
# 检查验证码是否正确
if result["op"] != 1:
logger.error(f"op != 1, 重试")
return None
# 发送验证码请求 /cap/c 请求获取待跳转的URL
response = requests.post(
"https://passport.baidu.com/cap/c?ak=c27bbc89afca0463650ac9bde68ebe06",
headers=headers,
json={
"tk": result["tk"],
"ds": result["ds"],
"qrsign": "",
"refer": captcha_data["referer"]
},
proxies={"http": proxy_str, "https": proxy_str},
)
data = response.json()
if data["data"].get("f"):
logger.error(f"验证码失败: {data['data'].get('f')}")
return None
if data["data"].get("s"):
logger.debug("验证成功URL" + data["data"].get("s").get("url"))
url = data["data"].get("s").get("url")
url = url.encode("utf-8").decode("unicode-escape")
logger.success("解码后的URL" + url)
return url
# Crawl the first 14 result pages of a Baidu search for "site:baidu.com".
# Whenever the browser gets redirected to the spin-image captcha
# interstitial, try to solve it (at most 3 attempts) before continuing.
current_page = 1
while current_page < 15:
    # pn is the zero-based result offset; Baidu serves 10 results per page.
    tab.get(f"https://www.baidu.com/s?wd=site%3Abaidu.com&pn={(current_page - 1) * 10}")
    current_page += 1
    if "wappass.baidu.com/static/captcha/tuxing_v2.html" in tab.url:
        logger.debug("captcha!!!!")
        time.sleep(2)  # give the captcha page a moment to finish rendering
        idx = 0
        while idx < 3:
            idx += 1
            url = verify_captcha(tab.url)
            if not url:
                # Verification failed: reload to get a fresh captcha, then retry.
                tab.refresh()
                time.sleep(3)
            else:
                # Verification succeeded: follow the redirect URL and stop
                # retrying. BUGFIX: the original loop had no break here, so it
                # re-ran verify_captcha against the (now non-captcha) page up
                # to two more times, each failing and costing extra sleeps.
                tab.get(url)
                time.sleep(30)
                break
# NOTE(review): captcha_data looks like it is assigned elsewhere (inside
# verify_captcha?) — confirm it is in scope here before relying on this log.
logger.debug(f"{captcha_data=}")
# browser.quit()

8
tests/test_unicode.py Normal file
View File

@ -0,0 +1,8 @@
# Scratch check: print the location of the certifi CA bundle, then verify
# that a Baidu redirect URL containing literal "\uXXXX" escape sequences
# decodes into a normal "&"-separated query string.
import certifi

print(certifi.where())

# Raw string: each "\u0026" below is six literal characters, not "&".
escaped_url = r"https://www.baidu.com/s?wd=site%3Abaidu.com\u0026pn=10\u0026p_tk=30610C1sd8U0U%2BPJYAWv8nhtnx0emHFxWZ9edG%2BaRz9YAiXcODGGnlpuX%2FIMRoUmFESarFc5H8HQuG2nq8%2FVXRIsPZt%2BoxjJAmxxHNGCVs0oz%2FZSTZsdUlvw5a53dshtXQASLvZg71Bg4ZT6j%2B5a%2B%2FM3CHWuHs8cjlMBRCAX4l%2BZt8k%3D\u0026p_timestamp=1744202399\u0026p_sign=a1ee13c92f54d14d019cbdd8edcb4088\u0026p_signature=737f76b967318af4b309d30784d440c5\u0026__pc2ps_ab=30610C1sd8U0U%2BPJYAWv8nhtnx0emHFxWZ9edG%2BaRz9YAiXcODGGnlpuX%2FIMRoUmFESarFc5H8HQuG2nq8%2FVXRIsPZt%2BoxjJAmxxHNGCVs0oz%2FZSTZsdUlvw5a53dshtXQASLvZg71Bg4ZT6j%2B5a%2B%2FM3CHWuHs8cjlMBRCAX4l%2BZt8k%3D|1744202399|737f76b967318af4b309d30784d440c5|a1ee13c92f54d14d019cbdd8edcb4088"
# Round-trip through bytes so the "unicode-escape" codec turns every
# literal "\u0026" into a real "&" character.
decoded_url = escaped_url.encode("utf-8").decode("unicode-escape")
print(decoded_url)