Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 6a6f3461e2 | | |
| | dd0d2ac684 | | |
| | c373f896b4 | | |
| | 19607428f8 | | |
| | b7f545dab8 | | |
| | d7240571b8 | | |
| | b8e3d84c63 | | |
| | 27129a80a3 | | |
| | 65de166821 | | |
| | dd77a24b48 | | |
| | d064fdc1f9 | | |
| | e4287c6605 | | |
| | e7fa38cf33 | | |
| | bb2e09c885 | | |
| | 56ea878c29 | | |
| | 552a09ee41 | | |
| | 9ff7c18743 | | |
2
.gitignore
vendored
2
.gitignore
vendored
@ -50,7 +50,7 @@ __pycache__/
|
|||||||
# Distribution / packaging
|
# Distribution / packaging
|
||||||
build/
|
build/
|
||||||
develop-eggs/
|
develop-eggs/
|
||||||
dist/
|
|
||||||
downloads/
|
downloads/
|
||||||
eggs/
|
eggs/
|
||||||
.eggs/
|
.eggs/
|
||||||
|
|||||||
14
README.md
14
README.md
@ -2,6 +2,20 @@
|
|||||||
|
|
||||||
## 使用方式
|
## 使用方式
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# WEB 模式,只启动 web 控制台,不启动任何引擎
|
||||||
|
python main.py --web
|
||||||
|
# 等价于
|
||||||
|
python main.py --web-only
|
||||||
|
|
||||||
|
# 启动 web 的时候启动引擎
|
||||||
|
python main.py --web --crawl --evidence --report wap,pc,site
|
||||||
|
|
||||||
|
# 这几个选项可以任意组合,例如只启动采集引擎和证据收集引擎
|
||||||
|
python main.py --web --crawl --evidence
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
# 采集模式,采集指定域名的URL列表,直接存入数据库
|
# 采集模式,采集指定域名的URL列表,直接存入数据库
|
||||||
python main.py --crawl www.yunzhiju.net
|
python main.py --crawl www.yunzhiju.net
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
from .app import *
|
from .app import MainApp
|
||||||
72
app/app.py
72
app/app.py
@ -1,7 +1,9 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
import asyncio
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import signal
|
||||||
|
|
||||||
from app.engines.report_engine import Reporter
|
from app.engines.report_engine import Reporter
|
||||||
|
|
||||||
@ -14,6 +16,8 @@ from .models.base import connect_db, create_database
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
import sqlalchemy.exc
|
import sqlalchemy.exc
|
||||||
|
|
||||||
|
from .web.web import WebApp
|
||||||
|
|
||||||
|
|
||||||
class MainApp:
|
class MainApp:
|
||||||
"""主应用"""
|
"""主应用"""
|
||||||
@ -23,6 +27,11 @@ class MainApp:
|
|||||||
self.config: AppConfig = None
|
self.config: AppConfig = None
|
||||||
self.db_engine = None
|
self.db_engine = None
|
||||||
|
|
||||||
|
# 所有的engine
|
||||||
|
self.crawl_engine = None
|
||||||
|
self.evidence_engine = None
|
||||||
|
self.report_engine = None
|
||||||
|
|
||||||
def parse_args(self):
|
def parse_args(self):
|
||||||
"""解析命令行参数"""
|
"""解析命令行参数"""
|
||||||
parser = argparse.ArgumentParser(description="Baidu Reporter")
|
parser = argparse.ArgumentParser(description="Baidu Reporter")
|
||||||
@ -36,7 +45,10 @@ class MainApp:
|
|||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--crawl", help="采集模式,根据域名批量采集 SURL,多个域名可使用英文逗号分割,也可通过 --crawl-file 传入文件",
|
"--crawl",
|
||||||
|
nargs="?",
|
||||||
|
const="",
|
||||||
|
help="采集模式,根据域名批量采集 SURL,多个域名可使用英文逗号分割,也可通过 --crawl-file 传入文件",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--crawl-file", help="目标域名文件,批量传入待采集的域名,每行一个"
|
"--crawl-file", help="目标域名文件,批量传入待采集的域名,每行一个"
|
||||||
@ -57,7 +69,10 @@ class MainApp:
|
|||||||
|
|
||||||
# 添加 web 服务器参数
|
# 添加 web 服务器参数
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--web", action="store_true", help="启动 web 服务器,启动后将忽略其他选项"
|
"--web", action="store_true", help="启动 web 服务器"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--web-only", action="store_true", help="启动 web 服务器,但是不启动引擎"
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -82,7 +97,7 @@ class MainApp:
|
|||||||
if invalid_modes:
|
if invalid_modes:
|
||||||
parser.error(f'无效的运行模式: {", ".join(invalid_modes)}')
|
parser.error(f'无效的运行模式: {", ".join(invalid_modes)}')
|
||||||
args.report = reports
|
args.report = reports
|
||||||
else:
|
elif args.report is not None:
|
||||||
args.report = ["pc", "site", "wap"]
|
args.report = ["pc", "site", "wap"]
|
||||||
|
|
||||||
# 检查输入的文件是否存在
|
# 检查输入的文件是否存在
|
||||||
@ -97,6 +112,10 @@ class MainApp:
|
|||||||
|
|
||||||
def start_cli(self):
|
def start_cli(self):
|
||||||
"""开启 CLI 模式"""
|
"""开启 CLI 模式"""
|
||||||
|
|
||||||
|
# 注册 ctrl+c 处理程序,正常结束所有的 engine
|
||||||
|
signal.signal(signal.SIGINT, self.exit_handler)
|
||||||
|
|
||||||
if self.args.crawl or self.args.crawl_file:
|
if self.args.crawl or self.args.crawl_file:
|
||||||
crawl = CrawlEngine()
|
crawl = CrawlEngine()
|
||||||
crawl.cli_start(self.args.crawl, self.args.crawl_file)
|
crawl.cli_start(self.args.crawl, self.args.crawl_file)
|
||||||
@ -116,7 +135,31 @@ class MainApp:
|
|||||||
|
|
||||||
def start_web(self):
|
def start_web(self):
|
||||||
"""开启 Web 模式"""
|
"""开启 Web 模式"""
|
||||||
pass
|
|
||||||
|
# 注册 ctrl+c 处理程序,正常结束所有的 engine
|
||||||
|
signal.signal(signal.SIGINT, self.exit_handler)
|
||||||
|
|
||||||
|
# 启动所有的 engine
|
||||||
|
if self.args.crawl is not None:
|
||||||
|
self.crawl_engine = CrawlEngine()
|
||||||
|
self.crawl_engine.start()
|
||||||
|
logger.info("crawl 启动")
|
||||||
|
|
||||||
|
if self.args.evidence:
|
||||||
|
self.evidence_engine = EvidenceEngine()
|
||||||
|
self.evidence_engine.start()
|
||||||
|
logger.info("evidence 启动")
|
||||||
|
|
||||||
|
if self.args.report:
|
||||||
|
self.report_engine = Reporter(self.args.report)
|
||||||
|
self.report_engine.start()
|
||||||
|
logger.info("report 启动")
|
||||||
|
|
||||||
|
# 启动 web 页面
|
||||||
|
web_app = WebApp()
|
||||||
|
asyncio.run(web_app.start())
|
||||||
|
|
||||||
|
logger.info("web stop.")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""运行应用"""
|
"""运行应用"""
|
||||||
@ -151,9 +194,28 @@ class MainApp:
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# 如果指定了 --web 参数,启动 web 服务器,忽略其他选项
|
# 如果指定了 --web 参数,启动 web 服务器,忽略其他选项
|
||||||
if self.args.web:
|
if self.args.web or self.args.web_only:
|
||||||
logger.info("启动 Web 模式")
|
logger.info("启动 Web 模式")
|
||||||
return self.start_web()
|
return self.start_web()
|
||||||
else:
|
else:
|
||||||
logger.info("启动 CLI 模式")
|
logger.info("启动 CLI 模式")
|
||||||
return self.start_cli()
|
return self.start_cli()
|
||||||
|
|
||||||
|
def exit_handler(self, signum, frame):
|
||||||
|
# 在这里结束各个 engine
|
||||||
|
logger.debug("CTRL+C called.")
|
||||||
|
|
||||||
|
if self.crawl_engine:
|
||||||
|
self.crawl_engine.stop()
|
||||||
|
self.crawl_engine.cli_wait()
|
||||||
|
logger.info("crawl 退出")
|
||||||
|
|
||||||
|
if self.evidence_engine:
|
||||||
|
self.evidence_engine.stop()
|
||||||
|
self.evidence_engine.wait()
|
||||||
|
logger.info("evidence 退出")
|
||||||
|
|
||||||
|
if self.report_engine:
|
||||||
|
self.report_engine.stop()
|
||||||
|
self.report_engine.wait()
|
||||||
|
logger.info("report 退出")
|
||||||
|
|||||||
0
app/constants/__init__.py
Normal file
0
app/constants/__init__.py
Normal file
8
app/constants/api_result.py
Normal file
8
app/constants/api_result.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
import enum
|
||||||
|
|
||||||
|
|
||||||
|
class ApiCode(enum.Enum):
|
||||||
|
OK = 20000
|
||||||
|
PARAM_ERROR = 30000
|
||||||
|
DB_ERROR = 40000
|
||||||
|
RUNTIME_ERROR = 50000
|
||||||
9
app/constants/domain.py
Normal file
9
app/constants/domain.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
import enum
|
||||||
|
|
||||||
|
|
||||||
|
class DomainStatus(enum.Enum):
|
||||||
|
READY = 1 # 采集结束之后回到这个状态,新添加的默认也是这个状态
|
||||||
|
QUEUEING = 2 # 排队中,已经压入任务队列了,但是还没轮到处理
|
||||||
|
CRAWLING = 3 # 采集中
|
||||||
|
|
||||||
|
PAUSE = 999 # 暂停采集
|
||||||
@ -1,15 +1,21 @@
|
|||||||
import queue
|
import queue
|
||||||
|
import re
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
import execjs
|
||||||
|
import requests
|
||||||
from DrissionPage.errors import ElementNotFoundError
|
from DrissionPage.errors import ElementNotFoundError
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from sqlmodel import Session, select
|
from sqlmodel import Session, select, or_, and_
|
||||||
|
|
||||||
from app.config.config import AppCtx
|
from app.config.config import AppCtx
|
||||||
|
from app.constants.domain import DomainStatus
|
||||||
from app.models.domain import DomainModel
|
from app.models.domain import DomainModel
|
||||||
from app.models.report_urls import ReportUrlModel
|
from app.models.report_urls import ReportUrlModel
|
||||||
|
from app.utils.common import get_proxies
|
||||||
from app.utils.dp import DPEngine
|
from app.utils.dp import DPEngine
|
||||||
|
from app.utils.ydm_verify import YdmVerify
|
||||||
|
|
||||||
|
|
||||||
class CrawlEngine:
|
class CrawlEngine:
|
||||||
@ -27,7 +33,7 @@ class CrawlEngine:
|
|||||||
|
|
||||||
# 线程池
|
# 线程池
|
||||||
self.pool: list[threading.Thread] = []
|
self.pool: list[threading.Thread] = []
|
||||||
self.worker_count = 2
|
self.worker_count = 1
|
||||||
|
|
||||||
# 工作队列
|
# 工作队列
|
||||||
self.target_queue = queue.Queue(1024)
|
self.target_queue = queue.Queue(1024)
|
||||||
@ -35,8 +41,6 @@ class CrawlEngine:
|
|||||||
# 创建一个浏览器
|
# 创建一个浏览器
|
||||||
self.dp_engine = DPEngine()
|
self.dp_engine = DPEngine()
|
||||||
|
|
||||||
self.database = AppCtx.g_db_engine
|
|
||||||
|
|
||||||
def cli_start(self, target_domains: str, target_domain_filepath: str):
|
def cli_start(self, target_domains: str, target_domain_filepath: str):
|
||||||
"""CLI 模式启动
|
"""CLI 模式启动
|
||||||
target_domains: 英文逗号分割的字符串
|
target_domains: 英文逗号分割的字符串
|
||||||
@ -90,7 +94,7 @@ class CrawlEngine:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 存入数据库
|
# 存入数据库
|
||||||
with Session(self.database) as session:
|
with Session(AppCtx.g_db_engine) as session:
|
||||||
self.save_surl(session, domain, surl)
|
self.save_surl(session, domain, surl)
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
# 队列空了,等1秒再取一次
|
# 队列空了,等1秒再取一次
|
||||||
@ -119,7 +123,7 @@ class CrawlEngine:
|
|||||||
|
|
||||||
# 检查在数据库中是否有重复的
|
# 检查在数据库中是否有重复的
|
||||||
for domain in domains:
|
for domain in domains:
|
||||||
with Session(self.database) as session:
|
with Session(AppCtx.g_db_engine) as session:
|
||||||
stmt = select(DomainModel).where(DomainModel.domain == domain)
|
stmt = select(DomainModel).where(DomainModel.domain == domain)
|
||||||
result = session.exec(stmt).first()
|
result = session.exec(stmt).first()
|
||||||
if not result:
|
if not result:
|
||||||
@ -148,29 +152,42 @@ class CrawlEngine:
|
|||||||
def worker(self):
|
def worker(self):
|
||||||
"""真正的工作函数,后续以Web模式启动的时候,走这个"""
|
"""真正的工作函数,后续以Web模式启动的时候,走这个"""
|
||||||
logger.info("crawl worker start!")
|
logger.info("crawl worker start!")
|
||||||
while self.worker_status == 1:
|
while self.worker_status:
|
||||||
# 检查数据库,从中获取需要爬取的域名
|
# 检查数据库,从中获取需要爬取的域名
|
||||||
current_timestamp = int(time.time())
|
current_timestamp = int(time.time())
|
||||||
with Session(AppCtx.g_db_engine) as session:
|
with Session(AppCtx.g_db_engine) as session:
|
||||||
|
|
||||||
stmt = select(DomainModel).where(
|
stmt = select(DomainModel).where(
|
||||||
DomainModel.latest_crawl_time + DomainModel.crawl_interval <= current_timestamp
|
or_(
|
||||||
|
DomainModel.status == 2, # 条件1: status = 2
|
||||||
|
and_(
|
||||||
|
DomainModel.latest_crawl_time + DomainModel.crawl_interval * 60 <= current_timestamp, # 条件2
|
||||||
|
DomainModel.status == 1 # 条件2
|
||||||
|
)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
domains = session.exec(stmt).all()
|
domains = session.exec(stmt).all()
|
||||||
|
|
||||||
for domain_model in domains:
|
for domain_model in domains:
|
||||||
|
|
||||||
|
# 采集前修改状态
|
||||||
|
domain_model.status = DomainStatus.CRAWLING.value
|
||||||
|
session.add(domain_model)
|
||||||
|
session.commit()
|
||||||
|
|
||||||
# 采集
|
# 采集
|
||||||
surl_set = self.crawl(domain_model.domain)
|
surl_set = self.crawl(domain_model.domain)
|
||||||
|
|
||||||
# 存储
|
# 存储
|
||||||
if surl_set:
|
if surl_set:
|
||||||
self.save_surl(session, domain_model, surl_set)
|
self.save_surl(session, domain_model.domain, surl_set)
|
||||||
|
|
||||||
domain_model.latest_crawl_time = int(time.time())
|
domain_model.latest_crawl_time = int(time.time())
|
||||||
|
domain_model.status = DomainStatus.READY.value
|
||||||
session.add(domain_model)
|
session.add(domain_model)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
self.ev.wait(60)
|
self.ev.wait(10)
|
||||||
|
|
||||||
logger.info("crawl worker stop!")
|
logger.info("crawl worker stop!")
|
||||||
|
|
||||||
@ -182,8 +199,8 @@ class CrawlEngine:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# 初始数据
|
# 初始数据
|
||||||
end_time = int(time.time())
|
# end_time = int(time.time())
|
||||||
start_time = end_time - 3600 * 24 * 30 # 获取最近一个月的数据
|
# start_time = end_time - 3600 * 24 * 30 # 获取最近一个月的数据
|
||||||
|
|
||||||
# 依次每一页处理
|
# 依次每一页处理
|
||||||
max_page = 10 # 最大页码数量,0表示不限制最大数量
|
max_page = 10 # 最大页码数量,0表示不限制最大数量
|
||||||
@ -216,6 +233,26 @@ class CrawlEngine:
|
|||||||
# f"https://www.baidu.com/s?wd=site%3A{domain}&gpc=stf%3D{start_time}%2C{end_time}%7Cstftype%3D1&pn={(current_page - 1) * 10}")
|
# f"https://www.baidu.com/s?wd=site%3A{domain}&gpc=stf%3D{start_time}%2C{end_time}%7Cstftype%3D1&pn={(current_page - 1) * 10}")
|
||||||
# tab.get(f"https://www.baidu.com/s?wd=site%3A{domain}&pn={(current_page - 1) * 10}")
|
# tab.get(f"https://www.baidu.com/s?wd=site%3A{domain}&pn={(current_page - 1) * 10}")
|
||||||
|
|
||||||
|
# 检查一下当前的URL是不是跳到验证码的页面
|
||||||
|
if "//wappass.baidu.com/static/captcha/tuxing_v2.html" in tab.url:
|
||||||
|
logger.warning("触发验证码了,尝试识别")
|
||||||
|
idx = 0
|
||||||
|
while idx < 3:
|
||||||
|
idx += 1
|
||||||
|
logger.debug(f"开始第{idx}次识别...")
|
||||||
|
captcha_result = self.verify_captcha(tab.url)
|
||||||
|
if not captcha_result:
|
||||||
|
tab.refresh()
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
tab.get(captcha_result)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
logger.error("验证码打码失败,放弃本次采集,等待3分钟后继续")
|
||||||
|
self.ev.wait(180)
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
# 终止条件
|
# 终止条件
|
||||||
if current_page > max_page and max_page:
|
if current_page > max_page and max_page:
|
||||||
logger.debug(f"{threading.current_thread().name} 达到指定页码,退出")
|
logger.debug(f"{threading.current_thread().name} 达到指定页码,退出")
|
||||||
@ -231,12 +268,19 @@ class CrawlEngine:
|
|||||||
tab.wait.eles_loaded("@id=content_left")
|
tab.wait.eles_loaded("@id=content_left")
|
||||||
results = tab.ele("@id=content_left").eles("@class:result")
|
results = tab.ele("@id=content_left").eles("@class:result")
|
||||||
# temp = [result.attr("mu") for result in results if result.attr("mu") is not None]
|
# temp = [result.attr("mu") for result in results if result.attr("mu") is not None]
|
||||||
|
# logger.debug(f"{len(results)=}")
|
||||||
for result in results:
|
for result in results:
|
||||||
|
# logger.debug(f"{result=}")
|
||||||
surl = result.attr("mu")
|
surl = result.attr("mu")
|
||||||
if not surl:
|
if not surl:
|
||||||
continue
|
continue
|
||||||
logger.debug(f"{threading.current_thread().name} 找到 URL : {surl}")
|
|
||||||
surl_set.add(surl)
|
# 添加结果的时候,也检查一下抓到的 surl 是否和目标域名有关
|
||||||
|
if domain not in surl:
|
||||||
|
logger.debug(f"{threading.current_thread().name} URL {surl} 与目标域名 {domain} 无关,跳过")
|
||||||
|
else:
|
||||||
|
surl_set.add(surl)
|
||||||
|
logger.debug(f"{threading.current_thread().name} 找到 {surl}")
|
||||||
|
|
||||||
# 翻页的时候等一下,别太快了
|
# 翻页的时候等一下,别太快了
|
||||||
self.ev.wait(0.3)
|
self.ev.wait(0.3)
|
||||||
@ -261,6 +305,12 @@ class CrawlEngine:
|
|||||||
def save_surl(session: Session, domain: str, surl_set: set[str]):
|
def save_surl(session: Session, domain: str, surl_set: set[str]):
|
||||||
"""保存采集到的URL"""
|
"""保存采集到的URL"""
|
||||||
for surl in surl_set:
|
for surl in surl_set:
|
||||||
|
|
||||||
|
# 简单的判断一下 surl 中是否包含目标域名
|
||||||
|
if domain not in surl:
|
||||||
|
logger.debug(f"跳过保存 {surl} 因为与目标域名 {domain} 不符合")
|
||||||
|
continue
|
||||||
|
|
||||||
# 先检查是否存在
|
# 先检查是否存在
|
||||||
stmt = select(ReportUrlModel).where(ReportUrlModel.surl == surl)
|
stmt = select(ReportUrlModel).where(ReportUrlModel.surl == surl)
|
||||||
exist = session.exec(stmt).first()
|
exist = session.exec(stmt).first()
|
||||||
@ -283,3 +333,170 @@ class CrawlEngine:
|
|||||||
)
|
)
|
||||||
session.add(example)
|
session.add(example)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
|
# def captcha_listener(self):
|
||||||
|
# for pkg in self.tab.listen.steps():
|
||||||
|
# if "/cap/init" in pkg.url:
|
||||||
|
# self.captcha_data["init"] = pkg.response.body
|
||||||
|
# if "/cap/style" in pkg.url:
|
||||||
|
# self.captcha_data["style"] = pkg.response.body
|
||||||
|
# self.captcha_data["referer"] = pkg.request.headers.get("Referer")
|
||||||
|
# logger.debug(f"触发验证码的 referer: {self.captcha_data["referer"]}")
|
||||||
|
#
|
||||||
|
# self.captcha_data["cookie"] = pkg.request.headers.get("Cookie")
|
||||||
|
# logger.debug(f"触发验证码的 cookie: {self.captcha_data['cookie']}")
|
||||||
|
# if "/cap/log" in pkg.url:
|
||||||
|
# self.captcha_data["log"] = pkg.response.body
|
||||||
|
|
||||||
|
def verify_captcha(self, current_url: str):
|
||||||
|
"""尝试识别验证码,因为和 pc_reporter 的逻辑有点区别,所以单独写一遍"""
|
||||||
|
headers = {
|
||||||
|
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||||
|
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7,zh-TW;q=0.6',
|
||||||
|
'Cache-Control': 'no-cache',
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'Pragma': 'no-cache',
|
||||||
|
'Referer': current_url,
|
||||||
|
'Sec-Fetch-Dest': 'empty',
|
||||||
|
'Sec-Fetch-Mode': 'cors',
|
||||||
|
'Sec-Fetch-Site': 'same-origin',
|
||||||
|
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0",
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
'sec-ch-ua_wap': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
|
||||||
|
'sec-ch-ua_wap-mobile': '?0',
|
||||||
|
'sec-ch-ua_wap-platform': '"Windows"',
|
||||||
|
# "Cookie": self.captcha_data["cookie"],
|
||||||
|
}
|
||||||
|
|
||||||
|
# 解出AS / TK
|
||||||
|
ts = time.time()
|
||||||
|
ts1 = int(ts)
|
||||||
|
ts2 = int(ts * 1000)
|
||||||
|
response = requests.post(
|
||||||
|
"https://passport.baidu.com/cap/init",
|
||||||
|
data={
|
||||||
|
"_": ts2,
|
||||||
|
"refer": re.sub(r'timestamp=\d+', f'timestamp={ts1}', current_url),
|
||||||
|
"ak": "c27bbc89afca0463650ac9bde68ebe06",
|
||||||
|
"ver": "2",
|
||||||
|
"scene": "",
|
||||||
|
"ds": "",
|
||||||
|
"tk": "",
|
||||||
|
"as": "",
|
||||||
|
"reinit": 0
|
||||||
|
},
|
||||||
|
headers=headers,
|
||||||
|
proxies=get_proxies()
|
||||||
|
).json()
|
||||||
|
as_value = response["data"]["as"]
|
||||||
|
tk_value = response["data"]["tk"]
|
||||||
|
|
||||||
|
# 解出 style
|
||||||
|
response = requests.post(
|
||||||
|
"https://passport.baidu.com/cap/style",
|
||||||
|
data={
|
||||||
|
"_": int(time.time() * 1000),
|
||||||
|
"refer": re.sub(r'timestamp=\d+', f'timestamp={ts1}', current_url),
|
||||||
|
"ak": "c27bbc89afca0463650ac9bde68ebe06",
|
||||||
|
"tk": tk_value,
|
||||||
|
"scene": "",
|
||||||
|
"isios": "0",
|
||||||
|
"type": "spin",
|
||||||
|
"ver": "2"
|
||||||
|
},
|
||||||
|
headers=headers,
|
||||||
|
proxies=get_proxies()
|
||||||
|
)
|
||||||
|
response = response.json()
|
||||||
|
backstr = response["data"]["backstr"]
|
||||||
|
captcha_link = response["data"]["captchalist"][0]["source"]["back"]["path"]
|
||||||
|
logger.debug(f"{backstr=}, {captcha_link=}")
|
||||||
|
|
||||||
|
# 下载验证码图片
|
||||||
|
image_response = requests.get(captcha_link, headers=headers, proxies=get_proxies())
|
||||||
|
with open("captcha.png", "wb") as f:
|
||||||
|
f.write(image_response.content)
|
||||||
|
logger.debug("download captcha.png")
|
||||||
|
|
||||||
|
# 识别验证码
|
||||||
|
ydm = YdmVerify()
|
||||||
|
with open("captcha.png", "rb") as fp:
|
||||||
|
picture = fp.read()
|
||||||
|
|
||||||
|
slide_distance = ydm.rotate(picture)
|
||||||
|
logger.debug(f"{slide_distance=}")
|
||||||
|
if not slide_distance:
|
||||||
|
logger.error("识别验证码失败")
|
||||||
|
return None
|
||||||
|
rotate_angle_rate = round(slide_distance / 360, 2)
|
||||||
|
logger.debug(f"{rotate_angle_rate=}")
|
||||||
|
|
||||||
|
if not rotate_angle_rate:
|
||||||
|
logger.debug("识别验证码失败")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 发送验证码请求
|
||||||
|
time_log = str(int(time.time() * 1000))
|
||||||
|
with open("./js/mkd_v2_link_submit.js", 'r', encoding='utf-8') as f:
|
||||||
|
ds_js = f.read()
|
||||||
|
fs = execjs.compile(ds_js).call('getFs2', backstr, rotate_angle_rate, as_value)
|
||||||
|
data = {
|
||||||
|
"_": time_log,
|
||||||
|
"refer": current_url,
|
||||||
|
"ak": "c27bbc89afca0463650ac9bde68ebe06",
|
||||||
|
"as": as_value,
|
||||||
|
"scene": "",
|
||||||
|
"tk": tk_value,
|
||||||
|
"ver": "2",
|
||||||
|
"cv": "submit",
|
||||||
|
"typeid": "spin-0",
|
||||||
|
"fuid": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnfaJTn/Ne60I9LwR04t6XmGEimjy3MrXEpSuItnI4KD0FJKzTbw1AN69fBnzR2FuvMmmQZ+1zgJ72wdcVU+mcQxiE2ir0+TEYgjPJt1Qa3K1mLi+P4IWJeag2lvxB4yJ/GgLbz7OSojK1zRbqBESR5Pdk2R9IA3lxxOVzA+Iw1TWLSgWjlFVG9Xmh1+20oPSbrzvDjYtVPmZ+9/6evcXmhcO1Y58MgLozKnaQIaLfWRPAn9I0uOqAMff6fuUeWcH1mRYoTw2Nhr4J4agZi377iM/izL6cVCGRy2F8c0VpEvM5FjnYxYstXg/9EfB3EVmKAfzNRIeToJ5YV9twMcgdmlV1Uhhp5FAe6gNJIUptp7EMAaXYKm11G+JVPszQFdp9AJLcm4YSsYUXkaPI2Tl66J246cmjWQDTahAOINR5rXR5r/7VVI1RMZ8gb40q7az7vCK56XLooKT5a+rsFrf5Zu0yyCiiagElhrTEOtNdBJJq8eHwEHuFBni9ahSwpC7lbKkUwaKH69tf0DFV7hJROiLETSFloIVkHdy3+I2JUr1LsplAz0hMkWt/tE4tXVUV7QcTDTZWS/2mCoS/GV3N9awQ6iM6hs/BWjlgnEa1+5iP7WSc7RJ34FaE5PsyGXyoCWdXwNRGSZPSvVtB/Ea6w5FKazXcZ/j40FJv+iLGBn3nkkgHlne61I8I7KhtQgIkmBMJIjPMkS/L051MeqdGScsKYTJuSucgI5c3+79eVH+y2TvbOTuuHv1uGxwXFb2atIU1ZYPbmmXculmizKcKI/s44qf8uM8iBZLGkKeVyL74aPyLkg7Gk359g98BIGN/ZzJR/h+Y6AyFx+HlMoYJnS06dVmqFbvlCtSdGylKQ5f8eWtxPkJGqOFtWjIVteQYMsH/AaSJonqw+WLiZvGjYfm9p0alEyujapoTy77HzDcUoU1wUSXa5xS/Z6hXEr2OnLi0LdPVcGjz8lpLcdVeSfm9p0alEyujapoTy77HzDWf5PERRSTFqLd9BTUHLyY4Ji3EQLGQPaM1aeHxG1bJZH0s1Si/KwzTaTYzu6ziQiqwcr2kaYUiH+fMOxn69/BhNJVMhpQkhprc1KZuJRvXjppq0gKweencPxgS/jd0rjw==",
|
||||||
|
# "fuid": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGm51EODDlnqgz44AdUN5VVLGEimjy3MrXEpSuItnI4KD4X6JLdk9kt5JRR+RlJ66q1+4kQEivhwAoCrm3oUNdYdi+yNJadLMQy5pqjmiW757BJsVwXkGdF24AsEQ3K5XBbh9EHAWDOg2T1ejpq0s2eFy9ar/j566XqWDobGoNNfmfpaEhZpob9le2b5QIEdiQcF+6iOKqU/r67N8lf+wxW6FCMUN0p4SXVVUMsKNJv2TwEq3+MvKTlPBjfdM81CMPq4LkPV+7TROLMG0V6r0A++zkWOdjFiy1eD/0R8HcRWYsUPXjDqADgs+Xs31pnSHeup+HBavJhpxl858h16cMtKQmxzisHOxsE/KMoDNYYE7ucLE22Bi0Ojbor7y6SXfVj7+B4iuZO+f7FUDWABtt/WWQqHKVfXMaw5WUmKnfSR5wwQa+N01amx6X+p+x97kkGmoNOSwxWgGvuezNFuiJQdt51yrWaL9Re9fZveXFsIu/gzGjL50VLcWv2NICayyI8BE9m62pdBPySuv4pVqQ9Sl1uTC//wIcO7QL9nm+0N6JgtCkSAWOZCh7Lr0XP6QztjlyD3bkwYJ4FTiNanaDaDJMNOONUIptCYaHTS+UC6IlHE1MUFHThGQXNkGIX8AdBh0GvEV9dnyTGKy8XFjCQiSGk66HDxtjKMU4HPNa0dtuC6f3Qc1BA80dVENIrm5fvupUvtUx+t4D1r3M6jRrNCFDmi5MpkOxe5k51gshb/lV68JOKfsQeXT2p7EM9kdbZAphQDW3ajjXdDRh/L4vMDrWe1PKtUcuW/fWn+hZVZzw+X5dQWsFNhWzqaDLLTRZQpSBdWrMIHd5mkoSCb/UJmNfWI9UswFst29h1Heb04lgaYXvleBbteLbUi5NoCAChP5oZfoCeoKKuvUEAPXXTPVjO0TTi0sVqFSdG+GFyi03wlrm3wCRN8QsWhT10pXJL0RhcLTagDnxauF9flnVwiWaq+daLSn0MEazavBACRErAMWXEI9EFQPGJKv0Ijpq+0VDw8xeJloxMf4I+yn8oxuqFuBSz8I0Kfe0QZwk5OQW6lRvv5iBU4fcPzWWTZ9FnzQ2GA5eh8aiV0nDOGmtfhiYNjbs2NxP0acAgApNd0ew==",
|
||||||
|
"fs": fs
|
||||||
|
}
|
||||||
|
response = requests.post(
|
||||||
|
"https://passport.baidu.com/cap/log",
|
||||||
|
headers=headers,
|
||||||
|
data=data,
|
||||||
|
proxies=get_proxies(),
|
||||||
|
).json()
|
||||||
|
try:
|
||||||
|
result = {
|
||||||
|
"ds": response["data"]["ds"],
|
||||||
|
"op": response["data"]["op"],
|
||||||
|
"tk": response["data"]["tk"]
|
||||||
|
}
|
||||||
|
except KeyError:
|
||||||
|
logger.error(f"验证码没转成功, response: {response=}")
|
||||||
|
time.sleep(1)
|
||||||
|
return None
|
||||||
|
logger.debug(f"{result=}")
|
||||||
|
|
||||||
|
# 检查验证码是否正确
|
||||||
|
if result["op"] != 1:
|
||||||
|
logger.error(f"op != 1, 重试")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 发送验证码请求 /cap/c 请求,获取待跳转的URL
|
||||||
|
response = requests.post(
|
||||||
|
"https://passport.baidu.com/cap/c?ak=c27bbc89afca0463650ac9bde68ebe06",
|
||||||
|
headers=headers,
|
||||||
|
json={
|
||||||
|
"tk": result["tk"],
|
||||||
|
"ds": result["ds"],
|
||||||
|
"qrsign": "",
|
||||||
|
"refer": current_url
|
||||||
|
},
|
||||||
|
proxies=get_proxies()
|
||||||
|
)
|
||||||
|
|
||||||
|
data = response.json()
|
||||||
|
if data["data"].get("f"):
|
||||||
|
logger.error(f"验证码失败: {data['data'].get('f')}")
|
||||||
|
return None
|
||||||
|
if data["data"].get("s"):
|
||||||
|
logger.debug("验证成功,URL:" + data["data"].get("s").get("url"))
|
||||||
|
url = data["data"].get("s").get("url")
|
||||||
|
url = url.encode("utf-8").decode("unicode-escape")
|
||||||
|
logger.success("解码后的URL:" + url)
|
||||||
|
return url
|
||||||
|
|||||||
@ -59,8 +59,8 @@ class EvidenceEngine:
|
|||||||
logger.debug(f"开始获取 {target['surl']} 的举报数据")
|
logger.debug(f"开始获取 {target['surl']} 的举报数据")
|
||||||
self.get_screenshot_and_report_link(target)
|
self.get_screenshot_and_report_link(target)
|
||||||
|
|
||||||
# 每分钟跑一次
|
# 每10秒跑一次
|
||||||
self.ev.wait(60)
|
self.ev.wait(10)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
"""结束线程"""
|
"""结束线程"""
|
||||||
@ -69,6 +69,9 @@ class EvidenceEngine:
|
|||||||
self.dp_engine.close()
|
self.dp_engine.close()
|
||||||
self.wap_dp_engine.close()
|
self.wap_dp_engine.close()
|
||||||
|
|
||||||
|
def wait(self):
|
||||||
|
self.worker_thread.join()
|
||||||
|
|
||||||
def get_surl_from_db(self):
|
def get_surl_from_db(self):
|
||||||
"""从数据库中获取数据"""
|
"""从数据库中获取数据"""
|
||||||
result: list = []
|
result: list = []
|
||||||
@ -87,7 +90,20 @@ class EvidenceEngine:
|
|||||||
|
|
||||||
# Part1 获取证据截图
|
# Part1 获取证据截图
|
||||||
logger.debug(f"开始获取 {surl} 在百度搜索中的截图")
|
logger.debug(f"开始获取 {surl} 在百度搜索中的截图")
|
||||||
img_path, tab = self.get_screenshot(target)
|
img_path, tab, has_result = self.get_screenshot(target)
|
||||||
|
if not has_result:
|
||||||
|
# 如果没有搜到结果,直接把 has_evidence 标记为 true 就行了
|
||||||
|
with Session(self.database) as session:
|
||||||
|
stmt = select(ReportUrlModel).where(ReportUrlModel.id == target["id"])
|
||||||
|
model: ReportUrlModel = session.exec(stmt).first()
|
||||||
|
if not model:
|
||||||
|
logger.error(f"{target['id']} 记录不存在,跳过...")
|
||||||
|
return None
|
||||||
|
# 更新数据
|
||||||
|
model.has_evidence = True
|
||||||
|
session.add(model)
|
||||||
|
session.commit()
|
||||||
|
return None
|
||||||
if not img_path:
|
if not img_path:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -134,7 +150,7 @@ class EvidenceEngine:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"获取证据截图和举报链接失败: {e}")
|
logger.error(f"获取证据截图和举报链接失败: {e}")
|
||||||
|
|
||||||
def get_screenshot(self, target: dict) -> tuple[str | None, MixTab]:
|
def get_screenshot(self, target: dict) -> tuple[str | None, MixTab, bool]:
|
||||||
"""获取搜索页面的截图,返回 img_path """
|
"""获取搜索页面的截图,返回 img_path """
|
||||||
search_keyword = target["surl"].lstrip("https://").lstrip("http://")
|
search_keyword = target["surl"].lstrip("https://").lstrip("http://")
|
||||||
tab = self.dp_engine.browser.new_tab()
|
tab = self.dp_engine.browser.new_tab()
|
||||||
@ -144,12 +160,13 @@ class EvidenceEngine:
|
|||||||
|
|
||||||
if "未找到相关结果" in tab.html:
|
if "未找到相关结果" in tab.html:
|
||||||
logger.info(f"没有关于 {search_keyword} 的数据")
|
logger.info(f"没有关于 {search_keyword} 的数据")
|
||||||
return None, tab
|
return None, tab, False
|
||||||
|
|
||||||
# 图片的存储路径
|
# 图片的存储路径
|
||||||
# 截完图先不要关闭 tab,别的地方还要用
|
# 截完图先不要关闭 tab,别的地方还要用
|
||||||
img_path = f"./imgs/{target['domain']}/{md5(target['surl'])}.png"
|
img_path = f"./imgs/{target['domain']}/{md5(target['surl'])}.png"
|
||||||
return self.do_screenshot(tab, img_path)
|
img_path, tab = self.do_screenshot(tab, img_path)
|
||||||
|
return img_path, tab, True
|
||||||
|
|
||||||
def get_wap_screenshot(self, target: dict) -> tuple[str | None, MixTab]:
|
def get_wap_screenshot(self, target: dict) -> tuple[str | None, MixTab]:
|
||||||
"""用 wap dp 再截一张 surl 本身的图"""
|
"""用 wap dp 再截一张 surl 本身的图"""
|
||||||
|
|||||||
@ -33,6 +33,7 @@ class Reporter:
|
|||||||
def wait(self):
|
def wait(self):
|
||||||
self.worker_thread.join()
|
self.worker_thread.join()
|
||||||
|
|
||||||
|
# noinspection DuplicatedCode
|
||||||
def cli_start(self):
|
def cli_start(self):
|
||||||
for mode in self.mode:
|
for mode in self.mode:
|
||||||
if mode == "pc":
|
if mode == "pc":
|
||||||
@ -46,20 +47,33 @@ class Reporter:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
|
||||||
|
for mode in self.mode:
|
||||||
|
if mode == "pc":
|
||||||
|
self.reporters["pc"].stop()
|
||||||
|
elif mode == "wap":
|
||||||
|
self.reporters["wap"].stop()
|
||||||
|
elif mode == "site":
|
||||||
|
self.reporters["site"].stop()
|
||||||
|
else:
|
||||||
|
logger.error(f"参数错误: {mode}")
|
||||||
|
continue
|
||||||
|
|
||||||
self.status = 0
|
self.status = 0
|
||||||
self.ev.set()
|
self.ev.set()
|
||||||
|
|
||||||
|
# noinspection DuplicatedCode
|
||||||
def worker(self):
|
def worker(self):
|
||||||
while self.status:
|
while self.status:
|
||||||
for mode in self.mode:
|
for mode in self.mode:
|
||||||
if mode == "pc":
|
if mode == "pc" and self.status:
|
||||||
self.reporters["pc"].run()
|
self.reporters["pc"].run()
|
||||||
elif mode == "wap":
|
elif mode == "wap" and self.status:
|
||||||
self.reporters["wap"].run()
|
self.reporters["wap"].run()
|
||||||
elif mode == "site":
|
elif mode == "site" and self.status:
|
||||||
self.reporters["site"].run()
|
self.reporters["site"].run()
|
||||||
else:
|
else:
|
||||||
logger.error(f"参数错误: {mode}")
|
logger.error(f"参数错误: {mode}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self.ev.wait(60)
|
self.ev.wait(10)
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
|
||||||
class BaseReporter(ABC):
|
class BaseReporter(ABC):
|
||||||
"""所有 reporter 的基类"""
|
"""所有 reporter 的基类"""
|
||||||
|
|
||||||
@ -7,3 +8,7 @@ class BaseReporter(ABC):
|
|||||||
def run(self):
|
def run(self):
|
||||||
"""运行 reporter,子类必须实现此方法"""
|
"""运行 reporter,子类必须实现此方法"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
"""控制结束"""
|
||||||
|
pass
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
import os.path
|
import os.path
|
||||||
import random
|
import random
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
from urllib.parse import urlparse, parse_qs
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
|
||||||
@ -19,6 +20,8 @@ from ...utils.ydm_verify import YdmVerify
|
|||||||
class PcReporter(BaseReporter):
|
class PcReporter(BaseReporter):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.engine_name = "PC_REPORTER"
|
self.engine_name = "PC_REPORTER"
|
||||||
|
self.status = 1
|
||||||
|
self.ev = threading.Event()
|
||||||
self.database = AppCtx.g_db_engine
|
self.database = AppCtx.g_db_engine
|
||||||
|
|
||||||
self.upload_pic_url = "http://jubao.baidu.com/jubao/accu/upload"
|
self.upload_pic_url = "http://jubao.baidu.com/jubao/accu/upload"
|
||||||
@ -45,14 +48,23 @@ class PcReporter(BaseReporter):
|
|||||||
"Cookie": "",
|
"Cookie": "",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
self.status = 0
|
||||||
|
self.ev.set()
|
||||||
|
logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
with Session(self.database) as session:
|
with Session(self.database) as session:
|
||||||
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_one == False)
|
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_one == False).where(ReportUrlModel.has_evidence == True)
|
||||||
rows: list[ReportUrlModel] = session.exec(stmt).all()
|
rows: list[ReportUrlModel] = session.exec(stmt).all()
|
||||||
|
|
||||||
logger.info(f"[{self.engine_name}] 共计 {len(rows)} 条记录需要举报")
|
logger.info(f"[{self.engine_name}] 共计 {len(rows)} 条记录需要举报")
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
|
|
||||||
|
if not self.status:
|
||||||
|
break
|
||||||
|
|
||||||
# 选个 cookie
|
# 选个 cookie
|
||||||
report_cookie = random.choice(get_all_cookies())
|
report_cookie = random.choice(get_all_cookies())
|
||||||
self.headers["Cookie"] = report_cookie
|
self.headers["Cookie"] = report_cookie
|
||||||
@ -66,8 +78,8 @@ class PcReporter(BaseReporter):
|
|||||||
domain = row.domain
|
domain = row.domain
|
||||||
# timestamp_s = {int(time.time() * 1000)} # 这里为啥要用 set ?
|
# timestamp_s = {int(time.time() * 1000)} # 这里为啥要用 set ?
|
||||||
timestamp_s = int(time.time() * 1000)
|
timestamp_s = int(time.time() * 1000)
|
||||||
# referer = f"https://jubao.baidu.com/jubao/accu/?surl={surl}token={token}&title={title}&q={q}&has_gw=0&has_v=0&_t8={timestamp_s}"
|
referer = f"https://jubao.baidu.com/jubao/accu/?surl={surl}token={token}&title={title}&q={q}&has_gw=0&has_v=0&_t8={timestamp_s}"
|
||||||
referer = "https://jubao.baidu.com/"
|
# referer = "https://jubao.baidu.com/"
|
||||||
logger.debug(f"referer: {referer}, type of referer: {type(referer)}")
|
logger.debug(f"referer: {referer}, type of referer: {type(referer)}")
|
||||||
self.headers["Referer"] = referer
|
self.headers["Referer"] = referer
|
||||||
|
|
||||||
@ -120,6 +132,8 @@ class PcReporter(BaseReporter):
|
|||||||
|
|
||||||
retry += 1
|
retry += 1
|
||||||
|
|
||||||
|
self.ev.wait(5)
|
||||||
|
|
||||||
def do_report(self, ds, tk, surl, token, title, q, upload=''):
|
def do_report(self, ds, tk, surl, token, title, q, upload=''):
|
||||||
try:
|
try:
|
||||||
phone = generate_random_phone_number()
|
phone = generate_random_phone_number()
|
||||||
@ -201,20 +215,26 @@ class PcReporter(BaseReporter):
|
|||||||
# 获取 as、tk 值
|
# 获取 as、tk 值
|
||||||
try:
|
try:
|
||||||
get_as_tk = self.post_init(surl, token, title, q, timestamp_s)
|
get_as_tk = self.post_init(surl, token, title, q, timestamp_s)
|
||||||
|
# logger.debug(f"{get_as_tk=}")
|
||||||
get_as = get_as_tk['as']
|
get_as = get_as_tk['as']
|
||||||
get_tk = get_as_tk['tk']
|
get_tk = get_as_tk['tk']
|
||||||
|
|
||||||
# 获取验证码图片下载链接、backstr
|
# 获取验证码图片下载链接、backstr
|
||||||
get_style_result = self.get_style(get_tk, surl, token, title, q, timestamp_s)
|
get_style_result = self.get_style(get_tk, surl, token, title, q, timestamp_s)
|
||||||
|
# logger.debug(f"{get_style_result=}")
|
||||||
get_backstr = get_style_result['backstr']
|
get_backstr = get_style_result['backstr']
|
||||||
pic_download_link = get_style_result['captcha']
|
pic_download_link = get_style_result['captcha']
|
||||||
|
|
||||||
# 下载验证码图片
|
# 下载验证码图片
|
||||||
self.download_captcha(pic_download_link)
|
self.download_captcha(pic_download_link)
|
||||||
rotate_angle_rate = self.get_rotate_angle_rate()
|
rotate_angle_rate = self.get_rotate_angle_rate()
|
||||||
|
logger.debug(f"{rotate_angle_rate=}")
|
||||||
# key = self.get_key(get_as)
|
# key = self.get_key(get_as)
|
||||||
|
if not rotate_angle_rate:
|
||||||
|
return {'op': 3}
|
||||||
|
|
||||||
get_ds_tk = self.post_log(get_as, get_tk, get_backstr, rotate_angle_rate)
|
get_ds_tk = self.post_log(get_as, get_tk, get_backstr, rotate_angle_rate)
|
||||||
|
logger.debug(f"{get_ds_tk=}")
|
||||||
log_ds = get_ds_tk['ds']
|
log_ds = get_ds_tk['ds']
|
||||||
log_tk = get_ds_tk['tk']
|
log_tk = get_ds_tk['tk']
|
||||||
log_op = get_ds_tk['op']
|
log_op = get_ds_tk['op']
|
||||||
@ -225,7 +245,7 @@ class PcReporter(BaseReporter):
|
|||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f'{e}')
|
logger.exception(f'{e}')
|
||||||
return {'op': 3}
|
return {'op': 3}
|
||||||
|
|
||||||
def post_init(self, surl, token, title, q, timestamp_s):
|
def post_init(self, surl, token, title, q, timestamp_s):
|
||||||
@ -303,9 +323,13 @@ class PcReporter(BaseReporter):
|
|||||||
with open('./captcha/captcha.png', 'rb') as p:
|
with open('./captcha/captcha.png', 'rb') as p:
|
||||||
picture = p.read()
|
picture = p.read()
|
||||||
slide_distance = identify_distance.rotate(image=picture)
|
slide_distance = identify_distance.rotate(image=picture)
|
||||||
|
logger.debug(f"{slide_distance=}")
|
||||||
|
if not slide_distance:
|
||||||
|
return None
|
||||||
# 旋转角度为
|
# 旋转角度为
|
||||||
# logger.info('rotate angle: ' + str(slide_distance))
|
# logger.info('rotate angle: ' + str(slide_distance))
|
||||||
rotate_angle_rate = round(slide_distance / 360, 2)
|
rotate_angle_rate = round(slide_distance / 360, 2)
|
||||||
|
logger.debug(f"{rotate_angle_rate=}")
|
||||||
# logger.info('rotate angle rate: ' + str(rotate_angle_rate))
|
# logger.info('rotate angle rate: ' + str(rotate_angle_rate))
|
||||||
return rotate_angle_rate
|
return rotate_angle_rate
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import os.path
|
import os.path
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@ -19,6 +20,8 @@ from ...utils.ua import random_ua
|
|||||||
class SiteReporter(BaseReporter):
|
class SiteReporter(BaseReporter):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.engine_name = "SITE_REPORTER"
|
self.engine_name = "SITE_REPORTER"
|
||||||
|
self.status = 1
|
||||||
|
self.ev = threading.Event()
|
||||||
|
|
||||||
self.upload_pic_url = "https://help.baidu.com/api/mpic"
|
self.upload_pic_url = "https://help.baidu.com/api/mpic"
|
||||||
self.report_url = "https://help.baidu.com/jubaosubmit"
|
self.report_url = "https://help.baidu.com/jubaosubmit"
|
||||||
@ -42,15 +45,25 @@ class SiteReporter(BaseReporter):
|
|||||||
|
|
||||||
self.token_pattern = r'name="submit_token" value="(.*?)"'
|
self.token_pattern = r'name="submit_token" value="(.*?)"'
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
# logger.debug(f"{self.engine_name} stop called.")
|
||||||
|
self.status = 0
|
||||||
|
self.ev.set()
|
||||||
|
logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""实现 PC 端的举报逻辑"""
|
"""实现 PC 端的举报逻辑"""
|
||||||
with Session(self.database) as session:
|
with Session(self.database) as session:
|
||||||
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_site == False)
|
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_site == False).where(ReportUrlModel.has_evidence == True)
|
||||||
rows: list[ReportUrlModel] = session.exec(stmt).all()
|
rows: list[ReportUrlModel] = session.exec(stmt).all()
|
||||||
|
|
||||||
logger.info(f"[{self.engine_name}] 共计 {len(rows)} 条需要举报")
|
logger.info(f"[{self.engine_name}] 共计 {len(rows)} 条需要举报")
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
|
|
||||||
|
if not self.status:
|
||||||
|
break
|
||||||
|
|
||||||
# 生成举报需要的基础数据
|
# 生成举报需要的基础数据
|
||||||
surl = row.surl
|
surl = row.surl
|
||||||
q = row.q
|
q = row.q
|
||||||
@ -77,8 +90,8 @@ class SiteReporter(BaseReporter):
|
|||||||
session.add(row)
|
session.add(row)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
# 等待5秒继续举报
|
# 等待5秒继续举报
|
||||||
time.sleep(5)
|
self.ev.wait(5)
|
||||||
|
|
||||||
def upload_pic(self, img_path: str):
|
def upload_pic(self, img_path: str):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -2,6 +2,7 @@ import base64
|
|||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
import random
|
import random
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@ -20,6 +21,8 @@ class WapReporter(BaseReporter):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
||||||
self.engine_name = "WAP_REPORTER"
|
self.engine_name = "WAP_REPORTER"
|
||||||
|
self.status = 1
|
||||||
|
self.ev = threading.Event()
|
||||||
|
|
||||||
self.report_url = "https://ufosdk.baidu.com/api?m=Client&a=postMsg"
|
self.report_url = "https://ufosdk.baidu.com/api?m=Client&a=postMsg"
|
||||||
self.request = requests.session()
|
self.request = requests.session()
|
||||||
@ -40,16 +43,27 @@ class WapReporter(BaseReporter):
|
|||||||
self.database = AppCtx.g_db_engine
|
self.database = AppCtx.g_db_engine
|
||||||
self.all_cookies = get_all_cookies()
|
self.all_cookies = get_all_cookies()
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
self.status = 0
|
||||||
|
self.ev.set()
|
||||||
|
logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""实现 WAP 端的举报逻辑"""
|
"""实现 WAP 端的举报逻辑"""
|
||||||
with Session(self.database) as session:
|
with Session(self.database) as session:
|
||||||
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_wap == False)
|
stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_wap == False).where(
|
||||||
|
ReportUrlModel.has_evidence == True)
|
||||||
rows: list[ReportUrlModel] = session.exec(stmt).all()
|
rows: list[ReportUrlModel] = session.exec(stmt).all()
|
||||||
|
|
||||||
logger.debug(f"[{self.engine_name}] 共找到 {len(rows)} 条待举报记录")
|
logger.debug(f"[{self.engine_name}] 共找到 {len(rows)} 条待举报记录")
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
|
|
||||||
|
if not self.status:
|
||||||
|
break
|
||||||
|
|
||||||
|
self.ev.wait(1)
|
||||||
|
|
||||||
# 选个 cookie
|
# 选个 cookie
|
||||||
report_cookie = random.choice(get_all_cookies())
|
report_cookie = random.choice(get_all_cookies())
|
||||||
report_site_cookie = GenCookie.run(report_cookie)
|
report_site_cookie = GenCookie.run(report_cookie)
|
||||||
@ -74,7 +88,7 @@ class WapReporter(BaseReporter):
|
|||||||
session.add(row)
|
session.add(row)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
time.sleep(5)
|
self.ev.wait(5)
|
||||||
|
|
||||||
def get_user_info(self):
|
def get_user_info(self):
|
||||||
try:
|
try:
|
||||||
@ -82,9 +96,10 @@ class WapReporter(BaseReporter):
|
|||||||
# wapUserAgent = random.choice(self.wapUserAgent)
|
# wapUserAgent = random.choice(self.wapUserAgent)
|
||||||
response = self.request.get(
|
response = self.request.get(
|
||||||
"https://ufosdk.baidu.com/api?m=Web&a=getUserInfo&appid=293852",
|
"https://ufosdk.baidu.com/api?m=Web&a=getUserInfo&appid=293852",
|
||||||
headers=self.headers, proxies=self.proxies, allow_redirects=False, timeout=10, verify=False
|
headers=self.headers, proxies=self.proxies, allow_redirects=False, timeout=10
|
||||||
)
|
)
|
||||||
json_data = response.json()
|
json_data = response.json()
|
||||||
|
logger.debug(f"{self.engine_name} get_user_info response: {json_data}")
|
||||||
uid = json_data['result']['uid']
|
uid = json_data['result']['uid']
|
||||||
un = json_data['result']['un']
|
un = json_data['result']['un']
|
||||||
userinfo["uid"] = uid
|
userinfo["uid"] = uid
|
||||||
@ -140,11 +155,14 @@ class WapReporter(BaseReporter):
|
|||||||
proxies=self.proxies,
|
proxies=self.proxies,
|
||||||
allow_redirects=False,
|
allow_redirects=False,
|
||||||
timeout=10,
|
timeout=10,
|
||||||
verify=False
|
|
||||||
)
|
)
|
||||||
# logger.debug(req.json())
|
# logger.debug(req.json())
|
||||||
logger.debug(response.json())
|
data = response.json()
|
||||||
if response.json()['errno'] == 0:
|
logger.debug(data)
|
||||||
|
if data['errno'] == 0:
|
||||||
logger.success(f"[{self.engine_name}] {fb_url} 举报成功")
|
logger.success(f"[{self.engine_name}] {fb_url} 举报成功")
|
||||||
return True
|
return True
|
||||||
|
if "请勿重复提交" in data["errmsg"]:
|
||||||
|
logger.success(f"[{self.engine_name}] {fb_url} 重复提交,标记为成功")
|
||||||
|
return True
|
||||||
return False
|
return False
|
||||||
|
|||||||
@ -26,13 +26,16 @@ def update_updated_at(mapper, connection, target):
|
|||||||
target.updated_at = get_timestamp()
|
target.updated_at = get_timestamp()
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection PyUnresolvedReferences
|
||||||
def connect_db(config: AppConfig):
|
def connect_db(config: AppConfig):
|
||||||
"""连接数据库"""
|
"""连接数据库"""
|
||||||
|
|
||||||
# 导入所有模型,为了自动创建数据表
|
# 导入所有模型,为了自动创建数据表
|
||||||
|
from .domain import DomainModel
|
||||||
|
from .report_urls import ReportUrlModel
|
||||||
|
|
||||||
dsn = f"mysql+pymysql://{config.database.user}:{config.database.password}@{config.database.host}:{config.database.port}/{config.database.database}"
|
dsn = f"mysql+pymysql://{config.database.user}:{config.database.password}@{config.database.host}:{config.database.port}/{config.database.database}"
|
||||||
engine = create_engine(dsn, echo=False)
|
engine = create_engine(dsn, echo=False, pool_size=4, max_overflow=10, pool_recycle=60, pool_pre_ping=True)
|
||||||
|
|
||||||
SQLModel.metadata.create_all(engine)
|
SQLModel.metadata.create_all(engine)
|
||||||
AppCtx.g_db_engine = engine
|
AppCtx.g_db_engine = engine
|
||||||
|
|||||||
@ -12,7 +12,7 @@ class DomainModel(BaseModel, table=True):
|
|||||||
# 域名
|
# 域名
|
||||||
domain: str = Field(alias="domain", default="", sa_type=VARCHAR(1024))
|
domain: str = Field(alias="domain", default="", sa_type=VARCHAR(1024))
|
||||||
|
|
||||||
# 爬取状态,TODO:先空着,后续有任务控制之后,用这个字段表示这个域名的任务状态
|
# 爬取状态,@see constants.DomainStatus
|
||||||
status: int = Field(alias="status", default=0)
|
status: int = Field(alias="status", default=0)
|
||||||
|
|
||||||
# 爬取间隔,默认间隔为1周
|
# 爬取间隔,默认间隔为1周
|
||||||
|
|||||||
@ -1,6 +1,8 @@
|
|||||||
import hashlib
|
import hashlib
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
from app.config.config import AppCtx
|
||||||
|
|
||||||
|
|
||||||
def md5(s: str) -> str:
|
def md5(s: str) -> str:
|
||||||
m = hashlib.md5()
|
m = hashlib.md5()
|
||||||
@ -9,17 +11,24 @@ def md5(s: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def get_proxies():
|
def get_proxies():
|
||||||
username = "t14131310374591"
|
# username = "t14131310374591"
|
||||||
password = "qg6xwmrq"
|
# password = "qg6xwmrq"
|
||||||
tunnel = "d432.kdltps.com:15818"
|
# tunnel = "d432.kdltps.com:15818"
|
||||||
proxies = {
|
# proxies = {
|
||||||
"http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
|
# "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
|
||||||
"https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
|
# "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
|
||||||
}
|
# }
|
||||||
# proxies = {
|
# proxies = {
|
||||||
# "http": "http://127.0.0.1:8080",
|
# "http": "http://127.0.0.1:8080",
|
||||||
# "https": "http://127.0.0.1:8080"
|
# "https": "http://127.0.0.1:8080"
|
||||||
# }
|
# }
|
||||||
|
|
||||||
|
proxy = AppCtx.g_app_config.chrome.proxy
|
||||||
|
proxies = {
|
||||||
|
"http": proxy,
|
||||||
|
"https": proxy
|
||||||
|
}
|
||||||
|
|
||||||
return proxies
|
return proxies
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -2,10 +2,12 @@ import base64
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
class YdmVerify(object):
|
class YdmVerify(object):
|
||||||
_custom_url = "https://www.jfbym.com/api/YmServer/customApi"
|
_custom_url = "https://www.jfbym.com/api/YmServer/customApi"
|
||||||
_token = "HhUGwpI6AtQGoux36i1ZpsDv7hwGSbr1hQ0RX-HXSZE"
|
_token = "2HNCDBee_JFmXAZZanQm9I7x1sqQln9BggF1xaGtMX0"
|
||||||
_headers = {
|
_headers = {
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
}
|
}
|
||||||
@ -17,4 +19,11 @@ class YdmVerify(object):
|
|||||||
"type": "90009"
|
"type": "90009"
|
||||||
}
|
}
|
||||||
resp = requests.post(self._custom_url, headers=self._headers, data=json.dumps(payload))
|
resp = requests.post(self._custom_url, headers=self._headers, data=json.dumps(payload))
|
||||||
|
logger.debug(f"{resp.json()=}")
|
||||||
|
|
||||||
|
response_data = resp.json()
|
||||||
|
if response_data.get("code") == 10002:
|
||||||
|
logger.error(f'{response_data.get("msg")}')
|
||||||
|
return None
|
||||||
|
|
||||||
return resp.json()['data']['data']
|
return resp.json()['data']['data']
|
||||||
0
app/web/__init__.py
Normal file
0
app/web/__init__.py
Normal file
0
app/web/controller/__init__.py
Normal file
0
app/web/controller/__init__.py
Normal file
116
app/web/controller/domain.py
Normal file
116
app/web/controller/domain.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
from typing import Annotated
|
||||||
|
|
||||||
|
from fastapi import APIRouter, UploadFile, Form, Query
|
||||||
|
|
||||||
|
from app.constants.api_result import ApiCode
|
||||||
|
from app.constants.domain import DomainStatus
|
||||||
|
from app.web.request.domain_request import AddDomainRequest, DeleteDomainRequest, UpdateDomainRequest, \
|
||||||
|
GetDomainListRequest, CrawlNowRequest, ToggleDomainRequest
|
||||||
|
from app.web.results import ApiResult
|
||||||
|
from app.web.service.domain_service import DomainService
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/domain", tags=["域名管理"])
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/v1/list")
def get_all_domains(request: Annotated[GetDomainListRequest, Query()]):
    """Return one page of domains.

    The ``domain`` and ``status`` query parameters are forwarded as filters
    and ``page`` / ``size`` select the page; leaving the filters at their
    defaults lists everything.
    """
    return DomainService.get_list(
        page=request.page,
        page_size=request.size,
        domain=request.domain,
        status=request.status,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/v1/add")
def add_domains(request: AddDomainRequest):
    """Add the requested domains, skipping the ones that are already stored.

    Returns the service result of the insert, ``ApiResult.ok(0)`` when no new
    domain remains, or the failed lookup result unchanged.
    """
    # Collapse repeated entries inside the request itself (first occurrence
    # wins, order preserved) so one call cannot insert the same domain twice.
    requested = list(dict.fromkeys(request.domains))

    # Find out which of the requested domains already exist.
    result = DomainService.get_by_domains(requested)
    if not result.success:
        return result

    # A set keeps the membership test below linear in the number of domains.
    existed_domains = {item.domain for item in result.data}
    new_domains = [x for x in requested if x not in existed_domains]
    if not new_domains:
        return ApiResult.ok(0)

    # Insert what is left and hand back the service result.
    return DomainService.add_domains(request.crawl_interval, request.crawl_now, new_domains)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/v1/import")
def import_domains(
        # A file upload cannot be combined with a form model, so every form
        # field has to be declared as its own parameter.
        file: UploadFile,
        crawl_interval: int = Form(),
        crawl_now: bool = Form(),
):
    """Add domains from an uploaded text file, one domain per line.

    Lines are stripped and decoded as UTF-8; blank lines and repeated lines
    are ignored. Domains that are already stored are skipped. Returns the
    service result of the insert, ``ApiResult.ok(0)`` when nothing new
    remains, or the failed lookup result unchanged.

    NOTE: the file is processed inline; the original author planned to move
    large uploads to a separate coroutine/thread later.
    """
    # Read the upload, keeping each non-empty domain once in file order.
    domains = []
    seen = set()
    for raw_line in file.file:
        domain = raw_line.strip().decode("UTF-8")
        if not domain or domain in seen:
            continue
        seen.add(domain)
        domains.append(domain)

    if not domains:
        return ApiResult.ok(0)

    # Drop the domains that are already stored.
    result = DomainService.get_by_domains(domains)
    if not result.success:
        return result
    existed_domains = {item.domain for item in result.data}
    new_domains = [x for x in domains if x not in existed_domains]
    if not new_domains:
        return ApiResult.ok(0)

    # Insert what is left and hand back the service result.
    return DomainService.add_domains(crawl_interval, crawl_now, new_domains)
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection DuplicatedCode
@router.post("/v1/update")
def update_domain(request: UpdateDomainRequest):
    """Set a new crawl interval on one or more domains, addressed by id.

    Every requested id must exist; the first unknown id (in request order)
    aborts the call with a PARAM_ERROR result.
    """
    lookup = DomainService.get_by_ids(request.domain_ids)
    if not lookup.success:
        return lookup

    known_ids = {row.id for row in lookup.data}
    domain_id = next((i for i in request.domain_ids if i not in known_ids), None)
    if domain_id is not None:
        return ApiResult.error(ApiCode.PARAM_ERROR.value, f"域名 ID {domain_id} 不存在")

    # All ids are known: apply the new interval.
    return DomainService.update_domain_interval(request.domain_ids, request.crawl_interval)
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection DuplicatedCode
@router.post("/v1/delete")
def delete_domain(request: DeleteDomainRequest):
    """Delete one or more domains, addressed by id.

    Every requested id must exist; the first unknown id (in request order)
    aborts the call with a PARAM_ERROR result. ``remove_surl`` is passed on
    to the service together with the ids.
    """
    lookup = DomainService.get_by_ids(request.domain_ids)
    if not lookup.success:
        return lookup

    stored = {row.id for row in lookup.data}
    unknown = [wanted for wanted in request.domain_ids if wanted not in stored]
    if unknown:
        domain_id = unknown[0]
        return ApiResult.error(ApiCode.PARAM_ERROR.value, f"域名 ID {domain_id} 不存在")

    # All ids are known: perform the deletion.
    return DomainService.delete_domains(request.domain_ids, request.remove_surl)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/v1/crawl")
def crawl_now(request: CrawlNowRequest):
    """Request an immediate crawl of the given domains.

    Implemented by switching their status to ``DomainStatus.QUEUEING``
    (the original note calls this status value 2 - TODO confirm).
    """
    queued = DomainStatus.QUEUEING.value
    return DomainService.update_domain_status(request.domain_ids, queued)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/v1/toggle")
def toggle_domain(request: ToggleDomainRequest):
    """Pause crawling of the given domains by setting ``DomainStatus.PAUSE``.

    NOTE(review): despite the name, this endpoint only ever pauses; nothing
    here switches a paused domain back on.
    """
    paused = DomainStatus.PAUSE.value
    outcome = DomainService.update_domain_status(request.domain_ids, paused)
    return outcome
|
||||||
85
app/web/controller/report.py
Normal file
85
app/web/controller/report.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
from typing import Annotated
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Query
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from app.web.request.report_request import AddUrlsRequest, CollectEvidenceRequest, ReportRequest, GetUrlListRequest
|
||||||
|
from app.web.service.domain_service import DomainService
|
||||||
|
from app.web.service.report_service import ReportURLService
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/urls", tags=["URL管理"])
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/v1/list")
def get_all_urls(request: Annotated[GetUrlListRequest, Query()]):
    """Return one page of URLs, optionally filtered.

    Filters (domain, surl, the three report flags and the evidence flag) and
    the paging values are taken from the query string and handed to the
    service in that order.
    """
    logger.debug(f"{request=}")

    filters = (
        request.domain,
        request.surl,
        request.is_report_by_one,
        request.is_report_by_site,
        request.is_report_by_wap,
        request.has_evidence,
    )
    paging = (request.page, request.size)
    return ReportURLService.get_list(*filters, *paging)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/v1/add")
def add_urls(request: AddUrlsRequest):
    """Manually attach URLs to domains; several URLs may be sent at once.

    Body format::

        [{"domain": "", "surl": ""}, {"domain": "", "surl": ""}, ...]

    Domains that are not stored yet are created first, then every URL is
    added under the id of its domain. Any failed service result is returned
    unchanged.
    """
    # Distinct domains named by the request, in first-seen order.
    input_domains = list(dict.fromkeys(item.domain for item in request.urls))

    result = DomainService.get_by_domains(input_domains)
    if not result.success:
        return result

    # Compare by domain *name*: result.data holds model objects, so testing
    # the strings against it directly would treat every domain as missing
    # and re-insert the ones that already exist.
    existed_domains = {item.domain for item in result.data}
    new_domains = [x for x in input_domains if x not in existed_domains]
    if new_domains:
        # Auto-created domains get interval 1440 and crawl_now=True.
        result = DomainService.add_domains(1440, True, new_domains)
        if not result.success:
            return result

    # Fetch the models again so the freshly created domains have ids too.
    result = DomainService.get_by_domains(input_domains)
    if not result.success:
        return result

    # Create the URLs, keyed by the id of their domain.
    domain_map: dict[str, int] = {x.domain: x.id for x in result.data}
    return ReportURLService.add_urls(domain_map, request.urls)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/v1/evidence")
def collect_evidence(request: CollectEvidenceRequest):
    """Force evidence collection for the given URL ids (batch capable).

    URLs whose evidence was already collected are meant to be collected again.

    TODO: this should go through a task queue; for simplicity the stored
    evidence flag is reset instead (per the original note, to 0), which has
    the same effect once the engine picks the rows up again.
    """
    url_ids = request.ids
    return ReportURLService.batch_update_evidence_flag(url_ids)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/v1/report")
def report(request: ReportRequest):
    """Report the given URL ids; several ids may be sent at once.

    Nothing is reported inline: the flags are written to the database and the
    engines are expected to pick the rows up on their own schedule.
    """
    logger.debug(f"{request=}")

    channels = (request.report_by_one, request.report_by_site, request.report_by_wap)
    return ReportURLService.batch_update_report_flag(request.ids, *channels)
|
||||||
10
app/web/controller/status.py
Normal file
10
app/web/controller/status.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
|
||||||
|
router = APIRouter(tags=["健康检查"])
|
||||||
|
|
||||||
|
@router.get("/status")
async def status():
    """Health check: always answers with the fixed payload ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return payload
|
||||||
0
app/web/request/__init__.py
Normal file
0
app/web/request/__init__.py
Normal file
48
app/web/request/domain_request.py
Normal file
48
app/web/request/domain_request.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class GetDomainListRequest(BaseModel):
    """Query parameters for listing domains."""

    # Paging; both values must be greater than zero.
    page: int = Field(1, gt=0)
    size: int = Field(50, gt=0)

    # Filters; the defaults ("" and 0) are falsy values.
    domain: str = Field(default="")
    status: int = Field(default=0)
|
||||||
|
|
||||||
|
|
||||||
|
class AddDomainRequest(BaseModel):
    """Request body for adding domains to the database."""

    # Crawl interval applied to every added domain.
    crawl_interval: int
    # When true (the default) the new domains are flagged for immediate crawling.
    crawl_now: bool = Field(default=True)
    # Domain names to add.
    domains: list[str]
|
||||||
|
|
||||||
|
|
||||||
|
class ImportDomainFormRequest(BaseModel):
    """Parameters that accompany a domain import from a file."""

    # Crawl interval applied to every imported domain.
    crawl_interval: int
    # Defaults to true.
    crawl_now: bool = Field(default=True)
|
||||||
|
|
||||||
|
|
||||||
|
class DeleteDomainRequest(BaseModel):
    """Request body for deleting domains."""

    # Ids of the domains to delete.
    domain_ids: list[int]
    # Whether the URLs stored for those domains are removed as well; off by default.
    remove_surl: bool = Field(default=False)
|
||||||
|
|
||||||
|
|
||||||
|
class UpdateDomainRequest(BaseModel):
    """Request body for updating domains."""

    # Ids of the domains to update.
    domain_ids: list[int]
    # New crawl interval for all of them.
    crawl_interval: int
|
||||||
|
|
||||||
|
|
||||||
|
class CrawlNowRequest(BaseModel):
    """Request body for triggering an immediate crawl."""

    # Ids of the domains to crawl.
    domain_ids: list[int]
|
||||||
|
|
||||||
|
|
||||||
|
class ToggleDomainRequest(BaseModel):
    """Request body for pausing the crawl of domains."""

    # Ids of the domains to pause.
    domain_ids: list[int]
|
||||||
38
app/web/request/report_request.py
Normal file
38
app/web/request/report_request.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class GetUrlListRequest(BaseModel):
    """Query parameters for listing URLs."""

    # Text filters; empty by default.
    domain: str = Field(default="")
    surl: str = Field(default="")

    # Flag filters, default 2.
    # NOTE(review): 2 presumably means "do not filter on this flag" - confirm
    # against ReportURLService.get_list.
    is_report_by_one: Optional[int] = Field(default=2)
    is_report_by_site: Optional[int] = Field(default=2)
    is_report_by_wap: Optional[int] = Field(default=2)
    has_evidence: Optional[int] = Field(default=2)

    # Paging; both values must be greater than zero.
    page: int = Field(1, gt=0)
    size: int = Field(50, gt=0)
|
||||||
|
|
||||||
|
|
||||||
|
class AddUrlItem(BaseModel):
    """One URL to add, together with the domain it belongs to."""

    domain: str
    surl: str
|
||||||
|
|
||||||
|
|
||||||
|
class AddUrlsRequest(BaseModel):
    """Request body for adding URLs by hand."""

    # The URLs to add, each paired with its domain.
    urls: list[AddUrlItem]
|
||||||
|
|
||||||
|
|
||||||
|
class CollectEvidenceRequest(BaseModel):
    """Request body for manually triggering evidence collection."""

    # Ids of the URLs whose evidence should be collected.
    ids: list[int]
|
||||||
|
|
||||||
|
|
||||||
|
class ReportRequest(BaseModel):
    """Request body for manually triggering a report of URLs."""

    # Ids of the URLs to report.
    ids: list[int]

    # One switch per report channel; all three are required.
    report_by_one: bool
    report_by_site: bool
    report_by_wap: bool
|
||||||
24
app/web/results.py
Normal file
24
app/web/results.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Generic
|
||||||
|
|
||||||
|
from typing_extensions import TypeVar
|
||||||
|
|
||||||
|
from app.constants.api_result import ApiCode
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ApiResult(Generic[T]):
    """Uniform result envelope: a code, a message, a success flag and an optional payload."""

    code: int
    message: str
    success: bool
    data: T | None = None  # payload; stays None for errors

    @staticmethod
    def ok(data: T | None = None) -> 'ApiResult[T]':
        """Build a successful result (code ``ApiCode.OK``, message "ok") carrying ``data``."""
        return ApiResult(ApiCode.OK.value, "ok", True, data)

    @staticmethod
    def error(code: int, message: str) -> 'ApiResult[None]':
        """Build a failed result with the given code and message and no payload."""
        return ApiResult(code, message, False)
|
||||||
0
app/web/service/__init__.py
Normal file
0
app/web/service/__init__.py
Normal file
141
app/web/service/domain_service.py
Normal file
141
app/web/service/domain_service.py
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
import time
|
||||||
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
from sqlalchemy import delete, func, update
|
||||||
|
from sqlmodel import Session, select
|
||||||
|
|
||||||
|
from app.config.config import AppCtx
|
||||||
|
from app.constants.api_result import ApiCode
|
||||||
|
from app.constants.domain import DomainStatus
|
||||||
|
from app.models.domain import DomainModel
|
||||||
|
from app.models.report_urls import ReportUrlModel
|
||||||
|
from app.web.results import ApiResult
|
||||||
|
|
||||||
|
|
||||||
|
class DomainService:
|
||||||
|
|
||||||
|
@classmethod
def get_list(cls, page: int, page_size: int, domain: str, status: int):
    """Return one page of domains plus the total number of matches.

    Args:
        page: 1-based page number.
        page_size: Number of rows per page.
        domain: Substring the domain must contain; a falsy value disables the filter.
        status: Exact status to match; a falsy value (0) disables the filter.

    Returns:
        ``ApiResult.ok({"total": ..., "rows": ...})`` on success, or a
        DB_ERROR result when the query fails.
    """
    # Collect the filters once so the page query and the count query can
    # never drift apart.
    conditions = []
    if domain:
        conditions.append(DomainModel.domain.like(f"%{domain}%"))
    if status:
        conditions.append(DomainModel.status == status)

    stmt = select(DomainModel)
    stmt_total = select(func.count(DomainModel.id))
    for condition in conditions:
        stmt = stmt.where(condition)
        stmt_total = stmt_total.where(condition)

    # OFFSET/LIMIT without ORDER BY gives no guaranteed row order, so rows
    # could repeat or go missing between pages; order by id to pin it down.
    stmt = stmt.order_by(DomainModel.id).offset((page - 1) * page_size).limit(page_size)

    with Session(AppCtx.g_db_engine) as session:
        try:
            # Rows of the requested page.
            rows = session.exec(stmt).all()

            # Number of rows matching the filters, ignoring paging.
            total = session.exec(stmt_total).first()
            logger.debug(f"{total=}")

            return ApiResult.ok({"total": total, "rows": rows})
        except Exception as e:
            session.rollback()
            logger.exception(f"查询域名列表失败,错误:{e}")
            return ApiResult.error(ApiCode.DB_ERROR.value, f"查询域名列表失败,错误:{e}")
|
||||||
|
|
||||||
|
@classmethod
def get_by_domains(cls, domains: list[str]) -> ApiResult[Optional[list[DomainModel]]]:
    """Fetch the stored domains whose name appears in ``domains``.

    Returns:
        ``ApiResult.ok(rows)`` with the list of matching models (possibly
        empty), or a DB_ERROR result when the query fails.
    """
    with Session(AppCtx.g_db_engine) as session:
        stmt = select(DomainModel).where(DomainModel.domain.in_(domains))
        try:
            rows = session.exec(stmt).all()
            return ApiResult.ok(rows)
        except Exception as e:
            session.rollback()
            # Log with traceback, like get_list does, so the failure is not
            # visible only to the API caller.
            logger.exception(f"查询域名失败,错误:{e}")
            return ApiResult.error(ApiCode.DB_ERROR.value, f"查询域名失败,错误:{e}")
|
||||||
|
|
||||||
|
@classmethod
def get_by_ids(cls, domain_ids: list[int]) -> ApiResult[Optional[list[DomainModel]]]:
    """Fetch the stored domains whose id appears in ``domain_ids``.

    Returns:
        ``ApiResult.ok(rows)`` with the list of matching models (possibly
        empty), or a DB_ERROR result when the query fails.
    """
    with Session(AppCtx.g_db_engine) as session:
        stmt = select(DomainModel).where(DomainModel.id.in_(domain_ids))
        try:
            rows = session.exec(stmt).all()
            return ApiResult.ok(rows)
        except Exception as e:
            session.rollback()
            # Log with traceback, like get_list does, so the failure is not
            # visible only to the API caller.
            logger.exception(f"查询域名失败,错误:{e}")
            return ApiResult.error(ApiCode.DB_ERROR.value, f"查询域名失败,错误:{e}")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def add_domains(cls, interval: int, crawl_now: bool, domains: Iterable[str]):
    """Insert one ``DomainModel`` row per entry of ``domains`` in a single commit.

    Args:
        interval: Stored as ``crawl_interval`` on every new row.
        crawl_now: When true ``latest_crawl_time`` is stored as ``0``; otherwise
            the current Unix timestamp is stored.
            NOTE(review): presumably ``0`` makes the crawler pick the domain
            up immediately — confirm against the crawl engine.
        domains: Domain names to insert.

    Returns:
        ``ApiResult.ok`` with the number of rows added, or a ``DB_ERROR``
        result (after rollback) when the commit fails.
    """
    with Session(AppCtx.g_db_engine) as session:
        pending = []
        for name in domains:
            if crawl_now:
                last_crawl = 0
            else:
                last_crawl = int(time.time())
            pending.append(
                DomainModel(
                    domain=name,
                    status=DomainStatus.READY.value,
                    crawl_interval=interval,
                    latest_crawl_time=last_crawl,
                )
            )

        session.add_all(pending)
        try:
            session.commit()
            return ApiResult.ok(len(pending))
        except Exception as e:
            logger.error(f"添加域名到数据库失败,错误:{e}")
            session.rollback()
            return ApiResult.error(ApiCode.DB_ERROR.value, f"添加域名失败,错误:{e}")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def delete_domains(cls, domain_ids: list[int], remove_surl: bool = False):
    """Delete the given domains, optionally together with their report URLs.

    Args:
        domain_ids: Primary keys of the ``DomainModel`` rows to delete.
        remove_surl: When true, ``ReportUrlModel`` rows whose ``domain_id`` is
            in ``domain_ids`` are deleted in the same transaction.

    Returns:
        ``ApiResult.ok`` with ``len(domain_ids)`` (the number of ids requested,
        not the number of rows actually removed), or a ``DB_ERROR`` result
        (after rollback) on failure.
    """
    with Session(AppCtx.g_db_engine) as session:
        drop_domains = delete(DomainModel).where(DomainModel.id.in_(domain_ids))
        try:
            session.exec(drop_domains)

            if remove_surl:
                # Also purge the report_url rows that belong to these domains.
                session.exec(
                    delete(ReportUrlModel).where(ReportUrlModel.domain_id.in_(domain_ids))
                )

            session.commit()
            return ApiResult.ok(len(domain_ids))
        except Exception as e:
            logger.error(f"删除域名失败,错误:{e}")
            session.rollback()
            return ApiResult.error(ApiCode.DB_ERROR.value, f"删除域名失败,错误:{e}")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def update_domain_interval(cls, domain_ids: list[int], interval: int) -> ApiResult[Optional[int]]:
    """Set ``crawl_interval`` to ``interval`` on every domain in ``domain_ids``.

    Returns:
        ``ApiResult.ok`` with ``len(domain_ids)``, or a ``DB_ERROR`` result
        (after rollback) when the update fails.
    """
    with Session(AppCtx.g_db_engine) as session:
        selected = DomainModel.id.in_(domain_ids)
        set_interval = update(DomainModel).where(selected).values(crawl_interval=interval)
        try:
            session.exec(set_interval)
            session.commit()
            return ApiResult.ok(len(domain_ids))
        except Exception as e:
            logger.error(f"更新域名 interval 失败,错误:{e}")
            session.rollback()
            return ApiResult.error(ApiCode.DB_ERROR.value, f"更新域名 interval 失败,错误:{e}")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def update_domain_status(cls, domain_ids: list[int], status: int) -> ApiResult[Optional[int]]:
    """Set ``status`` to the given value on every domain in ``domain_ids``.

    Returns:
        ``ApiResult.ok`` with ``len(domain_ids)``, or a ``DB_ERROR`` result
        (after rollback) when the update fails.
    """
    with Session(AppCtx.g_db_engine) as session:
        selected = DomainModel.id.in_(domain_ids)
        set_status = update(DomainModel).where(selected).values(status=status)
        try:
            session.exec(set_status)
            session.commit()
            return ApiResult.ok(len(domain_ids))
        except Exception as e:
            logger.error(f"更新域名 status 失败,错误:{e}")
            session.rollback()
            return ApiResult.error(ApiCode.DB_ERROR.value, f"更新域名 status 失败,错误:{e}")
|
||||||
119
app/web/service/report_service.py
Normal file
119
app/web/service/report_service.py
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
from sqlalchemy import update, func
|
||||||
|
from sqlmodel import Session, select
|
||||||
|
|
||||||
|
from app.config.config import AppCtx
|
||||||
|
from app.constants.api_result import ApiCode
|
||||||
|
from app.models.report_urls import ReportUrlModel
|
||||||
|
from app.web.request.report_request import AddUrlItem
|
||||||
|
from app.web.results import ApiResult
|
||||||
|
|
||||||
|
|
||||||
|
class ReportURLService:
    """Database operations on ``ReportUrlModel`` rows.

    Every method opens its own ``Session`` on ``AppCtx.g_db_engine`` and reports
    the outcome as an ``ApiResult`` instead of raising database errors.
    """

    # Flag-filter value meaning "do not filter on this column".
    _FLAG_ANY = 2

    @classmethod
    def _flag_conditions(cls, flag_filters):
        """Turn ``(column, requested_value)`` pairs into equality conditions.

        Pairs whose value is ``None`` or ``_FLAG_ANY`` produce no condition.
        """
        return [
            column == value
            for column, value in flag_filters
            if value is not None and value != cls._FLAG_ANY
        ]

    @classmethod
    def get_list(
        cls, domain: str, surl: str, is_report_by_one: Optional[int], is_report_by_site: Optional[int],
        is_report_by_wap: Optional[int], has_evidence: Optional[int], page: int, size: int
    ):
        """Return one page of report URLs plus the total number of matches.

        Args:
            domain: Substring to match against ``ReportUrlModel.domain``
                (``LIKE %domain%``); ignored when empty.
            surl: Substring to match against ``ReportUrlModel.surl``; ignored
                when empty.
            is_report_by_one: Flag filter; ``None`` or ``2`` disables it, any
                other value is compared for equality with the column.
            is_report_by_site: Flag filter, same convention.
            is_report_by_wap: Flag filter, same convention.
            has_evidence: Flag filter, same convention.
            page: 1-based page number.
            size: Page size.

        Returns:
            ``ApiResult.ok({"total": ..., "data": [...]})`` or a ``DB_ERROR``
            result when a query fails.
        """
        conditions = []
        if domain:
            conditions.append(ReportUrlModel.domain.like(f"%{domain}%"))
        if surl:
            conditions.append(ReportUrlModel.surl.like(f"%{surl}%"))
        # The flags are tested with ``is not None`` rather than truthiness so
        # that a requested value of 0 really filters; a plain ``if flag`` would
        # silently drop that filter.
        conditions.extend(cls._flag_conditions((
            (ReportUrlModel.is_report_by_one, is_report_by_one),
            (ReportUrlModel.is_report_by_site, is_report_by_site),
            (ReportUrlModel.is_report_by_wap, is_report_by_wap),
            (ReportUrlModel.has_evidence, has_evidence),
        )))

        with Session(AppCtx.g_db_engine) as session:
            stmt = select(ReportUrlModel)
            total_stmt = select(func.count(ReportUrlModel.id))
            # The page query and the count query must see the same filters.
            for condition in conditions:
                stmt = stmt.where(condition)
                total_stmt = total_stmt.where(condition)

            # Pagination applies to the row query only.
            stmt = stmt.offset((page - 1) * size).limit(size)
            try:
                total = session.exec(total_stmt).first()
                urls = session.exec(stmt).all()
                return ApiResult.ok({
                    "total": total,
                    "data": urls,
                })
            except Exception as e:
                logger.error(f"获取URL列表失败: {e}")
                return ApiResult.error(ApiCode.DB_ERROR.value, str(e))

    @classmethod
    def add_urls(cls, domain_map: dict[str, int], urls: list[AddUrlItem]) -> ApiResult[Optional[int]]:
        """Insert report URLs, resolving each item's domain id via ``domain_map``.

        Args:
            domain_map: Maps a domain name to its id.
            urls: Items to insert; each provides ``domain`` and ``surl``.

        Returns:
            ``ApiResult.ok`` with the number of rows inserted (``0`` for an
            empty input), a ``PARAM_ERROR`` result naming the first domain
            missing from ``domain_map`` (nothing is inserted in that case), or
            a ``DB_ERROR`` result (after rollback) when the commit fails.
        """
        if not urls:
            return ApiResult.ok(0)

        models = []
        for url in urls:
            domain_id = domain_map.get(url.domain)
            if not domain_id:
                return ApiResult.error(ApiCode.PARAM_ERROR.value, f"域名 {url.domain} 不存在")
            models.append(ReportUrlModel(
                domain_id=domain_id,
                domain=url.domain,
                surl=url.surl,
            ))

        with Session(AppCtx.g_db_engine) as session:
            try:
                session.add_all(models)
                session.commit()
                return ApiResult.ok(len(models))
            except Exception as e:
                logger.error(f"添加URL失败: {e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, str(e))

    @classmethod
    def batch_update_evidence_flag(cls, url_ids: list[int]):
        """Set ``has_evidence`` to ``False`` on every URL in ``url_ids``.

        NOTE(review): presumably this resets the flag so evidence is collected
        again — confirm against the evidence engine.

        Returns:
            ``ApiResult.ok`` with ``len(url_ids)``, or a ``DB_ERROR`` result
            (after rollback) when the update fails.
        """
        with Session(AppCtx.g_db_engine) as session:
            try:
                stmt = update(ReportUrlModel).where(ReportUrlModel.id.in_(url_ids)).values(has_evidence=False)
                session.exec(stmt)
                session.commit()
                return ApiResult.ok(len(url_ids))
            except Exception as e:
                logger.error(f"批量更新URL的has_evidence字段失败: {e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, str(e))

    @classmethod
    def batch_update_report_flag(cls, ids: list[int], report_by_one: bool, report_by_site: bool, report_by_wap: bool):
        """Set the selected ``is_report_by_*`` columns to ``False`` for ``ids``.

        Args:
            ids: Primary keys of the ``ReportUrlModel`` rows to update.
            report_by_one: When true, ``is_report_by_one`` is set to ``False``.
            report_by_site: When true, ``is_report_by_site`` is set to ``False``.
            report_by_wap: When true, ``is_report_by_wap`` is set to ``False``.

        Returns:
            ``ApiResult.ok`` with ``len(ids)``; a ``PARAM_ERROR`` result when no
            flag is selected; or a ``DB_ERROR`` result (after rollback) when
            the update fails.
        """
        new_values = {}
        if report_by_wap:
            new_values["is_report_by_wap"] = False
        if report_by_site:
            new_values["is_report_by_site"] = False
        if report_by_one:
            new_values["is_report_by_one"] = False

        if not new_values:
            # An UPDATE with nothing to set is not a meaningful statement;
            # reject the request instead of sending it to the database.
            return ApiResult.error(ApiCode.PARAM_ERROR.value, "未选择任何需要更新的标记")

        with Session(AppCtx.g_db_engine) as session:
            try:
                stmt = update(ReportUrlModel).where(ReportUrlModel.id.in_(ids)).values(**new_values)
                logger.debug(f"{str(stmt)=}")
                session.exec(stmt)
                session.commit()
                return ApiResult.ok(len(ids))
            except Exception as e:
                # The message names the columns this method touches (it used to
                # mention has_evidence, copied from the method above).
                logger.error(f"批量更新URL的is_report_by_*字段失败: {e}")
                session.rollback()
                return ApiResult.error(ApiCode.DB_ERROR.value, str(e))
|
||||||
43
app/web/web.py
Normal file
43
app/web/web.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import uvicorn
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from fastapi.responses import FileResponse, Response
|
||||||
|
from loguru import logger
|
||||||
|
from starlette.staticfiles import StaticFiles
|
||||||
|
from starlette.types import Scope
|
||||||
|
|
||||||
|
from .controller.domain import router as domain_router
|
||||||
|
from .controller.report import router as report_router
|
||||||
|
from .controller.status import router as status_router
|
||||||
|
|
||||||
|
|
||||||
|
class SPAStaticFiles(StaticFiles):
    """Static file handler that also answers the frontend's client-side routes."""

    async def get_response(self, path: str, scope: Scope) -> Response:
        """Serve ``index.html`` for frontend routes, else defer to ``StaticFiles``.

        The paths ``domain`` and ``url`` are routes of the single-page app, not
        files on disk; opening them directly would otherwise give a 404.
        """
        is_frontend_route = path == "domain" or path == "url"
        if not is_frontend_route:
            return await super().get_response(path, scope)

        index_file = os.path.join(self.directory, "index.html")
        return FileResponse(index_file)
|
||||||
|
|
||||||
|
|
||||||
|
class WebApp:
    """Web console: the JSON API routers plus the built frontend files."""

    def __init__(self):
        # NOTE(review): ``start`` builds its own FastAPI application and does
        # not use this one — confirm whether anything else reads ``self.app``.
        self.app = FastAPI()

    @staticmethod
    async def start(host: str = "127.0.0.1", port: int = 3000):
        """Build the FastAPI application and serve it with uvicorn.

        Args:
            host: Interface to bind. Defaults to the previously hard-coded
                ``127.0.0.1``.
            port: TCP port to listen on. Defaults to the previously hard-coded
                ``3000``.

        The coroutine finishes when ``uvicorn.Server.serve`` returns.
        """
        app = FastAPI()

        # Register the API routers before the static mount: routes are matched
        # in registration order and the "/" mount matches every path.
        app.include_router(status_router)
        app.include_router(report_router)
        app.include_router(domain_router)

        # Serve the built frontend. The directory is a relative path, so it is
        # resolved against the process's working directory.
        app.mount("/", SPAStaticFiles(directory="fe/dist", html=True), name="static")

        # TODO: read host/port from the configuration file instead of relying
        # on the defaults.
        cfg = uvicorn.Config(app, host=host, port=port)
        server = uvicorn.Server(cfg)
        await server.serve()
|
||||||
@ -15,5 +15,5 @@ database = "baidu_reporter"
|
|||||||
|
|
||||||
# chrome 配置
|
# chrome 配置
|
||||||
[chrome]
|
[chrome]
|
||||||
proxy = "http://127.0.0.1:8080"
|
proxy = "http://127.0.0.1:7890"
|
||||||
browser_path = "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe"
|
browser_path = "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe"
|
||||||
9
fe/.editorconfig
Normal file
9
fe/.editorconfig
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
[*.{js,jsx,mjs,cjs,ts,tsx,mts,cts,vue,css,scss,sass,less,styl}]
|
||||||
|
charset = utf-8
|
||||||
|
indent_size = 2
|
||||||
|
indent_style = space
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
|
||||||
|
end_of_line = lf
|
||||||
|
max_line_length = 100
|
||||||
1
fe/.gitattributes
vendored
Normal file
1
fe/.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
* text=auto eol=lf
|
||||||
30
fe/.gitignore
vendored
Normal file
30
fe/.gitignore
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Logs
|
||||||
|
logs
|
||||||
|
*.log
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
pnpm-debug.log*
|
||||||
|
lerna-debug.log*
|
||||||
|
|
||||||
|
node_modules
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
dist-ssr
|
||||||
|
coverage
|
||||||
|
*.local
|
||||||
|
|
||||||
|
/cypress/videos/
|
||||||
|
/cypress/screenshots/
|
||||||
|
|
||||||
|
# Editor directories and files
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/extensions.json
|
||||||
|
.idea
|
||||||
|
*.suo
|
||||||
|
*.ntvs*
|
||||||
|
*.njsproj
|
||||||
|
*.sln
|
||||||
|
*.sw?
|
||||||
|
|
||||||
|
*.tsbuildinfo
|
||||||
6
fe/.prettierrc.json
Normal file
6
fe/.prettierrc.json
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"$schema": "https://json.schemastore.org/prettierrc",
|
||||||
|
"semi": false,
|
||||||
|
"singleQuote": true,
|
||||||
|
"printWidth": 100
|
||||||
|
}
|
||||||
8
fe/.vscode/extensions.json
vendored
Normal file
8
fe/.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"recommendations": [
|
||||||
|
"Vue.volar",
|
||||||
|
"dbaeumer.vscode-eslint",
|
||||||
|
"EditorConfig.EditorConfig",
|
||||||
|
"esbenp.prettier-vscode"
|
||||||
|
]
|
||||||
|
}
|
||||||
39
fe/README.md
Normal file
39
fe/README.md
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# fe
|
||||||
|
|
||||||
|
This template should help get you started developing with Vue 3 in Vite.
|
||||||
|
|
||||||
|
## Recommended IDE Setup
|
||||||
|
|
||||||
|
[VSCode](https://code.visualstudio.com/) + [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) (and disable Vetur).
|
||||||
|
|
||||||
|
## Type Support for `.vue` Imports in TS
|
||||||
|
|
||||||
|
TypeScript cannot handle type information for `.vue` imports by default, so we replace the `tsc` CLI with `vue-tsc` for type checking. In editors, we need [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) to make the TypeScript language service aware of `.vue` types.
|
||||||
|
|
||||||
|
## Customize configuration
|
||||||
|
|
||||||
|
See [Vite Configuration Reference](https://vite.dev/config/).
|
||||||
|
|
||||||
|
## Project Setup
|
||||||
|
|
||||||
|
```sh
|
||||||
|
pnpm install
|
||||||
|
```
|
||||||
|
|
||||||
|
### Compile and Hot-Reload for Development
|
||||||
|
|
||||||
|
```sh
|
||||||
|
pnpm dev
|
||||||
|
```
|
||||||
|
|
||||||
|
### Type-Check, Compile and Minify for Production
|
||||||
|
|
||||||
|
```sh
|
||||||
|
pnpm build
|
||||||
|
```
|
||||||
|
|
||||||
|
### Lint with [ESLint](https://eslint.org/)
|
||||||
|
|
||||||
|
```sh
|
||||||
|
pnpm lint
|
||||||
|
```
|
||||||
75
fe/auto-imports.d.ts
vendored
Normal file
75
fe/auto-imports.d.ts
vendored
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
/* eslint-disable */
|
||||||
|
/* prettier-ignore */
|
||||||
|
// @ts-nocheck
|
||||||
|
// noinspection JSUnusedGlobalSymbols
|
||||||
|
// Generated by unplugin-auto-import
|
||||||
|
// biome-ignore lint: disable
|
||||||
|
export {}
|
||||||
|
declare global {
|
||||||
|
const EffectScope: typeof import('vue')['EffectScope']
|
||||||
|
const computed: typeof import('vue')['computed']
|
||||||
|
const createApp: typeof import('vue')['createApp']
|
||||||
|
const customRef: typeof import('vue')['customRef']
|
||||||
|
const defineAsyncComponent: typeof import('vue')['defineAsyncComponent']
|
||||||
|
const defineComponent: typeof import('vue')['defineComponent']
|
||||||
|
const effectScope: typeof import('vue')['effectScope']
|
||||||
|
const getCurrentInstance: typeof import('vue')['getCurrentInstance']
|
||||||
|
const getCurrentScope: typeof import('vue')['getCurrentScope']
|
||||||
|
const h: typeof import('vue')['h']
|
||||||
|
const inject: typeof import('vue')['inject']
|
||||||
|
const isProxy: typeof import('vue')['isProxy']
|
||||||
|
const isReactive: typeof import('vue')['isReactive']
|
||||||
|
const isReadonly: typeof import('vue')['isReadonly']
|
||||||
|
const isRef: typeof import('vue')['isRef']
|
||||||
|
const markRaw: typeof import('vue')['markRaw']
|
||||||
|
const nextTick: typeof import('vue')['nextTick']
|
||||||
|
const onActivated: typeof import('vue')['onActivated']
|
||||||
|
const onBeforeMount: typeof import('vue')['onBeforeMount']
|
||||||
|
const onBeforeUnmount: typeof import('vue')['onBeforeUnmount']
|
||||||
|
const onBeforeUpdate: typeof import('vue')['onBeforeUpdate']
|
||||||
|
const onDeactivated: typeof import('vue')['onDeactivated']
|
||||||
|
const onErrorCaptured: typeof import('vue')['onErrorCaptured']
|
||||||
|
const onMounted: typeof import('vue')['onMounted']
|
||||||
|
const onRenderTracked: typeof import('vue')['onRenderTracked']
|
||||||
|
const onRenderTriggered: typeof import('vue')['onRenderTriggered']
|
||||||
|
const onScopeDispose: typeof import('vue')['onScopeDispose']
|
||||||
|
const onServerPrefetch: typeof import('vue')['onServerPrefetch']
|
||||||
|
const onUnmounted: typeof import('vue')['onUnmounted']
|
||||||
|
const onUpdated: typeof import('vue')['onUpdated']
|
||||||
|
const onWatcherCleanup: typeof import('vue')['onWatcherCleanup']
|
||||||
|
const provide: typeof import('vue')['provide']
|
||||||
|
const reactive: typeof import('vue')['reactive']
|
||||||
|
const readonly: typeof import('vue')['readonly']
|
||||||
|
const ref: typeof import('vue')['ref']
|
||||||
|
const resolveComponent: typeof import('vue')['resolveComponent']
|
||||||
|
const shallowReactive: typeof import('vue')['shallowReactive']
|
||||||
|
const shallowReadonly: typeof import('vue')['shallowReadonly']
|
||||||
|
const shallowRef: typeof import('vue')['shallowRef']
|
||||||
|
const toRaw: typeof import('vue')['toRaw']
|
||||||
|
const toRef: typeof import('vue')['toRef']
|
||||||
|
const toRefs: typeof import('vue')['toRefs']
|
||||||
|
const toValue: typeof import('vue')['toValue']
|
||||||
|
const triggerRef: typeof import('vue')['triggerRef']
|
||||||
|
const unref: typeof import('vue')['unref']
|
||||||
|
const useAttrs: typeof import('vue')['useAttrs']
|
||||||
|
const useCssModule: typeof import('vue')['useCssModule']
|
||||||
|
const useCssVars: typeof import('vue')['useCssVars']
|
||||||
|
const useDialog: typeof import('naive-ui')['useDialog']
|
||||||
|
const useId: typeof import('vue')['useId']
|
||||||
|
const useLoadingBar: typeof import('naive-ui')['useLoadingBar']
|
||||||
|
const useMessage: typeof import('naive-ui')['useMessage']
|
||||||
|
const useModel: typeof import('vue')['useModel']
|
||||||
|
const useNotification: typeof import('naive-ui')['useNotification']
|
||||||
|
const useSlots: typeof import('vue')['useSlots']
|
||||||
|
const useTemplateRef: typeof import('vue')['useTemplateRef']
|
||||||
|
const watch: typeof import('vue')['watch']
|
||||||
|
const watchEffect: typeof import('vue')['watchEffect']
|
||||||
|
const watchPostEffect: typeof import('vue')['watchPostEffect']
|
||||||
|
const watchSyncEffect: typeof import('vue')['watchSyncEffect']
|
||||||
|
}
|
||||||
|
// for type re-export
|
||||||
|
declare global {
|
||||||
|
// @ts-ignore
|
||||||
|
export type { Component, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue'
|
||||||
|
import('vue')
|
||||||
|
}
|
||||||
35
fe/components.d.ts
vendored
Normal file
35
fe/components.d.ts
vendored
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
/* eslint-disable */
|
||||||
|
// @ts-nocheck
|
||||||
|
// Generated by unplugin-vue-components
|
||||||
|
// Read more: https://github.com/vuejs/core/pull/3399
|
||||||
|
// biome-ignore lint: disable
|
||||||
|
export {}
|
||||||
|
|
||||||
|
/* prettier-ignore */
|
||||||
|
declare module 'vue' {
|
||||||
|
export interface GlobalComponents {
|
||||||
|
AddDomainDialog: typeof import('./src/components/AddDomainDialog.vue')['default']
|
||||||
|
EditDomainDialog: typeof import('./src/components/EditDomainDialog.vue')['default']
|
||||||
|
ImportDomainDialog: typeof import('./src/components/ImportDomainDialog.vue')['default']
|
||||||
|
NButton: typeof import('naive-ui')['NButton']
|
||||||
|
NCard: typeof import('naive-ui')['NCard']
|
||||||
|
NCheckbox: typeof import('naive-ui')['NCheckbox']
|
||||||
|
NDataTable: typeof import('naive-ui')['NDataTable']
|
||||||
|
NDialogProvider: typeof import('naive-ui')['NDialogProvider']
|
||||||
|
NDropdown: typeof import('naive-ui')['NDropdown']
|
||||||
|
NForm: typeof import('naive-ui')['NForm']
|
||||||
|
NFormItem: typeof import('naive-ui')['NFormItem']
|
||||||
|
NFormItemGi: typeof import('naive-ui')['NFormItemGi']
|
||||||
|
NGrid: typeof import('naive-ui')['NGrid']
|
||||||
|
NInput: typeof import('naive-ui')['NInput']
|
||||||
|
NInputNumber: typeof import('naive-ui')['NInputNumber']
|
||||||
|
NModal: typeof import('naive-ui')['NModal']
|
||||||
|
NPagination: typeof import('naive-ui')['NPagination']
|
||||||
|
NSelect: typeof import('naive-ui')['NSelect']
|
||||||
|
NSpace: typeof import('naive-ui')['NSpace']
|
||||||
|
NTag: typeof import('naive-ui')['NTag']
|
||||||
|
NTooltip: typeof import('naive-ui')['NTooltip']
|
||||||
|
RouterLink: typeof import('vue-router')['RouterLink']
|
||||||
|
RouterView: typeof import('vue-router')['RouterView']
|
||||||
|
}
|
||||||
|
}
|
||||||
483
fe/dist/assets/DomainManager-2SUOMVR8.js
vendored
Normal file
483
fe/dist/assets/DomainManager-2SUOMVR8.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1525
fe/dist/assets/FormItem-DHmVxm6n.js
vendored
Normal file
1525
fe/dist/assets/FormItem-DHmVxm6n.js
vendored
Normal file
File diff suppressed because one or more lines are too long
BIN
fe/dist/assets/IBMPlexMono-Regular-CAJ2AE84.ttf
vendored
Normal file
BIN
fe/dist/assets/IBMPlexMono-Regular-CAJ2AE84.ttf
vendored
Normal file
Binary file not shown.
BIN
fe/dist/assets/LatoLatin-Regular-Dmlz1U0B.woff2
vendored
Normal file
BIN
fe/dist/assets/LatoLatin-Regular-Dmlz1U0B.woff2
vendored
Normal file
Binary file not shown.
BIN
fe/dist/assets/LatoLatin-Semibold-Dbk81p2D.woff2
vendored
Normal file
BIN
fe/dist/assets/LatoLatin-Semibold-Dbk81p2D.woff2
vendored
Normal file
Binary file not shown.
1
fe/dist/assets/UrlManager-DG8i4_QJ.js
vendored
Normal file
1
fe/dist/assets/UrlManager-DG8i4_QJ.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
fe/dist/assets/index-Ca0u0JBQ.css
vendored
Normal file
1
fe/dist/assets/index-Ca0u0JBQ.css
vendored
Normal file
File diff suppressed because one or more lines are too long
1161
fe/dist/assets/index-DnpM0Ntg.js
vendored
Normal file
1161
fe/dist/assets/index-DnpM0Ntg.js
vendored
Normal file
File diff suppressed because one or more lines are too long
BIN
fe/dist/favicon.ico
vendored
Normal file
BIN
fe/dist/favicon.ico
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.2 KiB |
14
fe/dist/index.html
vendored
Normal file
14
fe/dist/index.html
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<link rel="icon" href="/favicon.ico">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Baidu Reporter</title>
|
||||||
|
<script type="module" crossorigin src="/assets/index-DnpM0Ntg.js"></script>
|
||||||
|
<link rel="stylesheet" crossorigin href="/assets/index-Ca0u0JBQ.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="app"></div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
1
fe/env.d.ts
vendored
Normal file
1
fe/env.d.ts
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
/// <reference types="vite/client" />
|
||||||
22
fe/eslint.config.ts
Normal file
22
fe/eslint.config.ts
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import { globalIgnores } from 'eslint/config'
|
||||||
|
import { defineConfigWithVueTs, vueTsConfigs } from '@vue/eslint-config-typescript'
|
||||||
|
import pluginVue from 'eslint-plugin-vue'
|
||||||
|
import skipFormatting from '@vue/eslint-config-prettier/skip-formatting'
|
||||||
|
|
||||||
|
// To allow more languages other than `ts` in `.vue` files, uncomment the following lines:
|
||||||
|
// import { configureVueProject } from '@vue/eslint-config-typescript'
|
||||||
|
// configureVueProject({ scriptLangs: ['ts', 'tsx'] })
|
||||||
|
// More info at https://github.com/vuejs/eslint-config-typescript/#advanced-setup
|
||||||
|
|
||||||
|
export default defineConfigWithVueTs(
|
||||||
|
{
|
||||||
|
name: 'app/files-to-lint',
|
||||||
|
files: ['**/*.{ts,mts,tsx,vue}'],
|
||||||
|
},
|
||||||
|
|
||||||
|
globalIgnores(['**/dist/**', '**/dist-ssr/**', '**/coverage/**']),
|
||||||
|
|
||||||
|
pluginVue.configs['flat/essential'],
|
||||||
|
vueTsConfigs.recommended,
|
||||||
|
skipFormatting,
|
||||||
|
)
|
||||||
13
fe/index.html
Normal file
13
fe/index.html
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<link rel="icon" href="/favicon.ico">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Vite App</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="app"></div>
|
||||||
|
<script type="module" src="/src/main.ts"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
46
fe/package.json
Normal file
46
fe/package.json
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
{
|
||||||
|
"name": "fe",
|
||||||
|
"version": "0.0.0",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "vite",
|
||||||
|
"build": "run-p type-check \"build-only {@}\" --",
|
||||||
|
"preview": "vite preview",
|
||||||
|
"build-only": "vite build",
|
||||||
|
"type-check": "vue-tsc --build",
|
||||||
|
"lint": "eslint . --fix",
|
||||||
|
"format": "prettier --write src/"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@tailwindcss/vite": "^4.1.1",
|
||||||
|
"axios": "^1.8.4",
|
||||||
|
"pinia": "^3.0.1",
|
||||||
|
"tailwindcss": "^4.1.1",
|
||||||
|
"vue": "^3.5.13",
|
||||||
|
"vue-router": "^4.5.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@tsconfig/node22": "^22.0.1",
|
||||||
|
"@types/node": "^22.13.14",
|
||||||
|
"@vicons/ionicons5": "^0.13.0",
|
||||||
|
"@vitejs/plugin-vue": "^5.2.3",
|
||||||
|
"@vitejs/plugin-vue-jsx": "^4.1.2",
|
||||||
|
"@vue/eslint-config-prettier": "^10.2.0",
|
||||||
|
"@vue/eslint-config-typescript": "^14.5.0",
|
||||||
|
"@vue/tsconfig": "^0.7.0",
|
||||||
|
"eslint": "^9.22.0",
|
||||||
|
"eslint-plugin-vue": "~10.0.0",
|
||||||
|
"jiti": "^2.4.2",
|
||||||
|
"naive-ui": "^2.41.0",
|
||||||
|
"npm-run-all2": "^7.0.2",
|
||||||
|
"prettier": "3.5.3",
|
||||||
|
"typescript": "~5.8.0",
|
||||||
|
"unplugin-auto-import": "^19.1.2",
|
||||||
|
"unplugin-vue-components": "^28.4.1",
|
||||||
|
"vfonts": "^0.0.3",
|
||||||
|
"vite": "^6.2.4",
|
||||||
|
"vite-plugin-vue-devtools": "^7.7.2",
|
||||||
|
"vue-tsc": "^2.2.8"
|
||||||
|
}
|
||||||
|
}
|
||||||
4097
fe/pnpm-lock.yaml
generated
Normal file
4097
fe/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
BIN
fe/public/favicon.ico
Normal file
BIN
fe/public/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.2 KiB |
72
fe/src/App.vue
Normal file
72
fe/src/App.vue
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
<script setup lang="tsx">
|
||||||
|
import { RouterLink, RouterView } from 'vue-router'
|
||||||
|
import {
|
||||||
|
NLayout,
|
||||||
|
NMessageProvider,
|
||||||
|
NConfigProvider,
|
||||||
|
NLayoutHeader,
|
||||||
|
NLayoutSider,
|
||||||
|
NMenu,
|
||||||
|
} from 'naive-ui'
|
||||||
|
import { type MenuOption } from 'naive-ui'
|
||||||
|
import { List, ColorWand } from '@vicons/ionicons5'
|
||||||
|
|
||||||
|
// Called when a menu item is selected. TODO: highlight the matching menu item.
const handleUpdateValue = (v: string) => {
  // Log the selected key itself; the previous version printed the literal text "v".
  console.log('handleUpdateValue:', v)
}
|
||||||
|
|
||||||
|
// 菜单定义
|
||||||
|
const menuOpts: MenuOption[] = [
|
||||||
|
{
|
||||||
|
label: () => <RouterLink to={{ name: 'domain-manager' }}>域名管理</RouterLink>,
|
||||||
|
key: 'rule-manager',
|
||||||
|
icon: () => <List />,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: () => <RouterLink to={{ name: 'url-manager' }}>URL 管理</RouterLink>,
|
||||||
|
key: 'rule-sniff',
|
||||||
|
icon: () => <ColorWand />,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<n-config-provider>
|
||||||
|
<n-dialog-provider>
|
||||||
|
<n-message-provider>
|
||||||
|
<n-layout class="h-screen">
|
||||||
|
<!-- header -->
|
||||||
|
<n-layout-header
|
||||||
|
class="h-16 p-5"
|
||||||
|
style="background-color: oklch(62.3% 0.214 259.815); color: white"
|
||||||
|
bordered
|
||||||
|
>
|
||||||
|
<span class="font-bold text-xl">BAIDU Reporter</span>
|
||||||
|
</n-layout-header>
|
||||||
|
|
||||||
|
<n-layout position="absolute" has-sider style="top: 64px">
|
||||||
|
<!-- sidebar -->
|
||||||
|
<n-layout-sider
|
||||||
|
width="8%"
|
||||||
|
show-trigger
|
||||||
|
show-collapsed-content
|
||||||
|
:collapsed-width="64"
|
||||||
|
content-style="padding: 8px; text-align:center;"
|
||||||
|
:native-scrollbar="false"
|
||||||
|
bordered
|
||||||
|
collapse-mode="width"
|
||||||
|
>
|
||||||
|
<n-menu :indent="24" :options="menuOpts" @update:value="handleUpdateValue" />
|
||||||
|
</n-layout-sider>
|
||||||
|
|
||||||
|
<!-- content -->
|
||||||
|
<n-layout content-style="padding: 16px;" :native-scrollbar="false">
|
||||||
|
<router-view />
|
||||||
|
</n-layout>
|
||||||
|
</n-layout>
|
||||||
|
</n-layout>
|
||||||
|
</n-message-provider>
|
||||||
|
</n-dialog-provider>
|
||||||
|
</n-config-provider>
|
||||||
|
</template>
|
||||||
141
fe/src/components/AddDomainDialog.vue
Normal file
141
fe/src/components/AddDomainDialog.vue
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
<script setup lang="ts">
|
||||||
|
import { ref, inject } from 'vue'
|
||||||
|
import {
|
||||||
|
NModal,
|
||||||
|
NForm,
|
||||||
|
NFormItem,
|
||||||
|
NInputNumber,
|
||||||
|
NCheckbox,
|
||||||
|
NInput,
|
||||||
|
NButton,
|
||||||
|
NButtonGroup,
|
||||||
|
useMessage,
|
||||||
|
} from 'naive-ui'
|
||||||
|
import type { FormRules } from 'naive-ui'
|
||||||
|
import type { AxiosInstance } from 'axios'
|
||||||
|
|
||||||
|
const model = defineModel<boolean>('show', { required: true })
|
||||||
|
const emit = defineEmits(['success'])
|
||||||
|
|
||||||
|
const axios = inject('axios') as AxiosInstance
|
||||||
|
const message = useMessage()
|
||||||
|
|
||||||
|
const interval = ref<number>(1440) // 默认1天(1440分钟)
|
||||||
|
const startImmediately = ref(true)
|
||||||
|
const domains = ref('')
|
||||||
|
const formRef = ref<InstanceType<typeof NForm> | null>(null)
|
||||||
|
|
||||||
|
// Validation rules for the add-domain form, keyed by the form items' `path`.
const rules: FormRules = {
  interval: [
    {
      type: 'number',
      required: true,
      trigger: ['blur', 'change'],
      message: '请输入采集间隔',
    },
    {
      type: 'number',
      min: 1,
      trigger: ['blur', 'change'],
      message: '采集间隔必须大于0',
    },
  ],
  domains: [
    {
      required: true,
      trigger: ['blur', 'change'],
      message: '请输入域名',
    },
    {
      trigger: ['blur', 'change'],
      message: '域名格式不正确',
      validator: (_, value: string) => {
        // A blank value passes here; the `required` rule above reports it.
        if (value.trim() === '') return true
        // Valid as soon as one non-empty entry remains after splitting on
        // newlines and commas.
        return value.split(/[\n,]/).some((part) => part.trim() !== '')
      },
    },
  ],
}
|
||||||
|
|
||||||
|
// Validate the form, submit the parsed domain list to the backend and close
// the dialog when the backend reports success.
const handleConfirm = async () => {
  try {
    await formRef.value?.validate()
  } catch {
    // Validation was rejected: do not submit.
    return
  }

  try {
    // Entries may be separated by newlines or commas; blank entries are dropped.
    const parsedDomains: string[] = []
    for (const piece of domains.value.split(/[\n,]/)) {
      const trimmed = piece.trim()
      if (trimmed) parsedDomains.push(trimmed)
    }

    const payload = {
      domains: parsedDomains,
      crawl_interval: interval.value,
      crawl_now: startImmediately.value,
    }
    const { data } = await axios.post('/api/domain/v1/add', payload)

    // The code checks for 20000 as the success value; anything else is shown
    // as a failure with the backend's message.
    if (data.code !== 20000) {
      message.error(`添加失败:${data.message}`)
      return
    }

    message.success('添加成功')
    emit('success')
    handleClose()
  } catch (error) {
    console.error('添加失败', error)
    message.error(`添加失败:${error}`)
  }
}
|
||||||
|
|
||||||
|
// Put the form back to its initial values, clear validation state and hide the dialog.
const handleClose = () => {
  domains.value = ''
  startImmediately.value = true
  interval.value = 1440 // one day, in minutes
  formRef.value?.restoreValidation()
  model.value = false
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<n-modal
|
||||||
|
v-model:show="model"
|
||||||
|
preset="card"
|
||||||
|
title="手动添加"
|
||||||
|
:mask-closable="false"
|
||||||
|
style="width: 600px"
|
||||||
|
>
|
||||||
|
<n-form size="small" ref="formRef" :model="{ interval, domains }" :rules="rules">
|
||||||
|
<n-form-item path="interval" label="采集间隔(分钟)">
|
||||||
|
<n-input-number v-model:value="interval" :min="1" />
|
||||||
|
</n-form-item>
|
||||||
|
|
||||||
|
<n-form-item path="domains" label="域名列表">
|
||||||
|
<n-input
|
||||||
|
v-model:value="domains"
|
||||||
|
type="textarea"
|
||||||
|
:rows="10"
|
||||||
|
placeholder="请输入域名,支持换行或英文逗号分隔"
|
||||||
|
/>
|
||||||
|
</n-form-item>
|
||||||
|
|
||||||
|
<n-form-item label="采集选项">
|
||||||
|
<n-checkbox v-model:checked="startImmediately"> 立即开始采集 </n-checkbox>
|
||||||
|
</n-form-item>
|
||||||
|
</n-form>
|
||||||
|
<template #action>
|
||||||
|
<n-button-group size="small">
|
||||||
|
<n-button type="primary" @click="handleConfirm">确认</n-button>
|
||||||
|
<n-button @click="handleClose">关闭</n-button>
|
||||||
|
</n-button-group>
|
||||||
|
</template>
|
||||||
|
</n-modal>
|
||||||
|
</template>
|
||||||
115
fe/src/components/EditDomainDialog.vue
Normal file
115
fe/src/components/EditDomainDialog.vue
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
<script setup lang="ts">
|
||||||
|
import { ref, defineProps, defineEmits, inject, defineModel, computed, watch } from 'vue'
|
||||||
|
import { useMessage } from 'naive-ui'
|
||||||
|
import type { AxiosInstance } from 'axios'
|
||||||
|
import type { DataTableRowKey } from 'naive-ui'
|
||||||
|
|
||||||
|
// Dialog visibility, two-way bound through `v-model:show`.
const show = defineModel<boolean>('show')

const props = defineProps<{
  // IDs of the domains whose crawl interval is being edited.
  domainIds: DataTableRowKey[] | null
}>()

const emit = defineEmits<{
  (e: 'success'): void
  (e: 'close'): void
}>()

const axios = inject('axios') as AxiosInstance
const message = useMessage()

// No default is offered for the crawl interval; the user has to enter it.
const crawlInterval = ref<number | null>(null)
const loading = ref(false)
|
||||||
|
|
||||||
|
// Dialog title, derived from how many domains are being edited.
const dialogTitle = computed(() => {
  const total = props.domainIds?.length || 0
  if (total === 1) {
    return '修改域名采集间隔'
  }
  if (total > 1) {
    return `批量修改 ${total} 个域名的采集间隔`
  }
  // Fallback for an empty selection.
  return '修改采集间隔'
})
|
||||||
|
|
||||||
|
// Clear the interval every time the dialog opens so the user must type a value.
watch(show, (visible) => {
  if (!visible) return
  crawlInterval.value = null
})
|
||||||
|
|
||||||
|
/**
 * Validate the entered interval and send the update for all `domainIds`.
 *
 * The interval must be an integer >= 1 (the error message already says so;
 * fractional input such as 1.5 is now rejected as well). On success the
 * `success` event is emitted and the dialog is hidden.
 */
const handleSubmit = async () => {
  const minutes = crawlInterval.value
  if (minutes === null || !Number.isInteger(minutes) || minutes < 1) {
    message.error('请输入有效的采集间隔(大于等于1的整数)')
    return
  }

  const ids = props.domainIds
  if (!ids || ids.length === 0) {
    message.error('没有指定要修改的域名')
    return
  }

  try {
    loading.value = true

    const response = (
      await axios.post('/api/domain/v1/update', {
        domain_ids: ids, // forwarded exactly as received from the parent
        crawl_interval: minutes,
      })
    ).data

    if (response.code !== 20000) {
      message.error(`更新失败:${response.message}`)
      return
    }

    message.success('更新成功')
    emit('success')
    show.value = false // hide the dialog
  } catch (error) {
    console.error('更新失败', error)
    message.error(`更新失败:${error}`)
  } finally {
    // Always release the loading state, including on early return above.
    loading.value = false
  }
}
|
||||||
|
|
||||||
|
/** Hide the dialog and notify the parent through the `close` event. */
const handleClose = () => {
  show.value = false
  emit('close')
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<n-modal
|
||||||
|
v-model:show="show"
|
||||||
|
preset="dialog"
|
||||||
|
:title="dialogTitle"
|
||||||
|
:loading="loading"
|
||||||
|
@close="handleClose"
|
||||||
|
>
|
||||||
|
<!-- 批量编辑提示 -->
|
||||||
|
<div v-if="(domainIds?.length || 0) > 1" class="mb-4 text-orange-500">
|
||||||
|
你正在批量修改 {{ domainIds?.length }} 个域名的采集间隔。
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<n-form>
|
||||||
|
<n-form-item label="采集间隔(分钟)" required>
|
||||||
|
<n-input-number
|
||||||
|
v-model:value="crawlInterval"
|
||||||
|
:min="1"
|
||||||
|
:step="1"
|
||||||
|
style="width: 100%"
|
||||||
|
placeholder="请输入采集间隔"
|
||||||
|
/>
|
||||||
|
</n-form-item>
|
||||||
|
</n-form>
|
||||||
|
<template #action>
|
||||||
|
<n-button @click="handleClose">取消</n-button>
|
||||||
|
<n-button type="primary" @click="handleSubmit" :loading="loading">确定</n-button>
|
||||||
|
</template>
|
||||||
|
</n-modal>
|
||||||
|
</template>
|
||||||
144
fe/src/components/ImportDomainDialog.vue
Normal file
144
fe/src/components/ImportDomainDialog.vue
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
<script setup lang="ts">
|
||||||
|
import { ref, inject } from 'vue'
|
||||||
|
import {
|
||||||
|
NModal,
|
||||||
|
NForm,
|
||||||
|
NFormItem,
|
||||||
|
NInputNumber,
|
||||||
|
NCheckbox,
|
||||||
|
NUpload,
|
||||||
|
NButton,
|
||||||
|
NButtonGroup,
|
||||||
|
useMessage,
|
||||||
|
} from 'naive-ui'
|
||||||
|
import type { FormRules } from 'naive-ui'
|
||||||
|
import type { AxiosInstance } from 'axios'
|
||||||
|
import type { UploadFileInfo } from 'naive-ui'
|
||||||
|
|
||||||
|
// Dialog visibility, two-way bound through `v-model:show`.
const model = defineModel<boolean>('show', { required: true })
const emit = defineEmits(['success'])

const axios = inject('axios') as AxiosInstance
const message = useMessage()

// Form state. The interval defaults to one day (1440 minutes).
const interval = ref(1440)
const startImmediately = ref(true)
const fileList = ref<UploadFileInfo[]>([])
const formRef = ref<InstanceType<typeof NForm> | null>(null)
|
||||||
|
|
||||||
|
// Validation rules for the import form, keyed by form-item `path`.
const rules: FormRules = {
  interval: [
    // The interval must be present ...
    { type: 'number', required: true, message: '请输入采集间隔', trigger: ['blur', 'change'] },
    // ... and at least 1.
    { type: 'number', min: 1, message: '采集间隔必须大于0', trigger: ['blur', 'change'] },
  ],
  fileList: [
    { type: 'array', required: true, message: '请选择文件', trigger: ['change'] },
    {
      // The first selected entry must carry an actual file object.
      validator: (_, value: UploadFileInfo[]) => {
        return Boolean(value && value.length !== 0 && value[0].file)
      },
      message: '文件无效',
      trigger: ['change'],
    },
  ],
}
|
||||||
|
|
||||||
|
/**
 * Validate the form and upload the selected file as multipart form data.
 *
 * The first entry of `fileList` is read with optional chaining, so an empty
 * list produces the regular "invalid file" message instead of a TypeError
 * surfacing in the generic failure toast.
 */
const handleConfirm = async () => {
  // Stop quietly when validation rejects; the form renders its own messages.
  try {
    await formRef.value?.validate()
  } catch (errors) {
    return
  }

  try {
    const file = fileList.value[0]?.file
    if (!file) {
      message.error('文件无效')
      return
    }

    const formData = new FormData()
    formData.append('file', file)
    // FormData values are strings, so the number and boolean are stringified.
    formData.append('crawl_interval', interval.value.toString())
    formData.append('crawl_now', startImmediately.value.toString())

    const response = await axios.post('/api/domain/v1/import', formData, {
      headers: {
        'Content-Type': 'multipart/form-data',
      },
    })

    if (response.data.code === 20000) {
      message.success('导入成功')
      emit('success')
      handleClose()
    } else {
      message.error(`导入失败:${response.data.message}`)
    }
  } catch (error) {
    console.error('导入失败', error)
    message.error(`导入失败:${error}`)
  }
}
|
||||||
|
|
||||||
|
/** Restore the form defaults, drop the selected file, clear validation and hide the modal. */
const handleClose = () => {
  fileList.value = []
  startImmediately.value = true
  interval.value = 1440
  formRef.value?.restoreValidation()
  model.value = false
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
  <n-modal
    v-model:show="model"
    preset="card"
    title="通过文件导入"
    :mask-closable="false"
    style="width: 600px"
  >
    <n-form
      size="small"
      ref="formRef"
      :model="{ interval, fileList }"
      :rules="rules"
      label-placement="left"
      label-width="200"
    >
      <n-form-item path="interval" label="采集间隔(分钟)">
        <n-input-number v-model:value="interval" :min="1" />
      </n-form-item>
      <n-form-item path="fileList" label="选择文件">
        <!--
          The file is posted manually in handleConfirm, so the component's
          automatic upload (no `action` is configured) is switched off.
        -->
        <n-upload
          v-model:file-list="fileList"
          :max="1"
          accept=".txt,.csv"
          :default-upload="false"
        >
          <n-button>选择文件</n-button>
        </n-upload>
      </n-form-item>
      <n-form-item label="采集选项">
        <n-checkbox v-model:checked="startImmediately"> 立即开始采集 </n-checkbox>
      </n-form-item>
    </n-form>
    <template #action>
      <n-button-group size="small">
        <n-button type="primary" @click="handleConfirm">确认</n-button>
        <n-button @click="handleClose">关闭</n-button>
      </n-button-group>
    </template>
  </n-modal>
</template>
|
||||||
1
fe/src/main.css
Normal file
1
fe/src/main.css
Normal file
@ -0,0 +1 @@
|
|||||||
|
@import 'tailwindcss';
|
||||||
26
fe/src/main.ts
Normal file
26
fe/src/main.ts
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import './main.css'
|
||||||
|
|
||||||
|
import { createApp } from 'vue'
|
||||||
|
import { createPinia } from 'pinia'
|
||||||
|
|
||||||
|
import App from './App.vue'
|
||||||
|
import router from './router'
|
||||||
|
|
||||||
|
import axios from 'axios'
|
||||||
|
|
||||||
|
import 'vfonts/Lato.css'
|
||||||
|
import 'vfonts/IBMPlexMono.css'
|
||||||
|
|
||||||
|
// Shared HTTP client: sends credentials and times out after 9 seconds.
const axiosInstance = axios.create({
  withCredentials: true,
  timeout: 9000,
  timeoutErrorMessage: 'E_NETWORK_TIMEOUT',
})

// Build the application: install Pinia and the router, expose the HTTP
// client under the 'axios' injection key, then mount.
const app = createApp(App)
app.use(createPinia()).use(router)
app.provide('axios', axiosInstance)
app.mount('#app')
|
||||||
23
fe/src/router/index.ts
Normal file
23
fe/src/router/index.ts
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
import { createRouter, createWebHistory } from 'vue-router'
|
||||||
|
|
||||||
|
// Lazy loaders so each view is fetched only when its route is visited.
const loadDomainManager = () => import('../views/DomainManager.vue')
const loadUrlManager = () => import('../views/UrlManager.vue')

// Application router: "/" redirects to the domain manager.
const router = createRouter({
  history: createWebHistory(import.meta.env.BASE_URL),
  routes: [
    { path: '/', redirect: '/domain' },
    { path: '/domain', name: 'domain-manager', component: loadDomainManager },
    { path: '/url', name: 'url-manager', component: loadUrlManager },
  ],
})

export default router
|
||||||
12
fe/src/stores/counter.ts
Normal file
12
fe/src/stores/counter.ts
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
import { ref, computed } from 'vue'
|
||||||
|
import { defineStore } from 'pinia'
|
||||||
|
|
||||||
|
/** Counter store: a count, its doubled value and an increment action. */
export const useCounterStore = defineStore('counter', () => {
  const count = ref(0)
  const doubleCount = computed(() => 2 * count.value)

  const increment = () => {
    count.value += 1
  }

  return { count, doubleCount, increment }
})
|
||||||
5
fe/src/utils/common.ts
Normal file
5
fe/src/utils/common.ts
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
/**
 * Format a Unix timestamp given in seconds as a locale date-time string.
 *
 * A missing, non-finite or non-positive timestamp returns '-' instead of
 * being rendered as the Unix epoch (for 0) or "Invalid Date" (for NaN).
 *
 * @param timestamp seconds since the Unix epoch
 * @returns the locale-formatted date-time, or '-' when there is no usable value
 */
const convertTimestampToDate = (timestamp: number) => {
  if (!Number.isFinite(timestamp) || timestamp <= 0) {
    return '-'
  }
  // Date expects milliseconds.
  return new Date(timestamp * 1000).toLocaleString()
}
|
||||||
|
|
||||||
|
export { convertTimestampToDate }
|
||||||
430
fe/src/views/DomainManager.vue
Normal file
430
fe/src/views/DomainManager.vue
Normal file
@ -0,0 +1,430 @@
|
|||||||
|
<script setup lang="tsx">
|
||||||
|
import { onMounted, inject, ref, computed } from 'vue'
|
||||||
|
import { useRoute, useRouter } from 'vue-router'
|
||||||
|
import type { AxiosInstance } from 'axios'
|
||||||
|
import { type DataTableColumn, type DataTableRowKey, useMessage, useDialog } from 'naive-ui'
|
||||||
|
import ImportDomainDialog from '../components/ImportDomainDialog.vue'
|
||||||
|
import AddDomainDialog from '../components/AddDomainDialog.vue'
|
||||||
|
import EditDomainDialog from '../components/EditDomainDialog.vue'
|
||||||
|
import { convertTimestampToDate } from '@/utils/common'
|
||||||
|
|
||||||
|
const axios = inject('axios') as AxiosInstance
const message = useMessage()
const dialog = useDialog()
const route = useRoute()
const router = useRouter()

// Current filter criteria.
const filterForm = ref({
  domain: '',
  status: null as number | null,
})

// Choices offered by the status filter; `null` means "all".
const statusOptions = [
  { label: '全部', value: null },
  { label: 'READY', value: 1 },
  { label: 'QUEUE', value: 2 },
  { label: 'CRAWLING', value: 3 },
  { label: 'PAUSE', value: 999 },
]

// Visibility flags of the three dialogs.
const showImportDialog = ref(false)
const showAddDialog = ref(false)
const showEditDialog = ref(false)

// IDs of the domains currently being edited (one row or a batch).
const editingDomainIds = ref<DataTableRowKey[] | null>(null)

// Row keys currently checked in the table.
const checkedRowKeys = ref<DataTableRowKey[]>([])
|
||||||
|
|
||||||
|
// Pagination state plus the handlers wired to <n-pagination>.
const pagination = ref({
  page: 1,
  pageSize: 50,
  itemCount: 0,
  showSizePicker: true,
  pageSizes: [10, 20, 50, 100, 200, 500, 1000],
  // Page changed: remember it, sync the URL and reload.
  onChange(page: number) {
    pagination.value.page = page
    updateUrlParams()
    getDomainList()
  },
  // Page size changed: go back to the first page, sync the URL and reload.
  onUpdatePageSize(pageSize: number) {
    pagination.value.pageSize = pageSize
    pagination.value.page = 1
    updateUrlParams()
    getDomainList()
  },
})
|
||||||
|
|
||||||
|
// Mirror the current page, page size and filters into the route query.
const updateUrlParams = () => {
  const { page, pageSize } = pagination.value
  const { domain, status } = filterForm.value
  router.push({
    query: {
      page,
      size: pageSize,
      // Empty filters are left out of the query.
      domain: domain || undefined,
      status: status || undefined,
    },
  })
}
|
||||||
|
|
||||||
|
// Seed pagination and filters from the route query, using defaults for missing values.
const initPagination = () => {
  const { page, size, domain, status } = route.query

  pagination.value.page = Number(page) || 1
  pagination.value.pageSize = Number(size) || 50
  filterForm.value.domain = (domain as string) || ''
  filterForm.value.status = status ? Number(status) : null
}
|
||||||
|
|
||||||
|
// Column definitions of the domain table.
const columns: Array<DataTableColumn> = [
  {
    type: 'selection',
  },
  {
    title: '#',
    key: 'id',
  },
  {
    title: '域名',
    key: 'domain',
  },
  {
    title: '状态',
    key: 'status',
    // Map the numeric status to a coloured tag; labels match `statusOptions`.
    render: (row) => {
      let statusText = ''
      let statusType = ''

      switch (row.status) {
        case 1:
          statusText = 'READY'
          statusType = 'success'
          break
        case 2:
          statusText = 'QUEUE' // was misspelled "QUENE"
          statusType = 'warning'
          break
        case 3:
          statusText = 'CRAWLING'
          statusType = 'info'
          break
        case 999:
          statusText = 'PAUSE'
          statusType = 'error'
          break
        default:
          statusText = 'UNKNOWN'
          statusType = 'error'
      }

      return <n-tag type={statusType}>{statusText}</n-tag>
    },
  },
  {
    title: '采集间隔 (分钟)',
    key: 'crawl_interval',
    // Show the raw minutes; the tooltip converts them to days, rounded to
    // two decimals so values like 60 no longer print 0.041666666666666664.
    render: (row) => (
      <n-tooltip>
        {{
          trigger: () => <span>{row.crawl_interval}</span>,
          default: () =>
            `约 ${Number(((row.crawl_interval as number) / 60 / 24).toFixed(2))} 天`,
        }}
      </n-tooltip>
    ),
  },
  {
    title: '最近采集时间',
    key: 'latest_crawl_time',
    render: (row) => convertTimestampToDate(row.latest_crawl_time as number),
  },
  {
    title: '操作',
    key: 'action',
    // Per-row actions: edit interval, crawl now, delete.
    render: (row) => (
      <div class="flex gap-2">
        <n-button size="small" type="primary" onClick={() => handleEdit(row)}>
          编辑
        </n-button>
        <n-button size="small" type="info" onClick={() => handleSingleCrawl(row)}>
          立即采集
        </n-button>
        <n-button size="small" type="error" onClick={() => handleDelete(row)}>
          删除
        </n-button>
      </div>
    ),
  },
]
|
||||||
|
|
||||||
|
// Rows of the current page.
const domains = ref([])

// True while at least one row is checked.
const hasSelectedRows = computed(() => checkedRowKeys.value.length > 0)

// Keep the checked keys in sync with the table's selection.
const handleCheck = (rowKeys: DataTableRowKey[]) => {
  checkedRowKeys.value = rowKeys
}
|
||||||
|
|
||||||
|
/** Ask for confirmation, then delete every checked domain. */
const handleBatchDelete = () => {
  if (!hasSelectedRows.value) {
    message.warning('请至少选择一个域名')
    return
  }

  // Bound to the checkbox inside the confirmation dialog.
  const alsoRemoveSurl = ref(false)

  // Runs when the user confirms the dialog.
  const confirmDelete = async () => {
    try {
      const { data: result } = await axios.post('/api/domain/v1/delete', {
        domain_ids: checkedRowKeys.value,
        remove_surl: alsoRemoveSurl.value,
      })
      if (result.code !== 20000) {
        message.error(`批量删除域名失败,错误:${result.message}`)
        return
      }
      message.success('批量删除成功')
      checkedRowKeys.value = [] // clear the selection
      getDomainList()
    } catch (error) {
      console.error('批量删除域名失败', error)
      message.error(`批量删除域名失败,错误:${error}`)
    }
  }

  dialog.warning({
    title: '确认批量删除',
    content: () => (
      <div>
        <div class="mb-2">确定要删除选中的 {checkedRowKeys.value.length} 个域名吗?</div>
        <n-checkbox v-model:checked={alsoRemoveSurl.value}>同时删除所有关联的 SURL</n-checkbox>
      </div>
    ),
    positiveText: '确定',
    negativeText: '取消',
    onPositiveClick: confirmDelete,
  })
}
|
||||||
|
|
||||||
|
/** Open the edit dialog for every checked domain. */
const handleBatchEdit = () => {
  if (!hasSelectedRows.value) {
    message.warning('请至少选择一个域名')
    return
  }
  // Copy the keys so later selection changes do not affect the dialog.
  editingDomainIds.value = checkedRowKeys.value.slice()
  showEditDialog.value = true
}
|
||||||
|
|
||||||
|
/** Queue an immediate crawl for every checked domain. */
const handleBatchCrawl = async () => {
  if (!hasSelectedRows.value) {
    message.warning('请至少选择一个域名')
    return
  }

  try {
    // TODO: confirm the backend actually exposes /api/domain/v1/crawl
    const { data: result } = await axios.post('/api/domain/v1/crawl', {
      domain_ids: checkedRowKeys.value,
    })

    if (result.code !== 20000) {
      message.error(`批量触发采集失败:${result.message}`)
      return
    }

    message.success('批量触发采集成功,已加入队列')
    checkedRowKeys.value = [] // clear the selection
    getDomainList() // reload so the status change becomes visible
  } catch (error) {
    console.error('批量触发采集失败', error)
    message.error(`批量触发采集失败:${error}`)
  }
}
|
||||||
|
|
||||||
|
/** Queue an immediate crawl for a single row. */
const handleSingleCrawl = async (row: any) => {
  try {
    // Same endpoint as the batch action, with a one-element ID list.
    const { data: result } = await axios.post('/api/domain/v1/crawl', {
      domain_ids: [row.id],
    })

    if (result.code !== 20000) {
      message.error(`触发采集失败:${result.message}`)
      return
    }

    message.success(`域名 ${row.domain} 已加入采集队列`)
    getDomainList() // reload so the status change becomes visible
  } catch (error) {
    console.error('触发采集失败', error)
    message.error(`触发采集失败:${error}`)
  }
}
|
||||||
|
|
||||||
|
/** Ask for confirmation, then delete a single domain. */
const handleDelete = async (row: any) => {
  // Bound to the checkbox inside the confirmation dialog.
  const alsoRemoveSurl = ref(false)

  // Runs when the user confirms the dialog.
  const confirmDelete = async () => {
    try {
      const { data: result } = await axios.post('/api/domain/v1/delete', {
        domain_ids: [row.id],
        remove_surl: alsoRemoveSurl.value,
      })
      if (result.code !== 20000) {
        message.error(`删除域名失败,错误:${result.message}`)
        return
      }
      message.success('删除成功')

      // Drop the deleted row from the selection if it was checked.
      const position = checkedRowKeys.value.findIndex((key) => key === row.id)
      if (position > -1) {
        checkedRowKeys.value.splice(position, 1)
      }
      getDomainList()
    } catch (error) {
      console.error('删除域名失败', error)
      message.error(`删除域名失败,错误:${error}`)
    }
  }

  dialog.warning({
    title: '确认删除',
    content: () => (
      <div>
        <div class="mb-2">确定要删除域名 {row.domain} 吗?</div>
        <n-checkbox v-model:checked={alsoRemoveSurl.value}>同时删除所有关联的 SURL</n-checkbox>
      </div>
    ),
    positiveText: '确定',
    negativeText: '取消',
    onPositiveClick: confirmDelete,
  })
}
|
||||||
|
|
||||||
|
/** Load the current page of domains using the active filters. */
const getDomainList = async () => {
  try {
    const query = {
      page: pagination.value.page,
      size: pagination.value.pageSize,
      // Empty filters are not sent.
      domain: filterForm.value.domain || undefined,
      status: filterForm.value.status || undefined,
    }
    const { data: result } = await axios.get('/api/domain/v1/list', { params: query })

    if (result.code !== 20000) {
      message.error(`获取域名列表失败,错误:${result.message}`)
      return
    }

    domains.value = result.data.rows
    pagination.value.itemCount = result.data.total
  } catch (error) {
    console.error('获取域名列表失败', error)
    message.error(`获取域名列表失败,错误:${error}`)
  }
}
|
||||||
|
|
||||||
|
// Reload the list after a successful file import.
const handleImportSuccess = () => {
  getDomainList()
}

// Reload the list after domains were added manually.
const handleAddSuccess = () => {
  getDomainList()
}
|
||||||
|
|
||||||
|
/** Open the edit dialog for a single row. */
const handleEdit = (row: any) => {
  editingDomainIds.value = [row.id]
  showEditDialog.value = true
}
|
||||||
|
|
||||||
|
// After a successful edit: reload, forget the edited IDs, and clear the
// table selection when more than one domain was edited.
const handleEditSuccess = () => {
  getDomainList()

  const wasBatchEdit = (editingDomainIds.value?.length || 0) > 1
  editingDomainIds.value = null

  if (wasBatchEdit) {
    checkedRowKeys.value = []
  }
}
|
||||||
|
|
||||||
|
// Clear all filters, return to the first page, sync the URL and reload.
const resetFilter = () => {
  filterForm.value = { domain: '', status: null }
  pagination.value.page = 1
  updateUrlParams()
  getDomainList()
}
|
||||||
|
|
||||||
|
// Apply the current filters starting from the first page.
const applyFilter = () => {
  pagination.value.page = 1
  updateUrlParams()
  getDomainList()
}
|
||||||
|
|
||||||
|
// On mount: restore state from the URL, then load the first batch of rows.
onMounted(async () => {
  initPagination()
  await getDomainList()
})
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<div class="text-2xl pb-4">域名管理</div>
|
||||||
|
<div class="flex gap-2 mb-4">
|
||||||
|
<n-button type="primary" @click="showImportDialog = true">通过文件导入</n-button>
|
||||||
|
<n-button type="primary" @click="showAddDialog = true">手动添加</n-button>
|
||||||
|
<n-button type="error" @click="handleBatchDelete" :disabled="!hasSelectedRows">批量删除</n-button>
|
||||||
|
<n-button type="warning" @click="handleBatchEdit" :disabled="!hasSelectedRows">修改间隔</n-button>
|
||||||
|
<n-button type="info" @click="handleBatchCrawl" :disabled="!hasSelectedRows">立即采集</n-button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 筛选表单 -->
|
||||||
|
<n-form inline :model="filterForm" class="mb-4 p-4 bg-gray-50 rounded-lg">
|
||||||
|
<n-form-item label="域名" path="domain">
|
||||||
|
<n-input v-model:value="filterForm.domain" placeholder="请输入域名" clearable @keydown.enter="applyFilter" />
|
||||||
|
</n-form-item>
|
||||||
|
<n-form-item label="状态" path="status">
|
||||||
|
<n-select v-model:value="filterForm.status" :options="statusOptions" placeholder="请选择状态" style="width: 200px" />
|
||||||
|
</n-form-item>
|
||||||
|
<n-form-item>
|
||||||
|
<n-button type="primary" @click="applyFilter">筛选</n-button>
|
||||||
|
<n-button class="ml-2" @click="resetFilter">重置</n-button>
|
||||||
|
</n-form-item>
|
||||||
|
</n-form>
|
||||||
|
|
||||||
|
<n-data-table :columns="columns" :data="domains" :row-key="(row: any) => row.id" :checked-row-keys="checkedRowKeys"
|
||||||
|
@update:checked-row-keys="handleCheck" size="small" />
|
||||||
|
|
||||||
|
<div class="flex justify-center mt-4">
|
||||||
|
<n-pagination v-model:page="pagination.page" :page-size="pagination.pageSize" :item-count="pagination.itemCount"
|
||||||
|
:show-size-picker="pagination.showSizePicker" :page-sizes="pagination.pageSizes"
|
||||||
|
@update:page-size="pagination.onUpdatePageSize" @update:page="pagination.onChange" />
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<ImportDomainDialog v-model:show="showImportDialog" @success="handleImportSuccess" />
|
||||||
|
<AddDomainDialog v-model:show="showAddDialog" @success="handleAddSuccess" />
|
||||||
|
<EditDomainDialog v-model:show="showEditDialog" :domain-ids="editingDomainIds" @success="handleEditSuccess"
|
||||||
|
@close="editingDomainIds = null" />
|
||||||
|
</template>
|
||||||
379
fe/src/views/UrlManager.vue
Normal file
379
fe/src/views/UrlManager.vue
Normal file
@ -0,0 +1,379 @@
|
|||||||
|
<script setup lang="tsx">
|
||||||
|
import { onMounted, inject, ref, computed } from 'vue'
|
||||||
|
import { useRoute, useRouter } from 'vue-router'
|
||||||
|
import type { AxiosInstance } from 'axios'
|
||||||
|
import { type DataTableColumn, type DataTableRowKey, useMessage, useDialog, NDropdown } from 'naive-ui'
|
||||||
|
import { convertTimestampToDate } from '@/utils/common'
|
||||||
|
|
||||||
|
const axios = inject('axios') as AxiosInstance
const message = useMessage()
const dialog = useDialog()
const route = useRoute()
const router = useRouter()

// Search criteria. The flag fields use the values from `options` below.
const searchForm = ref({
  domain: '',
  surl: '',
  is_report_by_one: 2,
  is_report_by_site: 2,
  is_report_by_wap: 2,
  has_evidence: 2,
})

// Choices for the flag filters: 2 = all, 1 = yes, 0 = no.
const options = [
  { label: '全部', value: 2 },
  { label: '是', value: 1 },
  { label: '否', value: 0 },
]
|
||||||
|
|
||||||
|
// Pagination state plus the page / page-size change handlers.
const pagination = ref({
  page: 1,
  pageSize: 50,
  itemCount: 0,
  showSizePicker: true,
  pageSizes: [10, 20, 50, 100, 200, 500, 1000],
  // Page changed: remember it, sync the URL and reload.
  onChange(page: number) {
    pagination.value.page = page
    updateUrlParams()
    getUrlList()
  },
  // Page size changed: go back to the first page, sync the URL and reload.
  onUpdatePageSize(pageSize: number) {
    pagination.value.pageSize = pageSize
    pagination.value.page = 1
    updateUrlParams()
    getUrlList()
  },
})
|
||||||
|
|
||||||
|
// Mirror the current page and page size into the route query.
const updateUrlParams = () => {
  const { page, pageSize } = pagination.value
  router.push({
    query: { page, size: pageSize },
  })
}
|
||||||
|
|
||||||
|
// Seed pagination from the route query, defaulting to page 1 and 50 rows.
const initPagination = () => {
  pagination.value.page = Number(route.query.page) || 1
  pagination.value.pageSize = Number(route.query.size) || 50
}
|
||||||
|
|
||||||
|
// Row keys currently checked in the table.
const checkedRowKeys = ref<DataTableRowKey[]>([])

// Entries of the "report" dropdown; `key` selects the channel(s).
const reportOptions = [
  { label: '全部渠道', key: 'all' },
  { label: 'PC渠道', key: 'pc' },
  { label: 'SITE渠道', key: 'site' },
  { label: 'WAP渠道', key: 'wap' },
]
|
||||||
|
|
||||||
|
// Render a truthy/falsy flag as a yes/no tag (green when set).
const renderFlagTag = (flag: unknown) => (
  <n-tag type={flag ? 'success' : 'default'}>{flag ? '是' : '否'}</n-tag>
)

// Column definitions of the URL table.
const columns: Array<DataTableColumn> = [
  { type: 'selection' },
  { title: '#', key: 'id', minWidth: 60 },
  { title: '域名', key: 'domain', minWidth: 200 },
  { title: 'SURL', key: 'surl', minWidth: 100 },
  { title: 'Q', key: 'q', minWidth: 100 },
  { title: 'Token', key: 'token', minWidth: 100 },
  {
    title: '已通过PC举报',
    key: 'is_report_by_one',
    render: (row) => renderFlagTag(row.is_report_by_one),
  },
  {
    title: '已通过site举报',
    key: 'is_report_by_site',
    render: (row) => renderFlagTag(row.is_report_by_site),
  },
  {
    title: '已通过WAP举报',
    key: 'is_report_by_wap',
    render: (row) => renderFlagTag(row.is_report_by_wap),
  },
  {
    title: '已收集证据',
    key: 'has_evidence',
    render: (row) => renderFlagTag(row.has_evidence),
  },
  {
    title: '操作',
    key: 'action',
    // Per-row actions: report through a chosen channel, or collect evidence.
    render: (row) => (
      <div class="flex gap-2">
        <n-dropdown
          trigger="click"
          options={reportOptions}
          onSelect={(key: string) => handleSingleReport(row, key)}
        >
          <n-button size="small" type="primary">举报</n-button>
        </n-dropdown>
        <n-button size="small" type="info" onClick={() => handleSingleCollectEvidence(row)}>
          收集证据
        </n-button>
      </div>
    ),
  },
]
|
||||||
|
|
||||||
|
const urls = ref([])
|
||||||
|
|
||||||
|
/**
 * Fetch one page of URLs from the backend and refresh the table state.
 *
 * Sends the current search form plus the pagination page/size as query
 * parameters to `/api/urls/v1/list`. A business code other than 20000, or a
 * thrown request error, is reported to the user through `message.error`.
 */
const getUrlList = async () => {
  try {
    const { data: response } = await axios.get('/api/urls/v1/list', {
      params: {
        ...searchForm.value,
        page: pagination.value.page,
        size: pagination.value.pageSize,
      },
    })

    if (response.code !== 20000) {
      message.error(`获取URL列表失败:${response.message}`)
      return
    }

    // The payload nests both the row list and the total count under `data`.
    urls.value = response.data.data
    pagination.value.itemCount = response.data.total
  } catch (error) {
    console.error('获取URL列表失败', error)
    message.error(`获取URL列表失败:${error}`)
  }
}
|
||||||
|
|
||||||
|
// Search: jump back to the first page, then reload the list with the
// current search form.
const handleSearch = () => {
  pagination.value.page = 1
  getUrlList()
}
|
||||||
|
|
||||||
|
/**
 * Reset the search form and rerun the search.
 *
 * Text filters are cleared and every status filter is set back to 2.
 */
const handleReset = () => {
  // NOTE(review): 2 is presumably the "no filter" entry of `options` — confirm.
  const defaultStatus = 2

  searchForm.value = {
    domain: '',
    surl: '',
    is_report_by_one: defaultStatus,
    is_report_by_site: defaultStatus,
    is_report_by_wap: defaultStatus,
    has_evidence: defaultStatus,
  }
  handleSearch()
}
|
||||||
|
|
||||||
|
/**
 * Submit report requests for the given URL ids.
 *
 * Refuses to proceed (with a warning) when any of the ids found in the
 * currently loaded `urls` list has no evidence yet. Otherwise posts to
 * `/api/urls/v1/report`; `option` selects the channel ('pc', 'site', 'wap')
 * and 'all' enables all three. On success the list is reloaded.
 */
const handleReport = async (ids: number[], option: string) => {
  // Only rows present in the currently loaded list can be checked here.
  const everyRowHasEvidence = urls.value
    .filter((url: any) => ids.includes(url.id))
    .every((url: any) => url.has_evidence)

  if (!everyRowHasEvidence) {
    message.warning('请先收集证据后再进行举报')
    return
  }

  const viaAll = option === 'all'
  const payload = {
    ids,
    report_by_one: viaAll || option === 'pc',
    report_by_site: viaAll || option === 'site',
    report_by_wap: viaAll || option === 'wap',
  }

  try {
    const { data: response } = await axios.post('/api/urls/v1/report', payload)

    if (response.code !== 20000) {
      message.error(`举报失败:${response.message}`)
      return
    }

    message.success('操作成功,已修改SURL状态,等待引擎调度')
    getUrlList()
  } catch (error) {
    console.error('举报失败', error)
    message.error(`举报失败:${error}`)
  }
}
|
||||||
|
|
||||||
|
/**
 * Request evidence collection for the given URL ids.
 *
 * Posts the ids to `/api/urls/v1/evidence`. A business code of 20000 shows a
 * success message and reloads the list; any other code, or a thrown request
 * error, is reported through `message.error`.
 */
const handleCollectEvidence = async (ids: number[]) => {
  try {
    const { data: response } = await axios.post('/api/urls/v1/evidence', { ids })

    if (response.code === 20000) {
      message.success('操作成功,已修改SURL状态,等待引擎调度')
      getUrlList()
      return
    }

    message.error(`收集证据失败:${response.message}`)
  } catch (error) {
    console.error('收集证据失败', error)
    message.error(`收集证据失败:${error}`)
  }
}
|
||||||
|
|
||||||
|
// Report a single row: delegates to handleReport with a one-element id list.
const handleSingleReport = (row: any, option: string) => {
  handleReport([row.id], option)
}
|
||||||
|
|
||||||
|
// Collect evidence for a single row: delegates to handleCollectEvidence with
// a one-element id list.
const handleSingleCollectEvidence = (row: any) => {
  handleCollectEvidence([row.id])
}
|
||||||
|
|
||||||
|
/**
 * Report every checked row through the chosen channel.
 * Shows a warning and does nothing when no row is checked.
 */
const handleBatchReport = (option: string) => {
  const selectedIds = checkedRowKeys.value as number[]

  if (!selectedIds.length) {
    message.warning('请至少选择一个URL')
    return
  }

  handleReport(selectedIds, option)
}
|
||||||
|
|
||||||
|
/**
 * Request evidence collection for every checked row.
 * Shows a warning and does nothing when no row is checked.
 */
const handleBatchCollectEvidence = () => {
  const selectedIds = checkedRowKeys.value as number[]

  if (!selectedIds.length) {
    message.warning('请至少选择一个URL')
    return
  }

  handleCollectEvidence(selectedIds)
}
|
||||||
|
|
||||||
|
// True while at least one table row is checked.
const hasSelectedRows = computed(() => checkedRowKeys.value.length > 0)
|
||||||
|
|
||||||
|
// On mount: set up pagination first, then load the first page of URLs.
onMounted(() => {
  initPagination()
  getUrlList()
})
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<div class="p-4">
|
||||||
|
<h1 class="text-2xl mb-4">URL管理</h1>
|
||||||
|
|
||||||
|
<!-- 搜索表单 -->
|
||||||
|
<n-card class="mb-4">
|
||||||
|
<n-form :model="searchForm" label-placement="left" label-width="auto" require-mark-placement="right-hanging">
|
||||||
|
<n-grid :cols="24" :x-gap="24">
|
||||||
|
<n-form-item-gi :span="8" label="域名">
|
||||||
|
<n-input v-model:value="searchForm.domain" placeholder="请输入域名" @keyup.enter="handleSearch" />
|
||||||
|
</n-form-item-gi>
|
||||||
|
<n-form-item-gi :span="8" label="SURL">
|
||||||
|
<n-input v-model:value="searchForm.surl" placeholder="请输入SURL" @keyup.enter="handleSearch" />
|
||||||
|
</n-form-item-gi>
|
||||||
|
<n-form-item-gi :span="8" label="是否已通过PC举报">
|
||||||
|
<n-select v-model:value="searchForm.is_report_by_one" :options="options" placeholder="请选择" />
|
||||||
|
</n-form-item-gi>
|
||||||
|
<n-form-item-gi :span="8" label="是否已通过site举报">
|
||||||
|
<n-select v-model:value="searchForm.is_report_by_site" :options="options" placeholder="请选择" />
|
||||||
|
</n-form-item-gi>
|
||||||
|
<n-form-item-gi :span="8" label="是否已通过WAP举报">
|
||||||
|
<n-select v-model:value="searchForm.is_report_by_wap" :options="options" placeholder="请选择" />
|
||||||
|
</n-form-item-gi>
|
||||||
|
<n-form-item-gi :span="8" label="是否已收集证据">
|
||||||
|
<n-select v-model:value="searchForm.has_evidence" :options="options" placeholder="请选择" />
|
||||||
|
</n-form-item-gi>
|
||||||
|
</n-grid>
|
||||||
|
<div class="flex justify-end gap-2 mt-4">
|
||||||
|
<n-button @click="handleReset">重置</n-button>
|
||||||
|
<n-button type="primary" @click="handleSearch">搜索</n-button>
|
||||||
|
</div>
|
||||||
|
</n-form>
|
||||||
|
</n-card>
|
||||||
|
|
||||||
|
<!-- 数据表格 -->
|
||||||
|
<n-card>
|
||||||
|
<div class="mb-4" v-if="hasSelectedRows">
|
||||||
|
<n-space>
|
||||||
|
<n-dropdown trigger="click" :options="reportOptions" @select="handleBatchReport">
|
||||||
|
<n-button type="primary">批量举报 ({{ checkedRowKeys.length }})</n-button>
|
||||||
|
</n-dropdown>
|
||||||
|
<n-button type="info" @click="handleBatchCollectEvidence">
|
||||||
|
批量收集证据 ({{ checkedRowKeys.length }})
|
||||||
|
</n-button>
|
||||||
|
</n-space>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<n-data-table :columns="columns" :data="urls" :bordered="false" :row-key="(row: any) => row.id"
|
||||||
|
:checked-row-keys="checkedRowKeys" @update:checked-row-keys="checkedRowKeys = $event" />
|
||||||
|
|
||||||
|
<div class="flex justify-center mt-4">
|
||||||
|
<n-pagination v-model:page="pagination.page" :item-count="pagination.itemCount" :page-size="pagination.pageSize"
|
||||||
|
:show-size-picker="pagination.showSizePicker" :page-sizes="pagination.pageSizes"
|
||||||
|
:on-update:page-size="pagination.onUpdatePageSize" :on-change="pagination.onChange" />
|
||||||
|
</div>
|
||||||
|
</n-card>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<style scoped></style>
|
||||||
12
fe/tsconfig.app.json
Normal file
12
fe/tsconfig.app.json
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"extends": "@vue/tsconfig/tsconfig.dom.json",
|
||||||
|
"include": ["env.d.ts", "src/**/*", "src/**/*.vue"],
|
||||||
|
"exclude": ["src/**/__tests__/*"],
|
||||||
|
"compilerOptions": {
|
||||||
|
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
|
||||||
|
|
||||||
|
"paths": {
|
||||||
|
"@/*": ["./src/*"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
11
fe/tsconfig.json
Normal file
11
fe/tsconfig.json
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"files": [],
|
||||||
|
"references": [
|
||||||
|
{
|
||||||
|
"path": "./tsconfig.node.json"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "./tsconfig.app.json"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
19
fe/tsconfig.node.json
Normal file
19
fe/tsconfig.node.json
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"extends": "@tsconfig/node22/tsconfig.json",
|
||||||
|
"include": [
|
||||||
|
"vite.config.*",
|
||||||
|
"vitest.config.*",
|
||||||
|
"cypress.config.*",
|
||||||
|
"nightwatch.conf.*",
|
||||||
|
"playwright.config.*",
|
||||||
|
"eslint.config.*"
|
||||||
|
],
|
||||||
|
"compilerOptions": {
|
||||||
|
"noEmit": true,
|
||||||
|
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
|
||||||
|
|
||||||
|
"module": "ESNext",
|
||||||
|
"moduleResolution": "Bundler",
|
||||||
|
"types": ["node"]
|
||||||
|
}
|
||||||
|
}
|
||||||
50
fe/vite.config.ts
Normal file
50
fe/vite.config.ts
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
import {fileURLToPath, URL} from 'node:url'
|
||||||
|
|
||||||
|
import {defineConfig} from 'vite'
|
||||||
|
import vue from '@vitejs/plugin-vue'
|
||||||
|
import vueJsx from '@vitejs/plugin-vue-jsx'
|
||||||
|
import vueDevTools from 'vite-plugin-vue-devtools'
|
||||||
|
import tailwindcss from '@tailwindcss/vite'
|
||||||
|
import AutoImport from 'unplugin-auto-import/vite'
|
||||||
|
import Components from 'unplugin-vue-components/vite'
|
||||||
|
import {NaiveUiResolver} from 'unplugin-vue-components/resolvers'
|
||||||
|
|
||||||
|
|
||||||
|
// Vite configuration for the front end.
// https://vite.dev/config/
export default defineConfig({
  plugins: [
    tailwindcss(),
    vue(),
    vueJsx(),
    vueDevTools(),
    // Auto-import Vue APIs and the listed Naive UI composables.
    AutoImport({
      imports: [
        'vue',
        {
          'naive-ui': [
            'useDialog',
            'useMessage',
            'useNotification',
            'useLoadingBar'
          ]
        }
      ]
    }),
    // Resolve Naive UI components used in templates.
    Components({
      resolvers: [NaiveUiResolver()]
    })
  ],
  resolve: {
    alias: {
      // '@' maps to the ./src directory next to this config file.
      '@': fileURLToPath(new URL('./src', import.meta.url))
    },
  },
  server: {
    proxy: {
      // Dev server forwards /api requests to the service on localhost:3000.
      '/api': {
        target: 'http://localhost:3000',
        changeOrigin: true,
      }
    }
  }
})
|
||||||
@ -34,13 +34,17 @@ tab = browser.new_tab(f"https://www.baidu.com/s?wd={keyword}")
|
|||||||
# week_btn_el = tab.ele('t:li@@text()= 一月内 ')
|
# week_btn_el = tab.ele('t:li@@text()= 一月内 ')
|
||||||
# week_btn_el.click(by_js=True)
|
# week_btn_el.click(by_js=True)
|
||||||
# tab.wait(2)
|
# tab.wait(2)
|
||||||
|
print(f"{tab.url=}")
|
||||||
print("2222")
|
print("2222")
|
||||||
|
tab.get("https://www.163.com/")
|
||||||
|
print(f"{tab.url=}")
|
||||||
# tab.ele(".content_none")
|
# tab.ele(".content_none")
|
||||||
# tab.wait.eles_loaded(["#container", ".content_none", "#content_left"], any_one=True)
|
# tab.wait.eles_loaded(["#container", ".content_none", "#content_left"], any_one=True)
|
||||||
print("未找到相关结果" in tab.html)
|
print("未找到相关结果" in tab.html)
|
||||||
print("1111")
|
print("1111")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# if "未找到相关结果" in tab.html:
|
# if "未找到相关结果" in tab.html:
|
||||||
# print("未找到相关结果")
|
# print("未找到相关结果")
|
||||||
# else:
|
# else:
|
||||||
|
|||||||
272
tests/test_dp3.py
Normal file
272
tests/test_dp3.py
Normal file
@ -0,0 +1,272 @@
|
|||||||
|
import random
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from enum import verify
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import execjs
|
||||||
|
import requests
|
||||||
|
from DrissionPage import Chromium, ChromiumOptions
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from app.utils.common import get_all_cookies
|
||||||
|
from app.utils.ydm_verify import YdmVerify
|
||||||
|
|
||||||
|
# Browser configuration for this manual captcha experiment.
# NOTE(review): the proxy address and the Chrome path below are hard-coded for
# one Windows machine — adjust before running elsewhere.
chrome_opts = ChromiumOptions()
chrome_opts.mute(True)  # mute audio
chrome_opts.no_imgs(False)
chrome_opts.set_argument("--disable-gpu")
chrome_opts.set_argument('--ignore-certificate-errors')
chrome_opts.set_argument("--proxy-server=http://127.0.0.1:7890")
# chrome_opts.incognito(True)
chrome_opts.set_browser_path(r"C:\Program Files\Google\Chrome\Application\chrome.exe")
chrome_opts.auto_port()
browser = Chromium(addr_or_opts=chrome_opts)
|
||||||
|
|
||||||
|
# tab = browser.new_tab()
|
||||||
|
# tab.listen.start([
|
||||||
|
# "passport.baidu.com/cap/init",
|
||||||
|
# "passport.baidu.com/cap/style",
|
||||||
|
# ])
|
||||||
|
# tab.get("https://wappass.baidu.com/static/captcha/tuxing_v2.html?&logid=10332554090053311096&ak=c27bbc89afca0463650ac9bde68ebe06&backurl=https%3A%2F%2Fwww.baidu.com%2Fs%3Fwd%3Dsite%253Altxbbs.com%26pn%3D50%26oq%3Dsite%253Altxbbs.com%26ct%3D2097152%26ie%3Dutf-8%26si%3Dltxbbs.com%26fenlei%3D256%26rsv_idx%3D1%26rsv_pq%3D99cae74f0003cd72%26rsv_t%3Dab2dk%252Fq4PohUCmoLbyMlEMrGJszk983ojkNLk%252FUiZGJ4ZLpwvZ46PtQUufk%26gpc%3Dstf%253D1741437499%252C1744115898%257Cstftype%253D1%26tfflag%3D1%26topic_pn%3D%26rsv_page%3D1&ext=x9G9QDmMXq%2FNo87gjGO0P1dyBXu4PagAZrreQL6%2Bticsr0rrDszYO2sAbAnT1vLIUgqUK9LXd1cIlztrhMwiv3XfcB99Y5gyF0c0ETsDFDls5CsGNJQRLPawcntn2ndVLHHLl46IaoOp8l%2FC1xtOHwMQi85PCzAojcSf2wQ76KRxVau99LtSYCIfwtv7By0w&signature=f2fbb1b81926e247835f69195661a06b×tamp=1744115910")
|
||||||
|
# for pkg in tab.listen.steps():
|
||||||
|
# print(f"{pkg.url=}")
|
||||||
|
# print(f"{pkg.response.raw_body=}")
|
||||||
|
# current_path = Path(__file__).resolve()
|
||||||
|
# print(current_path)
|
||||||
|
# current_dir = current_path.parent.parent
|
||||||
|
# print(current_dir)
|
||||||
|
# js_path = current_dir.joinpath("./js/mkd_v2_link_submit.js")
|
||||||
|
# print(js_path.exists())
|
||||||
|
|
||||||
|
# with open("./js/mkd_v2_link_submit.js", "r", encoding="utf-8") as f:
|
||||||
|
# ds_js = f.read()
|
||||||
|
#
|
||||||
|
|
||||||
|
# Proxy used for the direct `requests` calls made by verify_captcha.
proxy_str = "http://127.0.0.1:7890"

# Base headers for the direct captcha API calls.
# No "Cookie" entry is defined here on purpose: verify_captcha always sets the
# Referer and Cookie captured from the browser before sending anything, so a
# literal session cookie would never be sent and must not live in source
# control.
# NOTE(review): the 'sec-ch-ua_wap*' names are not standard client-hint header
# names ('sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform') — confirm
# whether this is intentional.
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7,zh-TW;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    # 'Origin': 'https://jubao.baidu.com',
    'Pragma': 'no-cache',
    'Referer': "https://wappass.baidu.com/",
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0",
    'X-Requested-With': 'XMLHttpRequest',
    'sec-ch-ua_wap': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    'sec-ch-ua_wap-mobile': '?0',
    'sec-ch-ua_wap-platform': '"Windows"',
}
|
||||||
|
|
||||||
|
# Open a tab and start capturing the captcha API traffic (/cap/init,
# /cap/style, /cap/log) before any navigation happens.
tab = browser.new_tab()
tab.listen.start(r"/cap/(init|style|log)", is_regex=True)
tab.get("https://www.baidu.com")

# Filled by listener(): the captured response bodies plus the Referer and
# Cookie request headers.
captcha_data = {}
|
||||||
|
|
||||||
|
|
||||||
|
def listener():
    """Store captured captcha packets in ``captcha_data``.

    Iterates over the packets yielded by ``tab.listen.steps()`` and keeps the
    response body of the ``/cap/init``, ``/cap/style`` and ``/cap/log`` calls
    under the keys ``"init"``, ``"style"`` and ``"log"``. For ``/cap/style``
    it also records the request's ``Referer`` and ``Cookie`` headers, which
    verify_captcha reuses for its own requests.
    """
    for pkg in tab.listen.steps():
        if "/cap/init" in pkg.url:
            captcha_data["init"] = pkg.response.body
        # NOTE(review): the source lost its indentation; the referer/cookie
        # capture is assumed to belong to the /cap/style branch — confirm.
        if "/cap/style" in pkg.url:
            captcha_data["style"] = pkg.response.body
            captcha_data["referer"] = pkg.request.headers.get("Referer")
            # Single quotes inside the f-string: reusing the outer quote
            # character is a SyntaxError before Python 3.12.
            logger.debug(f"正确的 referer: {captcha_data['referer']}")
            captcha_data["cookie"] = pkg.request.headers.get("Cookie")
            logger.debug(f"cookie: {captcha_data['cookie']}")
        if "/cap/log" in pkg.url:
            captcha_data["log"] = pkg.response.body
|
||||||
|
|
||||||
|
|
||||||
|
# Consume captured packets in the background; daemon=True so this thread does
# not keep the script alive on exit.
thread = threading.Thread(target=listener, daemon=True)
thread.start()
|
||||||
|
|
||||||
|
|
||||||
|
def verify_captcha(current_url: str):
    """Try once to solve the rotate captcha from the captured packets.

    Reads the ``/cap/init`` and ``/cap/style`` responses stored in
    ``captcha_data``, downloads the captcha image, asks ``YdmVerify`` for the
    rotation, submits it to ``/cap/log`` and finally calls ``/cap/c`` to
    obtain the URL to continue with.

    Earlier experiments that replayed ``/cap/init`` and ``/cap/style``
    directly were removed from the body; the captured packets are used
    instead.

    Args:
        current_url: URL of the captcha page. Not used by the body; kept so
            existing callers keep working.

    Returns:
        The decoded redirect URL on success, otherwise ``None`` (data not
        captured yet, recognition failed, request failed, or the server
        rejected the answer). Callers are expected to retry on ``None``.
    """
    request_timeout = 15  # seconds; without it a stalled request blocks forever
    proxies = {"http": proxy_str, "https": proxy_str}

    # The listener thread fills captcha_data asynchronously, so any of these
    # keys may still be missing (or hold an unparsed body) when we get here.
    try:
        referer = captcha_data["referer"]
        cookie = captcha_data["cookie"]
        init_data = captcha_data["init"]["data"]
        style_data = captcha_data["style"]["data"]
        # Extract AS / TK.
        as_value = init_data["as"]
        tk_value = init_data["tk"]
        # Extract the style payload.
        backstr = style_data["backstr"]
        captcha_link = style_data["captchalist"][0]["source"]["back"]["path"]
    except (KeyError, IndexError, TypeError) as exc:
        logger.error(f"captcha data missing or malformed: {exc!r}")
        return None

    # Work on a copy so the module-level headers are not mutated.
    request_headers = {**headers, "Referer": referer, "Cookie": cookie}

    logger.debug(f"{as_value=}, {tk_value=}")
    logger.debug(f"{backstr=}, {captcha_link=}")

    # Download the captcha image (sent without the proxy, as before).
    try:
        image_response = requests.get(
            captcha_link, headers=request_headers, timeout=request_timeout
        )
    except requests.RequestException as exc:
        logger.error(f"captcha image download failed: {exc!r}")
        return None
    picture = image_response.content
    # Keep a copy on disk for manual inspection.
    with open("captcha.png", "wb") as f:
        f.write(picture)
    logger.debug("download captcha.png")

    # Recognise the captcha.
    ydm = YdmVerify()
    slide_distance = ydm.rotate(picture)
    logger.debug(f"{slide_distance=}")
    if not slide_distance:
        logger.error("识别验证码失败")
        return None
    rotate_angle_rate = round(slide_distance / 360, 2)
    logger.debug(f"{rotate_angle_rate=}")

    if not rotate_angle_rate:
        logger.debug("识别验证码失败")
        return None

    # Build and send the verification request.
    time_log = str(int(time.time() * 1000))
    # NOTE(review): relative path — depends on the current working directory.
    with open("./js/mkd_v2_link_submit.js", 'r', encoding='utf-8') as f:
        ds_js = f.read()
    fs = execjs.compile(ds_js).call('getFs2', backstr, rotate_angle_rate, as_value)
    form = {
        "_": time_log,
        "refer": referer,
        "ak": "c27bbc89afca0463650ac9bde68ebe06",
        "as": as_value,
        "scene": "",
        "tk": tk_value,
        "ver": "2",
        "cv": "submit",
        "typeid": "spin-0",
        # Original notes: fuid is a fingerprint that does not change over a
        # short period and differs between browsers; "fuid.length = 1280",
        # "length 235" varies.
        "fuid": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnfaJTn/Ne60I9LwR04t6XmGEimjy3MrXEpSuItnI4KD0FJKzTbw1AN69fBnzR2FuvMmmQZ+1zgJ72wdcVU+mcQxiE2ir0+TEYgjPJt1Qa3K1mLi+P4IWJeag2lvxB4yJ/GgLbz7OSojK1zRbqBESR5Pdk2R9IA3lxxOVzA+Iw1TWLSgWjlFVG9Xmh1+20oPSbrzvDjYtVPmZ+9/6evcXmhcO1Y58MgLozKnaQIaLfWRPAn9I0uOqAMff6fuUeWcH1mRYoTw2Nhr4J4agZi377iM/izL6cVCGRy2F8c0VpEvM5FjnYxYstXg/9EfB3EVmKAfzNRIeToJ5YV9twMcgdmlV1Uhhp5FAe6gNJIUptp7EMAaXYKm11G+JVPszQFdp9AJLcm4YSsYUXkaPI2Tl66J246cmjWQDTahAOINR5rXR5r/7VVI1RMZ8gb40q7az7vCK56XLooKT5a+rsFrf5Zu0yyCiiagElhrTEOtNdBJJq8eHwEHuFBni9ahSwpC7lbKkUwaKH69tf0DFV7hJROiLETSFloIVkHdy3+I2JUr1LsplAz0hMkWt/tE4tXVUV7QcTDTZWS/2mCoS/GV3N9awQ6iM6hs/BWjlgnEa1+5iP7WSc7RJ34FaE5PsyGXyoCWdXwNRGSZPSvVtB/Ea6w5FKazXcZ/j40FJv+iLGBn3nkkgHlne61I8I7KhtQgIkmBMJIjPMkS/L051MeqdGScsKYTJuSucgI5c3+79eVH+y2TvbOTuuHv1uGxwXFb2atIU1ZYPbmmXculmizKcKI/s44qf8uM8iBZLGkKeVyL74aPyLkg7Gk359g98BIGN/ZzJR/h+Y6AyFx+HlMoYJnS06dVmqFbvlCtSdGylKQ5f8eWtxPkJGqOFtWjIVteQYMsH/AaSJonqw+WLiZvGjYfm9p0alEyujapoTy77HzDcUoU1wUSXa5xS/Z6hXEr2OnLi0LdPVcGjz8lpLcdVeSfm9p0alEyujapoTy77HzDWf5PERRSTFqLd9BTUHLyY4Ji3EQLGQPaM1aeHxG1bJZH0s1Si/KwzTaTYzu6ziQiqwcr2kaYUiH+fMOxn69/BhNJVMhpQkhprc1KZuJRvXjppq0gKweencPxgS/jd0rjw==",
        "fs": fs
    }

    try:
        response = requests.post(
            "https://passport.baidu.com/cap/log",
            headers=request_headers,
            data=form,
            proxies=proxies,
            timeout=request_timeout,
        ).json()
    except (requests.RequestException, ValueError) as exc:
        logger.error(f"/cap/log request failed: {exc!r}")
        return None

    try:
        result = {
            "ds": response["data"]["ds"],
            "op": response["data"]["op"],
            "tk": response["data"]["tk"]
        }
    except (KeyError, TypeError):
        logger.error(f"验证码没转成功, response: {response=}")
        time.sleep(1)
        return None
    logger.debug(f"{result=}")

    # Check whether the answer was accepted.
    if result["op"] != 1:
        logger.error(f"op != 1, 重试")
        return None

    # Call /cap/c to obtain the URL to jump to.
    try:
        reply = requests.post(
            "https://passport.baidu.com/cap/c?ak=c27bbc89afca0463650ac9bde68ebe06",
            headers=request_headers,
            json={
                "tk": result["tk"],
                "ds": result["ds"],
                "qrsign": "",
                "refer": referer
            },
            proxies=proxies,
            timeout=request_timeout,
        ).json()
    except (requests.RequestException, ValueError) as exc:
        logger.error(f"/cap/c request failed: {exc!r}")
        return None

    outcome = reply.get("data") if isinstance(reply, dict) else None
    if not isinstance(outcome, dict):
        logger.error(f"unexpected /cap/c response: {reply=}")
        return None

    if outcome.get("f"):
        logger.error(f"验证码失败: {outcome.get('f')}")
        return None

    success = outcome.get("s")
    if success:
        url = success.get("url")
        logger.debug("验证成功,URL:" + url)
        # The URL carries literal \uXXXX escapes (e.g. \u0026 for "&").
        # NOTE(review): this round-trip would garble non-ASCII characters if
        # the URL ever contained any — confirm it is always ASCII.
        url = url.encode("utf-8").decode("unicode-escape")
        logger.success("解码后的URL:" + url)
        return url

    # Neither a failure nor a success marker was present.
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# Walk through result pages 1..14 of a fixed query; whenever the browser is
# redirected to the captcha page, try up to three times to solve it.
for page_index in range(14):
    tab.get(f"https://www.baidu.com/s?wd=site%3Abaidu.com&pn={page_index * 10}")
    if "wappass.baidu.com/static/captcha/tuxing_v2.html" in tab.url:
        logger.debug("captcha!!!!")
        # presumably gives the listener thread time to capture the packets
        time.sleep(2)

        for _ in range(3):
            url = verify_captcha(tab.url)
            if url:
                tab.get(url)
                # Solved: stop retrying, otherwise the stale captcha data
                # would be submitted again.
                break
            tab.refresh()
            time.sleep(3)

    # NOTE(review): the source lost its indentation; this pause is assumed to
    # run once per page — confirm against the original file.
    time.sleep(30)

logger.debug(f"{captcha_data=}")
# browser.quit()
|
||||||
8
tests/test_unicode.py
Normal file
8
tests/test_unicode.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
import certifi
|
||||||
|
print(certifi.where())
|
||||||
|
|
||||||
|
|
||||||
|
url = r"https://www.baidu.com/s?wd=site%3Abaidu.com\u0026pn=10\u0026p_tk=30610C1sd8U0U%2BPJYAWv8nhtnx0emHFxWZ9edG%2BaRz9YAiXcODGGnlpuX%2FIMRoUmFESarFc5H8HQuG2nq8%2FVXRIsPZt%2BoxjJAmxxHNGCVs0oz%2FZSTZsdUlvw5a53dshtXQASLvZg71Bg4ZT6j%2B5a%2B%2FM3CHWuHs8cjlMBRCAX4l%2BZt8k%3D\u0026p_timestamp=1744202399\u0026p_sign=a1ee13c92f54d14d019cbdd8edcb4088\u0026p_signature=737f76b967318af4b309d30784d440c5\u0026__pc2ps_ab=30610C1sd8U0U%2BPJYAWv8nhtnx0emHFxWZ9edG%2BaRz9YAiXcODGGnlpuX%2FIMRoUmFESarFc5H8HQuG2nq8%2FVXRIsPZt%2BoxjJAmxxHNGCVs0oz%2FZSTZsdUlvw5a53dshtXQASLvZg71Bg4ZT6j%2B5a%2B%2FM3CHWuHs8cjlMBRCAX4l%2BZt8k%3D|1744202399|737f76b967318af4b309d30784d440c5|a1ee13c92f54d14d019cbdd8edcb4088"
|
||||||
|
# Turn the literal \uXXXX sequences of the raw string above (e.g. \u0026)
# into the characters they stand for, then show the result.
url = url.encode("utf-8").decode("unicode-escape")
print(url)
|
||||||
Loading…
x
Reference in New Issue
Block a user