diff --git a/app/engines/crawl_engine.py b/app/engines/crawl_engine.py index 3a00a08..34c3ab8 100644 --- a/app/engines/crawl_engine.py +++ b/app/engines/crawl_engine.py @@ -188,7 +188,7 @@ class CrawlEngine: session.add(domain_model) session.commit() - self.ev.wait(60) + self.ev.wait(10) logger.info("crawl worker stop!") diff --git a/app/engines/evidence_engine.py b/app/engines/evidence_engine.py index 40c3ded..59d3e24 100644 --- a/app/engines/evidence_engine.py +++ b/app/engines/evidence_engine.py @@ -90,7 +90,20 @@ class EvidenceEngine: # Part1 获取证据截图 logger.debug(f"开始获取 {surl} 在百度搜索中的截图") - img_path, tab = self.get_screenshot(target) + img_path, tab, has_result = self.get_screenshot(target) + if not has_result: + # 如果没有搜到结果,直接把 has_evidence 标记为 true 就行了 + with Session(self.database) as session: + stmt = select(ReportUrlModel).where(ReportUrlModel.id == target["id"]) + model: ReportUrlModel = session.exec(stmt).first() + if not model: + logger.error(f"{target['id']} 记录不存在,跳过...") + return None + # 更新数据 + model.has_evidence = True + session.add(model) + session.commit() + return None if not img_path: return None @@ -137,7 +150,7 @@ class EvidenceEngine: except Exception as e: logger.error(f"获取证据截图和举报链接失败: {e}") - def get_screenshot(self, target: dict) -> tuple[str | None, MixTab]: + def get_screenshot(self, target: dict) -> tuple[str | None, MixTab, bool]: """获取搜索页面的截图,返回 img_path """ search_keyword = target["surl"].lstrip("https://").lstrip("http://") tab = self.dp_engine.browser.new_tab() @@ -147,12 +160,13 @@ class EvidenceEngine: if "未找到相关结果" in tab.html: logger.info(f"没有关于 {search_keyword} 的数据") - return None, tab + return None, tab, False # 图片的存储路径 # 截完图先不要关闭 tab,别的地方还要用 img_path = f"./imgs/{target['domain']}/{md5(target['surl'])}.png" - return self.do_screenshot(tab, img_path) + img_path, tab = self.do_screenshot(tab, img_path) + return img_path, tab, True def get_wap_screenshot(self, target: dict) -> tuple[str | None, MixTab]: """用 wap dp 再截一张 surl 本身的图""" diff --git a/app/engines/report_engine.py b/app/engines/report_engine.py index f3bedde..fb9ddd1 100644 --- a/app/engines/report_engine.py +++ b/app/engines/report_engine.py @@ -47,6 +47,18 @@ class Reporter: continue def stop(self): + + for mode in self.mode: + if mode == "pc": + self.reporters["pc"].stop() + elif mode == "wap": + self.reporters["wap"].stop() + elif mode == "site": + self.reporters["site"].stop() + else: + logger.error(f"参数错误: {mode}") + continue + self.status = 0 self.ev.set() diff --git a/app/engines/reporters/pc_reporter.py b/app/engines/reporters/pc_reporter.py index 815b305..01cd42a 100644 --- a/app/engines/reporters/pc_reporter.py +++ b/app/engines/reporters/pc_reporter.py @@ -51,6 +51,7 @@ class PcReporter(BaseReporter): def stop(self): self.status = 0 self.ev.set() + logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出") def run(self): with Session(self.database) as session: @@ -214,20 +215,26 @@ class PcReporter(BaseReporter): # 获取 as、tk 值 try: get_as_tk = self.post_init(surl, token, title, q, timestamp_s) + # logger.debug(f"{get_as_tk=}") get_as = get_as_tk['as'] get_tk = get_as_tk['tk'] # 获取验证码图片下载链接、backstr get_style_result = self.get_style(get_tk, surl, token, title, q, timestamp_s) + # logger.debug(f"{get_style_result=}") get_backstr = get_style_result['backstr'] pic_download_link = get_style_result['captcha'] # 下载验证码图片 self.download_captcha(pic_download_link) rotate_angle_rate = self.get_rotate_angle_rate() + logger.debug(f"{rotate_angle_rate=}") # key = self.get_key(get_as) + if not rotate_angle_rate: + return {'op': 3} get_ds_tk = self.post_log(get_as, get_tk, get_backstr, rotate_angle_rate) + logger.debug(f"{get_ds_tk=}") log_ds = get_ds_tk['ds'] log_tk = get_ds_tk['tk'] log_op = get_ds_tk['op'] @@ -238,7 +245,7 @@ class PcReporter(BaseReporter): } return result except Exception as e: - logger.error(f'{e}') + logger.exception(f'{e}') return {'op': 3} def post_init(self, surl, token, title, q, timestamp_s): @@ -316,9 +323,13 @@ class PcReporter(BaseReporter): with open('./captcha/captcha.png', 'rb') as p: picture = p.read() slide_distance = identify_distance.rotate(image=picture) + logger.debug(f"{slide_distance=}") + if not slide_distance: + return None # 旋转角度为 # logger.info('rotate angle: ' + str(slide_distance)) rotate_angle_rate = round(slide_distance / 360, 2) + logger.debug(f"{rotate_angle_rate=}") # logger.info('rotate angle rate: ' + str(rotate_angle_rate)) return rotate_angle_rate diff --git a/app/engines/reporters/site_reporter.py b/app/engines/reporters/site_reporter.py index ed9c38c..60e9dce 100644 --- a/app/engines/reporters/site_reporter.py +++ b/app/engines/reporters/site_reporter.py @@ -46,8 +46,10 @@ class SiteReporter(BaseReporter): self.token_pattern = r'name="submit_token" value="(.*?)"' def stop(self): + # logger.debug(f"{self.engine_name} stop called.") self.status = 0 self.ev.set() + logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出") def run(self): """实现 PC 端的举报逻辑""" diff --git a/app/engines/reporters/wap_reporter.py b/app/engines/reporters/wap_reporter.py index 51ad315..cb65205 100644 --- a/app/engines/reporters/wap_reporter.py +++ b/app/engines/reporters/wap_reporter.py @@ -46,6 +46,7 @@ class WapReporter(BaseReporter): def stop(self): self.status = 0 self.ev.set() + logger.warning(f"{self.engine_name} 收到退出消息,等待当前任务完成后退出") def run(self): """实现 WAP 端的举报逻辑""" diff --git a/app/utils/ydm_verify.py b/app/utils/ydm_verify.py index 607830c..6bd175d 100644 --- a/app/utils/ydm_verify.py +++ b/app/utils/ydm_verify.py @@ -2,6 +2,8 @@ import base64 import json import requests +from loguru import logger + class YdmVerify(object): _custom_url = "https://www.jfbym.com/api/YmServer/customApi" @@ -17,4 +19,11 @@ class YdmVerify(object): "type": "90009" } resp = requests.post(self._custom_url, headers=self._headers, data=json.dumps(payload)) + logger.debug(f"{resp.json()=}") + + response_data = resp.json() + if response_data.get("code") == 10002: + logger.error(f'{response_data.get("msg")}') + return None + return resp.json()['data']['data'] \ No newline at end of file diff --git a/app/web/controller/report.py b/app/web/controller/report.py index c0c6fc9..edd4cfa 100644 --- a/app/web/controller/report.py +++ b/app/web/controller/report.py @@ -1,6 +1,7 @@ from typing import Annotated from fastapi import APIRouter, Query +from loguru import logger from app.web.request.report_request import AddUrlsRequest, CollectEvidenceRequest, ReportRequest, GetUrlListRequest from app.web.service.domain_service import DomainService @@ -10,8 +11,11 @@ router = APIRouter(prefix="/api/urls", tags=["URL管理"]) @router.get("/v1/list") -async def get_all_urls(request: Annotated[GetUrlListRequest, Query()]): +def get_all_urls(request: Annotated[GetUrlListRequest, Query()]): """获取所有的URL,支持根据域名、状态进行过滤,不传则返回全部数据,支持分页""" + + logger.debug(f"{request=}") + return ReportURLService.get_list( request.domain, request.surl, @@ -25,7 +29,7 @@ async def get_all_urls(request: Annotated[GetUrlListRequest, Query()]): @router.post("/v1/add") -async def add_urls(request: AddUrlsRequest): +def add_urls(request: AddUrlsRequest): """ 手动添加 URL 到域名中,支持批量添加 格式 [ @@ -58,7 +62,7 @@ async def add_urls(request: AddUrlsRequest): @router.post("/v1/evidence") -async def collect_evidence(request: CollectEvidenceRequest): +def collect_evidence(request: CollectEvidenceRequest): """ 强制手动触发证据收集任务,支持批量传入,已经收集过的 URL 也要强制收集 TODO:本来应该需要使用任务队列的,为了简单先把数据库的相关标记改为 0 ,也能达到一样的效果 @@ -68,10 +72,11 @@ async def collect_evidence(request: CollectEvidenceRequest): @router.post("/v1/report") -async def report(request: ReportRequest): +def report(request: ReportRequest): """举报指定的URL,支持批量传入 id 批量举报 先通过改数据库,然后等引擎自己调度实现 """ + logger.debug(f"{request=}") return ReportURLService.batch_update_report_flag( request.ids, request.report_by_one, diff --git a/app/web/request/report_request.py b/app/web/request/report_request.py index 06c838c..770222f 100644 --- a/app/web/request/report_request.py +++ b/app/web/request/report_request.py @@ -6,10 +6,10 @@ from pydantic import BaseModel, Field class GetUrlListRequest(BaseModel): domain: str = "" surl: str = "" - is_report_by_one: Optional[bool] = False - is_report_by_site: Optional[bool] = False - is_report_by_wap: Optional[bool] = False - has_evidence: Optional[bool] = False + is_report_by_one: Optional[int] = 2 + is_report_by_site: Optional[int] = 2 + is_report_by_wap: Optional[int] = 2 + has_evidence: Optional[int] = 2 page: int = Field(default=1, gt=0) size: int = Field(default=50, gt=0) diff --git a/app/web/service/report_service.py b/app/web/service/report_service.py index 5f3c53b..a6517f7 100644 --- a/app/web/service/report_service.py +++ b/app/web/service/report_service.py @@ -15,8 +15,9 @@ class ReportURLService: @classmethod def get_list( - cls, domain: str, surl: str, is_report_by_one: Optional[bool], is_report_by_site: Optional[bool], - is_report_by_wap: Optional[bool], has_evidence: Optional[bool], page: int, size: int): + cls, domain: str, surl: str, is_report_by_one: Optional[int], is_report_by_site: Optional[int], + is_report_by_wap: Optional[int], has_evidence: Optional[int], page: int, size: int + ): with Session(AppCtx.g_db_engine) as session: stmt = select(ReportUrlModel) @@ -27,22 +28,22 @@ class ReportURLService: if surl: stmt = stmt.where(ReportUrlModel.surl.like(f"%{surl}%")) total_stmt = total_stmt.where(ReportUrlModel.surl.like(f"%{surl}%")) - if is_report_by_one is not None: + if is_report_by_one and is_report_by_one != 2: stmt = stmt.where(ReportUrlModel.is_report_by_one == is_report_by_one) total_stmt = total_stmt.where(ReportUrlModel.is_report_by_one == is_report_by_one) - if is_report_by_site is not None: + if is_report_by_site and is_report_by_site != 2: stmt = stmt.where(ReportUrlModel.is_report_by_site == is_report_by_site) total_stmt = total_stmt.where(ReportUrlModel.is_report_by_site == is_report_by_site) - if is_report_by_wap is not None: + if is_report_by_wap and is_report_by_wap != 2: stmt = stmt.where(ReportUrlModel.is_report_by_wap == is_report_by_wap) total_stmt = total_stmt.where(ReportUrlModel.is_report_by_wap == is_report_by_wap) - if has_evidence is not None: + if has_evidence and has_evidence != 2: stmt = stmt.where(ReportUrlModel.has_evidence == has_evidence) total_stmt = total_stmt.where(ReportUrlModel.has_evidence == has_evidence) # 设置分页 stmt = stmt.offset((page - 1) * size).limit(size) - + # logger.debug(f"{str(stmt)=}") try: total = session.exec(total_stmt).first() urls = session.exec(stmt).all() @@ -103,10 +104,12 @@ class ReportURLService: stmt = update(ReportUrlModel).where(ReportUrlModel.id.in_(ids)) if report_by_wap: stmt = stmt.values(is_report_by_wap=False) - elif report_by_site: + if report_by_site: stmt = stmt.values(is_report_by_site=False) - elif report_by_one: + if report_by_one: stmt = stmt.values(is_report_by_one=False) + + logger.debug(f"{str(stmt)=}") session.exec(stmt) session.commit() return ApiResult.ok(len(ids)) diff --git a/fe/components.d.ts b/fe/components.d.ts index 65b1d0c..d5d5be1 100644 --- a/fe/components.d.ts +++ b/fe/components.d.ts @@ -12,16 +12,23 @@ declare module 'vue' { EditDomainDialog: typeof import('./src/components/EditDomainDialog.vue')['default'] ImportDomainDialog: typeof import('./src/components/ImportDomainDialog.vue')['default'] NButton: typeof import('naive-ui')['NButton'] + NCard: typeof import('naive-ui')['NCard'] NCheckbox: typeof import('naive-ui')['NCheckbox'] NConfigProvider: typeof import('naive-ui')['NConfigProvider'] NDataTable: typeof import('naive-ui')['NDataTable'] NDialogProvider: typeof import('naive-ui')['NDialogProvider'] + NDropdown: typeof import('naive-ui')['NDropdown'] NForm: typeof import('naive-ui')['NForm'] NFormItem: typeof import('naive-ui')['NFormItem'] + NFormItemGi: typeof import('naive-ui')['NFormItemGi'] + NGrid: typeof import('naive-ui')['NGrid'] NInput: typeof import('naive-ui')['NInput'] NInputNumber: typeof import('naive-ui')['NInputNumber'] NModal: typeof import('naive-ui')['NModal'] NPagination: typeof import('naive-ui')['NPagination'] + NSelect: typeof import('naive-ui')['NSelect'] + NSpace: typeof import('naive-ui')['NSpace'] + NSwitch: typeof import('naive-ui')['NSwitch'] NTag: typeof import('naive-ui')['NTag'] NTooltip: typeof import('naive-ui')['NTooltip'] RouterLink: typeof import('vue-router')['RouterLink'] diff --git a/fe/src/App.vue b/fe/src/App.vue index 5c0ed06..1a884ff 100644 --- a/fe/src/App.vue +++ b/fe/src/App.vue @@ -1,6 +1,13 @@ - + - + - - 立即开始采集 - + 立即开始采集 @@ -116,4 +138,4 @@ const handleClose = () => { - \ No newline at end of file + diff --git a/fe/src/components/EditDomainDialog.vue b/fe/src/components/EditDomainDialog.vue index 488e698..0be6d33 100644 --- a/fe/src/components/EditDomainDialog.vue +++ b/fe/src/components/EditDomainDialog.vue @@ -1,102 +1,115 @@ - - - - 你正在批量修改 {{ domainIds?.length }} 个域名的采集间隔。 - + + + + 你正在批量修改 {{ domainIds?.length }} 个域名的采集间隔。 + - - - - - - - 取消 - 确定 - - - \ No newline at end of file + + + + + + + 取消 + 确定 + + + diff --git a/fe/src/components/ImportDomainDialog.vue b/fe/src/components/ImportDomainDialog.vue index f239cd3..9024db9 100644 --- a/fe/src/components/ImportDomainDialog.vue +++ b/fe/src/components/ImportDomainDialog.vue @@ -1,14 +1,24 @@ - - + + @@ -109,9 +131,7 @@ const handleClose = () => { - - 立即开始采集 - + 立即开始采集 @@ -121,4 +141,4 @@ const handleClose = () => { - \ No newline at end of file + diff --git a/fe/src/main.ts b/fe/src/main.ts index 147e53b..1cb8b67 100644 --- a/fe/src/main.ts +++ b/fe/src/main.ts @@ -1,15 +1,15 @@ import './main.css' -import {createApp} from 'vue' -import {createPinia} from 'pinia' +import { createApp } from 'vue' +import { createPinia } from 'pinia' import App from './App.vue' import router from './router' -import axios from "axios" +import axios from 'axios' -import "vfonts/Lato.css" -import "vfonts/IBMPlexMono.css" +import 'vfonts/Lato.css' +import 'vfonts/IBMPlexMono.css' const app = createApp(App) @@ -19,7 +19,7 @@ app.use(router) const axiosInstance = axios.create({ withCredentials: true, timeout: 9000, - timeoutErrorMessage: "E_NETWORK_TIMEOUT", + timeoutErrorMessage: 'E_NETWORK_TIMEOUT', }) app.provide('axios', axiosInstance) diff --git a/fe/src/views/DomainManager.vue b/fe/src/views/DomainManager.vue index f2d1d64..fca6c0b 100644 --- a/fe/src/views/DomainManager.vue +++ b/fe/src/views/DomainManager.vue @@ -1,28 +1,43 @@ @@ -347,6 +400,20 @@ onMounted(async () => { 立即采集 + + + + + + + + + + 筛选 + 重置 + + + @@ -360,5 +427,4 @@ onMounted(async () => { - diff --git a/fe/src/views/UrlManager.vue b/fe/src/views/UrlManager.vue index 09de175..615285d 100644 --- a/fe/src/views/UrlManager.vue +++ b/fe/src/views/UrlManager.vue @@ -1,7 +1,379 @@ - + - DomainManager + + URL管理 + + + + + + + + + + + + + + + + + + + + + + + + + + 重置 + 搜索 + + + + + + + + + + 批量举报 ({{ checkedRowKeys.length }}) + + + 批量收集证据 ({{ checkedRowKeys.length }}) + + + + + + + + + + +
DomainManager