# 2025-03-30 22:49:37 +08:00
#
# 361 lines
# 18 KiB
# Python

import os.path
import random
import time
from urllib.parse import urlparse, parse_qs
import execjs
import requests
from loguru import logger
from sqlmodel import Session, select
from .base import BaseReporter
from ...config.config import AppCtx
from ...models.report_urls import ReportUrlModel
from ...utils.common import get_proxies, get_all_cookies, md5, generate_random_phone_number
from ...utils.ua import random_ua
from ...utils.ydm_verify import YdmVerify
class PcReporter(BaseReporter):
    """Reporter that files complaints through Baidu's PC report form.

    ``run`` walks every ``ReportUrlModel`` row whose ``is_report_by_one`` flag
    is still false, uploads the row's two screenshots, solves the rotation
    captcha served by passport.baidu.com and finally posts the report form.
    All file paths used here are relative to the process working directory.
    """

    # Seconds to wait for any single HTTP request before giving up.
    _REQUEST_TIMEOUT = 10
    # Maximum captcha + submit attempts per row.
    _MAX_ATTEMPTS = 3
    # Scratch file the captcha image is written to and read back from.
    _CAPTCHA_PATH = './captcha/captcha.png'
    # JavaScript bundle exposing ``getNewKey`` and ``getFs2``.
    _JS_PATH = './js/mkd_v2_link_submit.js'

    def __init__(self):
        self.engine_name = "PC_REPORTER"
        self.database = AppCtx.g_db_engine
        self.upload_pic_url = "http://jubao.baidu.com/jubao/accu/upload"
        self.report_url = "https://jubao.baidu.com/jubao/accu/submit"
        self.proxies = get_proxies()
        # NOTE(review): the three 'sec-ch-ua_wap*' names below are not the
        # standard client-hint header names ('sec-ch-ua', 'sec-ch-ua-mobile',
        # 'sec-ch-ua-platform'); they look like the result of a search/replace.
        # Left untouched because changing what is sent may affect how the
        # server treats the request - confirm before renaming.
        self.headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7,zh-TW;q=0.6',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Origin': 'https://jubao.baidu.com',
            'Pragma': 'no-cache',
            'Referer': "",
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': random_ua(),
            'X-Requested-With': 'XMLHttpRequest',
            'sec-ch-ua_wap': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
            'sec-ch-ua_wap-mobile': '?0',
            'sec-ch-ua_wap-platform': '"Windows"',
            "Cookie": "",
        }

    def run(self):
        """Report every row that has not been reported through this channel.

        Rows whose screenshots are missing or fail to upload are skipped.
        A row is flagged ``is_report_by_one`` and committed only after
        ``do_report`` returns True.
        """
        with Session(self.database) as session:
            # SQL expression: must stay ``== False`` (not ``is False``).
            stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_one == False)  # noqa: E712
            rows: list[ReportUrlModel] = session.exec(stmt).all()
            logger.info(f"[{self.engine_name}] 共计 {len(rows)} 条记录需要举报")
            for row in rows:
                # Pick a cookie for this row; without one no request can be
                # authenticated, so stop instead of crashing in random.choice.
                cookies = get_all_cookies()
                if not cookies:
                    logger.error(f"[{self.engine_name}] 没有可用的 cookie,终止举报")
                    return
                report_cookie = random.choice(cookies)
                self.headers["Cookie"] = report_cookie
                logger.debug(f"cookie: {report_cookie}")

                surl = row.surl
                domain = row.domain
                timestamp_s = int(time.time() * 1000)

                referer = "https://jubao.baidu.com/"
                logger.debug(f"referer: {referer}, type of referer: {type(referer)}")
                self.headers["Referer"] = referer

                # Both the PC and the WAP screenshot must exist on disk.
                img_path = f"./imgs/{domain}/{md5(surl)}.png"
                wap_img_path = f"./imgs/{domain}/{md5(surl)}-wap.png"
                if not (os.path.exists(img_path) and os.path.exists(wap_img_path)):
                    logger.debug(f"图片{img_path}{wap_img_path} 不存在")
                    continue

                # Upload the screenshots.
                # NOTE(review): the WAP filename is uploaded and checked but
                # only the PC filename is passed on to do_report - confirm
                # this is intended.
                img_filename = self.upload_report_pic(img_path)
                wap_img_filename = self.upload_report_pic(wap_img_path)
                logger.debug(f"{img_filename=}, {wap_img_filename=}")
                if not (img_filename and wap_img_filename):
                    logger.debug(f"图片 {img_path}{wap_img_path} 上传失败")
                    continue

                # Solve the captcha and submit; persist only on success.
                if self._report_with_retry(row, img_filename, timestamp_s):
                    row.is_report_by_one = True
                    session.add(row)
                    session.commit()

    def _report_with_retry(self, row, img_filename, timestamp_s):
        """Try captcha verification plus submission up to ``_MAX_ATTEMPTS`` times.

        Returns True as soon as ``do_report`` succeeds, False when every
        attempt failed either at the captcha or at the submission step.
        """
        surl, token, title, q = row.surl, row.token, row.title, row.q
        for _ in range(self._MAX_ATTEMPTS):
            verify_result = self.verify_captcha(surl, token, title, q, timestamp_s)
            if verify_result.get("op") != 1:
                logger.debug("验证码校验失败!")
                continue
            logger.info("验证码校验成功")
            ds = verify_result["ds"]
            tk = verify_result["tk"]
            if self.do_report(ds, tk, surl, token, title, q, img_filename):
                return True
        return False

    def do_report(self, ds, tk, surl, token, title, q, upload=''):
        """Post the report form for ``surl``.

        ``ds`` and ``tk`` are the values returned by a successful captcha
        verification; ``upload`` is the filename returned by
        ``upload_report_pic``.

        Returns True when the response ``status`` is 0 (logged as success) or
        4 (logged as a duplicate report); False for any other status or when
        the request or JSON parsing raises.
        """
        try:
            phone = generate_random_phone_number()
            data = {
                'problemtype': '11001',
                'keyword': q,
                'title': title,
                'token': token,
                'surl': surl,
                'url': '',
                'isnatural': '1',
                'hasGw': '0',
                'hasV': '0',
                'buzId': '1004',
                # NOTE(review): this value is already percent-encoded and is
                # form-encoded again by requests (so '%' is sent as '%25') -
                # confirm the server expects that.
                'key': 'OoMZm9dghrzLTSLyPl25VcvLBuavUH%2ByvFz%2BPsgntmcXR%2FVwnRoDbJcyjTRtW3sYx0psN8bB%2FY1ZfrCne2tgnYIowsBLFE3mSnrUfZH5L64qYVPFHFKNUZG9Ihzj5pdQIXfNkR7D1qZ%2BH5gwXOSmhlh8BgGVNOWegqhKBcHaMKs%3D%23%23htD3CYLHqsqXvHybBgjJov8Wxel8EYKJdU%2FLY4f8NrclL7Hu0%2Feuv9EKveT1%2FJ1WO53ihO%2FonJmjI9GOD3x%2BVqhQxnTlxGoOjSSOhGW5X%2B3PaNfdzOQv0epktgY1G08HNICi9ftmBMShrFImYz0ihI%2B9PpalvA3QzKROQzF85xw%3D%23%23CO3Pb0NDSZfr7Z2LcWQMhLIEbBDYBcD0tjS5Vcxo0O2e8i7%2FEz9XApnigZn5kMewEb0B1FHumPsFEJjvrJ4HwyxZ5LKkeJrmfr7SOa8v59Y57eCtmlXmQ8mHH6ER6UF%2Fv0V8YXk%2FPkLWfZfopL1DFPEkNDkrBiFgAFKW1hakUDM%3D',
                'description': f"{surl} 存在大量的淫秽色情信息被百度搜索引擎收录。用于给黄色网站引流。",
                'upload': upload,
                'phone': phone,
                'email': f"{phone}@qq.com",
                'mobile_net_type': '4g',
                'tk': tk,
                'ds': ds,
            }
            response = requests.post(
                self.report_url,
                headers=self.headers,
                data=data,
                allow_redirects=False,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            )
            # Parse the body once instead of once per branch.
            payload = response.json()
            status = payload.get('status')
            if status == 0:
                logger.success(f"{surl} 举报成功")
                return True
            if status == 4:
                logger.warning(f"{surl} 重复举报")
                return True
            logger.warning(f"{surl} 举报失败,response.json()={payload!r}")
            return False
        except Exception as e:
            logger.error(f'{e}')
            return False

    def upload_report_pic(self, img_path: str):
        """Upload one screenshot and return the filename assigned by the server.

        The filename is taken from the ``filename`` query parameter of the URL
        found in the response's ``data`` field. Returns None when that
        parameter is absent or when anything raises.
        """
        try:
            with open(img_path, "rb") as fp:
                files = {
                    "upfile": (f"{int(time.time() * 1000)}.jpg", fp, "image/png")
                }
                response = requests.post(
                    self.upload_pic_url,
                    files=files,
                    data={"index": 0},
                    proxies=self.proxies,
                    headers=self.headers,
                    timeout=self._REQUEST_TIMEOUT,
                )
            result_url = response.json()["data"]
            query_params = parse_qs(urlparse(result_url).query)
            return query_params.get("filename", [None])[0]
        except Exception as e:
            logger.error(f"[{self.engine_name}] 上传图片 {img_path} 失败,错误: {e}")
            return None

    def verify_captcha(self, surl, token, title, q, timestamp_s):
        """Run the whole captcha flow: init -> style -> download -> solve -> log.

        Returns a dict with keys ``ds``, ``tk`` and ``op`` taken from the
        ``/cap/log`` response. Any failure along the way yields ``{'op': 3}``,
        which callers treat as "not verified" (they check ``op == 1``).
        """
        try:
            # Fetch the ``as`` / ``tk`` pair.
            init_result = self.post_init(surl, token, title, q, timestamp_s)
            get_as = init_result['as']
            get_tk = init_result['tk']

            # Fetch the captcha image link and ``backstr``.
            style_result = self.get_style(get_tk, surl, token, title, q, timestamp_s)
            get_backstr = style_result['backstr']
            pic_download_link = style_result['captcha']

            # Download the image. An explicit False means the download failed;
            # stop here so a stale image from an earlier attempt is not solved.
            if self.download_captcha(pic_download_link) is False:
                return {'op': 3}

            rotate_angle_rate = self.get_rotate_angle_rate()
            log_result = self.post_log(get_as, get_tk, get_backstr, rotate_angle_rate)
            return {
                'ds': log_result['ds'],
                'tk': log_result['tk'],
                'op': log_result['op'],
            }
        except Exception as e:
            # Helpers return None on failure, which surfaces here as TypeError.
            logger.error(f'{e}')
            return {'op': 3}

    @staticmethod
    def _build_refer(surl, token, title, q, timestamp_s):
        """Build the ``refer`` value shared by the init and style requests."""
        return (
            f"http://jubao.baidu.com/jubao/accu/?surl={surl}&token={token}"
            f"&title={title}&q={q}&has_gw=0&has_v=0&_t8={timestamp_s}"
        )

    def post_init(self, surl, token, title, q, timestamp_s):
        """Call ``/cap/init`` and return ``{'as': ..., 'tk': ...}``, or None on failure."""
        try:
            url = "https://passport.baidu.com/cap/init"
            data = {
                "_": int(time.time() * 1000),
                "refer": self._build_refer(surl, token, title, q, timestamp_s),
                "ak": self.get_ak(),
                "ver": "2",
                "scene": "",
                "ds": "",
                "tk": "",
                "as": "",
                "reinit": 0
            }
            response = requests.post(
                url,
                headers=self.headers,
                data=data,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            ).json()
            # Per the original notes: a wrong ``tk`` gives op=3 later on, and
            # a wrong ``as`` makes verification fail.
            return {
                "as": response["data"]["as"],
                "tk": response["data"]["tk"]
            }
        except Exception as e:
            logger.error(f"[post_init]验证码识别失败{e}")
            return None

    def get_style(self, get_tk, surl, token, title, q, timestamp_s):
        """Call ``/cap/style`` and return the captcha image link and ``backstr``.

        Returns ``{'backstr': ..., 'captcha': ...}``, or None on failure.
        """
        try:
            url = "https://passport.baidu.com/cap/style"
            data = {
                "_": int(time.time() * 1000),
                "refer": self._build_refer(surl, token, title, q, timestamp_s),
                "ak": self.get_ak(),
                "tk": get_tk,
                "scene": "",
                "isios": "0",
                "type": "spin",
                "ver": "2"
            }
            response = requests.post(
                url,
                headers=self.headers,
                data=data,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            ).json()
            # Per the original notes: a wrong ``backstr`` is rejected by the
            # server with a "security risk" error.
            return {
                "backstr": response["data"]["backstr"],
                "captcha": response["data"]["captchalist"][0]["source"]["back"]["path"],
            }
        except Exception as e:
            logger.error(f"[get_style] 验证码识别失败{e}")
            return None

    def download_captcha(self, img_url):
        """Download the captcha image to ``_CAPTCHA_PATH``.

        Returns True when the image was written, False when the request
        failed, returned an HTTP error status, or the file could not be
        written.
        """
        try:
            response = requests.get(
                img_url,
                headers=self.headers,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            )
            # Do not save an HTTP error page as if it were the captcha image.
            response.raise_for_status()
            os.makedirs(os.path.dirname(self._CAPTCHA_PATH), exist_ok=True)
            with open(self._CAPTCHA_PATH, 'wb') as pic:
                pic.write(response.content)
            logger.success('captcha download success!')
            return True
        except Exception as e:
            logger.error(f"[download_captcha] 验证码识别失败 {e}")
            return False

    @staticmethod
    def get_rotate_angle_rate():
        """Solve the saved captcha and return the rotation as a fraction of 360.

        The image at ``_CAPTCHA_PATH`` is passed to ``YdmVerify.rotate``; its
        result is divided by 360 and rounded to two decimals.
        """
        solver = YdmVerify()
        with open(PcReporter._CAPTCHA_PATH, 'rb') as p:
            picture = p.read()
        rotate_angle = solver.rotate(image=picture)
        return round(rotate_angle / 360, 2)

    @staticmethod
    def _compile_js():
        """Read the JavaScript bundle at ``_JS_PATH`` and compile it with execjs."""
        with open(PcReporter._JS_PATH, 'r', encoding='utf-8') as f:
            return execjs.compile(f.read())

    @staticmethod
    def get_key(get_as):
        """Return ``getNewKey(get_as)`` from the JS bundle, or None on failure."""
        try:
            return PcReporter._compile_js().call('getNewKey', get_as)
        except Exception as e:
            logger.error(f"[get_key]验证码识别失败 {e}")
            return None

    def post_log(self, get_as, tk, back_str, rotate_angle_rate):
        """Submit the captcha answer to ``/cap/log``.

        ``fs`` is computed by the JS function ``getFs2`` from ``back_str``,
        ``rotate_angle_rate`` and ``get_as``.

        Returns ``{'ds': ..., 'op': ..., 'tk': ...}`` from the response, or
        None on failure.
        """
        try:
            time_log = str(int(time.time() * 1000))
            fs = self._compile_js().call('getFs2', back_str, rotate_angle_rate, get_as)
            url = "https://passport.baidu.com/cap/log"
            data = {
                "_": time_log,
                "refer": "https://aigc.baidu.com/works",
                "ak": self.get_ak(),
                "as": get_as,
                "scene": "",
                "tk": tk,
                "ver": "2",
                "cv": "submit",
                "typeid": "spin-0",
                # Per the original notes: fuid is a browser fingerprint that
                # stays stable over short periods and differs between browsers
                # (samples were 1280 characters long and diverged from roughly
                # character 235 onward).
                "fuid": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnfaJTn/Ne60I9LwR04t6XmGEimjy3MrXEpSuItnI4KD0FJKzTbw1AN69fBnzR2FuvMmmQZ+1zgJ72wdcVU+mcQxiE2ir0+TEYgjPJt1Qa3K1mLi+P4IWJeag2lvxB4yJ/GgLbz7OSojK1zRbqBESR5Pdk2R9IA3lxxOVzA+Iw1TWLSgWjlFVG9Xmh1+20oPSbrzvDjYtVPmZ+9/6evcXmhcO1Y58MgLozKnaQIaLfWRPAn9I0uOqAMff6fuUeWcH1mRYoTw2Nhr4J4agZi377iM/izL6cVCGRy2F8c0VpEvM5FjnYxYstXg/9EfB3EVmKAfzNRIeToJ5YV9twMcgdmlV1Uhhp5FAe6gNJIUptp7EMAaXYKm11G+JVPszQFdp9AJLcm4YSsYUXkaPI2Tl66J246cmjWQDTahAOINR5rXR5r/7VVI1RMZ8gb40q7az7vCK56XLooKT5a+rsFrf5Zu0yyCiiagElhrTEOtNdBJJq8eHwEHuFBni9ahSwpC7lbKkUwaKH69tf0DFV7hJROiLETSFloIVkHdy3+I2JUr1LsplAz0hMkWt/tE4tXVUV7QcTDTZWS/2mCoS/GV3N9awQ6iM6hs/BWjlgnEa1+5iP7WSc7RJ34FaE5PsyGXyoCWdXwNRGSZPSvVtB/Ea6w5FKazXcZ/j40FJv+iLGBn3nkkgHlne61I8I7KhtQgIkmBMJIjPMkS/L051MeqdGScsKYTJuSucgI5c3+79eVH+y2TvbOTuuHv1uGxwXFb2atIU1ZYPbmmXculmizKcKI/s44qf8uM8iBZLGkKeVyL74aPyLkg7Gk359g98BIGN/ZzJR/h+Y6AyFx+HlMoYJnS06dVmqFbvlCtSdGylKQ5f8eWtxPkJGqOFtWjIVteQYMsH/AaSJonqw+WLiZvGjYfm9p0alEyujapoTy77HzDcUoU1wUSXa5xS/Z6hXEr2OnLi0LdPVcGjz8lpLcdVeSfm9p0alEyujapoTy77HzDWf5PERRSTFqLd9BTUHLyY4Ji3EQLGQPaM1aeHxG1bJZH0s1Si/KwzTaTYzu6ziQiqwcr2kaYUiH+fMOxn69/BhNJVMhpQkhprc1KZuJRvXjppq0gKweencPxgS/jd0rjw==",
                "fs": fs
            }
            response = requests.post(
                url,
                headers=self.headers,
                data=data,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            ).json()
            return {
                "ds": response["data"]["ds"],
                "op": response["data"]["op"],
                "tk": response["data"]["tk"]
            }
        except Exception as e:
            logger.error(f"[post_log] 验证码识别失败 {e}")
            return None

    @staticmethod
    def get_ak():
        """Return the fixed ``ak`` value sent with every captcha request."""
        return "76AKmP4xDQjB3vAIPef3KxOlJZWCpw64"