import os.path
import random
import time
from urllib.parse import urlparse, parse_qs

import execjs
import requests
from loguru import logger
from sqlmodel import Session, select

from .base import BaseReporter
from ...config.config import AppCtx
from ...models.report_urls import ReportUrlModel
from ...utils.common import get_proxies, get_all_cookies, md5, generate_random_phone_number
from ...utils.ua import random_ua
from ...utils.ydm_verify import YdmVerify


class PcReporter(BaseReporter):
    """Submit pending ``ReportUrlModel`` rows to jubao.baidu.com.

    For every row whose ``is_report_by_one`` flag is false the reporter
    uploads the stored screenshot, solves the rotate captcha served by
    passport.baidu.com/cap, submits the report form and, on success, marks
    the row as reported.
    """

    # Seconds to wait on every outbound HTTP request.
    _REQUEST_TIMEOUT = 10
    # Captcha + submit attempts per row before moving on to the next row.
    _MAX_ATTEMPTS = 3
    # Scratch file the downloaded captcha image is written to / read from.
    _CAPTCHA_PATH = './captcha/captcha.png'
    # JavaScript bundle exposing `getNewKey` / `getFs2`.
    _DS_JS_PATH = './js/mkd_v2_link_submit.js'

    def __init__(self):
        """Set endpoints, proxies and the shared request headers."""
        self.engine_name = "PC_REPORTER"
        self.database = AppCtx.g_db_engine
        self.upload_pic_url = "http://jubao.baidu.com/jubao/accu/upload"
        self.report_url = "https://jubao.baidu.com/jubao/accu/submit"
        self.proxies = get_proxies()
        # Shared by every request; "Referer" and "Cookie" are overwritten
        # per row in run().
        # NOTE(review): the 'sec-ch-ua_wap*' header names look like a
        # search/replace artifact of 'sec-ch-ua*' -- confirm before changing,
        # they are sent to the server as-is.
        self.headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7,zh-TW;q=0.6',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Origin': 'https://jubao.baidu.com',
            'Pragma': 'no-cache',
            'Referer': "",
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': random_ua(),
            'X-Requested-With': 'XMLHttpRequest',
            'sec-ch-ua_wap': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
            'sec-ch-ua_wap-mobile': '?0',
            'sec-ch-ua_wap-platform': '"Windows"',
            "Cookie": "",
        }

    def run(self):
        """Report every row that has not been reported yet.

        A row is skipped when its screenshot is missing or cannot be
        uploaded. Each remaining row gets up to ``_MAX_ATTEMPTS`` captcha +
        submit attempts; the row is flagged and committed as soon as one
        attempt succeeds.
        """
        with Session(self.database) as session:
            # `== False` is deliberate: the comparison builds a SQL expression,
            # so it must not be rewritten as `is False` / `not ...`.
            stmt = select(ReportUrlModel).where(ReportUrlModel.is_report_by_one == False)  # noqa: E712
            rows: list[ReportUrlModel] = session.exec(stmt).all()
            logger.info(f"[{self.engine_name}] 共计 {len(rows)} 条记录需要举报")

            for row in rows:
                # Pick a cookie for this row. random.choice() raises on an
                # empty sequence, so bail out cleanly instead of crashing.
                cookies = get_all_cookies()
                if not cookies:
                    logger.error(f"[{self.engine_name}] no cookies available, stop reporting")
                    return
                report_cookie = random.choice(cookies)
                self.headers["Cookie"] = report_cookie
                logger.debug(f"cookie: {report_cookie}")

                surl = row.surl
                q = row.q
                token = row.token
                title = row.title
                domain = row.domain
                timestamp_s = int(time.time() * 1000)

                # A commented-out earlier version sent the full report-page URL
                # as Referer; the site root is what is sent now.
                referer = "https://jubao.baidu.com/"
                logger.debug(f"referer: {referer}, type of referer: {type(referer)}")
                self.headers["Referer"] = referer

                img_filename = self._upload_screenshots(domain, surl)
                if not img_filename:
                    continue

                # Solve the captcha and submit; both steps are retried together.
                for _ in range(self._MAX_ATTEMPTS):
                    verify_result = self.verify_captcha(surl, token, title, q, timestamp_s)
                    if verify_result["op"] != 1:
                        logger.debug("验证码校验失败!")
                        continue

                    logger.info("验证码校验成功")
                    ds = verify_result["ds"]
                    tk = verify_result["tk"]
                    if self.do_report(ds, tk, surl, token, title, q, img_filename):
                        # Reported (or already reported): persist the flag.
                        row.is_report_by_one = True
                        session.add(row)
                        session.commit()
                        break

    def _upload_screenshots(self, domain, surl):
        """Check and upload the screenshots for one row.

        Returns the server-side filename of the main screenshot, or ``None``
        when the row must be skipped (a required image is missing or the main
        upload failed).
        """
        want_wap = AppCtx.g_app_config.wap_screenshot

        img_path = f"./imgs/{domain}/{md5(surl)}.png"
        if not os.path.exists(img_path):
            logger.warning(f"图片{img_path}不存在")
            return None

        wap_img_path = f"./imgs/{domain}/{md5(surl)}-wap.png"
        if want_wap and not os.path.exists(wap_img_path):
            logger.warning(f"图片{wap_img_path}不存在")
            return None

        img_filename = self.upload_report_pic(img_path)
        logger.debug(f"{img_filename=}")
        if not img_filename:
            logger.warning(f"图片 {img_path} 上传失败")
            return None

        if want_wap:
            # NOTE(review): the WAP upload result is not passed to do_report();
            # a failure here is only logged and does not skip the row.
            wap_img_filename = self.upload_report_pic(wap_img_path)
            logger.debug(f"{wap_img_filename=}")
            if not wap_img_filename:
                # Log the path that failed (the filename is always empty here).
                logger.warning(f"图片 {wap_img_path} 上传失败")

        return img_filename

    def do_report(self, ds, tk, surl, token, title, q, upload=''):
        """Submit the report form.

        Args:
            ds, tk: values returned by a successful captcha verification.
            surl, token, title, q: fields of the row being reported.
            upload: server-side filename returned by upload_report_pic().

        Returns:
            True when the server answers status 0 (accepted) or 4 (logged
            here as a duplicate report); False on any other status or on any
            exception (which is logged).
        """
        try:
            phone = generate_random_phone_number()
            data = {
                'problemtype': '11001',
                'keyword': q,
                'title': title,
                'token': token,
                'surl': surl,
                'url': '',
                'isnatural': '1',
                'hasGw': '0',
                'hasV': '0',
                'buzId': '1004',
                'key': 'OoMZm9dghrzLTSLyPl25VcvLBuavUH%2ByvFz%2BPsgntmcXR%2FVwnRoDbJcyjTRtW3sYx0psN8bB%2FY1ZfrCne2tgnYIowsBLFE3mSnrUfZH5L64qYVPFHFKNUZG9Ihzj5pdQIXfNkR7D1qZ%2BH5gwXOSmhlh8BgGVNOWegqhKBcHaMKs%3D%23%23htD3CYLHqsqXvHybBgjJov8Wxel8EYKJdU%2FLY4f8NrclL7Hu0%2Feuv9EKveT1%2FJ1WO53ihO%2FonJmjI9GOD3x%2BVqhQxnTlxGoOjSSOhGW5X%2B3PaNfdzOQv0epktgY1G08HNICi9ftmBMShrFImYz0ihI%2B9PpalvA3QzKROQzF85xw%3D%23%23CO3Pb0NDSZfr7Z2LcWQMhLIEbBDYBcD0tjS5Vcxo0O2e8i7%2FEz9XApnigZn5kMewEb0B1FHumPsFEJjvrJ4HwyxZ5LKkeJrmfr7SOa8v59Y57eCtmlXmQ8mHH6ER6UF%2Fv0V8YXk%2FPkLWfZfopL1DFPEkNDkrBiFgAFKW1hakUDM%3D',
                'description': f"{surl} 存在大量的淫秽色情信息被百度搜索引擎收录。用于给黄色网站引流。",
                'upload': upload,
                'phone': phone,
                'email': f"{phone}@qq.com",
                'mobile_net_type': '4g',
                'tk': tk,
                'ds': ds,
            }
            response = requests.post(
                self.report_url,
                headers=self.headers,
                data=data,
                allow_redirects=False,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            )
            # Decode the body once instead of once per branch.
            payload = response.json()
            status = payload.get('status')
            if status == 0:
                logger.success(f"{surl} 举报成功")
                return True
            if status == 4:
                logger.warning(f"{surl} 重复举报")
                return True
            logger.warning(f"{surl} 举报失败,response.json()={payload!r}")
            return False
        except Exception as e:
            logger.error(f'{e}')
            return False

    def upload_report_pic(self, img_path: str):
        """Upload one screenshot and return its server-side filename.

        The endpoint answers with a URL in ``data``; the filename is that
        URL's ``filename`` query parameter.

        Returns:
            The filename string, or ``None`` when the parameter is absent or
            anything goes wrong (the error is logged).
        """
        try:
            with open(img_path, "rb") as fp:
                # NOTE(review): the upload is named *.jpg but sent as
                # image/png; kept as-is -- confirm this is what the endpoint
                # expects.
                files = {
                    "upfile": (f"{int(time.time() * 1000)}.jpg", fp, "image/png")
                }
                form = {
                    "index": 0
                }
                response = requests.post(
                    self.upload_pic_url,
                    files=files,
                    data=form,
                    proxies=self.proxies,
                    headers=self.headers,
                    timeout=self._REQUEST_TIMEOUT,
                )
            result_url = response.json()["data"]
            query_params = parse_qs(urlparse(result_url).query)
            return query_params.get("filename", [None])[0]
        except Exception as e:
            logger.error(f"[{self.engine_name}] 上传图片 {img_path} 失败,错误: {e}")
            return None

    def verify_captcha(self, surl, token, title, q, timestamp_s):
        """Run one full captcha round: init -> style -> download -> solve -> log.

        Returns:
            ``{'ds', 'tk', 'op'}`` taken from the /cap/log response when the
            round completes (``op == 1`` is what run() treats as success), or
            ``{'op': 3}`` when any step fails.
        """
        failed = {'op': 3}
        try:
            # Step 1: obtain the `as` / `tk` pair.
            init_data = self.post_init(surl, token, title, q, timestamp_s)
            if init_data is None:
                return failed
            get_as = init_data['as']
            get_tk = init_data['tk']

            # Step 2: obtain the captcha image link and `backstr`.
            style_data = self.get_style(get_tk, surl, token, title, q, timestamp_s)
            if style_data is None:
                return failed
            get_backstr = style_data['backstr']
            pic_download_link = style_data['captcha']

            # Step 3: download the image. Stop on failure, otherwise a stale
            # captcha.png left by an earlier attempt would be solved instead.
            if not self.download_captcha(pic_download_link):
                return failed

            # Step 4: solve it and post the answer.
            rotate_angle_rate = self.get_rotate_angle_rate()
            log_data = self.post_log(get_as, get_tk, get_backstr, rotate_angle_rate)
            if log_data is None:
                return failed

            return {
                'ds': log_data['ds'],
                'tk': log_data['tk'],
                'op': log_data['op'],
            }
        except Exception as e:
            logger.error(f'{e}')
            return failed

    @staticmethod
    def _build_refer(surl, token, title, q, timestamp_s):
        """Build the `refer` value sent to the /cap/init and /cap/style calls."""
        return (
            f"http://jubao.baidu.com/jubao/accu/?surl={surl}&token={token}"
            f"&title={title}&q={q}&has_gw=0&has_v=0&_t8={timestamp_s}"
        )

    def post_init(self, surl, token, title, q, timestamp_s):
        """Call /cap/init.

        Returns:
            ``{'as': ..., 'tk': ...}`` from the response, or ``None`` on any
            failure (the error is logged).
        """
        try:
            url = "https://passport.baidu.com/cap/init"
            data = {
                "_": int(time.time() * 1000),
                "refer": self._build_refer(surl, token, title, q, timestamp_s),
                "ak": self.get_ak(),
                "ver": "2",
                "scene": "",
                "ds": "",
                "tk": "",
                "as": "",
                "reinit": 0
            }
            response = requests.post(
                url,
                headers=self.headers,
                data=data,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            ).json()
            # Original author's notes: a wrong `tk` yields op=3; a wrong `as`
            # fails verification.
            return {
                "as": response["data"]["as"],
                "tk": response["data"]["tk"]
            }
        except Exception as e:
            logger.error(f"[post_init]验证码识别失败{e}")
            return None

    def get_style(self, get_tk, surl, token, title, q, timestamp_s):
        """Call /cap/style to get the captcha image link and ``backstr``.

        Returns:
            ``{'backstr': ..., 'captcha': <image url>}``, or ``None`` on any
            failure (the error is logged).
        """
        # An alternative endpoint noted by the original author:
        # "https://wappass.baidu.com/cap/style"
        try:
            url = "https://passport.baidu.com/cap/style"
            data = {
                "_": int(time.time() * 1000),
                "refer": self._build_refer(surl, token, title, q, timestamp_s),
                "ak": self.get_ak(),
                "tk": get_tk,
                "scene": "",
                "isios": "0",
                "type": "spin",
                "ver": "2"
            }
            response = requests.post(
                url,
                headers=self.headers,
                data=data,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            ).json()
            # Original author's note: a wrong backstr makes the server answer
            # with a "security risk" error.
            # Other fields the original author looked at but does not use:
            #   response["data"]['ext']['p']['q']['spin-0']
            #   response["data"]['ext']['p']['c']
            return {
                "backstr": response["data"]["backstr"],
                "captcha": response["data"]["captchalist"][0]["source"]["back"]["path"],
            }
        except Exception as e:
            logger.error(f"[get_style] 验证码识别失败{e}")
            return None

    def download_captcha(self, img_url) -> bool:
        """Download the captcha image to ``_CAPTCHA_PATH``.

        Returns:
            True when the image was fetched and written, False otherwise (the
            error is logged). Callers must not solve the captcha on False,
            because the file on disk may belong to a previous attempt.
        """
        try:
            img_download = requests.get(
                img_url,
                headers=self.headers,
                timeout=self._REQUEST_TIMEOUT,
            )
            # An HTTP error body is not a captcha image; do not save it.
            img_download.raise_for_status()
            os.makedirs(os.path.dirname(self._CAPTCHA_PATH), exist_ok=True)
            with open(self._CAPTCHA_PATH, 'wb') as pic:
                pic.write(img_download.content)
            logger.success('captcha download success!')
            return True
        except Exception as e:
            logger.error(f"[download_captcha] 验证码识别失败 {e}")
            return False

    @staticmethod
    def get_rotate_angle_rate():
        """Solve the downloaded rotate captcha.

        Sends the saved image to ``YdmVerify.rotate`` and converts its result
        to a fraction of a full turn (result / 360, rounded to 2 decimals).
        Exceptions propagate to the caller.
        """
        solver = YdmVerify()
        with open(PcReporter._CAPTCHA_PATH, 'rb') as p:
            picture = p.read()
        # presumably an angle in degrees -- the code divides it by 360
        rotate_angle = solver.rotate(image=picture)
        return round(rotate_angle / 360, 2)

    @staticmethod
    def get_key(get_as):
        """Run ``getNewKey(get_as)`` from the JS bundle.

        Returns the JS result, or ``None`` on any failure (the error is
        logged). Not called by the current flow; kept for callers elsewhere.
        """
        try:
            with open(PcReporter._DS_JS_PATH, 'r', encoding='utf-8') as f:
                ds_js = f.read()
            return execjs.compile(ds_js).call('getNewKey', get_as)
        except Exception as e:
            logger.error(f"[get_key]验证码识别失败 {e}")
            return None

    def post_log(self, get_as, tk, back_str, rotate_angle_rate):
        """Post the captcha answer to /cap/log.

        The ``fs`` field is produced by ``getFs2(back_str, rotate_angle_rate,
        get_as)`` from the JS bundle.

        Returns:
            ``{'ds', 'op', 'tk'}`` from the response, or ``None`` on any
            failure (the error is logged).
        """
        try:
            time_log = str(int(time.time() * 1000))
            with open(self._DS_JS_PATH, 'r', encoding='utf-8') as f:
                ds_js = f.read()
            fs = execjs.compile(ds_js).call('getFs2', back_str, rotate_angle_rate, get_as)

            url = "https://passport.baidu.com/cap/log"
            data = {
                "_": time_log,
                "refer": "https://aigc.baidu.com/works",
                "ak": self.get_ak(),
                "as": get_as,
                "scene": "",
                "tk": tk,
                "ver": "2",
                "cv": "submit",
                "typeid": "spin-0",
                # Original author's notes: fuid is a fingerprint; it does not
                # change over a short period and differs between browsers.
                # Edge:
                # "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnCfjdYr2J6wEsox+bQtrTEGEimjy3MrXEpSuItnI4KDyOhCNLIvGcJ9TrqWJqhR97vnz96e18U/ntNdoDIMLzy/6P9rOWdIYWmTQAeeG69438PcpN++VzDmPtrURexo5YYWpVFkRs9k5n0AC3djzGRuXr1+yVZXtGGofFzxBmdr9HtaANtMMPysO2XXACLNUNkLLWJN9fLc3OAWce48tpeQQ2ufd7knx4Oo6OM0PpOVDwQcezbGX85VEmymh7f7M5kIyVl+w5yn2AY4BmLrEWEsyw9SzzW8eHQ5zYIUjiw9hXi7OMMoCL+ptOvZYbHZs0R5qLHmmDCW1M8MMX5yyJF0BV1dQvKslKnAJwZu4XCbsXKn3UXosU1U30/poiN2VeXkBPeo8+Xj/4BIoC2I7WZ6zkFa/Uwd5SvC91kvff2a/Z4OwyTQNM7ES9HmRhChdWg0SJ2xEs1aiXAit16RiTlf82esJH+X/j52G7R3ErwQeJT3QoDv64R2702+8NbGIjf1ZOfxhUCpmJqV4jeHSaHRmnKgJZsK91XhhrdJKXdsbt3phIOpxGLupULr2K+v1DNdId8/HuE0776+tTpUl7shVCeM/XWrdkhru42pifhiujnDhIblsLt8grnj5/GRqcD6ZPAXqJW3lLc0/ub9jXgvXK/EczRgKl+7/tTBkPTCrUVtajA0luHLQOrVsXuN1v0/PR3i09SuFzZJkJBKE3M6rYvPttK9NQiBxhxYWDhX82uQu2XK8+8oU3gxCIaJwsQmX/It0kaZ45PZHFqtD40uOX0sXuThvUin4N4RSI2G9d7jPkj5hbBFquQKM4S+tDJ34jmplOTrqqKT7PPVfrdgd4OkK13pEy86BsJ8M0gKXgtivUgM8Bjl1m/pkg0SuDyntWLdrmMxcZYvgySvSSwQ2Qtm8EkKHIMyR/XgfHnpX5vadGpRMro2qaE8u+x8w1gJHIRKib2u6Q1JtQiZE1Rde/vRx8xKfg6uYR37n0BvfgJE5+KbeuwCyAvJRGUA2fpt0VClIfV0m2PRG7bvH00OODKY6cFi7NgWAK6Jc1G4Ugkfp7W8I0ZYwNpTTxVoxIIBF37aBhyiPWPAOeYXBqA"
                # Chrome:
                # "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49IZbsw3/U3NYEqD0LjhKzgMn8fIES5OyXlgwN5I+F8wHowpWWfXkQJw8/9AsO5Q2VOvnc2JlHGIlGS8Vq2z4OA80lVLon08EG3PPxkVZGm39fDi2exK9NDrZB+tNLX6ISxE5PzBgXpCOJ6oP9F1B0OBWaCMD/m01n8FhdDNCvP8EO5cetU79+pgL+ECRdtN6V4VElGJE0mxV4+4Zq4Jf/Xe/q8CkoTNf7Ti1glGYmN32UM9dg0uX+VzET/mmTRe4Dt+MuVHSzsI/bKCjPbpaOqfM8UsxDJUG9hyrGZ8QHa1kC04aTxkkTxI275dv3+ijS1zkWOdjFiy1eD/0R8HcRWYp2smk9EmXBkIAHL4H0gC9lQtdjey37/kyl4JA9Fp4zjuVO0arsD8MrGy1divU++B1KdawGqXpnbOcHZ3CctNGrpgmswaScc6DNWb34jFj0X3tdRE0uuHuqiYa5BClFS2V0TCorKi4CobgR419xWaX8IKLJiaNNLOShWdZdlQO2DXXVxcinzKHqUvWTYx45jsiUVlY78AHQGol6CJLQQ8Q797MShlazvdSwPXgJP5z0uMJp9L+3x/Y2GGhW5sit55sFuMXafALTYf69FCUw5+nVIRs150a4+KK+tA0Eu7Itiu3dM2pflKYWwPE6SDZznyejQ08vd+HpXRB/zhfSUcIYlT5gFEiMIA6SXZCo/XT7vC8D3gHdN+yr46XdVol/WkjFQof0JQH/Vhjj5C1xcAyNxq/VVBT01vdKk6zo6c08e84FEVMLd0m3XWtjFOYu7wRI7lldw2pSxyGnWvA4aiYWcWvvKNJtqB8wHqc5RPr9KRzhbxJnTM5K1vTx4xT/1ZUR3pU7nQKZo/4kP9XycIr/Jg3XMRSnqCBUJlagKAFPt2HF0LdsSk4WWcldb97Ar584nVGbSjPXEUVH0VgbUEm+dADzPoLP+NPMYOyhwgfADiqWaXyKT4UNESYXsPBkdGk6mLCaNSEQsDN1G2677Se3qjzDcyXBnEmHEFptRbmyJzKJ73veHPqfFYtsHO9jH0XnhYk8zKdRuqQ7dnuNIDwxm3UCPo22uFI0ZcgPvQm01s+8jYiMEFJDVra9jWyWTdMpMuhT3p2yYLf70CvUwIkw="
                # Original note: "fuid.length = 1280, changes at length 235";
                # the fragment recorded alongside it:
                # FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnCfjdYr2J6wEsox+bQtrTEGEimjy3MrXEpSuItnI4KDyOhCNLIvGcJ9TrqWJqhR97
                "fuid": "FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnfaJTn/Ne60I9LwR04t6XmGEimjy3MrXEpSuItnI4KD0FJKzTbw1AN69fBnzR2FuvMmmQZ+1zgJ72wdcVU+mcQxiE2ir0+TEYgjPJt1Qa3K1mLi+P4IWJeag2lvxB4yJ/GgLbz7OSojK1zRbqBESR5Pdk2R9IA3lxxOVzA+Iw1TWLSgWjlFVG9Xmh1+20oPSbrzvDjYtVPmZ+9/6evcXmhcO1Y58MgLozKnaQIaLfWRPAn9I0uOqAMff6fuUeWcH1mRYoTw2Nhr4J4agZi377iM/izL6cVCGRy2F8c0VpEvM5FjnYxYstXg/9EfB3EVmKAfzNRIeToJ5YV9twMcgdmlV1Uhhp5FAe6gNJIUptp7EMAaXYKm11G+JVPszQFdp9AJLcm4YSsYUXkaPI2Tl66J246cmjWQDTahAOINR5rXR5r/7VVI1RMZ8gb40q7az7vCK56XLooKT5a+rsFrf5Zu0yyCiiagElhrTEOtNdBJJq8eHwEHuFBni9ahSwpC7lbKkUwaKH69tf0DFV7hJROiLETSFloIVkHdy3+I2JUr1LsplAz0hMkWt/tE4tXVUV7QcTDTZWS/2mCoS/GV3N9awQ6iM6hs/BWjlgnEa1+5iP7WSc7RJ34FaE5PsyGXyoCWdXwNRGSZPSvVtB/Ea6w5FKazXcZ/j40FJv+iLGBn3nkkgHlne61I8I7KhtQgIkmBMJIjPMkS/L051MeqdGScsKYTJuSucgI5c3+79eVH+y2TvbOTuuHv1uGxwXFb2atIU1ZYPbmmXculmizKcKI/s44qf8uM8iBZLGkKeVyL74aPyLkg7Gk359g98BIGN/ZzJR/h+Y6AyFx+HlMoYJnS06dVmqFbvlCtSdGylKQ5f8eWtxPkJGqOFtWjIVteQYMsH/AaSJonqw+WLiZvGjYfm9p0alEyujapoTy77HzDcUoU1wUSXa5xS/Z6hXEr2OnLi0LdPVcGjz8lpLcdVeSfm9p0alEyujapoTy77HzDWf5PERRSTFqLd9BTUHLyY4Ji3EQLGQPaM1aeHxG1bJZH0s1Si/KwzTaTYzu6ziQiqwcr2kaYUiH+fMOxn69/BhNJVMhpQkhprc1KZuJRvXjppq0gKweencPxgS/jd0rjw==",
                "fs": fs
            }
            response = requests.post(
                url,
                headers=self.headers,
                data=data,
                proxies=self.proxies,
                timeout=self._REQUEST_TIMEOUT,
            ).json()
            return {
                "ds": response["data"]["ds"],
                "op": response["data"]["op"],
                "tk": response["data"]["tk"]
            }
        except Exception as e:
            logger.error(f"[post_log] 验证码识别失败 {e}")
            return None

    @staticmethod
    def get_ak():
        """Return the fixed `ak` value sent with every /cap/* request."""
        ak = "76AKmP4xDQjB3vAIPef3KxOlJZWCpw64"
        return ak