1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
import csv
import functools
import os
from io import BytesIO

import ddddocr
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
def save_to_csv(data, filename='results.csv'):
    """Append one result row to a CSV file, adding the header when needed.

    The header row is written when the target file does not exist yet *or*
    exists but is empty, so a pre-created or truncated file still ends up
    with column names above its first data row.

    Args:
        data: Mapping from column name to cell value. Columns missing from
            the mapping are written as empty cells.
        filename: Path of the CSV file to append to.

    Raises:
        ValueError: Raised by ``csv.DictWriter`` when *data* contains a key
            that is not one of the known columns.
    """
    fieldnames = ['准考证号', '姓名', '性别', '班级', '类型']
    # A zero-byte file has no header either, so test the size and not only
    # the existence of the file.
    needs_header = (
        not os.path.isfile(filename) or os.path.getsize(filename) == 0
    )
    with open(filename, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow(data)
@functools.lru_cache(maxsize=None)
def _get_ocr_engine():
    """Return the shared ``ddddocr.DdddOcr`` recognizer, creating it on first use."""
    return ddddocr.DdddOcr()


def get_captcha(session, captcha_page_url, local_filename):
    """Download a captcha image and return the text recognised by OCR.

    The image bytes are classified straight from memory, so no scratch file
    is written and nothing can be left behind on disk if recognition fails.
    One recognizer instance is reused across calls instead of constructing a
    new one per captcha (presumably construction loads the OCR model, which
    would be repeated for every student otherwise).

    Args:
        session: Object exposing a ``requests``-style ``get(url, headers=...)``
            method; the request is sent with the module-level
            ``common_headers``.
        captcha_page_url: URL that serves the captcha image.
        local_filename: Unused. Kept only so existing callers that pass a
            scratch-file path continue to work.

    Returns:
        Whatever ``DdddOcr.classification`` returns for the downloaded bytes.
    """
    response = session.get(captcha_page_url, headers=common_headers)
    return _get_ocr_engine().classification(response.content)
# Student roster; the processing loop expects one "name,id_card" record per line.
with open('stuinfo.txt', encoding='utf-8') as file:
    lines = list(file)

# Literal values shared by several of the header sets below.
_HOST = '********.yichafen.com'
_SITE_ROOT = 'https://' + _HOST
_REFERER = _SITE_ROOT + '/qz/L7P1Tvwqlt?from_device=mobile'
_SEC_CH_UA = '"-Not.A/Brand";v="8", "Chromium";v="102"'
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36'
)
_ACCEPT_ENCODING = 'gzip, deflate'
_ACCEPT_LANGUAGE = 'zh-CN,zh;q=0.9'

# Headers sent with the captcha image request.
# NOTE(review): the Cookie values in this file are masked placeholders;
# presumably they must be replaced with a real session cookie before running.
common_headers = {
    'Host': _HOST,
    'Cookie': '*************',
    'Sec-Ch-Ua': _SEC_CH_UA,
    'Dnt': '1',
    'Sec-Ch-Ua-Mobile': '?0',
    'User-Agent': _USER_AGENT,
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Accept-Encoding': _ACCEPT_ENCODING,
    'Accept-Language': _ACCEPT_LANGUAGE,
    'Connection': 'close',
}

session = requests.Session()

captcha_page_url = _SITE_ROOT + '/public/verify.html'
local_filename = 'captcha.png'

# Headers for the form submission (sent as an XMLHttpRequest-style POST).
headers_post = {
    'Host': _HOST,
    'Cookie': '**************',
    'Sec-Ch-Ua': _SEC_CH_UA,
    'Dnt': '1',
    'Sec-Ch-Ua-Mobile': '?0',
    'User-Agent': _USER_AGENT,
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept': '*/*',
    'X-Requested-With': 'XMLHttpRequest',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Origin': _SITE_ROOT,
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': _REFERER,
    'Accept-Encoding': _ACCEPT_ENCODING,
    'Accept-Language': _ACCEPT_LANGUAGE,
    'Connection': 'close',
}

# Headers for fetching the result page (navigation into an iframe).
headers_get = {
    'Host': _HOST,
    'Cookie': '********',
    'Sec-Ch-Ua': _SEC_CH_UA,
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Upgrade-Insecure-Requests': '1',
    'Dnt': '1',
    'User-Agent': _USER_AGENT,
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,'
        'image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'
    ),
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Dest': 'iframe',
    'Referer': _REFERER,
    'Accept-Encoding': _ACCEPT_ENCODING,
    'Accept-Language': _ACCEPT_LANGUAGE,
    'Connection': 'close',
}
# Endpoints are loop-invariant, so they are defined once up front.
post_url = 'https://********.yichafen.com/public/verifycondition/sqcode/NsjcAn4mMjU3MHw5ZGNhYmNhNDhiYzliNjRkNGJjZjNjZThiZmRjY2Q3Y3xzNmFqd3p5ZAO0O0OO0O0O/from_device/mobile.html'
get_url = 'https://********.yichafen.com/public/queryresult/from_device/mobile.html'

# Labels of the result-table rows that are copied into the CSV output.
wanted_labels = ('准考证号', '姓名', '性别', '班级', '类型')

# For every roster entry: solve a captcha, submit the query form, then scrape
# the result table and append the extracted fields to the CSV file.
for line in tqdm(lines, desc="Processing Students", unit="student"):
    record = line.strip()
    if not record:
        # Blank lines (e.g. a trailing newline in the roster) hold no student.
        continue

    parts = record.split(',')
    if len(parts) != 2:
        # Report and skip instead of aborting the whole batch on one bad line.
        print(f"跳过格式错误的行: {record}")
        continue
    name, id_card = parts
    last_six_digits = id_card[-6:]

    captcha = get_captcha(session, captcha_page_url, local_filename)
    print(f"识别的验证码为: {captcha}")

    form_data = {
        's_xingming': name,
        's_shenfenzhenghouliuwei': last_six_digits,
        'verify': captcha,
    }
    post_response = requests.post(post_url, headers=headers_post, data=form_data)
    if post_response.status_code != 200:
        print(f"POST请求失败: 姓名: {name}, 身份证后六位: {last_six_digits}")
        continue

    print(post_response.text)
    print(f"POST请求成功: 姓名: {name}, 身份证后六位: {last_six_digits}")

    get_response = requests.get(get_url, headers=headers_get)
    soup = BeautifulSoup(get_response.text, 'html.parser')
    table = soup.find(
        'table',
        {'class': 'table table-bordered s_table-bordered js_result_table'},
    )
    if table is None:
        # No result table in the page (for instance after a rejected
        # captcha); skip this student rather than crash on ``None``.
        print(f"未找到查询结果表格: 姓名: {name}, 身份证后六位: {last_six_digits}")
        continue

    result = {}
    for row in table.find_all('tr'):
        cols = row.find_all('td')
        if len(cols) < 2:
            # Rows without a label/value pair of <td> cells carry no field.
            continue
        label = cols[0].get_text(strip=True)
        if label in wanted_labels:
            result[label] = cols[1].get_text(strip=True)

    print(result)
    print("\n\n")
    if result:
        # Avoid appending an all-empty row when nothing was extracted.
        save_to_csv(result)
|