from requests import post
from requests import get
import time
import re
# Base URL of the target site; the endpoint URLs used below are built from it.
host = 'https://www.example.com'

# Browser-like request headers shared by the HTTP calls in this script.
# Key order is kept as-is because dict order determines the order in which
# the headers are handed to the HTTP client.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Referer': 'https://www.example.com',
    # NOTE(review): 'utf-8' is a charset name, not an HTTP content coding
    # (gzip / deflate / identity) -- confirm the server tolerates this value.
    'Accept-Encoding': 'utf-8',
    'Connection': 'keep-alive',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100100 Firefox/74.0',
    # Form-encoded bodies: the POST payloads are sent as "key=value&key=value".
    'Content-Type': 'application/x-www-form-urlencoded',
    # Intentionally empty in the source.
    'Cookie': '',
}
def getSchool():
    """Download the school drop-down from the site and return its entries.

    Requests ``{host}/xxx``, locates the ``<select name="yxh" id="yxh">``
    element in the response and reads every
    ``<option value="CODE" >NAME</option>`` inside it. The
    ``---请选择---`` placeholder entry is skipped.

    Returns:
        dict: three keys --
            ``'num'``    list of option values, in page order;
            ``'name'``   list of option labels, parallel to ``'num'``;
            ``'school'`` dict mapping each label to its option value
            (a repeated label keeps the last value seen).

    Raises:
        IndexError: the expected ``<select>`` element is not in the response.
    """
    placeholder = '---请选择---'
    select_pattern = r'<select name="yxh" id="yxh">(.*?)</select>'
    # One pass captures value and label together, so option values never have
    # to be spliced (unescaped) into a second regular expression.
    option_pattern = r'<option value="([^"]*)" >(.*?)</option>'

    url = '{0}/xxx'.format(host)
    # `headers` must be passed by keyword: the second positional parameter of
    # requests.get() is `params`, which would put the headers in the query
    # string instead of sending them as HTTP headers.
    response = get(url, headers=headers)

    # '.' does not match newlines, so flatten the markup before matching.
    raw_data = (
        response.text
        .replace('\r\n', '')
        .replace('\n', '')
        .replace('\t', '')
    )

    select_match = re.search(select_pattern, raw_data)
    if select_match is None:
        # Same exception type an empty findall(...)[0] would raise, but with
        # a message that says what was actually missing.
        raise IndexError('<select name="yxh" id="yxh"> not found in response')
    data = select_match.group(1)

    numbers = []
    names = []
    number_by_name = {}
    for number, name in re.findall(option_pattern, data):
        # Drop the "please choose" placeholder whether the marker text shows
        # up in the label or in the value.
        if placeholder in name or placeholder in number:
            continue
        numbers.append(number)
        names.append(name)
        number_by_name[name] = number

    return {'num': numbers, 'name': names, 'school': number_by_name}
def getNum(school_num, school_name, *, max_num=9999, is_valid=None):
    """Find the highest accepted candidate id of a school and format it.

    Candidate ids are probed as ``int(school_num) * 100000 + k``. The function
    binary-searches for the largest ``k`` in ``1..max_num`` that is accepted
    and reports that ``k`` as the number of registrations.

    NOTE(review): the search (like the reported count itself) assumes ids are
    issued contiguously starting at 1 within a school, i.e. once an id is
    rejected every larger id is rejected too -- confirm against the site.

    Args:
        school_num: School code as a numeric string; ``""`` means "no school".
        school_name: School label, used only in the returned text.
        max_num: Largest id suffix to consider (keyword-only).
        is_valid: Optional callable ``is_valid(user_id) -> bool`` replacing
            the HTTP probe (keyword-only). By default the id is POSTed to
            ``{host}/xxx`` and counts as valid unless the response contains
            ``考生号错误``.

    Returns:
        str | None: ``"<school_name> <school_num> 报名人数:<count>"``, or
        ``None`` when ``school_num`` is empty or no id in range is accepted.

    Raises:
        ValueError: ``school_num`` is non-empty but not an integer string.
    """
    if school_num == "":
        return None

    id_base = int(school_num) * 100000

    if is_valid is None:
        query_url = '{0}/xxx'.format(host)

        def probe(user_id):
            payload = 'kslx0=0&userid=' + str(user_id)
            response = post(query_url, data=payload, headers=headers)
            # Throttle: pause after every request so the site is not hammered.
            time.sleep(1)
            return '考生号错误' not in response.text
    else:
        probe = is_valid

    # Invariant: every suffix above `high` is rejected; `low` is accepted, or
    # is 0 meaning "nothing accepted so far". A real binary search converges
    # on the exact boundary in about log2(max_num) requests, instead of
    # stopping at the first repeated halving of max_num that happens to work.
    low, high = 0, max_num
    while low < high:
        mid = (low + high + 1) // 2  # round up so `low = mid` always advances
        if probe(id_base + mid):
            low = mid
        else:
            high = mid - 1

    if low == 0:
        return None
    return "{0} {1} 报名人数:{2}".format(school_name, school_num, str(low))
def main():
    """Look up every school from the drop-down and print its result line.

    For each label returned by getSchool(), a progress message is printed,
    then whatever getNum() returns for that school (a text line or None).
    """
    catalog = getSchool()
    code_by_name = catalog['school']
    for school_name in catalog['name']:
        print('正在获取: {0} ...'.format(school_name))
        outcome = getNum(code_by_name[school_name], school_name)
        print(outcome)


# Runs unconditionally when the module is loaded (there is no __main__ guard).
main()
关于获取某省专插本录取人数的代码
编程
安全
•
阅读量
0
•
阅读时间:
2.3分
2020-03-28