声明:
本文章中所有内容仅供学习交流使用,不用于其他任何目的,抓包内容、敏感网址、数据接口等均已做脱敏处理,严禁用于商业用途和非法用途,否则由此产生的一切后果均与作者无关!有相关问题请第一时间头像私信联系我删除博客!
主要代码
import json
import re
import time
from lxml import etree
import execjs
import requests
from urllib.parse import quote
from urllib.parse import parse_qs
# Shared session so WAF/anti-bot cookies persist across the paginated requests.
session = requests.session()
cookies = {
}
requestId = ""

# Compile the signing JS once, outside the loop — the original recompiled it on
# every iteration and leaked the file handle.
with open('./前程无忧.js', 'r', encoding='utf-8') as js_file:
    cp = execjs.compile(js_file.read())

for i in range(0, 100):
    page = i + 1
    keyword = '数据分析'
    headers = {
    }
    # NOTE(review): seconds are rounded first and then scaled, so the
    # millisecond field is always 000. If true millisecond precision is wanted
    # this should be round(time.time() * 1000); kept as-is because the token
    # below is derived from this exact value.
    ts = round(time.time()) * 1000
    # Page 1 has no requestId yet; the JS signer expects the literal "no".
    if page == 1:
        sign = cp.call('sign', ts, page, "no", None, keyword)
    else:
        sign = cp.call('sign', ts, page, requestId, None, keyword)
    headers['sign'] = sign
    params = {
        'api_key': '51job',
        'timestamp': ts,
        'keyword': keyword,
        'searchType': '2',
        'function': '',
        'industry': '',
        'jobArea': '030200',
        'jobArea2': '',
        'landmark': '',
        'metro': '',
        'salary': '',
        'workYear': '',
        'degree': '',
        'companyType': '',
        'companySize': '',
        'jobType': '',
        'issueDate': '',
        'sortType': '0',
        'pageNum': page,
        'requestId': '',
        'keywordType': 'guess_exp_tag6',
        'pageSize': '20',
        'source': '1',
        'accountId': '',
        'pageCode': 'sou|sou|soulb',
    }
    # URL is desensitized (relative path) on purpose, per the disclaimer above.
    response = session.get('/search-pc', params=params, cookies=cookies, headers=headers)

    # When the WAF intercepts the request, it returns an HTML challenge page
    # embedding "var requestInfo = {...};" instead of the JSON result.
    request_info_pattern = re.compile(r"var requestInfo = ({.*?});", re.DOTALL)
    match = request_info_pattern.search(response.text)
    if match:
        request_info_str = match.group(1)
        # The requestInfo object is a literal, so json.loads could parse it;
        # the token turned out to be derivable from the timestamp instead.
        # (Commented-out extraction kept for reference.)
        # request_info = json.loads(request_info_str)
        # token = re.findall('token:.*,', request_info_str)[0].split("token: ")[1][1:-2]
        token = "0b72f618-4c1-4aba-9a78-f" + str(ts - 6666) + "ba"
        print(token)
        # `refer` is extracted but currently unused — u_aref below is
        # hard-coded to "123". Kept to mirror the challenge-page fields.
        refer = re.findall('refer:.*,', request_info_str)[0].split("refer: ")[1][1:-2]
        args = "/api/job/search-pc?" + re.findall('args:.*,', request_info_str)[0].split("args: ")[1][1:-2]
        args_dict = parse_qs(re.findall('args:.*,', request_info_str)[0].split("args: ")[1][1:-2])
        # NOTE(review): `sign` is recomputed here but headers['sign'] is not
        # updated, so the retry below still carries the page-level sign —
        # confirm this is intentional.
        sign = cp.call('sign', "", "", "", args)
        url1 = "/analyze.jsonp"
        # "n" is produced by a local helper service running the anti-bot JS VM.
        n = json.loads(requests.get("http://localhost:3000/get227").text)['n']
        print(n)
        params = {
            "a": "CF_APP_WAF",
            "t": token,
            "n": n,
            "p": "",  # fixed: original used full-width quotes “” — a SyntaxError
            "scene": "register",
            "asyn": "0",
            "lang": "cn",
            "v": "1",
            "callback": f"jsonp_{time.time()*1000}"
        }
        response = session.get(url1, headers=headers, params=params)
        # The jsonp wrapper is stripped by grabbing the inner {...} payload.
        data = json.loads(re.findall("{.*}", response.text)[0])['result']
        csessionid = data['csessionid']
        value = data['value']
        headers = {
        }
        params = {
            'api_key': args_dict['api_key'][0],
            'timestamp': args_dict['timestamp'][0],
            'keyword': args_dict['keyword'][0],
            'searchType': '2',
            'function': '',
            'industry': '',
            'jobArea': args_dict['jobArea'][0],
            'jobArea2': '',
            'landmark': '',
            'metro': '',
            'salary': '',
            'workYear': '',
            'degree': '',
            'companyType': '',
            'companySize': '',
            'jobType': '',
            'issueDate': '',
            'sortType': '0',
            'pageNum': args_dict['pageNum'][0],
            'requestId': '',
            'keywordType': args_dict['keywordType'][0],
            'pageSize': args_dict['pageSize'][0],
            'source': '1',
            'accountId': '',
            'pageCode': args_dict['pageCode'][0],
            # Verification artifacts from the /analyze.jsonp response:
            'u_atoken': token,
            'u_asession': csessionid,
            'u_asig': value,
            'u_aref': "123",
        }
        # Retry the search with the verification parameters attached.
        response = session.get('/search-pc', cookies=cookies,
                               headers=headers, params=params)
        print(response.text)
        data = json.loads(response.text)
        requestId = data['resultbody']['requestId']
        print('pass 滑块')
        continue
    else:
        print("No requestInfo object found.")
    # Normal (non-intercepted) path: plain JSON result.
    data = json.loads(response.text)
    if data['resultbody']['requestId']:
        requestId = data['resultbody']['requestId']
    print(str(data)[0:1000])  # fixed: original was missing the closing parenthesis