爬虫代码:
import requests
# Reproduction of the problem: POST the search form with only User-Agent and
# Referer headers (no Cookie). This request was observed to come back with
# status 521 and a <script> body that sets the `__jsl_clearance_s` cookie
# instead of the expected data.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
    'Referer': 'https://www1.rmfysszc.gov.cn/projects.shtml?dh=3&gpstate=1&wsbm_slt=1',
}

# Form fields sent as the POST body. The values are runtime data and are
# kept exactly as the site expects them.
form_data = {
    "type": "0",
    "name": "",
    "area": "河南省",
    "city": "河南省",
    "city1": "==请选择==",
    "city2": "==请选择==",
    "xmxz": "0",
    "state": "0",
    "money": "",
    "money1": "",
    "number": "0",
    "fid1": "",
    "fid2": "",
    "fid3": "",
    "order": "0",
    "page": "1",
    "include": "0",
}

# requests has no default timeout; without one a stalled connection would
# block this script forever, so give up after 10 seconds.
response = requests.post(
    'https://www1.rmfysszc.gov.cn/ProjectHandle.shtml',
    data=form_data,
    headers=headers,
    timeout=10,
)
print(response.status_code)
print(response.text)
打印返回结果:
<script>document.cookie=('_')+('_')+('j')+('s')+('l')+('_')+('c')+('l')+('e')+('a')+('r')+('a')+('n')+('c')+('e')+('_')+('s')+('=')+(-~false+'')+((1+[4]>>1)+'')+(~~[]+'')+(3+6+'')+(1+8+'')+(6+'')+(-~false+'')+(-~false+'')+((+false)+'')+((+[])+'')+('.')+(2+4+'')+('|')+('-')+(-~{}+'')+('|')+('B')+((1+[0])/[2]+'')+('S')+('T')+('S')+('w')+('F')+('%')+((1<<1)+'')+('F')+('P')+(0+1+0+1+'')+('N')+('y')+('M')+('X')+('N')+('m')+('Y')+('z')+((+true)+'')+((+true)+'')+('n')+('Q')+('y')+('p')+('f')+('B')+('M')+('%')+(3+'')+('D')+(';')+(' ')+('M')+('a')+('x')+('-')+('a')+('g')+('e')+('=')+(1+2+'')+((1+[2])/[2]+'')+((+[])+'')+(~~{}+'')+(';')+(' ')+('P')+('a')+('t')+('h')+('=')+('/')+(';')+(' ')+('S')+('a')+('m')+('e')+('S')+('i')+('t')+('e')+('=')+('N')+('o')+('n')+('e')+(';')+(' ')+('S')+('e')+('c')+('u')+('r')+('e');location.href=location.pathname+location.search</script>
后来打印状态码,发现是 521:返回的不是所需的数据,而是一段设置 __jsl_clearance_s cookie 并重新加载页面的 JavaScript。
解决办法:
在爬虫代码的 headers 中添加从浏览器中复制的该网页 cookie
注意:如果已添加 cookie 后出现断网等情况,需要重新获取 cookie
修改后的代码:
import requests
# Fixed version: same POST as before, but with the Cookie header copied from
# a browser session of the site (it includes the `__jsl_clearance_s` value).
# The cookie is hard-coded, so it has to be captured again from the browser
# whenever it stops being accepted (e.g. after the connection drops).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
    'Referer': 'https://www1.rmfysszc.gov.cn/projects.shtml?dh=3&gpstate=1&wsbm_slt=1',
    'Cookie': 'Cookies-01=78968004; ASP.NET_SessionId=kscnfewrtce2cj1gd3y1oefd; __jsluid_s=3626e7c26101665bc4ae1157ce1dbf7b; Hm_lvt_5698cdfa8b95bb873f5ca4ecf94ac150=1709957130; __jsl_clearance_s=1709957570.246|0|hWDDOOVhkDy6L7BtCygGoT9x5YE%3D; Hm_lpvt_5698cdfa8b95bb873f5ca4ecf94ac150=1709957572',
}

# Form fields sent as the POST body. The values are runtime data and are
# kept exactly as the site expects them.
form_data = {
    "type": "0",
    "name": "",
    "area": "河南省",
    "city": "河南省",
    "city1": "==请选择==",
    "city2": "==请选择==",
    "xmxz": "0",
    "state": "0",
    "money": "",
    "money1": "",
    "number": "0",
    "fid1": "",
    "fid2": "",
    "fid3": "",
    "order": "0",
    "page": "1",
    "include": "0",
}

# requests has no default timeout; without one a stalled connection would
# block this script forever, so give up after 10 seconds.
response = requests.post(
    'https://www1.rmfysszc.gov.cn/ProjectHandle.shtml',
    data=form_data,
    headers=headers,
    timeout=10,
)
print(response.status_code)
print(response.text)