# Detailed code link: https://flowus.cn/hbzx/3c42674d-8e6f-42e3-a3f6-bc1258034676
import requests
from lxml import etree #xpath解析库
def 源代码(url: str, timeout: float = 10) -> str:
    """Fetch *url* with a GET request and return the response body as text.

    The request is sent with a fixed set of cookies and browser-like
    headers that are hard-coded below.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without one, a stalled connection would block indefinitely.

    Returns:
        The decoded response body (``Response.text``). The HTTP status
        code is not checked, so an error page is returned as-is.

    Network failures and timeouts propagate as ``requests`` exceptions.
    """
    # NOTE(review): these look like values captured from a browser session
    # and will presumably expire -- confirm and refresh when requests fail.
    cookies = {
        'global_cookie': 'xeqnmumh38dvpj96uzseftwdr20lvkwkfb9',
        'otherid': 'b44a1837638234f1a0a15e37877e0685',
        'g_sourcepage': 'zf_fy%5Elb_pc',
        '__utma': '147393320.818863681.1714391725.1714391725.1714391725.1',
        '__utmc': '147393320',
        '__utmz': '147393320.1714391725.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',
        '__utmt_t0': '1',
        '__utmt_t1': '1',
        '__utmt_t2': '1',
        'keyWord_recenthousebengbu': '%5b%7b%22name%22%3a%22%e9%be%99%e5%ad%90%e6%b9%96%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a011914%2fs31%2f%22%2c%22sort%22%3a1%7d%5d',
        'city': 'sh',
        'ASP.NET_SessionId': '4fpr5u3w5zqqzitrnwafk3cr',
        'zf_csrfcookie': '1cCMHJcsaY7XgtGVMdiMdsydBeGKPxx7G1pYnsK0yn4vRI361O_aeBQfC7SAKi4gktL0kQ2',
        'unique_cookie': 'U_xeqnmumh38dvpj96uzseftwdr20lvkwkfb9*6',
        '__utmb': '147393320.18.10.1714391725',
    }
    # No 'cookie' header here: the cookies are sent through the ``cookies=``
    # argument of ``requests.get`` instead.
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'priority': 'u=0, i',
        'referer': 'https://sh.zu.fang.com/house/i33/',
        'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    }
    response = requests.get(
        url, cookies=cookies, headers=headers, timeout=timeout
    )
    # The body text is the page's HTML source.
    return response.text
def _first(nodes, default=''):
    """Return the first item of an XPath result list, or *default* if empty."""
    return nodes[0] if nodes else default


if __name__ == '__main__':
    # Walk result pages 1..9 and print one line per listing:
    # "<title> <details> <price>".
    for fan in range(1, 10):
        # The f-prefix is required so the page number is substituted into
        # the URL; without it the literal text "{fan}" would be requested.
        url = f'https://sh.zu.fang.com/house/i3{fan}/'
        tree = etree.HTML(源代码(url))  # parse the HTML source for XPath queries

        # Title anchors carry ids of the form rentid_D09_NN_02, where NN is
        # the two-digit listing index (rentid_D09_01_02 ... rentid_D09_60_02).
        for i in range(1, 61):
            titles = tree.xpath(f'//*[@id="rentid_D09_{i:02d}_02"]/a/text()')
            if not titles:
                # Listing slot not present on this page; skip it rather
                # than crash with IndexError.
                continue
            title = titles[0]

            # Second <p> of the i-th <dl> in the list box.
            details = _first(
                tree.xpath(f'//*[@id="listBox"]/div[3]/dl[{i}]/dd/p[2]/text()')
            ).strip()

            # <span> inside the second <div> of the same <dl>.
            price = _first(
                tree.xpath(
                    f'//*[@id="listBox"]/div[3]/dl[{i}]/dd/div[2]/p/span/text()'
                )
            )

            print(title, details, price)