前言
大家早好、午好、晚好吖 ❤ ~欢迎光临本文章
所用知识点:
-
动态数据抓包
-
requests发送请求
-
json数据解析
开发环境:
-
python 3.8 运行代码
-
pycharm 2022.3 辅助敲代码
-
requests 请求模块 ,第三方,需安装
win + R 输入cmd 输入安装命令 pip install 模块名
爬虫案例如何实现
-
发送请求
-
获取数据
-
解析数据
-
保存数据
代码展示
导入模块
import requests
- 发送请求
url = 'https://*****/graphql'
伪装
# a. 请求方式是什么?
# post
# b. 请求头
headers = {
# 登陆之后,cookie里面会存储账号信息
'Cookie': 'kpf=PC_WEB; clientid=3; did=web_4bf90ac4df8aaef2d99527f1da1063fd; userId=3075864129; kpn=KUAISHOU_VISION; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABwgHhKM9-oogbgtutzowJEttqd5KamRJ9hNag-Elf82OEdJdt4lodSEThrdp1MLPgFl-6cdtZ1dshlUAYh-sHYS3dgBB8iZ_tqBzosa79OffPexwG6aWCkIgjdglclYP7KtYppEXrzXdwGt-2d-ZB6IxGTWINrZ5HOtAFzVukl4W23Vy2n19JBxuHMJDxpP4sBwW4rFP0QNklm91AbvsvsBoS7YoRGiN2PM_7zCD1Dj9m5oYoIiCaVnNEAqYiNgeeOzP3DC_9EWJBkH_O0cAr7fedfqlmeigFMAE; kuaishou.server.web_ph=507361000af22c186ded0b017d8e740f042a',
# 域名
'Host': 'www.****',
# 防盗链
'Referer': 'https://*****/profile/3xgapy2dxxbzsvw',
# 设备信息
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
}
pcursor = ""
while True:
# c. 请求参数(请求体)
json = {
'operationName':"visionProfilePhotoList",
'query':"fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n",
'variables': {'userId': "3xjqise3ywy8abk", 'pcursor': pcursor, 'page': "profile"}
}
response = requests.post(url=url, headers=headers, json=json)
- 获取数据
# Python字典
json_data = response.json()
- 解析数据 提取数据 需要的数据
feeds = json_data['data']['visionProfilePhotoList']['feeds']
pcursor = json_data['data']['visionProfilePhotoList']['pcursor']
for feed in feeds:
caption = feed['photo']['caption']
photoUrl = feed['photo']['photoUrl']
# 点赞 / 评论 / ....
_id = feed['photo']['id']
author_id = feed['author']['id']
expTag = feed['photo']['expTag']
like_json = {
'operationName':"visionVideoLike",
'query':"mutation visionVideoLike($photoId: String, $photoAuthorId: String, $cancel: Int, $expTag: String) {\n visionVideoLike(photoId: $photoId, photoAuthorId: $photoAuthorId, cancel: $cancel, expTag: $expTag) {\n result\n __typename\n }\n}\n",
'variables': {'cancel': 0, 'expTag': expTag, 'photoAuthorId': author_id, 'photoId': _id}
}
resp = requests.post(url=url, headers=headers, json=like_json)
print(resp)
# print(caption, photoUrl)
# video_data = requests.get(photoUrl).content
- 保存数据
# with open(f'{caption}.mp4', mode='wb') as f:
# f.write(video_data)
if pcursor == 'no_more':
break
尾语 💝
好了,今天的分享就差不多到这里了!
完整代码、更多资源、疑惑解答直接点击下方名片自取即可。
对下一篇大家想看什么,可在评论区留言哦!看到我会更新哒(ง •_•)ง
喜欢就关注一下博主,或点赞收藏评论一下我的文章叭!!!