快手数据获取相对简单:访问地址固定,且不需要登录 token。
列表数据通过以下固定接口地址获取:
https://www.kuaishou.com/graphql
请求方式为 POST;注意请求体中的 userId 要填写每个快手账号对应的 id。
import json
import logging
import os
import time
from datetime import datetime

import pymysql
import requests
# Layout of every log line: timestamp, logger name, level, then the message.
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Handler that writes UTF-8 encoded records to the ks.log file.
fh = logging.FileHandler('ks.log', encoding='utf-8')
fh.setFormatter(formatter)

# Logger used by the rest of the script; DEBUG is the lowest standard level,
# so no record is filtered out before it reaches the file handler.
logger = logging.getLogger('my_logger')
logger.setLevel(logging.DEBUG)
logger.addHandler(fh)
# NOTE: the database connection settings must be adjusted per environment.
# Host, user and password can be overridden through the KS_DB_HOST,
# KS_DB_USER and KS_DB_PASSWORD environment variables so that credentials do
# not have to be edited into the source; the fallbacks are the values that
# used to be hard-coded here.  The database name stays fixed because the
# INSERT statement later in this script names the `ry` schema explicitly.
mydatabase = pymysql.connect(
    host=os.environ.get('KS_DB_HOST', 'localhost'),
    user=os.environ.get('KS_DB_USER', 'root'),
    password=os.environ.get('KS_DB_PASSWORD', '123456'),
    database='ry',
    charset='utf8mb4',
)
cursor = mydatabase.cursor()

# Load the accounts to scrape.  Each row is (id, base_media_name, dy_url);
# the main loop sends the dy_url value as the GraphQL ``userId`` variable.
cursor.execute(
    'SELECT id,base_media_name,dy_url FROM `media_account_manager2` where `type_id` = 484 AND `status` = 3 and dy_url is not null'
)
result = cursor.fetchall()
# HTTP headers sent with every request to the Kuaishou endpoint: a fixed
# anonymous-visitor cookie and a desktop Chrome User-Agent string.
headers = {}
headers['Cookie'] = 'kpf=PC_WEB; clientid=3; did=web_8239e5591749f85a281700fcf0834715; didv=1719032992223; kpn=KUAISHOU_VISION'
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'

# Moment the script started and the same instant rendered as YYYY-MM-DD.
now = datetime.now()
now_formatted_date = format(now, "%Y-%m-%d")
# Seconds to wait for the GraphQL endpoint before giving up; without a timeout
# requests.post() can block indefinitely on a stalled connection.
KS_REQUEST_TIMEOUT = 30

# Values that are identical for every account, hoisted out of the loop.
urlKs = 'https://www.kuaishou.com/graphql'
insert_query = "INSERT INTO `ry`.`media_content`(`title`, `pub_date`, `url`, `content`, `media_id`, `media_name`,`type_id`,`platform`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
# GraphQL document of the visionProfilePhotoList operation, sent verbatim.
KS_PROFILE_QUERY = "fragment photoContent on PhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n ...recoPhotoFragment\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"


def _fetch_feeds(user_id):
    """Request the profile feed list of one Kuaishou account.

    Posts the ``visionProfilePhotoList`` operation with ``user_id`` as the
    ``userId`` variable and an empty ``pcursor`` (presumably the first page
    only -- no further pages are requested).

    Returns the list stored under ``data.visionProfilePhotoList.feeds``, an
    empty list when any of those keys is missing or null, or ``None`` when
    the endpoint answers with a status other than 200.

    Raises whatever ``requests.post`` or ``response.json`` raise (connection
    errors, timeouts, a body that is not valid JSON).
    """
    jsonObj = {
        "operationName": "visionProfilePhotoList",
        "variables": {
            "userId": user_id,
            "pcursor": "",
            "page": "profile",
        },
        "query": KS_PROFILE_QUERY,
    }
    response = requests.post(
        url=urlKs, json=jsonObj, headers=headers, timeout=KS_REQUEST_TIMEOUT)
    if response.status_code != 200:
        # Previously ignored silently; leave a trace so skipped accounts
        # can be found in the log.
        logger.warning('******快手接口返回状态码:%s,userId:%s,已跳过******',
                       response.status_code, user_id)
        return None
    # ``or {}`` / ``or []`` also cover keys that are present but JSON null,
    # which a plain ``.get(key, default)`` would pass through as None.
    body = response.json() or {}
    profile = (body.get('data') or {}).get('visionProfilePhotoList') or {}
    return profile.get('feeds') or []


def _build_record(item, m_id, ks_name):
    """Turn one feed entry into the parameter tuple for ``insert_query``.

    Returns ``None`` when the entry carries no numeric ``photo.timestamp``;
    the value is divided by 1000 before conversion, i.e. it is treated as
    milliseconds since the epoch.
    """
    photo = item.get('photo') or {}
    timestamp = photo.get('timestamp')
    if not isinstance(timestamp, (int, float)):
        return None
    itemName = photo.get('caption', 'null')
    photoUrl = photo.get('photoUrl', 'null')
    create_time_str = datetime.fromtimestamp(
        timestamp / 1000).strftime("%Y-%m-%d %H:%M:%S")
    text = ''
    return (itemName, create_time_str, photoUrl, text, m_id, ks_name,
            '483', '快手自动抓取')


# Pre-bind the names the error handler reports, so a failure that happens
# before the first account/feed entry is reached cannot raise NameError there
# and hide the real exception.
ks_name = None
item = None
try:
    for row in result:
        m_id, ks_name, ks_url = row[0], row[1], row[2]
        item = None  # do not report a feed entry of the previous account
        time.sleep(5)  # pause before every request
        print(ks_name)
        print('***************************************')
        data = _fetch_feeds(ks_url)
        if data is None:
            continue
        print(data)
        for item in data:
            record = _build_record(item, m_id, ks_name)
            if record is None:
                logger.warning('******快手账号:%s,视频缺少有效的发布时间,已跳过:%s******',
                               ks_name, item)
                continue
            itemName, create_time_str, photoUrl = record[:3]
            print(itemName)  # video title
            print(photoUrl)  # video URL
            print(create_time_str)  # video publish time
            # Insert the row and commit straight away, so rows stored before
            # a later failure are kept.
            cursor.execute(insert_query, record)
            mydatabase.commit()
except Exception:
    # Any error stops the run; logger.exception records the traceback.
    logger.exception('******快手获取发生错误********')
    if item is not None:
        logger.error(item)
    logger.error('******快手账号:%s,数据获取异常******', ks_name)
else:
    logger.info('******快手数据结束********')
finally:
    mydatabase.close()