简介:由于数据需求的日益增大,小红书网页版已经不能满足我们日常工作的需求,为此,小编特地开发了小红书手机版算法,方便大家获取更多的数据,提升工作效率。
手机版接口主要包括:搜索,详情,话题,评论,主页,用户信息,用户收藏,用户喜欢,发现页等等。
搜索页
评论页
code:
签名获取
def get_shield_value(self, note_id, xhs_api_url, xy_common_params, method='GET', apibody=''):
    """Fetch the 'shield' signature for a target API request from the signing service.

    :param note_id: note id to sign; may be empty for endpoints that take none
    :param xhs_api_url: full target API URL being signed
    :param xy_common_params: value of the 'xy-common-params' header to sign
    :param method: HTTP method of the target request ('GET' or 'POST')
    :param apibody: body of the target request (POST signing only)
    :return: shield signature string (whitespace-stripped service response)
    :raises requests.HTTPError: on POST signing when the service returns an error status
    """
    if method == 'GET':
        # FIX: source text contained '¶m=' — mojibake of the HTML entity
        # '&para' + 'm' — restored to the intended '&param=' separator.
        if note_id:
            body = f'noteid={note_id}&param={quote(xy_common_params)}&device={self.device_id}&hmac={quote(self.hmac)}&url={quote(xhs_api_url)}&direction=48'
        else:
            body = f'param={quote(xy_common_params)}&device={self.device_id}&hmac={quote(self.hmac)}&url={quote(xhs_api_url)}&direction=48'
        response = requests.post(self.get_shield_url, data=body, headers=self.headers, timeout=5)
        return response.text.strip()
    else:
        # '&param=' restored here as well (same mojibake as above).
        url = (
            f'{self.post_shield_url}?url={urllib.parse.quote(xhs_api_url)}'
            f'&param={urllib.parse.quote(xy_common_params)}'
            f'&direction=40&body={urllib.parse.quote(apibody)}'
            f'&hmac={urllib.parse.quote(self.hmac)}&device={self.device_id}'
        )
        headers = {
            'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
            'Content-Type': 'application/json'
        }
        # Timeout added for consistency with the GET branch so a hung
        # signing service cannot block the caller indefinitely.
        response = requests.post(url, headers=headers, timeout=5)
        response.raise_for_status()
        shield = response.text.strip()
        return shield
headers生成
def generate_post_headers(self, note_id, xhs_api_url, custom_headers, session_id, api='', method='GET', apibody=''):
    """Build signed request headers for an API call.

    Deep-copies *custom_headers*, injects the given session id, attaches the
    shield signature obtained from the signing service, and for POST requests
    also sets the 'xy-direction' header.

    :param note_id: note id forwarded to the signing service (may be empty)
    :param xhs_api_url: full target API URL to sign
    :param custom_headers: base header template (not mutated)
    :param session_id: session id to splice into sid / common-params headers
    :param api: unused; kept for interface compatibility
    :param method: HTTP method of the target request
    :param apibody: request body forwarded for POST signing
    :return: a new headers dict ready for the request
    """
    signed = copy.deepcopy(custom_headers)
    signed['x-legacy-sid'] = f'session.{session_id}'
    # Swap any embedded 'session.<digits>' token for the current session id.
    signed['xy-common-params'] = re.sub(
        r'session\.\d+', f'session.{session_id}', signed['xy-common-params']
    )
    shield = self.get_shield_value(note_id, xhs_api_url, signed['xy-common-params'], method, apibody)
    signed['shield'] = shield
    logger.info(shield)
    if method == 'POST':
        signed['xy-direction'] = '40'
    return signed
请求
def spider_search(self, keyword, page='1', page_pos='0', sort='general', note_type='不限', publish_time='不限', search_type='不限', session_id=None):
    """Search notes via the mobile search API.

    :param keyword: search keyword
    :param page: result page number (string)
    :param page_pos: page position offset (string)
    :param sort: sort order — general: composite, time_descending: newest,
        popularity_descending: most likes, comment_descending: most comments,
        collect_descending: most collects
    :param note_type: note type filter — 不限 (any) / 视频笔记 (video) / 普通笔记 (plain)
    :param publish_time: publish-time filter — 不限 / 一天内 / 一周内 / 半年内
    :param search_type: search scope — 不限 / 已看过 (seen) / 未看过 (unseen)
    :param session_id: session id used when signing the request headers
    :return: decoded JSON response from the API
    """
    api_url_base = "https://edith.xiaohongshu.com/api/sns/v10/search/notes"
    # Filter list is order-sensitive: (tag value, filter type) pairs.
    filter_pairs = (
        (sort, 'sort_type'),
        (note_type, 'filter_note_type'),
        (publish_time, 'filter_note_time'),
        (search_type, 'filter_note_range'),
    )
    filters = [{'tags': [tag], 'type': kind} for tag, kind in filter_pairs]
    params = {
        "keyword": keyword,
        "filters": json.dumps(filters, ensure_ascii=False, separators=(',', ':')),
        "sort": "",
        "page": page,
        "page_size": "20",
        "source": "explore_feed",
        "search_id": "2ehsgm5x5z2etryfwa5ts",
        "session_id": "2ehsglrpf9h3h4y091csg",
        "api_extra": "",
        "page_pos": page_pos,
        "pin_note_id": "",
        "allow_rewrite": "1",
        "geo": "",
        "loaded_ad": "",
        "query_extra_info": "",
        "rec_extra_params": "",
        "preview_ad": "",
        "scene": "",
        "is_optimize": "0",
        "location_permission": "0",
        "is_out_of_china": "0",
        "device_level": "4",
        "refresh_type": "0",
        "in_map_card_exp": "0",
        "search_tab": ""
    }
    xhs_api_url = f"{api_url_base}?{urllib.parse.urlencode(params)}"
    post_headers = self.generate_post_headers('', xhs_api_url, self.custom_headers, session_id, api=xhs_api_url)
    response = requests.get(xhs_api_url, headers=post_headers, timeout=5)
    res_json = response.json()
    logger.info(f'请求数据: {json.dumps(res_json, ensure_ascii=False)}')
    return res_json