我想把爬取到的数据保存为 Excel 文件,所以需要先安装下面这两个库:
pip install pandas openpyxl
代码
import requests
import pandas as pd
# Browser-like request headers, sent so the request carries a desktop Chrome
# User-Agent instead of the default one used by the requests library.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# URL template for the Douban movie search restricted to ratings 9-10; the
# result offset is appended after "start=".
base_url = 'https://movie.douban.com/j/new_search_subjects?sort=U&range=9,10&tags=&start='

# Paging parameters: 10 pages of 20 movies each.
PAGE_SIZE = 20
PAGE_COUNT = 10
# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10


def extract_movies(payload):
    """Convert one decoded JSON response into a list of movie records.

    Args:
        payload: Decoded JSON object holding a ``'data'`` list whose items
            each provide ``'title'``, ``'rate'`` and ``'url'``.

    Returns:
        A list of dicts with the keys ``'title'``, ``'rating'`` and ``'link'``,
        in the same order as the entries in ``payload['data']``.

    Raises:
        KeyError: If ``'data'`` or one of the per-movie fields is missing.
    """
    return [
        {
            'title': item['title'],
            'rating': item['rate'],
            'link': item['url'],
        }
        for item in payload['data']
    ]


def fetch_movies(page_count=PAGE_COUNT, page_size=PAGE_SIZE):
    """Download up to ``page_count`` result pages and return all movie records.

    Args:
        page_count: Maximum number of pages to request.
        page_size: Step between consecutive ``start`` offsets.

    Returns:
        A list of movie dicts as produced by ``extract_movies``.

    Raises:
        requests.HTTPError: If a response has an error status code.
        requests.Timeout: If the server does not answer within
            ``REQUEST_TIMEOUT`` seconds.
        KeyError: If a response lacks the expected fields.
    """
    movies = []
    # A single session reuses the underlying connection across all pages.
    with requests.Session() as session:
        session.headers.update(headers)
        for start in range(0, page_count * page_size, page_size):
            response = session.get(base_url + str(start), timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            page = extract_movies(response.json())
            if not page:
                # An empty page is taken to mean there are no further results,
                # so the remaining requests are skipped.
                break
            movies.extend(page)
    return movies


def main():
    """Fetch the movie list and write it to ``douban.xlsx``."""
    movies_info = fetch_movies()
    # Explicit column list keeps the header row even when nothing was fetched.
    df = pd.DataFrame(movies_info, columns=['title', 'rating', 'link'])
    excel_file = "douban.xlsx"
    df.to_excel(excel_file, index=False)
    print(f"数据已成功写入 {excel_file}")


if __name__ == "__main__":
    main()
运行结果:共得到 200 部评分在 9 分以上的电影。