import time

import requests
"""
1. 手动到chrome获取下载请求
2. 获取excel的动态id
3. 拼出excel的下载链接
4. 下载
"""
class Excel:
    """Download a docs.qq.com sheet as an .xlsx file.

    The flow mirrors the requests seen in the browser's network panel:

    1. POST to the ``export_office`` endpoint to start an export and obtain
       an ``operationId``.
    2. Poll the ``query_progress`` endpoint until it reports a ``file_url``.
    3. Download the file behind ``file_url`` and write it to disk.
    """

    def __init__(self, cookie_string="", timeout=30):
        """
        :param cookie_string: value sent as the ``Cookie`` request header;
            the default empty string matches the original placeholder.
        :param timeout: seconds to wait on each HTTP request before giving up.
        """
        self.cookie_string = cookie_string
        self.timeout = timeout
        # Headers copied from a captured Chrome request.
        # NOTE(review): "authority"/"method"/"path"/"scheme" look like HTTP/2
        # pseudo-headers copied from DevTools, and "path" embeds one specific
        # operationId -- presumably ignored by the server; confirm before
        # removing. "br" in Accept-Encoding needs a brotli decoder installed
        # for requests to decode such responses -- verify in your environment.
        self.headers = {
            "authority": "docs.qq.com",
            "method": "GET",
            "path": "/v1/export/query_progress?u=28b403f5c49b4e38add0acaff339ed41&operationId=144115215919843666_fb7bbca6-a03a-23df-f662-ffdbc961bb9f",
            "scheme": "https",
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cookie": self.cookie_string,
            "Referer": "https://docs.qq.com/sheet/DU3NaS1h3Z2Voc09u?u=28b403f5c49b4e38add0acaff339ed41&tab=BB08J2",
            "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Ch-Ua-Platform": "Windows",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "Traceparent": "00-b895d4ff7358b61546dde0bd9c69e4fa-3d91d815d6e57d83-01",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
        # Form payload posted to the export endpoint.
        self.data = {
            'exportType': 0,
            'switches': '{"embedFonts":false}',
            'exportSource': 'client',
            'docId': '300000000$SsZKXwgehsOn'
        }

    def get_operationId(self, browser_url):
        """
        Start an export and return its operation id.

        :param browser_url: the export request URL taken from the browser,
            e.g. "https://docs.qq.com/v1/export/export_office?u=28b403f5c49b4e38add0acaff339ed41"
        :return: the ``operationId`` field of the JSON response
        :raises KeyError: if the response has no ``operationId`` field
        """
        payload = requests.post(
            browser_url,
            data=self.data,
            headers=self.headers,
            timeout=self.timeout,
        ).json()
        operation_id = payload['operationId']
        print("excel动态id:", operation_id)
        return operation_id

    def excel_url(self, url, retries=5, interval=1.0):
        """
        Poll the progress endpoint until it reports a download link.

        :param url: the ``query_progress`` URL for one export operation
        :param retries: maximum number of polls (the original used 5)
        :param interval: seconds to wait between polls, so the export has
            time to finish instead of being queried back-to-back
        :return: the non-empty ``file_url`` value from the JSON response
        :raises TimeoutError: if no ``file_url`` appeared within ``retries`` polls
        """
        for attempt in range(retries):
            progress = requests.get(
                url, headers=self.headers, timeout=self.timeout
            ).json()
            # While the export is still running the field is missing or empty.
            file_url = progress.get('file_url') if isinstance(progress, dict) else None
            if file_url:
                return file_url
            # No point sleeping after the final attempt.
            if attempt < retries - 1:
                time.sleep(interval)
        raise TimeoutError(
            "no file_url returned by %s after %d attempt(s)" % (url, retries)
        )

    def write2excel(self, file_url, file_name="download_excel.xlsx"):
        """
        Download the exported file and write it to disk.

        :param file_url: download link of the exported excel file
        :param file_name: path of the file to write
        :return: None
        :raises requests.HTTPError: if the download responds with an error
            status, so an error page is never saved as a spreadsheet
        """
        response = requests.get(url=file_url, timeout=self.timeout)
        response.raise_for_status()
        with open(file_name, 'wb') as f:
            f.write(response.content)
        print("excel下载完成")

    # def split_url(self, browser_url):
    #     """
    #     Split the url.
    #     :param browser_url: the url shown in the browser
    #     :return:
    #     """
    #     return browser_url.split("&tab")[0]

    def __call__(self, browser_url="", file_name="download_excel.xlsx"):
        """
        Run the whole flow: start the export, wait for the link, download.

        :param browser_url: the ``export_office`` request URL captured from
            Chrome's F12 network panel
        :param file_name: path of the file to write
        :return: None
        :raises ValueError: if ``browser_url`` is empty
        """
        # todo: derive the export request URL from the plain browser URL so
        #       that opening F12 is no longer required
        if not browser_url:
            # Fail early with a readable message instead of an invalid-URL
            # error raised from inside requests.
            raise ValueError(
                "browser_url is empty: pass the export_office request URL "
                "captured from the browser"
            )
        operation_id = self.get_operationId(browser_url)
        url = "https://docs.qq.com/v1/export/query_progress?u=28b403f5c49b4e38add0acaff339ed41&operationId=" + operation_id
        download_url = self.excel_url(url)
        self.write2excel(download_url, file_name=file_name)
if __name__ == '__main__':
    # Script entry point: build the downloader and run the full export flow.
    downloader = Excel()
    downloader()