爬虫(持续更新ing)
requests模块
import requests
# Fetch the Baidu home page, then print the body followed by the final URL
# and the headers of both the outgoing request and the response.
home_url = 'https://www.baidu.com'
response = requests.get(home_url)
body_text = response.content.decode()  # bytes.decode() defaults to UTF-8
print(body_text)
for label, value in (
    ('url', response.url),
    ('request headers', response.request.headers),
    ('res headers', response.headers),
):
    print(label, value)
import requests
# Download the Baidu logo and store the raw response bytes on disk.
# NOTE(review): the ./img directory is not created here and must already exist.
logo_url = 'https://www.baidu.com/img/flexible/logo/pc/result.png'
logo_response = requests.get(logo_url)
# Binary mode: the payload is image data, not text.
with open('./img/jwq.png', mode='wb') as out_file:
    out_file.write(logo_response.content)
import requests
# Request the Baidu home page while sending a fixed desktop-Chrome user-agent,
# then print the page, its length in characters and the headers actually sent.
browser_headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
}
response = requests.get('https://www.baidu.com', headers=browser_headers)
page_text = response.content.decode()  # decode once and reuse for both prints
print(page_text)
print(len(page_text))
print(response.request.headers)
import requests
import random
# Request the Baidu home page with a user-agent drawn at random from a fixed
# pool, then print the decoded page length and the headers actually sent.
UA_POOL = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
    'Opera/8.0 (Windows NT 5.1; U; en)',
    'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2 ',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0) ',
]
chosen_agent = random.choice(UA_POOL)
response = requests.get(
    'https://www.baidu.com',
    headers={'user-agent': chosen_agent},
)
decoded_length = len(response.content.decode())
print(decoded_length)
print(response.request.headers)
from fake_useragent import UserAgent
# Print one randomly chosen user-agent string supplied by fake_useragent.
random_agent = UserAgent().random
print(random_agent)
import requests
from fake_useragent import UserAgent
from urllib.parse import quote,unquote
# Run a Baidu search for a keyword typed by the user and print the result page.
agent_source = UserAgent()
keyword = input('请输入关键词:')
response = requests.get(
    'https://www.baidu.com/s',
    headers={'user-agent': agent_source.random},
    params={'wd': keyword},  # sent as the ?wd=... query string
)
print(response.content.decode())
import requests
from fake_useragent import UserAgent
# Download a cover image from the NetEase music CDN and save the raw bytes.
# NOTE(review): the img/ directory is not created here and must already exist.
agent_source = UserAgent()
image_url = 'https://p1.music.126.net/_JcHT6u-TYhxjDbO3IhVQA==/109951170537166630.jpg?imageView&quality=89'
image_response = requests.get(
    image_url,
    headers={'user-agent': agent_source.random},
)
with open('img/网易云.jpg', mode='wb') as image_file:
    image_file.write(image_response.content)
import requests
from fake_useragent import UserAgent
# Download an mp3 stream from QQ Music and save the raw bytes.
# NOTE(review): the audio file is written under video/ - confirm that is the
# intended directory; it is not created here and must already exist.
agent_source = UserAgent()
audio_url = 'https://ws6.stream.qqmusic.qq.com/RS02064dfdIM38rSZY.mp3?guid=7976864250&vkey=AE4590431EAD34766DBAA9BA1A3715B3B45721EE23180669EA694EB7CA1F0DB4C8DE867A9883D4E897ED4E6F2ECF600CDFD34C78F2C07E09__v215192d1e&uin=554242051&fromtag=120052'
request_headers = {'user-agent': agent_source.random}
audio_bytes = requests.get(audio_url, headers=request_headers).content
with open('video/晴天.mp3', mode='wb') as audio_file:
    audio_file.write(audio_bytes)
import requests
from fake_useragent import UserAgent
# Download a video segment from QQ Music and save the raw bytes.
# NOTE(review): the URL ends in .ts while the file is saved with an .mp4
# extension - confirm this is intended.
agent_source = UserAgent()
segment_url = 'https://mv6.music.tc.qq.com/44B177558A20632E722F75FB6A67025F0BFC15AB98CC0B58FD3FC79E00B2EEDC9FAC3DF26DD0A319EACA6B2A30D24E2CZZqqmusic_default__v21ea05e5a/qmmv_0b53feaagaaao4ae4d5t4vtvikiaamuqaa2a.f9944.ts'
segment_response = requests.get(
    segment_url,
    headers={'user-agent': agent_source.random},
)
with open('video/qq音乐.mp4', mode='wb') as video_file:
    video_file.write(segment_response.content)
import requests
from fake_useragent import UserAgent
# Save the first `page` listing pages of a Tieba forum as html/<name><n>.html,
# where <n> is the 1-based page number.
ua = UserAgent()
url = 'https://tieba.baidu.com/f?'
name = input('请输入关键词:')
page = int(input('请输入要保存的页数:'))
for i in range(page):
    params = {
        'kw': name,
        'ie': 'utf-8',
        # 'pn' is the listing offset and advances by 50 per page; a constant 0
        # would download the first page on every iteration.
        'pn': i * 50,
    }
    # A fresh random user-agent is drawn for every request.
    headers = {
        'user-agent': ua.random
    }
    res = requests.get(url, headers=headers, params=params)
    # Raw bytes are written unchanged, so no text encoding is involved.
    with open(f'html/{name}{i+1}.html', 'wb') as f:
        f.write(res.content)
import requests
from fake_useragent import UserAgent
class TieBa:
    """Fetch Baidu Tieba listing pages for a keyword and save each as HTML."""

    # Step applied to the 'pn' query parameter between consecutive pages.
    PAGE_STEP = 50

    def __init__(self):
        """Set the listing URL and pick one random user-agent for all requests."""
        self.url = 'https://tieba.baidu.com/f?'
        self.headers = {
            'user-agent': UserAgent().random
        }

    @staticmethod
    def build_params(name, page):
        """Return the query parameters for zero-based listing page *page*.

        Args:
            name: Keyword sent as the 'kw' parameter.
            page: Zero-based page index; converted to the 'pn' offset.
        """
        return {
            'kw': name,
            'ie': 'utf-8',
            'pn': page * TieBa.PAGE_STEP,
        }

    def send(self, params):
        """GET the listing URL with *params* and return the body as text."""
        res = requests.get(self.url, headers=self.headers, params=params)
        return res.text

    def save(self, page, con):
        """Write the text *con* to html/<page>.html encoded as UTF-8."""
        with open(f'html/{page}.html', 'w', encoding='utf-8') as f:
            f.write(con)

    def run(self):
        """Prompt for a keyword and a page count, then fetch and save each page."""
        name = input('请输入关键词:')
        pages = int(input('请输入要保存的页数:'))
        for page in range(pages):
            # The offset must follow the loop index; deriving it from the total
            # page count would request the same offset on every iteration.
            data = self.send(self.build_params(name, page))
            self.save(page, data)
# Build the scraper and start the interactive download.
TieBa().run()
import requests
from fake_useragent import UserAgent
import json
# Translate user-supplied text from Chinese ('zh') to English ('en') by
# POSTing it to the iciba endpoint and printing the 'out' field of the reply.
url = 'https://ifanyi.iciba.com/index.php?c=trans'
headers = {
    'user-agent': UserAgent().random
}
name = input('请输入翻译内容:')
post_data = {
    'from': 'zh',
    'to': 'en',
    'q': name,
}
res = requests.post(url, headers=headers, data=post_data)
# Force UTF-8 so res.text is decoded consistently before JSON parsing.
res.encoding = 'utf-8'
# Named `result` rather than `dict` so the builtin dict type is not shadowed
# for the rest of the module.
result = json.loads(res.text)
print(result['out'])
import requests
from fake_useragent import UserAgent
# Fetch the Baidu home page through a proxy, using the same proxy address for
# both http and https targets.
# NOTE(review): 1.1.1.1:9527 is presumably a placeholder - replace it with a
# working proxy before running.
proxy_map = dict.fromkeys(('http', 'https'), '1.1.1.1:9527')
response = requests.get(
    'https://www.baidu.com',
    headers={'user-agent': UserAgent().random},
    proxies=proxy_map,
)
print(response.content.decode())