前言
本文是该专栏的第1篇,后面会持续更新对应平台干货知识,记得关注。
需求背景和说明:实现获取真实播放量,以及增加播放量
废话不多说,跟着笔者直接往下看正文,在文中将结合代码进行详细说明。(附完整代码)
正文
在开始之前,需准备代理IP。加入代理IP主要是为了防止平台触发的封控,导致请求异常。总之,在每次请求url的时候,随机带入某个IP防止被平台检测,便于达到增加播放量预期。
1. 代理
如果条件允许的情况下,有代理API更好,每次请求url调用API也更方便。如果没有的话,也不用担心,网上本身也有着很多免费国内代理,不过代理是否可用需要自己花点时间去检测,检测方法笔者这里就不详述了,后面有时间的话会单独针对这一块做详细说明。
下面是获取某个代理平台的免费IP方法,示例如下:
# Fetch proxy addresses from a free-proxy listing page.
# Takes no arguments and returns a list of "http://<ip>:<port>" strings.
# The list stays empty when no table row matches the XPath below.
def get_prox():
url = 'https://www.xiaoxiangdaili.com/free/4360' # may be changed
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Language': 'zh-CN,zh;q=0.9'
}
res = requests.get(url, headers=headers)
ip_list = []
if res.status_code == 200:
# Parse the HTML and select every <tr> of the table inside the
# div with class "freeProxyInfo".
result = etree.HTML(res.text)
all_ip = result.xpath('//div[@class="freeProxyInfo"]/table/tbody/tr')
for tr in all_ip:
# The first two <td> text nodes of a row are read as IP and port.
ip_data = tr.xpath('.//td/text()')
ip = ip_data[0]
port = ip_data[1]
ip_port = f"http://{ip}:{port}"
ip_list.append(ip_port)
else:
# Presumably pairs with the status check above (indentation is not
# preserved here): print the HTTP status code.
print(res.status_code)
return ip_list
注意:这里笔者仅以某个代理平台为例,url可能已失效。大致思路就是针对你的目标代理平台进行IP抓取,并存入到自己本地的IP代理池。
2. 获取播放量
代码如下:
def get_video_data(video_url, proxy, timeout=10):
    """Return the current view count of a Bilibili video.

    Args:
        video_url: Video page URL, e.g.
            ``https://www.bilibili.com/video/BV1tg4y1b7hY``. A trailing
            slash, query string or fragment is ignored when extracting the
            video id.
        proxy: Proxy mapping assigned to ``session.proxies``.
        timeout: Per-request timeout in seconds.

    Returns:
        The ``data.stat.view`` value reported by the ``web-interface/view``
        endpoint.

    Raises:
        ValueError: If no video id can be extracted from ``video_url``.
        RuntimeError: If an API response carries a non-zero ``code`` or the
            page list is empty.
        requests.RequestException: On network or HTTP-status errors.
    """
    # Share links often look like ".../video/BVxxx/?spm_id_from=..."; strip
    # the fragment, the query and any trailing slash before taking the last
    # path segment, otherwise the id comes out empty or polluted.
    path = video_url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
    bvid = path.rsplit('/', 1)[-1]
    if not bvid:
        raise ValueError(f"cannot extract a video id from {video_url!r}")

    def fetch_data(session, api_url):
        """GET ``api_url`` and return its ``data`` member, failing loudly."""
        res = session.get(url=api_url, timeout=timeout)
        res.raise_for_status()
        payload = res.json()
        if payload.get('code', 0) != 0:
            raise RuntimeError(
                f"API error {payload.get('code')}: {payload.get('message')}"
            )
        return payload['data']

    # The context manager closes the session even when a request fails.
    with requests.Session() as session:
        session.proxies = proxy
        pages = fetch_data(
            session,
            "https://api.bilibili.com/x/player/pagelist?bvid={}&jsonp=jsonp".format(bvid),
        )
        if not pages:
            raise RuntimeError(f"no pages returned for video {bvid}")
        cid = pages[0]['cid']
        info = fetch_data(
            session,
            "https://api.bilibili.com/x/web-interface/view?cid={}&bvid={}".format(cid, bvid),
        )
        view_s = info['stat']['view']

    print(f"当前视频{bvid}")
    return view_s
3. 播放量增加
代码如下:
# Load a video page in a fresh session, populate several cookies on that
# session, and POST one play-click report for the video.
#   start_url: video page URL; a "/" is appended before it is requested.
#   proxy:     mapping assigned to session.proxies.
# Has no return statement; prints a completion message at the end.
# NOTE(review): automated play reporting may violate the platform's terms of
# service; confirm that this use is permitted before running it.
def start_visit(start_url, proxy):
session = requests.Session()
session.proxies = proxy
url = f'{start_url}/'
res = session.get(url=url)
# Pull the JSON object assigned to window.__INITIAL_STATE__ out of the page
# HTML and read the video identifiers from it.
result = re.findall(r"window.__INITIAL_STATE__=(.+);\(function", res.text)
result = json.loads(result[0])
aid = result["aid"]
bvid = result["bvid"]
cid = result["videoData"]["cid"]
# stime is the current epoch second; ftime is 100-500 seconds earlier.
ftime = int(time.time()) - random.randint(100, 500)
stime = int(time.time())
# Copy the buvid3 cookie from the page response onto the session.
buvid3_dic = res.cookies.get_dict()
buvid3 = buvid3_dic["buvid3"]
session.cookies.set("buvid3", buvid3)
# b_lsid cookie: upper-case hex of the current millisecond timestamp, an
# underscore, then a random suffix built in the loop below.
a1 = hex(int(time.time()*1000))[2:].upper()
b_lsid = ""
# Presumably the next three statements form the loop body (indentation is
# not preserved here): eight random values are appended as upper-case hex.
for i in range(8):
v1 = math.ceil(16 * random.uniform(0,1))
v2 = hex(v1)[2:].upper()
b_lsid += v2
# Left-pad the suffix with "0" to a width of at least 8.
a2 = b_lsid.rjust(8,"0")
b_lsid = a1 + "_" + a2
session.cookies.set("b_lsid", b_lsid)
# _uuid cookie: a uuid4 string, then the millisecond timestamp modulo 1e5
# (stringified, with leading/trailing "." and "0" characters stripped and
# left-padded with "0" to width 5), then the literal "infoc".
u = str(uuid.uuid4())
time_sec = (str(int(time.time() * 1000) % 1e5)).strip(".0")
time_sec = time_sec.rjust(5, "0")
_uuid = u + time_sec + "infoc"
session.cookies.set("_uuid", _uuid)
# buvid4 cookie: read from data.b_4 of the finger/spi response.
res = session.get(url="https://api.bilibili.com/x/frontend/finger/spi")
dic = res.text
dic = json.loads(dic)
buvid4 = dic["data"]["b_4"]
session.cookies.set("buvid4", buvid4)
# sid cookie: taken from the cookies of the player/v2 response.
b3 = buvid3_dic["buvid3"]
# This dict is built but not referenced again in this function.
cookies = {
"buvid3":b3,
"CURRENT_BLACKGAP": "1",
"CURRENT_FNVAL": "4048"
}
resp = session.get(url="https://api.bilibili.com/x/player/v2?aid=385535851&cid=761767676")
sid_dic = resp.cookies.get_dict()
sid = sid_dic["sid"]
session.cookies.set("sid", sid)
session.cookies.set("CURRENT_FNVAL", "4048")
# Form fields sent with the click report.
url_add = "https://api.bilibili.com/x/click-interface/click/web/h5"
data = {
"aid": aid,
"cid": cid,
"bvid": bvid,
"part": "1",
"lv": "0",
"ftime": ftime,
"stime": stime,
"jsonp": "jsonp",
"type": "3",
"sub_type": "0",
"from_spmid": "333.1073.sub_channel.latest_video.click",
"auto_continued_play": "0",
"refer_url": "",
"bsource": "",
"spmid": "333.788.0.0"
}
session.post(url=url_add, data=data)
print('增加播放量完成')
4. 完整代码
完整代码如下:
import requests
import json
import random
import math
import uuid
from lxml import etree
import re
import time
# Fetch proxy addresses from a free-proxy listing page.
# Takes no arguments and returns a list of "http://<ip>:<port>" strings.
# The list stays empty when no table row matches the XPath below.
def get_prox():
url = 'https://www.xiaoxiangdaili.com/free/4360' # may be changed
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Language': 'zh-CN,zh;q=0.9'
}
res = requests.get(url, headers=headers)
ip_list = []
if res.status_code == 200:
# Parse the HTML and select every <tr> of the table inside the
# div with class "freeProxyInfo".
result = etree.HTML(res.text)
all_ip = result.xpath('//div[@class="freeProxyInfo"]/table/tbody/tr')
for tr in all_ip:
# The first two <td> text nodes of a row are read as IP and port.
ip_data = tr.xpath('.//td/text()')
ip = ip_data[0]
port = ip_data[1]
ip_port = f"http://{ip}:{port}"
ip_list.append(ip_port)
else:
# Presumably pairs with the status check above (indentation is not
# preserved here): print the HTTP status code.
print(res.status_code)
return ip_list
# Load a video page in a fresh session, populate several cookies on that
# session, and POST one play-click report for the video.
#   start_url: video page URL; a "/" is appended before it is requested.
#   proxy:     mapping assigned to session.proxies.
# Has no return statement; prints a completion message at the end.
# NOTE(review): automated play reporting may violate the platform's terms of
# service; confirm that this use is permitted before running it.
def start_visit(start_url, proxy):
session = requests.Session()
session.proxies = proxy
url = f'{start_url}/'
res = session.get(url=url)
# Pull the JSON object assigned to window.__INITIAL_STATE__ out of the page
# HTML and read the video identifiers from it.
result = re.findall(r"window.__INITIAL_STATE__=(.+);\(function", res.text)
result = json.loads(result[0])
aid = result["aid"]
bvid = result["bvid"]
cid = result["videoData"]["cid"]
# stime is the current epoch second; ftime is 100-500 seconds earlier.
ftime = int(time.time()) - random.randint(100, 500)
stime = int(time.time())
# Copy the buvid3 cookie from the page response onto the session.
buvid3_dic = res.cookies.get_dict()
buvid3 = buvid3_dic["buvid3"]
session.cookies.set("buvid3", buvid3)
# b_lsid cookie: upper-case hex of the current millisecond timestamp, an
# underscore, then a random suffix built in the loop below.
a1 = hex(int(time.time()*1000))[2:].upper()
b_lsid = ""
# Presumably the next three statements form the loop body (indentation is
# not preserved here): eight random values are appended as upper-case hex.
for i in range(8):
v1 = math.ceil(16 * random.uniform(0,1))
v2 = hex(v1)[2:].upper()
b_lsid += v2
# Left-pad the suffix with "0" to a width of at least 8.
a2 = b_lsid.rjust(8,"0")
b_lsid = a1 + "_" + a2
session.cookies.set("b_lsid", b_lsid)
# _uuid cookie: a uuid4 string, then the millisecond timestamp modulo 1e5
# (stringified, with leading/trailing "." and "0" characters stripped and
# left-padded with "0" to width 5), then the literal "infoc".
u = str(uuid.uuid4())
time_sec = (str(int(time.time() * 1000) % 1e5)).strip(".0")
time_sec = time_sec.rjust(5, "0")
_uuid = u + time_sec + "infoc"
session.cookies.set("_uuid", _uuid)
# buvid4 cookie: read from data.b_4 of the finger/spi response.
res = session.get(url="https://api.bilibili.com/x/frontend/finger/spi")
dic = res.text
dic = json.loads(dic)
buvid4 = dic["data"]["b_4"]
session.cookies.set("buvid4", buvid4)
# sid cookie: taken from the cookies of the player/v2 response.
b3 = buvid3_dic["buvid3"]
# This dict is built but not referenced again in this function.
cookies = {
"buvid3":b3,
"CURRENT_BLACKGAP": "1",
"CURRENT_FNVAL": "4048"
}
resp = session.get(url="https://api.bilibili.com/x/player/v2?aid=385535851&cid=761767676")
sid_dic = resp.cookies.get_dict()
sid = sid_dic["sid"]
session.cookies.set("sid", sid)
session.cookies.set("CURRENT_FNVAL", "4048")
# Form fields sent with the click report.
url_add = "https://api.bilibili.com/x/click-interface/click/web/h5"
data = {
"aid": aid,
"cid": cid,
"bvid": bvid,
"part": "1",
"lv": "0",
"ftime": ftime,
"stime": stime,
"jsonp": "jsonp",
"type": "3",
"sub_type": "0",
"from_spmid": "333.1073.sub_channel.latest_video.click",
"auto_continued_play": "0",
"refer_url": "",
"bsource": "",
"spmid": "333.788.0.0"
}
session.post(url=url_add, data=data)
print('增加播放量完成')
def get_video_data(video_url, proxy, timeout=10):
    """Return the current view count of a Bilibili video.

    Args:
        video_url: Video page URL, e.g.
            ``https://www.bilibili.com/video/BV1tg4y1b7hY``. A trailing
            slash, query string or fragment is ignored when extracting the
            video id.
        proxy: Proxy mapping assigned to ``session.proxies``.
        timeout: Per-request timeout in seconds.

    Returns:
        The ``data.stat.view`` value reported by the ``web-interface/view``
        endpoint.

    Raises:
        ValueError: If no video id can be extracted from ``video_url``.
        RuntimeError: If an API response carries a non-zero ``code`` or the
            page list is empty.
        requests.RequestException: On network or HTTP-status errors.
    """
    # Share links often look like ".../video/BVxxx/?spm_id_from=..."; strip
    # the fragment, the query and any trailing slash before taking the last
    # path segment, otherwise the id comes out empty or polluted.
    path = video_url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
    bvid = path.rsplit('/', 1)[-1]
    if not bvid:
        raise ValueError(f"cannot extract a video id from {video_url!r}")

    def fetch_data(session, api_url):
        """GET ``api_url`` and return its ``data`` member, failing loudly."""
        res = session.get(url=api_url, timeout=timeout)
        res.raise_for_status()
        payload = res.json()
        if payload.get('code', 0) != 0:
            raise RuntimeError(
                f"API error {payload.get('code')}: {payload.get('message')}"
            )
        return payload['data']

    # The context manager closes the session even when a request fails.
    with requests.Session() as session:
        session.proxies = proxy
        pages = fetch_data(
            session,
            "https://api.bilibili.com/x/player/pagelist?bvid={}&jsonp=jsonp".format(bvid),
        )
        if not pages:
            raise RuntimeError(f"no pages returned for video {bvid}")
        cid = pages[0]['cid']
        info = fetch_data(
            session,
            "https://api.bilibili.com/x/web-interface/view?cid={}&bvid={}".format(cid, bvid),
        )
        view_s = info['stat']['view']

    print(f"当前视频{bvid}")
    return view_s
# Script entry point: fetch a proxy list, prompt for a video URL and a repeat
# count, then for each round print the round number, print the value returned
# by get_video_data, call start_visit and sleep 10-30 seconds.
# NOTE(review): automated play reporting may violate the platform's terms of
# service; confirm that this use is permitted before running it.
if __name__ == "__main__":
# ip_list is only consumed by the commented-out selection lines below.
ip_list = get_prox()
url = input('请输入要访问的B站视频链接, url格式以"https://www.bilibili.com/video/BV1tg4y1b7hY"为例:')
num = input('输入要访问的次数:')
for w in range(int(num)):
print(f'当前第{w+1}次')
# Proxy selection
# ip_proxy = random.choice(ip_list)
# proxy = {"http": ip_proxy} # uncomment and switch to a working proxy source
proxy = {"http": "61.216.156.222:60808"} # this IP is only an example
# Any exception raised by the calls below is printed, not re-raised.
try:
views = get_video_data(url, proxy)
print(views)
start_visit(url, proxy)
time.sleep(random.randint(10, 30))
except Exception as e:
print(e)
最后,笔者补充几点:
1. 程序运行的时候,需要输入对应的视频url,格式如下:
url = f'https://www.bilibili.com/video/{对应视频ID}'
2. 需要输入访问视频的次数,这里必须携带IP代理;
3. 每次请求之间需加入一定的等待时间,切勿频繁请求。
4. 如需将数据保存,可查阅相关方法;笔者在专栏“爬虫实战进阶”中也详细介绍过多种方法,可供参考。
如果还想了解爬虫的更多知识,后面我会持续分享,记得收藏并关注,后面的python干货在等着你。
如果喜欢本文或者本文对你有帮助的话,记得关注并点个赞哟,有问题和需求欢迎留言私信。