1.B站的视频:音频内容和视频内容是分开的,爬完之后需要进行视频合成。
2.每个B站视频的BV号(即代码中的bid)都是唯一的。
3.合成视频使用ffmpeg,通过官网下载,并且pip安装对应的python包,pip install ffmpeg-python。
win10系统请下载已编译好的版本(例如 ffmpeg-master-latest-win64-gpl-shared),
并将解压后包含bin目录的路径添加到环境变量,同时在py脚本中通过ffmpegPath指定ffmpeg.exe的位置。代码测试无误。只针对B站免费视频。仅用作学习,不做其他违规用途。
import os
import re
from pathlib import Path
from bs4 import BeautifulSoup
import requests
import ffmpeg # pip install ffmpeg-python
import json
import random
# Location of the ffmpeg executable (on Windows this is ffmpeg.exe).
ffmpegPath = r'D:\Users\ffmpeg-master-latest-win64-gpl-shared\bin\ffmpeg.exe'

# Pool of desktop-browser User-Agent strings; one is chosen at random for each
# BilibiliVideoAudio instance. Every entry needs its own trailing comma:
# adjacent string literals without one are silently concatenated into a single
# (invalid) User-Agent.
ua_list = [
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6726.400 QQBrowser/10.2.2265.400',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
]
def merge_data(video_name):
    r"""Mux a downloaded video track and audio track into one MP4 with ffmpeg.

    Reads ``<video_name>.mp4`` and ``<video_name>.mp3`` from ``..\crawler_tool``
    and writes ``<video_name>merge.mp4`` into its ``video`` sub-directory.
    Both streams are copied as-is (``vcodec='copy'``, ``acodec='copy'``), so
    nothing is re-encoded.

    Args:
        video_name: Base file name (without extension) shared by both inputs.
    """
    print('视频合成开始:')
    src_voice_path = Path(r'..\\crawler_tool\\' + video_name + ".mp3")
    src_video_path = Path(r'..\\crawler_tool\\' + video_name + ".mp4")
    output_file = r'..\\crawler_tool\\video\\' + video_name + "merge.mp4"

    # ffmpeg does not create missing directories; without this the merge fails
    # the first time the script is run on a fresh checkout.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    # Hand plain strings to ffmpeg-python: the file names are forwarded into
    # the subprocess argument list, where path objects are not accepted by
    # every Python version.
    videoFile = ffmpeg.input(str(src_video_path))
    audioFile = ffmpeg.input(str(src_voice_path))
    stream = ffmpeg.output(videoFile, audioFile, output_file, vcodec='copy', acodec='copy')
    ffmpeg.run(stream, cmd=ffmpegPath)
    print("视频合成完成")
class BilibiliVideoAudio:
    """Locate and download the separate video and audio streams of one Bilibili video."""

    # Seconds to wait for the server (connect, and between reads) before giving up.
    REQUEST_TIMEOUT = 30
    # Number of bytes fetched per iteration while streaming a media file to disk.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self, bid):
        """Remember the video id and build the HTTP headers used for every request.

        Args:
            bid: The video's BV identifier, interpolated into the page URL.
        """
        self.bid = bid
        self.headers = {
            "referer": "https://www.bilibili.com",
            "origin": "https://www.bilibili.com",
            # The header name must be "User-Agent". A key such as "user_agent"
            # is sent as an unrelated header and requests keeps its default UA.
            'User-Agent': random.choice(ua_list),
            'Accept-Encoding': 'identity',
        }

    def get_video_audio(self):
        """Fetch the video page and extract the title and the stream URLs.

        Returns:
            A dict with keys ``'title'`` (page title with every non-word
            character replaced by ``_``), ``'video_url'`` and ``'audio_url'``
            (the ``base_url`` of the first DASH video / audio entry).

        Raises:
            requests.RequestException: The page request failed or returned an
                HTTP error status.
            ValueError: The page has no title meta tag or no embedded
                ``window.__playinfo__`` JSON.
        """
        # Build the page URL and download the HTML.
        url = f'https://www.bilibili.com/video/{self.bid}?spm_id_from=333.851.b_7265636f6d6d656e64.6'
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        content = response.content.decode('utf-8')
        soup = BeautifulSoup(content, 'html.parser')

        # Video title, turned into a file-system friendly name.
        head = soup.head
        meta_tag = head.find('meta', attrs={'name': 'title'}) if head is not None else None
        if meta_tag is None or meta_tag.get('content') is None:
            raise ValueError(f"No title meta tag found on the page for {self.bid}")
        title = re.sub(r'[^\w]', '_', meta_tag['content'])

        # Stream URLs live in a JSON blob assigned to window.__playinfo__.
        pattern = r'window\.__playinfo__=({.*?})\s*</script>'
        match = re.search(pattern, content)
        if match is None:
            raise ValueError(f"No window.__playinfo__ data found on the page for {self.bid}")
        dash = json.loads(match.group(1))['data']['dash']

        return {
            'title': title,
            'video_url': dash['video'][0]['base_url'],
            'audio_url': dash['audio'][0]['base_url'],
        }

    def download_video_audio(self, url, filename):
        """Download ``url`` into the current directory under a sanitized ``filename``.

        The body is streamed to a temporary ``.part`` file and moved into place
        only after the transfer succeeded, so a failed download never leaves a
        truncated media file behind. Network and file errors are printed, not
        raised.

        Args:
            url: Direct URL of the video or audio stream.
            filename: Desired file name; invalid characters are replaced first.
        """
        # Strip characters that are not allowed in file names.
        filename = self.sanitize_filename(filename)
        download_path = os.path.join(r'.\\', filename)
        partial_path = download_path + '.part'
        try:
            # stream=True avoids holding a whole video in memory at once.
            with requests.get(url, headers=self.headers, stream=True,
                              timeout=self.REQUEST_TIMEOUT) as resp:
                # Without this an error page would be saved as if it were media.
                resp.raise_for_status()
                with open(partial_path, mode='wb') as file:
                    for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE):
                        file.write(chunk)
            os.replace(partial_path, download_path)
            print("{:*^30}".format(f"下载完成:{filename}"))
        except (requests.RequestException, OSError) as e:
            print(e)
            # Best-effort cleanup of the incomplete temporary file.
            try:
                os.remove(partial_path)
            except OSError:
                pass

    def sanitize_filename(self, filename):
        """Return ``filename`` with each of ``" * < > ? \\ | / : ,`` replaced by a space."""
        invalid_chars_regex = r'[\"*<>?\\|/:,]'
        return re.sub(invalid_chars_regex, ' ', filename)
def main(bids=None):
    """Download the video and audio of each requested video and merge them.

    Args:
        bids: A single BV id string or an iterable of BV id strings. When
            omitted, the built-in sample id ``BV1yV411P7tJ`` is used.
    """
    if bids is None:
        bids = ["BV1yV411P7tJ"]
    elif isinstance(bids, str):
        # A bare string would otherwise be iterated character by character.
        bids = [bids]

    for bid in bids:
        bilibili = BilibiliVideoAudio(bid)
        info = bilibili.get_video_audio()
        title = info['title']
        # The two streams are saved under the same base name so that
        # merge_data can find both of them from the title alone.
        bilibili.download_video_audio(info['video_url'], f"{title}.mp4")
        bilibili.download_video_audio(info['audio_url'], f"{title}.mp3")
        merge_data(title)
# Run the downloader only when executed as a script, so that importing this
# module does not trigger network requests and an ffmpeg run.
if __name__ == "__main__":
    main()