# video_bvid:
import os
import requests
import json
import re
from bs4 import BeautifulSoup
import subprocess
# from detail_video import video_bvid
# video_bvid is a single video ID supplied from outside this script
# (currently hard-coded below; the import above is disabled).
video_bvid = 'BV1cx421Q7ve'
class BilibiliVideoAudio:
    """Locate, download and merge the video and audio streams of one Bilibili video.

    The video page embeds a ``window.__playinfo__`` JSON blob that lists the
    DASH video and audio streams separately; this class extracts the first
    stream of each kind, downloads them, and can merge them with ffmpeg.
    """

    # Seconds to wait on connect/read. Without a timeout a stalled connection
    # would block the script forever.
    REQUEST_TIMEOUT = 30
    # Size of each chunk written to disk while streaming a download, so the
    # whole media file is never held in memory at once.
    CHUNK_SIZE = 1024 * 1024
    # Captures the JSON object assigned to window.__playinfo__ in the page.
    PLAYINFO_PATTERN = r'window\.__playinfo__=({.*?})\s*</script>'

    def __init__(self, bvid):
        """Store the video identifier and build the request headers.

        Args:
            bvid: The Bilibili video ID (the ``BV...`` string) to process.
        """
        self.bvid = bvid
        # Browser-like headers sent with every request made by this object.
        # NOTE(review): advertising 'br' presumably requires a Brotli decoder
        # to be installed for the page response to decode correctly - confirm.
        self.headers = {
            "referer": "https://search.bilibili.com/all?keyword=%E4%B8%BB%E6%92%AD%E8%AF%B4%E8%81%94%E6%92%AD&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page=4&o=90",
            "origin": "https://search.bilibili.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
            'Accept-Encoding': 'gzip, deflate, br'
        }

    @staticmethod
    def _extract_playinfo(content):
        """Return the parsed ``window.__playinfo__`` JSON found in page HTML.

        Raises:
            ValueError: If the page does not contain the playinfo assignment.
        """
        match = re.search(BilibiliVideoAudio.PLAYINFO_PATTERN, content)
        if match is None:
            raise ValueError('window.__playinfo__ was not found in the page content')
        return json.loads(match.group(1))

    def get_video_audio(self):
        """Fetch the video page and return its title and stream URLs.

        Returns:
            dict: ``{'title': ..., 'video_url': ..., 'audio_url': ...}`` where
            the URLs are the ``base_url`` of the first DASH video stream and
            the first DASH audio stream listed in the playinfo.

        Raises:
            requests.HTTPError: If the page request returns an error status.
            ValueError: If the title meta tag or the playinfo JSON is missing.
            KeyError / IndexError: If the playinfo has no DASH stream entries.
        """
        url = f'https://www.bilibili.com/video/{self.bvid}/?spm_id_from=333.337.search-card.all.click&vd_source=14378ecd144bed421affe1fe0ddd8981'
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        # Fail early on an error status instead of trying to parse an error page.
        response.raise_for_status()
        content = response.content.decode('utf-8')

        # The title is published in <meta name="title" content="...">.
        soup = BeautifulSoup(content, 'html.parser')
        meta_tag = soup.head.find('meta', attrs={'name': 'title'}) if soup.head else None
        title = meta_tag.get('content') if meta_tag is not None else None
        if title is None:
            raise ValueError(f'title meta tag was not found for video {self.bvid}')

        data = self._extract_playinfo(content)
        dash = data['data']['dash']
        return {
            'title': title,
            'video_url': dash['video'][0]['base_url'],
            'audio_url': dash['audio'][0]['base_url']
        }

    def download_video_audio(self, url, filename, download_dir='D:\\video'):
        """Download one stream to ``download_dir`` under a sanitized file name.

        The response is streamed to a temporary ``.part`` file and renamed to
        its final name only after the transfer completed, so a failed download
        never leaves a truncated file at the final path. Any failure is
        printed rather than raised (best-effort behaviour).

        Args:
            url: Direct URL of the video or audio stream.
            filename: Desired file name; invalid characters are replaced.
            download_dir: Target directory, created if it does not exist.
        """
        filename = self.sanitize_filename(filename)
        download_path = os.path.join(download_dir, filename)
        partial_path = download_path + '.part'
        try:
            os.makedirs(download_dir, exist_ok=True)
            with requests.get(url, headers=self.headers, stream=True,
                              timeout=self.REQUEST_TIMEOUT) as resp:
                # Do not save an HTTP error body (e.g. 403) as a media file.
                resp.raise_for_status()
                with open(partial_path, mode='wb') as file:
                    for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE):
                        file.write(chunk)
            os.replace(partial_path, download_path)
            print("{:*^30}".format(f"下载完成:{filename}"))
        except Exception as e:
            print(e)
            # Best-effort cleanup of the incomplete temporary file; it may not
            # exist if the failure happened before anything was written.
            try:
                os.remove(partial_path)
            except OSError:
                pass

    def sanitize_filename(self, filename):
        """Return ``filename`` with each invalid file-name character replaced by a space."""
        invalid_chars_regex = r'[\"*<>?\\|/:,]'
        return re.sub(invalid_chars_regex, ' ', filename)

    def merge_video_audio(self, video_path, audio_path, output_path):
        """Merge a video file and an audio file into ``output_path`` using ffmpeg.

        Streams are copied without re-encoding and an existing output file is
        overwritten. A non-zero ffmpeg exit status is printed rather than
        raised.

        Args:
            video_path: Path of the downloaded video stream.
            audio_path: Path of the downloaded audio stream.
            output_path: Path of the merged output file.
        """
        command = [
            'ffmpeg',
            '-y',              # overwrite the output file if it already exists
            '-i', video_path,  # input video
            '-i', audio_path,  # input audio
            '-c', 'copy',      # copy the streams as-is, no transcoding
            output_path
        ]
        try:
            subprocess.run(command, check=True)
            print(f"视频和音频合并完成:{output_path}")
        except subprocess.CalledProcessError as e:
            print(f"合并失败: {e}")
def main():
    """Download the video and audio streams for ``video_bvid`` and merge them.

    The streams are saved under ``D:\\video`` and the merged file is written to
    ``D:\\processed_videos``. Any exception raised along the way is caught and
    printed together with the video ID.
    """
    try:
        # Only a single bvid (Bilibili's unique video identifier) is processed.
        bilibili = BilibiliVideoAudio(video_bvid)
        video_audio_info = bilibili.get_video_audio()

        # Sanitize the title once, up front: download_video_audio() sanitizes
        # the file name it writes, so paths built from the raw title would
        # point at files that do not exist whenever the title contains
        # characters such as '/', ':' or '?'.
        title = bilibili.sanitize_filename(video_audio_info['title'])
        video_url = video_audio_info['video_url']
        audio_url = video_audio_info['audio_url']

        # Make sure both the download and the output directory exist.
        download_path = 'D:\\video'
        processed_videos_path = 'D:\\processed_videos'
        os.makedirs(download_path, exist_ok=True)
        os.makedirs(processed_videos_path, exist_ok=True)

        video_filename = f"{title}.mp4"
        audio_filename = f"{title}.mp3"
        output_filename = f"{title} - combined.mp4"

        video_file_path = os.path.join(download_path, video_filename)
        audio_file_path = os.path.join(download_path, audio_filename)
        output_file_path = os.path.join(processed_videos_path, output_filename)

        bilibili.download_video_audio(video_url, video_filename)
        bilibili.download_video_audio(audio_url, audio_filename)

        bilibili.merge_video_audio(video_file_path, audio_file_path, output_file_path)

        # Optional: delete the separate video and audio files after merging.
        # os.remove(video_file_path)
        # os.remove(audio_file_path)
    except Exception as ex:
        # Top-level boundary: report the failure for this video.
        print(f"处理视频/音频 {video_bvid} 失败: {ex}")
# Run the download-and-merge workflow; this executes whenever the module is
# loaded, including when it is imported.
main()