"""
@description:爬取B站视频和音频,并合成
@path:视频、音频路径:process_video;合成后路径:finally_video
@class:videoSpider
"""
from wyw.functions.videoSpider import VideoSpider
from selenium import webdriver
def get_cookie(url, user_data_dir=None):
    """Open ``url`` in headless Chrome and return its cookies as a header string.

    Chrome is started with ``--user-data-dir`` pointing at an existing profile
    directory (presumably so that cookies of an already logged-in session are
    available -- verify against the profile actually used).

    Args:
        url: Page to load before reading the cookies.
        user_data_dir: Chrome "User Data" directory to reuse. When ``None``,
            the original hard-coded Administrator profile path is used.

    Returns:
        The cookies joined as ``"name=value; name2=value2"``, ready to be used
        as the value of a ``Cookie`` request header. Empty string if the
        browser reports no cookies.
    """
    if user_data_dir is None:
        user_data_dir = r"C:\Users\Administrator\AppData\Local\Google\Chrome\User Data"

    user_option = webdriver.ChromeOptions()
    user_option.add_argument("--headless")
    user_option.add_argument(f'--user-data-dir={user_data_dir}')

    # Start the browser driver.
    driver = webdriver.Chrome(options=user_option)
    try:
        # Load the page, then read the cookies the browser holds for it.
        driver.get(url=url)
        cookies = driver.get_cookies()
    finally:
        # Always shut the browser down, even if loading the page failed;
        # otherwise a headless Chrome process would be left running.
        driver.quit()

    # Serialise the cookies into a single Cookie-header value.
    return "; ".join(f"{cookie['name']}={cookie['value']}" for cookie in cookies)
if __name__ == '__main__':
    # Page of the video to fetch; it doubles as the Referer header value.
    page_url = 'https://www.bilibili.com/video/BV1jt421c7yN/'

    # Collect the browser cookies first so they can be passed along in the
    # request headers handed to VideoSpider.
    cookie_header = get_cookie(page_url)

    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36"
    )
    request_headers = dict(
        [
            ("Referer", page_url),
            ("User-Agent", user_agent),
            ("Cookie", cookie_header),
        ]
    )

    spider = VideoSpider(url=page_url, headers=request_headers)

    # Download the streams, then merge them; the returned title is not used
    # any further in this script.
    downloaded_title = spider.video_spider()
    spider.video_compose()
bilibiliSpider.py
"""
@description:接bilibiliSpider.py
"""
import requests
import re
import os
import json
import subprocess
from selenium import webdriver
class VideoSpider:
    """Download a Bilibili video's separate video/audio streams and merge them.

    The raw streams are written to ``process_video/<title>/`` and the merged
    file to ``finally_video/<title>/``, both relative to the current working
    directory. Merging is delegated to the ``ffmpeg`` executable.

    Attributes:
        title: Name used for the on-disk folders/files. ``None`` until
            ``video_spider`` succeeds, unless supplied to the constructor.
        url: URL of the video page.
        headers: Request headers sent with every HTTP request.
    """

    # Characters that are not allowed in a Windows file name (also covers "/").
    _ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
    # Connect/read timeout in seconds so a stalled connection cannot hang forever.
    _REQUEST_TIMEOUT = 30
    # Size of each chunk written to disk while streaming a download.
    _CHUNK_SIZE = 1024 * 1024

    def __init__(self, url, headers, title=None):
        """Store the page URL, the request headers and an optional title."""
        self.title = title
        self.url = url
        self.headers = headers

    @classmethod
    def _safe_filename(cls, title):
        """Return ``title`` with characters that are illegal in file names replaced."""
        cleaned = cls._ILLEGAL_FILENAME_CHARS.sub('_', title)
        # Windows also rejects names that end in a space or a dot.
        cleaned = cleaned.strip().rstrip(' .')
        return cleaned or 'video'

    @staticmethod
    def _stream_paths(title):
        """Return the (video, audio) paths of the raw streams for ``title``."""
        folder = os.path.join('process_video', title)
        return (
            os.path.join(folder, f'{title}.mp4'),
            os.path.join(folder, f'{title}.mp3'),
        )

    def _download(self, url, path):
        """Stream ``url`` into the file at ``path``; raise on an HTTP error status."""
        with requests.get(url=url, headers=self.headers, stream=True,
                          timeout=self._REQUEST_TIMEOUT) as response:
            # Fail before creating the file so an error page is never saved
            # as if it were media data.
            response.raise_for_status()
            with open(path, mode='wb') as out:
                for chunk in response.iter_content(chunk_size=self._CHUNK_SIZE):
                    out.write(chunk)

    def video_spider(self):
        """Fetch the page, then download its first video and audio streams.

        The title is taken from the first ``title="..."`` attribute in the
        page, and the stream URLs from the ``window.__playinfo__`` JSON.

        Returns:
            The file-system-safe title used for the saved files, or ``None``
            if anything failed (the error is printed, not raised).
        """
        try:
            # Request the video page.
            response = requests.get(url=self.url, headers=self.headers,
                                    timeout=self._REQUEST_TIMEOUT)
            response.raise_for_status()  # raises for 4xx/5xx responses
            html = response.text

            # Extract the video title.
            raw_title = re.findall(r'title="(.*?)"', html)[0]
            print(f"视频标题: {raw_title}")
            # The title becomes a folder and file name, so strip characters
            # the file system would reject.
            title = self._safe_filename(raw_title)

            # Extract the embedded play-info JSON.
            info = re.findall(r'window\.__playinfo__=(.*?)</script>', html)[0]
            json_data = json.loads(info)

            # Pick the first listed video and audio stream.
            dash = json_data['data']['dash']
            video_url = dash['video'][0]['baseUrl']
            audio_url = dash['audio'][0]['baseUrl']
            print(f"视频链接: {video_url}")
            print(f"音频链接: {audio_url}")

            # Create the target folder, then stream both files to disk.
            video_path, audio_path = self._stream_paths(title)
            os.makedirs(os.path.dirname(video_path), exist_ok=True)
            self._download(video_url, video_path)
            self._download(audio_url, audio_path)
            print("视频音频下载完成!")

            # Only remember the title once both files are on disk, so that
            # video_compose never runs against a failed download.
            self.title = title
            return title
        except requests.exceptions.RequestException as e:
            print(f"请求错误: {e}")
        except json.JSONDecodeError:
            print("解析JSON时出错")
        except Exception as e:
            print(f"发生错误: {e}")
        return None

    def video_compose(self):
        """Merge the downloaded video and audio streams with ffmpeg.

        Does nothing except print a message when no title is set. The video
        stream is copied as-is and the audio is encoded as AAC.

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        """
        if not self.title:
            print("title isNon")
            return

        print("开始合并视频...")
        video_path, audio_path = self._stream_paths(self.title)
        output_dir = os.path.join('finally_video', self.title)
        os.makedirs(output_dir, exist_ok=True)

        # An argument list (instead of one command string) works on every
        # platform and is immune to quotes or spaces in the title.
        cmd = [
            'ffmpeg', '-hide_banner',
            '-i', video_path,
            '-i', audio_path,
            '-c:v', 'copy',
            '-c:a', 'aac',
            '-strict', 'experimental',
            os.path.join(output_dir, f'{self.title}.mp4'),
        ]
        subprocess.run(cmd)
videoSpider.py
总结
- 登录后才能爬取到高码率的视频,因此发送get请求时需要在请求头中携带登录后的Cookie
- 爬取下来的视频可能不支持预览,可能原因是播放器解码失败,选用KMPlayer播放视频