📺

爬取B站视频、合成音画

""" @description:爬取B站视频和音频,并合成 @oath:视频、音频路径:process_video;合成后路径:finally_video @class:videoSpider """ from wyw.functions.videoSpider import VideoSpider from selenium import webdriver def get_cookie(url): user_data_dir = r"C:\Users\Administrator\AppData\Local\Google\Chrome\User Data" user_option = webdriver.ChromeOptions() user_option.add_argument("--headless") user_option.add_argument(f'--user-data-dir={user_data_dir}') # 初始化浏览器驱动 driver = webdriver.Chrome(options=user_option) # 打开网页 driver.get(url=url) # 获取浏览器Cookies cookies = driver.get_cookies() # 打印Cookies cookie_str = "; ".join([f"{cookie['name']}={cookie['value']}" for cookie in cookies]) # 关闭浏览器 driver.quit() return cookie_str if __name__ == '__main__': url = 'https://www.bilibili.com/video/BV1jt421c7yN/' cookie = get_cookie(url) headers = { "Referer": url, "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/124.0.0.0 Safari/537.36", "Cookie": cookie } video_spider = VideoSpider(url=url, headers=headers) # 调用函数 title = video_spider.video_spider() video_spider.video_compose()
bilibiliSpider.py
""" @description:接bilibiliSpider.py """ import requests import re import os import json import subprocess from selenium import webdriver class VideoSpider: def __init__(self, url, headers, title=None): self.title = title self.url = url self.headers = headers def video_spider(self): try: # 发送请求 response = requests.get(url=self.url, headers=self.headers) response.raise_for_status() # 如果响应状态不是200,将抛出异常 html = response.text # 解析数据:提取视频标题 title = re.findall('title="(.*?)"', html)[0] print(f"视频标题: {title}") # 解析视频信息 info = re.findall('window.__playinfo__=(.*?)</script>', html)[0] json_data = json.loads(info) # 提取视频链接和音频链接 video_url = json_data['data']['dash']['video'][0]['baseUrl'] audio_url = json_data['data']['dash']['audio'][0]['baseUrl'] print(f"视频链接: {video_url}") print(f"音频链接: {audio_url}") # 下载视频内容 video_content = requests.get(url=video_url, headers=self.headers).content # 下载音频内容 audio_content = requests.get(url=audio_url, headers=self.headers).content # 创建文件夹 if not os.path.exists(f'process_video\\{title}'): os.makedirs(f'process_video\\{title}') # 保存视频和音频数据 with open(f'process_video\\{title}\\{title}.mp4', mode='wb') as v: v.write(video_content) with open(f'process_video\\{title}\\{title}.mp3', mode='wb') as a: a.write(audio_content) print("视频音频下载完成!") self.title = title return title # 返回视频标题 except requests.exceptions.RequestException as e: print(f"请求错误: {e}") except json.JSONDecodeError: print("解析JSON时出错") except Exception as e: print(f"发生错误: {e}") def video_compose(self): if self.title: print("开始合并视频...") if not os.path.exists(f'finally_video\\{self.title}'): os.makedirs(f'finally_video\\{self.title}') # 合并成完整的视频内容 cmd = f'ffmpeg -hide_banner -i "process_video\\{self.title}\\{self.title}.mp4" -i "process_video\\{self.title}\\{self.title}.mp3" -c:v copy -c:a aac -strict experimental "finally_video\\{self.title}\\{self.title}.mp4"' # 调用命令 subprocess.run(cmd) else: print("title isNon")
videoSpider.py

总结

  • 登录后才能爬取到高码率的视频,因此get请求需要携带cookie
  • 需要获取登录状态的cookie才有效
  • B站的视频和音频是分开的,需要分别爬取并合成
  • 爬取下来的视频可能不支持预览,原因可能是播放器解码失败,可改用KMPlayer播放视频
  • ffmpeg的视频合成效率远高于moviepy