Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2970c82

Browse files
committed
no message
1 parent 814d9bb commit 2970c82

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env python3
2+
# -*- coding:utf-8 -*-
3+
import requests, time, urllib.request, re, json, sys
4+
from bs4 import BeautifulSoup
5+
6+
class bilibili_crawl:
    """Fetch a bilibili video page and download its first listed video stream.

    The page is requested over HTTP, the title is read from the
    ``<span class="tit">`` element, and the stream URL is taken from the JSON
    assigned to ``window.__playinfo__`` in one of the page's inline scripts.
    The stream is saved in the current directory as ``<title>.mp4``.
    """

    # Browser-like User-Agent sent with both the page request and the download.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'

    # Captures the JSON assigned to window.__playinfo__ up to the end of line.
    _PLAYINFO_PATTERN = re.compile(r"window\.__playinfo__=(.*?)$", re.MULTILINE | re.DOTALL)

    # Characters replaced by '-' when the title is used as a file name.
    # The backslash is included so a title can never introduce a path
    # separator (the original pattern only covered the forward slash).
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    # Seconds to wait for the page request before giving up; without a
    # timeout requests.get can block indefinitely.
    _REQUEST_TIMEOUT = 10

    def __init__(self, bv):
        """Store the video page URL for the given BV id.

        :param bv: the BV identifier appended to the bilibili video URL
        """
        # Video page address.
        self.url = 'https://www.bilibili.com/video/' + bv
        # Time at which this crawler was created (download start time).
        self.start_time = time.time()

    def get_vedio_info(self):
        """Fetch the video page and extract the title and a stream URL.

        :return: ``{'title': ..., 'url': ...}`` on success, or ``None`` after
            printing a message when the request fails, the status code is not
            200, or the expected elements are not present in the page.
        """
        headers = {'User-Agent': self._USER_AGENT}

        # Only the network call can raise RequestException, so keep the try
        # body limited to it.
        try:
            response = requests.get(url=self.url, headers=headers,
                                    timeout=self._REQUEST_TIMEOUT)
        except requests.RequestException:
            print('视频链接错误,请重新更换')
            return None

        if response.status_code != 200:
            # Report the failure instead of silently returning None.
            print('视频链接错误,请重新更换')
            return None

        soup = BeautifulSoup(response.text, 'html.parser')

        # Video title.
        title_tag = soup.find('span', class_='tit')
        # Video stream URL.
        video_url = self._find_video_url(soup)

        if title_tag is None or video_url is None:
            # Message: "could not parse the video info; the page layout may
            # have changed".
            print('未能解析视频信息,页面结构可能已变化')
            return None

        return {
            'title': title_tag.get_text(),
            'url': video_url
        }

    @classmethod
    def _find_video_url(cls, soup):
        """Return the first ``baseUrl`` under data.dash.video, or ``None``."""
        for script in soup.find_all('script'):
            # Tags without a single string child have .string == None.
            match = cls._PLAYINFO_PATTERN.search(script.string or '')
            if match is None:
                continue
            # Only the first script that carries the playinfo is considered.
            try:
                playinfo = json.loads(match.group(1))
                # Take the first stream entry that exposes a baseUrl.
                for stream in playinfo['data']['dash']['video']:
                    if 'baseUrl' in stream:
                        return stream['baseUrl']
            except (ValueError, KeyError, TypeError):
                # Malformed JSON or an unexpected layout: nothing usable.
                return None
            return None
        return None

    def download_video(self, video):
        """Download the stream described by *video* to ``<title>.mp4``.

        :param video: the dict returned by :meth:`get_vedio_info`; when it is
            ``None`` or empty a message is printed and nothing is downloaded
        """
        if not video:
            # Message: "no video info available, cannot download".
            print('没有可用的视频信息,无法下载')
            return

        title = self._UNSAFE_FILENAME_CHARS.sub('-', video['title'])
        url = video['url']
        filename = title + '.mp4'

        opener = urllib.request.build_opener()
        opener.addheaders = [
            ('Origin', 'https://www.bilibili.com'),
            ('Referer', self.url),
            ('User-Agent', self._USER_AGENT),
        ]
        # urlretrieve uses the globally installed opener, so the headers must
        # be installed before the download starts.
        urllib.request.install_opener(opener)
        urllib.request.urlretrieve(url=url, filename=filename, reporthook=self.schedule)

    def schedule(self, blocknum, blocksize, totalsize):
        """Progress callback for ``urllib.request.urlretrieve``.

        Writes a 100-character progress bar to stdout, ending in a carriage
        return so the next call overwrites it.

        :param blocknum: number of blocks transferred so far
        :param blocksize: size of one block in bytes
        :param totalsize: total size of the remote file; zero or negative
            when the size is unknown
        :return: None
        """
        if totalsize <= 0:
            # Size unknown: a percentage cannot be computed (and dividing by
            # zero would abort the download), so show the byte count instead.
            sys.stdout.write('%d bytes downloaded\r' % (blocknum * blocksize))
            sys.stdout.flush()
            return

        # The last block is usually partial, so cap the value at 100.
        percent = min(100.0, 100.0 * blocknum * blocksize / totalsize)
        bar = ('#' * round(percent)).ljust(100, '-')
        sys.stdout.write("%.2f%%" % percent + '[ ' + bar + ']' + '\r')
        sys.stdout.flush()
71+
72+
if __name__ == '__main__':
    # Use the BV id given on the command line, falling back to the sample id.
    bv_id = sys.argv[1] if len(sys.argv) > 1 else 'BV1Vh411Z7j5'
    bc = bilibili_crawl(bv_id)
    video = bc.get_vedio_info()
    # get_vedio_info returns None when the request fails or the page does not
    # answer with status 200; do not hand that to download_video.
    if video:
        bc.download_video(video)
    else:
        sys.exit(1)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /