1
+ # !/usr/bin/python
2
+ # -*- coding:utf-8 -*-
3
+ import requests , time , urllib .request , re , json , sys
4
+ from bs4 import BeautifulSoup
5
+
6
class bilibili_crawl:
    """Fetch a bilibili video page and download its first DASH video stream.

    Typical use is :meth:`get_vedio_info` followed by :meth:`download_video`.
    """

    # Browser-like User-Agent sent with both the page request and the download.
    _USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/84.0.4147.89 Safari/537.36')

    # Seconds to wait for the video page before giving up instead of hanging.
    _REQUEST_TIMEOUT = 10

    # Captures the JSON assigned to window.__playinfo__ in an inline script.
    _PLAYINFO_PATTERN = re.compile(
        r"window\.__playinfo__=(.*?)$", re.MULTILINE | re.DOTALL)

    # Characters replaced in file names; includes the backslash, which the
    # previous pattern r'[\/:*?"<>|]' did not match.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self, bv):
        """
        :param bv: BV id appended to the video page address,
            e.g. ``'BV1Vh411Z7j5'``.
        """
        # Video page address.
        self.url = 'https://www.bilibili.com/video/' + bv
        # Download start time (seconds since the epoch).
        self.start_time = time.time()

    def get_vedio_info(self):
        """Scrape the video title and the first video stream URL from the page.

        :return: ``{'title': ..., 'url': ...}`` on success. ``None`` when the
            request fails, the status code is not 200, or the page does not
            contain the expected title / play info; a message is printed in
            each of those cases.
        """
        headers = {'User-Agent': self._USER_AGENT}
        try:
            response = requests.get(url=self.url, headers=headers,
                                    timeout=self._REQUEST_TIMEOUT)
        except requests.RequestException:
            print('视频链接错误,请重新更换')
            return None

        if response.status_code != 200:
            print('视频页面请求失败,状态码:%s' % response.status_code)
            return None

        info = self._parse_page(response.text)
        if info is None:
            print('无法解析视频信息,页面结构可能已变化')
        return info

    def _parse_page(self, html):
        """Return the title/url dict parsed from *html*, or None if absent."""
        bs = BeautifulSoup(html, 'html.parser')
        # Video title.
        title_tag = bs.find('span', class_='tit')
        # Inline script holding the play info JSON.
        script = bs.find('script', string=self._PLAYINFO_PATTERN)
        if title_tag is None or script is None:
            return None

        match = self._PLAYINFO_PATTERN.search(script.string or '')
        if match is None:
            return None
        try:
            playinfo = json.loads(match.group(1))
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return None

        video_url = self._first_base_url(playinfo)
        if video_url is None:
            return None
        return {
            'title': title_tag.get_text(),
            'url': video_url,
        }

    @staticmethod
    def _first_base_url(playinfo):
        """Return the first ``baseUrl`` under data/dash/video, or None."""
        try:
            streams = playinfo['data']['dash']['video']
            return next(
                (item['baseUrl'] for item in streams if 'baseUrl' in item),
                None)
        except (KeyError, TypeError):
            # Missing keys or an unexpected JSON shape: no usable stream.
            return None

    @classmethod
    def _safe_filename(cls, title):
        """Return *title* with unsafe file name characters replaced by '-'."""
        return cls._UNSAFE_FILENAME_CHARS.sub('-', title)

    def download_video(self, video):
        """Download the stream described by *video* into the working directory.

        The file is named after the sanitized video title plus ``.mp4``;
        progress is reported through :meth:`schedule`.

        :param video: dict with ``'title'`` and ``'url'`` keys, as returned by
            :meth:`get_vedio_info`.
        """
        filename = self._safe_filename(video['title']) + '.mp4'
        url = video['url']
        opener = urllib.request.build_opener()
        opener.addheaders = [
            ('Origin', 'https://www.bilibili.com'),
            ('Referer', self.url),
            ('User-Agent', self._USER_AGENT),
        ]
        # urlretrieve goes through the globally installed opener, so the
        # headers above only take effect once the opener is installed.
        urllib.request.install_opener(opener)
        urllib.request.urlretrieve(url=url, filename=filename,
                                   reporthook=self.schedule)

    def schedule(self, blocknum, blocksize, totalsize):
        """Progress callback for ``urllib.request.urlretrieve``.

        Writes a 100-character progress bar to stdout.

        :param blocknum: number of blocks downloaded so far
        :param blocksize: size of one block
        :param totalsize: size of the remote file; zero or negative when the
            size is not known, in which case 0% is shown
        :return: None
        """
        if totalsize > 0:
            # Block arithmetic can overshoot the real size, so cap at 100.
            percent = min(100.0, 100.0 * blocknum * blocksize / totalsize)
        else:
            # Unknown or empty size: avoid dividing by zero and avoid a
            # negative percentage.
            percent = 0.0
        s = ('#' * round(percent)).ljust(100, '-')
        sys.stdout.write("%.2f%%" % percent + '[ ' + s + ']' + '\r ')
        sys.stdout.flush()
71
+
72
if __name__ == '__main__':
    # Script entry point: fetch the info for one hard-coded video and
    # download it into the current directory.
    bc = bilibili_crawl('BV1Vh411Z7j5')
    vedio = bc.get_vedio_info()
    # get_vedio_info() returns None when the page could not be fetched;
    # passing that on to download_video() would crash with a TypeError.
    if vedio is None:
        sys.exit('未能获取视频信息,已取消下载')
    bc.download_video(vedio)
0 commit comments