Commit 2970c82

committed

no message

1 parent 814d9bb commit 2970c82Copy full SHA for 2970c82

File tree

1 file changed

+75

-0

lines changed

moumoubaimifan/bilibili_crawler
- bilibili_crawl.py

1 file changed

+75

-0

lines changed

`‎moumoubaimifan/bilibili_crawler/bilibili_crawl.py`

Lines changed: 75 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,75 @@`
	`1`	`+# !/usr/bin/python`
	`2`	`+# -- coding:utf-8 --`
	`3`	`+import requests, time, urllib.request, re, json, sys`
	`4`	`+from bs4 import BeautifulSoup`
	`5`	`+`
	`6`	`+class bilibili_crawl:`
	`7`	`+`
	`8`	`+ def __init__(self, bv):`
	`9`	`+ # 视频页地址`
	`10`	`+ self.url = 'https://www.bilibili.com/video/' + bv`
	`11`	`+ # 下载开始时间`
	`12`	`+ self.start_time = time.time()`
	`13`	`+`
	`14`	`+ def get_vedio_info(self):`
	`15`	`+ try:`
	`16`	`+ headers = {`
	`17`	`+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'`
	`18`	`+ }`
	`19`	`+`
	`20`	`+ response = requests.get(url = self.url, headers = headers)`
	`21`	`+ if response.status_code == 200:`
	`22`	`+`
	`23`	`+ bs = BeautifulSoup(response.text, 'html.parser')`
	`24`	`+ # 取视频标题`
	`25`	`+ video_title = bs.find('span', class_='tit').get_text()`
	`26`	`+`
	`27`	`+ # 取视频链接`
	`28`	`+ pattern = re.compile(r"window\.__playinfo__=(.*?)$", re.MULTILINE \| re.DOTALL)`
	`29`	`+ script = bs.find("script", text=pattern)`
	`30`	`+ result = pattern.search(script.next).group(1)`
	`31`	`+`
	`32`	`+ temp = json.loads(result)`
	`33`	`+ # 取第一个视频链接`
	`34`	`+ for item in temp['data']['dash']['video']:`
	`35`	`+ if 'baseUrl' in item.keys():`
	`36`	`+ video_url = item['baseUrl']`
	`37`	`+ break`
	`38`	`+`
	`39`	`+ return {`
	`40`	`+ 'title': video_title,`
	`41`	`+ 'url': video_url`
	`42`	`+ }`
	`43`	`+ except requests.RequestException:`
	`44`	`+ print('视频链接错误,请重新更换')`
	`45`	`+`
	`46`	`+ def download_video(self, video):`
	`47`	`+ title = re.sub(r'[\/:*?"<>\|]', '-', video['title'])`
	`48`	`+ url = video['url']`
	`49`	`+ filename = title + '.mp4'`
	`50`	`+ opener = urllib.request.build_opener()`
	`51`	`+ opener.addheaders = [('Origin', 'https://www.bilibili.com'),`
	`52`	`+ ('Referer', self.url),`
	`53`	`+ ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36')]`
	`54`	`+ urllib.request.install_opener(opener)`
	`55`	`+ urllib.request.urlretrieve(url = url, filename = filename, reporthook = self.schedule)`
	`56`	`+`
	`57`	`+ def schedule(self, blocknum, blocksize, totalsize):`
	`58`	`+ '''`
	`59`	`+ urllib.urlretrieve 的回调函数`
	`60`	`+ :param blocknum: 已经下载的数据块`
	`61`	`+ :param blocksize: 数据块的大小`
	`62`	`+ :param totalsize: 远程文件的大小`
	`63`	`+ :return:`
	`64`	`+ '''`
	`65`	`+ percent = 100.0 * blocknum * blocksize / totalsize`
	`66`	`+ if percent > 100:`
	`67`	`+ percent = 100`
	`68`	`+ s = ('#' * round(percent)).ljust(100, '-')`
	`69`	`+ sys.stdout.write("%.2f%%" % percent + '[ ' + s +']' + '\r')`
	`70`	`+ sys.stdout.flush()`
	`71`	`+`
	`72`	`+if __name__ == '__main__':`
	`73`	`+ bc = bilibili_crawl('BV1Vh411Z7j5')`
	`74`	`+ vedio = bc.get_vedio_info()`
	`75`	`+ bc.download_video(vedio)`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 2970c82

File tree

1 file changed

1 file changed

`‎moumoubaimifan/bilibili_crawler/bilibili_crawl.py`

0 commit comments