diff --git a/115.py b/115.py index 8cb93b7..6156e0d 100755 --- a/115.py +++ b/115.py @@ -32,13 +32,13 @@ ############################################################ # file extensions -mediatype = { +mediatype = [ ".wma", ".wav", ".mp3", ".aac", ".ra", ".ram", ".mp2", ".ogg", ".aif", ".mpega", ".amr", ".mid", ".midi", ".m4a", ".m4v", ".wmv", ".rmvb", ".mpeg4", ".mpeg2", ".flv", ".avi", ".3gp", ".mpga", ".qt", ".rm", ".wmz", ".wmd", ".wvx", ".wmx", ".wm", ".swf", ".mpg", ".mp4", ".mkv", ".mpeg", ".mov", ".mdf", ".iso", ".asf" -} +] s = '\x1b[%d;%dm%s\x1b[0m' # terminual color template diff --git a/91porn.py b/91porn.py index 358851c..c649171 100755 --- a/91porn.py +++ b/91porn.py @@ -9,6 +9,7 @@ import argparse import random import select +import urllib2 ############################################################ # wget exit status @@ -73,13 +74,14 @@ def get_infos(self): if r.ok: dlink = re.search( r'file=(http.+?)&', r.content).group(1) + dlink = urllib2.unquote(dlink) name = re.search( r'viewkey=([\d\w]+)', self.url).group(1) infos = { 'name': '%s.mp4' % name, 'file': os.path.join(os.getcwd(), '%s.mp4' % name), 'dir_': os.getcwd(), - 'dlink': dlink + 'dlink': dlink, } if not args.get_url: self.download(infos) diff --git a/README.md b/README.md index 53e74e4..81fa0af 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,61 @@ -## iScript +# iScript + +## pan.baidu.com.py 已经重构,不再维护 + +[**BaiduPCS-Py**](https://github.com/PeterDing/BaiduPCS-Py) 是 pan.baidu.com.py 的重构版,运行在 Python>= 3.6 + +[](https://gitter.im/PeterDing/iScript?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) > *[L]* *[W]* *[LW]* 分别表示,在linux, windows, linux和windows 下通过测试。 + > ***windows用户可在babun (https://github.com/babun/babun) 下运行。*** - | | | ---------|---------|---------| -*[L]* | [xiami.py](#xiami.py) | 下载或播放高品质虾米音乐(xiami.com) | -*[L]* | [pan.baidu.com.py](#pan.baidu.com.py) | 百度网盘的下载、离线下载、上传、播放、转存、文件操作 | -*[L]* | [bt.py](#bt.py) | magnet torrent 互转、及 过滤敏.感.词 | -*[L]* | 
[115.py](#115.py) | 115网盘的下载和播放 | -*[L]* | [yunpan.360.cn.py](#yunpan.360.cn.py) | 360网盘的下载 | -*[L]* | [music.baidu.com.py](#music.baidu.com.py) | 下载或播放高品质百度音乐(music.baidu.com) | -*[L]* | [music.163.com.py](#music.163.com.py) | 下载或播放高品质网易音乐(music.163.com) | -*[L]* | [flv_cmd.py](#flv_cmd.py) | 基于在线服务的视频解析 client - 支持下载、播放 | -*[L]* | [tumblr.py](#tumblr.py) | 下载某个tumblr.com的所有图片 | -*[L]* | [unzip.py](#unzip.py) | 解决linux下unzip乱码的问题 | -*[L]* | [ed2k_search.py](#ed2k_search.py) | 基于 donkey4u.com 的emule搜索 | -*[L]* | [91porn.py](#91porn.py) | 下载或播放91porn | -*[L]* | [ThunderLixianExporter.user.js](#ThunderLixianExporter.user.js) | A fork of https://github.com/binux/ThunderLixianExporter - 增加了mpv和mplayer的导出。 | - | 待续 | | + +*[L]* - [leetcode_problems.py](#leetcode_problems.py) - 下载Leetcode的算法题 +*[L]* - [xiami.py](#xiami.py) - 下载或播放高品质虾米音乐(xiami.com) +*[L]* - [pan.baidu.com.py](#pan.baidu.com.py) - 百度网盘的下载、离线下载、上传、播放、转存、文件操作 +*[L]* - [bt.py](#bt.py) - magnet torrent 互转、及 过滤敏.感.词 +*[L]* - [115.py](#115.py) - 115网盘的下载和播放 +*[L]* - [yunpan.360.cn.py](#yunpan.360.cn.py) - 360网盘的下载 +*[L]* - [music.baidu.com.py](#music.baidu.com.py) - 下载或播放高品质百度音乐(music.baidu.com) +*[L]* - [music.163.com.py](#music.163.com.py) - 下载或播放高品质网易音乐(music.163.com) +*[L]* - [flv_cmd.py](#flv_cmd.py) - 基于在线服务的视频解析 client - 支持下载、播放 +*[L]* - [tumblr.py](#tumblr.py) - 下载某个tumblr.com的所有图片、视频、音频 +*[L]* - [unzip.py](#unzip.py) - 解决linux下unzip乱码的问题 +*[L]* - [ed2k_search.py](#ed2k_search.py) - 基于 donkey4u.com 的emule搜索 +*[L]* - [91porn.py](#91porn.py) - 下载或播放91porn +*[L]* - [ThunderLixianExporter.user.js](#ThunderLixianExporter.user.js) - A fork of https://github.com/binux/ThunderLixianExporter - 增加了mpv和mplayer的导出。 --- + +### leetcode_problems.py - 下载Leetcode的算法题 + +#### 依赖 + +``` +python2-requests (https://github.com/kennethreitz/requests) + +python2-lxml + +``` + +#### 参数: + +``` + --index sort by index + --level sort by level + --tag sort by tag + --title sort by title + --rm_blank 移除题中的空行 + --line LINE 两题之间的空行 
+ -r, --redownload 重新下载数据 +``` + +下载的数据保持在 ./leecode_problems.pk +转成的txt在 './leecode problems.txt' + --- @@ -48,9 +81,11 @@ xiami.py 是一个虾米音乐的命令行(CLI)客户端。提供登录、下载 初次使用需要登录 xm login (原xiami账号) -**支持淘宝账户** xm logintaobao +~~**支持淘宝账户** xm logintaobao~~ + +~~**对于淘宝账户,登录后只保存有关虾米的cookies,删除了有关淘宝的cookies**~~ -**对于淘宝账户,登录后只保存有关虾米的cookies,删除了有关淘宝的cookies** +**淘宝登录加密算法无法破解,需要手动获取cookies (方法见下 手动添加cookie登录)** **vip账户**支持高品质音乐的下载和播放。 @@ -73,12 +108,6 @@ login login username login username password -# 淘宝账号登录 -gt -logintaobao -logintaobao username -logintaobao username password - signout # 退出登录 d 或 download url1 url2 # 下载 @@ -109,6 +138,11 @@ xm login xm login username xm login username password +# 手动添加cookie登录 +1. 用浏览器登录后,按F12,然后访问 https://www.xiami.com/album/123456 +2. 选择‘网络’或network,找到 123456,在其中找到 Cookie: xxx +3. 然后在终端运行 xm g "xxx" + # 退出登录 xm signout @@ -134,6 +168,9 @@ xm d http://www.xiami.com/chart/index/c/2?spm=a1z1s.2943549.6827465.6.VrEAoY xm d http://www.xiami.com/genre/detail/gid/2?spm=a1z1s.3057857.6850221.1.g9ySan xm d http://www.xiami.com/genre/detail/sid/2970?spm=a1z1s.3057857.6850221.4.pkepgt +# 下载 widget (虾米播播) +xm d http://www.xiami.com/widget/player-multi?uid=4350663&sid=1774531852,378713,3294421,1771778464,378728,378717,378727,1773346501,&width=990&height=346&mainColor=e29833&backColor=60362a&widget_from=4350663 + # 下载落网期刊 # 分析落网期刊的音乐后,在虾米上搜索并下载 xm d http://www.luoo.net/music/706 @@ -159,10 +196,13 @@ xm s http://www.xiami.com/artist/23460?spm=a1z1s.6928801.1561534521.115.ShW08b > http://kanoha.org/2011/08/30/xiami-absolute-address/ + > http://www.blackglory.me/xiami-vip-audition-with-no-quality-difference-between-downloading/ + > https://gist.github.com/lepture/1014329 + > 淘宝登录代码: https://github.com/ly0/xiami-tools --- @@ -170,6 +210,10 @@ xm s http://www.xiami.com/artist/23460?spm=a1z1s.6928801.1561534521.115.ShW08b ### pan.baidu.com.py - 百度网盘的下载、离线下载、上传、播放、转存、文件操作 +**pan.baidu.com.py 已经重构,不再维护** + 
+[**BaiduPCS-Py**](https://github.com/PeterDing/BaiduPCS-Py) 是 pan.baidu.com.py 的重构版,运行在 Python>= 3.6 + #### 1. 依赖 ``` @@ -177,19 +221,16 @@ wget aria2 (~ 1.18) -python2-rsa - -python2-pyasn1 - -python2-requests (https://github.com/kennethreitz/requests) +aget-rs (https://github.com/PeterDing/aget-rs/releases) -requests-toolbelt (https://github.com/sigmavirus24/requests-toolbelt) +pip2 install rsa pyasn1 requests requests-toolbelt mpv (http://mpv.io) # 可选依赖 shadowsocks # 用于加密上传。 # 用 python2 的 pip 安装 +pip2 install shadowsocks # 除了用pip安装包,还可以手动: https://github.com/PeterDing/iScript/wiki/%E6%89%8B%E5%8A%A8%E8%A7%A3%E5%86%B3pan.baidu.com.py%E4%BE%9D%E8%B5%96%E5%8C%85 @@ -207,6 +248,10 @@ pan.baidu.com.py 是一个百度网盘的命令行客户端。 **支持多帐号登录** +**现在只支持[用cookie登录](#cookie_login)** + +**支持cookie登录** + **支持加密上传**, 需要 shadowsocks **cd, ls 功能完全支持** @@ -217,6 +262,8 @@ pan.baidu.com.py 是一个百度网盘的命令行客户端。 下载工具默认为wget, 可用参数-a num选用aria2 +**支持用 aget 加速下载, 用法见下** + 下载的文件,保存在当前目录下。 下载默认为非递归,递归下载加 -R @@ -251,6 +298,7 @@ g login login username login username password +login username cookie # 删除帐号 userdelete 或 ud @@ -398,7 +446,13 @@ jca 或 jobclearall # 清除 *全部任务* #### 参数: ``` --a num, --aria2c num aria2c分段下载数量: eg: -a 10 +-a num, --aria2c num aria2c 分段下载数量: eg: -a 10 +-g num, --aget_s num aget 分段下载数量: eg: -g 100 +-k num, --aget_k size aget 分段大小: eg: -k 200K + -k 1M + -k 2M +--appid num 设置 app-id. 如果无法下载或下载慢, 尝试设置为 778750 +-o path, --outdir path 指定下载目录: eg: -o /path/to/directory -p, --play play with mpv -P password, --passwd password 分享密码,加密密码 -y, --yes yes # 用于 rmre, mvre, cpre, rnre !!慎用 @@ -420,6 +474,7 @@ jca 或 jobclearall # 清除 *全部任务* d -t dc,no # 下载并解密,不覆盖加密文件 dc -t no # 解密,不覆盖加密文件 d -t ie # ignore error, 忽略除Ctrl-C以外的下载错误 + d -t 8s # 检测文件是否是"百度8秒",如果是则不下载 p -t m3 # 播放流媒体(m3u8) s -t c # 连续转存 (如果转存出错,再次运行命令 # 可以从出错的地方开始,用于转存大量文件时) @@ -464,6 +519,17 @@ bp login username password # 一直用 bp login 即可 ``` + +#### cookie 登录: + +1. 打开 chrome 隐身模式窗口 +2. 在隐身模式窗口登录 pan.baidu.com +3. 
在登录后的页面打开 chrome 开发者工具(怎么打开自行google),选择 `Network` ,然后刷新页面。在刷新后的 `Network` 的 `Name` 列表中选中 `list?dir=...` 开头的一项,然后在右侧找到 `Cookie:` ,复制 `Cookie:` 后面的所有内容。 +4. 用 `pan.baidu.com.py` 登录,`password / cookie:` 处粘贴上面复制的内容。(粘贴后是看不见的)。 +5. 不要退出 pan.baidu.com,只是关闭隐身模式窗口就可以。 + +> 如果使用 cookie 登录,`username` 可以是任意的东西。 + #### 删除帐号: ``` @@ -502,7 +568,8 @@ bp cd ... ``` ## 下载、播放速度慢? -如果wiki中的速度解决方法不管用,可以试试加该参数 -t fs +如果无法下载或下载慢, 尝试设置参数 --appid 778750 +bp d /path/file --appid 778750 # 下载当前工作目录 (递归) bp d . -R @@ -529,11 +596,17 @@ bp d 'http://pan.baidu.com/share/link?shareid=1622654699&uk=1026372002&fid=21126 # 下载别人加密分享的*单个文件*,密码参数-s bp d http://pan.baidu.com/s/1i3FVlw5 -s vuej -# 用aria2下载 +# 用aria2 下载 bp d http://pan.baidu.com/s/1i3FVlw5 -s vuej -a 5 bp d /movie/her.mkv -a 4 bp d url -s [secret] -a 10 +# 用 aget 下载 +bp d http://pan.baidu.com/s/1i3FVlw5 -s vuej -g 100 +bp d /movie/her.mkv -g 100 -k 200K +bp d url -s [secret] -g 100 -k 100K +如果下载速度很慢,可以试试加大 -g, 减小 -k, -k 一般在 100K ~ 300K 之间合适 + # 下载并解码 ## 默认加密方法为 aes-256-cfb bp d /path/to/encrypted_file -t dc [-P password] # 覆盖加密文件 (默认) @@ -712,10 +785,16 @@ ls、重命名、移动、删除、复制、使用正则表达式进行文件操 > https://gist.github.com/HououinRedflag/6191023 + > https://github.com/banbanchs/pan-baidu-download/blob/master/bddown_core.py + > https://github.com/houtianze/bypy + +> 3个方法解决百度网盘限速: https://www.runningcheese.com/baiduyun + + --- @@ -734,6 +813,11 @@ magnet 和 torrent 的相互转换 过滤敏.感.词功能用于净网时期的 baidu, xunlei +在中国大陆使用代理可能有更好的效果: +使用代理有两种方法: +1. shadowsocks + proxychains +2. 
-p protocol://ip:port + ~~8.30日后,无法使用。 见 http://tieba.baidu.com/p/3265467666~~ [**百度云疑似解封,百度网盘内八秒视频部分恢复**](http://fuli.ba/baiduyunhuifuguankan.html) @@ -794,7 +878,7 @@ cr 或 ctre foo bar magnet_link1 /path/to/torrent1 [-d /path/to/save] #### 参数: ``` --p PROXY, --proxy PROXY proxy for torrage.com, eg: -p 127.0.0.1:8087 (默认) +-p PROXY, --proxy PROXY proxy for torrage.com, eg: -p "socks5://127.0.0.1:8883" -t TYPE_, --type_ TYPE_ 类型参数: -t n (默认) 用数字替换文件名 -t be64 用base64加密文件名,torrent用百度下载后,可用 pan.baidu.com.py rnr /path -t f,bd64 改回原名字 @@ -835,8 +919,10 @@ bt c magnet_link -t be64 > http://blog.chinaunix.net/uid-28450123-id-4051635.html + > http://en.wikipedia.org/wiki/Torrent_file + --- @@ -932,6 +1018,8 @@ pan115 -p url ### yunpan.360.cn.py - 360网盘的下载 +**!!!脚本已不再维护!!!** + #### 1. 依赖 ``` @@ -1117,6 +1205,7 @@ nm -p url > https://github.com/yanunon/NeteaseCloudMusic/wiki/%E7%BD%91%E6%98%93%E4%BA%91%E9%9F%B3%E4%B9%90API%E5%88%86%E6%9E%90 + > http://s3.music.126.net/s/2/core.js --- @@ -1124,6 +1213,10 @@ nm -p url ### flv_cmd.py - 基于在线服务的视频解析 client - 支持下载、播放 +**!!!脚本已不再维护!!!** + +**请使用 youtube-dl or you-get** + #### 1. 依赖 ``` @@ -1152,7 +1245,7 @@ http://flvgo.com/sites #### 3. 用法 -fl是flvxz_cl.py的马甲 (alias fl='python2 /path/to/flvxz_cl.py') +fl是flv_cmd.py的马甲 (alias fl='python2 /path/to/flv_cmd.py') #### 下载: @@ -1172,20 +1265,24 @@ fl url -p > https://github.com/soimort/you-get + > https://github.com/iambus/youku-lixian + > https://github.com/rg3/youtube-dl --- -### tumblr.py - 下载某个tumblr.com的所有图片 +### tumblr.py - 下载某个tumblr.com的所有图片、视频、音频 #### 1. 
依赖 ``` wget +mpv (http://mpv.io) + python2-requests (https://github.com/kennethreitz/requests) ``` @@ -1199,16 +1296,30 @@ python2-requests (https://github.com/kennethreitz/requests) 下载的文件,保存在当前目录下。 -默认下载原图。 +默认下载图片(原图)。 支持连续下载,下载进度储存在下载文件夹内的 json.json。 +**正确退出程序使用 Ctrl-C** +**下载 更新的图片或其他 用 tumblr --update URL, 或 删除 json.json** + #### 参数: ``` -p PROCESSES, --processes PROCESSES 指定多进程数,默认为10个,最多为20个 eg: -p 20 -c, --check 尝试修复未下载成功的图片 -t TAG, --tag TAG 下载特定tag的图片, eg: -t beautiful + +-P, --play play with mpv +-A, --audio download audios +-V, --video download videos +-q, --quiet quiet + +--update 下载新发布的东西 +--redownload 重新遍历所有的东西,如果有漏掉的东西则下载 +--proxy protocol://address:port 设置代理 + +-f OFFSET, --offset OFFSET 从第offset个开始,只对 -V 有用。 ``` #### 3. 用法 @@ -1216,13 +1327,44 @@ python2-requests (https://github.com/kennethreitz/requests) tm是tumblr.py的马甲 (alias tm='python2 /path/to/tumblr.py') ``` -# 下载某个tumblr -tm http://sosuperawesome.tumblr.com/ -tm http://sosuperawesome.tumblr.com/ -t beautiful +# 下载图片 +tm http://sosuperawesome.tumblr.com +tm http://sosuperawesome.tumblr.com -t beautiful + +# 下载图片(使用代理) +tm http://sosuperawesome.tumblr.com -x socks5://127.0.0.1:1024 +tm http://sosuperawesome.tumblr.com -t beautiful -x socks5://127.0.0.1:1024 + +# 下载单张图片 +tm http://sosuperawesome.tumblr.com/post/121467716523/murosvur-on-etsy + +# 下载视频 +tm url -V +tm url -V -f 42 +tm url -V -t tag + +# 下载单个视频 +tm url/post/1234567890 -V + +# 播放视频 +tm url -VP +tm url -VP -f 42 + +# 下载音频 +tm url -A +tm url -A -f 42 +tm url -A -t tag + +# 下载单个音频 +tm url/post/1234567890 -A + +# 播放音频 +tm url -AP +tm url -AP -f 42 + +# 播放音频(quiet) +tm url -APq -# 指定tag下载 -tm beautiful -tm cool ``` --- @@ -1284,7 +1426,9 @@ mpv (http://mpv.io) #### 2. 
使用说明 -> 没有解决 *7个/day* 限制 +> youtube-dl 已支持91porn + +没有解决每个ip *10个/day* 限制 下载工具默认为wget, 可用参数-a选用aria2 diff --git a/bt.py b/bt.py index 918082f..63a74df 100755 --- a/bt.py +++ b/bt.py @@ -11,6 +11,10 @@ import urlparse import argparse +s = '\x1b[%d;%dm%s\x1b[0m' # terminual color template +letters = [i for i in '.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' \ + + '0123456789'] + ############################################################ headers = { "Connection": "keep-alive", @@ -27,11 +31,16 @@ ss = requests.session() ss.headers.update(headers) -s = u'\x1b[%d;%dm%s\x1b[0m' # terminual color template -letters = [i for i in '.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' \ - + '0123456789'] +def save_img(url, ext): + path = os.path.join(os.path.expanduser('~'), 'vcode.%s' % ext) + with open(path, 'w') as g: + data = requests.get(url).content + g.write(data) + print " ++ 验证码已保存至", s % (1, 97, path) + input_code = raw_input(s % (2, 92, " 输入验证码: ")) + return input_code -class bt(object): +class Bt(object): def transfer(self, string, tpath, foo=None, bar=None): self.dir_dict = {} self.sub_dir_index = 0 @@ -113,21 +122,22 @@ def _check_ext(self, ext): def get_torrent(self, hh): print s % (1, 93, '\n ++ get torrent from web') - def do(url, proxies=None, data=None, timeout=None): + def do(url, data=None, timeout=None): try: + proxies = {'http': args.proxy} if args.proxy else None r = ss.get(url, proxies=proxies, timeout=timeout) cnt = r.content if r.ok and cnt and '
' not in cnt \ and '4:name' in cnt: - print s % (1, 92, u' √ get torrent.') + print s % (1, 92, ' √ get torrent.') return cnt else: - print s % (1, 91, u' ×ばつ not get.') + print s % (1, 91, ' ×ばつ not get.') return None except: return None - ## with xunlei + ## xunlei print s % (1, 94, '>> try:'), 'bt.box.n0808.com' url = 'http://bt.box.n0808.com/%s/%s/%s.torrent' \ % (hh[:2], hh[-2:], hh) @@ -135,37 +145,37 @@ def do(url, proxies=None, data=None, timeout=None): result = do(url) if result: return result - ## with https://torrage.com + ## https://torrage.com if ss.headers.get('Referer'): del ss.headers['Referer'] - if args.proxy: - print s % (1, 94, '>> try:'), 'torrage.com' - proxies = { - 'http': args.proxy} if args.proxy else None - url = 'http://torrage.com/torrent/%s.torrent' % hh - try: - result = do(url, proxies=proxies) - if result: return result - except: - print s % (1, 91, ' !! proxy doesn\'t work:'), args.proxy + print s % (1, 94, '>> try:'), 'torrage.com' + url = 'http://torrage.com/torrent/%s.torrent' % hh + try: + result = do(url) + if result: return result + except: + pass - ## with http://btcache.me + ## http://btcache.me if ss.headers.get('Referer'): del ss.headers['Referer'] print s % (1, 94, '>> try:'), 'btcache.me' url = 'http://btcache.me/torrent/%s' % hh r = ss.get(url) key = re.search(r'name="key" value="(.+?)"', r.content) if key: + url = 'http://btcache.me/captcha' + vcode = save_img(url, 'png') data = { - "key": key.group(1) + "key": key.group(1), + "captcha": vcode } ss.headers['Referer'] = url url = 'http://btcache.me/download' - result = do(url, data=data, proxies=proxies) + result = do(url, data=data) if result: return result else: - print s % (1, 91, u' ×ばつ not get.') + print s % (1, 91, ' ×ばつ not get.') - ## some torrent stores + ## torrent stores if ss.headers.get('Referer'): del ss.headers['Referer'] urls = [ #'http://www.sobt.org/Tool/downbt?info=%s', @@ -225,6 +235,7 @@ def trans(tpath): dd = bencode.bdecode(string) except 
Exception as e: print s % (1, 91, ' !! torrent is wrong:'), e + return None info = bencode.bencode(dd['info']) hh = sha1(info).hexdigest() print '# %s' % tpath @@ -274,7 +285,7 @@ def do(): tpath = os.path.join(dir_, 'change_' + i) self.transfer(string, tpath, foo=foo, bar=bar) - paths.update(ipath) + # ??? paths.update(ipath) if os.getcwd() == os.path.abspath(dir_): do() elif os.getcwd() != os.path.abspath(dir_) and \ @@ -334,7 +345,6 @@ def main(argv): p.add_argument('-i', '--import_from', type=str, nargs='*', help='import magnet from local.') p.add_argument('-p', '--proxy', action='store', - default='socks5://127.0.0.1:8883', type=str, help='proxy for torrage.com, \ eg: -p "sooks5://127.0.0.1:8883"') p.add_argument('-d', '--directory', action='store', default=None, @@ -356,18 +366,18 @@ def main(argv): if comd == 'm' or comd == 'mt': # magnet to torrent urls = xxx if not args.import_from \ else import_magnet(args.import_from) - x = bt() + x = Bt() x.magnet2torrent(urls, dir_) elif comd == 't' or comd == 'tm': # torrent ot magnet paths = xxx - x = bt() + x = Bt() x.torrent2magnet(paths) elif comd == 'c' or comd == 'ct': # change ups = xxx if not args.import_from \ else import_magnet(args.import_from) - x = bt() + x = Bt() x.change(ups, dir_, foo=None, bar=None) elif comd == 'cr' or comd == 'ctre': # change @@ -375,7 +385,7 @@ def main(argv): bar = xxx[1] ups = xxx[2:] if not args.import_from \ else import_magnet(args.import_from) - x = bt() + x = Bt() x.change(ups, dir_, foo=foo, bar=bar) else: diff --git a/leetcode_problems.py b/leetcode_problems.py new file mode 100755 index 0000000..1c94063 --- /dev/null +++ b/leetcode_problems.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# -*- coding=utf-8 -*- + +import sys +import re +import os +import argparse +import requests +from lxml import html as lxml_html + +try: + import html +except ImportError: + import HTMLParser + html = HTMLParser.HTMLParser() + +try: + import cPickle as pk +except ImportError: + import pickle 
as pk + +class LeetcodeProblems(object): + def get_problems_info(self): + leetcode_url = 'https://leetcode.com/problemset/algorithms' + res = requests.get(leetcode_url) + if not res.ok: + print('request error') + sys.exit() + cm = res.text + cmt = cm.split('tbody>')[-2] + indexs = re.findall(r' (\d+)', cmt) + problem_urls = ['https://leetcode.com' + url \ + for url in re.findall( + r'', html).group(1) + r'', html).group(1) j = json.loads(text) songids = [i['id'] for i in j] d = modificate_text( @@ -364,12 +365,14 @@ def display_infos(self, i): print '>>', s % (2, 92, 'http://music.163.com/song/%s' \ % i['song_id']) print '>>', s % (2, 97, 'MP3-Quality'), ':', \ - s % (1, 92, q[i['mp3_quality']]) + s % (1, 92, str(q.get(i['mp3_quality']))) print '' def play(self, amount_songs, n=None): for i in self.song_infos: self.display_infos(i) + if not i['durl']: + continue cmd = 'mpv --really-quiet --audio-display no %s' % i['durl'] os.system(cmd) timeout = 1 @@ -401,7 +404,7 @@ def download(self, amount_songs, n=None): continue if not args.undownload: q = {'h': 'High', 'm': 'Middle', 'l': 'Low'} - mp3_quality = q[i['mp3_quality']] + mp3_quality = str(q.get(i['mp3_quality'])) if n == None: print(u'\n ++ 正在下载: #%s/%s# %s\n' \ u' ++ mp3_quality: %s' \ @@ -412,6 +415,9 @@ def download(self, amount_songs, n=None): u' ++ mp3_quality: %s' \ % (n, amount_songs, col, s % (1, 91, mp3_quality))) + if not i['durl']: + continue + file_name_for_wget = file_name.replace('`', '\`') cmd = 'wget -c -nv -U "%s" -O "%s.tmp" %s' \ % (headers['User-Agent'], file_name_for_wget, i['durl']) diff --git a/music.baidu.com.py b/music.baidu.com.py index 0185b46..c2c9016 100755 --- a/music.baidu.com.py +++ b/music.baidu.com.py @@ -85,13 +85,10 @@ def __init__(self, url): self.download = self.play if args.play else self.download - def get_songidlist(self, id_): - html = self.opener.open(self.template_album % id_).read() + def get_songidlist(self, song_id): + html = self.opener.open(self.template_album % 
song_id).read() songidlist = re.findall(r'/song/(\d+)', html) - api_json = self.opener.open(self.template_api % ','.join(songidlist)).read() - api_json = json.loads(api_json) - infos = api_json['data']['songList'] - return infos + return songidlist def get_cover(self, url): i = 1 @@ -122,53 +119,40 @@ def url_parser(self): elif '/song/' in self.url: self.song_id = re.search(r'/song/(\d+)', self.url).group(1) #print(s % (2, 92, u'\n -- 正在分析歌曲信息 ...')) - self.get_song_infos() + self.get_song_infos(self.song_id) else: print(s % (2, 91, u' 请正确输入baidu网址.')) + self.download() - def get_song_infos(self): - api_json = self.opener.open(self.template_api % self.song_id).read() + def get_song_infos(self, song_id, track_number=''): + api_json = self.opener.open(self.template_api % song_id).read() j = json.loads(api_json) song_info = {} song_info['song_id'] = unicode(j['data']['songList'][0]['songId']) - song_info['track'] = u'' + song_info['track'] = unicode(track_number) song_info['song_url'] = u'http://music.baidu.com/song/' + song_info['song_id'] song_info['song_name'] = modificate_text(j['data']['songList'][0]['songName']).strip() song_info['album_name'] = modificate_text(j['data']['songList'][0]['albumName']).strip() song_info['artist_name'] = modificate_text(j['data']['songList'][0]['artistName']).strip() song_info['album_pic_url'] = j['data']['songList'][0]['songPicRadio'] + song_info['file_name'] = song_info['artist_name'] + ' - ' + song_info['song_name'] + if song_info['track']: + song_info['file_name'] = song_info['track'].zfill(2) + '.' 
+ song_info['file_name'] if args.flac: - song_info['file_name'] = song_info['song_name'] + ' - ' + song_info['artist_name'] + '.flac' + song_info['file_name'] = song_info['file_name'] + '.flac' else: - song_info['file_name'] = song_info['song_name'] + ' - ' + song_info['artist_name'] + '.mp3' + song_info['file_name'] = song_info['file_name'] + '.mp3' song_info['durl'] = j['data']['songList'][0]['songLink'] self.song_infos.append(song_info) - self.download() def get_album_infos(self): songidlist = self.get_songidlist(self.album_id) - z = z_index(songidlist) - ii = 1 + track_number = 1 for i in songidlist: - song_info = {} - song_info['song_id'] = unicode(i['songId']) - song_info['song_url'] = u'http://music.baidu.com/song/' + song_info['song_id'] - song_info['track'] = unicode(ii) - song_info['song_name'] = modificate_text(i['songName']).strip() - song_info['artist_name'] = modificate_text(i['artistName']).strip() - song_info['album_pic_url'] = i['songPicRadio'] - if args.flac: - song_info['file_name'] = song_info['track'].zfill(z) + '.' + song_info['song_name'] + ' - ' + song_info['artist_name'] + '.flac' - else: - song_info['file_name'] = song_info['track'].zfill(z) + '.' 
+ song_info['song_name'] + ' - ' + song_info['artist_name'] + '.mp3' - song_info['album_name'] = modificate_text(i['albumName']).strip() \ - if i['albumName'] else modificate_text(self.song_infos[0]['album_name']) - song_info['durl'] = i['songLink'] - self.song_infos.append(song_info) - ii += 1 - d = modificate_text(self.song_infos[0]['album_name'] + ' - ' + self.song_infos[0]['artist_name']) + self.get_song_infos(i, track_number) + track_number += 1 + d = modificate_text(self.song_infos[0]['artist_name'] + ' - ' + self.song_infos[0]['album_name']) self.dir_ = os.path.join(os.getcwd().decode('utf8'), d) - self.download() def display_infos(self, i): print '\n ----------------' @@ -203,6 +187,7 @@ def download(self): file_name = os.path.join(dir_, t) if os.path.exists(file_name): ## if file exists, no get_durl ii += 1 + print(u'\n 文件已存在~') continue file_name_for_wget = file_name.replace('`', '\`') if 'zhangmenshiting.baidu.com' in i['durl'] or \ diff --git a/pan.baidu.com.py b/pan.baidu.com.py index b59dea1..2617123 100755 --- a/pan.baidu.com.py +++ b/pan.baidu.com.py @@ -3,6 +3,8 @@ import os import sys +import hashlib +import functools import requests requests.packages.urllib3.disable_warnings() # disable urllib3's warnings https://urllib3.readthedocs.org/en/latest/security.html#insecurerequestwarning from requests_toolbelt import MultipartEncoder @@ -68,24 +70,24 @@ ############################################################ # file extensions -mediatype = { +mediatype = [ ".wma", ".wav", ".mp3", ".aac", ".ra", ".ram", ".mp2", ".ogg", \ ".aif", ".mpega", ".amr", ".mid", ".midi", ".m4a", ".m4v", ".wmv", \ ".rmvb", ".mpeg4", ".mpeg2", ".flv", ".avi", ".3gp", ".mpga", ".qt", \ ".rm", ".wmz", ".wmd", ".wvx", ".wmx", ".wm", ".swf", ".mpg", ".mp4", \ ".mkv", ".mpeg", ".mov", ".mdf", ".iso", ".asf", ".vob" -} -imagetype = { +] +imagetype = [ ".jpg", ".jpeg", ".gif", ".bmp", ".png", ".jpe", ".cur", ".svg", \ ".svgz", ".tif", ".tiff", ".ico" -} -doctype = { +] +doctype = [ 
".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".vsd", ".txt", ".pdf", \ ".ods", ".ots", ".odt", ".rtf", ".dot", ".dotx", ".odm", ".pps", ".pot", \ ".xlt", ".xltx", ".csv", ".ppsx", ".potx", ".epub", ".apk", ".exe", \ ".msi", ".ipa", ".torrent", ".mobi" -} -archivetype = { +] +archivetype = [ ".7z", ".a", ".ace", ".afa", ".alz", ".android", ".apk", ".ar", \ ".arc", ".arj", ".b1", ".b1", ".ba", ".bh", ".bz2", ".cab", ".cab", \ ".cfs", ".chm", ".cpio", ".cpt", ".cqm", ".dar", ".dd", ".dgc", ".dmg", \ @@ -99,7 +101,349 @@ ".uc0", ".uc2", ".uca", ".ucn", ".ue2", ".uha", ".ur2", ".war", ".web", \ ".wim", ".x", ".xar", ".xp3", ".xz", ".yz1", ".z", ".zip", ".zipx", \ ".zoo", ".zpaq", ".zz" -} +] + +PHONE_MODEL_DATABASE = [ + "1501_M02", # 360 F4 + "1503-M02", # 360 N4 + "1505-A01", # 360 N4S + "303SH", # 夏普 Aquos Crystal Xx Mini 303SH + "304SH", # 夏普 Aquos Crystal Xx SoftBank + "305SH", # 夏普 Aquos Crystal Y + "306SH", # 夏普 Aquos Crystal 306SH + "360 Q5 Plus", # 360 Q5 Plus + "360 Q5", # 360 Q5 + "402SH", # 夏普 Aquos Crystal X + "502SH", # 夏普 Aquos Crystal Xx2 + "6607", # OPPO U3 + "A1001", # 一加手机1 + "ASUS_A001", # 华硕 ZenFone 3 Ultra + "ASUS_A001", # 华硕 ZenFone 3 Ultra + "ASUS_Z00ADB", # 华硕 ZenFone 2 + "ASUS_Z00UDB", # 华硕 Zenfone Selfie + "ASUS_Z00XSB", # 华硕 ZenFone Zoom + "ASUS_Z012DE", # 华硕 ZenFone 3 + "ASUS_Z012DE", # 华硕 ZenFone 3 + "ASUS_Z016D", # 华硕 ZenFone 3 尊爵 + "ATH-TL00H", # 华为 荣耀 7i + "Aster T", # Vertu Aster T + "BLN-AL10", # 华为 荣耀 畅玩6X + "BND-AL10", # 荣耀7X + "BTV-W09", # 华为 M3 + "CAM-UL00", # 华为 荣耀 畅玩5A + "Constellation V", # Vertu Constellation V + "D6683", # 索尼 Xperia Z3 Dual TD + "DIG-AL00", # 华为 畅享 6S + "E2312", # 索尼 Xperia M4 Aqua + "E2363 ", # 索尼 Xperia M4 Aqua Dual + "E5363", # 索尼 Xperia C4 + "E5563", # 索尼 Xperia C5 + "E5663", # 索尼 Xperia M5 + "E5823", # 索尼 Xperia Z5 Compact + "E6533", # 索尼 Xperia Z3+ + "E6683", # 索尼 Xperia Z5 + "E6883", # 索尼 Xperia Z5 Premium + "EBEN M2", # 8848 M2 + "EDI-AL10", # 华为 荣耀 Note 8 + "EVA-AL00", # 华为 P9 + "F100A", # 金立 
F100 + "F103B", # 金立 F103B + "F3116", # 索尼 Xperia XA + "F3216", # 索尼 Xperia XA Ultra + "F5121 / F5122", # 索尼 Xperia X + "F5321", # 索尼 Xperia X Compact + "F8132", # 索尼 Xperia X Performance + "F8332", # 索尼 Xperia XZ + "FRD-AL00", # 华为 荣耀 8 + "FS8001", # 夏普 C1 + "FS8002", # 夏普 A1 + "G0111", # 格力手机 1 + "G0215", # 格力手机 2 + "G8142", # 索尼Xperia XZ Premium G8142 + "G8342", # 索尼Xperia XZ1 + "GIONEE S9", # 金立 S9 + "GN5001S", # 金立 金钢 + "GN5003", # 金立 大金钢 + "GN8002S", # 金立 M6 Plus + "GN8003", # 金立 M6 + "GN9011", # 金立 S8 + "GN9012", # 金立 S6 Pro + "GRA-A0", # Coolpad Cool Play 6C + "H60-L11", # 华为 荣耀 6 + "HN3-U01", # 华为 荣耀 3 + "HTC D10w", # HTC Desire 10 Pro + "HTC E9pw", # HTC One E9+ + "HTC M10u", # HTC 10 + "HTC M8St", # HTC One M8 + "HTC M9PT", # HTC One M9+ + "HTC M9e", # HTC One M9 + "HTC One A9", # HTC One A9 + "HTC U-1w", # HTC U Ultra + "HTC X9u", # HTC One X9 + "HTC_M10h", # HTC 10 国际版 + "HUAWEI CAZ-AL00", # 华为 Nova + "HUAWEI CRR-UL00", # 华为 Mate S + "HUAWEI GRA-UL10", # 华为 P8 + "HUAWEI MLA-AL10", # 华为 麦芒 5 + "HUAWEI MT7-AL00", # 华为 mate 7 + "HUAWEI MT7-TL00", # 华为 Mate 7 + "HUAWEI NXT-AL10", # 华为 Mate 8 + "HUAWEI P7-L00", # 华为 P7 + "HUAWEI RIO-AL00", # 华为 麦芒 4 + "HUAWEI TAG-AL00", # 华为 畅享 5S + "HUAWEI VNS-AL00", # 华为 G9 + "IUNI N1", # 艾优尼 N1 + "IUNI i1", # 艾优尼 i1 + "KFAPWI", # Amazon Kindle Fire HDX 8.9 + "KFSOWI", # Amazon Kindle Fire HDX 7 + "KFTHWI", # Amazon Kindle Fire HD + "KIW-TL00H", # 华为 荣耀 畅玩5X + "KNT-AL10", # 华为 荣耀 V8 + "L55t", # 索尼 Xperia Z3 + "L55u", # 索尼 Xperia Z3 + "LEX626", # 乐视 乐S3 + "LEX720", # 乐视 乐Pro3 + "LG-D858", # LG G3 + "LG-H818", # LG G4 + "LG-H848", # LG G5 SE + "LG-H868", # LG G5 + "LG-H968", # LG V10 + "LON-AL00", # 华为 Mate 9 Pro + "LON-AL00-PD", # 华为 Mate 9 Porsche Design + "LT18i", # Sony Ericsson Xperia Arc S + "LT22i", # Sony Ericsson Xperia P + "LT26i", # Sony Ericsson Xperia S + "LT26ii", # Sony Ericsson Xperia SL + "LT26w", # Sony Ericsson Xperia Acro S + "Le X520", # 乐视 乐2 + "Le X620", # 乐视 乐2Pro + "Le X820", # 乐视 乐Max2 + "Lenovo 
A3580", # 联想 黄金斗士 A8 畅玩 + "Lenovo A7600-m", # 联想 黄金斗士 S8 + "Lenovo A938t", # 联想 黄金斗士 Note8 + "Lenovo K10e70", # 联想 乐檬K10 + "Lenovo K30-T", # 联想 乐檬 K3 + "Lenovo K32C36", # 联想 乐檬3 + "Lenovo K50-t3s", # 联想 乐檬 K3 Note + "Lenovo K52-T38", # 联想 乐檬 K5 Note + "Lenovo K52e78", # Lenovo K5 Note + "Lenovo P2c72", # 联想 P2 + "Lenovo X3c50", # 联想 乐檬 X3 + "Lenovo Z90-3", # 联想 VIBE Shot大拍 + "M040", # 魅族 MX 2 + "M1 E", # 魅蓝 E + "M2-801w", # 华为 M2 + "M2017", # 金立 M2017 + "M3", # EBEN M3 + "M355", # 魅族 MX 3 + "MHA-AL00", # 华为 Mate 9 + "MI 4LTE", # 小米手机4 + "MI 4S", # 小米手机4S + "MI 5", # 小米手机5 + "MI 5s Plus", # 小米手机5s Plus + "MI 5s", # 小米手机5s + "MI MAX", # 小米Max + "MI Note Pro", # 小米Note顶配版 + "MI PAD 2", # 小米平板 2 + "MIX", # 小米MIX + "MLA-UL00", # 华为 G9 Plus + "MP1503", # 美图 M6 + "MP1512", # 美图 M6s + "MT27i", # Sony Ericsson Xperia Sola + "MX4 Pro", # 魅族 MX 4 Pro + "MX4", # 魅族 MX 4 + "MX5", # 魅族 MX 5 + "MX6", # 魅族 MX 6 + "Meitu V4s", # 美图 V4s + "Meizu M3 Max", # 魅蓝max + "Meizu U20", # 魅蓝U20 + "Mi 5", + "Mi 6", + "Mi A1", # MI androidone + "Mi Note 2", # 小米Note2 + "MiTV2S-48", # 小米电视2s + "Moto G (4)", # 摩托罗拉 G4 Plus + "N1", # Nokia N1 + "NCE-AL00", # 华为 畅享 6 + "NTS-AL00", # 华为 荣耀 Magic + "NWI-AL10", # nova2s + "NX508J", # 努比亚 Z9 + "NX511J", # 努比亚 小牛4 Z9 Mini + "NX512J", # 努比亚 大牛 Z9 Max + "NX513J", # 努比亚 My 布拉格 + "NX513J", # 努比亚 布拉格S + "NX523J", # 努比亚 Z11 Max + "NX529J", # 努比亚 小牛5 Z11 Mini + "NX531J", # 努比亚 Z11 + "NX549J", # 努比亚 小牛6 Z11 MiniS + "NX563J", # 努比亚Z17 + "Nexus 4", + "Nexus 5X", + "Nexus 6", + "Nexus 6P", + "Nexus 7", + "Nexus 9", + "Nokia_X", # Nokia X + "Nokia_XL_4G", # Nokia XL + "ONE A2001", # 一加手机2 + "ONE E1001", # 一加手机X + "ONEPLUS A5010", # 一加5T + "OPPO A53", # OPPO A53 + "OPPO A59M", # OPPO A59 + "OPPO A59s", # OPPO A59s + "OPPO R11", + "OPPO R7", # OPPO R7 + "OPPO R7Plus", # OPPO R7Plus + "OPPO R7S", # OPPO R7S + "OPPO R7sPlus", # OPPO R7sPlus + "OPPO R9 Plustm A", # OPPO R9Plus + "OPPO R9s Plus", # OPPO R9s Plus + "OPPO R9s", + "OPPO R9s", # OPPO R9s + "OPPO R9tm", # 
OPPO R9 + "PE-TL10", # 华为 荣耀 6 Plus + "PLK-TL01H", # 华为 荣耀 7 + "Pro 5", # 魅族 Pro 5 + "Pro 6", # 魅族 Pro 6 + "Pro 6s", # 魅族 Pro 6s + "RM-1010", # Nokia Lumia 638 + "RM-1018", # Nokia Lumia 530 + "RM-1087", # Nokia Lumia 930 + "RM-1090", # Nokia Lumia 535 + "RM-867", # Nokia Lumia 920 + "RM-875", # Nokia Lumia 1020 + "RM-887", # Nokia Lumia 720 + "RM-892", # Nokia Lumia 925 + "RM-927", # Nokia Lumia 929 + "RM-937", # Nokia Lumia 1520 + "RM-975", # Nokia Lumia 635 + "RM-977", # Nokia Lumia 630 + "RM-984", # Nokia Lumia 830 + "RM-996", # Nokia Lumia 1320 + "Redmi 3S", # 红米3s + "Redmi 4", # 小米 红米4 + "Redmi 4A", # 小米 红米4A + "Redmi Note 2", # 小米 红米Note2 + "Redmi Note 3", # 小米 红米Note3 + "Redmi Note 4", # 小米 红米Note4 + "Redmi Pro", # 小米 红米Pro + "S3", # 佳域S3 + "SCL-TL00H", # 华为 荣耀 4A + "SD4930UR", # Amazon Fire Phone + "SH-03G", # 夏普 Aquos Zeta SH-03G + "SH-04F", # 夏普 Aquos Zeta SH-04F + "SHV31", # 夏普 Aquos Serie Mini SHV31 + "SM-A5100", # Samsung Galaxy A5 + "SM-A7100", # Samsung Galaxy A7 + "SM-A8000", # Samsung Galaxy A8 + "SM-A9000", # Samsung Galaxy A9 + "SM-A9100", # Samsung Galaxy A9 高配版 + "SM-C5000", # Samsung Galaxy C5 + "SM-C5010", # Samsung Galaxy C5 Pro + "SM-C7000", # Samsung Galaxy C7 + "SM-C7010", # Samsung Galaxy C7 Pro + "SM-C9000", # Samsung Galaxy C9 Pro + "SM-G1600", # Samsung Galaxy Folder + "SM-G5500", # Samsung Galaxy On5 + "SM-G6000", # Samsung Galaxy On7 + "SM-G7100", # Samsung Galaxy On7(2016) + "SM-G7200", # Samsung Galasy Grand Max + "SM-G9198", # Samsung 领世旗舰III + "SM-G9208", # Samsung Galaxy S6 + "SM-G9250", # Samsung Galasy S7 Edge + "SM-G9280", # Samsung Galaxy S6 Edge+ + "SM-G9300", # Samsung Galaxy S7 + "SM-G9350", # Samsung Galaxy S7 Edge + "SM-G9500", # Samsung Galaxy S8 + "SM-G9550", # Samsung Galaxy S8+ + "SM-G9600", # Samsung Galaxy S9 + "SM-G960F", # Galaxy S9 Dual SIM + "SM-G9650", # Samsung Galaxy S9+ + "SM-G965F", # Galaxy S9+ Dual SIM + "SM-J3109", # Samsung Galaxy J3 + "SM-J3110", # Samsung Galaxy J3 Pro + "SM-J327A", # Samsung 
Galaxy J3 Emerge + "SM-J5008", # Samsung Galaxy J5 + "SM-J7008", # Samsung Galaxy J7 + "SM-N9108V", # Samsung Galasy Note4 + "SM-N9200", # Samsung Galaxy Note5 + "SM-N9300", # Samsung Galaxy Note 7 + "SM-N935S", # Samsung Galaxy Note Fan Edition + "SM-N9500", # Samsung Galasy Note8 + "SM-W2015", # Samsung W2015 + "SM-W2016", # Samsung W2016 + "SM-W2017", # Samsung W2017 + "SM705", # 锤子 T1 + "SM801", # 锤子 T2 + "SM901", # 锤子 M1 + "SM919", # 锤子 M1L + "ST18i", # Sony Ericsson Xperia Ray + "ST25i", # Sony Ericsson Xperia U + "STV100-1", # 黑莓Priv + "Signature Touch", # Vertu Signature Touch + "TA-1000", # Nokia 6 + "TA-1000", # HMD Nokia 6 + "TA-1041", # Nokia 7 + "VERTU Ti", # Vertu Ti + "VIE-AL10", # 华为 P9 Plus + "VIVO X20", + "VIVO X20A", + "W909", # 金立 天鉴 W909 + "X500", # 乐视 乐1S + "X608", # 乐视 乐1 + "X800", # 乐视 乐1Pro + "X900", # 乐视 乐Max + "XT1085", # 摩托罗拉 X + "XT1570", # 摩托罗拉 X Style + "XT1581", # 摩托罗拉 X 极 + "XT1585", # 摩托罗拉 Droid Turbo 2 + "XT1635", # 摩托罗拉 Z Play + "XT1635-02", # 摩托罗拉 Z Play + "XT1650", # 摩托罗拉 Z + "XT1650-05", # 摩托罗拉 Z + "XT1706", # 摩托罗拉 E3 POWER + "YD201", # YotaPhone2 + "YD206", # YotaPhone2 + "YQ60", # 锤子 坚果 + "ZTE A2015", # 中兴 AXON 天机 + "ZTE A2017", # 中兴 AXON 天机 7 + "ZTE B2015", # 中兴 AXON 天机 MINI + "ZTE BV0720", # 中兴 Blade A2 + "ZTE BV0730", # 中兴 Blade A2 Plus + "ZTE C2016", # 中兴 AXON 天机 MAX + "ZTE C2017", # 中兴 AXON 天机 7 MAX + "ZTE G720C", # 中兴 星星2号 + "ZUK Z2121", # ZUK Z2 Pro + "ZUK Z2131", # ZUK Z2 + "ZUK Z2151", # ZUK Edge + "ZUK Z2155", # ZUK Edge L + "m030", # 魅族mx + "m1 metal", # 魅蓝metal + "m1 note", # 魅蓝 Note + "m1", # 魅蓝 + "m2 note", # 魅蓝 Note 2 + "m2", # 魅蓝 2 + "m3 note", # 魅蓝 Note 3 + "m3", # 魅蓝 3 + "m3s", # 魅蓝 3S + "m9", # 魅族m9 + "marlin", # Google Pixel XL + "sailfish", # Google Pixel + "vivo V3Max", # vivo V3Max + "vivo X6D", # vivo X6 + "vivo X6PlusD", # vivo X6Plus + "vivo X6S", # vivo X6S + "vivo X6SPlus", # vivo X6SPlus + "vivo X7", # vivo X7 + "vivo X7Plus", # vivo X7Plus + "vivo X9", # vivo X9 + "vivo X9Plus", # vivo X9Plus + 
"vivo Xplay5A 金", # vivo Xplay5 + "vivo Xplay6", # vivo Xplay6 + "vivo Y66", # vivo Y66 + "vivo Y67", # vivo Y67 + "z1221", # ZUK Z1 +] s = '\x1b[%s;%sm%s\x1b[0m' # terminual color template @@ -108,19 +452,53 @@ save_share_path = os.path.join(os.path.expanduser('~'), '.bp.ss.pickle') headers = { - "Accept":"text/html,application/xhtml+xml,application/xml; " \ - "q=0.9,image/webp,*/*;q=0.8", - "Accept-Encoding":"text/html", + "Accept": "application/json, text/javascript, text/html, */*; q=0.01", + "Accept-Encoding":"gzip, deflate, sdch", "Accept-Language":"en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2", - "Content-Type":"application/x-www-form-urlencoded", - "Referer":"http://www.baidu.com/", - "User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " \ - "(KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36" + "Referer":"http://pan.baidu.com/disk/home", + "X-Requested-With": "XMLHttpRequest", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36", + "Connection": "keep-alive", } +NETDISK_UA = 'netdisk;8.12.9;;android-android;7.0;JSbridge3.0.0' + ss = requests.session() ss.headers.update(headers) +def to_md5(buff): + assert isinstance(buff, (str, unicode)) + if isinstance(buff, unicode): + buff = buff.encode('utf-8') + return hashlib.md5(buff).hexdigest() + + +def to_sha1(buff): + assert isinstance(buff, (str, unicode)) + if isinstance(buff, unicode): + buff = buff.encode('utf-8') + return hashlib.sha1(buff).hexdigest() + +# 根据key计算出imei +def sum_IMEI(key): + hs = 53202347234687234 + for k in key: + hs += (hs << 5) + ord(k) + hs %= int(1e15) + if hs < int(1e14): + hs += int(1e14) + return str(int(hs)) + +# 根据key, 从 PHONE_MODEL_DATABASE 中取出手机型号 +def get_phone_model(key): + if len(PHONE_MODEL_DATABASE) <= 0: + return "S3" + hs = 2134 + for k in key: + hs += (hs << 4) + ord(k) + hs %= len(PHONE_MODEL_DATABASE) + return PHONE_MODEL_DATABASE[hs] + def import_shadowsocks(): 
try: global encrypt @@ -170,7 +548,7 @@ def fast_pcs_server(j): return j do = lambda dlink: \ - re.sub(r'://[^/]+?/', '://www.baidupcs.com/', dlink) + re.sub(r'://[^/]+?/', '://c.pcs.baidu.com/', dlink) #re.sub(r'://[^/]+?/', '://c.pcs.baidu.com/', dlink) if isinstance(j, dict) and j.get('info') and len(j['info'])> 0: @@ -184,12 +562,12 @@ def fast_pcs_server(j): def is_wenxintishi(dlink): while True: try: - r = ss.get(dlink, stream=True) + res = ss.head(dlink) break except requests.exceptions.ConnectionError: time.sleep(2) - url = r.url - if 'wenxintishi' in url: + location = res.headers.get('location', '') + if 'wenxintishi' in location: return True else: return False @@ -211,17 +589,33 @@ def print_process_bar(point, total, slice_size, speed = sizeof_fmt(slice_size / (now - start_time)) + '/s' t = int(nowpoint*length) - msg = '\r' + ' '.join([pre, '[%s%s]' % ('='*t, ' '*(length - t)), \ + msg = '\r' + ' '.join([pre, '|%s%s|' % ('='*t, ' '*(length - t)), \ str(percent) + '%', speed, msg, suf]) sys.stdout.write(msg) sys.stdout.flush() return now + +def is_cookie(cookie): + return 'BDUSS=' in cookie and 'PANPSC=' in cookie and len(cookie)> 150 + + +def parse_cookies(cookie): + cookies = {} + for c in cookie.split('; '): + k, v = c.split('=', 1) + cookies[k] = v + return cookies + + class panbaiducom_HOME(object): def __init__(self): self._download_do = self._play_do if args.play else self._download_do self.ondup = 'overwrite' self.accounts = self._check_cookie_file() + self.dsign = None + self.timestamp = None + self.user_id = None self.highlights = [] if any([args.tails, args.heads, args.includes]): @@ -238,6 +632,21 @@ def __init__(self): if 'ec' in args.type_ or 'dc' in args.type_ or args.comd == 'dc': import_shadowsocks() + def _request(self, method, url, action, **kwargs): + i = 0 + while i < 3: + i += 1 + response = ss.request(method, url, **kwargs) + if not (response.ok is True and response.status_code == 200): + continue + else: + return response + + 
self.save_cookies() + + print s % (1, 91, ' ! [{}] Server error'.format(action)) + sys.exit() + @staticmethod def _check_cookie_file(): def correct_do(): @@ -277,14 +686,23 @@ def init(self): user = u[0] self.user = user self.cwd = j[user]['cwd'] if j[user].get('cwd') else '/' + self.user_id = j[user].get('user_id') + self.bduss = j[user]['cookies']['BDUSS'] ss.cookies.update(j[user]['cookies']) else: print s % (1, 91, ' !! no account is online, please login or userchange') sys.exit(1) if not self.check_login(): - print s % (1, 91, ' !! cookie is invalid, please login\n'), u[0] + print s % (1, 91, ' !! cookie is invalid, please login.'), u[0] + del j[u[0]] + with open(cookie_file, 'w') as g: + pk.dump(j, g) sys.exit(1) + + if not self.user_id: + info = self._user_info(self.bduss) + self.user_id = info['user']['id'] else: print s % (1, 97, ' no account, please login') sys.exit(1) @@ -293,28 +711,34 @@ def init(self): def save_img(url, ext): path = os.path.join(os.path.expanduser('~'), 'vcode.%s' % ext) with open(path, 'w') as g: - data = urllib.urlopen(url).read() + res = ss.get(url) + data = res.content g.write(data) print " ++ 验证码已保存至", s % (1, 97, path) input_code = raw_input(s % (2, 92, " 输入验证码: ")) return input_code def check_login(self): - #print s % (1, 97, '\n -- check_login') - url = 'http://pan.baidu.com/api/quota' - j = ss.get(url).json() - if j['errno'] != 0: + # html_string = self._request('GET', 'http://pan.baidu.com/', 'check_login').content + info = self._meta(['/']) + + if info and info['errno'] == 0: + return True + else: print s % (1, 91, ' -- check_login fail\n') return False - else: #print s % (1, 92, ' -- check_login success\n') #self.get_dsign() #self.save_cookies() - return True def login(self, username, password): print s % (1, 97, '\n -- login') + if is_cookie(password): + cookies = parse_cookies(password) + ss.cookies.update(cookies) + return + # error_message: at _check_account_exception from # 
https://github.com/ly0/baidupcsapi/blob/master/baidupcsapi/api.py login_error_msg = { @@ -335,70 +759,86 @@ def login(self, username, password): '401007': '手机号关联了其他帐号,请选择登录' } + self._request('GET', 'http://www.baidu.com', 'login') + # Get token - token = self._get_bdstoken() + # token = self._get_bdstoken() + resp = self._request('GET', 'https://passport.baidu.com/v2/api/?getapi&tpl=netdisk' + '&apiver=v3&tt={}&class=login&logintype=basicLogin'.format(int(time.time())), + 'login') + + _json = json.loads(resp.content.replace('\'', '"')) + if _json['errInfo']['no'] != "0": + print s % (1, 91, ' ! Can\'t get token') + sys.exit(1) + + token = _json['data']['token'] + code_string = _json['data']['codeString'] # get publickey - url = 'https://passport.baidu.com/v2/getpublickey?token=%s' % token - r = ss.get(url) - j = json.loads(r.content.replace('\'', '"')) - pubkey = j['pubkey'] - key = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey) - password_encoded = base64.b64encode(rsa.encrypt(password, key)) - rsakey = j['key'] + # url = ('https://passport.baidu.com/v2/getpublickey?&token={}' + # '&tpl=netdisk&apiver=v3&tt={}').format(token, int(time.time())) + # r = ss.get(url) + # j = json.loads(r.content.replace('\'', '"')) + # pubkey = j['pubkey'] + # key = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey) + # password_encoded = base64.b64encode(rsa.encrypt(password, key)) + # rsakey = j['key'] # Construct post body - data = { - "staticpage": "http://www.baidu.com/cache/user/html/v3Jump.html", - "charset": "UTF-8", - "token": token, - "tpl": "pp", - "subpro": "", - "apiver": "v3", - "tt": int(time.time()), - "codestring": "", - "safeflg": "0", - "isPhone": "", - "quick_user": "0", - "logintype": "dialogLogin", - "logLoginType": "pc_loginDialog", - "idc": "", - "loginmerge": "true", - "splogin": "rate", - "username": username, - "password": password_encoded, - "verifycode": "", - "mem_pass": "on", - "rsakey": str(rsakey), - "crypttype": "12", - "ppui_logintime": "40228", - 
"callback": "parent.bd__pcbs__uvwly2", - } - + verifycode = '' while True: + data = { + "staticpage": "http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html", + "charset": "utf-8", + "token": token, + "tpl": "netdisk", + "subpro": "", + "apiver": "v3", + "tt": int(time.time()), + "codestring": code_string, + "safeflg": "0", + "u": "http://pan.baidu.com/", + "isPhone": "", + "quick_user": "0", + "logintype": "basicLogin", + "logLoginType": "pc_loginBasic", + "idc": "", + "loginmerge": "true", + "username": username, + "password": password, + "verifycode": verifycode, + "mem_pass": "on", + "rsakey": "", + "crypttype": "", + "ppui_logintime": "2602", + "callback": "parent.bd__pcbs__ahhlgk", + } + # Post! # XXX : do not handle errors url = 'https://passport.baidu.com/v2/api/?login' r = ss.post(url, data=data) # Callback for verify code if we need - #codestring = r.content[r.content.index('(')+1:r.content.index(')')] + #code_string = r.content[r.content.index('(')+1:r.content.index(')')] errno = re.search(r'err_no=(\d+)', r.content).group(1) - if errno == '0': + if ss.cookies.get('BDUSS'): + # ss.get("http://pan.baidu.com/disk/home") break elif errno in ('257', '3', '6'): print s % (1, 91, ' ! Error %s:' % errno), \ login_error_msg[errno] t = re.search('codeString=(.+?)&', r.content) - codestring = t.group(1) if t else "" - vcurl = 'https://passport.baidu.com/cgi-bin/genimage?'+codestring - verifycode = self.save_img(vcurl, 'gif') if codestring != "" else "" - data['codestring'] = codestring + code_string = t.group(1) if t else "" + vcurl = 'https://passport.baidu.com/cgi-bin/genimage?' + code_string + verifycode = self.save_img(vcurl, 'jpg') if code_string != "" else "" + data['codestring'] = code_string data['verifycode'] = verifycode #self.save_cookies() else: print s % (1, 91, ' ! 
Error %s:' % errno), \ - login_error_msg[errno] + login_error_msg.get(errno, "unknow, please feedback to author") sys.exit(1) def save_cookies(self, username=None, on=0, tocwd=False): @@ -408,6 +848,7 @@ def save_cookies(self, username=None, on=0, tocwd=False): accounts[username]['cookies'] = \ accounts[username].get('cookies', ss.cookies.get_dict()) accounts[username]['on'] = on + accounts[username]['user_id'] = self.user_id quota = self._get_quota() capacity = '%s/%s' % (sizeof_fmt(quota['used']), sizeof_fmt(quota['total'])) accounts[username]['capacity'] = capacity @@ -427,8 +868,58 @@ def _get_bdstoken(self): if hasattr(self, 'bdstoken'): return self.bdstoken - self.bdstoken = md5.new(str(time.time())).hexdigest() - return self.bdstoken + resp = self._request('GET', 'http://pan.baidu.com/disk/home', + '_get_bdstoken') + + html_string = resp.content + + mod = re.search(r'bdstoken[\'":\s]+([0-9a-f]{32})', html_string) + if mod: + self.bdstoken = mod.group(1) + return self.bdstoken + else: + print s % (1, 91, ' ! Can\'t get bdstoken') + sys.exit(1) + + # self.bdstoken = md5.new(str(time.time())).hexdigest() + + def _user_info(self, bduss): + timestamp = str(int(time.time())) + model = get_phone_model(bduss) + phoneIMEIStr = sum_IMEI(bduss) + + data = { + 'bdusstoken': bduss + '|null', + 'channel_id': '', + 'channel_uid': '', + 'stErrorNums': '0', + 'subapp_type': 'mini', + 'timestamp': timestamp + '922', + } + data['_client_type'] = '2' + data['_client_version'] = '7.0.0.0' + data['_phone_imei'] = phoneIMEIStr + data['from'] = 'mini_ad_wandoujia' + data['model'] = model + data['cuid'] = to_md5( + bduss + '_' + data['_client_version'] + '_' + data['_phone_imei'] + '_' + data['from'] + ).upper() + '|' + phoneIMEIStr[::-1] + data['sign'] = to_md5( + ''.join([k + '=' + data[k] for k in sorted(data.keys())]) + 'tiebaclient!!!' 
+ ).upper() + + headers = { + 'Content-Type': 'application/x-www-form-urlencoded', + 'Cookie': 'ka=open', + 'net': '1', + 'User-Agent': 'bdtb for Android 6.9.2.1', + 'client_logid': timestamp + '416', + 'Connection': 'Keep-Alive', + } + + resp = requests.post('http://tieba.baidu.com/c/s/login', headers=headers, data=data) + info = resp.json() + return info #def _sift(self, fileslist, name=None, size=None, time=None, head=None, tail=None, include=None, exclude=None): def _sift(self, fileslist, **arguments): @@ -566,8 +1057,10 @@ def _get_path(self, url): def _get_quota(self): url = 'http://pan.baidu.com/api/quota' - r = ss.get(url) - j = r.json() + + resp = self._request('GET', url, '_get_quota') + + j = resp.json() if j['errno'] != 0: print s % (1, 92, ' !! Error at _get_quota') sys.exit(1) @@ -591,36 +1084,41 @@ def _get_file_list(self, order, desc, dir_, num, all=True): "desc": 1, ## reversely "order": order, ## sort by name, or size, time "_": int(time.time()*1000), - #"bdstoken": self._get_bdstoken(), + # "bdstoken": self._get_bdstoken(), } if not desc: del p['desc'] url = 'http://pan.baidu.com/api/list' - infos = [] + path_list = [] while True: - r = ss.get(url, params=p, headers=theaders) + r = ss.get(url, params=p) j = r.json() if j['errno'] != 0: print s % (1, 91, ' error: _get_file_list'), '--', j sys.exit(1) else: - infos += j['list'] + path_ls = j['list'] + path_list += path_ls if not all: return j - if len(infos) == num: + if len(path_ls) == num: p['page'] += 1 else: - j['list'] = infos + j['list'] = path_list return j def _get_dsign(self): + # if self.dsign is not None: + # return None + url = 'http://pan.baidu.com/disk/home' - r = ss.get(url) + r = self._request('GET', url, '_get_dsign') html = r.content - sign1 = re.search(r'sign1 = \'(.+?)\';', html).group(1) - sign3 = re.search(r'sign3 = \'(.+?)\';', html).group(1) - timestamp = re.search(r'timestamp = \'(.+?)\';', html).group(1) + + sign1 = re.search(r'"sign1":"(.+?)"', html).group(1) + sign3 = 
re.search(r'"sign3":"(.+?)"', html).group(1) + timestamp = re.search(r'"timestamp":(\d+)', html).group(1) # following javascript code from http://pan.baidu.com/disk/home #yunData.sign2 = function s(j, r) { @@ -683,36 +1181,76 @@ def sign2(j, r): self.dsign = sign2(sign3, sign1) self.timestamp = timestamp - def _get_dlink(self, i): - if not hasattr(self, 'dsign'): - self._get_dsign() + def _get_dlink(self, path): + bduss = self.bduss + uid = self.user_id + + timestamp = str(int(time.time() * 1000)) + devuid = '0|' + to_md5(bduss).upper() + + enc = to_sha1(bduss) + rand = to_sha1( + enc + str(uid) + 'ebrcUYiuxaZv2XGu7KIYKxUrqfnOfpDF' + str(timestamp) + devuid + ) + + url = ( + 'https://pcs.baidu.com/rest/2.0/pcs/file?app_id=' + args.appid \ + + '&method=locatedownload&ver=2' \ + + '&path=' + urllib.quote(path) + '&time=' \ + + timestamp + '&rand=' + rand + '&devuid=' + devuid + ) + + headers = dict(ss.headers) + headers['User-Agent'] = 'netdisk;2.2.51.6;netdisk;10.0.63;PC;android-android' + resp = self._request('GET', url, '_get_dlink', headers=headers) + info = resp.json() + if info.get('urls'): + dlink = info['urls'][0]['url'].encode('utf8') + return dlink + else: + print s % (1, 91, ' !! 
Error at _get_dlink, can\'t get dlink') + sys.exit(1) + + def _get_dlink4(self, path): + # use app_id: 778750 + # reference: [3个方法解决百度网盘限速](https://www.runningcheese.com/baiduyun) + dlink = ('http://c.pcs.baidu.com/rest/2.0/pcs/file?method=download' + '&app_id={}&path={}&ver=2.0&clienttype=1').format( + args.appid, urllib.quote(path)) + dlink = fast_pcs_server(dlink) + return dlink + + def _get_dlink3(self, fs_id): while True: + dsign, timestamp = self._get_dsign() + params = { "channel": "chunlei", "clienttype": 0, + "app_id": "250528", "web": 1, - #"bdstoken": self._get_bdstoken() - } - - data = { + # "bdstoken": self._get_bdstoken(), "sign": self.dsign, "timestamp": self.timestamp, - "fidlist": "[%s]" % i['fs_id'], - "type": "dlink" + "fidlist": '[{}]'.format(fs_id), + "type": "dlink", } url = 'http://pan.baidu.com/api/download' - r = ss.post(url, params=params, data=data) + r = ss.get(url, params=params) j = r.json() + print(j) if j['errno'] == 0: dlink = j['dlink'][0]['dlink'].encode('utf8') - dlink = re.sub(r'prisign=.+?(&|$)', r'prisign=unknow1円', dlink) - dlink = dlink.replace('chkbd=0', 'chkbd=1') - dlink = dlink.replace('chkv=0', 'chkv=1') + # dlink = re.sub(r'prisign=.+?(&|$)', r'prisign=unknow1円', dlink) + # dlink = dlink.replace('chkbd=0', 'chkbd=1') + # dlink = dlink.replace('chkv=0', 'chkv=1') + dlink = fast_pcs_server(dlink) return dlink else: - self._get_dsign() + print s % (1, 91, ' !! 
Error at _get_dlink, can\'t get dlink') + continue def _get_dlink2(self, i): j = self._meta([i['path'].encode('utf8')], d @@ -747,7 +1285,7 @@ def download(self, paths): base_dir = '' if os.path.split(path)[0] == '/' \ else os.path.split(path)[0] - meta = self._meta([path], d + meta = self._meta([path], d if meta: if meta['info'][0]['isdir']: dir_loop = [path] @@ -787,10 +1325,9 @@ def download(self, paths): t = i['path'].encode('utf8') t = t.replace(base_dir, '') t = t[1:] if t[0] == '/' else t - t = os.path.join(os.getcwd(), t) + t = os.path.join(args.outdir, t) - if not i.has_key('dlink'): - i['dlink'] = self._get_dlink2(i) + i['dlink'] = self._get_dlink(i['path'].encode('utf8')) infos = { 'file': t, @@ -811,16 +1348,16 @@ def download(self, paths): elif not meta['info'][0]['isdir']: t = os.path.join( - os.getcwd(), meta['info'][0]['server_filename'].encode('utf8') + args.outdir, meta['info'][0]['server_filename'].encode('utf8') ) infos = { 'file': t, 'path': meta['info'][0]['path'].encode('utf8'), 'dir_': os.path.split(t)[0], - #'dlink': self._get_dlink(meta['info'][0]), + 'dlink': self._get_dlink(meta['info'][0]['path'].encode('utf8')), 'm3u8': self._get_m3u8(meta['info'][0]) \ if 'm3' in args.type_ else None, - 'dlink': meta['info'][0]['dlink'].encode('utf8'), + # 'dlink': meta['info'][0]['dlink'].encode('utf8'), 'name': meta['info'][0]['server_filename'].encode('utf8'), 'size': meta['info'][0]['size'], } @@ -852,29 +1389,68 @@ def _download_do(infos): print '\n ++ download: #', s % (1, 97, infos['nn']), '/', \ s % (1, 97, infos['total_file']), '#', col - #cookie = 'BDUSS=%s' % ss.cookies.get('BDUSS') - if args.aria2c: + if '8s' in args.type_ and is_wenxintishi(infos['dlink']): + print s % (1, 93, ' !! 
百度8秒 !!') + return + + cookie = 'Cookie: ' + '; '.join([ + k + '=' + v for k, v in ss.cookies.get_dict().items()]) + + # Netdisk user agents: + # + # "netdisk;6.7.1.9;PC;PC-Windows;10.0.17763;WindowsBaiduYunGuanJia" + # "netdisk;5.3.1.3;PC;PC-Windows;5.1.2600;WindowsBaiduYunGuanJia" + # "netdisk;7.15.1;HUAWEI+G750-T01;android-android;4.2.2" + # "netdisk;4.4.0.6;PC;PC-Windows;6.2.9200;WindowsBaiduYunGuanJia" + # "netdisk;5.3.1.3;PC;PC-Windows;5.1.2600;WindowsBaiduYunGuanJia" + # + # 'LogStatistic' + + # Recently all downloading requests using above user-agents are limited by baidu + + # user_agent = headers['User-Agent'] + user_agent = 'netdisk;2.2.51.6;netdisk;10.0.63;PC;android-android' + + if args.aget_s: + quiet = ' --quiet=true' if args.quiet else '' + cmd = 'ag ' \ + '"%s" ' \ + '-o "%s.tmp" ' \ + '-H "User-Agent: %s" ' \ + '-H "Connection: Keep-Alive" ' \ + '-H "%s" ' \ + '-s %s -k %s' \ + % (infos['dlink'], infos['file'], user_agent, cookie, args.aget_s, args.aget_k) + elif args.aria2c: quiet = ' --quiet=true' if args.quiet else '' taria2c = ' -x %s -s %s' % (args.aria2c, args.aria2c) tlimit = ' --max-download-limit %s' % args.limit if args.limit else '' - #'--user-agent "netdisk;4.4.0.6;PC;PC-Windows;6.2.9200;WindowsBaiduYunGuanJia" ' \ - #'--header "Referer:http://pan.baidu.com/disk/home " ' \ - cmd = 'aria2c -c -k 1M%s%s%s ' \ + cmd = 'aria2c -c%s%s%s ' \ '-o "%s.tmp" -d "%s" ' \ - '--user-agent "netdisk;4.4.0.6;PC;PC-Windows;6.2.9200;WindowsBaiduYunGuanJia" ' \ + '--user-agent "%s" ' \ + '--header "Connection: Keep-Alive" ' \ + '--header "Accept-Encoding: gzip" ' \ + '--header "%s" ' \ '"%s"' \ % (quiet, taria2c, tlimit, infos['name'], - infos['dir_'], infos['dlink']) + infos['dir_'], user_agent, + cookie, infos['dlink']) else: + if infos['size']>= 100 * OneM: + print '\x1b[1;91mWarning\x1b[0m: '\ + '\x1b[1;91m%s\x1b[0m\n\n' % "File size is large, please use aget or aria2 to download\naget: https://github.com/PeterDing/aget-rs\naria2: 
https://github.com/aria2/aria2" + quiet = ' -q' if args.quiet else '' tlimit = ' --limit-rate %s' % args.limit if args.limit else '' cmd = 'wget -c%s%s ' \ '-O "%s.tmp" ' \ - '--user-agent "%s" ' \ - '--header "Referer:http://pan.baidu.com/disk/home" ' \ + '--header "User-Agent: %s" ' \ + '--header "Connection: Keep-Alive" ' \ + '--header "Accept-Encoding: gzip" ' \ + '--header "%s" ' \ '"%s"' \ % (quiet, tlimit, infos['file'], - headers['User-Agent'], infos['dlink']) + user_agent, cookie, infos['dlink']) status = os.system(cmd) exit = True @@ -885,7 +1461,15 @@ def _download_do(infos): pass else: exit = False - if status != 0: # other http-errors, such as 302. + + content_length_matched = False + saved_path = '%s.tmp' % infos['file'] + if os.path.exists(saved_path): + meta = os.stat(saved_path) + if meta.st_size == infos['size']: + content_length_matched = True + + if status != 0 or not content_length_matched: # other http-errors, such as 302. #wget_exit_status_info = wget_es[status] print('\n\n ---### \x1b[1;91mEXIT STATUS\x1b[0m ==> '\ '\x1b[1;91m%d\x1b[0m ###--- \n\n' % status) @@ -917,11 +1501,18 @@ def _play_do(infos): g.write(infos['m3u8']) infos['dlink'] = '/tmp/tmp_pan.baidu.com.py.m3u8' + cookie = 'Cookie: ' + '; '.join([ + k + '=' + v for k, v in ss.cookies.get_dict().items()]) + user_agent = 'User-Agent: ' + headers['User-Agent'] quiet = ' --really-quiet' if args.quiet else '' - cmd = 'mpv%s --no-ytdl --cache-default 20480 --cache-secs 120 ' \ - '--http-header-fields "User-Agent:%s" ' \ - '--http-header-fields "Referer:http://pan.baidu.com/disk/home" "%s"' \ - % (quiet, headers['User-Agent'], infos['dlink']) + cmd = 'mpv%s --no-ytdl --http-header-fields="%s","%s" ' \ + % (quiet, user_agent, cookie) + + if infos.get('m3u8'): + # https://github.com/mpv-player/mpv/issues/6928#issuecomment-532198445 + cmd += ' --stream-lavf-o-append="protocol_whitelist=file,http,https,tcp,tls,crypto,hls,applehttp" ' + + cmd += "%s" % infos['dlink'] os.system(cmd) timeout = 
1 @@ -956,14 +1547,17 @@ def _make_dir(self, dir_): return ENoError def _meta(self, file_list, dlink=0): + p = { - "channel": "chunlei", - "app_id": "250528", + # "channel": "chunlei", + # "app_id": "250528", "method": "filemetas", "dlink": dlink, "blocks": 0, # 0 or 1 - #"bdstoken": self._get_bdstoken() + # "bdstoken": self._get_bdstoken() } + + # ss.get('http://pan.baidu.com/disk/home') url = 'http://pan.baidu.com/api/filemetas' i = 0 j = {} @@ -972,7 +1566,8 @@ def _meta(self, file_list, dlink=0): if fl: data = {'target': json.dumps(fl)} try: - r = ss.post(url, params=p, data=data) + r = self._request('POST', url, '_meta', params=p, data=data) + # r = ss.post(url, params=p, data=data) js = r.json() if js['errno'] == 0 and i == 0: if dlink: @@ -1024,8 +1619,13 @@ def _rapidupload_file(self, lpath, rpath): "content-crc32" : content_crc32, "ondup" : self.ondup } + + # WARNING: here needs netdist user-agent + theaders = dict(ss.headers) + theaders['User-Agent'] = NETDISK_UA + url = 'https://c.pcs.baidu.com/rest/2.0/pcs/file' - r = ss.post(url, params=p, data=data, verify=VERIFY) + r = ss.post(url, params=p, data=data, verify=VERIFY, headers=theaders) if r.ok: return ENoError else: @@ -1085,8 +1685,13 @@ def _combine_file(self, lpath, rpath): {'block_list': self.upload_datas[lpath]['slice_md5s']} ) } + + # WARNING: here needs netdist user-agent + theaders = dict(ss.headers) + theaders['User-Agent'] = NETDISK_UA + url = 'https://c.pcs.baidu.com/rest/2.0/pcs/file' - r = ss.post(url, params=p, data=data, verify=VERIFY) + r = ss.post(url, params=p, data=data, verify=VERIFY, headers=theaders) if r.ok: return ENoError else: @@ -1111,8 +1716,10 @@ def _upload_slice(self, piece=0, slice=DefaultSliceSize): fl = cStringIO.StringIO(__slice_block) files = {'file': ('file', fl, '')} data = MultipartEncoder(files) - theaders = headers + theaders = dict(headers) theaders['Content-Type'] = data.content_type + theaders['User-Agent'] = NETDISK_UA + url = 
'https://c.pcs.baidu.com/rest/2.0/pcs/file' r = ss.post(url, params=p, data=data, verify=VERIFY, headers=theaders) j = r.json() @@ -1378,7 +1985,7 @@ def save_datas(self, path, infos): ################################################################## # for saving shares - def _share_transfer(self, info): + def _share_transfer(self, surl, info): meta = self._meta([info['remotepath'].encode('utf8')]) if not meta: self._make_dir(info['remotepath'].encode('utf8')) @@ -1391,36 +1998,34 @@ def _share_transfer(self, info): j = {'errno': 'file has exist'} return j - theaders = headers - theaders.update( - { - 'Referer': 'http://pan.baidu.com/share/link?shareid=%s&uk=%s' \ - % (self.shareid, self.uk) - } - ) - - p = { - "app_id": 250528, - "channel": "chunlei", - "clienttype": 0, - "web": 1, - "ondup": "overwrite", - "async": 1, - "from": self.uk, - "shareid": self.shareid, - "bdstoken": self._get_bdstoken() - } - data = "path=" \ - + urllib.quote_plus(info['remotepath'].encode('utf8')) \ - + '&' \ - + "filelist=" \ - + urllib.quote_plus( - '["%s"]' % info['path'].encode('utf8') + data = ('fsidlist=' \ + + urllib.quote_plus('[%s]' % info['fs_id']) \ + + '&path=' \ + + urllib.quote_plus(info['remotepath'].encode('utf8')) ) - url = 'http://pan.baidu.com/share/transfer' - r = ss.post(url, params=p, data=data, headers=theaders) + url = ('https://pan.baidu.com/share/transfer?' 
+ 'shareid={}&from={}&bdstoken={}&channel=chunlei' + '&clienttype=0&web=1&app_id=250528'.format( + self.shareid, + self.uk, + self._get_bdstoken())) + + theaders = { + 'Cookie': '; '.join(['{}={}'.format(k, v) for k, v in ss.cookies.get_dict().items()]), + 'Origin': 'https://pan.baidu.com', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4', + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Accept': '*/*', + 'Referer': surl, + 'X-Requested-With': 'XMLHttpRequest', + 'Connection': 'keep-alive', + } + r = ss.post(url, data=data, headers=theaders) j = r.json() + #if j['errno'] == 0: #return ENoError #else: @@ -1460,6 +2065,7 @@ def _get_share_list(self, info): def _get_share_infos(self, url, remotepath, infos): r = ss.get(url) + ss.cookies.update(r.cookies.get_dict()) html = r.content info = panbaiducom.get_web_fileinfo(html, url) @@ -1467,19 +2073,20 @@ def _get_share_infos(self, url, remotepath, infos): self.shareid = info['shareid'] self.bdstoken = info['bdstoken'] - fileinfo = info['fileinfo'] - j = json.loads(fileinfo) + j = info['file_list']['list'] isdirs = [x['isdir'] for x in j] paths = [x['path'] for x in j] - z = zip(isdirs, paths) + fs_ids = [x['fs_id'] for x in j] + z = zip(fs_ids, isdirs, paths) if not infos: infos = [ { - 'isdir': x, - 'path': y, + 'fs_id': a, + 'isdir': b, + 'path': c, 'remotepath': remotepath \ if remotepath[-1] != '/' else remotepath[:-1] - } for x, y in z + } for a, b, c in z ] return infos @@ -1500,7 +2107,7 @@ def save_share(self, url, remotepath, infos=None): while True: print s % (1, 97, ' ++ transfer:'), info['path'] - result = self._share_transfer(info) + result = self._share_transfer(url, info) if result['errno'] == 0: break elif result['errno'] == 12 or result['errno'] == -33: @@ -1524,22 +2131,48 @@ def 
save_share(self, url, remotepath, infos=None): @staticmethod def _secret_or_not(url): - ss.headers['Referer'] = 'http://pan.baidu.com' - r = ss.get(url) + surl = url.split('?')[0].split('/1')[1].strip('/') + + ss.headers['Referer'] = 'https://pan.baidu.com' + r = ss.get(url, headers=headers) + + if r.status_code != 200 and r.status_code != 302: + ss.headers['Cookie'] = ';'.join(['{}={}'.format(k, v) for k, v in ss.cookies.get_dict().items()]) + r = ss.get(url, headers=headers, cookies=r.cookies) + if 'init' in r.url: if not args.secret: secret = raw_input(s % (2, 92, " 请输入提取密码: ")) else: secret = args.secret - data = 'pwd=%s' % secret - url = "%s&t=%d" % ( - r.url.replace('init', 'verify'), \ - int(time.time()) + + data = 'pwd=%s&vcode=&vcode_str=' % secret + url = ( + 'https://pan.baidu.com/share/verify?' + + 'surl=' + surl + + '&t=' + str(int(time.time()*1000)) + + '&channel=chunlei' + + '&web=1' + + '&app_id=250528' + + '&bdstoken=null' + + '&clienttype=0' ) - r = ss.post(url, data=data) + theaders = { + 'Accept-Encoding': 'gzip, deflate', + 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4', + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Accept': '*/*', + 'X-Requested-With': 'XMLHttpRequest', + 'Connection': 'keep-alive', + 'Sec-Fetch-Mode': 'cors', + 'Referer': 'https://pan.baidu.com/share/init?surl=' + surl + } + r = ss.post(url, data=data, headers=theaders) if r.json()['errno']: - print s % (2, 91, " !! 提取密码错误\n") + print s % (2, 91, " !! 
提取密码错误, %s\n" % r.text) sys.exit(1) + ss.cookies.update(r.cookies.get_dict()) ####################################################################### # for saveing inbox shares @@ -1670,20 +2303,17 @@ def save_inbox_share(self, url, remotepath, infos=None): ####################################################################### # for finding files - def _search(self, keyword, directory): + def _search(self, keyword, directory, page=1, num=1000): + p = { - "channel": "chunlei", - "clienttype": 0, - "web": 1, - "key": keyword, - "dir": directory if directory else "", - #"timeStamp": "0.15937364846467972", - #"bdstoken": self._get_bdstoken(), + 'recursion': '', + 'key': keyword, + 'dir': directory, } if args.recursive: p['recursion'] = 1 url = 'http://pan.baidu.com/api/search' - r = ss.get(url, params=p) + r = self._request('GET', url, '_search', params=p) j = r.json() if j['errno'] == 0: return j['list'] @@ -1804,9 +2434,7 @@ def do(): kw = keyword.decode('utf8', 'ignore') self.highlights.append({'text': kw, 'is_regex': 0}) infos = {i['fs_id']: i for i in infos}.values() - infos = self._sift(infos, name=arguments.get('name'), \ - size=arguments.get('size'), time=arguments.get('time'), \ - desc=arguments.get('desc')) + infos = self._sift(infos, **arguments) if not infos: return if not arguments.get('pipe'): @@ -1954,6 +2582,7 @@ def _filemanager(self, opera, data): "channel": "chunlei", "clienttype": 0, "web": 1, + "async": "2", "opera": opera, "bdstoken": self._get_bdstoken(), } @@ -2238,7 +2867,7 @@ def _get_torrent_info(self, path): } url = 'http://pan.baidu.com/rest/2.0/services/cloud_dl' - r = ss.post(url, params=p) + r = ss.get(url, params=p) j = r.json() if j.get('error_code'): print s % (1, 91, ' !! 
Error at _get_torrent_info:'), j['error_msg'] @@ -2437,19 +3066,20 @@ def add_tasks(self, urls, remotepath): } def _task_display(self, infos): + cross_line = '—' * int(os.popen('tput cols').read()) template = '%s %s\n' \ '%s %s\n' \ '%s %s\n' \ '%s %s\n' \ '%s %s\n' \ '%s %s\n' \ - '------------------------------\n' \ - % (s % (2, 97, ' id:'), s % (1, 97, "%s"), \ - s % (1, 97, ' status:'), s % (2, "%s", "%s"), \ - s % (1, 97, ' done:'), s % (3, 93, "%s"), \ - s % (2, 97, ' name:'), "%s", \ - s % (2, 97, ' path:'), "%s", \ - s % (2, 97, ' source:'), "%s") + '%s\n' \ + % (s % (2, 97, ' id:'), s % (1, 97, "%s"), \ + s % (1, 97, 'status:'), s % (1, "%s", "%s"), \ + s % (1, 97, ' done:'), s % (2, 93, "%s"), \ + s % (2, 97, ' name:'), "%s", \ + s % (2, 97, ' path:'), "%s", \ + s % (2, 97, 'source:'), "%s", cross_line) for i in infos: if i['result'] == 0: @@ -2677,7 +3307,7 @@ def _share(self, paths, pwd=None): r = ss.post(url, params=params, data=data) j = r.json() - if j['errno'] != 0: + if not j.get('shorturl'): print s % (1, 91, ' !! 
Error at _share'), j sys.exit(1) else: @@ -2762,7 +3392,6 @@ def cd_do(path): class panbaiducom(object): @staticmethod def get_web_fileinfo(cm, url): - info = {} if 'shareview' in url: info['uk'] = re.search(r'uk="(\d+)"', cm).group(1) info['shareid'] = re.search(r'shareid="(\d+)"', cm).group(1) @@ -2771,16 +3400,12 @@ def get_web_fileinfo(cm, url): t = t.replace('\\\\', '!@#$%^'*10) t = t.replace('\\', '') t = t.replace('!@#$%^'*10, '\\') - info['fileinfo'] = t + info['fileinfo'] = t info['timestamp'] = re.search(r'timestamp="(\d+)"', cm).group(1) - info['sign'] = re.search(r'downloadsign="(.+?)"', cm).group(1) + info['sign'] = re.search(r'downloadsign="(.+?)"', cm).group(1) else: - info['uk'] = re.search(r'yunData\.MYUK = "(\d+)"', cm).group(1) - info['shareid'] = re.search(r'yunData\.SHARE_ID = "(\d+)"', cm).group(1) - info['bdstoken'] = re.search(r'yunData\.MYBDSTOKEN = "(.*?)"', cm).group(1) - info['fileinfo'] = re.search(r'yunData.FILEINFO = (.+)', cm).group(1)[:-2] - info['timestamp'] = re.search(r'yunData.TIMESTAMP = "(.+?)"', cm).group(1) - info['sign'] = re.search(r'yunData.SIGN = "(.+?)"', cm).group(1) + info_str = re.search(r'yunData.setData\((.+?)\);', cm).group(1) + info = json.loads(info_str) return info @@ -2789,56 +3414,86 @@ def get_params(self, path): html = r.content info = self.get_web_fileinfo(html, path) - uk = info['uk'] - shareid = info['shareid'] - timestamp = info['timestamp'] - sign = info['sign'] + self.uk = str(info['uk']) + self.shareid = str(info['shareid']) + self.timestamp = str(info['timestamp']) + self.sign = info['sign'] + self.bdstoken = info['bdstoken'] self.params = { - #"bdstoken": bdstoken, - "uk": uk, - "shareid": shareid, - "timestamp": timestamp, - "sign": sign, - "channel": "chunlei", - "clienttype": 0, - "web": 1, + "bdstoken": self.bdstoken, + "uk": self.uk, + "shareid": self.shareid, + "timestamp": self.timestamp, + "sign": self.sign, "channel": "chunlei", "clienttype": 0, "web": 1 } - fileinfo = info['fileinfo'] - j 
= json.loads(fileinfo) + j = info['file_list']['list'] self.infos.update({ 'name': j[0]['server_filename'].encode('utf8'), 'file': os.path.join( - os.getcwd(), j[0]['server_filename'].encode('utf8') + args.outdir, j[0]['server_filename'].encode('utf8') ), - 'dir_': os.getcwd(), + 'dir_': args.outdir, 'fs_id': j[0]['fs_id'] }) + def get_vcode(self): + url = ( + 'https://pan.baidu.com/api/getvcode' + '?prod=pan' + '&t={}' + '&channel=chunlei' + '&web=1' + '&app_id=250528' + '&bdstoken={}' + ).format(random.random(), self.bdstoken) + + r = ss.get(url) + j = r.json() + return j + def get_infos(self): - url = 'http://pan.baidu.com/share/download' - data = 'fid_list=["%s"]' % self.infos['fs_id'] + url = ('https://pan.baidu.com/api/sharedownload?' + 'sign={}×tamp={}&bdstoken={}' + '&channel=chunlei&clienttype=0&web=1').format( + self.sign, self.timestamp, self.bdstoken) + + data = { + 'encrypt': '0', + 'product': 'share', + 'uk': self.uk, + 'primaryid': self.shareid, + 'fid_list': urllib.quote_plus('[%s]' % self.infos['fs_id']), + 'path_list': '', + 'vip': '0', + } while True: - r = ss.post(url, data=data, params=self.params) + data_str = '&'.join(['{}={}'.format(k, v) for k, v in data.items()]) + r = ss.post(url, data=data_str) j = r.json() - if not j['errno']: - dlink = fast_pcs_server(j['dlink'].encode('utf8')) + errno = j['errno'] + if errno == 0: + dlink = fast_pcs_server(j['list'][0]['dlink'].encode('utf8')) self.infos['dlink'] = dlink if args.play: panbaiducom_HOME._play_do(self.infos) else: panbaiducom_HOME._download_do(self.infos) break + elif errno == 118: + print s % (1, 91, ' !! 
没有下载权限!, 请转存网盘后,从网盘地址下载') + sys.exit(1) else: + j = self.get_vcode() vcode = j['vcode'] input_code = panbaiducom_HOME.save_img(j['img'], 'jpg') - self.params.update({'input': input_code, 'vcode': vcode}) + data.update({'vcode_input': input_code, 'vcode_str': vcode}) def get_infos2(self, path): while True: @@ -2849,8 +3504,8 @@ def get_infos2(self, path): if dlink: self.infos = { 'name': name, - 'file': os.path.join(os.getcwd(), name), - 'dir_': os.getcwd(), + 'file': os.path.join(args.outdir, name), + 'dir_': args.outdir, 'dlink': fast_pcs_server(dlink.group(1)) } if args.play: @@ -2859,7 +3514,7 @@ def get_infos2(self, path): panbaiducom_HOME._download_do(self.infos) break else: - print s % (1, ' !! Error at get_infos2, can\'t get dlink') + print s % (1, 91, ' !! Error at get_infos2, can\'t get dlink') def do(self, paths): for path in paths: @@ -2880,8 +3535,8 @@ def do4(self, paths): name = urllib.unquote_plus(t) self.infos = { 'name': name, - 'file': os.path.join(os.getcwd(), name), - 'dir_': os.getcwd(), + 'file': os.path.join(args.outdir, name), + 'dir_': args.outdir, 'dlink': fast_pcs_server(path) } @@ -2891,6 +3546,11 @@ def do4(self, paths): panbaiducom_HOME._download_do(self.infos) break +def assert_download_tools(): + for tool in ('wget', 'aget', 'aria2c'): + if ' ' in os.popen('which %s' % tool).read(): + print s % (1, 91, ' !!! aria2 is not installed') + def sighandler(signum, frame): print s % (1, 91, " !! 
Signal:"), signum if args.comd in ('u', 'upload'): @@ -2920,7 +3580,15 @@ def handle_args(argv): ' 用法见 https://github.com/PeterDing/iScript') p.add_argument('xxx', type=str, nargs='*', help='命令对象.') p.add_argument('-a', '--aria2c', action='store', default=None, \ - type=int, help='aria2c分段下载数量') + type=int, help='aria2c 分段下载数量') + p.add_argument('-g', '--aget_s', action='store', default=None, \ + type=int, help='aget 分段下载数量') + p.add_argument('-k', '--aget_k', action='store', default='200K', \ + type=str, help='aget 分段大小') + p.add_argument('--appid', action='store', default='778750', type=str, \ + help='设置 app-id. 如果无法下载或下载慢, 尝试设置为 778750') + p.add_argument('-o', '--outdir', action='store', default=os.getcwd(), \ + type=str, help='保存目录') p.add_argument('-p', '--play', action='store_true', help='play with mpv') p.add_argument('-v', '--view', action='count', help='view details') p.add_argument('-V', '--VERIFY', action='store_true', help='verify') @@ -2993,11 +3661,11 @@ def handle_command(comd, xxx): xh = panbaiducom_HOME() if len(xxx) < 1: - username = raw_input(s % (1, 97, ' username: ')) - password = getpass(s % (1, 97, ' password: ')) + username = raw_input(s % (1, 97, ' username: ')) + password = getpass(s % (1, 97, ' password / cookie: ')) elif len(xxx) == 1: username = xxx[0] - password = getpass(s % (1, 97, ' password: ')) + password = getpass(s % (1, 97, ' password / cookie: ')) elif len(xxx) == 2: username = xxx[0] password = xxx[1] @@ -3017,10 +3685,10 @@ def handle_command(comd, xxx): comd == 'user': accounts = panbaiducom_HOME._check_cookie_file() if accounts: - cu = zip(range(len(accounts)), [u for u in accounts]) + cu = zip(range(len(accounts)), sorted([u for u in accounts])) for i, u in cu: print s % (1, 92, i+1) if accounts[u]['on'] else s % (1, 91, i+1), \ - accounts[u]['capacity'], \ + accounts[u]['capacity'].ljust(15), \ s % (2, 92, u) if accounts[u]['on'] else s % (2, 97, u) if comd == 'userdelete' or comd == 'ud': print s % (2, 97, 0), s % (2, 
91, 'ALL') elif comd == 'user': sys.exit() @@ -3037,7 +3705,7 @@ def handle_command(comd, xxx): if comd == 'userdelete' or comd == 'ud': if u != 'ALL': if accounts[u]['on'] and len(accounts)> 1: - print s % (1, 91, ' !! %s is online. To delete the account, firstly changing another account' % u) + print s % (1, 91, ' !! %s is online. To delete the account, firstly switching to other account' % u) sys.exit() del accounts[u] else: @@ -3105,7 +3773,13 @@ def handle_command(comd, xxx): ' d url1 url2 ..') sys.exit(1) - if comd == 'p' or comd == 'play': args.play = True + # login session + panbaiducom_HOME().init() + + if comd == 'p' or comd == 'play': + args.play = True + else: + assert_download_tools() enter_password() @@ -3167,11 +3841,12 @@ def handle_command(comd, xxx): ) else: infos = None + if '/inbox/' in xxx[0]: url = xxx[0] x.save_inbox_share(url, remotepath, infos=infos) else: - url = re.search(r'(http://.+?.baidu.com/.+?)(#|$)', xxx[0]).group(1) + url = re.search(r'(https?://.+?.baidu.com/.+?)(#|$)', xxx[0]).group(1) url = url.replace('wap/link', 'share/link') x._secret_or_not(url) x.save_share(url, remotepath, infos=infos) @@ -3447,9 +4122,11 @@ def handle_command(comd, xxx): if 'x' in locals(): x.save_cookies(on=1, tocwd=True) - elif 'px' in locals(): + elif 'px' in globals(): px.save_cookies(on=1, tocwd=True) + + def main(argv): handle_signal() diff --git a/tumblr.py b/tumblr.py index 48dbd95..4329c63 100755 --- a/tumblr.py +++ b/tumblr.py @@ -1,17 +1,33 @@ #!/usr/bin/env python2 # vim: set fileencoding=utf8 +from __future__ import unicode_literals + import os import sys import re import json +import collections +import multiprocessing import requests +requests.packages.urllib3.disable_warnings() import argparse import random -import subprocess import time +import select +import signal + +API_KEY = 'fuiKNFp9vQFvjLNvx4sUwti4Yb5yGutBN4Xh10LXZhhRKjWlV4' -api_key = 'fuiKNFp9vQFvjLNvx4sUwti4Yb5yGutBN4Xh10LXZhhRKjWlV4' +PID_PATH = '/tmp/tumblr.py.pid' + +# 
statistic parameters +NET_ERRORS = multiprocessing.Value('i', 0) +UNCOMPLETION = multiprocessing.Value('i', 0) +DOWNLOAD_ERRORS = multiprocessing.Value('i', 0) +DOWNLOADS = multiprocessing.Value('i', 0) +COMPLETION = multiprocessing.Value('i', 0) +OFFSET = multiprocessing.Value('i', 0) ############################################################ # wget exit status @@ -46,251 +62,578 @@ ss = requests.session() ss.headers.update(headers) -def check_queue(queue, cb): - for f in queue: - st = f[0].poll() - if st is not None: - if st == 0: cb(f[1]) - queue.remove(f) - return queue - -def sleep(size, num): - t = float(size) / num - time.sleep(t) - -def async(tasks, queue, run=None, cb=None, num=10): - queue = check_queue(queue, cb) - sleep(len(queue), num) - nsize = num - len(queue) - for i in xrange(nsize): - try: - task = tasks.pop(0) - except IndexError: - break - f = run(task) - if f: queue.append(f) - return tasks, queue - -class tumblr(object): - def __init__(self): - self.queue = [] - self.tasks = [] +PROXY = None - def save_json(self): - with open(self.json_path, 'w') as g: - g.write(json.dumps( - {'key': self.key}, indent=4, sort_keys=True)) +class Error(Exception): + def __init__(self, msg): + self.msg = msg + def __str__(self): + return self.msg - def get_site_infos(self, postid=None): - self.infos['photos'] = [] - self.url = 'https://api.tumblr.com/v2/blog/%s/posts/photo' \ - % self.infos['host'] - params = { - "offset": self.key if not postid else "", - "limit": 20 if not postid else "", - "type": "photo", - "filter": "text", - "tag": args.tag, - "id": postid if postid else "", - "api_key": api_key - } +def reset_statistic_params(): + NET_ERRORS.value = 0 + UNCOMPLETION.value = 0 + DOWNLOAD_ERRORS.value = 0 + DOWNLOADS.value = 0 + COMPLETION.value = 0 + OFFSET.value = 0 - r = None +def play(urls, args): + for url in urls: + tumblr = Tumblr(args, url) + while True: + items = tumblr.get_item_generator() + if not items: + break + play_do(items, args.quiet) + 
+def play_do(items, quiet): + for item in items: + num = random.randint(0, 7) % 8 + col = s % (2, num + 90, item['durl']) + print ' ++ play:', col + quiet = ' --really-quiet' if quiet else '' + cmd = 'mpv%s --no-ytdl --cache-default 20480 --cache-secs 120 ' \ + '--http-header-fields "User-Agent:%s" ' \ + '"%s"' \ + % (quiet, headers['User-Agent'], item['durl']) + + os.system(cmd) + timeout = 1 + ii, _, _ = select.select([sys.stdin], [], [], timeout) + if ii: + sys.exit(0) + else: + pass + +def remove_downloaded_items(items): + N = len(items) + for i in range(N): + item = items.pop() + filepath = os.path.join(item['dir_'], item['subdir'], item['filename']) + if not os.path.exists(filepath): + items.appendleft(item) + +def download_run(item): + filepath = os.path.join(item['dir_'], item['subdir'], item['filename']) + # if os.path.exists(filepath): + # return None + # num = random.randint(0, 7) % 8 + # col = s % (1, num + 90, filepath) + # print ' ++ download: %s' % col + + if PROXY: + cmd = ' '.join([ + 'curl', '-s', '-x', '"%s"' % PROXY, '-o', '"%s.tmp"' % filepath, + '-H', '"User-Agent: %s"' % headers['User-Agent'], + '"%s"' % item['durl'] + ]) + else: + cmd = ' '.join([ + 'curl', '-s', '-o', '"%s.tmp"' % filepath, + '-H', '"User-Agent: %s"' % headers['User-Agent'], + '"%s"' % item['durl'] + ]) + status = os.system(cmd) + return status, filepath + +def callback(filepath): + os.rename('%s.tmp' % filepath, filepath) + +class Downloader(multiprocessing.Process): + def __init__(self, queue, lock): + super(Downloader, self).__init__() + self.queue = queue + self.daemon = True + self.lock = lock + + def run(self): + while True: + item = self.queue.get() + self.queue.task_done() + if not item: + break + status = download_run(item) + if not status: # file was downloaded. 
+ continue + status, filepath = status + if status != 0: + # print s % (1, 93, '[Error %s] at wget' % status), wget_es[status] + self.lock.acquire() + UNCOMPLETION.value += 1 + DOWNLOAD_ERRORS.value += 1 + self.lock.release() + else: + self.lock.acquire() + DOWNLOADS.value += 1 + self.lock.release() + callback(filepath) + +class TumblrAPI(object): + def _request(self, base_hostname, target, type, params): + api_url = '/'.join(['https://api.tumblr.com/v2/blog', + base_hostname, target, type]) + params['api_key'] = API_KEY + if PROXY: + proxies = {'http': PROXY, 'https': PROXY} + else: + proxies = None while True: try: - r = ss.get(self.url, params=params) + res = ss.get(api_url, params=params, proxies=proxies, timeout=10) + json_data = res.json() break + except KeyboardInterrupt: + sys.exit() except Exception as e: - print s % (1, 91, ' !! Error at get_infos'), e + NET_ERRORS.value += 1 # count errors + print s % (1, 93, '[Error at requests]:'), e, '\n' time.sleep(5) - if r.ok: - j = r.json() - if j['response']['posts']: - for i in j['response']['posts']: - index = 1 + if json_data['meta']['msg'].lower() != 'ok': + raise Error(s % (1, 91, json_data['meta']['msg'])) + + return json_data['response'] + + def _info(self, base_hostname): + return self._request(base_hostname, 'info', '', None) + + def _photo(self, base_hostname, offset='', tag='', post_id='', to_items=True): + def make_items(raw_data): + items = collections.deque() + for i in raw_data['posts']: + index = 1 + if i.get('photos'): for ii in i['photos']: - durl = ii['original_size']['url'].encode('utf8') - filepath = os.path.join(self.infos['dir_'], '%s_%s.%s' \ - % (i['id'], index, durl.split('.')[-1])) - filename = os.path.split(filepath)[-1] + durl = ii['original_size']['url'].replace('http:', 'https:') + filename = os.path.join( + '%s_%s.%s' % (i['id'], index, durl.split('.')[-1])) t = { - 'filepath': filepath, 'durl': durl, - 'filename': filename + 'filename': filename, + 'key': i['timestamp'], + 
'subdir': 'photos', } index += 1 - self.infos['photos'].append(t) - else: - print s % (1, 92, '\n --- job over ---') - sys.exit(0) - else: - print s % (1, 91, '\n !! Error, get_infos') - print r.status_code, r.content - sys.exit(1) + items.append(t) + return items - def get_tag_infos(self): - self.infos['photos'] = [] - self.url = 'https://api.tumblr.com/v2/tagged' params = { - "limit": 20, - "type": "photo", - "tag": self.infos['tag'], - "before": self.key, - "api_key": api_key + 'offset': offset, + 'before': offset if tag else '', + 'tag': tag, + 'id': post_id, + 'limit': 20 if not tag and not post_id else '', + 'filter': 'text' } + raw_data = self._request(base_hostname, 'posts', 'photo', params) + if to_items: + return make_items(raw_data) + else: + return raw_data - r = None - while True: - try: - r = ss.get(self.url, params=params) - break - except Exception as e: - print s % (1, 91, ' !! Error at get_infos'), e - time.sleep(5) - if r.ok: - j = r.json() - if j['response']: - for i in j['response']: - index = 1 - if i.get('photos'): - for ii in i['photos']: - durl = ii['original_size']['url'].encode('utf8') - filepath = os.path.join( - self.infos['dir_'], '%s_%s.%s' \ - % (i['id'], index, durl.split('.')[-1])) - filename = os.path.split(filepath)[-1] - t = { - 'filepath': filepath, - 'durl': durl, - 'filename': filename, - 'key': i['timestamp'] - } - index += 1 - self.infos['photos'].append(t) - else: - print s % (1, 92, '\n --- job over ---') - sys.exit(0) + def _audio(self, base_hostname, offset='', tag='', post_id='', to_items=True): + def make_items(raw_data): + items = collections.deque() + for i in raw_data['posts']: + durl = i['audio_url'].replace('http:', 'https:') + filename = os.path.join( + '%s_%s.%s' % (i['id'], i['track_name'], durl.split('.')[-1])) + t = { + 'durl': durl, + 'filename': filename, + 'timestamp': i['timestamp'] if tag else '', + 'subdir': 'audios' + } + items.append(t) + return items + + params = { + 'offset': offset, + 'before': 
offset if tag else '', + 'tag': tag, + 'id': post_id, + 'limit': 20 if not tag and not post_id else '', + 'filter': 'text' + } + raw_data = self._request(base_hostname, 'posts', 'audio', params) + if to_items: + return make_items(raw_data) else: - print s % (1, 91, '\n !! Error, get_infos') - print r.status_code, r.content - sys.exit(1) + return raw_data - def download(self): - def run(i): - if os.path.exists(i['filepath']): - return - num = random.randint(0, 7) % 8 - col = s % (1, num + 90, i['filepath']) - print ' ++ download: %s' % col - cmd = [ - 'wget', '-c', '-q', - '-O', '%s.tmp' % i['filepath'], - '--user-agent', '"%s"' % headers['User-Agent'], - '%s' % i['durl'].replace('http:', 'https:') - ] - f = subprocess.Popen(cmd) - return f, i['filepath'] - - def callback(filepath): - os.rename('%s.tmp' % filepath, filepath) - - tasks = self.infos['photos'] + self.tasks - self.tasks = [] - while True: - tasks, self.queue = async( - tasks, self.queue, run=run, - cb=callback, num=self.processes) - if len(tasks) <= self.processes: - self.tasks = tasks - break + def _video(self, base_hostname, offset='', tag='', post_id='', to_items=True): + def make_items(raw_data): + items = collections.deque() + for i in raw_data['posts']: + if not i.get('video_url'): + continue + durl = i['video_url'].replace('http:', 'https:') + filename = os.path.join( + '%s.%s' % (i['id'], durl.split('.')[-1])) + t = { + 'durl': durl, + 'filename': filename, + 'timestamp': i['timestamp'] if tag else '', + 'subdir': 'videos' + } + items.append(t) + return items - def download_site(self, url): - self.infos = { - 'host': re.search(r'http(s|)://(.+?)($|/)', url).group(2)} - self.infos['dir_'] = os.path.join(os.getcwd(), self.infos['host']) - self.processes = int(args.processes) - - if not os.path.exists(self.infos['dir_']): - os.makedirs(self.infos['dir_']) - self.json_path = os.path.join(self.infos['dir_'], 'json.json') - self.key = 0 - print s % (1, 92, '\n ## begin'), 'key = %s' % self.key + 
params = { + 'offset': offset, + 'before': offset if tag else '', + 'tag': tag, + 'id': post_id, + 'limit': 20 if not tag and not post_id else '', + 'filter': 'text' + } + raw_data = self._request(base_hostname, 'posts', 'video', params) + if to_items: + return make_items(raw_data) else: - self.json_path = os.path.join(self.infos['dir_'], 'json.json') - if os.path.exists(self.json_path): - self.key = json.loads(open(self.json_path).read())['key'] - 20 - print s % (1, 92, '\n ## begin'), 'key = %s' % self.key + return raw_data + +class Tumblr(TumblrAPI): + def __init__(self, args, url): + self.args = args + self.offset = self.args.offset + self.make_items = self.parse_urls(url) + + def save_json(self): + with open(self.json_path, 'w') as g: + g.write(json.dumps( + {'offset': self.offset}, indent=4, sort_keys=True)) + + def init_infos(self, base_hostname, target_type, tag=''): + self.infos = {'host': base_hostname} + if not tag: + dir_ = os.path.join(os.getcwd(), self.infos['host']) + json_path = os.path.join(dir_, 'json.json') + + if not os.path.exists(dir_): + if not self.args.play: + os.makedirs(dir_) else: - self.key = 0 - - if args.check: - t = os.listdir(self.infos['dir_']) - t = [i[:i.find('_')] for i in t if i.endswith('.tmp')] - ltmp = list(set(t)) - for postid in ltmp: - self.get_site_infos(postid) - self.download() + if os.path.exists(json_path): + self.offset = json.load(open(json_path))['offset'] - 60 \ + if not self.args.update else self.args.offset + if self.offset < 0: self.offset = 0 else: - while True: - self.get_site_infos() - self.key += 20 + dir_ = os.path.join(os.getcwd(), 'tumblr-%s' % tag) + json_path = os.path.join(dir_, 'json.json') + + if not os.path.exists(dir_): + if not self.args.play: + os.makedirs(dir_) + self.offset = int(time.time()) + else: + if os.path.exists(json_path): + self.offset = json.load(open(json_path))['offset'] \ + if not self.args.update else int(time.time()) + + self.infos['dir_'] = dir_ + self.json_path = json_path + 
subdir = os.path.join(dir_, target_type) + if not os.path.exists(subdir) and not self.args.play: + os.makedirs(subdir) + + if not self.args.play: + for fl in os.listdir(subdir): + if not fl.endswith('.tmp'): + COMPLETION.value += 1 + else: + UNCOMPLETION.value += 1 + + if self.args.offset: + self.offset = self.args.offset + + print s % (1, 92, '## begin:'), 'offset = %s,' % self.offset, base_hostname + print s % (1, 97, 'INFO:\n') + \ + 'D = Downloads, R = Repair_Need\n' + \ + 'C = Completion, NE = Net_Errors, O = Offset' + + def download_photos_by_offset(self, base_hostname, post_id): + self.init_infos(base_hostname, 'photos') + + def do(): + items = self._photo( + base_hostname, offset=self.offset if not post_id else '', post_id=post_id) + if not items: + return [] + self.offset += 20 + self.save_json() + return items + return do + + def download_photos_by_tag(self, base_hostname, tag): + self.init_infos(base_hostname, 'photos', tag=tag) + + def do(): + items = self._photo(base_hostname, tag=tag, before=self.offset) + if not items: + return [] + self.offset = items[-1]['timestamp'] + self.save_json() + return items + return do + + def download_videos_by_offset(self, base_hostname, post_id): + self.init_infos(base_hostname, 'videos') + + def do(): + items = self._video( + base_hostname, offset=self.offset, post_id=post_id) + if not items: + return [] + self.offset += 20 + if not self.args.play: + self.save_json() + return items + return do + + def download_videos_by_tag(self, base_hostname, tag): + self.init_infos(base_hostname, 'videos', tag) + + def do(): + items = self._video( + base_hostname, before=self.offset, tag=tag) + if not items: + return [] + self.offset = items[-1]['timestamp'] + if not self.args.play: + self.save_json() + return items + return do + + def download_audios_by_offset(self, base_hostname, post_id): + self.init_infos(base_hostname, 'audios') + + def do(): + items = self._audio( + base_hostname, offset=self.offset if not post_id else '', 
post_id=post_id) + if not items: + return [] + self.offset += 20 + if not self.args.play: self.save_json() - self.download() - - def download_tag(self, tag): - self.infos = {'tag': tag} - self.infos['dir_'] = os.path.join( - os.getcwd(), 'tumblr-%s' % self.infos['tag']) - self.processes = int(args.processes) - - if not os.path.exists(self.infos['dir_']): - os.makedirs(self.infos['dir_']) - self.json_path = os.path.join(self.infos['dir_'], 'json.json') - self.key = int(time.time()) - print s % (1, 92, '\n ## begin'), 'key = %s' % self.key + return items + return do + + def download_audios_by_tag(self, base_hostname, tag): + self.init_infos(base_hostname, 'audios', tag) + + def do(): + items = self._audio( + base_hostname, before=self.offset, tag=tag) + if not self.infos['items']: + return [] + self.offset = self.infos['items'][-1]['timestamp'] + if not self.args.play: + self.save_json() + return items + return do + + def download_photos(self, base_hostname, post_id='', tag=''): + if tag: + return self.download_photos_by_tag(base_hostname, tag) + else: + return self.download_photos_by_offset(base_hostname, post_id=post_id) + + def download_videos(self, base_hostname, post_id='', tag=''): + if tag: + return self.download_videos_by_tag(base_hostname, tag) + else: + return self.download_videos_by_offset(base_hostname, post_id=post_id) + + def download_audios(self, base_hostname, post_id='', tag=''): + if tag: + return self.download_audios_by_tag(base_hostname, tag) else: - self.json_path = os.path.join(self.infos['dir_'], 'json.json') - if os.path.exists(self.json_path): - self.key = json.loads(open(self.json_path).read())['key'] - print s % (1, 92, '\n ## begin'), 'key = %s' % self.key + return self.download_audios_by_offset(base_hostname, post_id=post_id) + + def fix_photos(self, base_hostname): + self.init_infos(base_hostname, 'photos') + + t = os.listdir(os.path.join(self.infos['dir_'], 'photos')) + t = [i[:i.find('_')] for i in t if i.endswith('.tmp')] + 
self.post_ids = list(set(t)) + + def do(): + if len(self.post_ids): + post_id = self.post_ids.pop() + return self._photo(base_hostname, post_id=post_id) else: - self.key = int(time.time()) - - if args.check: - t = os.listdir(self.infos['dir_']) - t = [i[:i.find('_')] for i in t if i.endswith('.tmp')] - ltmp = list(set(t)) - for postid in ltmp: - self.get_site_infos(postid) - self.download() + return [] + return do + + def parse_urls(self, url): + _mod = re.search(r'(http://|https://|)(?P(.+?)<', html).group(1) + r' (.+?)<', html).group(1) d = modificate_text(artist_name + u' - top 20') dir_ = os.path.join(os.getcwdu(), d) self.dir_ = modificate_file_name_for_wget(dir_) @@ -682,9 +1038,9 @@ def download_artist_top_20_songs(self): n += 1 def download_artist_radio(self): - html = ss.get(url_artist_top_song % self.artist_id).text + html = self._request(url_artist_top_song % self.artist_id).text artist_name = re.search( - r'