55import re
66from urlparse import urljoin
77
8- __version__ = (1 , 1 , 3 )
8+ __version__ = (1 , 1 , 4 )
99__author__ = 'Alexandr Shurigin (https://github.com/phpdude/)'
1010
1111# HTML tags syntax http://www.w3.org/TR/html-markup/syntax.html
@@ -70,13 +70,13 @@ def resolve_url(
7070 if user_agent :
7171 s .headers ['User-Agent' ] = user_agent
7272
73- def follow_meta_redirects (url , max_redirects , ** kwargs ):
73+ def follow_meta_redirects (url , redirects , ** kwargs ):
7474 urls_history [url ] = True
7575
76- if max_redirects < 0 :
76+ if redirects < 0 :
7777 raise ValueError ("Cannot resolve real url with max_redirects=%s" % max_redirects )
7878
79- max_redirects -= 1
79+ redirects -= 1
8080
8181 with closing (s .get (url , allow_redirects = True , stream = True , ** kwargs )) as resp :
8282 if resp .history :
@@ -85,28 +85,28 @@ def follow_meta_redirects(url, max_redirects, **kwargs):
8585
8686 head , real_url = next (resp .iter_content (chunk_size , decode_unicode = False )), resp .url
8787
88- # Removing html blocks in <noscript></noscript>
89- if remove_noscript :
90- head = re .sub ('<noscript[^>]*>.*</noscript[^>]*>' , '' , head , flags = re .DOTALL )
88+ # Removing html blocks in <noscript></noscript>
89+ if remove_noscript :
90+ head = re .sub ('<noscript[^>]*>.*</noscript[^>]*>' , '' , head , flags = re .DOTALL )
9191
92- redirect = None
93- if 'refresh' in resp .headers :
94- redirect = resp .headers ['refresh' ]
95- elif not redirect :
96- for tag in get_tags (head , 'meta' ):
97- if tag .get ('http-equiv' , '' ) == 'refresh' :
98- redirect = tag .get ('content' , None )
92+ redirect = None
93+ if 'refresh' in resp .headers :
94+ redirect = resp .headers ['refresh' ]
95+ elif not redirect :
96+ for tag in get_tags (head , 'meta' ):
97+ if tag .get ('http-equiv' , '' ) == 'refresh' :
98+ redirect = tag .get ('content' , None )
9999
100- if redirect :
101- m = re .search ('url\s*=\s*([^\s;]+)' , redirect , re .I )
102- if m :
103- m = m .group (1 )
100+ if redirect :
101+ m = re .search ('url\s*=\s*([^\s;]+)' , redirect , re .I )
102+ if m :
103+ m = m .group (1 )
104104
105- # fixing case url='#url here#'
106- if m .startswith (('"' , "'" )) and m .endswith (('"' , "'" )):
107- m = m [1 :- 1 ]
105+ # fixing case url='#url here#'
106+ if m .startswith (('"' , "'" )) and m .endswith (('"' , "'" )):
107+ m = m [1 :- 1 ]
108108
109- real_url = follow_meta_redirects (urljoin (resp .url , m ), max_redirects )
109+ real_url = follow_meta_redirects (urljoin (resp .url , m ), redirects )
110110
111111 urls_history [real_url ] = True
112112
0 commit comments