|
5 | 5 | import re |
6 | 6 | from urlparse import urljoin |
7 | 7 |
|
8 | | -__version__ = (1, 1, 2) |
| 8 | +__version__ = (1, 1, 3) |
9 | 9 | __author__ = 'Alexandr Shurigin (https://github.com/phpdude/)' |
10 | 10 |
|
11 | 11 | # HTML tags syntax http://www.w3.org/TR/html-markup/syntax.html |
@@ -64,6 +64,9 @@ def resolve_url( |
64 | 64 | s = requests.session() |
65 | 65 |
|
66 | 66 | urls_history = OrderedDict() |
| 67 | + # disable compression for streamed requests. |
| 68 | + s.headers['Accept-Encoding'] = '' |
| 69 | + |
67 | 70 | if user_agent: |
68 | 71 | s.headers['User-Agent'] = user_agent |
69 | 72 |
|
@@ -97,7 +100,13 @@ def follow_meta_redirects(url, max_redirects, **kwargs): |
97 | 100 | if redirect: |
98 | 101 | m = re.search('url\s*=\s*([^\s;]+)', redirect, re.I) |
99 | 102 | if m: |
100 | | - real_url = follow_meta_redirects(urljoin(resp.url, m.group(1)), max_redirects) |
| 103 | + m = m.group(1) |
| 104 | + |
| 105 | + # strip quotes wrapping the captured URL, e.g. url='http://example.com' |
| 106 | + if m.startswith(('"', "'")) and m.endswith(('"', "'")): |
| 107 | + m = m[1:-1] |
| 108 | + |
| 109 | + real_url = follow_meta_redirects(urljoin(resp.url, m), max_redirects) |
101 | 110 |
|
102 | 111 | urls_history[real_url] = True |
103 | 112 |
|
|
0 commit comments