Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9b688a8

Browse files
committed
add proxyCheck
1 parent c61d4a7 commit 9b688a8

File tree

2 files changed

+159
-0
lines changed

2 files changed

+159
-0
lines changed

‎SQLAlchemyUsage.py‎

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# -*- coding: utf-8 -*-
2+
import datetime

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column,Integer,String,DateTime
from sqlalchemy.orm import sessionmaker
6+
7+
# mysql-python
8+
# Connection URL: MySQL through the mysqldb driver, database "scrapy", utf8.
# NOTE(review): the credentials are hard-coded in this URL.
DB_URL = 'mysql+mysqldb://root:123456@localhost:3306/scrapy?charset=utf8'
engine = create_engine(DB_URL)

# Base class that the ORM model classes inherit from.
Base = declarative_base()

# Session factory bound to the engine, and one module-level session built
# from it.
Session = sessionmaker(bind=engine)
session = Session()
15+
class ProxyIP(Base):
    """Declarative ORM model mapped to the ``ProxyIPs`` table."""

    __tablename__ = "ProxyIPs"

    # Table structure (column order is the declaration order below).
    # IpPort is the primary key.
    IpPort = Column(String(20), primary_key=True)
    Country = Column(String(20))
    # NOTE(review): the unit of Speed is not visible here -- confirm.
    Speed = Column(Integer)
    Type = Column(String(10))
    Level = Column(String(20))
    LastCheck = Column(DateTime)
    # Single-character flag column.
    GoogleProxy = Column(String(1))
26+
27+
28+
# Find every subclass of Base and create the matching tables in the database.
Base.metadata.create_all(engine)

# Insert one sample row.  LastCheck is a DateTime column, so it is given a
# real datetime object instead of a free-form date string.
# NOTE: IpPort is the primary key, so inserting this same row a second time
# (e.g. by re-running the script) will fail on the duplicate key.
ip = ProxyIP(
    IpPort='127.0.0.1:8080',
    Country='CN',
    Speed=500,
    Type='HTTP',
    Level='Anonymous',
    LastCheck=datetime.datetime(2016, 11, 20),
    GoogleProxy='Y',
)
try:
    session.add(ip)
    session.commit()
except Exception:
    # Roll back so the shared session stays usable, then surface the error.
    session.rollback()
    raise

‎proxyCheck.py‎

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# -*- coding: utf-8 -*-
2+
import httplib
3+
import urllib2
4+
import random
5+
import datetime
6+
from lxml import etree as ET
7+
8+
# Pool of browser User-Agent strings; one is picked at random per request.
_USER_AGENT_LIST = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 "
    "(KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 "
    "(KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 "
    "(KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 "
    "(KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 "
    "(KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 "
    "(KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 "
    "(KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
    "(KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 "
    "(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 "
    "(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]


def _error_result(msg, **extra):
    """Build the error dict checkProxy returns for a failed probe."""
    result = {"status": "error", "msg": msg}
    result.update(extra)
    return result


def _probe_once(opener, timeout):
    """Fetch http://ip.cn once through *opener* and return a result dict."""
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "ip.cn",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": random.choice(_USER_AGENT_LIST),
    }
    req = urllib2.Request("http://ip.cn", headers=headers)

    try:
        starttime = datetime.datetime.now()
        # Per-request timeout on a private opener: no process-wide socket
        # timeout and no globally installed opener.
        response = opener.open(req, timeout=timeout)
        try:
            body = response.read()
        finally:
            response.close()
        # Whole seconds elapsed (fractions are truncated).
        cost = (datetime.datetime.now() - starttime).seconds
    except urllib2.HTTPError as e:
        # HTTPError is a URLError subclass, so it must be handled first.
        return _error_result("The server couldn't fulfill the request!",
                             code=e.code)
    except urllib2.URLError as e:
        return _error_result("Failed to reach the server!", reason=e.reason)
    except (urllib2.socket.error, httplib.HTTPException) as e:
        # Covers read timeouts (socket.timeout) and malformed responses
        # such as httplib.BadStatusLine.
        return _error_result(str(e))

    try:
        html = ET.HTML(body)
    except ET.LxmlError as e:
        return _error_result("Could not parse the response: %s" % e)
    if html is None:
        return _error_result("Could not parse the response: empty document")

    ip_nodes = html.xpath(
        "//div[@id='result']/div[@class='well']/p[1]/code/text()")
    location_nodes = html.xpath(
        "//div[@id='result']/div[@class='well']/p[2]/code/text()")
    if not ip_nodes or not ip_nodes[0]:
        return _error_result("Could not find the IP address in the response!")

    return {
        "rstIP": ip_nodes[0],
        "rstLocation": location_nodes[0] if location_nodes else "",
        "cost": cost,
        "status": "ok",
    }


def checkProxy(proxyIP, protocol="http", retry_times=3, timeout=5):
    """Check whether a proxy works by fetching http://ip.cn through it.

    Args:
        proxyIP: Proxy address as "host:port".
        protocol: Scheme the proxy is registered for and addressed with.
        retry_times: Maximum number of attempts; values below 1 still make
            one attempt. Attempts stop at the first success.
        timeout: Per-request timeout in seconds.

    Returns:
        A dict. On success: "status" == "ok", plus "rstIP", "rstLocation"
        and "cost" (elapsed whole seconds). On failure: "status" == "error",
        plus "msg" and, depending on the failure, "reason" (unreachable
        server) or "code" (HTTP error status). When every attempt fails the
        result of the last attempt is returned.
    """
    # NOTE(review): the probe URL is always http://, so a handler registered
    # under a different scheme presumably is not applied to it -- verify
    # before relying on protocol values other than "http".
    proxy_handler = urllib2.ProxyHandler(
        {protocol: "%s://%s" % (protocol, proxyIP)})
    opener = urllib2.build_opener(proxy_handler)

    result = None
    for _ in range(max(1, retry_times)):
        result = _probe_once(opener, timeout)
        if result["status"] == "ok":
            break
    return result
122+
123+
124+
125+
126+
# Module-level smoke check: probe one sample proxy and print the result.
print(checkProxy("173.35.56.183:3128", protocol="http", retry_times=3, timeout=5))

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /