1
+ # coding=utf-8
2
+ from urllib .request import urlretrieve
3
+
4
+ from selenium import webdriver
5
+ from bs4 import BeautifulSoup
6
+ import time
7
+
8
+ from selenium .webdriver import ActionChains
9
+
10
def login(login_qq, password, business_qq):
    """Open a Chrome session, log in to Qzone and load the target space.

    :param login_qq: QQ number used to log in
    :param password: password of the login QQ account
    :param business_qq: QQ number whose Qzone space is opened
    :return: the logged-in driver, or None when the owner-info icon cannot
        be located after login (the browser is closed in that case)
    """
    # Function-scope import keeps this block self-contained.
    from selenium.common.exceptions import WebDriverException

    driver = webdriver.Chrome()
    try:
        driver.get('https://user.qzone.qq.com/{}/311'.format(business_qq))
        # Implicit wait so element lookups tolerate a slow page load.
        driver.implicitly_wait(10)
        driver.find_element_by_id('login_div')
        # The account/password form lives inside its own frame.
        driver.switch_to.frame('login_frame')
        # Switch from the default login mode to account + password login.
        driver.find_element_by_id('switcher_plogin').click()

        account_box = driver.find_element_by_id('u')
        account_box.clear()
        account_box.send_keys(login_qq)

        password_box = driver.find_element_by_id('p')
        password_box.clear()
        password_box.send_keys(password)

        driver.find_element_by_id('login_button').click()
        driver.switch_to.default_content()

        driver.implicitly_wait(10)
        time.sleep(5)
    except BaseException:
        # Do not leave an orphaned browser behind when a login step fails;
        # the original exception still propagates to the caller.
        driver.quit()
        raise

    try:
        # Used as the marker that the space could be opened.
        driver.find_element_by_id('QM_OwnerInfo_Icon')
    except WebDriverException:
        # format() instead of "+" so an int QQ number does not raise here.
        print('不能访问{}'.format(business_qq))
        driver.quit()
        return None
    return driver
41
+
42
+
43
+
44
def _click_next_photo(driver, attempts=10, pause=5):
    """Click the viewer's "next photo" button, retrying while it loads."""
    from selenium.common.exceptions import NoSuchElementException

    for _ in range(attempts):
        # find_elements_* returns an empty list instead of raising, so the
        # retry branch is actually reachable.
        buttons = driver.find_elements_by_id('js-btn-nextPhoto')
        if buttons:
            ActionChains(driver).click(buttons[0]).perform()
            return
        time.sleep(pause)
    raise NoSuchElementException('js-btn-nextPhoto not found')


def get_photo(driver, qq=None,
              photo_path="C:/Users/xxx/Desktop/photo/{}/{}.jpg"):
    """Download the photos of the albums shown in the space's album tab.

    :param driver: a logged-in driver positioned on the Qzone space
    :param qq: value used for the first placeholder of ``photo_path``;
        when omitted it is taken from the digits following
        ``qzone.qq.com/`` in the current URL ('unknown' if none are found)
    :param photo_path: format string for the download target; it receives
        ``qq`` and the photo name
    """
    import os
    import re

    if qq is None:
        # NOTE(review): assumes the page URL still looks like
        # https://user.qzone.qq.com/<qq>/... after login -- confirm.
        found = re.search(r'qzone\.qq\.com/(\d+)', driver.current_url)
        qq = found.group(1) if found else 'unknown'

    # NOTE(review): iteration starts at index 1, so the album at index 0 is
    # never opened; kept as in the original -- confirm this is intended.
    album_index = 1

    while True:
        # Back to the main document, then open the album tab in the header.
        driver.switch_to.default_content()
        driver.find_element_by_xpath(
            '//*[@id="menuContainer"]/div/ul/li[3]/a').click()
        driver.implicitly_wait(10)
        time.sleep(3)

        driver.switch_to.frame('app_canvas_frame')
        albums = driver.find_elements_by_class_name('album-cover')
        if album_index >= len(albums):
            # Nothing (left) to open, e.g. the space has a single album.
            break
        albums[album_index].click()

        driver.implicitly_wait(10)
        time.sleep(3)

        covers = driver.find_elements_by_class_name('item-cover')
        if not covers:
            # No photo thumbnails found in this album; move on.
            album_index += 1
            continue
        # Opening the first photo starts the large-image viewer.
        covers[0].click()
        time.sleep(3)

        # The large-image viewer lives in the parent frame.
        driver.switch_to.parent_frame()

        while True:
            image = driver.find_element_by_id('js-img-disp')
            src = image.get_attribute('src').replace('&t=5', '')
            name = driver.find_element_by_id("js-photo-name").text

            target = photo_path.format(qq, name)
            folder = os.path.dirname(target)
            if folder:
                # urlretrieve does not create missing directories.
                os.makedirs(folder, exist_ok=True)
            urlretrieve(src, target)

            # Text of the form "<current>/<total>" below the photo.
            position = driver.find_element_by_xpath(
                '//*[@id="js-ctn-infoBar"]/div/div[1]/span').text.split('/')
            if int(position[0]) == int(position[1]):
                # Last photo: close the viewer via the top-right X button.
                driver.find_element_by_xpath(
                    '//*[@id="js-viewer-main"]/div[1]/a').click()
                break

            _click_next_photo(driver)

        album_index += 1
        if len(albums) <= album_index:
            break
109
+
110
+
111
def get_shuoshuo(driver):
    """Print every post ("shuoshuo") of the currently opened space.

    For each page the window is scrolled down, the content frame is parsed
    with BeautifulSoup and one ``<title>:<text>`` line is printed per post.
    Paging stops when the page counter exceeds the number shown by the
    "末页" (last page) link, or when that link is absent.

    :param driver: a logged-in driver positioned on the posts page
    """
    page = 1
    while True:
        # Scroll down a few times so the page content gets loaded.
        for _ in range(1, 5):
            driver.execute_script("window.scrollBy(0,5000)")
            time.sleep(2)

        driver.switch_to.frame('app_canvas_frame')
        # Characters that GBK cannot represent are dropped before parsing.
        html = driver.page_source.encode('GBK', 'ignore').decode('gbk')
        # Name the parser explicitly: without it bs4 picks whichever parser
        # happens to be installed and emits a warning.
        soup = BeautifulSoup(html, 'html.parser')

        for pre in soup.find_all('pre', class_='content'):
            holder = pre.parent.parent
            link = None
            if holder is not None:
                link = holder.find('a', class_="c_tx c_tx3 goDetail")
            # A post without the detail link is printed with an empty title
            # instead of aborting the whole run.
            title = link.get('title', '') if link is not None else ''
            print(title + ":" + pre.text)

        page = page + 1
        last_page_link = soup.find('a', title='末页')
        # No "last page" link is treated as "nothing more to page through".
        if last_page_link is None or int(last_page_link.text) < page:
            # Leave the driver in the main document for whoever runs next.
            driver.switch_to.default_content()
            break

        driver.find_element_by_link_text(u'下一页').click()
        driver.switch_to.default_content()
        # Give the next page time to load.
        time.sleep(3)
144
+
145
+
146
if __name__ == '__main__':
    # Placeholder credentials: login QQ, its password, and the target QQ.
    driver = login('11111111', 'password', '2222222')
    if driver:
        try:
            get_shuoshuo(driver)
            get_photo(driver)
        finally:
            # Always close the browser, even when scraping raises.
            driver.quit()
0 commit comments