Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0800f21

Browse files
committed
no message
1 parent 8837e2d commit 0800f21

File tree

1 file changed

+151
-0
lines changed

1 file changed

+151
-0
lines changed

‎moumoubaimifan/qqzone/qqzone.py‎

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# coding=utf-8
2+
from urllib.request import urlretrieve
3+
4+
from selenium import webdriver
5+
from bs4 import BeautifulSoup
6+
import time
7+
8+
from selenium.webdriver import ActionChains
9+
10+
def login(login_qq, password, business_qq):
    """Log in to QQ Zone with account/password and open the target space.

    :param login_qq: QQ number used to log in
    :param password: password of the login QQ account
    :param business_qq: QQ number whose space page is opened
    :return: the Chrome driver when the element ``QM_OwnerInfo_Icon`` is
        found after logging in; otherwise ``None`` (the browser is closed
        before returning ``None``)
    """
    # Local import: the By-based locator API works on Selenium 3 and 4,
    # whereas the find_element_by_* helpers were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get('https://user.qzone.qq.com/{}/311'.format(business_qq))
        # The implicit wait stays in effect for the rest of the session, so
        # it is set once here and covers every later element lookup.
        driver.implicitly_wait(10)
        # Looked up only to block until the login container is present.
        driver.find_element(By.ID, 'login_div')
        # The account/password form lives inside this frame.
        driver.switch_to.frame('login_frame')
        # Switch to the "account + password" login mode.
        driver.find_element(By.ID, 'switcher_plogin').click()

        account_box = driver.find_element(By.ID, 'u')
        account_box.clear()
        account_box.send_keys(login_qq)

        password_box = driver.find_element(By.ID, 'p')
        password_box.clear()
        password_box.send_keys(password)

        driver.find_element(By.ID, 'login_button').click()
        driver.switch_to.default_content()

        # Give the page time to load after the login click.
        time.sleep(5)
    except BaseException:
        # Do not leave an orphaned browser behind when a login step fails.
        driver.quit()
        raise

    try:
        driver.find_element(By.ID, 'QM_OwnerInfo_Icon')
    except Exception:
        # Best effort, as before: report and return None, but close the
        # browser because the caller never receives the driver.
        print('不能访问{}'.format(business_qq))
        driver.quit()
        return None
    return driver
41+
42+
43+
44+
def get_photo(driver, qq=None,
              photo_path="C:/Users/xxx/Desktop/photo/{}/{}.jpg"):
    """Download the photos of the albums shown on the opened QQ Zone.

    For each album the first photo is opened in the viewer and the viewer is
    stepped through with the "next photo" button until the counter reads
    ``last/last``.

    :param driver: a logged-in Selenium driver positioned on the space page
    :param qq: value used for the first ``{}`` of ``photo_path``; when
        ``None`` it is taken from the first path segment of the current URL
    :param photo_path: format string for the download target; the first
        ``{}`` receives ``qq`` and the second the photo name
    :raises RuntimeError: if the "next photo" button does not appear after
        10 attempts
    """
    import os
    from urllib.parse import urlparse

    # Local import: By-based locators work on Selenium 3 and 4, whereas the
    # find_element_by_* helpers were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    if qq is None:
        # NOTE(review): assumes the current URL looks like
        # https://user.qzone.qq.com/<qq>/... (the form login() opens) -- confirm.
        segments = [s for s in urlparse(driver.current_url).path.split('/') if s]
        qq = segments[0] if segments else 'unknown'

    # Album index. NOTE(review): the original code starts at 1, so the first
    # 'album-cover' element is never opened -- confirm this is intended.
    album_index = 1

    while True:
        # Back to the main document, then open the album page from the
        # header menu.
        driver.switch_to.default_content()
        driver.find_element(
            By.XPATH, '//*[@id="menuContainer"]/div/ul/li[3]/a').click()
        driver.implicitly_wait(10)
        time.sleep(3)

        driver.switch_to.frame('app_canvas_frame')
        albums = driver.find_elements(By.CLASS_NAME, 'album-cover')
        if album_index >= len(albums):
            # Nothing (more) to open; also avoids IndexError when there are
            # fewer albums than the starting index.
            break
        albums[album_index].click()
        time.sleep(3)

        covers = driver.find_elements(By.CLASS_NAME, 'item-cover')
        if covers:
            # Open the first photo of the album.
            covers[0].click()
            time.sleep(3)

            # The large-photo viewer lives in the parent frame.
            driver.switch_to.parent_frame()

            while True:
                img = driver.find_element(By.ID, 'js-img-disp')
                src = img.get_attribute('src').replace('&t=5', '')
                name = driver.find_element(By.ID, 'js-photo-name').text

                target = photo_path.format(qq, name)
                # urlretrieve does not create missing directories.
                os.makedirs(os.path.dirname(target) or '.', exist_ok=True)
                urlretrieve(src, target)

                # Counter text has the form "<current>/<total>".
                counts = driver.find_element(
                    By.XPATH,
                    '//*[@id="js-ctn-infoBar"]/div/div[1]/span').text.split('/')
                if int(counts[0]) == int(counts[1]):
                    # Last photo: close the viewer (top-right X button).
                    driver.find_element(
                        By.XPATH, '//*[@id="js-viewer-main"]/div[1]/a').click()
                    break

                # The page can load slowly, so try up to 10 times.
                # find_elements returns an empty list instead of raising,
                # which is what makes the retry reachable.
                for _ in range(10):
                    next_buttons = driver.find_elements(By.ID, 'js-btn-nextPhoto')
                    if next_buttons:
                        ActionChains(driver).click(next_buttons[0]).perform()
                        break
                    time.sleep(5)
                else:
                    # Without this the same photo would be downloaded forever.
                    raise RuntimeError(
                        'next-photo button not found after 10 attempts')

        # Stop once every album from the starting index on has been visited.
        album_index += 1
        if album_index >= len(albums):
            break
109+
110+
111+
def get_shuoshuo(driver):
    """Print the posts ("shuoshuo") of the opened QQ Zone, page by page.

    Each page is scrolled, parsed from ``app_canvas_frame`` and every
    ``<pre class="content">`` is printed as ``<title>:<text>``, where the
    title comes from the post's ``goDetail`` link. Paging continues through
    the "下一页" link until the page counter passes the number shown on the
    "末页" link.

    :param driver: a logged-in Selenium driver positioned on the posts page
    """
    # Local import: By-based locators work on Selenium 3 and 4, whereas the
    # find_element_by_* helpers were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    page = 1
    while True:
        # Scroll down a few times so the page content gets loaded.
        for _ in range(4):
            driver.execute_script("window.scrollBy(0,5000)")
            time.sleep(2)

        driver.switch_to.frame('app_canvas_frame')
        try:
            # Round-trip through GBK drops characters that GBK cannot
            # represent (kept from the original code).
            html = driver.page_source.encode('GBK', 'ignore').decode('gbk')
            # Name the parser explicitly so results do not depend on which
            # optional parsers happen to be installed.
            bs = BeautifulSoup(html, 'html.parser')

            for pre in bs.find_all('pre', class_='content'):
                link = pre.parent.parent.find('a', class_="c_tx c_tx3 goDetail")
                # A post without the detail link is still printed.
                tx = link.get('title', '') if link is not None else ''
                print(tx + ":" + pre.text)

            page = page + 1
            last_page = bs.find('a', title='末页')
            # No "last page" link found: stop instead of raising
            # AttributeError on None.
            if last_page is None or int(last_page.text) < page:
                break

            driver.find_element(By.LINK_TEXT, u'下一页').click()
        finally:
            # Always return to the main document, including on the final
            # page and on errors.
            driver.switch_to.default_content()

        # Wait for the next page to load.
        time.sleep(3)
144+
145+
146+
if __name__ == '__main__':
    # Script entry point: log in, then dump the posts and download photos.
    driver = login('11111111', 'password', '2222222')
    if driver:
        try:
            get_shuoshuo(driver)
            get_photo(driver)
        finally:
            # Always close the browser, even when scraping raises.
            driver.quit()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /