Commit 0800f21

committed

no message

1 parent 8837e2d · commit 0800f21 (Copy full SHA for 0800f21)

File tree

1 file changed

+151

-0

lines changed

moumoubaimifan/qqzone
- qqzone.py

1 file changed

+151

-0

lines changed

`‎moumoubaimifan/qqzone/qqzone.py‎`

Lines changed: 151 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,151 @@`
	`1`	`+# coding=utf-8`
	`2`	`+from urllib.request import urlretrieve`
	`3`	`+`
	`4`	`+from selenium import webdriver`
	`5`	`+from bs4 import BeautifulSoup`
	`6`	`+import time`
	`7`	`+`
	`8`	`+from selenium.webdriver import ActionChains`
	`9`	`+`
	def login(login_qq, password, business_qq):
	    """Log in to QQ Zone and open the space of ``business_qq``.

	    Starts a Chrome session, opens the target space, fills in the
	    account/password form inside the login frame and then checks that the
	    ``QM_OwnerInfo_Icon`` element is present on the resulting page.

	    :param login_qq: QQ number used to log in
	    :param password: password of the login QQ account
	    :param business_qq: QQ number whose space is visited
	    :return: the logged-in driver, or None when the space cannot be accessed
	        (in that case the browser session is closed before returning)
	    """
	    # Imported locally so the file's top-level import block stays untouched.
	    from selenium.common.exceptions import WebDriverException

	    driver = webdriver.Chrome()
	    accessible = False
	    try:
	        driver.get('https://user.qzone.qq.com/{}/311'.format(business_qq))  # URL
	        # Implicit wait applies to every later element lookup of this session,
	        # so it only has to be configured once.
	        driver.implicitly_wait(10)
	        driver.find_element_by_id('login_div')
	        # The account/password form lives inside its own frame.
	        driver.switch_to.frame('login_frame')
	        # Switch from the default login mode to "account + password" login.
	        driver.find_element_by_id('switcher_plogin').click()
	        account_box = driver.find_element_by_id('u')
	        account_box.clear()
	        account_box.send_keys(login_qq)
	        password_box = driver.find_element_by_id('p')
	        password_box.clear()
	        password_box.send_keys(password)
	        driver.find_element_by_id('login_button').click()
	        driver.switch_to.default_content()

	        # Give the page time to finish the post-login redirect.
	        time.sleep(5)

	        try:
	            driver.find_element_by_id('QM_OwnerInfo_Icon')
	            accessible = True
	        except WebDriverException:
	            # format() instead of '+' so a non-string QQ number cannot raise
	            # a TypeError inside the error path.
	            print('不能访问{}'.format(business_qq))
	    finally:
	        # Do not leak the Chrome process when login failed or raised.
	        if not accessible:
	            driver.quit()

	    return driver if accessible else None
	`41`	`+`
	`42`	`+`
	`43`	`+`
	def get_photo(driver, qq=None):
	    """Download the photos of the albums of the currently opened space.

	    For each album (starting at album index 1, as the original code did) the
	    first photo is opened in the viewer and the viewer is stepped through
	    photo by photo; every displayed image is saved below
	    ``C:/Users/xxx/Desktop/photo/<qq>/``.

	    :param driver: logged-in driver, as returned by ``login``
	    :param qq: QQ number used as the download sub-directory name. When
	        omitted it is taken from the first path segment of
	        ``driver.current_url`` (assumes the URL still has the
	        ``https://user.qzone.qq.com/<qq>/...`` form opened by ``login`` —
	        TODO confirm)
	    :raises ValueError: if no QQ number is given and none can be derived
	    :raises RuntimeError: if the "next photo" button never appears
	    """
	    # Imported locally so the file's top-level import block stays untouched.
	    import os
	    from urllib.parse import urlparse

	    driver.switch_to.default_content()
	    if qq is None:
	        qq = urlparse(driver.current_url).path.strip('/').split('/')[0]
	    if not qq:
	        raise ValueError('cannot determine the QQ number for the download path')

	    # Photo download path: <qq> directory, <photo name> file.
	    photo_path = "C:/Users/xxx/Desktop/photo/{}/{}.jpg"
	    # urlretrieve does not create missing directories.
	    os.makedirs(os.path.dirname(photo_path.format(qq, 'x')), exist_ok=True)

	    # Implicit wait is a session-wide setting; configure it once.
	    driver.implicitly_wait(10)

	    # Album index.
	    # NOTE(review): starts at 1, so the element at index 0 is skipped, exactly
	    # as in the original code — confirm whether that is intentional.
	    photoIndex = 1

	    while True:
	        # Back to the main document.
	        driver.switch_to.default_content()
	        # Click the album entry of the header menu.
	        driver.find_element_by_xpath('//*[@id="menuContainer"]/div/ul/li[3]/a').click()
	        time.sleep(3)
	        # The album list is rendered inside this frame.
	        driver.switch_to.frame('app_canvas_frame')
	        # Links of all albums.
	        a = driver.find_elements_by_class_name('album-cover')
	        if photoIndex >= len(a):
	            # Nothing (left) to download; avoids an IndexError below.
	            driver.switch_to.default_content()
	            break
	        # Open a single album.
	        a[photoIndex].click()
	        time.sleep(3)

	        covers = driver.find_elements_by_class_name('item-cover')
	        if covers:
	            # Open the first photo of the album.
	            covers[0].click()
	            time.sleep(3)

	            # The large-image viewer lives in the parent frame.
	            driver.switch_to.parent_frame()
	            # Walk through the photos of the album.
	            while True:
	                # URL and name of the displayed photo.
	                img = driver.find_element_by_id('js-img-disp')
	                src = img.get_attribute('src').replace('&t=5', '')
	                name = driver.find_element_by_id("js-photo-name").text

	                # Download.
	                urlretrieve(src, photo_path.format(qq, name))

	                # "current photo / total photos" counter below the image.
	                counts = driver.find_element_by_xpath('//*[@id="js-ctn-infoBar"]/div/div[1]/span').text
	                counts = counts.split('/')
	                # Leave the viewer after the last photo.
	                if int(counts[0]) == int(counts[1]):
	                    # The X button in the upper right corner.
	                    driver.find_element_by_xpath('//*[@id="js-viewer-main"]/div[1]/a').click()
	                    break

	                # Click "next photo"; the page loads slowly, so try up to 10
	                # times. find_elements returns an empty list instead of
	                # raising, which is what makes the retry reachable.
	                for _ in range(10):
	                    next_buttons = driver.find_elements_by_id('js-btn-nextPhoto')
	                    if next_buttons:
	                        ActionChains(driver).click(next_buttons[0]).perform()
	                        break
	                    time.sleep(5)
	                else:
	                    # Without this the same photo would be re-downloaded forever.
	                    raise RuntimeError('next-photo button did not appear')

	        # Compare with the number of albums: have all albums been handled?
	        photoIndex = photoIndex + 1
	        if len(a) <= photoIndex:
	            break
	`109`	`+`
	`110`	`+`
	def get_shuoshuo(driver):
	    """Print every status post ("shuoshuo") of the opened space, page by page.

	    Each page is scrolled down, the content of the ``app_canvas_frame`` frame
	    is parsed, and one line ``<title of the detail link>:<post text>`` is
	    printed per post. Paging stops after the page whose number equals the text
	    of the "末页" (last page) link; a feed without that link is treated as a
	    single page.

	    :param driver: logged-in driver, as returned by ``login``
	    """
	    page = 1
	    while True:
	        # Scroll down four times so the page content gets loaded.
	        for _ in range(4):
	            driver.execute_script("window.scrollBy(0,5000)")
	            time.sleep(2)

	        # The posts are rendered inside this frame.
	        driver.switch_to.frame('app_canvas_frame')
	        # Characters that GBK cannot represent are dropped before parsing.
	        html = driver.page_source.encode('GBK', 'ignore').decode('gbk')
	        # Name the parser explicitly: without it bs4 picks whichever parser
	        # happens to be installed and emits a warning.
	        bs = BeautifulSoup(html, 'html.parser')

	        # All posts on the current page.
	        for pre in bs.find_all('pre', class_='content'):
	            shuoshuo = pre.text
	            detail_link = pre.parent.parent.find('a', class_="c_tx c_tx3 goDetail")
	            # A post without a detail link is still printed, with an empty prefix.
	            if detail_link is not None and detail_link.has_attr('title'):
	                tx = detail_link['title']
	            else:
	                tx = ''
	            print(tx + ":" + shuoshuo)

	        # Page bookkeeping.
	        page = page + 1
	        last_page_link = bs.find('a', title='末页')
	        try:
	            maxPage = int(last_page_link.text) if last_page_link is not None else 1
	        except ValueError:
	            # Link text is not a number; stop after the current page.
	            maxPage = 1

	        if maxPage < page:
	            # Leave the driver in the main document for whoever uses it next.
	            driver.switch_to.default_content()
	            break

	        driver.find_element_by_link_text(u'下一页').click()
	        # Back to the main document.
	        driver.switch_to.default_content()
	        # Wait for the next page to load.
	        time.sleep(3)
	`144`	`+`
	`145`	`+`
	if __name__ == '__main__':

	    # Placeholder credentials: login QQ, its password, and the QQ number of
	    # the space to scrape.
	    driver = login('11111111', 'password', '2222222')
	    if driver:
	        try:
	            get_shuoshuo(driver)
	            get_photo(driver)
	        finally:
	            # Always close the browser, even when scraping raised.
	            driver.quit()

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 0800f21

File tree

1 file changed

1 file changed

`‎moumoubaimifan/qqzone/qqzone.py‎`

0 commit comments