Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7f26c65

Browse files
Day 16 - Login, Scrape, Automate Comments & Likes with Selenium
1 parent 7a1e8c4 commit 7f26c65

File tree

5 files changed

+272
-0
lines changed

5 files changed

+272
-0
lines changed

‎tutorial-reference/Day 16/Pipfile‎

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[[source]]
2+
name = "pypi"
3+
url = "https://pypi.org/simple"
4+
verify_ssl = true
5+
6+
[dev-packages]
7+
8+
[packages]
9+
selenium = "*"
10+
requests = "*"
11+
12+
[requires]
13+
python_version = "3.8"

‎tutorial-reference/Day 16/Pipfile.lock‎

Lines changed: 65 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎tutorial-reference/Day 16/conf.py‎

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""
2+
INSTA_USERNAME=<set below>
3+
INSTA_PASSWORD=<set below>
4+
5+
6+
7+
8+
9+
10+
11+
12+
13+
14+
15+
16+
"""
17+
INSTA_USERNAME = ''
18+
INSTA_PASSWORD = ''

‎tutorial-reference/Day 16/google.py‎

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Day 16 tutorial script: open Google in Chrome, type a query into the
# search box with Selenium, and submit the search.
import time
from selenium import webdriver

browser = webdriver.Chrome()  # Firefox()

url = 'https://google.com'
browser.get(url)

"""
<input type='text' class='' id='' name='??' />
<textarea name='??'><textarea>
<input name="q" type="text">
"""
# Crude fixed wait for the page to render before querying the DOM.
time.sleep(2)
name = 'q'
# NOTE(review): find_element_by_name is the Selenium 3 API; it was removed
# in Selenium 4 in favor of find_element(By.NAME, ...) — confirm the
# selenium version resolved by the Pipfile ("*") still supports it.
search_el = browser.find_element_by_name("q")
# print(search_el)
# search_el = browser.find_elements_by_css_selector("h1")
search_el.send_keys("selenium python")

"""
<input type='submit' />
<button type='submit' />
<form></form>

<input type="submit">
"""
# Google's search form submits via an input[type=submit] element.
submit_btn_el = browser.find_element_by_css_selector("input[type='submit']")
print(submit_btn_el.get_attribute('name'))
time.sleep(2)
submit_btn_el.click()

‎tutorial-reference/Day 16/insta.py‎

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# Day 16 tutorial: log in to Instagram with Selenium; helpers further down
# in this file then scrape media and automate follows/comments/likes.
# import getpass
# my_password = getpass.getpass("What is your password?\n")
# print(my_password)
from urllib.parse import urlparse
import os
import time
import requests
from conf import INSTA_USERNAME, INSTA_PASSWORD
from selenium import webdriver

browser = webdriver.Chrome()

url = "https://www.instagram.com"
browser.get(url)

# Fixed wait for the login form to render, then fill in the credentials
# imported from conf.py.
time.sleep(2)
username_el = browser.find_element_by_name("username")
username_el.send_keys(INSTA_USERNAME)

password_el = browser.find_element_by_name("password")
password_el.send_keys(INSTA_PASSWORD)

time.sleep(1.5)
submit_btn_el = browser.find_element_by_css_selector("button[type='submit']")
submit_btn_el.click()

# Grab the rendered page body HTML for inspection while developing.
body_el = browser.find_element_by_css_selector("body")
html_text = body_el.get_attribute("innerHTML")

# print(html_text)

"""
<button class="_5f5mN jIbKX _6VtSN yZn4P">Follow</button>
"""

# browser.find_elements_by_css_selector("button")

# xpath
# my_button_xpath = "//button"
#browser.find_elements_by_xpath(my_button_xpath)
41+
42+
def click_to_follow(browser):
    """Click every visible "Follow" button on the current page.

    The XPath matches <button> elements whose text contains "Follow" but
    not "Following", so accounts that are already followed are skipped.
    Sleeps 2s between clicks as a self-throttle to look less bot-like.

    Args:
        browser: a selenium WebDriver positioned on the target page.
    """
    # my_follow_btn_xpath = "//a[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    # my_follow_btn_xpath = "//*[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    my_follow_btn_xpath = "//button[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    follow_btn_elements = browser.find_elements_by_xpath(my_follow_btn_xpath)
    for btn in follow_btn_elements:
        time.sleep(2)  # self-throttle
        try:
            btn.click()
        except Exception:
            # A button can go stale or be obscured between query and click;
            # skip it rather than abort the whole pass. (Was a bare
            # `except:`, which also swallowed KeyboardInterrupt.)
            pass
53+
54+
# new_user_url = "https://www.instagram.com/ted/"
# browser.get(new_user_url)
# click_to_follow(browser)

time.sleep(2)
the_rock_url = "https://www.instagram.com/therock/"
browser.get(the_rock_url)


# Individual post permalinks follow this pattern:
post_url_pattern = "https://www.instagram.com/p/<post-slug-id>"
post_xpath_str = "//a[contains(@href, '/p/')]"
post_links = browser.find_elements_by_xpath(post_xpath_str)

# Open the first (most recent) post link, if any were found.
post_link_el = post_links[0] if post_links else None

if post_link_el is not None:  # `is not None`, not `!= None` (PEP 8)
    post_href = post_link_el.get_attribute("href")
    browser.get(post_href)

# Collect the post's media elements and ensure the download directory
# (./data next to this file) exists.
video_els = browser.find_elements_by_xpath("//video")
images_els = browser.find_elements_by_xpath("//img")
base_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(base_dir, "data")
os.makedirs(data_dir, exist_ok=True)

# PIL to verify the size of any given image.
82+
83+
def scrape_and_save(elements, dest_dir=None):
    """Download each element's `src` media file into the data directory.

    Args:
        elements: selenium elements (img/video) exposing get_attribute('src').
        dest_dir: target directory; defaults to the module-level data_dir.

    Files are named after the last path segment of the src URL; files that
    already exist are skipped, so the function is safe to re-run. Responses
    are streamed in 8 KB chunks so large media never sits fully in memory.
    """
    if dest_dir is None:
        dest_dir = data_dir
    for el in elements:
        url = el.get_attribute('src')
        if not url:
            # Some elements (e.g. lazy-loaded media) have no src yet —
            # urlparse(None) would raise, so skip them.
            continue
        filename = os.path.basename(urlparse(url).path)
        filepath = os.path.join(dest_dir, filename)
        if os.path.exists(filepath):
            continue  # already downloaded
        with requests.get(url, stream=True) as r:
            try:
                r.raise_for_status()
            except requests.RequestException:
                # Bad status (403/404/...) for this one element — skip it.
                # (Was a bare `except:`.)
                continue
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
101+
102+
"""
103+
LONG TERM Goal:
104+
Use machine learning to classify the post's
105+
image or video
106+
and then comment in a relevant fashion
107+
"""
108+
109+
"""
110+
<textarea aria-label="Add a comment..." placeholder="Add a comment..." class="Ypffh" autocomplete="off" autocorrect="off" style="height: 18px;"></textarea>
111+
"""
112+
def automate_comment(browser, content="That is cool!"):
113+
time.sleep(3)
114+
comment_xpath_str = "//textarea[contains(@placeholder, 'Add a comment')]"
115+
comment_el = browser.find_element_by_xpath(comment_xpath_str)
116+
comment_el.send_keys(content)
117+
submit_btns_xpath = "button[type='submit']"
118+
submit_btns_els = browser.find_elements_by_css_selector(submit_btns_xpath)
119+
time.sleep(2)
120+
for btn in submit_btns_els:
121+
try:
122+
btn.click()
123+
except:
124+
pass
125+
126+
127+
def automate_likes(browser):
    """Click the largest "Like" heart(s) on the current post page.

    Instagram renders several elements whose aria-label contains "Like"
    (the big heart under the post plus small ones next to each comment).
    The post's own heart is the tallest, so first find the maximum
    `height` attribute, then click only hearts of that height — via the
    parent element, since the icon itself is not the clickable control.

    Args:
        browser: a selenium WebDriver positioned on a post page.
    """
    like_heart_svg_xpath = "//*[contains(@aria-label, 'Like')]"
    all_like_hearts_elements = browser.find_elements_by_xpath(like_heart_svg_xpath)
    max_heart_h = -1
    for heart_el in all_like_hearts_elements:
        h = heart_el.get_attribute("height")
        if h is None:
            continue  # no height attribute — int(None) used to crash here
        current_h = int(h)
        if current_h > max_heart_h:
            max_heart_h = current_h
    # Re-query: the earlier references may have gone stale while measuring.
    all_like_hearts_elements = browser.find_elements_by_xpath(like_heart_svg_xpath)
    for heart_el in all_like_hearts_elements:
        h = heart_el.get_attribute("height")
        # Compare as ints (get_attribute returns strings); the original
        # compared a string against an int and then against an f-string.
        if h is None or int(h) != max_heart_h:
            continue
        parent_button = heart_el.find_element_by_xpath('..')
        time.sleep(2)  # self-throttle
        try:
            parent_button.click()
        except Exception:
            # Overlapped or stale hearts are expected; skip them.
            # (Was a bare `except:`.)
            pass

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /