Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7f26c65

Browse files
Day 16 - Login, Scrape, Automate Comments & Likes with Selenium
1 parent 7a1e8c4 commit 7f26c65

File tree

5 files changed

+272
-0
lines changed

5 files changed

+272
-0
lines changed

‎tutorial-reference/Day 16/Pipfile‎

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[[source]]
2+
name = "pypi"
3+
url = "https://pypi.org/simple"
4+
verify_ssl = true
5+
6+
[dev-packages]
7+
8+
[packages]
9+
selenium = "*"
10+
requests = "*"
11+
12+
[requires]
13+
python_version = "3.8"

‎tutorial-reference/Day 16/Pipfile.lock‎

Lines changed: 65 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎tutorial-reference/Day 16/conf.py‎

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""
2+
INSTA_USERNAME=<set below>
3+
INSTA_PASSWORD=<set below>
4+
5+
6+
7+
8+
9+
10+
11+
12+
13+
14+
15+
16+
"""
17+
INSTA_USERNAME = ''
18+
INSTA_PASSWORD = ''

‎tutorial-reference/Day 16/google.py‎

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Day 16 tutorial script: open Google in Chrome, type a query into the
# search box with Selenium, and submit the search.
import time
from selenium import webdriver

browser = webdriver.Chrome()  # Firefox()

url = 'https://google.com'
browser.get(url)

"""
<input type='text' class='' id='' name='??' />
<textarea name='??'><textarea>
<input name="q" type="text">
"""
# Crude fixed wait for the page to render before querying the DOM.
time.sleep(2)
name = 'q'
# NOTE(review): find_element_by_name is the Selenium 3 API; it was removed
# in Selenium 4 in favor of find_element(By.NAME, ...) — confirm the
# selenium version resolved by the Pipfile ("*") still supports it.
search_el = browser.find_element_by_name("q")
# print(search_el)
# search_el = browser.find_elements_by_css_selector("h1")
search_el.send_keys("selenium python")

"""
<input type='submit' />
<button type='submit' />
<form></form>

<input type="submit">
"""
# Google's search form submits via an input[type=submit] element.
submit_btn_el = browser.find_element_by_css_selector("input[type='submit']")
print(submit_btn_el.get_attribute('name'))
time.sleep(2)
submit_btn_el.click()

‎tutorial-reference/Day 16/insta.py‎

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# Day 16 tutorial: log in to Instagram with Selenium; helpers further down
# in this file then scrape media and automate follows/comments/likes.
# import getpass
# my_password = getpass.getpass("What is your password?\n")
# print(my_password)
from urllib.parse import urlparse
import os
import time
import requests
from conf import INSTA_USERNAME, INSTA_PASSWORD
from selenium import webdriver

browser = webdriver.Chrome()

url = "https://www.instagram.com"
browser.get(url)

# Fixed wait for the login form to render, then fill in the credentials
# imported from conf.py.
time.sleep(2)
username_el = browser.find_element_by_name("username")
username_el.send_keys(INSTA_USERNAME)

password_el = browser.find_element_by_name("password")
password_el.send_keys(INSTA_PASSWORD)

time.sleep(1.5)
submit_btn_el = browser.find_element_by_css_selector("button[type='submit']")
submit_btn_el.click()

# Grab the rendered page body HTML for inspection while developing.
body_el = browser.find_element_by_css_selector("body")
html_text = body_el.get_attribute("innerHTML")

# print(html_text)

"""
<button class="_5f5mN jIbKX _6VtSN yZn4P">Follow</button>
"""

# browser.find_elements_by_css_selector("button")

# xpath
# my_button_xpath = "//button"
#browser.find_elements_by_xpath(my_button_xpath)
41+
42+
def click_to_follow(browser):
    """Click every visible "Follow" button on the current page.

    The XPath matches <button> elements whose text contains "Follow" but
    not "Following", so accounts that are already followed are skipped.
    Sleeps 2s between clicks as a self-throttle to look less bot-like.

    Args:
        browser: a selenium WebDriver positioned on the target page.
    """
    # my_follow_btn_xpath = "//a[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    # my_follow_btn_xpath = "//*[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    my_follow_btn_xpath = "//button[contains(text(), 'Follow')][not(contains(text(), 'Following'))]"
    follow_btn_elements = browser.find_elements_by_xpath(my_follow_btn_xpath)
    for btn in follow_btn_elements:
        time.sleep(2)  # self-throttle
        try:
            btn.click()
        except Exception:
            # A button can go stale or be obscured between query and click;
            # skip it rather than abort the whole pass. (Was a bare
            # `except:`, which also swallowed KeyboardInterrupt.)
            pass
53+
54+
# new_user_url = "https://www.instagram.com/ted/"
# browser.get(new_user_url)
# click_to_follow(browser)

time.sleep(2)
the_rock_url = "https://www.instagram.com/therock/"
browser.get(the_rock_url)


# Individual post permalinks follow this pattern:
post_url_pattern = "https://www.instagram.com/p/<post-slug-id>"
post_xpath_str = "//a[contains(@href, '/p/')]"
post_links = browser.find_elements_by_xpath(post_xpath_str)

# Open the first (most recent) post link, if any were found.
post_link_el = post_links[0] if post_links else None

if post_link_el is not None:  # `is not None`, not `!= None` (PEP 8)
    post_href = post_link_el.get_attribute("href")
    browser.get(post_href)

# Collect the post's media elements and ensure the download directory
# (./data next to this file) exists.
video_els = browser.find_elements_by_xpath("//video")
images_els = browser.find_elements_by_xpath("//img")
base_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(base_dir, "data")
os.makedirs(data_dir, exist_ok=True)

# PIL to verify the size of any given image.
82+
83+
def scrape_and_save(elements, dest_dir=None):
    """Download each element's `src` media file into the data directory.

    Args:
        elements: selenium elements (img/video) exposing get_attribute('src').
        dest_dir: target directory; defaults to the module-level data_dir.

    Files are named after the last path segment of the src URL; files that
    already exist are skipped, so the function is safe to re-run. Responses
    are streamed in 8 KB chunks so large media never sits fully in memory.
    """
    if dest_dir is None:
        dest_dir = data_dir
    for el in elements:
        url = el.get_attribute('src')
        if not url:
            # Some elements (e.g. lazy-loaded media) have no src yet —
            # urlparse(None) would raise, so skip them.
            continue
        filename = os.path.basename(urlparse(url).path)
        filepath = os.path.join(dest_dir, filename)
        if os.path.exists(filepath):
            continue  # already downloaded
        with requests.get(url, stream=True) as r:
            try:
                r.raise_for_status()
            except requests.RequestException:
                # Bad status (403/404/...) for this one element — skip it.
                # (Was a bare `except:`.)
                continue
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
101+
102+
"""
103+
LONG TERM Goal:
104+
Use machine learning to classify the post's
105+
image or video
106+
and then comment in a relevant fashion
107+
"""
108+
109+
"""
110+
<textarea aria-label="Add a comment..." placeholder="Add a comment..." class="Ypffh" autocomplete="off" autocorrect="off" style="height: 18px;"></textarea>
111+
"""
112+
def automate_comment(browser, content="That is cool!"):
113+
time.sleep(3)
114+
comment_xpath_str = "//textarea[contains(@placeholder, 'Add a comment')]"
115+
comment_el = browser.find_element_by_xpath(comment_xpath_str)
116+
comment_el.send_keys(content)
117+
submit_btns_xpath = "button[type='submit']"
118+
submit_btns_els = browser.find_elements_by_css_selector(submit_btns_xpath)
119+
time.sleep(2)
120+
for btn in submit_btns_els:
121+
try:
122+
btn.click()
123+
except:
124+
pass
125+
126+
127+
def automate_likes(browser):
    """Click the largest "Like" heart(s) on the current post page.

    Instagram renders several elements whose aria-label contains "Like"
    (the big heart under the post plus small ones next to each comment).
    The post's own heart is the tallest, so first find the maximum
    `height` attribute, then click only hearts of that height — via the
    parent element, since the icon itself is not the clickable control.

    Args:
        browser: a selenium WebDriver positioned on a post page.
    """
    like_heart_svg_xpath = "//*[contains(@aria-label, 'Like')]"
    all_like_hearts_elements = browser.find_elements_by_xpath(like_heart_svg_xpath)
    max_heart_h = -1
    for heart_el in all_like_hearts_elements:
        h = heart_el.get_attribute("height")
        if h is None:
            continue  # no height attribute — int(None) used to crash here
        current_h = int(h)
        if current_h > max_heart_h:
            max_heart_h = current_h
    # Re-query: the earlier references may have gone stale while measuring.
    all_like_hearts_elements = browser.find_elements_by_xpath(like_heart_svg_xpath)
    for heart_el in all_like_hearts_elements:
        h = heart_el.get_attribute("height")
        # Compare as ints (get_attribute returns strings); the original
        # compared a string against an int and then against an f-string.
        if h is None or int(h) != max_heart_h:
            continue
        parent_button = heart_el.find_element_by_xpath('..')
        time.sleep(2)  # self-throttle
        try:
            parent_button.click()
        except Exception:
            # Overlapped or stale hearts are expected; skip them.
            # (Was a bare `except:`.)
            pass

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /