Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5a8afa9

Browse files
committed
Update script.py and gitignore
This is essentially a finished script, but can be improved. Instead of urllib, I used wget to pull images from Reddit.
1 parent 83b29b4 commit 5a8afa9

File tree

2 files changed

+75
-15
lines changed

2 files changed

+75
-15
lines changed

‎Reddit Meme Scraper/.gitignore

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,9 @@
11
venv/
2-
.idea/
2+
.idea/
3+
*.csv
4+
Test/
5+
*.txt
6+
scriptcopy.py
7+
*.png
8+
*.jpg
9+
*.jpeg

‎Reddit Meme Scraper/script.py

Lines changed: 67 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,47 @@
11
import praw
2-
import PySimpleGUI as pg
3-
import urllib
4-
import pandas
2+
import PySimpleGUI as sg
3+
import wget
4+
import pandasaspd
55
import datetime as dt
66
import os
77

8-
reddit = praw.Reddit(client_id = '',
9-
client_secret = '',
10-
user_agent = '')
8+
destination_folder = sg.popup_get_folder('Choose where to download files:\n\n'
9+
'NOTE: A folder to store the files will be created within the directory!',
10+
default_path='', title='Choose destination')
11+
folder_lst = [destination_folder]
12+
if folder_lst[0] is None:
13+
sg.Popup('Destination not specified!\nProgram terminated!', title='ERROR: No destination!',
14+
custom_text='Close', button_type=0)
15+
raise SystemExit()
1116

12-
subreddit = reddit.subreddit('sbname+sbname+sbname')
13-
posts = subreddit.hot(limit=10)
17+
18+
class RedditCred:
    """Credential loader for the Reddit API.

    The app id and secret are kept out of the source in a plain-text
    file: line 1 holds the client id, line 2 the client secret.
    """

    def __init__(self):
        # Token file is expected in the current working directory.
        self.text_file = 'reddit_tokens.txt'

    def read_id(self):
        """Return the Reddit client id (first line of the token file)."""
        with open(self.text_file, 'r') as handle:
            return handle.readlines()[0].strip()

    def read_secret(self):
        """Return the Reddit client secret (second line of the token file)."""
        with open(self.text_file, 'r') as handle:
            return handle.readlines()[1].strip()
34+
35+
36+
red_cred = RedditCred()
37+
u_agent = 'Script that downloads memes from various subreddits'
38+
39+
reddit = praw.Reddit(client_id=red_cred.read_id(),
40+
client_secret=red_cred.read_secret(),
41+
user_agent=u_agent)
42+
43+
subreddit = reddit.subreddit('deepfriedmemes+surrealmemes+nukedmemes+bigbangedmemes+wackytictacs+bonehurtingjuice')
44+
posts = subreddit.hot(limit=25)
1445

1546
# Empty lists to hold data
1647

@@ -33,11 +64,33 @@
3364
# This iterates through URLs, checks if it has the specified image extension and downloads the image
3465

3566
for index, url in enumerate(image_urls):
36-
images_path = os.getcwd()
37-
_, ext = os.path.splitext(url)
38-
if ext in image_extensions:
67+
path = str(folder_lst[0])
68+
file_ending = str(url)[2:-1]
69+
_, extension = os.path.splitext(file_ending)
70+
if extension in image_extensions:
3971
try:
40-
print('Downloading ', image_urls[index], ' at', images_path + image_titles[index] + ext)
41-
urllib.urlretrieve(image_urls[index], images_path + image_titles[index] + ext)
72+
if os.path.exists(path + '/' + 'Downloaded Images'):
73+
pass
74+
else:
75+
os.mkdir(path + '/' + 'Downloaded Images')
76+
77+
destination = str(folder_lst[0]) + '/' + 'Downloaded Images' + '/'
78+
print(f"Downloading '{str(image_titles[index])[2:-1]}' to '{path}' from '{str(image_urls[index])[2:-1]}'")
79+
download = wget.download(str(image_urls[index])[2:-1], out=destination)
4280
except:
43-
print('Something went wrong while downloading ', image_urls[index])
81+
print(f"Something went wrong while downloading '{str(image_urls[index])[2:-1]}'\n")
82+
else:
83+
print("\nDownload complete!")
84+
sg.Popup(f"Files downloaded into:\n\n'{path}/Downloaded Images'", title='Download complete!')
85+
86+
87+
# Optional saving of collected data to .csv file
88+
89+
dataframe = pd.DataFrame({
90+
'Title': image_titles,
91+
'Score': image_scores,
92+
'URL': image_urls,
93+
'Timestamp': image_timestamps,
94+
'ID': image_ids
95+
})
96+
csv = dataframe.to_csv('./images.csv', index=True, header=True)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /