Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5a8afa9

Browse files
committed
Update script.py and gitignore
This is essentially a finished script, but can be improved. Instead of urllib, I used wget to pull images from Reddit.
1 parent 83b29b4 commit 5a8afa9

File tree

2 files changed

+75
-15
lines changed

2 files changed

+75
-15
lines changed

‎Reddit Meme Scraper/.gitignore

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,9 @@
11
venv/
2-
.idea/
2+
.idea/
3+
*.csv
4+
Test/
5+
*.txt
6+
scriptcopy.py
7+
*.png
8+
*.jpg
9+
*.jpeg

‎Reddit Meme Scraper/script.py

Lines changed: 67 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,47 @@
11
import praw
2-
import PySimpleGUI as pg
3-
import urllib
4-
import pandas
2+
import PySimpleGUI as sg
3+
import wget
4+
import pandasaspd
55
import datetime as dt
66
import os
77

8-
reddit = praw.Reddit(client_id = '',
9-
client_secret = '',
10-
user_agent = '')
8+
destination_folder = sg.popup_get_folder('Choose where to download files:\n\n'
9+
'NOTE: A folder to store the files will be created within the directory!',
10+
default_path='', title='Choose destination')
11+
folder_lst = [destination_folder]
12+
if folder_lst[0] is None:
13+
sg.Popup('Destination not specified!\nProgram terminated!', title='ERROR: No destination!',
14+
custom_text='Close', button_type=0)
15+
raise SystemExit()
1116

12-
subreddit = reddit.subreddit('sbname+sbname+sbname')
13-
posts = subreddit.hot(limit=10)
17+
18+
class RedditCred:
    """Credential loader for the Reddit API.

    The app id and secret are kept out of the source in a plain-text
    file: line 1 holds the client id, line 2 the client secret.
    """

    def __init__(self):
        # Token file is expected in the current working directory.
        self.text_file = 'reddit_tokens.txt'

    def read_id(self):
        """Return the Reddit client id (first line of the token file)."""
        with open(self.text_file, 'r') as handle:
            return handle.readlines()[0].strip()

    def read_secret(self):
        """Return the Reddit client secret (second line of the token file)."""
        with open(self.text_file, 'r') as handle:
            return handle.readlines()[1].strip()
34+
35+
36+
red_cred = RedditCred()
37+
u_agent = 'Script that downloads memes from various subreddits'
38+
39+
reddit = praw.Reddit(client_id=red_cred.read_id(),
40+
client_secret=red_cred.read_secret(),
41+
user_agent=u_agent)
42+
43+
subreddit = reddit.subreddit('deepfriedmemes+surrealmemes+nukedmemes+bigbangedmemes+wackytictacs+bonehurtingjuice')
44+
posts = subreddit.hot(limit=25)
1445

1546
# Empty lists to hold data
1647

@@ -33,11 +64,33 @@
3364
# This iterates through URLs, checks if it has the specified image extension and downloads the image
3465

3566
for index, url in enumerate(image_urls):
36-
images_path = os.getcwd()
37-
_, ext = os.path.splitext(url)
38-
if ext in image_extensions:
67+
path = str(folder_lst[0])
68+
file_ending = str(url)[2:-1]
69+
_, extension = os.path.splitext(file_ending)
70+
if extension in image_extensions:
3971
try:
40-
print('Downloading ', image_urls[index], ' at', images_path + image_titles[index] + ext)
41-
urllib.urlretrieve(image_urls[index], images_path + image_titles[index] + ext)
72+
if os.path.exists(path + '/' + 'Downloaded Images'):
73+
pass
74+
else:
75+
os.mkdir(path + '/' + 'Downloaded Images')
76+
77+
destination = str(folder_lst[0]) + '/' + 'Downloaded Images' + '/'
78+
print(f"Downloading '{str(image_titles[index])[2:-1]}' to '{path}' from '{str(image_urls[index])[2:-1]}'")
79+
download = wget.download(str(image_urls[index])[2:-1], out=destination)
4280
except:
43-
print('Something went wrong while downloading ', image_urls[index])
81+
print(f"Something went wrong while downloading '{str(image_urls[index])[2:-1]}'\n")
82+
else:
83+
print("\nDownload complete!")
84+
sg.Popup(f"Files downloaded into:\n\n'{path}/Downloaded Images'", title='Download complete!')
85+
86+
87+
# Optional saving of collected data to .csv file
88+
89+
dataframe = pd.DataFrame({
90+
'Title': image_titles,
91+
'Score': image_scores,
92+
'URL': image_urls,
93+
'Timestamp': image_timestamps,
94+
'ID': image_ids
95+
})
96+
csv = dataframe.to_csv('./images.csv', index=True, header=True)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /