\$\begingroup\$
\$\endgroup\$
The script downloads images and videos from a user's profile page on Instagram. It works fine, but I'd like to know if there are ways to improve it. All ideas and suggestions are welcome.
import sys
import os
import requests
import urllib.request
import time
import json
from colorama import init, deinit
from termcolor import colored, cprint
import argparse
print_green = lambda x: cprint(x, 'green') #print out text in green
print_magenta = lambda x : cprint(x, 'magenta') #print out text in magenta
print_yellow = lambda x: cprint(x, 'yellow') #print out text in yellow
print_cyan = lambda x: cprint(x, 'cyan') #print out text in cyan
# download images
def image_downloader(edge, images_path):
display_url = edge['node']['display_url']
file_name = edge['node']['taken_at_timestamp']
download_path = images_path + '\\' + str(file_name) + '.jpg'
if not os.path.exists(download_path):
print_yellow('Downloading ' + str(file_name) + '.jpg...........')
urllib.request.urlretrieve(display_url, download_path)
print_green(str(file_name) + '.jpg Downloaded')
print('\n')
else:
print_green(str(file_name) + '.jpg has been downloaded before')
print('\n')
# download videos
def video_downloader(shortcode, videos_path):
r = requests.get('https://www.instagram.com/p/' + shortcode + '/?__a=1')
video_url = r.json()['graphql']['shortcode_media']['video_url']
file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
download_path = videos_path + '\\' + str(file_name) + '.mp4'
if not os.path.exists(download_path):
print_yellow('Downloading ' + str(file_name) + '.mp4...........')
urllib.request.urlretrieve(video_url, download_path)
print_green(str(file_name) + '.mp4 Downloaded')
print('\n')
else:
print_green(str(file_name) + '.mp4 has been downloaded before')
print('\n')
# download images and videos from posts containing more than one pictures or videos`
def sidecar_downloader(shortcode, images_path, videos_path):
r = requests.get('https://www.instagram.com/p/' + shortcode + '/?__a=1')
num = 1
for edge in r.json()['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']:
is_video = edge['node']['is_video']
if is_video == False:
display_url = edge['node']['display_url']
file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
download_path = images_path + '\\' + str(file_name) + '_' + str(num) + '.jpg'
if not os.path.exists(download_path):
print_yellow('Downloading ' + str(file_name) + '_' + str(num) + '.jpg...........')
urllib.request.urlretrieve(display_url, download_path)
print_green(str(file_name) + '_' + str(num) + '.jpg Downloaded')
print('\n')
else:
print_green(str(file_name) + '_' + str(num) + '.jpg has been downloaded before')
print('\n')
else:
video_url = edge['node']['video_url']
file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
download_path = videos_path + '\\' + str(file_name) + '_' + str(num) + '.mp4'
if not os.path.exists(download_path):
print_yellow('Downloading ' + str(file_name) + '_' + str(num) + '.mp4...........')
urllib.request.urlretrieve(video_url, download_path)
print_green(str(file_name) + '_' + str(num) + '.mp4 Downloaded')
print('\n')
else:
print_green(str(file_name) + '_' + str(num) + '.mp4 has been downloaded before')
print('\n')
num += 1
def main(account_json_info, path):
init()
r = requests.get(account_json_info)
user_id = r.json()['graphql']['user']['id']
end_cursor = ''
next_page = True
is_video = False
images_path = path + '\\Images'
videos_path = path + '\\Videos'
if os.path.exists(path) == False:
os.makedirs(path)
if os.path.exists(images_path) == False:
os.makedirs(images_path)
if os.path.exists(videos_path) == False:
os.makedirs(videos_path)
print_magenta('User Folder Created!\n')
else:
print_magenta('User Folder Has Been Created Before!\n')
while next_page == True:
r = requests.get('https://www.instagram.com/graphql/query/',
params = {
'query_id': '17880160963012870',
'id': user_id,
'first': 12,
'after': end_cursor
}
)
graphql = r.json()['data']
for edge in graphql['user']['edge_owner_to_timeline_media']['edges']:
__typename = edge['node']['__typename']
if __typename == 'GraphImage':
image_downloader(edge, images_path)
elif __typename == 'GraphVideo':
shortcode = edge['node']['shortcode']
video_downloader(shortcode, videos_path)
elif __typename == 'GraphSidecar':
shortcode = edge['node']['shortcode']
sidecar_downloader(shortcode, images_path, videos_path)
end_cursor = graphql['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']
next_page = graphql['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']
time.sleep(10)
deinit()
if __name__ == '__main__':
print('\n\n')
init(autoreset = True)
print_cyan('Instagram Media Downloader'.center(os.get_terminal_size().columns, '-'))
deinit()
parser = argparse.ArgumentParser(description = 'Download Instagram Images and Videos from a User\'s Profile Page')
parser.add_argument('-u', '--user', dest = 'username', required = True, help = 'Username on Instagram')
parser.add_argument('-p', '--path', dest = 'path', required = True, help = 'Root path where downloaded Instagram Media is saved')
args = parser.parse_args()
account_json_info = 'https://www.instagram.com/' + args.username + '/?__a=1' #insert username into the link
args.path += '\\' + args.username #add username to the directory given
main(account_json_info, args.path)
AlexV
7,3532 gold badges24 silver badges47 bronze badges
asked Jul 10, 2019 at 19:05
1 Answer 1
\$\begingroup\$
\$\endgroup\$
1
- Organizing Imports: I ordered your import statements alphabetically. While this is not a required style point, it's a person preference that you can decide to follow if you want.
- Unused Imports: You had a few unused imports (
json
,sys
,termcolor
) - String Formatting: You concatenate multiple strings with variables everywhere in your code. You can format your string with
f"..."
, so you can directly implement variables in your strings, like so:download_path = f"{videos_path}\\{file_name}.mp4"
. - Variable Assignment/Parameter Spacing: There should only be one space before the
=
and after the=
when assigning variables. For default parameters, there should be no spaces. I'm not sure if this is a practice that's taught, but it's how I learned my python styling. - DRY: Don't Repeat Yourself! Your
print_...
lambda's do the exact same thing, with only one string being different. These four lambdas can be simplified to a simple method, with passing the text and color to print. I'm pretty suretermcolor
has a built-in methodcolored
that does the exact thing, but I'd have to double check. - Truth/False Comparisons: As an example,
if is_video == False
should be changed toif not is_video
. It does the exact same thing, without the verbose== False
, and utilizingnot
. You use both ways in your code, but you should really avoid== False
/== True
. - Constant Variable Names: Variables that are constants should be UPPERCASE.
- Docstrings: You had the right idea with regular comments describing what the method does. You should move these comments into a docstring inside the method, so any documentation can tell what your method is supposed to do.
Final Code
import argparse
import os
import requests
import time
import urllib.request
from colorama import init, deinit
from termcolor import cprint
def print_in_color(text, color):
""" Prints `text` in passed `color` """
cprint(text, color)
def image_downloader(edge, images_path):
""" Downloads images """
display_url = edge['node']['display_url']
file_name = edge['node']['taken_at_timestamp']
download_path = f"{images_path}\\{file_name}.jpg"
if not os.path.exists(download_path):
print_in_color(f"Downloading {str(file_name)}.jpg...........", "yellow")
urllib.request.urlretrieve(display_url, download_path)
print_in_color(f"{file_name}.jpg downloaded.\n", "green")
else:
print_in_color(f"{file_name}.jpg has been downloaded already.\n", "green")
def video_downloader(shortcode, videos_path):
""" Downloads videos """
videos = requests.get(f"https://www.instagram.com/p/{shortcode}/?__a=1")
video_url = videos.json()['graphql']['shortcode_media']['video_url']
file_name = videos.json()['graphql']['shortcode_media']['taken_at_timestamp']
download_path = f"{videos_path}\\{file_name}.mp4"
if not os.path.exists(download_path):
print_in_color(f"Downloading {file_name}.mp4...........", "yellow")
urllib.request.urlretrieve(video_url, download_path)
print_in_color(f"{file_name}.mp4 downloaded.\n", "green")
else:
print_in_color(f"{file_name}.mp4 has been downloaded already.\n", "green")
def sidecar_downloader(shortcode, images_path, videos_path):
""" Downloads images and videos from posts containing more than one pictures or videos """
r = requests.get(f"https://www.instagram.com/p/{shortcode}/?__a=1")
num = 1
for edge in r.json()['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']:
is_video = edge['node']['is_video']
if not is_video:
display_url = edge['node']['display_url']
file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
download_path = f"{images_path}\\{file_name}_{num}.jpg"
if not os.path.exists(download_path):
print_in_color(f"Downloading {file_name}_{num}.jpg...........", "yellow")
urllib.request.urlretrieve(display_url, download_path)
print_in_color(f"{file_name}_{num}.jpg downloaded.\n", "green")
else:
print_in_color(f"{file_name}_{num}.jpg has been downloaded already.\n", "green")
else:
video_url = edge['node']['video_url']
file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
download_path = f"{videos_path}\\{file_name}_{num}.mp4"
if not os.path.exists(download_path):
print_in_color(f"Downloading {file_name}_{num}.mp4...........", "yellow")
urllib.request.urlretrieve(video_url, download_path)
print_in_color(f"{file_name}_{num}.mp4 downloaded.\n", "green")
else:
print_in_color(f"{file_name}_{num}.mp4 has been downloaded already.\n", "green")
num += 1
def main(account_json_info, path):
""" Runs methods that download photos/videos from the user """
init()
r = requests.get(account_json_info)
user_id = r.json()['graphql']['user']['id']
end_cursor = ''
next_page = True
images_path = f"{path}\\Images"
videos_path = f"{path}\\Videos"
if not os.path.exists(path):
os.makedirs(path)
if not os.path.exists(images_path):
os.makedirs(images_path)
if not os.path.exists(videos_path):
os.makedirs(videos_path)
print_in_color("User Folder Created!\n", "magenta")
else:
print_in_color("User Folder Has Been Created Already!\n", "magenta")
while next_page:
r = requests.get('https://www.instagram.com/graphql/query/',
params={
'query_id': '17880160963012870',
'id': user_id,
'first': 12,
'after': end_cursor
}
)
graphql = r.json()['data']
for edge in graphql['user']['edge_owner_to_timeline_media']['edges']:
__typename = edge['node']['__typename']
if __typename == 'GraphImage':
image_downloader(edge, images_path)
elif __typename == 'GraphVideo':
shortcode = edge['node']['shortcode']
video_downloader(shortcode, videos_path)
elif __typename == 'GraphSidecar':
shortcode = edge['node']['shortcode']
sidecar_downloader(shortcode, images_path, videos_path)
end_cursor = graphql['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']
next_page = graphql['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']
time.sleep(10)
deinit()
if __name__ == '__main__':
print('\n\n')
init(autoreset=True)
print_in_color('Instagram Media Downloader'.center(os.get_terminal_size().columns, '-'), "cyan")
deinit()
PARSER = argparse.ArgumentParser(description='Download Instagram Images and Videos from a User\'s Profile Page')
PARSER.add_argument('-u', '--user', dest='username', required=True, help='Username on Instagram')
PARSER.add_argument('-p', '--path', dest='path', required=True, help='Root path where downloaded Instagram Media is saved')
ARGS = PARSER.parse_args()
#Insert username into link
ACCOUNT_JSON_INFO = f"https://www.instagram.com/{ARGS.username}/?__a=1"
ARGS.path += f"\\{ARGS.username}"
main(ACCOUNT_JSON_INFO, ARGS.path)
answered Jul 25, 2019 at 6:22
-
\$\begingroup\$ Import standard in python is to import standard library packages/modules as one block first, then external packages/modules as another block, then personal packages/modules - each block separated by an empty line. \$\endgroup\$QuantumChris– QuantumChris2019年07月25日 12:27:12 +00:00Commented Jul 25, 2019 at 12:27
lang-py