Python Script to download images and videos from a user's profile on Instagram

Question 1

The script downloads images and videos from a user's profile page on Instagram. It works fine, but I'd like to know if there are ways to improve it. All ideas and suggestions are welcome.

import sys
import os
import requests
import urllib.request
import time
import json
from colorama import init, deinit
from termcolor import colored, cprint
import argparse
print_green = lambda x: cprint(x, 'green') #print out text in green 
print_magenta = lambda x : cprint(x, 'magenta') #print out text in magenta
print_yellow = lambda x: cprint(x, 'yellow') #print out text in yellow
print_cyan = lambda x: cprint(x, 'cyan') #print out text in cyan
# download images
def image_downloader(edge, images_path):
 display_url = edge['node']['display_url']
 file_name = edge['node']['taken_at_timestamp']
 download_path = images_path + '\\' + str(file_name) + '.jpg'
 if not os.path.exists(download_path):
 print_yellow('Downloading ' + str(file_name) + '.jpg...........')
 urllib.request.urlretrieve(display_url, download_path)
 print_green(str(file_name) + '.jpg Downloaded')
 print('\n')
 else:
 print_green(str(file_name) + '.jpg has been downloaded before')
 print('\n')
# download videos 
def video_downloader(shortcode, videos_path):
 r = requests.get('https://www.instagram.com/p/' + shortcode + '/?__a=1')
 video_url = r.json()['graphql']['shortcode_media']['video_url']
 file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = videos_path + '\\' + str(file_name) + '.mp4'
 if not os.path.exists(download_path):
 print_yellow('Downloading ' + str(file_name) + '.mp4...........')
 urllib.request.urlretrieve(video_url, download_path)
 print_green(str(file_name) + '.mp4 Downloaded')
 print('\n')
 else:
 print_green(str(file_name) + '.mp4 has been downloaded before')
 print('\n')
# download images and videos from posts containing more than one pictures or videos`
def sidecar_downloader(shortcode, images_path, videos_path):
 r = requests.get('https://www.instagram.com/p/' + shortcode + '/?__a=1')
 num = 1
 for edge in r.json()['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']:
 is_video = edge['node']['is_video']
 if is_video == False:
 display_url = edge['node']['display_url']
 file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = images_path + '\\' + str(file_name) + '_' + str(num) + '.jpg'
 if not os.path.exists(download_path):
 print_yellow('Downloading ' + str(file_name) + '_' + str(num) + '.jpg...........')
 urllib.request.urlretrieve(display_url, download_path)
 print_green(str(file_name) + '_' + str(num) + '.jpg Downloaded')
 print('\n')
 else:
 print_green(str(file_name) + '_' + str(num) + '.jpg has been downloaded before')
 print('\n')
 else:
 video_url = edge['node']['video_url']
 file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = videos_path + '\\' + str(file_name) + '_' + str(num) + '.mp4'
 if not os.path.exists(download_path):
 print_yellow('Downloading ' + str(file_name) + '_' + str(num) + '.mp4...........')
 urllib.request.urlretrieve(video_url, download_path)
 print_green(str(file_name) + '_' + str(num) + '.mp4 Downloaded')
 print('\n')
 else:
 print_green(str(file_name) + '_' + str(num) + '.mp4 has been downloaded before')
 print('\n')
 num += 1
def main(account_json_info, path):
 init()
 r = requests.get(account_json_info) 
 user_id = r.json()['graphql']['user']['id']
 end_cursor = ''
 next_page = True
 is_video = False
 images_path = path + '\\Images'
 videos_path = path + '\\Videos'
 if os.path.exists(path) == False:
 os.makedirs(path)
 if os.path.exists(images_path) == False:
 os.makedirs(images_path)
 if os.path.exists(videos_path) == False:
 os.makedirs(videos_path)
 print_magenta('User Folder Created!\n')
 else:
 print_magenta('User Folder Has Been Created Before!\n')
 while next_page == True:
 r = requests.get('https://www.instagram.com/graphql/query/', 
 params = {
 'query_id': '17880160963012870', 
 'id': user_id,
 'first': 12, 
 'after': end_cursor
 }
 )
 graphql = r.json()['data']
 for edge in graphql['user']['edge_owner_to_timeline_media']['edges']:
 __typename = edge['node']['__typename']
 if __typename == 'GraphImage':
 image_downloader(edge, images_path)
 elif __typename == 'GraphVideo':
 shortcode = edge['node']['shortcode']
 video_downloader(shortcode, videos_path)
 elif __typename == 'GraphSidecar':
 shortcode = edge['node']['shortcode']
 sidecar_downloader(shortcode, images_path, videos_path)
 end_cursor = graphql['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']
 next_page = graphql['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']
 time.sleep(10)
 deinit()
if __name__ == '__main__':
 print('\n\n')
 init(autoreset = True)
 print_cyan('Instagram Media Downloader'.center(os.get_terminal_size().columns, '-'))
 deinit()
 parser = argparse.ArgumentParser(description = 'Download Instagram Images and Videos from a User\'s Profile Page')
 parser.add_argument('-u', '--user', dest = 'username', required = True, help = 'Username on Instagram')
 parser.add_argument('-p', '--path', dest = 'path', required = True, help = 'Root path where downloaded Instagram Media is saved')
 args = parser.parse_args()
 account_json_info = 'https://www.instagram.com/' + args.username + '/?__a=1' #insert username into the link
 args.path += '\\' + args.username #add username to the directory given
 main(account_json_info, args.path)

Question 2

Organizing Imports: I ordered your import statements alphabetically. While this is not a required style point, it's a person preference that you can decide to follow if you want.
Unused Imports: You had a few unused imports (json, sys, termcolor)
String Formatting: You concatenate multiple strings with variables everywhere in your code. You can format your string with f"...", so you can directly implement variables in your strings, like so: download_path = f"{videos_path}\\{file_name}.mp4".
Variable Assignment/Parameter Spacing: There should only be one space before the = and after the = when assigning variables. For default parameters, there should be no spaces. I'm not sure if this is a practice that's taught, but it's how I learned my python styling.
DRY: Don't Repeat Yourself! Your print_... lambda's do the exact same thing, with only one string being different. These four lambdas can be simplified to a simple method, with passing the text and color to print. I'm pretty sure termcolor has a built-in method colored that does the exact thing, but I'd have to double check.
Truth/False Comparisons: As an example, if is_video == False should be changed to if not is_video. It does the exact same thing, without the verbose == False, and utilizing not. You use both ways in your code, but you should really avoid == False/== True.
Constant Variable Names: Variables that are constants should be UPPERCASE.
Docstrings: You had the right idea with regular comments describing what the method does. You should move these comments into a docstring inside the method, so any documentation can tell what your method is supposed to do.

Final Code

import argparse
import os
import requests
import time
import urllib.request
from colorama import init, deinit
from termcolor import cprint
def print_in_color(text, color):
 """ Prints `text` in passed `color` """
 cprint(text, color)
def image_downloader(edge, images_path):
 """ Downloads images """
 display_url = edge['node']['display_url']
 file_name = edge['node']['taken_at_timestamp']
 download_path = f"{images_path}\\{file_name}.jpg"
 if not os.path.exists(download_path):
 print_in_color(f"Downloading {str(file_name)}.jpg...........", "yellow")
 urllib.request.urlretrieve(display_url, download_path)
 print_in_color(f"{file_name}.jpg downloaded.\n", "green")
 else:
 print_in_color(f"{file_name}.jpg has been downloaded already.\n", "green")
def video_downloader(shortcode, videos_path):
 """ Downloads videos """
 videos = requests.get(f"https://www.instagram.com/p/{shortcode}/?__a=1")
 video_url = videos.json()['graphql']['shortcode_media']['video_url']
 file_name = videos.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = f"{videos_path}\\{file_name}.mp4"
 if not os.path.exists(download_path):
 print_in_color(f"Downloading {file_name}.mp4...........", "yellow")
 urllib.request.urlretrieve(video_url, download_path)
 print_in_color(f"{file_name}.mp4 downloaded.\n", "green")
 else:
 print_in_color(f"{file_name}.mp4 has been downloaded already.\n", "green")
def sidecar_downloader(shortcode, images_path, videos_path):
 """ Downloads images and videos from posts containing more than one pictures or videos """
 r = requests.get(f"https://www.instagram.com/p/{shortcode}/?__a=1")
 num = 1
 for edge in r.json()['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']:
 is_video = edge['node']['is_video']
 if not is_video:
 display_url = edge['node']['display_url']
 file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = f"{images_path}\\{file_name}_{num}.jpg"
 if not os.path.exists(download_path):
 print_in_color(f"Downloading {file_name}_{num}.jpg...........", "yellow")
 urllib.request.urlretrieve(display_url, download_path)
 print_in_color(f"{file_name}_{num}.jpg downloaded.\n", "green")
 else:
 print_in_color(f"{file_name}_{num}.jpg has been downloaded already.\n", "green")
 else:
 video_url = edge['node']['video_url']
 file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = f"{videos_path}\\{file_name}_{num}.mp4"
 if not os.path.exists(download_path):
 print_in_color(f"Downloading {file_name}_{num}.mp4...........", "yellow")
 urllib.request.urlretrieve(video_url, download_path)
 print_in_color(f"{file_name}_{num}.mp4 downloaded.\n", "green")
 else:
 print_in_color(f"{file_name}_{num}.mp4 has been downloaded already.\n", "green")
 num += 1
def main(account_json_info, path):
 """ Runs methods that download photos/videos from the user """
 init()
 r = requests.get(account_json_info)
 user_id = r.json()['graphql']['user']['id']
 end_cursor = ''
 next_page = True
 images_path = f"{path}\\Images"
 videos_path = f"{path}\\Videos"
 if not os.path.exists(path):
 os.makedirs(path)
 if not os.path.exists(images_path):
 os.makedirs(images_path)
 if not os.path.exists(videos_path):
 os.makedirs(videos_path)
 print_in_color("User Folder Created!\n", "magenta")
 else:
 print_in_color("User Folder Has Been Created Already!\n", "magenta")
 while next_page:
 r = requests.get('https://www.instagram.com/graphql/query/',
 params={
 'query_id': '17880160963012870',
 'id': user_id,
 'first': 12,
 'after': end_cursor
 }
 )
 graphql = r.json()['data']
 for edge in graphql['user']['edge_owner_to_timeline_media']['edges']:
 __typename = edge['node']['__typename']
 if __typename == 'GraphImage':
 image_downloader(edge, images_path)
 elif __typename == 'GraphVideo':
 shortcode = edge['node']['shortcode']
 video_downloader(shortcode, videos_path)
 elif __typename == 'GraphSidecar':
 shortcode = edge['node']['shortcode']
 sidecar_downloader(shortcode, images_path, videos_path)
 end_cursor = graphql['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']
 next_page = graphql['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']
 time.sleep(10)
 deinit()
if __name__ == '__main__':
 print('\n\n')
 init(autoreset=True)
 print_in_color('Instagram Media Downloader'.center(os.get_terminal_size().columns, '-'), "cyan")
 deinit()
 PARSER = argparse.ArgumentParser(description='Download Instagram Images and Videos from a User\'s Profile Page')
 PARSER.add_argument('-u', '--user', dest='username', required=True, help='Username on Instagram')
 PARSER.add_argument('-p', '--path', dest='path', required=True, help='Root path where downloaded Instagram Media is saved')
 ARGS = PARSER.parse_args()
 #Insert username into link
 ACCOUNT_JSON_INFO = f"https://www.instagram.com/{ARGS.username}/?__a=1"
 ARGS.path += f"\\{ARGS.username}"
 main(ACCOUNT_JSON_INFO, ARGS.path)

Question 3

Import standard in python is to import standard library packages/modules as one block first, then external packages/modules as another block, then personal packages/modules - each block separated by an empty line.

Ben A Ben A 10.7k5 gold badges37 silver badges101 bronze badges · Answer 1 · 2019-07-25 06:22:12Z

Organizing Imports: I ordered your import statements alphabetically. While this is not a required style point, it's a person preference that you can decide to follow if you want.
Unused Imports: You had a few unused imports (json, sys, termcolor)
String Formatting: You concatenate multiple strings with variables everywhere in your code. You can format your string with f"...", so you can directly implement variables in your strings, like so: download_path = f"{videos_path}\\{file_name}.mp4".
Variable Assignment/Parameter Spacing: There should only be one space before the = and after the = when assigning variables. For default parameters, there should be no spaces. I'm not sure if this is a practice that's taught, but it's how I learned my python styling.
DRY: Don't Repeat Yourself! Your print_... lambda's do the exact same thing, with only one string being different. These four lambdas can be simplified to a simple method, with passing the text and color to print. I'm pretty sure termcolor has a built-in method colored that does the exact thing, but I'd have to double check.
Truth/False Comparisons: As an example, if is_video == False should be changed to if not is_video. It does the exact same thing, without the verbose == False, and utilizing not. You use both ways in your code, but you should really avoid == False/== True.
Constant Variable Names: Variables that are constants should be UPPERCASE.
Docstrings: You had the right idea with regular comments describing what the method does. You should move these comments into a docstring inside the method, so any documentation can tell what your method is supposed to do.

Final Code

import argparse
import os
import requests
import time
import urllib.request
from colorama import init, deinit
from termcolor import cprint
def print_in_color(text, color):
 """ Prints `text` in passed `color` """
 cprint(text, color)
def image_downloader(edge, images_path):
 """ Downloads images """
 display_url = edge['node']['display_url']
 file_name = edge['node']['taken_at_timestamp']
 download_path = f"{images_path}\\{file_name}.jpg"
 if not os.path.exists(download_path):
 print_in_color(f"Downloading {str(file_name)}.jpg...........", "yellow")
 urllib.request.urlretrieve(display_url, download_path)
 print_in_color(f"{file_name}.jpg downloaded.\n", "green")
 else:
 print_in_color(f"{file_name}.jpg has been downloaded already.\n", "green")
def video_downloader(shortcode, videos_path):
 """ Downloads videos """
 videos = requests.get(f"https://www.instagram.com/p/{shortcode}/?__a=1")
 video_url = videos.json()['graphql']['shortcode_media']['video_url']
 file_name = videos.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = f"{videos_path}\\{file_name}.mp4"
 if not os.path.exists(download_path):
 print_in_color(f"Downloading {file_name}.mp4...........", "yellow")
 urllib.request.urlretrieve(video_url, download_path)
 print_in_color(f"{file_name}.mp4 downloaded.\n", "green")
 else:
 print_in_color(f"{file_name}.mp4 has been downloaded already.\n", "green")
def sidecar_downloader(shortcode, images_path, videos_path):
 """ Downloads images and videos from posts containing more than one pictures or videos """
 r = requests.get(f"https://www.instagram.com/p/{shortcode}/?__a=1")
 num = 1
 for edge in r.json()['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']:
 is_video = edge['node']['is_video']
 if not is_video:
 display_url = edge['node']['display_url']
 file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = f"{images_path}\\{file_name}_{num}.jpg"
 if not os.path.exists(download_path):
 print_in_color(f"Downloading {file_name}_{num}.jpg...........", "yellow")
 urllib.request.urlretrieve(display_url, download_path)
 print_in_color(f"{file_name}_{num}.jpg downloaded.\n", "green")
 else:
 print_in_color(f"{file_name}_{num}.jpg has been downloaded already.\n", "green")
 else:
 video_url = edge['node']['video_url']
 file_name = r.json()['graphql']['shortcode_media']['taken_at_timestamp']
 download_path = f"{videos_path}\\{file_name}_{num}.mp4"
 if not os.path.exists(download_path):
 print_in_color(f"Downloading {file_name}_{num}.mp4...........", "yellow")
 urllib.request.urlretrieve(video_url, download_path)
 print_in_color(f"{file_name}_{num}.mp4 downloaded.\n", "green")
 else:
 print_in_color(f"{file_name}_{num}.mp4 has been downloaded already.\n", "green")
 num += 1
def main(account_json_info, path):
 """ Runs methods that download photos/videos from the user """
 init()
 r = requests.get(account_json_info)
 user_id = r.json()['graphql']['user']['id']
 end_cursor = ''
 next_page = True
 images_path = f"{path}\\Images"
 videos_path = f"{path}\\Videos"
 if not os.path.exists(path):
 os.makedirs(path)
 if not os.path.exists(images_path):
 os.makedirs(images_path)
 if not os.path.exists(videos_path):
 os.makedirs(videos_path)
 print_in_color("User Folder Created!\n", "magenta")
 else:
 print_in_color("User Folder Has Been Created Already!\n", "magenta")
 while next_page:
 r = requests.get('https://www.instagram.com/graphql/query/',
 params={
 'query_id': '17880160963012870',
 'id': user_id,
 'first': 12,
 'after': end_cursor
 }
 )
 graphql = r.json()['data']
 for edge in graphql['user']['edge_owner_to_timeline_media']['edges']:
 __typename = edge['node']['__typename']
 if __typename == 'GraphImage':
 image_downloader(edge, images_path)
 elif __typename == 'GraphVideo':
 shortcode = edge['node']['shortcode']
 video_downloader(shortcode, videos_path)
 elif __typename == 'GraphSidecar':
 shortcode = edge['node']['shortcode']
 sidecar_downloader(shortcode, images_path, videos_path)
 end_cursor = graphql['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']
 next_page = graphql['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']
 time.sleep(10)
 deinit()
if __name__ == '__main__':
 print('\n\n')
 init(autoreset=True)
 print_in_color('Instagram Media Downloader'.center(os.get_terminal_size().columns, '-'), "cyan")
 deinit()
 PARSER = argparse.ArgumentParser(description='Download Instagram Images and Videos from a User\'s Profile Page')
 PARSER.add_argument('-u', '--user', dest='username', required=True, help='Username on Instagram')
 PARSER.add_argument('-p', '--path', dest='path', required=True, help='Root path where downloaded Instagram Media is saved')
 ARGS = PARSER.parse_args()
 #Insert username into link
 ACCOUNT_JSON_INFO = f"https://www.instagram.com/{ARGS.username}/?__a=1"
 ARGS.path += f"\\{ARGS.username}"
 main(ACCOUNT_JSON_INFO, ARGS.path)

Import standard in python is to import standard library packages/modules as one block first, then external packages/modules as another block, then personal packages/modules - each block separated by an empty line.

Stack Exchange Network

Python Script to download images and videos from a user's profile on Instagram

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Python Script to download images and videos from a user's profile on Instagram

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions