|
| 1 | +from bs4 import BeautifulSoup |
| 2 | +import requests |
| 3 | + |
| 4 | +# Function to get Movie Details |
| 5 | + |
| 6 | + |
def _tag_text(tag, default, strip=False):
    """Return the text of a parsed tag, or ``default`` when the tag is missing."""
    if tag is None:
        return default
    text = tag.text
    return text.strip() if strip else text


def _linked_names(credit_block):
    """Return the text of every link in ``credit_block`` whose href contains 'name'."""
    return [link.text for link in credit_block.findAll('a')
            if 'name' in link.attrs.get('href', '')]


def getMovieDetails(movieName):
    """Look up a feature film on IMDb by title and scrape its details.

    Three HTTP requests are made: the title search, the page of the first
    search result, and that title's ``plotsummary`` page.

    Args:
        movieName: Title to search for; surrounding whitespace is ignored.

    Returns:
        ``None`` when the search page contains no result entry. Otherwise a
        dict with the keys:

        * ``name``, ``year``, ``rating``, ``runtime``, ``release_date``,
          ``plot`` -- strings; each falls back to a placeholder
          ('Not available' / 'Not yet rated') when the corresponding
          element is absent from the page.
        * ``genres``, ``directors``, ``cast`` -- lists of strings
          (possibly empty).
        * ``writers`` -- a list of strings, or the plain string
          'Not found' when the page has fewer than three credit blocks.

    Raises:
        requests.exceptions.RequestException: if a request fails or does
            not answer within the timeout.
    """
    # Base URL of IMDB website
    url = 'https://www.imdb.com'

    # Seconds to wait for each response; without a timeout a stalled
    # connection would block the caller indefinitely.
    timeout = 10

    # Dictionary to store movie details
    movieDetails = {}

    # Let requests build the query string so characters such as '&' or '#'
    # in the title are percent-encoded instead of corrupting the query.
    html = requests.get(
        url + '/search/title',
        params={'title': movieName.strip(), 'title_type': 'feature'},
        timeout=timeout)
    bs = BeautifulSoup(html.text, 'html.parser')

    # Gets the first movie that appears in title section
    result = bs.find('h3', {'class': 'lister-item-header'})

    if result is None or result.a is None:
        return None

    movielink = url + result.a.attrs['href']
    movieDetails['name'] = result.a.text

    # Gets the page with movie details
    html = requests.get(movielink, timeout=timeout)
    bs = BeautifulSoup(html.text, 'html.parser')

    # Year. getattr() covers both a missing <span> (None has no ``a``) and a
    # <span> without a link (Tag attribute lookup then yields None).
    year_span = bs.find('span', {'id': 'titleYear'})
    movieDetails['year'] = _tag_text(
        getattr(year_span, 'a', None), 'Not available')

    # Rating, Genres, Runtime, Release Date
    subtext = bs.find('div', {'class': 'subtext'})

    rating_box = bs.find('div', {'class': 'ratingValue'})
    # Each field is resolved independently so that one missing element does
    # not discard another that is present.
    movieDetails['rating'] = _tag_text(
        getattr(rating_box, 'span', None), 'Not yet rated')

    if subtext is None:
        movieDetails['genres'] = []
        movieDetails['runtime'] = 'Not available'
        movieDetails['release_date'] = 'Not available'
    else:
        movieDetails['genres'] = [
            link.text for link in subtext.findAll('a', {'title': None})]
        movieDetails['runtime'] = _tag_text(
            subtext.time, 'Not available', strip=True)
        movieDetails['release_date'] = _tag_text(
            subtext.find('a', {'title': 'See more release dates'}),
            'Not available', strip=True)

    # Gets the credit section of the page
    creditSummary = bs.findAll('div', {'class': 'credit_summary_item'})

    # Directors, Writers and Cast
    if creditSummary:
        movieDetails['directors'] = [
            link.text for link in creditSummary[0].findAll('a')]
    else:
        movieDetails['directors'] = []

    if len(creditSummary) >= 3:
        movieDetails['writers'] = _linked_names(creditSummary[1])
        movieDetails['cast'] = _linked_names(creditSummary[2])
    else:
        # For some films, writer details are not provided; the second
        # credit block (if any) is then taken to be the cast.
        movieDetails['writers'] = 'Not found'
        if len(creditSummary) == 2:
            movieDetails['cast'] = _linked_names(creditSummary[1])
        else:
            movieDetails['cast'] = []

    # The plot is a separate AJAX call and does not come in the html page,
    # so one more request goes to the plotsummary page. Any query string on
    # the result link is dropped first; appending to it would extend the
    # query value rather than the path.
    moviepath = movielink.split('?', 1)[0]
    if not moviepath.endswith('/'):
        moviepath += '/'
    html = requests.get(moviepath + 'plotsummary', timeout=timeout)
    bs = BeautifulSoup(html.text, 'html.parser')

    # Plot
    plot_item = bs.find('li', {'class': 'ipl-zebra-list__item'})
    movieDetails['plot'] = _tag_text(
        getattr(plot_item, 'p', None), 'Not available', strip=True)

    # Returns the dictionary with movie details
    return movieDetails
| 83 | + |
| 84 | + |
if __name__ == "__main__":
    # Interactive entry point: ask for a title, fetch its details, print them.
    movieName = input('Enter the movie name whose details are to be fetched\n')
    movieDetails = getMovieDetails(movieName)
    if movieDetails is None:
        print('No movie of this name found !!!!!')
        # quit() is provided by the site module for interactive sessions;
        # raising SystemExit ends the script with the same exit status
        # without depending on it.
        raise SystemExit
    print('\n{movie} ({year})'.format(
        movie=movieDetails['name'], year=movieDetails['year']))
    print('Rating:', movieDetails['rating'])
    print('Runtime:', movieDetails['runtime'])
    print('Release Date:', movieDetails['release_date'])
    print('Genres:', ', '.join(movieDetails['genres']))
    print('Director:', ', '.join(movieDetails['directors']))
    # getMovieDetails stores the plain string 'Not found' under 'writers'
    # when no writer credits exist; joining a string would interleave
    # ', ' between its characters, so only join real lists.
    writers = movieDetails['writers']
    if not isinstance(writers, str):
        writers = ', '.join(writers)
    print('Writer:', writers)
    print('Cast:', ', '.join(movieDetails['cast']))
    print('Plot Summary:\n', movieDetails['plot'])
0 commit comments