I am new to BeautifulSoup and am trying to extract a table. I followed the documentation and used a nested for loop to extract the cell data, but it only returns the first three rows. Here is my code:
from six.moves import urllib
from bs4 import BeautifulSoup
import pandas as pd
def get_url_content(url):
    """Fetch *url* and parse the response body with BeautifulSoup.

    Args:
        url: Address of the page to download.

    Returns:
        The parsed ``BeautifulSoup`` object, or ``None`` when the page cannot
        be opened or parsing raises ``AttributeError``.
    """
    try:
        html = urllib.request.urlopen(url)
    except urllib.error.URLError:
        # URLError is the base class of HTTPError, so this covers HTTP error
        # statuses as well as failures to reach the server at all.
        return None
    try:
        soup = BeautifulSoup(html.read(), 'html.parser')
    except AttributeError:
        return None
    finally:
        # Release the connection whether or not parsing succeeded.
        html.close()
    return soup
URL = "http://www.megamillions.com/winning-numbers/search?startDate=1/1/2017&endDate=3/31/2018"
soup = get_url_content(URL)
# get_url_content returns None on failure, so only walk the tree when a page
# was actually parsed.
if soup is not None:
    for tr in soup.find_all('tr'):
        for td in tr.find_all('td'):
            # A parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print(td.text)
I also tried adding an if statement before the second loop:
# NOTE(review): in bs4, parentGenerator appears to be a (deprecated) method, so comparing it to a string is presumably always False; tr.parent.name == 'tbody' may be what was intended — confirm.
if tr.parentGenerator=='tbody':
but it returns an empty list.
3 Answers 3
The site is dynamic, which means you need to use a browser automation tool such as Selenium. Then, extract the text from the cells matching multiple class names for each search:
import urllib
import re
from bs4 import BeautifulSoup as soup
from selenium import webdriver
def get_table():
    """Load the search page in a real browser and return the draws it lists.

    Returns:
        A list of dicts, one per draw, with the keys ``draw_date``, ``balls``
        (a list), ``megaball``, ``megaplier`` and ``details``.
    """
    d = webdriver.Chrome('path/to/driver') #or webdriver.Firefox(), depending on your browser
    try:
        d.get('http://www.megamillions.com/winning-numbers/search?startDate=1/1/2017&endDate=3/31/2018')
        page_source = d.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise every call leaves a browser process running.
        d.quit()
    table = [i.text for i in soup(page_source, 'lxml').find_all('td', {'class':re.compile('dates|number|mega|details')})]
    # The flat cell list is cut into groups of nine cells, one group per draw.
    final_table = [table[i:i+9] for i in range(0, len(table), 9)]
    # First cell is the date, the last three are mega ball, megaplier and
    # details; everything in between is collected as the list of balls.
    last_data = [
        {'draw_date': draw_date, 'balls': balls, 'megaball': megaball, 'megaplier': megaplier, 'details': details}
        for draw_date, *balls, megaball, megaplier, details in final_table
    ]
    return last_data


print(get_table())
Output:
[{'draw_date': '12/29/2017', 'balls': ['4', '10', '18', '28', '62'], 'megaball': '7', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '12/26/2017', 'balls': ['10', '12', '20', '38', '41'], 'megaball': '25', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '12/22/2017', 'balls': ['1', '20', '30', '33', '42'], 'megaball': '16', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '12/19/2017', 'balls': ['28', '37', '39', '42', '58'], 'megaball': '2', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '12/15/2017', 'balls': ['4', '12', '36', '44', '57'], 'megaball': '19', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '12/12/2017', 'balls': ['8', '23', '24', '25', '27'], 'megaball': '9', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '12/8/2017', 'balls': ['6', '37', '46', '60', '70'], 'megaball': '24', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '12/5/2017', 'balls': ['14', '15', '37', '42', '67'], 'megaball': '22', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '12/1/2017', 'balls': ['16', '22', '40', '41', '59'], 'megaball': '8', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '11/28/2017', 'balls': ['10', '17', '47', '51', '61'], 'megaball': '5', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '11/24/2017', 'balls': ['16', '36', '54', '61', '64'], 'megaball': '22', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '11/21/2017', 'balls': ['3', '7', '22', '27', '50'], 'megaball': '3', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '11/17/2017', 'balls': ['3', '26', '55', '58', '70'], 'megaball': '15', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '11/14/2017', 'balls': ['1', '14', '21', '22', '28'], 'megaball': '19', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '11/10/2017', 'balls': ['6', '23', '38', '42', '58'], 'megaball': '24', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '11/7/2017', 'balls': ['1', '54', '60', '68', '69'], 'megaball': '11', 'megaplier': 
'4', 'details': 'Details'}, {'draw_date': '11/3/2017', 'balls': ['10', '22', '42', '61', '69'], 'megaball': '3', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '10/31/2017', 'balls': ['6', '28', '31', '52', '53'], 'megaball': '12', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '10/27/2017', 'balls': ['17', '27', '41', '51', '52'], 'megaball': '13', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '10/24/2017', 'balls': ['20', '24', '34', '56', '64'], 'megaball': '6', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '10/20/2017', 'balls': ['6', '23', '63', '66', '73'], 'megaball': '9', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '10/17/2017', 'balls': ['31', '45', '49', '56', '70'], 'megaball': '11', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '10/13/2017', 'balls': ['2', '7', '18', '26', '31'], 'megaball': '12', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '10/10/2017', 'balls': ['7', '16', '24', '61', '62'], 'megaball': '2', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '10/6/2017', 'balls': ['21', '33', '36', '45', '56'], 'megaball': '12', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '10/3/2017', 'balls': ['12', '18', '19', '25', '67'], 'megaball': '7', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '9/29/2017', 'balls': ['25', '51', '62', '73', '74'], 'megaball': '7', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '9/26/2017', 'balls': ['1', '10', '57', '66', '75'], 'megaball': '4', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '9/22/2017', 'balls': ['5', '39', '54', '63', '66'], 'megaball': '15', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '9/19/2017', 'balls': ['9', '28', '31', '50', '61'], 'megaball': '10', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '9/15/2017', 'balls': ['18', '24', '34', '38', '58'], 'megaball': '3', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '9/12/2017', 'balls': ['26', '37', '41', '54', '65'], 
'megaball': '3', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '9/8/2017', 'balls': ['4', '5', '14', '26', '73'], 'megaball': '14', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '9/5/2017', 'balls': ['11', '17', '59', '70', '72'], 'megaball': '1', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '9/1/2017', 'balls': ['4', '13', '31', '50', '64'], 'megaball': '12', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '8/29/2017', 'balls': ['2', '13', '17', '35', '73'], 'megaball': '3', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '8/25/2017', 'balls': ['17', '38', '42', '51', '65'], 'megaball': '11', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '8/22/2017', 'balls': ['24', '35', '46', '50', '51'], 'megaball': '7', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '8/18/2017', 'balls': ['1', '31', '34', '40', '75'], 'megaball': '6', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '8/15/2017', 'balls': ['7', '16', '20', '66', '73'], 'megaball': '7', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '8/11/2017', 'balls': ['23', '33', '53', '56', '58'], 'megaball': '6', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '8/8/2017', 'balls': ['11', '17', '50', '52', '74'], 'megaball': '14', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '8/4/2017', 'balls': ['9', '17', '25', '63', '71'], 'megaball': '4', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '8/1/2017', 'balls': ['20', '22', '52', '57', '73'], 'megaball': '7', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '7/28/2017', 'balls': ['4', '6', '31', '49', '52'], 'megaball': '11', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '7/25/2017', 'balls': ['2', '5', '26', '58', '60'], 'megaball': '6', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '7/21/2017', 'balls': ['18', '31', '36', '50', '74'], 'megaball': '10', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '7/18/2017', 'balls': ['8', '12', '23', '51', 
'73'], 'megaball': '6', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '7/14/2017', 'balls': ['11', '12', '24', '32', '73'], 'megaball': '1', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '7/11/2017', 'balls': ['7', '18', '24', '55', '74'], 'megaball': '10', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '7/7/2017', 'balls': ['2', '9', '11', '28', '60'], 'megaball': '10', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '7/4/2017', 'balls': ['16', '39', '47', '53', '71'], 'megaball': '15', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '6/30/2017', 'balls': ['10', '38', '51', '55', '64'], 'megaball': '6', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '6/27/2017', 'balls': ['4', '21', '45', '52', '57'], 'megaball': '14', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '6/23/2017', 'balls': ['12', '20', '53', '66', '74'], 'megaball': '11', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '6/20/2017', 'balls': ['2', '15', '41', '49', '63'], 'megaball': '3', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '6/16/2017', 'balls': ['18', '22', '26', '30', '44'], 'megaball': '9', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '6/13/2017', 'balls': ['27', '51', '62', '68', '75'], 'megaball': '8', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '6/9/2017', 'balls': ['3', '16', '28', '33', '37'], 'megaball': '9', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '6/6/2017', 'balls': ['3', '5', '16', '49', '75'], 'megaball': '5', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '6/2/2017', 'balls': ['7', '42', '57', '69', '72'], 'megaball': '10', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '5/30/2017', 'balls': ['5', '20', '32', '37', '67'], 'megaball': '5', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '5/26/2017', 'balls': ['25', '26', '28', '37', '56'], 'megaball': '5', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '5/23/2017', 'balls': ['6', '13', '17', 
'33', '60'], 'megaball': '14', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '5/19/2017', 'balls': ['1', '4', '5', '24', '30'], 'megaball': '1', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '5/16/2017', 'balls': ['4', '35', '39', '56', '72'], 'megaball': '11', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '5/12/2017', 'balls': ['28', '34', '41', '42', '47'], 'megaball': '13', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '5/9/2017', 'balls': ['6', '29', '45', '69', '73'], 'megaball': '11', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '5/5/2017', 'balls': ['4', '23', '33', '47', '53'], 'megaball': '7', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '5/2/2017', 'balls': ['5', '14', '42', '43', '58'], 'megaball': '1', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '4/28/2017', 'balls': ['6', '13', '18', '20', '31'], 'megaball': '13', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '4/25/2017', 'balls': ['3', '13', '33', '40', '50'], 'megaball': '2', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '4/21/2017', 'balls': ['1', '12', '13', '32', '34'], 'megaball': '10', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '4/18/2017', 'balls': ['8', '29', '30', '43', '64'], 'megaball': '6', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '4/14/2017', 'balls': ['5', '10', '55', '60', '73'], 'megaball': '12', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '4/11/2017', 'balls': ['19', '34', '35', '38', '49'], 'megaball': '8', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '4/7/2017', 'balls': ['30', '33', '43', '47', '69'], 'megaball': '15', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '4/4/2017', 'balls': ['13', '24', '34', '35', '55'], 'megaball': '9', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '3/31/2017', 'balls': ['17', '24', '27', '32', '58'], 'megaball': '10', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '3/28/2017', 'balls': ['30', 
'33', '35', '37', '46'], 'megaball': '10', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '3/24/2017', 'balls': ['5', '28', '37', '61', '69'], 'megaball': '1', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '3/21/2017', 'balls': ['4', '45', '53', '73', '75'], 'megaball': '7', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '3/17/2017', 'balls': ['11', '27', '31', '58', '60'], 'megaball': '10', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '3/14/2017', 'balls': ['16', '23', '28', '33', '59'], 'megaball': '13', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '3/10/2017', 'balls': ['26', '38', '42', '58', '70'], 'megaball': '5', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '3/7/2017', 'balls': ['3', '30', '45', '53', '68'], 'megaball': '11', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '3/3/2017', 'balls': ['14', '26', '39', '48', '51'], 'megaball': '9', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '2/28/2017', 'balls': ['20', '33', '45', '58', '69'], 'megaball': '4', 'megaplier': '2', 'details': 'Details'}, {'draw_date': '2/24/2017', 'balls': ['12', '29', '33', '42', '68'], 'megaball': '14', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '2/21/2017', 'balls': ['9', '21', '30', '32', '75'], 'megaball': '9', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '2/17/2017', 'balls': ['4', '56', '58', '67', '75'], 'megaball': '8', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '2/14/2017', 'balls': ['7', '11', '33', '60', '68'], 'megaball': '15', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '2/10/2017', 'balls': ['32', '39', '51', '62', '75'], 'megaball': '14', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '2/7/2017', 'balls': ['23', '28', '37', '56', '71'], 'megaball': '12', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '2/3/2017', 'balls': ['3', '6', '29', '30', '64'], 'megaball': '3', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '1/31/2017', 
'balls': ['3', '14', '27', '62', '72'], 'megaball': '4', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '1/27/2017', 'balls': ['17', '37', '53', '54', '61'], 'megaball': '8', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '1/24/2017', 'balls': ['8', '42', '54', '63', '67'], 'megaball': '11', 'megaplier': '4', 'details': 'Details'}, {'draw_date': '1/20/2017', 'balls': ['7', '9', '24', '41', '53'], 'megaball': '14', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '1/17/2017', 'balls': ['20', '31', '54', '56', '59'], 'megaball': '3', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '1/13/2017', 'balls': ['10', '44', '58', '74', '75'], 'megaball': '11', 'megaplier': '3', 'details': 'Details'}, {'draw_date': '1/10/2017', 'balls': ['11', '20', '40', '41', '59'], 'megaball': '15', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '1/6/2017', 'balls': ['6', '10', '44', '47', '54'], 'megaball': '6', 'megaplier': '5', 'details': 'Details'}, {'draw_date': '1/3/2017', 'balls': ['14', '16', '23', '49', '53'], 'megaball': '12', 'megaplier': '2', 'details': 'Details'}]
4 Comments
You don't need a browser simulator if you use the newly released Requests-HTML library, which can handle dynamically generated items. With it, you can try something like this:
import requests_html
URL = "http://www.megamillions.com/winning-numbers/search?startDate=1/1/2017&endDate=3/31/2018"
with requests_html.HTMLSession() as session:
    response = session.get(URL)
    # Render the page so the dynamically generated table is present;
    # sleep=5 is passed straight through to render().
    response.html.render(sleep=5)
    results_table = response.html.find("table#table", first=True)
    rows = results_table.find("tr")
    # Skip the first two rows and drop the last cell of every remaining row.
    for row in rows[2:]:
        cells = row.find("th,td")
        print([cell.text for cell in cells[:-1]])
Partial Output:
['Draw Date', 'Balls', 'Mega Ball', 'Megaplier']
['12/29/2017', '4', '10', '18', '28', '62', '7', '2']
['12/26/2017', '10', '12', '20', '38', '41', '25', '4']
['12/22/2017', '1', '20', '30', '33', '42', '16', '4']
['12/19/2017', '28', '37', '39', '42', '58', '2', '3']
['12/15/2017', '4', '12', '36', '44', '57', '19', '4']
['12/12/2017', '8', '23', '24', '25', '27', '9', '2']
['12/8/2017', '6', '37', '46', '60', '70', '24', '2']
['12/5/2017', '14', '15', '37', '42', '67', '22', '4']
['12/1/2017', '16', '22', '40', '41', '59', '8', '4']
['11/28/2017', '10', '17', '47', '51', '61', '5', '2']
['11/24/2017', '16', '36', '54', '61', '64', '22', '3']
['11/21/2017', '3', '7', '22', '27', '50', '3', '3']
['11/17/2017', '3', '26', '55', '58', '70', '15', '4']
3 Comments
Even though the table is loaded dynamically, you can use the requests module to get its contents. Go to the XHR section of the Network tab in the Developer Tools. An AJAX request is sent to http://www.megamillions.com/Media/Static/winning-numbers/winning-numbers.json, which returns all the data you want in the form of JSON.
To get the JSON you can use this:
import requests
# A timeout keeps the call from hanging indefinitely if the server never responds.
r = requests.get('http://www.megamillions.com/Media/Static/winning-numbers/winning-numbers.json', timeout=10)
# Fail loudly on an HTTP error status instead of trying to decode an error page as JSON.
r.raise_for_status()
data = r.json()
As you can see, the JSON is in the following format (items starting from today's date):
{'nextDraw': {'IsPending': False,
'JackpotAnnuityAmount': 345000000,
'JackpotCashAmount': 206500000,
'MegaBall': 11,
'Megaplier': 3,
'NextDrawDate': '2018-03-21T03:00:00',
'NextJackpotAnnuityAmount': 377000000,
'NextJackpotCashAmount': 225700000},
'numbersList': [{'DrawDate': '2018-03-16T00:00:00',
'GameName': 'MegaMillions',
'MegaBall': 11,
'Megaplier': 3,
'WhiteBall1': 26,
'WhiteBall2': 52,
'WhiteBall3': 33,
'WhiteBall4': 1,
'WhiteBall5': 13},
...
...
You can get whatever you want from the data variable in this way:
# Walk every draw in the JSON payload and print a few of its fields.
for item in data['numbersList']:
    date, megaball, megaplier = (
        item[key] for key in ('DrawDate', 'MegaBall', 'Megaplier')
    )
    # and similarly other items
    print(date, megaball, megaplier)
Partial output:
2018-03-16T00:00:00 11 3
2018-03-13T00:00:00 17 5
2018-03-09T00:00:00 22 4
2018-03-06T00:00:00 22 5
2018-03-02T00:00:00 8 4
2018-02-27T00:00:00 23 3
2018-02-23T00:00:00 9 4
2018-02-20T00:00:00 14 3
...
...
Comments
Explore related questions
See similar questions with these tags.