lec16

In [1]:

from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

Swain vs. Alabama

In [2]:

# Model proportions for the two categories being sampled. Entry 0 (0.26) is the
# category that the panel simulation below counts as "Black men"; presumably it
# is the share of Black men in the eligible population (confirm against the
# lecture notes). The two entries sum to 1.
population_proportions = make_array(.26, .74)
population_proportions

Out[2]:

array([ 0.26, 0.74])

In [3]:

# Draw one random sample of size 100 from the model; the displayed result is an
# array holding the sample proportion of each category (it changes on every run).
sample_proportions(100, population_proportions)

Out[3]:

array([ 0.24, 0.76])

In [4]:

def panel_proportion():
    """Simulate one panel of 100 and return the proportion in the first category.

    Draws a random sample of size 100 according to ``population_proportions``
    and returns entry 0 of the resulting sample proportions as a Python float.
    The result is random: each call produces a new simulated panel.
    """
    simulated_panel = sample_proportions(100, population_proportions)
    return simulated_panel.item(0)

In [5]:

# One simulated value of the statistic (random; differs from run to run).
panel_proportion()

Out[5]:

0.27

In [6]:

# Simulate 10,000 panels of 100 and record, for each one, the count in the first
# category (multiplying the sample proportion by 100 turns it into a count out
# of 100).
#
# The values are gathered in a list and converted to an array once at the end.
# Calling np.append inside the loop copies the entire array on every iteration,
# which makes the simulation quadratic in the number of repetitions.
panels = np.array(
    [panel_proportion() * 100 for _ in range(10000)],
    dtype=float,
)

In [7]:

# Empirical histogram of the simulated counts in `panels`. The bin edges are the
# half-integers 5.5, 6.5, ..., 39.5, so each bar is centered on a whole count.
Table().with_column(
 'Number of Black Men on Panel of 100', panels
).hist(bins=np.arange(5.5,40.))
# Plotting details; ignore this code
# The y-axis starts slightly below zero, presumably so the marker drawn on the
# horizontal axis is not clipped.
plots.ylim(-0.002, 0.09)
# Red dot at 8 on the horizontal axis: presumably the count observed in the
# actual panel, shown for comparison with the simulated counts (confirm).
plots.scatter(8, 0, color='red', s=30);

Mendel and Pea Flowers

In [8]:

## Mendel had 929 plants, of which 709 had purple flowers
# Observed proportion of purple-flowering plants (a fraction between 0 and 1,
# not a percent).
observed_purples = 709 / 929
observed_purples

Out[8]:

0.7631862217438106

In [9]:

# Proportions predicted by the model being tested. Entry 0 (0.75) is the
# category that purple_flowers() below reports as purple; entry 1 (0.25) is the
# remaining category.
predicted_proportions = make_array(.75, .25)
# One simulated sample of 929 plants under the model; the displayed array holds
# the sample proportion of each category (random, differs from run to run).
sample_proportions(929, predicted_proportions)

Out[9]:

array([ 0.76103337, 0.23896663])

In [10]:

def purple_flowers():
    """Simulate one sample of 929 plants and return the percent in the first category.

    Draws a random sample of size 929 according to ``predicted_proportions``,
    takes entry 0 of the sample proportions, and scales it by 100 so the result
    is a percent rather than a fraction. The result is random on every call.
    """
    purple_share = sample_proportions(929, predicted_proportions).item(0)
    return purple_share * 100

In [11]:

# One simulated percent of purple flowers under the model (random; differs from
# run to run).
purple_flowers()

Out[11]:

75.1345532831001

In [12]:

# Simulate the statistic 10,000 times: each entry is the percent of purple
# flowers in one simulated sample drawn by purple_flowers().
#
# The values are gathered in a list and converted to an array once at the end.
# Calling np.append inside the loop copies the entire array on every iteration,
# which makes the simulation quadratic in the number of repetitions.
purples = np.array(
    [purple_flowers() for _ in range(10000)],
    dtype=float,
)

In [13]:

# Empirical histogram of the simulated percents in `purples` (default bins).
Table().with_column('Percent of purple flowers in sample of 929', purples).hist()
# Plotting details; ignore this code
# The y-axis starts slightly below zero, presumably so the marker drawn on the
# horizontal axis is not clipped.
plots.ylim(-0.02, 0.25)
# Red dot on the horizontal axis at the observed percent of purple flowers
# (observed_purples is a fraction, so it is scaled by 100 to match `purples`).
plots.scatter(observed_purples * 100, 0, color='red', s=30);

In [14]:

# Histogram of the simulated discrepancies: the absolute distance, in percentage
# points, between each simulated percent purple and the model's 75%.
Table().with_column('Discrepancy in sample of 929 if the model is true', abs(purples- 75)).hist()
# Plotting details; ignore this code
# The y-axis starts slightly below zero, presumably so the marker drawn on the
# horizontal axis is not clipped.
plots.ylim(-0.02, 0.6)
# Red dot on the horizontal axis at the discrepancy of the observed data,
# computed the same way as the simulated discrepancies.
plots.scatter(abs(observed_purples * 100 - 75), 0, color='red', s=30);

In [15]:

# Observed discrepancy: absolute distance, in percentage points, between the
# observed percent of purple flowers and the model's 75%.
abs(observed_purples * 100 - 75)

Out[15]:

1.318622174381062