1+ {
2+  "nbformat" : 4 ,
3+  "nbformat_minor" : 0 ,
4+  "metadata" : {
5+  "colab" : {
6+  "name" : " t-Test.ipynb" 
7+  "version" : " 0.3.2" 
8+  "provenance" : []
9+  },
10+  "kernelspec" : {
11+  "name" : " python3" 
12+  "display_name" : " Python 3" 
13+  }
14+  },
15+  "cells" : [
16+  {
17+  "cell_type" : " code" 
18+  "metadata" : {
19+  "id" : " Y21N_2yv3Grl" 
20+  "colab_type" : " code" 
21+  "colab" : {}
22+  },
23+  "source" : [
24+  " ## Import the packages\n " 
25+  " import numpy as np\n " 
26+  " from scipy import stats" 
27+  ],
28+  "execution_count" : 0 ,
29+  "outputs" : []
30+  },
31+  {
32+  "cell_type" : " code" 
33+  "metadata" : {
34+  "id" : " Aga_0SM43OdO" 
35+  "colab_type" : " code" 
36+  "colab" : {
37+  "base_uri" : " https://localhost:8080/" 
38+  "height" : 85 
39+  },
40+  "outputId" : " 8d4b8f3d-a6df-4129-b3b6-867144288009" 
41+  },
42+  "source" : [
43+  " ## Draw two random samples (no seed is set, so the printed values differ on every run)\n " 
44+  " \n " 
45+  " #Number of observations in each sample\n " 
46+  " N = 10\n " 
47+  " \n " 
48+  " #Sample a: standard normal draws shifted by 2, i.e. mean = 2 and var = 1\n " 
49+  " a = 2 + np.random.randn(N)\n " 
50+  " print(a)\n " 
51+  " \n " 
52+  " #Sample b: plain standard normal draws, i.e. mean = 0 and var = 1\n " 
53+  " b = np.random.randn(N)\n " 
54+  " print(b)" 
55+  ],
56+  "execution_count" : 6 ,
57+  "outputs" : [
58+  {
59+  "output_type" : " stream" 
60+  "text" : [
61+  " [3.41987841 2.4642942 1.3074381 1.88900262 1.5018451 2.08785958\n " 
62+  "  4.18763608 2.76111147 1.25673154 1.22916177]\n " 
63+  " [ 0.09625918 -0.426427 -0.81593085 -0.27386856 -0.19758738 0.71729565\n " 
64+  "  -0.44211666 0.07106772 -0.53144206 -0.21403634]\n " 
65+  ],
66+  "name" : " stdout" 
67+  }
68+  ]
69+  },
70+  {
71+  "cell_type" : " code" 
72+  "metadata" : {
73+  "id" : " DGw_0SoQ2Uhj" 
74+  "colab_type" : " code" 
75+  "colab" : {
76+  "base_uri" : " https://localhost:8080/" 
77+  "height" : 51 
78+  },
79+  "outputId" : " b6caa6b7-64df-44e7-b626-13fb8165baeb" 
80+  },
81+  "source" : [
82+  " ## Pooled standard deviation of the two samples\n " 
83+  " \n " 
84+  " #Start from the sample variance of each group\n " 
85+  " \n " 
86+  " #ddof=1 divides by N-1 instead of N (Bessel's correction), which gives the unbiased variance estimate\n " 
87+  " var_a = np.var(a, ddof=1)\n " 
88+  " var_b = np.var(b, ddof=1)\n " 
89+  " \n " 
90+  " #Pooled std deviation: square root of the average of the two variances\n " 
91+  " s = np.sqrt((var_a + var_b)/2)\n " 
92+  " \n " 
93+  " print(\" Std Deviation:\" , s)\n " 
94+  " \n " 
95+  " ## t-statistic: difference of the sample means divided by its standard error s*sqrt(2/N)\n " 
96+  " t = (np.mean(a) - np.mean(b))/(s*np.sqrt(2/N))\n " 
97+  " \n " 
98+  " print(\" T-value:\" , t)" 
99+  ],
100+  "execution_count" : 8 ,
101+  "outputs" : [
102+  {
103+  "output_type" : " stream" 
104+  "text" : [
105+  " Std Deviation: 0.7693967525636721\n " 
106+  " T-value: 7.0104093570005945\n " 
107+  ],
108+  "name" : " stdout" 
109+  }
110+  ]
111+  },
112+  {
113+  "cell_type" : " code" 
114+  "metadata" : {
115+  "id" : " 9atPC3HO3Z2U" 
116+  "colab_type" : " code" 
117+  "colab" : {
118+  "base_uri" : " https://localhost:8080/" 
119+  "height" : 51 
120+  },
121+  "outputId" : " 683ac7bd-8bd8-4e55-d3fc-7942748b66d1" 
122+  },
123+  "source" : [
124+  " ## Compare with the critical t-value\n " 
125+  " \n " 
126+  " #Degrees of freedom for two samples of size N each\n " 
127+  " df = 2*N - 2\n " 
128+  " \n " 
129+  " #One-tailed p-value: sf(|t|) is the upper-tail area; unlike 1 - cdf(t) it stays valid for negative t and keeps precision for tiny p\n " 
130+  " p = stats.t.sf(np.abs(t), df=df)\n " 
131+  " \n " 
132+  " print(\" t-Score = \"  + str(t))\n " 
133+  " print(\" p-Value = \"  + str(2*p))\n " 
134+  " \n " 
135+  " #Note that we multiply the p-value by 2 because it is a two-tailed t-test\n " 
136+  " \n " 
137+  " ### After comparing the t statistic with the t distribution (the critical value is handled internally by scipy),\n " 
138+  " # we get a very small p-value (about 1.5e-06 in the recorded run, far below 0.05), so we reject the null hypothesis:\n " 
139+  " # the difference between the means of the two distributions is statistically significant." 
140+  ],
141+  "execution_count" : 9 ,
142+  "outputs" : [
143+  {
144+  "output_type" : " stream" 
145+  "text" : [
146+  " t-Score = 7.0104093570005945\n " 
147+  " p-Value = 1.522899394812427e-06\n " 
148+  ],
149+  "name" : " stdout" 
150+  }
151+  ]
152+  },
153+  {
154+  "cell_type" : " code" 
155+  "metadata" : {
156+  "id" : " I_ve3N6a3Mlo" 
157+  "colab_type" : " code" 
158+  "colab" : {
159+  "base_uri" : " https://localhost:8080/" 
160+  "height" : 51 
161+  },
162+  "outputId" : " cc8bcc64-e1a2-4c05-98b9-db0cf91a0a01" 
163+  },
164+  "source" : [
165+  " ## Cross Checking with the internal scipy function\n " 
166+  " t2, p2 = stats.ttest_ind(a,b)\n " 
167+  " print(\" t = \"  + str(t2))\n " 
168+  " print(\" p = \"  + str(2*p2))" 
169+  ],
170+  "execution_count" : 10 ,
171+  "outputs" : [
172+  {
173+  "output_type" : " stream" 
174+  "text" : [
175+  " t = 7.010409357000594\n " 
176+  " p = 3.045798789679482e-06\n " 
177+  ],
178+  "name" : " stdout" 
179+  }
180+  ]
181+  }
182+  ]
183+ }
0 commit comments