Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 170253c

Browse files
author
Algorithmica
authored
Add files via upload
1 parent 4311a1b commit 170253c

File tree

3 files changed

+102
-0
lines changed

3 files changed

+102
-0
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
##one-way ANOVA test
2+
#it tests whether the mean of a numeric variable differs across the
3+
#levels of one categorical variable
4+
from scipy import stats
5+
import pandas as pd
6+
import os
7+
8+
#folder that holds the Titanic training data; named data_dir so the
#builtin dir() is not shadowed
data_dir = 'F:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
#DataFrame.info() prints its summary itself and returns None, so it is
#called directly; wrapping it in print() would add a stray "None" line
titanic_train.info()
11+
12+
#anova test
#The one-way ANOVA tests whether the mean of some numeric variable differs
#across the levels of one categorical variable (do any of the group means
#differ from one another?)
#split the fares into one group per Pclass value
fare_by_class1 = titanic_train.Fare[titanic_train.Pclass == 1]
fare_by_class2 = titanic_train.Fare[titanic_train.Pclass == 2]
fare_by_class3 = titanic_train.Fare[titanic_train.Pclass == 3]

#a bare expression is only echoed in an interactive console; print the
#result so it is also visible when the file is run as a script
anova_result = stats.f_oneway(fare_by_class1, fare_by_class2, fare_by_class3)
print(anova_result)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
##chi-square goodness of fit test
2+
#it tests whether the distribution of sample categorical data matches an
3+
#expected distribution
4+
import numpy as np
5+
from scipy import stats
6+
import pandas as pd
7+
import os
8+
9+
n_trials = 120
n_outcomes = 6
#simulate die rolls: integers from 1 to n_outcomes (upper bound is exclusive)
result = np.random.randint(1, n_outcomes + 1, n_trials)

#count every face, including faces that never came up, so that observed
#always has n_outcomes entries and lines up with expected
outcomes = np.arange(1, n_outcomes + 1)
observed = np.bincount(result, minlength=n_outcomes + 1)[1:]
for outcome, count in zip(outcomes, observed):
    print(outcome, count)

#every face is expected equally often; keep the counts as floats so the
#expected total still equals n_trials when it is not divisible by n_outcomes
expected = np.full(n_outcomes, n_trials / n_outcomes)

#print the result so it is visible when the file is run as a script
print(stats.chisquare(f_obs=observed, f_exp=expected))

#hand-picked observed counts (each sums to n_trials) showing how the test
#reacts to increasingly uneven distributions
example_counts = [
    [15, 29, 18, 19, 20, 19],
    [20, 20, 20, 20, 20, 20],
    [30, 10, 20, 20, 20, 20],
    [10, 30, 30, 10, 10, 30],
]
for counts in example_counts:
    print(counts, stats.chisquare(f_obs=counts, f_exp=expected))
24+
25+
#chi-square independence test
#The chi-squared test of independence tests whether two categorical variables
#are independent
#folder that holds the Titanic training data; named data_dir so the
#builtin dir() is not shadowed
data_dir = 'F:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
#DataFrame.info() prints its summary itself and returns None, so it is
#called directly; wrapping it in print() would add a stray "None" line
titanic_train.info()

#contingency table of Sex against Survived; the result is printed so it is
#visible when the file is run as a script
observed = pd.crosstab(titanic_train.Sex, titanic_train.Survived)
print(stats.chi2_contingency(observed=observed))

#same test for Pclass against Survived
observed = pd.crosstab(titanic_train.Pclass, titanic_train.Survived)
print(stats.chi2_contingency(observed=observed))
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
##t-tests and one-way ANOVA
2+
#these tests check whether sample means differ from a reference mean or
3+
#from one another
4+
import numpy as np
5+
from scipy import stats
6+
import pandas as pd
7+
import os
8+
9+
#folder that holds the Titanic training data; named data_dir so the
#builtin dir() is not shadowed
data_dir = 'F:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
#DataFrame.info() prints its summary itself and returns None, so it is
#called directly; wrapping it in print() would add a stray "None" line
titanic_train.info()
12+
13+
#one sample t-test
#A one-sample t-test checks whether a sample mean differs from the population mean.
#use the Fare column as a Series (not a one-column DataFrame) so the test
#yields a single statistic and p-value instead of one-element arrays
fare_sample = titanic_train.Fare.sample(frac=0.6)
#print the result so it is visible when the file is run as a script
print(stats.ttest_1samp(a=fare_sample, popmean=titanic_train.Fare.mean()))
17+
18+
#two sample t-test
#A two-sample t-test investigates whether the means of two independent data samples
#differ from one another.
fare_by_non_survived = titanic_train.Fare[titanic_train.Survived == 0]
fare_by_survived = titanic_train.Fare[titanic_train.Survived == 1]
#equal_var=False: do not assume both groups share the same variance
#print the result so it is visible when the file is run as a script
two_sample_result = stats.ttest_ind(a=fare_by_non_survived,
                                    b=fare_by_survived,
                                    equal_var=False)
print(two_sample_result)
26+
27+
#paired t-test
#testing differences between samples of the same group at different points in time.
#a hospital might want to test whether a weight-loss drug works
#by checking the weights of the same group of patients before and after treatment.
#A paired t-test lets you check whether the means of samples from the same group differ.
#simulated weights: 100 draws from a normal distribution (mean 250, sd 30)
before = stats.norm.rvs(scale=30, loc=250, size=100)
#each "after" weight is the matching "before" weight plus a normally
#distributed change (mean -1.25, sd 5)
after = before + stats.norm.rvs(scale=5, loc=-1.25, size=100)
weight_df = pd.DataFrame({"weight_before": before,
                          "weight_after": after,
                          "weight_change": after - before})
#print the summary and the test result so both are visible when the file
#is run as a script
print(weight_df.describe())
print(stats.ttest_rel(a=before, b=after))
39+
40+
#anova test
#The one-way ANOVA tests whether the mean of some numeric variable differs
#across the levels of one categorical variable (do any of the group means
#differ from one another?)
#split the fares into one group per Pclass value
fare_by_class1 = titanic_train.Fare[titanic_train.Pclass == 1]
fare_by_class2 = titanic_train.Fare[titanic_train.Pclass == 2]
fare_by_class3 = titanic_train.Fare[titanic_train.Pclass == 3]

#a bare expression is only echoed in an interactive console; print the
#result so it is also visible when the file is run as a script
anova_result = stats.f_oneway(fare_by_class1, fare_by_class2, fare_by_class3)
print(anova_result)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /