Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c897d87

Browse files
Data Cleaning with numpy and pandas
1 parent f05b151 commit c897d87

File tree

1 file changed

+169
-0
lines changed

1 file changed

+169
-0
lines changed

‎Data-Mining/Data-Cleaning.ipynb‎

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"name": "Introduction-To-Data-PreProcessing.ipynb",
7+
"provenance": []
8+
},
9+
"kernelspec": {
10+
"name": "python3",
11+
"display_name": "Python 3"
12+
}
13+
},
14+
"cells": [
15+
{
16+
"cell_type": "code",
17+
"metadata": {
18+
"id": "n5SffVH7WyKD",
19+
"colab_type": "code",
20+
"colab": {}
21+
},
22+
"source": [
23+
"%matplotlib inline\n",
24+
"import pandas as pd\n",
25+
"import numpy as np\n",
26+
"import matplotlib.pyplot as plt\n"
27+
],
28+
"execution_count": 0,
29+
"outputs": []
30+
},
31+
{
32+
"cell_type": "code",
33+
"metadata": {
34+
"id": "hTx7f0phZFIR",
35+
"colab_type": "code",
36+
"colab": {}
37+
},
38+
"source": [
39+
"from google.colab import drive \n",
40+
"drive.mount('/mntDrive') \n",
41+
"!ls \"/mntDrive/My Drive/Colab Notebooks\"\n"
42+
],
43+
"execution_count": 0,
44+
"outputs": []
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {
49+
"id": "02ESSs6qZqXn",
50+
"colab_type": "text"
51+
},
52+
"source": [
53+
""
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"metadata": {
59+
"id": "H9cAjsktYGkV",
60+
"colab_type": "code",
61+
"colab": {}
62+
},
63+
"source": [
64+
"df=pd.read_csv('/mntDrive/My Drive/Colab Notebooks/pima1.csv')"
65+
],
66+
"execution_count": 0,
67+
"outputs": []
68+
},
69+
{
70+
"cell_type": "code",
71+
"metadata": {
72+
"id": "2MUSOJOrZPmW",
73+
"colab_type": "code",
74+
"colab": {}
75+
},
76+
"source": [
77+
"df"
78+
],
79+
"execution_count": 0,
80+
"outputs": []
81+
},
82+
{
83+
"cell_type": "code",
84+
"metadata": {
85+
"id": "9GryFGuFZ0kt",
86+
"colab_type": "code",
87+
"colab": {}
88+
},
89+
"source": [
90+
"df.at[0, 'pregnancy'] = 200 #Changing the value in a particular cell (presumably planting an outlier for the checks below)"
91+
],
92+
"execution_count": 0,
93+
"outputs": []
94+
},
95+
{
96+
"cell_type": "code",
97+
"metadata": {
98+
"id": "RJX9BEBFcgh7",
99+
"colab_type": "code",
100+
"colab": {}
101+
},
102+
"source": [
103+
"df.head()"
104+
],
105+
"execution_count": 0,
106+
"outputs": []
107+
},
108+
{
109+
"cell_type": "code",
110+
"metadata": {
111+
"id": "gwZmEHtJebme",
112+
"colab_type": "code",
113+
"colab": {}
114+
},
115+
"source": [
116+
"#Checking for outliers (Method 1)\n",
117+
"plt.boxplot(df['pregnancy'])\n",
118+
"plt.show()"
119+
],
120+
"execution_count": 0,
121+
"outputs": []
122+
},
123+
{
124+
"cell_type": "code",
125+
"metadata": {
126+
"id": "2-8jcSDcg49x",
127+
"colab_type": "code",
128+
"colab": {}
129+
},
130+
"source": [
131+
"#Checking for outliers (Method 2: interquartile range)\n",
132+
"Q1, Q3 = df.quantile(0.25), df.quantile(0.75)\n",
133+
"IQR = Q3 - Q1\n",
134+
"print(Q1)\n",
135+
"print(Q3)\n",
136+
"print(IQR)"
137+
],
138+
"execution_count": 0,
139+
"outputs": []
140+
},
141+
{
142+
"cell_type": "code",
143+
"metadata": {
144+
"id": "rYOgxaKuidx0",
145+
"colab_type": "code",
146+
"colab": {}
147+
},
148+
"source": [
149+
"lower, upper = Q1 - (1.5*IQR), Q3 + (1.5*IQR)\n",
150+
"print((df<lower) | (df>upper))"
151+
],
152+
"execution_count": 0,
153+
"outputs": []
154+
},
155+
{
156+
"cell_type": "code",
157+
"metadata": {
158+
"id": "F8Yp4E8_ixNt",
159+
"colab_type": "code",
160+
"colab": {}
161+
},
162+
"source": [
163+
""
164+
],
165+
"execution_count": 0,
166+
"outputs": []
167+
}
168+
]
169+
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /