1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "name" : " Multiple Linear Regression" ,
7
+ "provenance" : []
8
+ },
9
+ "kernelspec" : {
10
+ "name" : " python3" ,
11
+ "display_name" : " Python 3"
12
+ }
13
+ },
14
+ "cells" : [
15
+ {
16
+ "cell_type" : " markdown" ,
17
+ "metadata" : {
18
+ "id" : " CazISR8X_HUG"
19
+ },
20
+ "source" : [
21
+ " # Multiple Linear Regression"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type" : " markdown" ,
26
+ "metadata" : {
27
+ "id" : " pOyqYHTk_Q57"
28
+ },
29
+ "source" : [
30
+ " ## Importing the libraries"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type" : " code" ,
35
+ "metadata" : {
36
+ "id" : " T_YHJjnD_Tja"
37
+ },
38
+ "source" : [
39
+ " import numpy as np\n " ,
40
+ " import matplotlib.pyplot as plt\n " ,
41
+ " import pandas as pd"
42
+ ],
43
+ "execution_count" : 2 ,
44
+ "outputs" : []
45
+ },
46
+ {
47
+ "cell_type" : " markdown" ,
48
+ "metadata" : {
49
+ "id" : " vgC61-ah_WIz"
50
+ },
51
+ "source" : [
52
+ " ## Importing the dataset"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type" : " code" ,
57
+ "metadata" : {
58
+ "id" : " UrxyEKGn_ez7"
59
+ },
60
+ "source" : [
61
+ " dataset = pd.read_csv('50_Startups.csv')  # raw table; the last column is used as the regression target\n " ,
62
+ " X = dataset.iloc[:, :-1].to_numpy()  # feature matrix: every column except the last\n " ,
63
+ " y = dataset.iloc[:, -1].to_numpy()  # target vector: the last column only"
64
+ ],
65
+ "execution_count" : 3 ,
66
+ "outputs" : []
67
+ },
68
+ {
69
+ "cell_type" : " code" ,
70
+ "metadata" : {
71
+ "id" : " GOB3QhV9B5kD" ,
72
+ "outputId" : " eecdc574-cedd-4140-985d-60b93e6a7efe" ,
73
+ "colab" : {
74
+ "base_uri" : " https://localhost:8080/" ,
75
+ "height" : 857
76
+ }
77
+ },
78
+ "source" : [
79
+ " print(X)"
80
+ ],
81
+ "execution_count" : 4 ,
82
+ "outputs" : [
83
+ {
84
+ "output_type" : " stream" ,
85
+ "text" : [
86
+ " [[165349.2 136897.8 471784.1 'New York']\n " ,
87
+ " [162597.7 151377.59 443898.53 'California']\n " ,
88
+ " [153441.51 101145.55 407934.54 'Florida']\n " ,
89
+ " [144372.41 118671.85 383199.62 'New York']\n " ,
90
+ " [142107.34 91391.77 366168.42 'Florida']\n " ,
91
+ " [131876.9 99814.71 362861.36 'New York']\n " ,
92
+ " [134615.46 147198.87 127716.82 'California']\n " ,
93
+ " [130298.13 145530.06 323876.68 'Florida']\n " ,
94
+ " [120542.52 148718.95 311613.29 'New York']\n " ,
95
+ " [123334.88 108679.17 304981.62 'California']\n " ,
96
+ " [101913.08 110594.11 229160.95 'Florida']\n " ,
97
+ " [100671.96 91790.61 249744.55 'California']\n " ,
98
+ " [93863.75 127320.38 249839.44 'Florida']\n " ,
99
+ " [91992.39 135495.07 252664.93 'California']\n " ,
100
+ " [119943.24 156547.42 256512.92 'Florida']\n " ,
101
+ " [114523.61 122616.84 261776.23 'New York']\n " ,
102
+ " [78013.11 121597.55 264346.06 'California']\n " ,
103
+ " [94657.16 145077.58 282574.31 'New York']\n " ,
104
+ " [91749.16 114175.79 294919.57 'Florida']\n " ,
105
+ " [86419.7 153514.11 0.0 'New York']\n " ,
106
+ " [76253.86 113867.3 298664.47 'California']\n " ,
107
+ " [78389.47 153773.43 299737.29 'New York']\n " ,
108
+ " [73994.56 122782.75 303319.26 'Florida']\n " ,
109
+ " [67532.53 105751.03 304768.73 'Florida']\n " ,
110
+ " [77044.01 99281.34 140574.81 'New York']\n " ,
111
+ " [64664.71 139553.16 137962.62 'California']\n " ,
112
+ " [75328.87 144135.98 134050.07 'Florida']\n " ,
113
+ " [72107.6 127864.55 353183.81 'New York']\n " ,
114
+ " [66051.52 182645.56 118148.2 'Florida']\n " ,
115
+ " [65605.48 153032.06 107138.38 'New York']\n " ,
116
+ " [61994.48 115641.28 91131.24 'Florida']\n " ,
117
+ " [61136.38 152701.92 88218.23 'New York']\n " ,
118
+ " [63408.86 129219.61 46085.25 'California']\n " ,
119
+ " [55493.95 103057.49 214634.81 'Florida']\n " ,
120
+ " [46426.07 157693.92 210797.67 'California']\n " ,
121
+ " [46014.02 85047.44 205517.64 'New York']\n " ,
122
+ " [28663.76 127056.21 201126.82 'Florida']\n " ,
123
+ " [44069.95 51283.14 197029.42 'California']\n " ,
124
+ " [20229.59 65947.93 185265.1 'New York']\n " ,
125
+ " [38558.51 82982.09 174999.3 'California']\n " ,
126
+ " [28754.33 118546.05 172795.67 'California']\n " ,
127
+ " [27892.92 84710.77 164470.71 'Florida']\n " ,
128
+ " [23640.93 96189.63 148001.11 'California']\n " ,
129
+ " [15505.73 127382.3 35534.17 'New York']\n " ,
130
+ " [22177.74 154806.14 28334.72 'California']\n " ,
131
+ " [1000.23 124153.04 1903.93 'New York']\n " ,
132
+ " [1315.46 115816.21 297114.46 'Florida']\n " ,
133
+ " [0.0 135426.92 0.0 'California']\n " ,
134
+ " [542.05 51743.15 0.0 'New York']\n " ,
135
+ " [0.0 116983.8 45173.06 'California']]\n "
136
+ ],
137
+ "name" : " stdout"
138
+ }
139
+ ]
140
+ },
141
+ {
142
+ "cell_type" : " markdown" ,
143
+ "metadata" : {
144
+ "id" : " VadrvE7s_lS9"
145
+ },
146
+ "source" : [
147
+ " ## Encoding categorical data"
148
+ ]
149
+ },
150
+ {
151
+ "cell_type" : " code" ,
152
+ "metadata" : {
153
+ "id" : " wV3fD1mbAvsh"
154
+ },
155
+ "source" : [
156
+ " from sklearn.compose import ColumnTransformer\n " ,
157
+ " from sklearn.preprocessing import OneHotEncoder\n " ,
158
+ " ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')  # column 3 holds the string category ('New York', 'California', 'Florida'); the other columns pass through unchanged\n " ,
159
+ " X = np.array(ct.fit_transform(dataset.iloc[:, :-1].values))  # encode from the raw dataset, not from X itself, so re-running this cell cannot one-hot encode an already-encoded matrix"
160
+ ],
161
+ "execution_count" : 5 ,
162
+ "outputs" : []
163
+ },
164
+ {
165
+ "cell_type" : " code" ,
166
+ "metadata" : {
167
+ "id" : " 4ym3HdYeCGYG" ,
168
+ "outputId" : " 75d3a14b-9bbd-4e62-a40b-63a6eeafca2a" ,
169
+ "colab" : {
170
+ "base_uri" : " https://localhost:8080/" ,
171
+ "height" : 857
172
+ }
173
+ },
174
+ "source" : [
175
+ " print(X)"
176
+ ],
177
+ "execution_count" : 7 ,
178
+ "outputs" : [
179
+ {
180
+ "output_type" : " stream" ,
181
+ "text" : [
182
+ " [[0.0 0.0 1.0 165349.2 136897.8 471784.1]\n " ,
183
+ " [1.0 0.0 0.0 162597.7 151377.59 443898.53]\n " ,
184
+ " [0.0 1.0 0.0 153441.51 101145.55 407934.54]\n " ,
185
+ " [0.0 0.0 1.0 144372.41 118671.85 383199.62]\n " ,
186
+ " [0.0 1.0 0.0 142107.34 91391.77 366168.42]\n " ,
187
+ " [0.0 0.0 1.0 131876.9 99814.71 362861.36]\n " ,
188
+ " [1.0 0.0 0.0 134615.46 147198.87 127716.82]\n " ,
189
+ " [0.0 1.0 0.0 130298.13 145530.06 323876.68]\n " ,
190
+ " [0.0 0.0 1.0 120542.52 148718.95 311613.29]\n " ,
191
+ " [1.0 0.0 0.0 123334.88 108679.17 304981.62]\n " ,
192
+ " [0.0 1.0 0.0 101913.08 110594.11 229160.95]\n " ,
193
+ " [1.0 0.0 0.0 100671.96 91790.61 249744.55]\n " ,
194
+ " [0.0 1.0 0.0 93863.75 127320.38 249839.44]\n " ,
195
+ " [1.0 0.0 0.0 91992.39 135495.07 252664.93]\n " ,
196
+ " [0.0 1.0 0.0 119943.24 156547.42 256512.92]\n " ,
197
+ " [0.0 0.0 1.0 114523.61 122616.84 261776.23]\n " ,
198
+ " [1.0 0.0 0.0 78013.11 121597.55 264346.06]\n " ,
199
+ " [0.0 0.0 1.0 94657.16 145077.58 282574.31]\n " ,
200
+ " [0.0 1.0 0.0 91749.16 114175.79 294919.57]\n " ,
201
+ " [0.0 0.0 1.0 86419.7 153514.11 0.0]\n " ,
202
+ " [1.0 0.0 0.0 76253.86 113867.3 298664.47]\n " ,
203
+ " [0.0 0.0 1.0 78389.47 153773.43 299737.29]\n " ,
204
+ " [0.0 1.0 0.0 73994.56 122782.75 303319.26]\n " ,
205
+ " [0.0 1.0 0.0 67532.53 105751.03 304768.73]\n " ,
206
+ " [0.0 0.0 1.0 77044.01 99281.34 140574.81]\n " ,
207
+ " [1.0 0.0 0.0 64664.71 139553.16 137962.62]\n " ,
208
+ " [0.0 1.0 0.0 75328.87 144135.98 134050.07]\n " ,
209
+ " [0.0 0.0 1.0 72107.6 127864.55 353183.81]\n " ,
210
+ " [0.0 1.0 0.0 66051.52 182645.56 118148.2]\n " ,
211
+ " [0.0 0.0 1.0 65605.48 153032.06 107138.38]\n " ,
212
+ " [0.0 1.0 0.0 61994.48 115641.28 91131.24]\n " ,
213
+ " [0.0 0.0 1.0 61136.38 152701.92 88218.23]\n " ,
214
+ " [1.0 0.0 0.0 63408.86 129219.61 46085.25]\n " ,
215
+ " [0.0 1.0 0.0 55493.95 103057.49 214634.81]\n " ,
216
+ " [1.0 0.0 0.0 46426.07 157693.92 210797.67]\n " ,
217
+ " [0.0 0.0 1.0 46014.02 85047.44 205517.64]\n " ,
218
+ " [0.0 1.0 0.0 28663.76 127056.21 201126.82]\n " ,
219
+ " [1.0 0.0 0.0 44069.95 51283.14 197029.42]\n " ,
220
+ " [0.0 0.0 1.0 20229.59 65947.93 185265.1]\n " ,
221
+ " [1.0 0.0 0.0 38558.51 82982.09 174999.3]\n " ,
222
+ " [1.0 0.0 0.0 28754.33 118546.05 172795.67]\n " ,
223
+ " [0.0 1.0 0.0 27892.92 84710.77 164470.71]\n " ,
224
+ " [1.0 0.0 0.0 23640.93 96189.63 148001.11]\n " ,
225
+ " [0.0 0.0 1.0 15505.73 127382.3 35534.17]\n " ,
226
+ " [1.0 0.0 0.0 22177.74 154806.14 28334.72]\n " ,
227
+ " [0.0 0.0 1.0 1000.23 124153.04 1903.93]\n " ,
228
+ " [0.0 1.0 0.0 1315.46 115816.21 297114.46]\n " ,
229
+ " [1.0 0.0 0.0 0.0 135426.92 0.0]\n " ,
230
+ " [0.0 0.0 1.0 542.05 51743.15 0.0]\n " ,
231
+ " [1.0 0.0 0.0 0.0 116983.8 45173.06]]\n "
232
+ ],
233
+ "name" : " stdout"
234
+ }
235
+ ]
236
+ },
237
+ {
238
+ "cell_type" : " markdown" ,
239
+ "metadata" : {
240
+ "id" : " WemVnqgeA70k"
241
+ },
242
+ "source" : [
243
+ " ## Splitting the dataset into the Training set and Test set"
244
+ ]
245
+ },
246
+ {
247
+ "cell_type" : " code" ,
248
+ "metadata" : {
249
+ "id" : " Kb_v_ae-A-20"
250
+ },
251
+ "source" : [
252
+ " from sklearn.model_selection import train_test_split\n " ,
253
+ " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)  # hold out 20% of the rows; fixed seed keeps the split reproducible"
254
+ ],
255
+ "execution_count" : 8 ,
256
+ "outputs" : []
257
+ },
258
+ {
259
+ "cell_type" : " markdown" ,
260
+ "metadata" : {
261
+ "id" : " k-McZVsQBINc"
262
+ },
263
+ "source" : [
264
+ " ## Training the Multiple Linear Regression model on the Training set"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type" : " code" ,
269
+ "metadata" : {
270
+ "id" : " ywPjx0L1BMiD" ,
271
+ "outputId" : " 721b8613-3990-468d-c2f0-c828fb4f3b7a" ,
272
+ "colab" : {
273
+ "base_uri" : " https://localhost:8080/" ,
274
+ "height" : 34
275
+ }
276
+ },
277
+ "source" : [
278
+ " from sklearn.linear_model import LinearRegression\n " ,
279
+ " regressor = LinearRegression()  # default settings\n " ,
280
+ " regressor.fit(X=X_train, y=y_train)  # last expression: the cell echoes the fitted estimator"
281
+ ],
282
+ "execution_count" : 9 ,
283
+ "outputs" : [
284
+ {
285
+ "output_type" : " execute_result" ,
286
+ "data" : {
287
+ "text/plain" : [
288
+ " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
289
+ ]
290
+ },
291
+ "metadata" : {
292
+ "tags" : []
293
+ },
294
+ "execution_count" : 9
295
+ }
296
+ ]
297
+ },
298
+ {
299
+ "cell_type" : " markdown" ,
300
+ "metadata" : {
301
+ "id" : " xNkXL1YQBiBT"
302
+ },
303
+ "source" : [
304
+ " ## Predicting the Test set results"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type" : " code" ,
309
+ "metadata" : {
310
+ "id" : " TQKmwvtdBkyb" ,
311
+ "outputId" : " 71d3e9ba-a6ef-4e16-9805-664f2a1b777e" ,
312
+ "colab" : {
313
+ "base_uri" : " https://localhost:8080/" ,
314
+ "height" : 185
315
+ }
316
+ },
317
+ "source" : [
318
+ " np.set_printoptions(precision=2)  # show two decimals in the comparison table\n " ,
319
+ " y_pred = regressor.predict(X_test)\n " ,
320
+ " print(np.column_stack((y_pred, y_test)))  # left column: predicted value, right column: actual value"
321
+ ],
322
+ "execution_count" : 10 ,
323
+ "outputs" : [
324
+ {
325
+ "output_type" : " stream" ,
326
+ "text" : [
327
+ " [[103015.2 103282.38]\n " ,
328
+ " [132582.28 144259.4 ]\n " ,
329
+ " [132447.74 146121.95]\n " ,
330
+ " [ 71976.1 77798.83]\n " ,
331
+ " [178537.48 191050.39]\n " ,
332
+ " [116161.24 105008.31]\n " ,
333
+ " [ 67851.69 81229.06]\n " ,
334
+ " [ 98791.73 97483.56]\n " ,
335
+ " [113969.44 110352.25]\n " ,
336
+ " [167921.07 166187.94]]\n "
337
+ ],
338
+ "name" : " stdout"
339
+ }
340
+ ]
341
+ }
342
+ ]
343
+ }
0 commit comments