diff --git a/xinda/lesson10/intro_to_ML_revised.ipynb b/xinda/lesson10/intro_to_ML_revised.ipynb new file mode 100644 index 0000000..f3349e7 --- /dev/null +++ b/xinda/lesson10/intro_to_ML_revised.ipynb @@ -0,0 +1,531 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 机器学习\n", + "* 机器学习(Machine Learning)是人工智能的分支,其目标是通过算法从现有的数据中建立模型(学习)来解决问题。\n", + "* 机器学习是一门交叉学科,涉及概率统计(probability and statistics),优化(optimization),和计算机编程(computer programming)等等。\n", + "* 用途极为广泛:从预测信用卡违约风险,癌症病人五年生存概率到汽车无人驾驶,都有着机器学习的身影。\n", + "* 备受重视:人们在决策分析的时候越来越多地用定量方法(quantitative approach)来衡量一个决策的优劣。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 机器学习的主要任务\n", + "## 监督学习\n", + "* 监督学习(Supervised Learning):从给定的训练数据集中学习出一个函数,当新的数据到来时,可以根据这个函数预测结果。监督学习的训练集(training data)要求是包括输入和输出,也可以说是特征和目标。\n", + "* 监督学习中又可进一步分为两大类主要问题:预测与分类。房价预测是一个典型的预测问题,房价作为目标是一个连续型变量。信用卡违约预测是一个典型的分类问题,是否违约作为一个目标是一个分类变量。\n", + " \n", + "## 无监督学习\n", + "* 无监督学习(Unsupervised Learning):训练集没有人为标注的结果。我们从输入数据本身探索规律。\n", + "* 无监督学习的例子包括图片聚类分析,文章主题分类,基因序列分析,和高维数据(high dimensional data) 降维等等。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 案例分析:波士顿地区房价\n", + "* 房价分析与预测是典型的监督学习中的预测问题。我们将使用boston房价为例子来引入监督学习的概念。\n", + "* 数据来源: Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.\n", + "* 我们使用这个案例来展示在python中使用机器学习的一般流程,让大家对机器学习有一个直观感受。撇开艰涩的理论,在应用层面对有python基础的人来说,使用机器学习是水到渠成的。\n", + "\n", + "### 数据描述\n", + "* 目标:自住房的中间价,以1000美元计价。\n", + "* 特征:13个特征,包括犯罪率,每个镇(town)非商用面积百分比,平均每个房子的房间数,房产税率,黑人比例,离波士顿工作中心的加权距离,等等。\n", + "\n", + "* CRIM: per capita crime rate by town 犯罪率\n", + "* ZN: proportion of residential land zoned for lots over 25,000 sq.ft. 
划为25000平方英尺以上地块的住宅用地比例\n", + "* INDUS: proportion of non-retail business acres per town \n", + "* CHAS: Charles River dummy variable (= 1 if tract bounds river; 0 otherwise) 是否靠近查尔斯河\n", + "* NOX: nitric oxides concentration (parts per 10 million) 一氧化氮浓度\n", + "* RM: average number of rooms per dwelling 每间住宅的平均房间数\n", + "* AGE: proportion of owner-occupied units built prior to 1940 1940年前建造的自住房房屋比例\n", + "* DIS: weighted distances to five Boston employment centres 离波士顿五个工作中心的加权距离\n", + "* RAD: index of accessibility to radial highways 高速公路可得性指数\n", + "* TAX: full-value property-tax rate per \\$10,000 每10000美元的房产税率\n", + "* PTRATIO: pupil-teacher ratio by town 学生教师比\n", + "* B: $1000(Bk - 0.63)^2$ where Bk is the proportion of blacks by town \n", + "* LSTAT: \\% lower status of the population 下层经济阶层百分比\n", + "* MEDV: Median value of owner-occupied homes in \\$1000's 自住房中间价格" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# sklearn是机器学习模型的主要工具\n", + "import matplotlib.pyplot as plt #作图\n", + "from sklearn import datasets #数据\n", + "from sklearn.feature_selection import SelectKBest, f_regression #特征选取\n", + "from sklearn.linear_model import LinearRegression #线性回归\n", + "from sklearn import metrics #模型评估指标\n", + "import numpy as np\n", + "import pandas as pd\n", + "%matplotlib inline \n", + " #!/usr/bin/env python -W ignore::DeprecationWarning\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 首先,我们读取数据。\n", + "### 房价预测是典型的监督学习,所以我们的训练集(training data)要求特征和目标。" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(506, 13)\n", + "(506,)\n" + ] + } + ], + "source": [ + "#读取波士顿房价数据\n", + "boston_dataset = datasets.load_boston()\n", + "X_full = boston_dataset.data #导入特征\n", + "Y = boston_dataset.target #导入目标: 房屋中间价 1000美元计价。\n", 
+ "# X_full是一个ndarray\n", + "print X_full.shape #shape属性给出数据尺寸,我们有506个数据点和13个变量\n", + "print Y.shape #目标一般都是一维变量。Y是一个ndarray。" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',\n", + " 'TAX', 'PTRATIO', 'B', 'LSTAT'], \n", + " dtype='|S7')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# feature_names列举了所有特征名字,上方有其对应的描述\n", + "boston_dataset.feature_names" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# 把数据分为训练集和测试集\n", + "# 为了测试模型的表现,我们将数据分为70%的训练集和30%的测试集\n", + "# 使用train_test_split函数可以帮助我们随机选取训练集和测试集\n", + "from sklearn.cross_validation import train_test_split\n", + "X_train, X_test, Y_train, Y_test = train_test_split(X_full, Y, test_size=0.3, random_state=0) " + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# 让我们从最简单的形式开始,我们从这13个特征里只选择一个建立一个线性模型。\n", + "# 我们选择最后一个特征LSTAT (下层经济阶层百分比)\n", + "# 使用reshape将X转换为二维数组\n", + "X = X_train[:, 12].reshape((-1,1)) #X:LSTAT " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX+QJGd53z/PjHY4zZ6wdHMKkY13FlKEhGAXoDOxy4RQ\nwTJYxuCQKsXyUjmDyUZjx8Gm7Fj4KgYnuQoI7GCooORsK16YjWxjI6NQMg4/RByHnyuQERgLObB7\ngQCCFfh0PlySb9/8Md17vbP94+3p7umeme+n6q2d6e1++53enef7vs/zvO9rzjmEEEIsLq26GyCE\nEKJeJARCCLHgSAiEEGLBkRAIIcSCIyEQQogFR0IghBALjoRACCEWHAmBEEIsOBICIYRYcC6ruwE+\nHD9+3K2urtbdDCGEmCnuueeerznnrs46byaEYHV1la2trbqbIYQQM4WZ7ficJ9eQEEIsOBICIYRY\ncCQEQgix4EgIhBBiwZEQCCHEglOpEJjZtpndZ2b3mtlWcOyYmb3HzB4Ifl5Vxb03NzdZXV2l1Wqx\nurrK5uZm4nlHjx7FzA6VK6644sDvjh8/vl9PUv2+9y37cwghxMQ45yorwDZwfOzYLcDNweubgddl\n1XPttde6PAyHQ9ftdh2wX7rdrhsOh4fOa7VaB87LKp1Oxw0Gg9j6k46P37fszyGEEHEAW87DVpur\ncKtKM9sGTjjnvhY5dj/wHOfcl8zsGuADzrknp9Vz4sQJl2cewerqKjs7h9Nn+/0+29vbmedl0W63\nuXjxovfx8fv64vs5hBAiDjO7xzl3IvO8ioXg88BfABeB/+KcO2Nm33DOXRn83oCvh+/Hrl0H1gFW\nVlauzWOwW60WcZ/LzNjb28s8r2zG7+uL7+cQQog4fIWg6mDxs5xzTwN+APhJM3t29JfB0CXWEjvn\nzjjnTjjnTlx9deYM6QOsrKx4HU86L4t2u53r+KT38f0cQghRhEqFwDn3xeDng8AdwDOBrwQuIYKf\nD5Z939OnT9Ptdg8c63a7nD59+tB5rVa+R9DpdFhfX4+tP+n4+H198f0cQghRCJ9AwiQFWAauiLz+\nIPB84PUcDBbfklVX3mCxc6NAa7/fd2bm+v1+YoB1OBy65eXl2MDw0aNHD/yu1+vt15NUf97jZX0O\nIYQYB89gcZVC8ETgT4LyaeBUcLwHvA94AHgvcCyrrkmEoCzKMMTK/hFC1IGvEFQaLC6LvFlDZbG5\nucn6+joXLlzYP9btdjlz5gxra2ve9Sj7RwhRB43IGiqLuoSgLAOu7B8hRB00JWtopjl79myu40ko\n+0cI0WQkBCmUZcCV/SOEaDISghTKMuBra2ucOXOGfr+PmdHv93PHGYQQoioUI8hgc3OTU6dOcfbs\nWVZWVjh9+rQMuBBiJlCwWAghFhwFi0tES0ELIeaZy+puQNMZn0uws7PD+vo6gFxEQoi5QCOCDE6d\nOnVgQhnAhQsXOHXqVE0tEkKIcpEQkO76KWsugRBCNJWFF4LQ9bOzs4Nzbt/1E4qBJoMJIeadhReC\nLNfP9ddfz2j/nEtoMpgQYp5YeCFIc/1sbm6ysbFxYJ0gM+PkyZMKFAsh5oaFF4I010/caME5x113\n3TWNpgkhxFRYeCFIW0ZCgWIhxCKw8EKQtg6QAsVCiEVg4YUARmKwvb3N3t4e29vb+/5/rRoqhFgE\nJAQpaNVQIcQiICEISJpUljRaEEKIeUFCQPykspe97GUcP35cC80JIeYeLTpH/KSyRx55hN3dXUAL\nzQkh5huNCPBLB9VCc0KIeUVCgH866DTnD2gPBCHEtJAQgHc66LTmD2QthCeEEGUiIWDk9+/1eqnn\nTHP+gPZAEEJMEwlBwA033HBoldHw/bTnD2hpCyHENJEQQOIqozfddBPOuanPH9DSFkKIaSIhIN4V\nU+cqo1raQggxTRZSCMYzcnZ2dmLPq8sVo6UthBDTxKLukKZy4
sQJt7W1VUpdYUZOdARgZsQ9h36/\nz/b2din3FUKIaWNm9zjnTmSdt3AjgiQ3kLajFEIsKgsnBEnuHuecXDFCiIVk4dYaWllZiY0JyA0k\nhFhUFm5EoIwcIYQ4yMIJgTJyhBDiIJVnDZlZG9gCvuice4GZHQN+G1gFtoEbnHNfT6ujzKwhIYRY\nFJqUNfQK4DOR9zcD73POPQl4X/BepKCVSIUQVVKpEJjZ44EfBH49cvhFwEbwegP44SrbMOtoJVIh\nRNVUPSJ4I/Cvgb3Iscc5574UvP4y8Li4C81s3cy2zGzrq1/9asXNbC5aiVQIUTWVCYGZvQB40Dl3\nT9I5bhSgiA1SOOfOOOdOOOdOXH311VU1s/FoJVIhRNVUOSL4XuCFZrYN/Bbwj8xsCHzFzK4BCH4+\nWGEbZh6tRCqEqJrKhMA59yrn3OOdc6vAjwDvd869BLgTOBmcdhJ4Z1VtmAc070EIUTV1zCN4LXCd\nmT0AfF/wXiSgeQ9CiKpZuNVHhRBiUWjSPIK5R3n+QohZJlMIzOxxZvYbZvYHwfunmNmPV9+02UB5\n/kKIWcdnRPCbwB8C3xq8/yzw01U1qC4m7dUrz18IMev4CMFx59zvEEwKc879NXCx0lZNmbhe/Utf\n+lKOHz+eKQzK8xdCzDo+QvCXZtYjmPhlZt8N/EWlrZoycb36Rx99lN3d3Ux3T1I+f6vVkntICDET\n+AjBKxnl/v8tM/vfwFuBn6q0VVPGp/ee5O6Jy/MHuHjxomIFQoiZwCt91MwuA54MGHC/c+7RqhsW\nper00dXV1dhdy8YxM/b29g4d39zc5OTJk1y8eNhjpp3PhBB1UVr6qJn9M+BHgWuBZwA3Bsfmgs3N\nTc6fP+91bpIbaG1tLVYgoJpYgdJVhRBl4uMa+q5I+QfAa4AXVtimqREGiXd3dw8cX15eptPpHDiW\ntaxDGWsC+Rj4SdJVJRxCiFScc7kKcCXw7rzXFSnXXnutq4J+vx+ufnqg9Pt9NxwOXb/fd2a2/z6N\n4XDout3ugXq63W7mdXmuHw6Hrt1uJ7a5inYJIWYXYMv52HWfkw5cAEuM4gQzLwRmFmtUzWyi+vKK\nR5Q0UQrrHjfoPm3OqlcIMb/4CoFPjOC/m9mdQXkXcD9wR/6xR/Mo6s4Zd7kAbG9vs7e3x/b2dq6F\n4bLmI8SluPq0WfMchBBZ+MQI3gD8clD+A/Bs59xc7DNcZInnspaWCMXEJWRvhQY+zXCntVn7GQgh\nMvEZNtRdqnINOTe5OyfJ5dJutwvFBaIl6suf9H6KEQixuFA0RgA8DJyLKQ8D53wqL6sUEYIifvu0\nepKMd2hoB4NB5n3T6hm/pohBL+sZCCFmi8JC0KQyqRCU1RvO6rnHlfFAdNx98warZdCFEHnwFQLv\njWnM7G8ARyIupalFGyedWZw0YzjvbF/fmcdZjN/3+PHjh+YwAPR6Pb72ta8Vvp8QYrEpc2bxC4Nt\nJT8P/E9gG/iDwi2cAmVlzJSVYaNMHSFEE/HJGvp3wHcDn3XOPQF4LvDhSltVEmVlzOQ938y86nno\noYdiz0s6XgTNLhZCJOEjBI8653aBlpm1nHN3A5lDjSZQJD00q56lpaVDy1DAyK1z0003ed13Wqmd\n2kVNCJFKVhABeC9wFHgzcDvwq8AHfQIQZZWqs4YmPSftOt86p5HaqdnFQiwmlJU1BCwDbeAy4CTw\nr4CeT+VllarnEfga4ypSUXu9nuv1epVmApW9lIYQYjYoLATAfwK+16eSqkuVQuDbW64yFTVvPXkF\nSSMCIRaTMoTgFcCHGGUJ3QI83afCKkqVQuDbW57EmMYZ7KJGeRIh0exiIRaTMl1DfeDngU8Afwa8\nGvjbPpWXVZowIphk8te48U2qI4+bZlIh0WQ0IRYPXyHIzBpyzu04517nnHs6cCPww8Bnsq6bFZIy\ni66//voD6ZbHjh2LvT4pw
ydutdDR3yUen0yhzc3NxIltWXMU1tbWJl4ZVQgx3/hMKLvMzH7IzDYZ\nTSS7H3hx5S2bEmtra5w8eZJ2uw1Au93me77ne9jY2DiQbnnu3Llcu5alGebxeQadTofz58977UyW\nhFYTFUJMTNJQAbgOuA34MnAno32Ll32GGWWXaWcNJblwer2et3sla2G6aNbQ0tJSpv8+rT75+4UQ\ncVBCsPj9wMuBq3wqqrLUESNIM+A+21b2er3MOrJWH42SFl+QCAgh4igsBE0qdWQNpZVoD3w8CDsY\nDDL3GMg6JxyVRFEKqBAiL75C4LPExFyT5FtPWi8I4MKFC5w6dSp26YZbb701cUvJfr/PmTNnuOuu\nu1K3nYxrV1nLZQghxCF81KLuUsfM4nBjGVJ67HncStEeftYopOqZzXmfj9JOhZhNKNM1xGguwfcF\nry8HrvC5rqxSpRA4l27skox9u93OHVvIqjM8rynGtu6JaBIhIYpRmhAA/xz4GPB/gvdPAt7nU3lZ\npWohSGOS3cmyevh1G1hf6oxLzMozEqLJlCkE9wId4BORY/d5XHcE+CjwJ8CngV8Kjh8D3gM8EPzM\nzEqqUwicO9gzzTsSSNpcvuzebhW95zoXq1NwXIjilCkEHwl+fiL4eRnwSY/rDDgavF4CPsJog5tb\ngJuD4zcDr8uqq24hiJK1TESRHmySMc8y8lX1nus0xloxVYjilCkEtwC/wGidoeuAO4DTPpVH6ugC\nHwf+PqOZydcEx68B7s+6vklCkGQczWw/wDxJr3w4HB6aWLa0tBSbajpu5Ksy2HEC0+l0Kl822zmN\nCIQogzKFoMUoTvB24HeD1+ZV+Wgfg3uB82HPH/hG5PcWfZ9U6hCCtN55Um+1iJFKmoDmc68qe8/j\neyf4zIJOq8NXPBQjEKI4pQlBGQW4ErgbeOq44Qe+nnDNOrAFbK2srFT1nGLJMkJprqFJSarT517T\n6j1Pcp8iBl1ZQ0IUo8wRwQsYLUH9EHAOeBg451P5WD2/CPwsM+AayjJ4Sb33IoY3rxC02+3U2cxV\n9J4nGXnIxSNEffgKgc/M4jcy2qKy55x7rHPuCufcY7MuMrOrzezK4PXljOILf8ZoAbuTwWkngXd6\ntGGqJK0cevbsWTY3Nzl37tyh33U6nUKzfHu9Xuzxo0ePHppRDHDx4kWcG81m3tjY4OTJk/T7fcxs\nfwZz2UtNJ83CTlv5NO1ZCiEaQpZSMHLptHxUZey672Q0kvgk8CngF4PjPeB9jNJH3wscy6qrSSOC\npN/1er3E+uLWIxp3eQyHQ9fpdA7U2el09n+Xlb7q08Mu6mqZxM2jEYEQ9UGJrqHvAt4NvAp4ZVh8\nKi+rTFMIklYODQ1eGTuVjZelpaX9e4aGPslQZ90/Lchd1p7LecREQV8h6qNMIfgfwDuAX2K0TeWr\ngVf7VF5WmZYQJBntXq+3b7jy9nCTzk8raaubpsUn0oxunT1zBX2FqIcyheBTPhVVWaYlBD7GMm0j\nm6ibJzR8eUUgy7B3Op3EFM609muClhCLR5lCcAvw/T6VVVWmJQS+xjJqdMeviTPUk5S01U2TdkpL\na7989UIsHr5C4JM1NADebWZ/ZWYPB+Vw2swc4JsVE24E3+/3Q7Hc55FHHuHRRx8t3JZWq5W4Uf1D\nDz0UuxF9Wvvj9jMAOH/+fOweyUKIxSFTCNwoXbTlnDsSvPZKH51F8m7+kjcFMkztHAwG+6mevV6P\nTqdz6NyLFy8m1pNk8NPav7a2xpkzZw6lqe7u7rK+vi4xEGKR8Rk2AC8E3hCUF/hcU2aZdtaQb2Az\nTyA4aRXS8XtmrW6alXGT1X65iIRYHPB0DZkbc22MY2avZZRCGnYZbwwqf1UJOuTFiRMn3NbW1rRu\n5024VWV028lOp4NzLtY91O12Myd6tVqtQ+6mEDPjpptu4i1vecvEbU6q38zY29ubuF4hRPM
ws3uc\ncyeyzvOJEVwPXOecu805dxvwfOAHizZwHohzt1xxxRW8/OUvp91uHzr/woULvOQlL2F1dXXfFbO5\nucnq6iqtVovV1VWOHTuWeD/nHLfeeuuB6/MyyexgIcR847t5/ZWR199SRUNmmW9+85v7r3d3d9nY\n2Ej18e/s7LC+vs5P/MRPsL6+zs7ODs6Nlos4d+5cbMwg7vpJxCBvHKRqxoVQsQohaiDLd8TIFbQD\n/CawAXwe+Kc+fqeySpP2Ixgnyefus5NZ0jm9Xs/r+kn9+k2Z4KVZx0JUC2XFCADM7BpGcQKAjzrn\nvjyh7kxEU2MEkO7T73a7B+IHvpgZb3vb2w7FH+LOm2W//urqamyKbL/fZ3t7e/oNEmLOKBwjMLNn\nhIXRctFfCMq3BscEyb71cAXQfr+feG1cHCGsM4w/pF0/6359rUwqRDNIixH8cqR8gFHqaPj+DZW3\nbEbIyt3f3t5mOBzGnrO+vp7qr8+6vi6/flkocC1EQ/DxHxFsXF9XaXKMwDm/zeWji8VFF7EbDAb7\n8YB2u+0Gg8FE95hFFCMQolooc6tK4OM+51VVmi4EafgsQx0taZvDTyoGg8HAtVqtA2sPQfJS19Nk\nHgVOiKYgIShIWQZqkmWooyW6V8H4onI+vefBYJBaf5NEQQhRLoWFAHgz8KagPBh5/SbgTT6Vl1Wm\nLQSTuizixKPIUtQ+JSuFNDoSyCpyywgxX/gKQWL6qJmdjP1FgHNuI+33ZTLt9NFJ0hrjlpvodrtc\nfvnl7O7uVtXUzBRSM8tVn1I3hZgffNNHveYR1M20hWCS9XiOHz8ea/DNLLaussgy3HmFYNbnJggh\nLlHmWkMLR960xs3NzcRef5UiYGapKaSTLNcQ/Yw+yz9oiQgh5gAf/1HdpekxgqIB4SIlz2fIKuN7\nJWc9A6V/iqpQNlk5UGbWUN2l6VlDVQeEk0rYrrh2+ohT0paXaddHg9Pa20BUgToY5VGaEABXA78A\nnAFuC4tP5WWVps8jqGNE0O123WAwSPzC+IpT2PZwUltWtlN0/2bfPZ6jpAls9He9Xi9xPoWYb9TB\nKI8yheCDwOuAG4B/EhafyssqTReCSdwwRUo4AzlphdJ+v+8lTkmGvNvtHpgJnfRlzPuFTevpZT1D\n9QgXh0k6GCKeMoXgXp+KqixNFwLnRkbOZ+noskqa0TSzTMOaNWLo9XqHrg+vCT9nr9dznU7H22Cn\nCYePcKlHuBhoRFAevkLgkzX0LjO73uO8hefKK6/MPqkk0panHl+91Mzo9XoHNrwZ/Y8ks7u7e2D1\n02gabLjpzu7uLs45er0eZra/4iqM0mnNDDPj+PHjbG5upq426rPiaNo5ZWQvKQOqGTRt86SFIEsp\ngIeBPeCbwLng/TkflSmrNH1EMG3XUFpZWlqK7ZFnLTUxXtrt9v5ny5qdHO2pDYfDQ6OEsL40d1OR\nEUEZwUUFKJuFsobKAWUNTY8600fHS6fTiV39dJK6koz6eIn6btOexdGjR1NjBGn3mtTlVPRvKHeE\nmGVKFQLgKuCZwLPD4nNdWaXpQlBX+mhSGe+hTzJa8e2lR8/3yVZK6ukNh0O3tLSUWnfe558nuKgA\npZhHShMC4OXAfcDXgbsZuYje71N5WaXpQlDFiCA0QEWuLdK2wWCQW+DSso3SDHtaoN2nR15Gb94n\nS6oq5AYRVVGmENwHHCHIHgL+DvAOn8rLKk0XgipiBKEBmsSQR43XpKOVcLLZJNf5ioZP2mgobGkG\nsqh/P2k0EudmKxvFJkSVlCkEHwt+3gs8Jnj9aZ/KyypNFwLnLvXqyhCBrKUefK91Ll1IsgLIg8Eg\n1m+f5MIJjbZvYDqv+ynNQBbpVSe1odfrFfunKHBvxSZEGZQpBHcAVwKvAf4IeCdwl0/lZZVZEIKQ\nNMPm0zuPbmMZ4hvsPXLkyIHZuIPBILGH3ul0Mg12OHs
5bpvNpHpD4+nTZjPLPWKpwkDWGR9QbEJU\nSWlCcOBk+IfAC4FOnuuKllkSgqQefK/Xc4PB4ECvNfo+a0mFrJ7zkSNHchnUooZ3eXk59vzl5eX9\nc3zqz7NxTlUGss5euUYEokoKCwHw2ODnsbjiU3lZZZaEwLnDm9VHe9hxLgsfP3HeeQBlljh8zi9y\nzySBqMJAxqWuTiM+EN5bMQJRFWUIwbuCn58HPhf8DMvnMiuGb2eUZfSnwKeBV7hLwvIe4IHg51VZ\ndc2aEDiXr6eXdW6dE9bCiWUhPrGQEN/AcVKZloGMCxYnTcyrAmUNiaooLARFC3AN8Izg9RXAZ4Gn\nALcANwfHbwZel1XXLApBHt9v1rl1T1hzLnmUkyRgw+Gw8Chm3JXmYyAnMaqz6p6RgIgsCgsB8Iy0\n4lP5WH3vBK4D7geucZfE4v6sa2dRCMocEdQ5YS00MHlHJEtLS16zktOKb9ZOdJQy/qx8RhF1BGyL\nGnG5lIQPZQjB3UH5EPAosAXcE7z+kE/lkbpWgbPAY4FvRI5b9H1SmUUhyPNFzTq3rhFB2IY6RyST\nPOe8gpImxFX0ussw4rM6ihHTpbAQuEvG+h3Ad0TePxX4XZ/Kg/OPBgLy4uD9N8Z+//WE69YD8dla\nWVmp8llVRh4jkrVhS5yxi5uNG2YfFTXA0TbUOSLJel6+9WQ9+zjDnLbxTxHKMOJZoxi5jYRz5QrB\nocljcccSrl0C/hB4ZeTYQriGyiZuyYdOpxObchpn2PIY88FgcODedY0IonMSxtNu87qqsnr3UWEJ\nBbbIshdppP0tfMkaxchtJJwrVwhuB34deE5Qfg243eM6A94KvHHs+Os5GCy+JasuCcFku4GNG8+8\nBjha17SzltrtduISFJOOULIyg3w/Z9HYQdLfMtxQyIc0Yy+3kQgpUwiOAD/DaIbxHcHrIx7XPSv4\nB/wko+Up7gWuB3rA+xilj74XjzkJEoLiAc287qJxJl3KepLSarVKj48kPb+o6Pneq6hBTVulNU/d\nSSMczVYWIaUJwaguLgee7HNuFUVCUNyvnNdwxvVMp+EiGndhlBGfyKojz73KcrGktbUoGhGIEF8h\nyNyq0sxeyKg3/+7g/dPM7M6s60S5xG3fB3D+/PlKtlR82ctetl9vuIXjzs5O7LmXXXYZvV5v4nuZ\nGcD+Vpdra2v7vzt27NjE9YaMvg/ZrKysxB5vt9sHtuKMtm9S+sEWoL5tyIO2ehS5yVIKRhk/3wJ8\nInLsPh+VKatoRDAi79IVUSbJJMozh2DS+QZZGS1lZECllahraJpB1qrvpayh6dD050yJMYIPBz+j\nQvBJn8rLKhKCS0w67E/bASyphP/cvueG9/E5P8voTWv+wnjWVfS+0fWO4laFLUrTjUgVzNNnnoXs\nrDKF4DeAH2UU9H0S8GbgP/tUXlaREFyiSCBw/Evo01v39dFH0zN9z01rZ17RCktSymcecapzo5p5\nZhYMZx5mIRZTphB0gdPAxxhN8DqNR9ZQmUVCcIky//myjHa41o+PAfXN7a/KjTVuyCdNd+1nbJbT\npC952VTdW58Fw5mHWcjOKk0ImlAkBJcos1eVZTCT/P5LS0uHXCp53DhZX/xJDfj4nIBJ6snaLGf8\nSz4vro5p9NZnwXDmYRaErbAQAHemFZ/KyyoSgoOUaXzSDGae5QrypHkmffEniQukGatJYgxZI4Jw\nolvY3nlxdUzDqM2C4czDLPz9yxCCrwIfB34OeDaj3cn2i0/lZRUJQbWU8QUtOiKYxJ0TNcpxpE3c\n6vV6iV/irBhFuC/zPBm2afTWZ8Fw5qXpI8IyhKANPB/YAD4B/Hvg7/lUWnaREFRLGV/QuDo6nc4h\ng5pUb97ee9pyDNEv59GjRxNHEVlrD6XFKrJGQE02DnFMS9SabjjnjcJCcOAkeAzwY8Eo4V/6XFNm\nkRBUj88XNG79orT
3WcY2yiQziJM+R5wghTGN5eXl/bTQdrt9aIG9PO1KylCaZE+EupnH3rooSQgC\nAXgx8HZGWUP/Bvg2n4rLLBKC+vFx3RQxHEk90rx7F6f1bJMW3ksTg6yRiu+CeLPgLlJvff4oLASM\nVg79OCOX0FN9KquqSAjqx9d1M6nBGw4PbyAfCsH48TTBSWtbkqiM78s83q404+47N2NWM2OiZAmF\nhKR5lCEEe8DDQTkXKQ8D53wqL6tICOrH13VTxOAl+eR7vZ6XgUkz2lkljbiRRNLksry+9lkxnlmu\nI7mWmklhIWhSkRDUj++IwHef4TiKZq5Mki4aHREkGeU4IRjfyyAkj0GcJeOZJXDzlEE1T0gIRKn4\npncmGciwjrTeb1FjMokIwChGkGSU0zb0KdrLnyXjmSXS8zZZbF6QEIjSGTdwy8vL3obMp/dbpIc8\nqVvoKU95inMuf7C6DCM3S8ZTI4LZREIgKsfHkGXNFh43FJP6zCd1C4XzESYRkaJtnyXjOe0YwazE\nTpqOhEBUTpYh83EnFen9Ro1F1j2yDPokQlJ0NDNLMQLnppc1lPe51CkaTRcsCYGonKwvrI9xnbT3\n6xuzCIPXWamdeZe4WF5ePtCWpMllWZ+vakPSdEMVR56RUp1iOgtCLiEQUyHN0GT1xPN8acbvk2ep\n6n4wmSxrslfaOdESTR3NEpCq/f1Zy2TUaagmFaE8sZM63WtZLs8mCIKEQNROWV+UvL31JNF57nOf\nm7r8g+8IJtrurGuSerHTcKHUaSSLiFCedtcZcC+zo1MVEgJRO2X1SCcNBI+X0H0z/jM0xj6xhHHS\nromucRTeI0nUWq1W5rpHvs8lbGedRrKICOX5v2nqiGCa7UhDQiAaQRm930lnC+cp3W431d2UdyZx\nq9WKXXk1y6WVRwzS6klr2zSMU1K7fEXI9/+maTGCOkQ3DQmBmBuSDFp06Yler1dov+KwvqQv9vhE\nuWhabJy7adLtNtPWPRon6fNGZ0rXYSSz1meq4n51Zw0l/T01IpAQiJLwMWhluI/C7KEkAxsKT5zx\nD9/7upmyevNxzyAqellCE3fdtIxk0t8ibQ+JWafuwHwSEgIxV2QZtDLcR1m+dd/rnUsfxaS5E+JG\nBHmD5XX3QtOe3zzTxFRdCYFYKLJGBHkyPIqMLkLSeojD4TBxeY64GEGe9jShF1pnbEIcREIgForh\ncOjVU07bTS1a1yTpquO9+cFgcCBDadzIZ/0+xHeEkjclt6rea1PdJIuIhEDMFT6GK81vHl7ja/wm\nncAWvb5MQzEDAAAKPElEQVQsY+gzImi3295bhE7DUDfRTbKISAjE3OBruIbD4aGUTRilfg4Gg0LG\nz2eU4BMjGDfYZd17/PPGpa42YaKZmC4SAjE35DFcw+HwQO+91+ulpvjlMX5ZKaN5lteYRIjGs4bM\nLFfKbFYwvO6cd1E+EgIxN5RhuMo2flmuD98Ab9FeeJ4Mp/CzakSwOPgKQQshGsbm5iarq6u0Wi1W\nV1c5duxY7HkrKyvedSadm6eOKGtra2xvb7O3t8f29jZra2sHfn/69Gm63W5mPTs7O2xubk7UBpjs\nGcS1rdvtcvr06YnbIWYcH7Wou2hEsDjE+cOXlpZcp9MpFNysI5MlOmpIc+EU3cBlki1EFcxdDJBr\nSMwiaROxihqupPjBNMgy2ONumbwZTlmxgnBfBrFY1C4EwG3Ag8CnIseOAe8BHgh+XuVTl4Rgcagy\nkFl3fnvaXIdw+QXfYLTPZyv7+TURjWzSaYIQPBt4xpgQ3ALcHLy+GXidT10SgsWhykBmE4Kkky49\n4dPOtJHBPAaC6xb2WaB2IRi1gdUxIbgfuCZ4fQ1wv089EoLFocovdxPSJpM+n8+ENZ92LpJxbIKw\nN52mCsE3Iq8t+j6tSAgWi6qG+1mGY1puhrj7+KSB+hq4qItpfPOdeaIJwt50Gi8Ew
fuvp1y7DmwB\nWysrKxU9JrFIZC0EV2dPOmvewSxkSU0bjQiyaaoQyDUkaiWp11+3UYkz3ON7HOSh7s8zDRZB7IrS\nVCF4PQeDxbf41CMhEFXTBDdDma6pJnyeaeC7guui4isElc0sNrPbgQ8BTzazL5jZjwOvBa4zsweA\n7wveC1E7Zcw8Hp8RnXfGcNZs5TyUPZM6JOszFn0GeduysbHBxYsXAbh48SIbGxuV3nNu8VGLuotG\nBKJqiroZmuamKNvVlFRn9DNO+xksgvurKDTBNVRWkRCIaVDENdNEo1RkglocWZ+x7Gcw6fak8+b+\nKoKvENjo3GZz4sQJt7W1VXczhEik1WoR910yM/b29mpo0SVWV1fZ2dk5dLzf77O9ve1dT9ZnLPMZ\nbG5usr6+zoULF/aPdbtdzpw5s+8yK+tzzTNmdo9z7kTWeVp9VIgSqMonXwZnz57NdTyJrM9Y5jM4\nderUAREAuHDhAqdOndp/r1VUy0NCIEQJNNkolWWgsz5jmc/AR7zW1tY4c+YM/X4fM6Pf7x8YMYgc\n+PiP6i6KEYhZYNynPRgMGrEgWplB3Cy/fVkpsE2MucwiKFgsRH3UnUXUVFHype7nVzdlCaqEQIga\nqbNHOy9GdFGXmC7z7+crBMoaEqIC6swiUjbNbFPm309ZQ0LUSJ1ZRGVlCTWJac5Yrps6/n4SAiEq\noM4soiansk5COKdgZ2cH5xw7Ozusr6/PrRjU8feTEAhRAXWmNjY5lXUSfOYUzBO1/P18Agl1FwWL\nhcjHPAVaF3EpiWlnDSlYLIRoNAp+T46CxUKIuWDeXF1NREIgxAyySFk0SfEWYGGeQdXINSTEjOGz\nMue8o2fgh69rSEIgxIwhn7megS+KEQgxp8zjhLG86BmUi4RAiBlj3iaMTYKeQblICISYMZRFo2dQ\nNhICIWYMbciiZ1A2ChYLIcScomCxEEIILyQEQgix4EgIhBBiwZEQCCHEgiMhEEKIBWcmsobM7GHg\n/rrb4clx4Gt1NyIHs9RetbUa1NZqaEJb+865q7NOumwaLSmB+31SoJqAmW3NSlthttqrtlaD2loN\ns9RWuYaEEGLBkRAIIcSCMytCcKbuBuRgltoKs9VetbUa1NZqmJm2zkSwWAghRHXMyohACCFERTRe\nCMzs+WZ2v5n9uZndXHd70jCzbTO7z8zuNbNGrZJnZreZ2YNm9qnIsWNm9h4zeyD4eVWdbYyS0N7X\nmNkXg+d7r5ldX2cbgzZ9u5ndbWZ/amafNrNXBMcb92xT2tq45wpgZkfM7KNm9idBe38pON7EZ5vU\n1kY+23Ea7RoyszbwWeA64AvAx4AbnXN/WmvDEjCzbeCEc67u3OFDmNmzgfPAW51zTw2O3QI85Jx7\nbSCyVznnfr7OdoYktPc1wHnn3BvqbFsUM7sGuMY593EzuwK4B/hh4Mdo2LNNaesNNOy5ApiZAcvO\nufNmtgT8MfAK4MU079kmtfX5NPDZjtP0EcEzgT93zn3OOfcI8FvAi2pu00zinPsj4KGxwy8CNoLX\nG4yMQiNIaG/jcM59yTn38eD1w8BngG+jgc82pa2NxI04H7xdCoqjmc82qa0zQdOF4NuA/xt5/wUa\n/I/L6A//XjO7x8zW626MB49zzn0peP1l4HF1NsaTnzKzTwauo9pdAlHMbBV4OvARGv5sx9oKDX2u\nZtY2s3uBB4H3OOca+2wT2goNfbZRmi4Es8aznHNPA34A+MnAvTETuJGPsOk9mFuBJwJPA74E/HK9\nzbmEmR0Ffg/4aefcuejvmvZsY9ra2OfqnLsYfKceDzzTzJ469vvGPNuEtjb22UZpuhB8Efj2yPvH\nB8caiXPui8HPB4E7GLm2msxXAr9x6D9+sOb2pOKc+0rwZdsDfo2GPN/AJ/x7wKZz7h3B4UY+27i2\nNvW5RnHOfQO4m5HPvZHPNiTa1ll4ttB8IfgY8
CQze4KZdYAfAe6suU2xmNlyEIDDzJaB7wc+lX5V\n7dwJnAxenwTeWWNbMgm//AH/mAY83yBI+BvAZ5xzvxL5VeOebVJbm/hcAczsajO7Mnh9OaOkkT+j\nmc82tq1NfbbjNDprCCBIt3oj0AZuc86drrlJsZjZExmNAmC0mN9/a1Jbzex24DmMVkT8CvBq4PeB\n3wFWgB3gBudcIwK0Ce19DqMhtgO2gX8R8RXXgpk9C/hfwH3AXnD4Fxj53hv1bFPaeiMNe64AZvad\njILBbUad1t9xzv1bM+vRvGeb1Na30cBnO07jhUAIIUS1NN01JIQQomIkBEIIseBICIQQYsGREAgh\nxIIjIRBCiAVHQiAEYGbnY4492cw+EKwa+RkzO2Nmz4usJHneRivj3mtmb41c98ZgxclW8P6lkWse\nsUsr1L52mp9RiCSUPioEIyFwzh0dO/aHwFucc+8M3n+Hc+6+yO8/APysc24rcqwFfJ7RcgKvcs7d\nPVbnNg1doVYsLhoRCJHMNYwWOgQgKgIpPAf4NKM1Zm6spllClIuEQIhk/iPwfjP7AzP7mXAJgQxu\nBG5nNMv8B4O1fYRoNBICIRJwzv1X4O8Cb2fU0/+wmT0m6fxgPazrgd8PVvX8CPC8KTRViEJICIRI\nwTn3/5xztznnXgT8NfDUlNOfB1wJ3BfEAp6F3ENiBpAQCJGAjfbLXgpe/02gR/oy6DcCL3fOrTrn\nVoEnANeZWbfyxgpRgMvqboAQDaFrZl+IvP8VRvtf/KqZ/VVw7Oecc1+Ouzgw9s8HbgqPOef+0sz+\nGPgh4LerabYQxVH6qBBCLDhyDQkhxIIjIRBCiAVHQiCEEAuOhEAIIRYcCYEQQiw4EgIhhFhwJARC\nCLHgSAiEEGLB+f8WWVkZJBNHsgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 数据可视化\n", + "plt.scatter(X, Y_train, color='black')\n", + "plt.ylabel('Median Home Value')\n", + "plt.xlabel('LSTAT')\n", + "plt.show() #看到什么规律了吗?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 商业观察:随着低收入阶层的减少, 房价攀升,这与我们的预期相符。\n", + "### 定量建模第一步:简单线性模型 (simple linear regression)。 $y=\\beta_0+\\beta_1 x+\\epsilon$ \n", + "### 我们将使用著名的最小二乘法来拟合一条直线,我们暂时可以将拟合的过程视为一个黑盒子" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX+YJVV55z9v9/QFbs+gcAcIin3bbNQVSB4jE5MYF3kw\nJEhU3OzGDXayo4+xQ6vJrD+iJPNsfm16I0QjmM3GTAzJaHd0NdFIDG4eUFBj1DgIETAZUOkeYRHI\njGRmHBCYPvtHVXXfvl0/Tt1bdatu3+/nec7T91ZXnTpVM/1+z3nfc95jzjmEEEKMLmNVN0AIIUS1\nSAiEEGLEkRAIIcSIIyEQQogRR0IghBAjjoRACCFGHAmBEEKMOBICIYQYcSQEQggx4mypugE+bN++\n3U1PT1fdDCGEGCpuueWWf3XOnZZ13lAIwfT0NPv27au6GUIIMVSY2bLPeXINCSHEiCMhEEKIEUdC\nIIQQI46EQAghRhwJgRBCjDilCoGZLZnZ7WZ2m5ntC4+damY3mNnd4c9Tyrj34uIi09PTjI2NMT09\nzeLiYuJ5W7duxcw2lG3btq373fbt21frSarf975FP4cQQvSMc660AiwB27uOXQVcEX6+Argyq57z\nzjvP5WFhYcE1m00HrJZms+kWFhY2nDc2NrbuvKzSaDTc3NxcbP1Jx7vvW/RzCCFEHMA+52GrzZW4\nVaWZLQE7nHP/2nFsP3CBc+5+MzsTuNk596y0enbs2OHyrCOYnp5meXnj9Nl2u83S0lLmeVmMj49z\n/Phx7+Pd9/XF9zmEECIOM7vFObcj87ySheAe4N+A48AfO+f2mNnDzrknh7834NvR965rZ4FZgKmp\nqfPyGOyxsTHinsvMWFlZyTyvaLrv64vvcwghRBy+QlB2sPgFzrnnAC8GXm9m53f+Mhy6xFpi59we\n59wO59yO007LXCG9jqmpKa/jSedlMT4+nut4r/fxfQ4hhOiHUoXAOXdf+PNB4KPA84AHQpcQ4c8H\ni77v/Pw8zWZz3bFms8n8/PyG88bG8r2CRqPB7OxsbP1Jx+fn53sK+vo+hxBC9IVPIKGXAkwC2zo+\n/wNwMfB7rA8WX5VVV95gsXNBoLXdbjszc+12OzHAurCw4CYnJ2MDw1u3bnWNRmP1+9jYmJubm0ut\nP+54P0Ff3+cQQohuqDpYbGbfSzAKgCC53V845+bNrAV8CJgCloFXOOcOpdWVN1hcFIuLi8zOznLs\n2LHVY81mkz179jAzM+Ndj4K+QogqqEWwuCiqEoKiDLiCvkKIKqhLsHioOXDgQK7jSSjoK4SoMxKC\nFIoy4Ar6CiHqjIQghaIM+MzMDHv27KHdbmNmtNvt3HEGIYQoC8UIMlhcXGT37t0cOHCAqakp5ufn\nZcCFEEOBb4xgKLaqrJLI6EdisHv37nXHhRBi2JEQZNA9hXR5eZnZ2VlAYiCE2BwoRpDB7t27160j\nADh27NjqyEAIIYYdCQHpOf+LmkIqhBB1ZeSFIHL9LC8v45xbdf1EYqA1AEKIzc7IC0GW6+eSSy4h\nyJa9htYACCE2EyMvBGmun8XFRfbu3bsuPYSZs
XPnTgWKhRCbhpEXgjTXT9xowTnH9ddfP4imCSHE\nQBh5IUhbPaxAsRBiFBh5IUhL/6BAsRBiFBh5IYBADJaWllhZWWFpaWnV/69kcUKIUUBCENK9luB1\nr3vdaowg2otYyeKEEJsRJZ0jfieybnrZmUwIIapEG9PkIG52UDdKKyGE2KxICPCfBTTI2UJpaS+E\nEKJIJAT4zwIa1GyhrLQXQghRJBICgtlB3WkkuhnkbCFlPBVCDBIJAcH00QsvvHDD8UgcBj1bSAvZ\nhBCDREJA4Ir5/Oc/v+6YmXH55ZfjnFu3tmAQaCGbEGKQjKQQdAdid+3aVaucQlrIJoQYJCO3VWXc\n1pNJVOWK6d4neWpqivn5ea1hEEKUwsgtKJuenk41/p20222WlpYKua8QQgwaLShLwLeXL1eMEGJU\nGDkhSAq4tlqt2AykQgix2Rk5IUgKxF5zzTWxGUiFEGKzU7oQmNm4md1qZh8Pv59qZjeY2d3hz1PK\nbkMnafsPCCHEKFJ6sNjM3gTsAE52zr3EzK4CDjnn3m5mVwCnOOfellZH2dlHhRBiM1KLYLGZnQX8\nFPDejsOXAnvDz3uBl5fZhs2AEtAJIcqk7HUEVwNvBbZ1HDvDOXd/+PlbwBklt2GoiVv3MDs7CyB3\nlhCiEEobEZjZS4AHnXO3JJ3jAr9UrG/KzGbNbJ+Z7XvooYfKambtUQI6IUTZlOka+jHgZWa2BHwQ\nuNDMFoAHzOxMgPDng3EXO+f2OOd2OOd2nHbaaSU2s94oAZ0QomxKEwLn3K86585yzk0DPwt8yjn3\nc8B1wM7wtJ3Ax8pqw2ZACeiEEGVTxTqCtwMXmdndwI+H30UCSkAnhCibgSSdc87dDNwcfj4IvGgQ\n990MKAGdEKJsRm5lcRmUPb1zZmZGq56FEKUhIQjp1Zhrf2EhxLAzcmmo4+ieqw8wMTHBySefzKFD\nh1LdMUlprZXCWghRNb4riyUE+O1R0Gw2Y3MSjY2NEfcOzYyVlZVC2ymEEHmoRYqJYcFnTn7SIq6k\naZxjY2NyDwkhhgIJAf5z8uMEI256J8Dx48cVKxBCDAUjLwSLi4scPXrU69w4wYjSWo+Pj2/4XVmp\nIJSETghRJJlCYGZnmNmfmtknwu9nm9lrym9a+URB4oMHD647Pjk5SaPRWHcsbRHXzMxMYjwgTyoI\nHwPfyywlCYcQIhXnXGoBPgG8Avin8PsW4Pas64os5513niuDdrsdJb1bV9rttltYWHDtdtuZ2er3\nXuvyYWFhwTWbzXXXNpvNdfddWFhw4+Pjue7jU68QYnMC7HMeNtZHCL4U/ry149htPpUXVcoSAjOL\nNapmlruufg1ulpDE1e/T5n4FSggxvPgKgU+M4Dtm1goNCGb2I8C/eVxXe/pN6Nbpctm9ezc7d+7s\neQvMrCyjcemofdqs7KVCiEyylAJ4LvA5AuP/OeAu4Ad8VKaoUtaIoJ9efFIPvdVq5XK7RC6o7nro\n6rknjV6y2qwRgRCjC0W5hoK62AKcA5wLTPhcU2TpRwiyfP15YwERaca72Wy6ubm5zHqz3D2dBj7p\nfuPj46ltVoxAiNGlMCEA/mtc8am8qNKrEBRpBLsFI8l4R6W7Bx9336yRQHeguJ/RSy9iJ4QYbnyF\nIDPFhJn9QcfXEwlSSH/ZOfefUy8skF5TTBSVByguF1EvdN/XzBLPjft3WVxcVDpqIYQ3peUaMrMn\nAx90zl3ca+Py0qsQFJUHyCcXkQ/d992yZQvHjx/fcN74+DhPPPFE3/cTQow2ZeYa+g7w9B6uGzhF\nbfOYd4ZNUk+/+75xIpB2vB+0qEwIkYTPyuK/MbPrwvJxYD/w0fKb1j9FbfOYJBxxBr/VanH55Zd7\n3bfdbsfWm3S8V7RnghAilawgAvDCjvJjwFk+wYciS5mzhnzOSQrUps0MSquzc8qoT1C5XzSFVIjR\nhCKnj1Zdy
lpH4Jz/bJyFhQXXarV6Xi+Qdr9IDHxn9OSdBVTkCmohxPDQtxAAR4DDMeUIcNin8qJK\nL0KwsuLc6acHT/j85zt36FD8eb695V6mb8YZ7EHkJOr1GYUQm4uRHxHceGPwdN3lj/5o/XlpK3Y7\njXjnaMDHmKb1/Pvpnfdi1LWoTIjRpHAhAE4HpqLie10RpRchuO++eCGIyrOeFZyXZFjTjLaPAfdZ\ndJa3d76wsNCzkGhRmRCjh68Q+MwaepmZ3Q3cA3waWCJITV1rnvIUuOceOP30+N/v3w9msLy8xJYt\n7Q2/D95hNnmTvcHG2UaNRoOjR4967UOQtx0RMzMzLC0tsbKywtLSkhaiCSFW8VlH8D+AHwHucs49\nnWBl8RdKbVVBTE/DAw/Av2XkSn3iiSWCjvX+XPUnTUVdXFxkbCz51TrnVrOUtlotnHMcPHgQ55Kn\ndqZlH+1lSqwQQkT4CMHjzrmDwJiZjTnnbgIyV6rViZNPXnMK/fAPp535TNa8LfFEW1JGaaaBdQu1\nXve61zE7O5u6KKzdbjM/P8/U1BQHDx7k8ccfX/f7uC0u00YYedJdCyFENz65hm4EXg78LrAdeBD4\nIefc88tvXkCvKSbSeMtb4J3v9DnzmcDd6440m81VEejOQWRmqW6lZrPJzp072bt3b2ruou50FEXl\nTRJCjA5Fppi4FHgEeCPwf4GvAy/tr3nV8453BCOEpzzlRRln3kX3KCHqsce5a9JEIBpFXH/99ZkJ\n7Lp9/kWtks6LUlMIMQIkRZGBPwR+zCfiXHYZzIIyS51ltL74zwaKSuesoKwZSUlTOwc986fqaaea\n6SREf1DAgrJdwOcJZgldBfygT4VllDKFwLm4vQZ8BSF5U5k045lnH4IqqXIhWtUiJMRmoG8hWD0B\n2sDbgFuBfwF+A3imx3UnAv8I/BNwJ/Bb4fFTgRsIHO83AKdk1VW2ECThLwgPpgpBv7mLisiX1AtV\npqbQamgh+qcwIVh3MvxgKAjHPc41YGv4eQL4IsE01KuAK8LjVwBXZtVVlRBEXH31R3p2G6WtPO40\n3HNzc25iYmLdtRMTE25ubi6zZ1xW77lKY6z8SEL0T5Ejgi0EweFF4FvAB4FLfSrvqKMJfBn4YYLJ\n+meGx88E9mddX4UQJPWw8whCHmOclMIiySB2GuOyDHacwDQaDddqtXKNPHoZrWhEIET/9C0EwEXA\ntaHxvw54JTDpU2lHHePAbcDRqOcPPNzxe+v8nlQGLQRpPey1NA9+gvAzP+N3zzTXUlbPuMzec6cR\nb7VaG0YtPon3ehmtKEYgRP8UIQSfAn4BDx9+5k3gycBNwLndhh/4dsI1s8A+YN/U1FSJr2ojWb3R\nycnJjuM/7y0KaeQVgkGMCPK+l6KuidCsISH6w1cIcu9Z3Ctm9uvAMeC1wAXOufvN7EzgZufcs9Ku\nLWNBWRppex2///3v59WvfvWG1cCNRoPHHvuuV/1xr3z79u0cPHhww/GtW7eysrKybt1Bo9Fg27Zt\nHDp0iKmpKS655JINC9SiRW9FrjjuZQ/oovaNFkLkp8w9i30bcFq40T1mdhKBq+lfCNxMO8PTdgIf\nK6sNvZK21/Hu3bs3iADAtm3b1qIDGZhF5bLVRVrXXHMNjUZj3XmNRoP3vOc97NmzJzU30d69e9m5\nc+fqOdHCtW4R6HdxWC97QBe1b7QQokR8hg29FOAHCGYYfQW4A/j18HgL+CTB9NEbgVOz6hpkjKB7\nJ7KoRP7pPP74VsvPZQRrweLx8fFV10mcKyTL1ZLkTinC597r5jzy9QtRDRQ5fZRgLcGPh59PArb5\nXFdUGZQQxBktWL8tZV6fd3D+mLcgdBvKjYvdkoPHaUa3qDhCL357+fqFqIbChIDAp/8l4Ovh92cA\nn/SpvKgyKCHwMZY+ew53Gr6N9fkJwlln7ci1y1m73U5tv+blCzF6FCkEtwE
N4NaOY7f7VF5UGZQQ\n+BrLzh529zWNRmPDFMv44jtCcLHtyeu2ShKJ8fFx9dCF2KT4CoFPsPi7zrnHoi9mtiU0IpsO38Bm\ntNtXu92OhHGVxx57LDaYvBELy9ke50Z2O/zmXGxgOK39cdlLAY4fPx67EY4QYnTwEYJPm9mvASeZ\n2UXAh4G/KbdZ1ZA31XPaZjFJtNtt5ubmOmYBPUijcQKBKGQRCEI73IOge9vJtPbPzMywZ8+e1Y11\nOonbCEcIMUJkDRkIxOK1BALwl+Fn8xluFFUGPWvIN7CZFryNKz7ppcnpNsrbfsUKhBgdKCPpXFWl\n6qRzSSTNMhobG0sVhO5pnZ2Ge/3U1Xf1LAhJKIePEKODrxBkuobM7CVmdquZHTKzw2Z2xMwO9zoC\n2UzMzMywc+dOzNa7dbZs2ZJ6XbRBfbS/8fLyMs4Fi8MOHz7csbDsjazFEtKJFqllUdVOZ0loBzQh\nakCWUgBfI1gcNlB3UGep64jAueQedrQwLK0kndNqtVKu9xsh/PZvJ7e5LvP6tdhMiHKhqFxDZnYT\n8CLnXGWJYQadaygPSbl0IOhpZ+1NHEeU02h2djblev+JWxn/xJUxPT3N8vLyhuNRMFwI0R9F5hp6\nK3C9mf2qmb0pKv03cXOQNGUzmtbZbrcTr42bwRPVGc3ySb7eaLenOX48u42+bqNBkzTrqpfZWEKI\n3vERgnmCrKEnAts6iiB7yubS0hJzc3Mb4gjNZpPZ2VkmJibWHZ+YmFj110fXLywsJN5jbKxj2VkG\nkSB84xs9PGgJKCGdEDUhy3cE3OHjYyqz1DlG4Fy6zz1pZlFS2bJlS+wOYN3J8DrzH3WTZ/pplf54\nxQiEKBcKTDFxFfATPpWVVeq0VWVe8q416C4TExOrApCUWiKJO+7wF4SqgsZ1CVwLsRkpUgiOACvA\no+HnI8Bhn8qLKnXaqjLrum6jlrSAq6iSNf9/bU2DnyC8//0yxEJsFnyFYGA7lPXDoGcN9TKbZXFx\nccMsn2azyUknnRS781hRZO301R2b2AyzjYQQfvjOGkpf+bRW2cuA88OvNzvnPt5P4+pOL7NZdu3a\ntWGq57Fjx3qaPpqH/IHVSBjeDfxS+pnhqRIEITY3PiuL3w7sAr4all1m9rtlN6xK8s5mWVxcLLXX\nn0baiuD0Vbq/TN5Vy3H1aWWwKAP9vxowWb4jgq0mxzq+jwNf8fE7FVXqHiPoNyDca4lmDvluTZld\n/OIIb3xjb+9JCB/0/6o4KDBY/BU69hUGTt3sQuBcvtksZQeE40qz2XRzc3O5t6bsLlEqi/HxcTc3\nN+ec8xODtZIvgJ011VYziIQSIxZHkUJwGbAM/DmwF7gH+C8+lRdV6r6OYNAjgshoJ+UjStuasrPE\nnRONMvKnxF5fbxxpPT2fLUDFaKBU6cVRmBAEdXEm8LKwfI/PNUWWugtBb26Y/kcEaQa+H3HqXLuw\nVnwF4WzXarVie/ZpPb2s9so1MDpoRFAcfQsB8Ny04lN5UaXuQuCcc3Nzc5W4iJL+YPoVp8nJyYTr\nfQVhbZRgZqnvx8y83l2vLidf5JqqB4oRFEcRQnBTRzkMfKrj+6d8Ki+q1F0IqhgRJJWJiYnYTW+y\nNsuJK91pLdaXZ+QWhKS6fEYEkWD4vv+8hkPGp15IlIuhbyFYdxLc6nNeWaXuQlDVrKG40mg0NvzR\nLCws9FTXwsKCazQaHuf6CcLk5LbUGEHWvZJGBEW4EuSOEJuRooXgyz7nlVXqLgR1cQnFGa9eRyuR\nnz/PPX0FgXCU0D3ddWJiIrH+tN55EcHFKgOU6v2KspAQDJCyRgS99uQ7jVevbXvRi16UW+CiKa3w\nP70FwbnAEGbt6JaWbbWI3nya26pM5JISZdK3EAB/QJCH4N3Agx2f3w2826fyokrdhaCMGEFkgHox\n5J3GK82Y+xjfXu49OTnZccx/hOAjNHE
Gsl9jmjQaiXOzFY1cUqJMihCCnWnFp/KiSt2FwLm14X0R\nItBpxPKKTOfc+7S1BhD0+tPqmpiYcFu2bIk9nnZdvGH1FYR3ZQpN2vvvxb2S9O/WarWK+K+RiubM\nizLpWwjqVIZBCCLSDPfWrVtXN51ptVqJwdFuN4iP66Ss0mq1YjfESZqFNDY2ttrm+Dp9BSF+lFCG\ngazSGGtEIMpEQlAhSdMuu90VnaOIrE1nAt/74IUAiH1Gn/Oz684vCGUYyCqNsWIEokwqFwLgaQRr\nDr4K3AnsCo+fCtwA3B3+PCWrrmETAufyGZesc6tcpzA+Pr6urT4usMg1s3XrVs/7+AnCiSf+UCkG\nMklko9xLZaNZQ6Is6iAEZxKuQCbY7P4u4GyCrS+vCI9fAVyZVdcwCkEed0PWuVWvU3AueZSTVCYm\nJnpwZ/mOENa/v05DGrmy8hjVYXXPSEBEFoUJAXAa8GvAHuDaqPhU3lXPx4CLgP3AmW5NLPZnXTuM\nQlDkiKDKdQqRgRnsiORp3oKQ1bYovUUaaUJclrHtt165lIQPRQrBPwBXAq8A/lNUfCrvqGMaOACc\nDDzccdw6vyeVYRSCPH+oWedWNSLIm9K66BKMKHxHCckpNCKDnkTarKEyjG0RRtzHnajRgihSCG7z\nqSjl+q3ALcBPh98f7vr9txOumwX2AfumpqZKe1FlkuePMStPf9aUzU7jFbdPQd7S2YYqRyTR8/sL\ngkt9nrh3nDSqSJoZ1a/LqIjpqlmjGI0WhHPFCsHvAJf4VBZz7QTwd8CbOo6NhGuoaJL88+Pj45mb\nvLRara5FXullcnJy3b2rGhHEu6V+uWdB6BbT7gR9vjGQfqeVpgmrr7FOGxEMa8xDFE+RQnAEWAEe\nIchCegQ47HGdAe8Dru46/nusDxZflVWXhKD/ue55Vwl3i8qgZy1Fq3rTRchfEJLeX2cv3FfwyhoR\n5Kk7rdevRWoiojAh6LUALwj/A34FuC0slwAt4JME00dvpGMbzKQiIUg2HuPj4169yF4NsXP5Zwz1\nWyYnJ3O6pXwF4X/FXh/hc6+iYgRJ9ecx1kmuLo0IREShQgCcAjwPOD8qPtcVVSQE6b1yH+PUi0HO\nM2Oon5XPaVtS5hOgR3KIwkYhSBPbooOuZSa5U4xARBQmBMAvALcD3yZYIPYI2pimEtJSTWQZkF56\n9Hm2vEwKUvqKTdyz9heb8BOEsbGx1edMC7KnZT/t9d+yTGNd11lDdW1Xr9T9eYoUgtuBEwlnDwH/\nHviIT+VFFQnBGr36f/PMPOo00r4zhqI/Al/BSTN6vbS1X0GA719No530DEVnI627ESmazTZSGYbn\nKVIIvhT+vA04Ifx8p0/lRRUJwRr9+H+7DU+W0Z6bm/PqlUcG1Hc0kGX0+olHJLuofAWhmGDuMFK2\nMG222MUwPE+RQvBR4MnAbwKfIVghfL1P5UUVCcEaRfZCslw5STGCiYmJDWkcfN04ZcUzOutOf67t\nOURh4/XdI6/N0qsfRO92s81mGobnKUwI1p0MLwReBjTyXNdvkRCsp0jj4zODxed+eRadJfWYeokL\nJAWa/VJ3+wrCWj2ds7SGwTXgyyB6t8PQg87DMDxP30IAnBz+PDWu+FReVJEQlEsR/6HzGPC4HlMv\ngeakqbO+grJ2P19BcKvtT3Ob1ckQ+DKI3u1mEk7nhuN5ihCCj4c/7wG+Ef6Myjd8Ki+qSAjKxfc/\ndFYajO46koxLnKHsZSTgm7cpqXRnKn396z/rLQhZI6BhcxcNStQ2iystou7P07cQ1KlICMon6z+0\nT6wg6iWnTcdM6jH1ks8ojryCkjQTKM8IIU6k6txLjGMYerciP0WMCJ6bVnwqL6pICKrH19WSZxSR\nt36fnmovgpKW7M1fEN4dKwJl9azLoN/ebd17x6NIEUJwU1g+DzxOkAn0lvDz530qL6pICKonz3qC\nXsj
KkePbU01Lzpd3dLG+XV/se5SQVP+wGM+8rkGNKKqnbyFYPQE+Anx/x/dzgb/0qbyoIiGonjw9\n9l7JEpgsY9nPQrTo+rj7nH322V3n5xeEOIEcJuOZ1dbNFDjfTBQpBBsWj8UdK7NICKrHNwibtglM\nVu+3X2PS60K0KH1EnKFL2s84jyCccMJ/iH0nw2Q8s9o6DHPqR5EiheADwHuBC8LyJ8AHfCovqkgI\n6kGnIU8zur32fvvpIaeth8gqadNNkzan6TRy/iOE9W0eJuPZ677a/bgKh8VlVmeKFIITgTcSrDD+\naPj5RJ/KiyoSgnqSZSCdy57T320oejUAeYPN3ULQS5C5u+1//McfyiUIm2lEUPaK96zcVFWJRt0F\nqzAhCOriJOBZPueWUSQE9aQX41Bk77fzj7BXEYja24uQdK4w3jhC8hOEk056UiHGs2x8R3RFGMU8\nAlllnGUYYjxFjgheRrC95D3h9+cA1/lUXlSRENSTXgOIRfR+865ETtuqM4pr5Ak0R9t5ZrfDTxDA\nlTJls+h0JIPo/eZxmVU5qhqGEV2RQnAL8CTg1o5jt/tUXlSRENSXNOOQ1VPP03vqvk/ewHCz2UwU\ng+gP17fOzkVoWWIXGa/rrvMXhDzvPk2Iq+6xFu3mizOwVcZZ0v5/18VNVKQQfCH82SkEX/GpvKgi\nIRhOsmIDeUSgiD2TkwK/0awhHxdTXrHr3t0sCGoXIwhZBrPKHmu/gX/fa+s4IqhCdJMoUgj+FHgl\nwd7DzwD+AHiPT+VFFQnBcFJUj7SfQLBvaTabmSOCOOOSp20b7+EnCG94Q/x7SbuXc9X2lvvditN3\nNFG3GEEVgpRGkULQBOaBLxGsLp5Hs4aEJ0X4lfsNBvuWVquV+oc9Nze34bkiwxpnbP3v/VfeotB9\n77gyPj7unKuut5w2lbcMEarDrKFBPm8eChOCOhQJwWiT9IfWarVWDUD23gPZJQoaJ9UV3S/O0Hfv\ni9C7ePkJQlY9zlXXW85yCW5G6ho47lsIgOvSik/lRRUJwWjjY9CKGDVEf7S91tX5R58mXmmjjqg3\n7y8IO1LbUUVvOe39Ve0zL4uqA/NJFCEEDwFfBn4FOJ9gd7LV4lN5UUVCIHpNT9HdY08qeae9pvXE\no/Z2T0edmJhYDRgnzWDqdD85l0cQXG2MT5oIbmbquLisCCEYBy4G9gK3Ar8DnONTadFFQiCy8Ekx\n0flH2r13QlaqC58S9eajOhqNxrrfd+99MDc3t+qGGh8f3yACEYGITXoLQiQ2aUapTKNV197xKNK3\nEKw7CU4AXhWOEt7gc02RRUIgfAxXVv6jPMav13ULEUm94u7ppD5srMt3hLAmRINeX1DH3vEoUogQ\nhALw08CHCWYN/XfgqT4VF1kkBKONr+FKWh3caDRy7Zbm24Y4sYnwiTP0M68+qN9XEL6+rn11DWyK\n4inCNfQ+ghjB7wDn+lRWVpEQjDZ5DFd33p9osVgRxi9tymi3UfeNM/Q7rz5ox496i8LaNfFxFLG5\nKEIIVoAjYTncUY4Ah30qL6pICEaLbqOXZETzGK6ijZ+PD943ztCP26R3t5FGBKNAIa6huhQJweiQ\n7Abpz3BV4Q7pFIu0dQ797PWcLDi+gvC2xDaI4UdCIIaSJIOdZ9/iOJIEJmmmTtFkjRA6g9l5nzdt\nEVxg6P1Fx77IAAANF0lEQVREYRhRUDqdyoUAuBZ4ELij49ipwA3A3eHPU3zqkhCMDlkZHfv5g5+b\nm+tbUPoha4prP3lrsoQmT3B5WNA01WzqIATnA8/tEoKrgCvCz1cAV/rUJSEYHcp04dRhtkzatNI0\nEYiMeRppI4POZ/QVhDvvLPll9Ekd/j3rjq8QjFESzrnPAIe6Dl9KsECN8OfLy7q/GE7m5+dpNpvr\njjWbTebn5/uu+8CBA6nHFxcXmZ6eZmxsjOnpaRYXF/u+ZzdJz3f8+
PHMa6emplJ/PzMzw969ezfU\nD3D06NHV54lM/bOfnX6/c84Bs6DUkax/T5EDH7XotQDTrB8RPNzx2Tq/x1w7S5DtdN/U1FRJeinq\nSFl+37Qe5CDdDHHPlzXdNO+ah7gFcEl1fPe7/qOEOqERQTZU7RpyGUIQfv+2Tz1yDYkiSDP2VRuV\ntNlSvYhhr8/jKwiPPNLHwxaEYgTZ1FUI9gNnhp/PBPb71CMhEEWRvjArv19+EG3rhX6fx1cQnv3s\n+3tuYxH45msaVeoqBL/H+mDxVT71SAhE2RS58rgOUxmLGuHcdZe/KDg32HegEUE2lQsB8AHgfuBx\n4F7gNUAL+CTB9NEbgVN96pIQiLLp16jUzSgV7Wpyzl8QBvUOqnbnDQOVC0GRRUIgBkE/vdk6GqVe\nF6glsVaHryD8Yl/vIOvfow7uvLojIRBigNTZKBUlUhvruSyX2ygPPiOsOopv3fAVgtLWEQgxSiTN\n8c+a+z8Iippvv3ENxAdoNidZWMheb5F3PcLu3bs5duzYumPHjh1j9+7dKe0pbs3JqCEhEKIA4oxS\no9Hg6NGjpS5Q86EokZqZmWHPnj20223MjHa7zZ49e5iZmcE5cgnC3Xenn+cjXmntETnxGTZUXeQa\nEsNAp0+71Wpt2CRn0HmNqmjLWlziw14uo23b4usZdbdPUbOvUIxAiOqo0pDF+dcbjYZrtVoDn9r6\nxBN+MYTuOELdZmENkiKf3VcILDi33uzYscPt27ev6mYI4c3Y2Bhxf1tmxsrKSqn3np6eZnl5ecPx\ndrvN0tJSqfdOwzdG8OijcMIJQe6n3bt3c+DAAaamppifnx8Jt0+R/35mdotzbkfWeYoRCFECVQaP\n65qMLer7n3NO+nknnhiIxmc+M8PS0hIrKyvMz8+ze/fuyuMtg6CKfz8JgRAlUOWMljrPYAK4445A\nEL71rfTz9uxZCy7Pzs6yvLyMc47l5WVmZ2c3rRhU8e8nIRCiBKqc0TIs0yrPOKNjLXIGx459h8Bd\nHn1fP5V0M1HJv59PIKHqomCxEPmoU96jPPivWr6wFov1ymLQs4YULBZC1I5PfxouuMDv3CEwYZWh\nYLEQm5hB7KZWJS98ob/bKIojbLZ3MEgkBEIMGYuLiyMVPHWeq5Z/7udmMIPDhwfQqE2GhECIIcMn\nD89mI0pj4Rxce236uU96UjBCeN/7BtO2zYBiBEIMGVUuVqsLwTsAyH7erVvhyJHSm1RLFCMQYpNS\n93UCgyB4VgdYWJI5enQtjvDEE4No3fAhIRBiyBiWdQJlsvEdGM3mJG9+882p101MBILw2c+W2ryh\nQ0IgxJCh9MvJ7+Ad77gA5+CRR9KvP//8QBAuumgw7a07ihEIITY1vsnuhsAU5kYxAiGEYG09wtVX\np58XxRG+/vXBtKtOSAiEECPBrl2BIDzwQPp53/d9gSC85S2DaVcdkBAIIUaK00/3W7X8znfm32t5\nWJEQCCFGlkgQXvOa9PMiQXj44cG0a9BICIQQI8973xsIwh13pJ93yimBIGStbh42JARCCBFyzjmB\nIGQt0H7NawJBmJgYTLvKRkIghBBdmK25jZ7znOTznnhizW10/Pjg2lc0EgIhhEjh1lsDQfjEJ9LP\n27IlEIS77x5Mu4pEQiCEEB5cfHEgCI8+mn7eM58ZCMIXvjCYdhWBhEAIIXJwwgl+009/9EcDQXjX\nu+q/arkSITCzi81sv5l9zcyuqKINQgjRL5EgXHdd8jlvehOMjcFLXgLf+c7g2paHgQuBmY0Dfwi8\nGDgbuMzMzh50O4QQoihe+tJAEL77XXjlK+PP+du/DfZGOOkk+OpXB9u+LKoYETwP+Jpz7hvOuceA\nDwKXVtAOIYQolEYDFhcDUfizP4s/59FHg2mqZrCwMNj2JVGFEDwV+GbH93vDY0IIsWl41asCQbj9\n9kAg4vj5nw8E4dWvhscfH2jz1
lHbYLGZzZrZPjPb99BDD1XdHCGE6Ilzzw1cRkeOBDOP4vjzPw/E\not2Gb34z/pwyqUII7gOe1vH9rPDYOpxze5xzO5xzO0477bSBNU4IIcpg69ZgLcLKCrzjHfHnHDgA\nU1PBKCFr3UKRVCEEXwKeYWZPN7MG8LNASsxdCCE2D2bw5jcHbqPPfS75vEsuCc696aby2zRwIXDO\nPQG8Afg74J+BDznn7hx0O4QQomqe//xAEB56CM47L/6cCy8svx2VxAicc9c7557pnPt3zrnR2XFb\nCCFi2L4d9u0L8hW99a2Dv39tg8VCCDFqjI3BlVcGo4Trr4fXvhbuvbf8+24p/xZCCCHy8uIXB2UQ\naEQghBAjjoRACCFGHAmBEEKMOBICIYQYcSQEQggx4kgIhBBixJEQCCHEiGOu7nuoAWZ2BNhfdTs8\n2Q78a9WNyMEwtVdtLQe1tRzq0Na2cy4za+ewLCjb75zbUXUjfDCzfcPSVhiu9qqt5aC2lsMwtVWu\nISGEGHEkBEIIMeIMixDsqboBORimtsJwtVdtLQe1tRyGpq1DESwWQghRHsMyIhBCCFEStRcCM7vY\nzPab2dfM7Iqq25OGmS2Z2e1mdpuZ7au6PZ2Y2bVm9qCZ3dFx7FQzu8HM7g5/nlJlGztJaO9vmtl9\n4fu9zcwuqbKNYZueZmY3mdlXzexOM9sVHq/du01pa+3eK4CZnWhm/2hm/xS297fC43V8t0ltreW7\n7abWriEzGwfuAi4C7iXY7/gy59xXK21YAma2BOxwzlU9d3gDZnY+cBR4n3Pu3PDYVcAh59zbQ5E9\nxTn3tirbGZHQ3t8EjjrnErb+HjxmdiZwpnPuy2a2DbgFeDnwKmr2blPa+gpq9l4BzMyASefcUTOb\nAP4e2AX8NPV7t0ltvZgavttu6j4ieB7wNefcN5xzjwEfBC6tuE1DiXPuM8ChrsOXAnvDz3sJjEIt\nSGhv7XDO3e+c+3L4+QjBPtxPpYbvNqWttcQFHA2/ToTFUc93m9TWoaDuQvBU4Jsd3++lxv9xCf7h\nbzSzW8xsturGeHCGc+7+8PO3gDOqbIwnv2RmXwldR5W7BDoxs2ngB4EvUvN329VWqOl7NbNxM7sN\neBC4wTlX23eb0Fao6bvtpO5CMGy8wDn3HODFwOtD98ZQ4AIfYd17MH8EfC/wHOB+4J3VNmcNM9sK\n/BXw35xzhzt/V7d3G9PW2r5X59zx8G/qLOB5ZnZu1+9r824T2lrbd9tJ3YXgPuBpHd/PCo/VEufc\nfeHPB4GPEri26swDod848h8/WHF7UnHOPRD+sa0Af0JN3m/oE/4rYNE595HwcC3fbVxb6/peO3HO\nPQzcROBzr+W7jehs6zC8W6i/EHwJeIaZPd3MGsDPAtdV3KZYzGwyDMBhZpPATwB3pF9VOdcBO8PP\nO4GPVdiWTKI//pD/SA3ebxgk/FPgn51zv9/xq9q926S21vG9ApjZaWb25PDzSQSTRv6Fer7b2LbW\n9d12U+tZQwDhdKurgXHgWufcfMVNisXMvpdgFABBMr+/qFNbzewDwAUEGREfAH4D+GvgQ8AUsAy8\nwjlXiwBtQnsvIBhiO2AJ+MUOX3ElmNkLgM8CtwMr4eFfI/C91+rdprT1Mmr2XgHM7AcIgsHjBJ3W\nDznnftvMWtTv3Sa19f3U8N12U3shEEIIUS51dw0JIYQoGQmBEEKMOBICIYQYcSQEQggx4kgIhBBi\nxJEQCAGY2dGYY88ys5vDrJH/bGZ7zOwnOzJJHrUgM+5tZva+juuuDjNOjoXfX91xzWO2lqH27YN8\nRiGS0PRRIQiEwDm3tevY3wH/2zn3sfD79zvnbu/4/c3AW5xz+zqOjQH3EKQT+FXn3E1ddS5R0wy1\nYnTRiECIZM4kSHQIQKcIpHABcCdBjpnLymmWEMUiIRAimXcBnzKzT5jZG6MUAhlcBnyAYJX5T4W
5\nfYSoNRICIRJwzv0Z8GzgwwQ9/S+Y2QlJ54f5sC4B/jrM6vlF4CcH0FQh+kJCIEQKzrn/55y71jl3\nKfAEcG7K6T8JPBm4PYwFvAC5h8QQICEQIgEL9sueCD9/D9AiPQ36ZcAvOOemnXPTwNOBi8ysWXpj\nheiDLVU3QIia0DSzezu+/z7B/hfXmNmj4bFfcc59K+7i0NhfDFweHXPOfcfM/h54KfB/ymm2EP2j\n6aNCCDHiyDUkhBAjjoRACCFGHAmBEEKMOBICIYQYcSQEQggx4kgIhBBixJEQCCHEiCMhEEKIEef/\nAyATSHf+aIYdAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "regressor = LinearRegression(normalize=True) #使用sklearn里线性回归的模块\n", + "regressor.fit(X, Y_train)\n", + "plt.scatter(X, Y_train, color='black')\n", + "plt.plot(X, regressor.predict(X), color='blue', linewidth=3)\n", + "plt.ylabel('Median Home Value')\n", + "plt.xlabel('LSTAT')\n", + "plt.show() # 使用抛物线拟合会不会更好?" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('Coefficients: \\n', array([-0.96814078]))\n" + ] + } + ], + "source": [ + "# 我们可以查看beta1的估计值\n", + "# 我们可以这样理解beta1:下层经济阶层人口百分比(LSTAT)每增加1个百分点,房屋中间价平均降低约970美元\n", + "print('Coefficients: \n', regressor.coef_) #beta_1的点估计值" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "39.817150504744163" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 使用MSE(均方误差,Mean Squared Error)来检查预测误差,MSE越大误差则越大;对MSE开平方即得到RMSE。\n", + "X_1d_test = X_test[:, 12].reshape(-1, 1)\n", + "metrics.mean_squared_error(Y_test, regressor.predict(X_1d_test))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 后续问题:\n", + "* 如何解决非线性问题?\n", + "* 给定某个LSTAT,如何预测房价?\n", + "* 仅仅使用一个特征是不是足够?如何将其他特征放入模型中?\n", + "* 如果要使用多个特征,我们应该如何选取?我们在后续课程中会介绍著名的lasso来帮助线性模型选取特征。\n", + "* 如何评判一个模型的好坏?在面对未知的数据时,如何评估一个模型的预测能力?"
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 使用经典lasso方法进行模型选取\n", + "## lasso的核心思想是加入$L_1$ penalty(惩罚项)以达到参数稀疏性的目的,我们暂时可以理解为一个可以帮助我们选择有用特征的黑盒子方法。" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn import cross_validation\n", + "from sklearn.linear_model import Lasso, LassoCV" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(100,)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# alpha是lasso需要的调谐参数(tuning parameters)。\n", + "alphas = 10**np.linspace(10,-2,100)*0.5\n", + "alphas.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 画出参数在不同alpha取值下的估计值:当alpha变大时,大部分参数的估计值都变为零,从而达到特征选取的目的(并不是所有变量都是有用的,一些变量是噪音)。最优的alpha取值由训练集上的交叉检验(cross-validation)来决定。\n", + "* 核心思想是使用不同的alpha拟合模型:alpha越大,系数估计值的绝对值越小;当alpha足够大时,所有的系数估计值都为0。\n", + "* 我们的目标是选取一个合适的alpha,使一部分特征的系数估计值为0,同时模型仍有好的预测能力。这样就达到了特征选择的目的。\n", + "* 不必完全理解里面的所有含义,我们使用这个案例来展示python代码的简洁易用性。" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAZQAAAEOCAYAAACuOOGFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8XXWd//HX5y7Z2zRJ9yVNN9ayx7IWC5StAyLoOMjM\niKJT0VFxnJ+Ojj+X3w91FHH86eiAHcERhxGQXUA2B2UXWgqlC4VSWts0bZqkSdrs997P7497k6Yh\naZP0brl5Px9e79nuOZ+TlPvO95zvOcfcHRERkcMVyHQBIiKSGxQoIiKSFAoUERFJCgWKiIgkhQJF\nRESSQoEiIiJJoUAREZGkUKCIiEhSKFBERCQpFCgiIpIUoUwXkE4TJ070qqqqTJchIjKqrFq1qt7d\nJx1quTEVKFVVVaxcuTLTZYiIjCpmtnUoy+mQl4iIJIUCRUREkkKBIiIiSaFAERGRpFCgiIhIUihQ\nREQkKcZUt+GR6mxrpbuzExKPS3YcHOKPT3Y8tv/de949Bu54LIa7x1+xGLFYNPEew6NRotEoHo0S\ni0UpLi1jwrTpFBSXZHR/RURGQoEyBM/8+jZee/zhtG2vqHQCE2fNZuq8BUyddwTTjzya4glladu+\niMhIKFCG4KgzFjOpsgoAM4tPtPj/mSVegfjRQwsE4sv0TLcAFrDE9ACBQAALBAgEgvH3UJBgMISZ\nsW9PI407trOntoa6dzaz8qH7iEWjAEyqmkvV8Scx95RFzDji6N7tiYhkC/PEYZyxoLq62kfTlfKR\nri7qtmxm27o1bF2zmpqNG4hFI4yfNIWjz1rCsUvOo2zq9EyXKSI5zsxWuXv1IZdToIweXR3tvP3y\ni6x/5im2rnkVgKPPei+nfeBKyqbNyHB1IpKrFCgDGO2B0te+xgZWPnw/rz3+CNFINwuXLOXsv7lG\nJ/RFJOkUKAPIpUDp0dq0h5ceuJvVj/6W4rJyLrz2OqqOPynTZYlIDhlqoOjM7ihXPKGMc67+O666\n/kby8gu459tf4/e33tx7Ml9EJF0UKDli6vwj+Jvv/YiTl13Gq489xAM3fovuzo5MlyUiY4gCJYeE\n8/I55+q/Y+knPs07q1fxm+u/SvvelkyXJSJjhAIlB51w/jIu/cKXqduymTu+8U8KFRFJCwVKjlqw\n6Aw+8JX/Q/OuWh781+8QjXRnuiQRyXFZGShmNsvMnjKz9Wa2zsyuG2CZJWbWbGavJl5fz0St2WzW\nscdzwbXXsX39Wn5/y02MpR59IpJ+2XrrlQjwj+7+ipmNA1aZ2RPuvr7fcs+4+yUZqG/UOGbxOTTW\nbONP991F+YxZVF9yeaZLEpEclZUtFHevdfdXEsN7gQ2ALgUfoTM/9DcsOPUM/vhft7LjzTcyXY6I\n5KisDJS+zKwKOAn40wCzzzCzNWb2OzM7NlU1eCQyqg8XWSDARZ/6PCXlFTyx4t90PkVEUiJbD3kB\nYGYlwD3A5929f1elV4BKd99nZsuA+4EFA6xjObAcoLKyckR17PreDTTddRfBsjKCEyYQKi8nNHky\noSlTCE+dQriykrzZswlPm4YFgyPaRqrlFRax9OOf5v4b/i8vP3APp33gykyXJCI5JmsDxczCxMPk\ndne/t//8vgHj7o+Y2b+b2UR3r++33ApgBcRvvTKSWorPOB3LCxPd00S0qYlIQz2dL24msns39Lki\n3fLyyJs/j4IjjqTg6KMoPOkkCo4+GguHR7LZpJt3yiKOPH0xL957BwtOO5OKGbMyXZKI5JCsvJeX\nxR868kug0d0/P8gyU4Fd7u5mtgi4G5jtB9mhZN/Ly6NRIrt30/XnP9O1dStd72yhc+NGOt7cSHR3\nPNesqIiiE0+kZMkSxi09j/D0zN5uvrVpD//5hU9RMWs2f/WNf9FzVUTkkEb1zSHN7CzgGeB1IJaY\n/M9AJYC732xmnwE+RbxHWDvwBXd//mDrTefNIbt31dH+yiraV
q6i9U8v0rXpbQAKFi6k9IrLKX3f\nZQRLitNSS3+vP/U4j9/8Y/7iui9x1BlnZ6QGERk9RnWgpEom7zbctWULe598kuZHHqFz/QYCxcWU\nvv/9VFzzMcIz0tuBLRaLctsXP4u7c/WNPyEQyM7zPiKSHXS34SyTV1VFxSc+wZx77qHqzjsoOe9c\nmu66i7cvXkbdjTcSbUnf7VECgSCnf/AqGmu2sfH5Z9K2XRHJbQqUNDMzCk84gRk33MC8xx9j/MUX\n03DLrbx9wYW0PPpY2uo44tQzmFhZxQt3/1q3uheRpFCgZFB42jSmf++7zLnnbsKzK6n5/OdpuOWW\ntFzzYoEAZ3zwKvbU1vDGc39M+fZEJPcpULJAwTHHMPu22xh38UXUff9Gdl1/PR6JpHy7899zGpOq\n5vLCPWqliMjhU6BkiUB+PjN+8APKP34Ne/771+z4yj+nvKVigQCnf/DDNO2sZdPLL6R0WyKS+xQo\nWcQCAaZ88YtM+vx1tPz2t9T/5Kcp3+a8UxZRXFbO+meeSvm2RCS3KVCyUMUnP0npB66g/qc/pen+\n+1O6rUAgyFFnvpd3Vq+kraU5pdsSkdymQMlCZsa0b36TotNOo/ZrX6f1pZdSur1jFp9DLBpl4wvq\nQiwiI6dAyVIWDjPzxz8ib+ZMdnzxS0T37UvZtiZXzWVSZRUbntZhLxEZOQVKFguOH8/0732XyO7d\n1N14Y0q3dfTZ51K7aSONO2pSuh0RyV0KlCxXePzxlH/kIzTdcSetf0rdoa+jz3wvZgE2PKtWioiM\njAJlFJh03ecIz5pF7de/Rqy9PSXbKCmvoPK4E9jwzFOj+mFiIpI5CpRRIFBYyLTrr6d765+p/2nq\nuhIfs/gcmut2UbNxfcq2ISK5S4EyShSfdiqll11G46/+i+66upRsY/6i0wnl5bPx+adTsn4RyW0K\nlFFk4qc/hXd303jrL1Ky/ryCQuaeVM2bLz5HLKZbsYjI8ChQRpG82bMpvfQS9tx5J5GGhpRs44jT\nF9PW3ETNhnUpWb+I5C4FyihT8clr8Y4OGv/zP1Oy/rknVRPKz9dFjiIybAqUUSZ/7hzGL1tG4+3/\nTWTPnqSvP1xQwLyTF/Hmn57XHYhFZFhCmS5gMGZ2EfAjIAj83N2/22++JeYvA9qAj7r7K6moZV/r\nW3R21IIFMAwwMMMI9E4zC4AFMQKYBROv0P73QIiAhQkE8jDLIxAIxz8zAhOv/SQtjzxCw623MPkL\nX8A99q5l4j8e6xlLjA/NkacvZuMLz7Bt/evMPu7EEdUoImNPVj5T3syCwJvA+cB24GXgw+6+vs8y\ny4DPEg+UU4EfufupB1vvSJ8p/8bGr1NTc/uwP3coZnkEg/kEAoUEAwVgBnjif9F4UHisd9g90vuK\nRbv258Xwtgo9AUggETSBRMCFCATyCAQKaNy2i4LicqbPPYOS4iMoLl7A+PELCYfLkvkjEJFRYKjP\nlM/WFsoiYJO7bwYwszuAy4C+F0hcBtzm8UR80cwmmNk0d69NdjGzSy9mmh2J4zgxcAeceBRHExcC\nxuJf+r3v0T7jEdyjxLw7HgbeTcwjxGJdRL2bmHcS8674eswSORFItHbiX/jW2/oJECAIMWPfC6/R\n9U4thUfPpbh64f5WiPfUBtDTevFEnd4zFh92x4kS8xhOhJhHiMa62BfsoL2lifpdj1Eb+03v2grD\nMxmfv5DS/OMpLTieguAUjBgQxWKR+Hsg/vOwkGH5YSwUhmAYgnnx90Co92eIOySCc/8rGn+P9Qw7\n5BVBXkn8FcpL9q9YRJIgWwNlBrCtz/h24q2QQy0zA0h6oDz7mx8yf9+GAecZh27hWb9WYCDx2v9Z\nP6Cx4R5iKM2PfICJBVC3Ax7ZMehHDlh3YszetdX9S7sZEwAI4e8YbqVgMWIWw60Bt/+h0/6HOsAJ\n4B6Afj+H7s4gHj1grQcMW
+/7gcMAAWJY4j2C81JRgJqQsTMcYHfQiFrP1iyxT/23Yom47Jnff7mR\nGlGTUCQrHNVZzfXX/Tyl28jWQEkaM1sOLAeorKwc0Tp25s0mWLR30PkDfzEfahnrN3/4fJjfb+b7\nt9M3zPpW1Df8jFifz+5vlfV8+QcshllPqyfxpR2B2N4YHgUL2QE17o8PeuPD+0SJY8T6RExNOMI9\nFY3U5ncDUBINUBoJEXQ7ICL6x8b+M0fvnnZ43v0zExkt0vGvNlsDpQaY1Wd8ZmLacJfB3VcAKyB+\nDmUkxbw55Wq+9uLWkXx0AA4WxQLdBAJdBALdWKCThe2FLGmaRkcwymPl29mZ3xH/6z/xtzoEwOOH\nwSAIHgSMIEHi50CCBAkSCAQIWoCghQgFAwQDQfICQcIhIxQMkBcKkBcMEA4aBeEgBeEg+aEAhXlB\nivKCFOWFGFcQf5XkhyktDDO+MMS4/CDPPP4Qb2zYwLJly1i0aNGBe+XOiqc38/3HNlJenMf3rjye\nJUdNHtFPKBKL8Mt1v+Rnr/6UcXnj+MGpX+WsGWdRFC46zJ+9iKRStp6UDxE/KX8e8ZB4GbjK3df1\nWeYvgM+w/6T8j9190QCr6zXSk/KdkSixfh2pBus0ZQY9fz/3LGPxehPvvKvHVSzm/Oxzf2DK7PFc\nfO1xFI7LrnMEsViM+++/nzVr1nDBBRdwxhlnHDA/GnO+/sBabv/Tn1l23FS+c/lxTCga/j5EY1Ee\n3fIoN712E1tbtrK0cilfO/1rlBeUJ2tXRGQERvVJeXePmNlngMeIdxu+1d3Xmdm1ifk3A48QD5NN\nxLsNfyxV9eSHgqlaNQBdbRFiEWfeyZOzLkyi0Sj3338/r7/+Oueee+67wqSjO8rnfr2ax9fv4tr3\nzuOfLjpyWF2UAdq623hsy2Pctv42NjVt4oiyI/jxOT9myawlw16XiGROVgYKgLs/Qjw0+k67uc+w\nA3+fjlqeq3mODY0HnpS3xLUdhhFIXE/SM2yWeO8zHrQghhEMBAlYgJCFCAaCBC1IZ0uUbaVvscm6\niOyqI2hBQoEQ4UC49z0vmNf7XhAsIBQIpfzLNhqNcu+997Ju3TrOO+88Fi9efMD8bY1tfPbXq3lt\nexPfvPQYPnrmnCGvuzvazaq6VTz6zqP87p3f0RZpY17pPL5/9ve5oOqC3p+piIweWRso2eSpbU9x\n58Y7U7uRY+DhbRzYb+0gAhagIFhAQaiAwlAhhaFCisPFFIeLKQmXMD5/POPzxjMhfwIVhRVUFFQw\nsXAiU4unMj5v/CHDqL29nQcffJANGzZw/vnnc+aZZx4w/8HXdvDVe18H4Ka/PpmLFk47ZM1NHU08\nXfM0f9j2B57f8Tyt3a0Uhgq5sOpCPrDgA5ww6QS1SERGsaw8h5IqIz2H0h3rxt17rzzpGe752Tme\nuI4jPs3diXq0d7xnXtSjxGKx+LvH6I51E/Uo296s55l7N3L2VUcwfmo+kViEaCxKd6ybSCxCd6yb\n7lg3XdEuOqOddEY76Yh00BHtoD3STkekg7buNlojrbR1t7G3ay8tXS20dLUQiUXetT+FoUKmFk9l\nesl0ZpbMZGbJTI4oO4KjKo5iQt4E1qxZwxNPPEFbWxsXXnghp512GhA/1/PylkZ+9eJWHlpTy8mV\nE/jRlScxq3zgk+Wd0U7W1q9ldd1qnqt5jtV1q4l6lEmFkzh75tm8d+Z7OXXaqTrZLpLlRvU5lGwT\nDoRTuv6A1bJpb4RFM97DhMnJ+3J1d1q7W2nsaKSho4HdbbvZ2bqTnW072dm6k+17t/P67tdp6Wrp\n/UyJlzC+bTxVFVVcctElxMrC/GLlH1i7o5ln36pn975OCsLGh86ayvtOhM2tL7O2eX+I7W7bzY59\nO9jRuoPNzZt7A21B2QKuWXgN51aeyzEVx+iQlkgOUgslA9ydlo4Itc3tbKlv4/kXa3h13W6
qqicT\nDPWcj4FAwAiaEQgYoYAR7HkPGuFAgGDAenuVmSUuOsfj74mr5d0hlhiOeXxeNOa9w+0dHexq3M62\n5nW0BXYQKdpNtLCO7mD9iPatJFzCtJJpzCiewZwJczhp0kmcOPlEygp0yxaR0UotlCSLxZzuWIxI\n1OmOxuiKxOhMvDq6o73vbV1R2roi7OuM0NIeobm9m+b2bva0dtHQ2klDaxc7mzto6zrwTr5FYWiq\nad6/vT5f/lF3orH4dqMxJxJzItEYsUP8LRAPGwiYEUiMBI347Vbc8Vi09xYoecGFFBacTDH5lMRC\nFOV3Yfm1zCwPc+TUccybVExBOP7PxbB4Z4FgmJCFKA4XMy5vHOPzx5MfzE/uD15ERg0FyhB87f61\n/GqEFzaGg0ZpYZiyojwqSvI4eup4lhwxmWmlBUwtLWB2RRHbn9pB7WsNfPxLiw+9wj48ETqeGB7o\nWpfOzk5qamrYtm0bW7duZevWrUSjUUKhEHPmzGHevHnMnz+fiRMnjmj/RER6KFCGYMmRk6goySMc\nDBAKGOGeK85DAfJDAfJDQQrCAQrCQUryQxTlBSnODzG+IExBOHDInku1HdspKB7+eRqz+KGuWCzG\nvn17aWpqoqmpiYaGBurq6ti9ezeNjY29nQcmT57MokWLmD9/PpWVlYTDqT03JCJjiwJlCM47egrn\nHT0lZetv39dFsCjCrl276O7ufterq6uLzs5OOjs76ejooKOjg/b2dlpbW9m3bx+tra30PRdmZpSX\nlzN58mQWLlzIrFmzmDFjBoWFhSnbBxERBUqatbe309DQQENDA/X19dTW1vJO85+J0sUbNz150M8G\nAgHy8/MpKiqioKCAcePGMX36dEpKShg3bhwTJkygrKyM0tJStT5EJO0UKCPg7kSj0d4WRGdnZ28r\nor29vbcF0dbW1tuKaGlpobm5ma6urt71mBmTJk2iMFrB5ElTOOW8BYTDYcLhMHl5eYTDYUKhEPn5\n+eTl5REKpf7qeBGRkVKgDMGTTz7J6tWriUajRCIRotEoQ+luHQqFKC4upri4mIqKCubOnUtpaSnl\n5eVUVFRQVlZGKBTiZ9f9kbnHT+fYYxekYW9ERFJDgTIEkyZN4qijjiIYDBIKhQgGg70tiXA43NuC\nyM/Pp6CggMLCQgoKCsjLyztkiyLaHSPSGR3RSXkRkWyiQBmCE044gRNOOCEl6+5ojT88qqBEgSIi\no5vuf5FhvYGiFoqIjHIKlAzrCZT8YjUWRWR0U6BkmFooIpIrFCgZ1rFPgSIiuUGBkmE6KS8iuSLr\nDtyb2feBS4Eu4G3gY+7eNMByW4C9QBSIDOXWytmoszVCMBQgFFa2i8jolo3fYk8AC939eOBN4CsH\nWfYcdz9xtIYJxFsoBcW6Al5ERr+sCxR3f9zde55b+yIwM5P1pFpHa7cOd4lITsi6QOnnGuB3g8xz\n4EkzW2Vmy9NYU1LFWygKFBEZ/TJyDsXMngSmDjDrq+7+QGKZrwIR4PZBVnOWu9eY2WTgCTN7w92f\nHmBby4HlAJWVlUmpP5k6WiOUTU3ec+RFRDIlI4Hi7ksPNt/MPgpcApzng9yF0d1rEu91ZnYfsAh4\nV6C4+wpgBcSfKX94lSefWigikiuy7pCXmV0EfAl4n7u3DbJMsZmN6xkGLgDWpq/K5HB3OvcpUEQk\nN2RdoAA/AcYRP4z1qpndDGBm083skcQyU4Bnzew14CXgYXd/NDPljlx3R5RYzHXbFRHJCVn3Tebu\n8weZvgNYlhjeDKTm9r9ppNuuiEguycYWypihQBGRXKJAySDddkVEcokCJYN6A6VIgSIio58CJYM6\nW+M3BFALRURygQIlg/RwLRHJJQqUDOrY101eQZBgUL8GERn99E2WQR2t3eSrh5eI5AgFSgZ1tEbU\nZVhEcsaQAsXM5plZfmJ4iZl9zswmpLa03Kdb14tILhl
qC+UeIGpm84nfaHEW8N8pq2qM0I0hRSSX\nDDVQYomHXl0O/Ju7fxGYlrqyxobO1m4KitTDS0Ryw1ADpdvMPgxcDTyUmKY/rQ9DLOZ0tkXI1yEv\nEckRQw2UjwGnA99293fMbA7wq9SVlfs6E9egFCpQRCRHDPV4y/nu/rmekUSodKSopjGhtbkLgKLx\n+RmuREQkOYbaQrl6gGkfTWIdY05bcycARaV5Ga5ERCQ5DtpCSZw3uQqYY2YP9pk1DmhMZWG5rqeF\nUqxAEZEccahDXs8DtcBE4Ad9pu8F1qSqqLGgrSXRQtEhLxHJEQcNFHffCmwlfkJekqituYtwQZBw\nfjDTpYiIJMVQr5S/wszeMrNmM2sxs71m1pKKgszsm2ZWk3ie/KtmtmyQ5S4ys41mtsnMvpyKWlKp\ntbmL4lK1TkQkdwy1l9cNwKXuviGVxfTxQ3e/cbCZZhYEfgqcD2wHXjazB919fZrqO2xtLZ0Ujdf5\nExHJHUPt5bUrjWEyFIuATe6+2d27gDuAyzJc07DEWygKFBHJHYfq5XVFYnClmd0J3A909sx393tT\nVNdnzewjwErgH919T7/5M4Btfca3A6emqJakc3famjspKp2Y6VJERJLmUIe8Lu0z3AZc0GfcgREF\nipk9CUwdYNZXgZuA6xPrv55477JrRrKdxLaWA8sBKisrR7qapOrujBLpiumQl4jklEP18vpYKjbq\n7kuHspyZ/Qf77x3WVw3xOx73mJmYNtC2VhC/QzLV1dU+vEpTo03XoIhIDhrSSXkz+/EAk5uBle7+\nQDILMrNp7l6bGL0cWDvAYi8DCxL3FKsBriR+Aeao0Np7lbx6eYlI7hjqSfkC4ETgrcTreOKtgo+b\n2f9Lck03mNnrZrYGOAf4BwAzm25mjwAkbqX/GeAxYANwl7uvS3IdKdPTQtFtV0Qklwy12/DxwJnu\nHgUws5uAZ4CzgNeTWZC7/+0g03cAy/qMPwI8ksxtp0tPC0XXoYhILhlqC6UMKOkzXgyUJwKmc+CP\nyGDaWroIhIx8PVxLRHLIcC5sfNXM/gAYcDbwHTMrBp5MUW05q625i6LxeZhZpksREUmaIQWKu9+S\nOH+xKDHpnxOHoAC+mJLKclhrc6cOd4lIzjnoIS8zOyrxfjLxZ8hvS7ymJqbJCLS1dOkaFBHJOYdq\noXyB+EWBPxhgngPnJr2iMaC1uZPp8ydkugwRkaQ61IWNyxPv56SnnNwX7Y7R2RpRl2ERyTlDvX19\nkZn9bzNbkRhfYGaXpLa03NS2t+dZ8goUEcktQ+02/AugCzgjMV4DfCslFeU4XYMiIrlqqIEyz91v\nALoB3L2NePdhGSZdJS8iuWqogdJlZoXET8RjZvPQBY0j0qYWiojkqKFe2PgN4FFglpndDpwJfDRV\nReWy1pYuMCgcF850KSIiSTXUQLkaeBi4G9gMXOfu9SmrKoe1NXdRWBImEBxq41BEZHQYaqDcAiwm\n/gz3ecBqM3va3X+UsspyVPxJjTrcJSK5Z6i3XnnKzJ4G3kP8lvLXAscCCpRh0rPkRSRXDfUBW78n\nfofhF4jftv497l6XysJyVVtzJxUzSw69oIjIKDPUA/lriF+HspD4s1EWJnp9yTB4zGnb262LGkUk\nJw31kFfPUxPHEe/d9QtgKqCTAcPQvq8bj7kOeYlIThrqIa/PED8pfwqwBbiV+KEvGYZ9ezoAKJ6g\nHBaR3DPUXl4FwL8CqxLPc08ZM7sTODIxOgFocvcTB1huC7AXiAIRd69OZV3JsLchHijjK3S0UERy\nz1APed2Y6kL6bOuveobN7AdA80EWP2c0XQ/TUp8IlIkFGa5ERCT5svah5hZ/Pu6HyKFnrrQ0tJNf\nFCK/SFfJi0juyebLtRcDu9z9rUHmO/Ckma0ys+VprGvEWuo7GFeh1omI5KaMtFDM7EnivcT6+6q7\nP5AY/jDw64Os5ix
3rzGzycATZvaGuz89wLaWE3/qJJWVlYdZ+eHZ29BO2dTijNYgIpIqGQkUd196\nsPlmFgKuIN6rbLB11CTe68zsPmAR8K5AcfcVwAqA6upqP4yyD4u7s7ehg8qFFZkqQUQkpbL1kNdS\n4A133z7QTDMrTlwTg5kVAxcAa9NY37C1tXQR6Y6ph5eI5KxsDZQr6Xe4y8ymm9kjidEpwLNm9hrw\nEvCwuz+a5hqHpbfLsHp4iUiOyspeXu7+0QGm7QCWJYY3AyekuazD0tLQDqCT8iKSs7K1hZJzeq9B\n0SEvEclRCpQ02VvfTuG4MOH8YKZLERFJCQVKmrQ0dDB+olonIpK7FChp0tKgixpFJLcpUNIgFnP2\nNXbo/ImI5DQFShq0NnUSi7q6DItITlOgpEFLfbzLsFooIpLLFChp0HNRo86hiEguU6CkQUt9OxiM\nK1egiEjuUqCkQUtDByUT8gmG9eMWkdylb7g0aKlv1+EuEcl5CpQ02KuLGkVkDFCgpFg0EmNfU6da\nKCKS8xQoKba3sQNcXYZFJPcpUFKsaVcbAKWTFSgiktsUKCnWWNsKQPk0PUteRHKbAiXF9uxopag0\nj4LicKZLERFJKQVKijXsaFXrRETGhIwEipn9pZmtM7OYmVX3m/cVM9tkZhvN7MJBPl9uZk+Y2VuJ\n97L0VD48HnP27GylYnpJpksREUm5TLVQ1gJXAE/3nWhmxwBXAscCFwH/bmYDPeLwy8Dv3X0B8PvE\neNZpaegg0hWjfLpaKCKS+zISKO6+wd03DjDrMuAOd+9093eATcCiQZb7ZWL4l8D7U1Pp4ek9Ia9A\nEZExINvOocwAtvUZ356Y1t8Ud69NDO8Epgy2QjNbbmYrzWzl7t27k1fpEDTu2Aeoh5eIjA0pCxQz\ne9LM1g7wuiyZ23F3B/wg81e4e7W7V0+aNCmZmz6kxh2tlJTlk1cYSut2RUQyIWXfdO6+dAQfqwFm\n9RmfmZjW3y4zm+butWY2DagbSY2p1ljbqsNdIjJmZNshrweBK80s38zmAAuAlwZZ7urE8NXAA2mq\nb8hiMWdPbZsOd4nImJGpbsOXm9l24HTgYTN7DMDd1wF3AeuBR4G/d/do4jM/79PF+LvA+Wb2FrA0\nMZ5VWna3E43EKFeXYREZIzJycN/d7wPuG2Tet4FvDzD9E32GG4DzUlZgEjTuUA8vERlbsu2QV85o\nrI338CqbWpThSkRE0kOBkiINO1oZV1FAXoF6eInI2KBASZHGHa1U6HCXiIwhCpQUiEZjNO1q0/kT\nERlTFChS+F4IAAAJ4UlEQVQp0FzXTizq6jIsImOKAiUF6rftBaB8hroMi8jYoUBJgZ2bWwjlB3UO\nRUTGFAVKCuzc3MyUqnEEgvrxisjYoW+8JOvujFK/fR9T55RmuhQRkbRSoCRZ3dYWPOZMnatAEZGx\nRYGSZDs3NwMwZe74DFciIpJeCpQk27m5hQlTiigsyct0KSIiaaVASSJ3Z+fmZqbOUetERMYeBUoS\nNe9up2NfN1N0/kRExiAFShLtSpw/mTZPgSIiY48CJYl2bm4hXBCkTLdcEZExSIGSRLWbm5lSNZ5A\nwDJdiohI2mXqEcB/aWbrzCzW57G+mNn5ZrbKzF5PvJ87yOe/aWY1ZvZq4rUsfdUPrKsjQmPNPqbq\ncJeIjFGZevrTWuAK4Gf9ptcDl7r7DjNbCDwGzBhkHT909xtTWOOw1G1pwR1d0CgiY1amnim/AcDM\n+k9f3Wd0HVBoZvnu3pnG8kZk24ZGAgFToIjImJXN51A+ALxykDD5rJmtMbNbzawsnYUN5J01DUxb\nMIH8Qj3yV0TGppQFipk9aWZrB3hdNoTPHgt8D/jkIIvcBMwFTgRqgR8cZF3LzWylma3cvXv3CPbk\n0Jp3t7OntpU5x09MyfpFREaDlP057e5LR/I5M5sJ3Ad8xN3fHmTdu/os/x/AQwepY
wWwAqC6utpH\nUtOhbFlTD0DV8RWpWL2IyKiQVYe8zGwC8DDwZXd/7iDLTeszejnxk/wZs+X1esqmFlE6qSiTZYiI\nZFSmug1fbmbbgdOBh83sscSszwDzga/36RI8OfGZn/fpYnxDomvxGuAc4B/SvQ89utoj7HiziSod\n7hKRMS5TvbzuI35Yq//0bwHfGuQzn+gz/Lepq254/ry+kVjMFSgiMuZl1SGv0WjLmnryi0PqLiwi\nY54C5TDEYs7WtQ3MXlih262IyJinQDkMOzc309HaTdVxOtwlIqJAOQybVtYRDAWoPFbdhUVEFCgj\nFOmK8uZLO5l70iRdHS8iggJlxN5evZvOtgjHnjU906WIiGQFBcoIrX92B6WTCpl+xIRMlyIikhUU\nKCPQtKuNHW81ccxZ0991x2QRkbFKgTIC65/dgQWMI0+bmulSRESyhgJlmKKRGG+8WEvVcRUUl+Zn\nuhwRkayhQBmmLWvqad/bzTE6GS8icgAFyjC4O6se3cr4iQW69kREpB8FyjC881o9u/+8l+plc3Sr\nFRGRfhQoQ+Qx56XfbmbClCKOPHVKpssREck6CpQh2vRKHQ01rbznL6oIBPVjExHpT9+MQxCLOS8/\n9A7l04uZX63WiYjIQBQoQ/DWy7vYs7ONRZfo3ImIyGAUKEPQVNfG5NnjmHvipEyXIiKStTL1TPm/\nNLN1Zhbr85x4zKzKzNr7PE/+5kE+X25mT5jZW4n3slTWe+qlc7niS6dgap2IiAwqUy2UtcAVwNMD\nzHvb3U9MvK4d5PNfBn7v7guA3yfGUyqoE/EiIgeVkW9Jd9/g7hsPYxWXAb9MDP8SeP/hVyUiIocj\nG//snpM43PVHM1s8yDJT3L02MbwTUNcrEZEMS9mjBs3sSWCg2/F+1d0fGORjtUCluzeY2SnA/WZ2\nrLu3DLYdd3cz84PUsRxYDlBZWTn0HRARkWFJWaC4+9IRfKYT6EwMrzKzt4EjgJX9Ft1lZtPcvdbM\npgF1B1nnCmAFQHV19aDBIyIihyerDnmZ2SQzCyaG5wILgM0DLPogcHVi+GpgsBaPiIikSaa6DV9u\nZtuB04GHzeyxxKyzgTVm9ipwN3CtuzcmPvPzPl2Mvwucb2ZvAUsT4yIikkHmPnaOAlVXV/vKlf2P\nnomIyMGY2Sp3rz7kcmMpUMxsN7A103UcwkSgPtNFJEGu7AdoX7JRruwHjI59me3uh7xVyJgKlNHA\nzFYO5S+BbJcr+wHal2yUK/sBubUvWXVSXkRERi8FioiIJIUCJfusyHQBSZIr+wHal2yUK/sBObQv\nOociIiJJoRaKiIgkhQJFRESSQoEiIiJJoUAZJcxsrpndYmZ3Z7qWkRjt9fdlZkeb2c1mdreZfSrT\n9YyUmS0xs2cS+7Ik0/UcDjNbnNiPn5vZ85mu53CY2TFmdpeZ3WRmH8x0PcOhQEkDM7vVzOrMbG2/\n6ReZ2UYz22RmB33qpLtvdvePp7bS4RnOfmVj/X0Nc182JJ4m+iHgzEzUO5hh/ltzYB9QAGxPd62H\nMszfyTOJ38lD7H/4XtYY5u/lYuDf3P1TwEfSXuzhcHe9UvwiftPLk4G1faYFgbeBuUAe8BpwDHAc\n8f8o+r4m9/nc3Znen5HsVzbWfzj7ArwP+B1wVaZrP4x/a4HE/CnA7ZmuPUn/vu4CxmW69sP8vUwG\nfgp8H3gu07UP56UWShq4+9NAY7/Ji4BNHv/LvQu4A7jM3V9390v6vQZ93ksmDWe/0l7cMA13X9z9\nQXe/GPjr9FZ6cMP8txZLzN8D5KexzCEZ7u/EzCqBZnffm95KD22Yv5c6d/974Mtk/z2+DqBAyZwZ\nwLY+49sT0wZkZhVmdjNwkpl9JdXFHYYB92sU1d/XYPuyxMx+bGY/Ax7JTGnDMth+XJHYh18BP8lI\nZcN3sP9uPg78Iu0Vjdxgv5cqM1sB3Ea8lTJqp
OyJjZJc7t4AXJvpOkZqtNffl7v/AfhDhss4bO5+\nL3BvputIFnf/RqZrSAZ330LiseWjjVoomVMDzOozPjMxbbTLpf3KlX3Jlf0A7UtWU6BkzsvAAjOb\nY2Z5wJXEH2082uXSfuXKvuTKfoD2JaspUNLAzH4NvAAcaWbbzezj7h4BPgM8BmwA7nL3dZmsc7hy\nab9yZV9yZT9A+zIa6eaQIiKSFGqhiIhIUihQREQkKRQoIiKSFAoUERFJCgWKiIgkhQJFRESSQoEi\nkiZmtsXMJh7uMiLZSoEiIiJJoUARSQEzu9/MVpnZOjNb3m9elZm9YWa3m9mGxJMfi/os8lkze8XM\nXjezoxKfWWRmL5jZajN73syOTOsOiQyBAkUkNa5x91OAauBzZlbRb/6RwL+7+9FAC/DpPvPq3f1k\n4CbgfyWmvQEsdveTgK8D30lp9SIjoEARSY3PmdlrwIvE7yi7oN/8be7+XGL4v4Cz+szruaX8KqAq\nMVwK/CbxCNkfAsemomiRw6FAEUkyM1sCLAVOd/cTgNXEn9veV/+b6PUd70y8R9n/zKLrgafcfSFw\n6QDrE8k4BYpI8pUCe9y9LXEO5LQBlqk0s9MTw1cBzw5hnT3PyvhoUqoUSTIFikjyPQqEzGwD8F3i\nh7362wj8fWKZMuLnSw7mBuBfzGw1etKqZCndvl4kzcysCngocfhKJGeohSIiIkmhFoqIiCSFWigi\nIpIUChQREUkKBYqIiCSFAkVERJJCgSIiIkmhQBERkaT4/xrBNrsaT/xgAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "lasso = Lasso(max_iter=10000, normalize=False)\n", + "coefs = []\n", + "# 使用不同的alpha训练一个lasso回归\n", + "for a in alphas:\n", + " lasso.set_params(alpha=a)\n", + " lasso.fit(X_train, Y_train)\n", + " coefs.append(lasso.coef_)\n", + "# gca是Get Current Axes的缩写,指的是获得当前图表\n", + "ax = plt.gca()\n", + "# coefs是一个列表,每个元素是13个特征在不同alpha下的估计值\n", + "ax.plot(alphas*2, coefs)\n", + "# 使用log为x坐标轴的计数法\n", + "ax.set_xscale('log')\n", + "plt.axis('tight')\n", + "plt.xlabel('alpha')\n", + "plt.ylabel('weights')\n", + "# 不同颜色的线代表不同变量,总共有13根线代表13个变量\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 现在我们的问题是在那么多alpha中应该选取哪一个?\n", + "* sklearn的lasso模块提供了使用交叉检验(cross-validation)功能帮助我们选取最优alpha" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.75489236729547671" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 利用cross-validation选取最优alpha,这里使用的标准是RMSE(Root Mean 
Squared Error)。\n", + "lassocv = LassoCV(alphas=None, cv=10, max_iter=100000, normalize=False)\n", + "lassocv.fit(X_train, Y_train)\n", + "# lassoCV帮助我们选取了最优的alpha值为0.755\n", + "lassocv.alpha_" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "30.866801508889896" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 将最优alpha代入lasso模型中即可!\n", + "lasso.set_params(alpha=lassocv.alpha_)\n", + "# 设定alpha之后,使用X_train和Y_train拟合模型\n", + "lasso.fit(X_train, Y_train)\n", + "# 放入多变量之后MSE(均方误差)显著减小!\n", + "metrics.mean_squared_error(Y_test, lasso.predict(X_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CRIM -0.078710\n", + "ZN 0.048115\n", + "INDUS -0.000000\n", + "CHAS 0.000000\n", + "NOX -0.000000\n", + "RM 1.652658\n", + "AGE 0.002939\n", + "DIS -0.874493\n", + "RAD 0.206365\n", + "TAX -0.014080\n", + "PTRATIO -0.852610\n", + "B 0.007214\n", + "LSTAT -0.677797\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "# 查看lasso参数估计,注意到有三个变量(INDUS、CHAS、NOX)系数的估计值为0,这就是特征选取。\n", + "import pandas as pd\n", + "print(pd.Series(lasso.coef_, index=boston_dataset.feature_names))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 通过以上分析我们可以获得以下商业洞察\n", + "* 房价随着犯罪率的下降而上升。\n", + "* 房价随着离波士顿五个就业中心的距离增加而下降。\n", + "* 房价随着税收比例的增加而下降。\n", + "* 房价随着下层经济人口百分比的增加而下降。\n", + "* 房价随着学生教师比的增加而下降,这点似乎出乎意料,可以进一步探究。\n", + "* 在该模型中,房价与是否靠近查尔斯河无关(其系数被lasso压缩为0)。\n", + "* 在该模型中,房价与一氧化氮浓度无关(其系数被lasso压缩为0)。\n", + "\n", + "# 我们还可以使用这个拟合好的模型对其他地区房价进行预测,以帮助合作伙伴决定新建房的位置面积等等。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 在实践当中\n", + "* 数据的获取往往是很费周折的一个步骤,从确定所需要的数据到采集清理,这个过程可以长达数个月甚至一年。\n", + "* 确定一个有意义的问题是成功的一半,现实问题中我们往往要将商业需要的问题转化为一个数据能回答的问题。\n", + "* 和模型无关的步骤往往要占据整个过程90%以上的时间,正如我们刚看到的,使用python做机器学习是非常简洁的!"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python [conda root]", + "language": "python", + "name": "conda-root-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/xinda/lesson10/intro_to_ML_revised.pptx b/xinda/lesson10/intro_to_ML_revised.pptx new file mode 100644 index 0000000..197a84d Binary files /dev/null and b/xinda/lesson10/intro_to_ML_revised.pptx differ

AltStyle によって変換されたページ (->オリジナル) /