同步操作将从 编程语言算法集/Python 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
"""Implementation of a basic regression decision tree.Input data set: The input data set must be 1-dimensional with continuous labels.Output: The decision tree maps a real number input to a real number output."""import numpy as npclass DecisionTree:def __init__(self, depth=5, min_leaf_size=5):self.depth = depthself.decision_boundary = 0self.left = Noneself.right = Noneself.min_leaf_size = min_leaf_sizeself.prediction = Nonedef mean_squared_error(self, labels, prediction):"""mean_squared_error:@param labels: a one dimensional numpy array@param prediction: a floating point valuereturn value: mean_squared_error calculates the error if prediction is used toestimate the labels>>> tester = DecisionTree()>>> test_labels = np.array([1,2,3,4,5,6,7,8,9,10])>>> test_prediction = float(6)>>> tester.mean_squared_error(test_labels, test_prediction) == (... TestDecisionTree.helper_mean_squared_error_test(test_labels,... test_prediction))True>>> test_labels = np.array([1,2,3])>>> test_prediction = float(2)>>> tester.mean_squared_error(test_labels, test_prediction) == (... TestDecisionTree.helper_mean_squared_error_test(test_labels,... test_prediction))True"""if labels.ndim != 1:print("Error: Input labels must be one dimensional")return np.mean((labels - prediction) ** 2)def train(self, x, y):"""train:@param x: a one dimensional numpy array@param y: a one dimensional numpy array.The contents of y are the labels for the corresponding X valuestrain does not have a return value""""""this section is to check that the inputs conform to our dimensionalityconstraints"""if x.ndim != 1:print("Error: Input data set must be one dimensional")returnif len(x) != len(y):print("Error: X and y have different lengths")returnif y.ndim != 1:print("Error: Data set labels must be one dimensional")returnif len(x) < 2 * self.min_leaf_size:self.prediction = np.mean(y)returnif self.depth == 1:self.prediction = np.mean(y)returnbest_split = 0min_error = self.mean_squared_error(x, np.mean(y)) * 2"""loop over all possible splits for the decision tree. find the best split.if no split exists that is less than 2 * error for the entire arraythen the data set is not split and the average for the entire array is used asthe predictor"""for i in range(len(x)):if len(x[:i]) < self.min_leaf_size:continueelif len(x[i:]) < self.min_leaf_size:continueelse:error_left = self.mean_squared_error(x[:i], np.mean(y[:i]))error_right = self.mean_squared_error(x[i:], np.mean(y[i:]))error = error_left + error_rightif error < min_error:best_split = imin_error = errorif best_split != 0:left_x = x[:best_split]left_y = y[:best_split]right_x = x[best_split:]right_y = y[best_split:]self.decision_boundary = x[best_split]self.left = DecisionTree(depth=self.depth - 1, min_leaf_size=self.min_leaf_size)self.right = DecisionTree(depth=self.depth - 1, min_leaf_size=self.min_leaf_size)self.left.train(left_x, left_y)self.right.train(right_x, right_y)else:self.prediction = np.mean(y)returndef predict(self, x):"""predict:@param x: a floating point value to predict the label ofthe prediction function works by recursively calling the predict functionof the appropriate subtrees based on the tree's decision boundary"""if self.prediction is not None:return self.predictionelif self.left or self.right is not None:if x >= self.decision_boundary:return self.right.predict(x)else:return self.left.predict(x)else:print("Error: Decision tree not yet trained")return Noneclass TestDecisionTree:"""Decision Tres test class"""@staticmethoddef helper_mean_squared_error_test(labels, prediction):"""helper_mean_squared_error_test:@param labels: a one dimensional numpy array@param prediction: a floating point valuereturn value: helper_mean_squared_error_test calculates the mean squared error"""squared_error_sum = float(0)for label in labels:squared_error_sum += (label - prediction) ** 2return float(squared_error_sum / labels.size)def main():"""In this demonstration we're generating a sample data set from the sin function innumpy. We then train a decision tree on the data set and use the decision tree topredict the label of 10 different test values. Then the mean squared error overthis test is displayed."""x = np.arange(-1.0, 1.0, 0.005)y = np.sin(x)tree = DecisionTree(depth=10, min_leaf_size=10)tree.train(x, y)test_cases = (np.random.rand(10) * 2) - 1predictions = np.array([tree.predict(x) for x in test_cases])avg_error = np.mean((predictions - test_cases) ** 2)print("Test values: " + str(test_cases))print("Predictions: " + str(predictions))print("Average error: " + str(avg_error))if __name__ == "__main__":main()import doctestdoctest.testmod(name="mean_squarred_error", verbose=True)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。