同步操作将从 编程语言算法集/Python 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
Top k most frequent words problem using a max-heap (#8685)
"""Linear Discriminant AnalysisAssumptions About Data :1. The input variables has a gaussian distribution.2. The variance calculated for each input variables by class grouping is thesame.3. The mix of classes in your training set is representative of the problem.Learning The Model :The LDA model requires the estimation of statistics from the training data :1. Mean of each input value for each class.2. Probability of an instance belong to each class.3. Covariance for the input data for each classCalculate the class means :mean(x) = 1/n ( for i = 1 to i = n --> sum(xi))Calculate the class probabilities :P(y = 0) = count(y = 0) / (count(y = 0) + count(y = 1))P(y = 1) = count(y = 1) / (count(y = 0) + count(y = 1))Calculate the variance :We can calculate the variance for dataset in two steps :1. Calculate the squared difference for each input variable from thegroup mean.2. Calculate the mean of the squared difference.------------------------------------------------Squared_Difference = (x - mean(k)) ** 2Variance = (1 / (count(x) - count(classes))) *(for i = 1 to i = n --> sum(Squared_Difference(xi)))Making Predictions :discriminant(x) = x * (mean / variance) -((mean ** 2) / (2 * variance)) + Ln(probability)---------------------------------------------------------------------------After calculating the discriminant value for each class, the class with thelargest discriminant value is taken as the prediction.Author: @EverLookNeverSee"""from collections.abc import Callablefrom math import logfrom os import name, systemfrom random import gauss, seedfrom typing import TypeVar# Make a training dataset drawn from a gaussian distributiondef gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> list:"""Generate gaussian distribution instances based-on given mean and standard deviation:param mean: mean value of class:param std_dev: value of standard deviation entered by usr or default value of it:param instance_count: instance number of class:return: a list containing generated values based-on given mean, std_dev andinstance_count>>> gaussian_distribution(5.0, 1.0, 20) # doctest: +NORMALIZE_WHITESPACE[6.288184753155463, 6.4494456086997705, 5.066335808938262, 4.235456349028368,3.9078267848958586, 5.031334516831717, 3.977896829989127, 3.56317055489747,5.199311976483754, 5.133374604658605, 5.546468300338232, 4.086029056264687,5.005005283626573, 4.935258239627312, 3.494170998739258, 5.537997178661033,5.320711100998849, 7.3891120432406865, 5.202969177309964, 4.855297691835079]"""seed(1)return [gauss(mean, std_dev) for _ in range(instance_count)]# Make corresponding Y flags to detecting classesdef y_generator(class_count: int, instance_count: list) -> list:"""Generate y values for corresponding classes:param class_count: Number of classes(data groupings) in dataset:param instance_count: number of instances in class:return: corresponding values for data groupings in dataset>>> y_generator(1, [10])[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]>>> y_generator(2, [5, 10])[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]>>> y_generator(4, [10, 5, 15, 20]) # doctest: +NORMALIZE_WHITESPACE[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]"""return [k for k in range(class_count) for _ in range(instance_count[k])]# Calculate the class meansdef calculate_mean(instance_count: int, items: list) -> float:"""Calculate given class mean:param instance_count: Number of instances in class:param items: items that related to specific class(data grouping):return: calculated actual mean of considered class>>> items = gaussian_distribution(5.0, 1.0, 20)>>> calculate_mean(len(items), items)5.011267842911003"""# the sum of all items divided by number of instancesreturn sum(items) / instance_count# Calculate the class probabilitiesdef calculate_probabilities(instance_count: int, total_count: int) -> float:"""Calculate the probability that a given instance will belong to which class:param instance_count: number of instances in class:param total_count: the number of all instances:return: value of probability for considered class>>> calculate_probabilities(20, 60)0.3333333333333333>>> calculate_probabilities(30, 100)0.3"""# number of instances in specific class divided by number of all instancesreturn instance_count / total_count# Calculate the variancedef calculate_variance(items: list, means: list, total_count: int) -> float:"""Calculate the variance:param items: a list containing all items(gaussian distribution of all classes):param means: a list containing real mean values of each class:param total_count: the number of all instances:return: calculated variance for considered dataset>>> items = gaussian_distribution(5.0, 1.0, 20)>>> means = [5.011267842911003]>>> total_count = 20>>> calculate_variance([items], means, total_count)0.9618530973487491"""squared_diff = [] # An empty list to store all squared differences# iterate over number of elements in itemsfor i in range(len(items)):# for loop iterates over number of elements in inner layer of itemsfor j in range(len(items[i])):# appending squared differences to 'squared_diff' listsquared_diff.append((items[i][j] - means[i]) ** 2)# one divided by (the number of all instances - number of classes) multiplied by# sum of all squared differencesn_classes = len(means) # Number of classes in datasetreturn 1 / (total_count - n_classes) * sum(squared_diff)# Making predictionsdef predict_y_values(x_items: list, means: list, variance: float, probabilities: list) -> list:"""This function predicts new indexes(groups for our data):param x_items: a list containing all items(gaussian distribution of all classes):param means: a list containing real mean values of each class:param variance: calculated value of variance by calculate_variance function:param probabilities: a list containing all probabilities of classes:return: a list containing predicted Y values>>> x_items = [[6.288184753155463, 6.4494456086997705, 5.066335808938262,... 4.235456349028368, 3.9078267848958586, 5.031334516831717,... 3.977896829989127, 3.56317055489747, 5.199311976483754,... 5.133374604658605, 5.546468300338232, 4.086029056264687,... 5.005005283626573, 4.935258239627312, 3.494170998739258,... 5.537997178661033, 5.320711100998849, 7.3891120432406865,... 5.202969177309964, 4.855297691835079], [11.288184753155463,... 11.44944560869977, 10.066335808938263, 9.235456349028368,... 8.907826784895859, 10.031334516831716, 8.977896829989128,... 8.56317055489747, 10.199311976483754, 10.133374604658606,... 10.546468300338232, 9.086029056264687, 10.005005283626572,... 9.935258239627313, 8.494170998739259, 10.537997178661033,... 10.320711100998848, 12.389112043240686, 10.202969177309964,... 9.85529769183508], [16.288184753155463, 16.449445608699772,... 15.066335808938263, 14.235456349028368, 13.907826784895859,... 15.031334516831716, 13.977896829989128, 13.56317055489747,... 15.199311976483754, 15.133374604658606, 15.546468300338232,... 14.086029056264687, 15.005005283626572, 14.935258239627313,... 13.494170998739259, 15.537997178661033, 15.320711100998848,... 17.389112043240686, 15.202969177309964, 14.85529769183508]]>>> means = [5.011267842911003, 10.011267842911003, 15.011267842911002]>>> variance = 0.9618530973487494>>> probabilities = [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]>>> predict_y_values(x_items, means, variance,... probabilities) # doctest: +NORMALIZE_WHITESPACE[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,2, 2, 2, 2, 2, 2, 2, 2, 2]"""# An empty list to store generated discriminant values of all items in dataset for# each classresults = []# for loop iterates over number of elements in listfor i in range(len(x_items)):# for loop iterates over number of inner items of each elementfor j in range(len(x_items[i])):temp = [] # to store all discriminant values of each item as a list# for loop iterates over number of classes we have in our datasetfor k in range(len(x_items)):# appending values of discriminants for each class to 'temp' listtemp.append(x_items[i][j] * (means[k] / variance)- (means[k] ** 2 / (2 * variance))+ log(probabilities[k]))# appending discriminant values of each item to 'results' listresults.append(temp)return [result.index(max(result)) for result in results]# Calculating Accuracydef accuracy(actual_y: list, predicted_y: list) -> float:"""Calculate the value of accuracy based-on predictions:param actual_y:a list containing initial Y values generated by 'y_generator'function:param predicted_y: a list containing predicted Y values generated by'predict_y_values' function:return: percentage of accuracy>>> actual_y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,... 1, 1 ,1 ,1 ,1 ,1 ,1]>>> predicted_y = [0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,... 0, 0, 1, 1, 1, 0, 1, 1, 1]>>> accuracy(actual_y, predicted_y)50.0>>> actual_y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,... 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]>>> predicted_y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,... 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]>>> accuracy(actual_y, predicted_y)100.0"""# iterate over one element of each list at a time (zip mode)# prediction is correct if actual Y value equals to predicted Y valuecorrect = sum(1 for i, j in zip(actual_y, predicted_y) if i == j)# percentage of accuracy equals to number of correct predictions divided by number# of all data and multiplied by 100return (correct / len(actual_y)) * 100num = TypeVar("num")def valid_input(input_type: Callable[[object], num], # Usually float or intinput_msg: str,err_msg: str,condition: Callable[[num], bool] = lambda x: True,default: str | None = None,) -> num:"""Ask for user value and validate that it fulfill a condition.:input_type: user input expected type of value:input_msg: message to show user in the screen:err_msg: message to show in the screen in case of error:condition: function that represents the condition that user input is valid.:default: Default value in case the user does not type anything:return: user's input"""while True:try:user_input = input_type(input(input_msg).strip() or default)if condition(user_input):return user_inputelse:print(f"{user_input}: {err_msg}")continueexcept ValueError:print(f"{user_input}: Incorrect input type, expected {input_type.__name__!r}")# Main Functiondef main():"""This function starts execution phase"""while True:print(" Linear Discriminant Analysis ".center(50, "*"))print("*" * 50, "\n")print("First of all we should specify the number of classes that")print("we want to generate as training dataset")# Trying to get number of classesn_classes = valid_input(input_type=int,condition=lambda x: x > 0,input_msg="Enter the number of classes (Data Groupings): ",err_msg="Number of classes should be positive!",)print("-" * 100)# Trying to get the value of standard deviationstd_dev = valid_input(input_type=float,condition=lambda x: x >= 0,input_msg=("Enter the value of standard deviation""(Default value is 1.0 for all classes): "),err_msg="Standard deviation should not be negative!",default="1.0",)print("-" * 100)# Trying to get number of instances in classes and theirs means to generate# datasetcounts = [] # An empty list to store instance counts of classes in datasetfor i in range(n_classes):user_count = valid_input(input_type=int,condition=lambda x: x > 0,input_msg=(f"Enter The number of instances for class_{i+1}: "),err_msg="Number of instances should be positive!",)counts.append(user_count)print("-" * 100)# An empty list to store values of user-entered means of classesuser_means = []for a in range(n_classes):user_mean = valid_input(input_type=float,input_msg=(f"Enter the value of mean for class_{a+1}: "),err_msg="This is an invalid value.",)user_means.append(user_mean)print("-" * 100)print("Standard deviation: ", std_dev)# print out the number of instances in classes in separated linefor i, count in enumerate(counts, 1):print(f"Number of instances in class_{i} is: {count}")print("-" * 100)# print out mean values of classes separated linefor i, user_mean in enumerate(user_means, 1):print(f"Mean of class_{i} is: {user_mean}")print("-" * 100)# Generating training dataset drawn from gaussian distributionx = [gaussian_distribution(user_means[j], std_dev, counts[j])for j in range(n_classes)]print("Generated Normal Distribution: \n", x)print("-" * 100)# Generating Ys to detecting corresponding classesy = y_generator(n_classes, counts)print("Generated Corresponding Ys: \n", y)print("-" * 100)# Calculating the value of actual mean for each classactual_means = [calculate_mean(counts[k], x[k]) for k in range(n_classes)]# for loop iterates over number of elements in 'actual_means' list and print# out them in separated linefor i, actual_mean in enumerate(actual_means, 1):print(f"Actual(Real) mean of class_{i} is: {actual_mean}")print("-" * 100)# Calculating the value of probabilities for each classprobabilities = [calculate_probabilities(counts[i], sum(counts)) for i in range(n_classes)]# for loop iterates over number of elements in 'probabilities' list and print# out them in separated linefor i, probability in enumerate(probabilities, 1):print(f"Probability of class_{i} is: {probability}")print("-" * 100)# Calculating the values of variance for each classvariance = calculate_variance(x, actual_means, sum(counts))print("Variance: ", variance)print("-" * 100)# Predicting Y values# storing predicted Y values in 'pre_indexes' variablepre_indexes = predict_y_values(x, actual_means, variance, probabilities)print("-" * 100)# Calculating Accuracy of the modelprint(f"Accuracy: {accuracy(y, pre_indexes)}")print("-" * 100)print(" DONE ".center(100, "+"))if input("Press any key to restart or 'q' for quit: ").strip().lower() == "q":print("\n" + "GoodBye!".center(100, "-") + "\n")breaksystem("cls" if name == "nt" else "clear") # noqa: S605if __name__ == "__main__":main()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。