machine_learning/linear_discriminant_analysis.py · codenotsleep/Python

代码拉取完成,页面将自动刷新

扫描微信二维码支付

取消

支付完成

richgiteeai

Watch

不关注关注所有动态仅关注版本发行动态关注但不提醒动态

1 Star 0 Fork 324

codenotsleep/Python

forked from 编程语言算法集/Python

代码 Issues 0 Pull Requests 0 Wiki 统计流水线

服务

加入 Gitee

与超过 1400万开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)

免费加入

已有帐号? 立即登录

文件

master

分支 (76)

管理

master

fix-ruff

pipx-install-pre-commit-ruff

ruff-rule-ISC001

Re-enable-tests

Rename-is_palindrome.py-to-is_int_palindrome.py

Add-more-ruff-rules

pre-commit-ci-update-config

ruff

atbash.py-Tighten-up-the-benchmarks

prime_numbers.py-Tighten-up-the-benchmarks

dynamic_programming

maths/sum_of_digits.py-Streamline-benchmarks

maths/number_of_digits.py-Streamline-benchmarks

Python-3.11

7804-improve-the-maths-add-functionality-and-tests

7782-create-subtraction-method-in-maths-folder

quantum_random.py.DISABLED.txt

quantum_random.py.disabled

revert-7349-patch-4

克隆/下载

HTTPS SSH SVN SVN+SSH 下载ZIP

提示

下载代码请复制以下命令到终端执行

为确保你提交的代码身份被 Gitee 正确识别,请执行以下命令完成配置

git config --global user.name userName 
git config --global user.email userEmail

初次使用 SSH 协议进行代码克隆、推送等操作时,需按下述提示完成 SSH 配置

1 生成 RSA 密钥

2 获取 RSA 公钥内容,并配置到 SSH公钥中

在 Gitee 上使用 SVN,请访问使用指南

使用 HTTPS 协议时,命令行会出现如下账号密码验证步骤。基于安全考虑,Gitee 建议配置并使用私人令牌替代登录密码进行克隆、推送等操作

Username for 'https://gitee.com': userName

Password for 'https://userName@gitee.com': # 私人令牌

分支 76

标签 0

Python

machine_learning

linear_discriminant_analysis.py

linear_discriminant_analysis.py 16.63 KB

"""
 Linear Discriminant Analysis

Assumptions About Data :
 1. The input variables have a Gaussian distribution.
 2. The variance calculated for each input variables by class grouping is the
 same.
 3. The mix of classes in your training set is representative of the problem.

Learning The Model :
 The LDA model requires the estimation of statistics from the training data :
 1. Mean of each input value for each class.
 2. Probability of an instance belonging to each class.
 3. Covariance for the input data for each class

Calculate the class means :
 mean(x) = 1/n ( for i = 1 to i = n --> sum(xi))

Calculate the class probabilities :
 P(y = 0) = count(y = 0) / (count(y = 0) + count(y = 1))
 P(y = 1) = count(y = 1) / (count(y = 0) + count(y = 1))

Calculate the variance :
 We can calculate the variance for dataset in two steps :
 1. Calculate the squared difference for each input variable from the
 group mean.
 2. Calculate the mean of the squared difference.
 ------------------------------------------------
 Squared_Difference = (x - mean(k)) ** 2
 Variance = (1 / (count(x) - count(classes))) *
 (for i = 1 to i = n --> sum(Squared_Difference(xi)))

Making Predictions :
 discriminant(x) = x * (mean / variance) -
 ((mean ** 2) / (2 * variance)) + Ln(probability)
 ---------------------------------------------------------------------------
 After calculating the discriminant value for each class, the class with the
 largest discriminant value is taken as the prediction.

Author: @EverLookNeverSee
"""
from collections.abc import Callable
from math import log
from os import name, system
from random import gauss, seed
from typing import TypeVar

# Make a training dataset drawn from a gaussian distribution
def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> list:
    """
    Generate reproducible gaussian-distributed instances for one class.

    A private generator seeded with 1 is used on every call, so the result is
    deterministic for given arguments, and the process-wide ``random`` module
    state is left untouched (the previous implementation reseeded it).

    :param mean: mean value of class
    :param std_dev: value of standard deviation entered by user or its default
    :param instance_count: number of instances to generate for the class
    :return: a list containing ``instance_count`` generated values

    >>> gaussian_distribution(5.0, 1.0, 20)  # doctest: +NORMALIZE_WHITESPACE
    [6.288184753155463, 6.4494456086997705, 5.066335808938262, 4.235456349028368,
     3.9078267848958586, 5.031334516831717, 3.977896829989127, 3.56317055489747,
     5.199311976483754, 5.133374604658605, 5.546468300338232, 4.086029056264687,
     5.005005283626573, 4.935258239627312, 3.494170998739258, 5.537997178661033,
     5.320711100998849, 7.3891120432406865, 5.202969177309964, 4.855297691835079]
    >>> gaussian_distribution(5.0, 1.0, 0)
    []
    """
    # Imported locally because the module header only imports ``gauss``/``seed``.
    from random import Random

    # Fresh generator with a fixed seed: same sequence as ``seed(1)`` followed
    # by ``gauss`` calls, but without mutating the shared module-level generator.
    rng = Random(1)
    return [rng.gauss(mean, std_dev) for _ in range(instance_count)]

# Make corresponding Y flags to detecting classes
def y_generator(class_count: int, instance_count: list) -> list:
    """
    Build the class label of every instance in the dataset.

    Class ``k`` contributes ``instance_count[k]`` consecutive labels ``k``.

    :param class_count: number of classes (data groupings) in the dataset
    :param instance_count: list holding the number of instances of each class
    :return: flat list of labels, grouped by class in ascending class order

    >>> y_generator(1, [10])
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    >>> y_generator(2, [5, 10])
    [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    >>> y_generator(3, [1, 0, 2])
    [0, 2, 2]
    """
    labels: list = []
    for label in range(class_count):
        # repeat the current label once per instance of that class
        labels.extend(label for _ in range(instance_count[label]))
    return labels

# Calculate the class means
def calculate_mean(instance_count: int, items: list) -> float:
    """
    Compute the mean of one class.

    :param instance_count: number of instances in the class (the divisor)
    :param items: values that belong to the class (data grouping)
    :return: sum of ``items`` divided by ``instance_count``

    >>> calculate_mean(4, [1.0, 2.0, 3.0, 4.0])
    2.5
    >>> calculate_mean(2, [1, 2])
    1.5
    """
    total = sum(items)
    return total / instance_count

# Calculate the class probabilities
def calculate_probabilities(instance_count: int, total_count: int) -> float:
    """
    Compute the prior probability of one class.

    :param instance_count: number of instances in the class
    :param total_count: number of instances in the whole dataset
    :return: fraction of the dataset that belongs to the class

    >>> calculate_probabilities(20, 60)
    0.3333333333333333
    >>> calculate_probabilities(30, 100)
    0.3
    """
    # share of the dataset taken by this class
    share = instance_count / total_count
    return share

# Calculate the variance
def calculate_variance(items: list, means: list, total_count: int) -> float:
    """
    Compute the pooled within-class variance of the dataset.

    Every value is compared with the mean of its own class; the squared
    differences are summed and divided by ``total_count - len(means)``.

    :param items: list of per-class lists holding all values of the dataset
    :param means: list containing the mean of each class, in the same order
    :param total_count: the number of all instances
    :return: calculated variance for the dataset
    :raises ZeroDivisionError: if ``total_count`` equals the number of classes

    >>> calculate_variance([[1.0, 2.0, 3.0]], [2.0], 3)
    1.0
    >>> calculate_variance([[1.0, 3.0], [10.0, 14.0]], [2.0, 12.0], 4)
    5.0
    """
    # squared distance of every value from the mean of its own class
    squared_diff = [
        (value - means[class_index]) ** 2
        for class_index, group in enumerate(items)
        for value in group
    ]

    # degrees of freedom: all instances minus one per estimated class mean
    n_classes = len(means)
    return 1 / (total_count - n_classes) * sum(squared_diff)

# Making predictions
def predict_y_values(
    x_items: list, means: list, variance: float, probabilities: list
) -> list:
    """
    Predict the class index of every item with the LDA discriminant function.

    For each item ``x`` and each class ``k`` the value
    ``x * (mean_k / variance) - mean_k ** 2 / (2 * variance) + ln(p_k)``
    is computed; the index of the class with the largest value is the
    prediction (the lowest index wins ties).

    The candidate classes are defined by ``means`` and ``probabilities``, so
    ``x_items`` may hold any number of groups (previously the number of groups
    was used as the number of classes, which mis-scored such inputs).

    :param x_items: list of lists holding the items to classify
    :param means: list containing the mean value of each class
    :param variance: variance calculated by the calculate_variance function
    :param probabilities: list containing the probability of each class
    :return: flat list of predicted class indexes, in iteration order of x_items
    :raises ValueError: if ``means`` and ``probabilities`` differ in length, or
        a probability is not positive (raised by ``math.log``)
    :raises ZeroDivisionError: if ``variance`` is zero

    >>> third = 1 / 3
    >>> x_items = [[4.0, 6.0], [9.0, 11.0], [14.0, 16.0]]
    >>> predict_y_values(x_items, [5.0, 10.0, 15.0], 1.0, [third, third, third])
    [0, 0, 1, 1, 2, 2]
    >>> predict_y_values([[4.0, 9.0, 16.0]], [5.0, 10.0, 15.0], 1.0, [third] * 3)
    [0, 1, 2]
    """
    if len(means) != len(probabilities):
        raise ValueError("means and probabilities must have the same length")

    # The per-class terms do not depend on the item, so compute them only once.
    class_terms = [
        (mean / variance, mean**2 / (2 * variance), log(probability))
        for mean, probability in zip(means, probabilities)
    ]

    predictions = []
    for group in x_items:
        for item in group:
            # discriminant value of this item for every class
            scores = [
                item * slope - offset + log_prior
                for slope, offset, log_prior in class_terms
            ]
            # the class with the largest discriminant value is the prediction
            predictions.append(scores.index(max(scores)))
    return predictions

# Calculating Accuracy
def accuracy(actual_y: list, predicted_y: list) -> float:
    """
    Compute the percentage of predictions that match the actual labels.

    Labels are compared pairwise; when the lists differ in length the surplus
    elements of the longer one are not compared.  The divisor is always
    ``len(actual_y)``.

    :param actual_y: list containing the initial Y values generated by the
        'y_generator' function
    :param predicted_y: list containing the predicted Y values generated by the
        'predict_y_values' function
    :return: percentage of accuracy

    >>> accuracy([0, 0, 1, 1], [0, 1, 1, 1])
    75.0
    >>> accuracy([0, 1, 2], [0, 1, 2])
    100.0
    """
    hits = 0
    for expected, predicted in zip(actual_y, predicted_y):
        # a prediction is correct when it equals the actual label
        if expected == predicted:
            hits += 1
    ratio = hits / len(actual_y)
    return ratio * 100

num = TypeVar("num")

def valid_input(
 input_type: Callable[[object], num], # Usually float or int
 input_msg: str,
 err_msg: str,
 condition: Callable[[num], bool] = lambda x: True,
 default: str | None = None,
) -> num:
 """
 Ask for user value and validate that it fulfill a condition.

:input_type: user input expected type of value
 :input_msg: message to show user in the screen
 :err_msg: message to show in the screen in case of error
 :condition: function that represents the condition that user input is valid.
 :default: Default value in case the user does not type anything
 :return: user's input
 """
 while True:
 try:
 user_input = input_type(input(input_msg).strip() or default)
 if condition(user_input):
 return user_input
 else:
 print(f"{user_input}: {err_msg}")
 continue
 except ValueError:
 print(
 f"{user_input}: Incorrect input type, expected {input_type.__name__!r}"
 )

# Main Function
def main():
    """
    Run the interactive Linear Discriminant Analysis demo.

    Each round asks for the number of classes, a shared standard deviation and
    the instance count and mean of every class, generates the dataset, prints
    the estimated means, class probabilities and variance, and reports the
    accuracy of the predictions on the generated data.  After a round the user
    can quit with 'q'; otherwise the screen is cleared and a new round starts.

    Degenerate datasets are reported instead of crashing the program: when
    every class has a single instance the variance divisor is zero, and when
    the estimated variance is zero the discriminant cannot be evaluated.
    """
    while True:
        print(" Linear Discriminant Analysis ".center(50, "*"))
        print("*" * 50, "\n")
        print("First of all we should specify the number of classes that")
        print("we want to generate as training dataset")
        # Trying to get number of classes
        n_classes = valid_input(
            input_type=int,
            condition=lambda x: x > 0,
            input_msg="Enter the number of classes (Data Groupings): ",
            err_msg="Number of classes should be positive!",
        )

        print("-" * 100)

        # Trying to get the value of standard deviation
        std_dev = valid_input(
            input_type=float,
            condition=lambda x: x >= 0,
            input_msg=(
                "Enter the value of standard deviation"
                "(Default value is 1.0 for all classes): "
            ),
            err_msg="Standard deviation should not be negative!",
            default="1.0",
        )

        print("-" * 100)

        # Instance count of every class in the dataset
        counts = []
        for i in range(n_classes):
            user_count = valid_input(
                input_type=int,
                condition=lambda x: x > 0,
                input_msg=(f"Enter The number of instances for class_{i+1}: "),
                err_msg="Number of instances should be positive!",
            )
            counts.append(user_count)
        print("-" * 100)

        # User-entered mean of every class
        user_means = []
        for a in range(n_classes):
            user_mean = valid_input(
                input_type=float,
                input_msg=(f"Enter the value of mean for class_{a+1}: "),
                err_msg="This is an invalid value.",
            )
            user_means.append(user_mean)
        print("-" * 100)

        print("Standard deviation: ", std_dev)
        # print out the number of instances in classes in separated line
        for i, count in enumerate(counts, 1):
            print(f"Number of instances in class_{i} is: {count}")
        print("-" * 100)

        # print out mean values of classes separated line
        for i, user_mean in enumerate(user_means, 1):
            print(f"Mean of class_{i} is: {user_mean}")
        print("-" * 100)

        # Generating training dataset drawn from gaussian distribution
        x = [
            gaussian_distribution(user_means[j], std_dev, counts[j])
            for j in range(n_classes)
        ]
        print("Generated Normal Distribution: \n", x)
        print("-" * 100)

        # Generating Ys to detecting corresponding classes
        y = y_generator(n_classes, counts)
        print("Generated Corresponding Ys: \n", y)
        print("-" * 100)

        # Calculating the value of actual mean for each class
        actual_means = [calculate_mean(counts[k], x[k]) for k in range(n_classes)]
        for i, actual_mean in enumerate(actual_means, 1):
            print(f"Actual(Real) mean of class_{i} is: {actual_mean}")
        print("-" * 100)

        # Calculating the value of probabilities for each class
        total_count = sum(counts)
        probabilities = [
            calculate_probabilities(counts[i], total_count) for i in range(n_classes)
        ]
        for i, probability in enumerate(probabilities, 1):
            print(f"Probability of class_{i} is: {probability}")
        print("-" * 100)

        if total_count == n_classes:
            # calculate_variance divides by (total_count - n_classes)
            print("Variance cannot be estimated: every class has a single instance.")
        else:
            # Calculating the value of variance for the dataset
            variance = calculate_variance(x, actual_means, total_count)
            print("Variance: ", variance)
            print("-" * 100)

            if variance == 0:
                # predict_y_values divides by the variance
                print("Variance is zero, so the discriminant cannot be computed.")
            else:
                # Predicting Y values
                pre_indexes = predict_y_values(
                    x, actual_means, variance, probabilities
                )
                print("-" * 100)

                # Calculating Accuracy of the model
                print(f"Accuracy: {accuracy(y, pre_indexes)}")
        print("-" * 100)
        print(" DONE ".center(100, "+"))

        if input("Press any key to restart or 'q' for quit: ").strip().lower() == "q":
            print("\n" + "GoodBye!".center(100, "-") + "\n")
            break
        system("cls" if name == "nt" else "clear")  # noqa: S605


if __name__ == "__main__":
    main()

一键复制编辑原始数据按行查看历史

Christian Clauss 提交于 2023年04月28日 01:32 +08:00 . Solving the Top k most frequent words problem using a max-heap (#8685)

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406

"""
 Linear Discriminant Analysis



 Assumptions About Data :
 1. The input variables have a Gaussian distribution.
 2. The variance calculated for each input variables by class grouping is the
 same.
 3. The mix of classes in your training set is representative of the problem.


 Learning The Model :
 The LDA model requires the estimation of statistics from the training data :
 1. Mean of each input value for each class.
 2. Probability of an instance belonging to each class.
 3. Covariance for the input data for each class

 Calculate the class means :
 mean(x) = 1/n ( for i = 1 to i = n --> sum(xi))

 Calculate the class probabilities :
 P(y = 0) = count(y = 0) / (count(y = 0) + count(y = 1))
 P(y = 1) = count(y = 1) / (count(y = 0) + count(y = 1))

 Calculate the variance :
 We can calculate the variance for dataset in two steps :
 1. Calculate the squared difference for each input variable from the
 group mean.
 2. Calculate the mean of the squared difference.
 ------------------------------------------------
 Squared_Difference = (x - mean(k)) ** 2
 Variance = (1 / (count(x) - count(classes))) *
 (for i = 1 to i = n --> sum(Squared_Difference(xi)))

 Making Predictions :
 discriminant(x) = x * (mean / variance) -
 ((mean ** 2) / (2 * variance)) + Ln(probability)
 ---------------------------------------------------------------------------
 After calculating the discriminant value for each class, the class with the
 largest discriminant value is taken as the prediction.

 Author: @EverLookNeverSee
"""
from collections.abc import Callable
from math import log
from os import name, system
from random import gauss, seed
from typing import TypeVar


# Make a training dataset drawn from a gaussian distribution
def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> list:
    """
    Generate reproducible gaussian-distributed instances for one class.

    A private generator seeded with 1 is used on every call, so the result is
    deterministic for given arguments, and the process-wide ``random`` module
    state is left untouched (the previous implementation reseeded it).

    :param mean: mean value of class
    :param std_dev: value of standard deviation entered by user or its default
    :param instance_count: number of instances to generate for the class
    :return: a list containing ``instance_count`` generated values

    >>> gaussian_distribution(5.0, 1.0, 20)  # doctest: +NORMALIZE_WHITESPACE
    [6.288184753155463, 6.4494456086997705, 5.066335808938262, 4.235456349028368,
     3.9078267848958586, 5.031334516831717, 3.977896829989127, 3.56317055489747,
     5.199311976483754, 5.133374604658605, 5.546468300338232, 4.086029056264687,
     5.005005283626573, 4.935258239627312, 3.494170998739258, 5.537997178661033,
     5.320711100998849, 7.3891120432406865, 5.202969177309964, 4.855297691835079]
    >>> gaussian_distribution(5.0, 1.0, 0)
    []
    """
    # Imported locally because the module header only imports ``gauss``/``seed``.
    from random import Random

    # Fresh generator with a fixed seed: same sequence as ``seed(1)`` followed
    # by ``gauss`` calls, but without mutating the shared module-level generator.
    rng = Random(1)
    return [rng.gauss(mean, std_dev) for _ in range(instance_count)]


# Make corresponding Y flags to detecting classes
def y_generator(class_count: int, instance_count: list) -> list:
    """
    Build the class label of every instance in the dataset.

    Class ``k`` contributes ``instance_count[k]`` consecutive labels ``k``.

    :param class_count: number of classes (data groupings) in the dataset
    :param instance_count: list holding the number of instances of each class
    :return: flat list of labels, grouped by class in ascending class order

    >>> y_generator(1, [10])
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    >>> y_generator(2, [5, 10])
    [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    >>> y_generator(3, [1, 0, 2])
    [0, 2, 2]
    """
    labels: list = []
    for label in range(class_count):
        # repeat the current label once per instance of that class
        labels.extend(label for _ in range(instance_count[label]))
    return labels


# Calculate the class means
def calculate_mean(instance_count: int, items: list) -> float:
    """
    Compute the mean of one class.

    :param instance_count: number of instances in the class (the divisor)
    :param items: values that belong to the class (data grouping)
    :return: sum of ``items`` divided by ``instance_count``

    >>> calculate_mean(4, [1.0, 2.0, 3.0, 4.0])
    2.5
    >>> calculate_mean(2, [1, 2])
    1.5
    """
    total = sum(items)
    return total / instance_count


# Calculate the class probabilities
def calculate_probabilities(instance_count: int, total_count: int) -> float:
    """
    Compute the prior probability of one class.

    :param instance_count: number of instances in the class
    :param total_count: number of instances in the whole dataset
    :return: fraction of the dataset that belongs to the class

    >>> calculate_probabilities(20, 60)
    0.3333333333333333
    >>> calculate_probabilities(30, 100)
    0.3
    """
    # share of the dataset taken by this class
    share = instance_count / total_count
    return share


# Calculate the variance
def calculate_variance(items: list, means: list, total_count: int) -> float:
    """
    Compute the pooled within-class variance of the dataset.

    Every value is compared with the mean of its own class; the squared
    differences are summed and divided by ``total_count - len(means)``.

    :param items: list of per-class lists holding all values of the dataset
    :param means: list containing the mean of each class, in the same order
    :param total_count: the number of all instances
    :return: calculated variance for the dataset
    :raises ZeroDivisionError: if ``total_count`` equals the number of classes

    >>> calculate_variance([[1.0, 2.0, 3.0]], [2.0], 3)
    1.0
    >>> calculate_variance([[1.0, 3.0], [10.0, 14.0]], [2.0, 12.0], 4)
    5.0
    """
    # squared distance of every value from the mean of its own class
    squared_diff = [
        (value - means[class_index]) ** 2
        for class_index, group in enumerate(items)
        for value in group
    ]

    # degrees of freedom: all instances minus one per estimated class mean
    n_classes = len(means)
    return 1 / (total_count - n_classes) * sum(squared_diff)


# Making predictions
def predict_y_values(
    x_items: list, means: list, variance: float, probabilities: list
) -> list:
    """
    Predict the class index of every item with the LDA discriminant function.

    For each item ``x`` and each class ``k`` the value
    ``x * (mean_k / variance) - mean_k ** 2 / (2 * variance) + ln(p_k)``
    is computed; the index of the class with the largest value is the
    prediction (the lowest index wins ties).

    The candidate classes are defined by ``means`` and ``probabilities``, so
    ``x_items`` may hold any number of groups (previously the number of groups
    was used as the number of classes, which mis-scored such inputs).

    :param x_items: list of lists holding the items to classify
    :param means: list containing the mean value of each class
    :param variance: variance calculated by the calculate_variance function
    :param probabilities: list containing the probability of each class
    :return: flat list of predicted class indexes, in iteration order of x_items
    :raises ValueError: if ``means`` and ``probabilities`` differ in length, or
        a probability is not positive (raised by ``math.log``)
    :raises ZeroDivisionError: if ``variance`` is zero

    >>> third = 1 / 3
    >>> x_items = [[4.0, 6.0], [9.0, 11.0], [14.0, 16.0]]
    >>> predict_y_values(x_items, [5.0, 10.0, 15.0], 1.0, [third, third, third])
    [0, 0, 1, 1, 2, 2]
    >>> predict_y_values([[4.0, 9.0, 16.0]], [5.0, 10.0, 15.0], 1.0, [third] * 3)
    [0, 1, 2]
    """
    if len(means) != len(probabilities):
        raise ValueError("means and probabilities must have the same length")

    # The per-class terms do not depend on the item, so compute them only once.
    class_terms = [
        (mean / variance, mean**2 / (2 * variance), log(probability))
        for mean, probability in zip(means, probabilities)
    ]

    predictions = []
    for group in x_items:
        for item in group:
            # discriminant value of this item for every class
            scores = [
                item * slope - offset + log_prior
                for slope, offset, log_prior in class_terms
            ]
            # the class with the largest discriminant value is the prediction
            predictions.append(scores.index(max(scores)))
    return predictions


# Calculating Accuracy
def accuracy(actual_y: list, predicted_y: list) -> float:
    """
    Compute the percentage of predictions that match the actual labels.

    Labels are compared pairwise; when the lists differ in length the surplus
    elements of the longer one are not compared.  The divisor is always
    ``len(actual_y)``.

    :param actual_y: list containing the initial Y values generated by the
        'y_generator' function
    :param predicted_y: list containing the predicted Y values generated by the
        'predict_y_values' function
    :return: percentage of accuracy

    >>> accuracy([0, 0, 1, 1], [0, 1, 1, 1])
    75.0
    >>> accuracy([0, 1, 2], [0, 1, 2])
    100.0
    """
    hits = 0
    for expected, predicted in zip(actual_y, predicted_y):
        # a prediction is correct when it equals the actual label
        if expected == predicted:
            hits += 1
    ratio = hits / len(actual_y)
    return ratio * 100


num = TypeVar("num")


def valid_input(
 input_type: Callable[[object], num], # Usually float or int
 input_msg: str,
 err_msg: str,
 condition: Callable[[num], bool] = lambda x: True,
 default: str | None = None,
) -> num:
 """
 Ask for user value and validate that it fulfill a condition.

 :input_type: user input expected type of value
 :input_msg: message to show user in the screen
 :err_msg: message to show in the screen in case of error
 :condition: function that represents the condition that user input is valid.
 :default: Default value in case the user does not type anything
 :return: user's input
 """
 while True:
 try:
 user_input = input_type(input(input_msg).strip() or default)
 if condition(user_input):
 return user_input
 else:
 print(f"{user_input}: {err_msg}")
 continue
 except ValueError:
 print(
 f"{user_input}: Incorrect input type, expected {input_type.__name__!r}"
 )


# Main Function
def main():
    """
    Run the interactive Linear Discriminant Analysis demo.

    Each round asks for the number of classes, a shared standard deviation and
    the instance count and mean of every class, generates the dataset, prints
    the estimated means, class probabilities and variance, and reports the
    accuracy of the predictions on the generated data.  After a round the user
    can quit with 'q'; otherwise the screen is cleared and a new round starts.

    Degenerate datasets are reported instead of crashing the program: when
    every class has a single instance the variance divisor is zero, and when
    the estimated variance is zero the discriminant cannot be evaluated.
    """
    while True:
        print(" Linear Discriminant Analysis ".center(50, "*"))
        print("*" * 50, "\n")
        print("First of all we should specify the number of classes that")
        print("we want to generate as training dataset")
        # Trying to get number of classes
        n_classes = valid_input(
            input_type=int,
            condition=lambda x: x > 0,
            input_msg="Enter the number of classes (Data Groupings): ",
            err_msg="Number of classes should be positive!",
        )

        print("-" * 100)

        # Trying to get the value of standard deviation
        std_dev = valid_input(
            input_type=float,
            condition=lambda x: x >= 0,
            input_msg=(
                "Enter the value of standard deviation"
                "(Default value is 1.0 for all classes): "
            ),
            err_msg="Standard deviation should not be negative!",
            default="1.0",
        )

        print("-" * 100)

        # Instance count of every class in the dataset
        counts = []
        for i in range(n_classes):
            user_count = valid_input(
                input_type=int,
                condition=lambda x: x > 0,
                input_msg=(f"Enter The number of instances for class_{i+1}: "),
                err_msg="Number of instances should be positive!",
            )
            counts.append(user_count)
        print("-" * 100)

        # User-entered mean of every class
        user_means = []
        for a in range(n_classes):
            user_mean = valid_input(
                input_type=float,
                input_msg=(f"Enter the value of mean for class_{a+1}: "),
                err_msg="This is an invalid value.",
            )
            user_means.append(user_mean)
        print("-" * 100)

        print("Standard deviation: ", std_dev)
        # print out the number of instances in classes in separated line
        for i, count in enumerate(counts, 1):
            print(f"Number of instances in class_{i} is: {count}")
        print("-" * 100)

        # print out mean values of classes separated line
        for i, user_mean in enumerate(user_means, 1):
            print(f"Mean of class_{i} is: {user_mean}")
        print("-" * 100)

        # Generating training dataset drawn from gaussian distribution
        x = [
            gaussian_distribution(user_means[j], std_dev, counts[j])
            for j in range(n_classes)
        ]
        print("Generated Normal Distribution: \n", x)
        print("-" * 100)

        # Generating Ys to detecting corresponding classes
        y = y_generator(n_classes, counts)
        print("Generated Corresponding Ys: \n", y)
        print("-" * 100)

        # Calculating the value of actual mean for each class
        actual_means = [calculate_mean(counts[k], x[k]) for k in range(n_classes)]
        for i, actual_mean in enumerate(actual_means, 1):
            print(f"Actual(Real) mean of class_{i} is: {actual_mean}")
        print("-" * 100)

        # Calculating the value of probabilities for each class
        total_count = sum(counts)
        probabilities = [
            calculate_probabilities(counts[i], total_count) for i in range(n_classes)
        ]
        for i, probability in enumerate(probabilities, 1):
            print(f"Probability of class_{i} is: {probability}")
        print("-" * 100)

        if total_count == n_classes:
            # calculate_variance divides by (total_count - n_classes)
            print("Variance cannot be estimated: every class has a single instance.")
        else:
            # Calculating the value of variance for the dataset
            variance = calculate_variance(x, actual_means, total_count)
            print("Variance: ", variance)
            print("-" * 100)

            if variance == 0:
                # predict_y_values divides by the variance
                print("Variance is zero, so the discriminant cannot be computed.")
            else:
                # Predicting Y values
                pre_indexes = predict_y_values(
                    x, actual_means, variance, probabilities
                )
                print("-" * 100)

                # Calculating Accuracy of the model
                print(f"Accuracy: {accuracy(y, pre_indexes)}")
        print("-" * 100)
        print(" DONE ".center(100, "+"))

        if input("Press any key to restart or 'q' for quit: ").strip().lower() == "q":
            print("\n" + "GoodBye!".center(100, "-") + "\n")
            break
        system("cls" if name == "nt" else "clear")  # noqa: S605


if __name__ == "__main__":
    main()