I have a separate block of code for each model's evaluation (shown below the function). To reduce the repetition, I have written the following function.
Is this the best way to apply OOP, or is there a better approach?
Function
def ML_algorithm(algo_name, y_train, y_test, y_pred_algoname, y_predtest_algoname):
    print(algo_name)
    print()
    err=mean_squared_error(y_train, y_pred_algoname)
    print("RMSE TRAIN : "+str(np.sqrt(err)))
    err=mean_squared_error(y_test, y_predtest_algoname)
    print("RMSE TEST : "+str(np.sqrt(err)))
    err=explained_variance_score(y_train, y_pred_algoname)
    print("EXPLAINED VARIANCE TRAIN : "+str(err))
    err=explained_variance_score(y_test, y_predtest_algoname)
    print("EXPLAINED VARIANCE TEST : "+str(err))
    err=r2_score(y_train,y_pred_algoname)
    print("R2 TRAIN : "+str(err))
    err=r2_score(y_test,y_predtest_algoname)
    print("R2 TEST : "+str(err))
    print()
    print()
Actual code
print("LINEAR REGRESSION")
print()
err=mean_squared_error(y_train, y_predlr)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, y_predtestlr)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, y_predlr)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, y_predtestlr)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train,y_predlr)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test,y_predtestlr)
print("R2 TEST : "+str(err))
print()
print()
print("RANDOM FOREST REGRESSION")
print()
err=mean_squared_error(y_train, y_predrfr)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, y_predrfrtest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, y_predrfr)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, y_predrfrtest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, y_predrfr)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, y_predrfrtest)
print("R2 TEST : "+str(err))
print()
print()
print("RANDOM FOREST REGRESSION 2")
print()
err=mean_squared_error(y_train, y_predrfr2)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, y_predrfr2test)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, y_predrfr2)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, y_predrfr2test)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, y_predrfr2)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, y_predrfr2test)
print("R2 TEST : "+str(err))
print()
print()
print("XGBOOST")
print()
err=mean_squared_error(y_train, y_predxgb)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, y_predxgbtest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, y_predxgb)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, y_predxgbtest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, y_predxgb)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, y_predxgbtest)
print("R2 TEST : "+str(err))
print()
print()
print("SVM")
print()
err=mean_squared_error(y_train, ypredsvm)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredsvmtest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredsvm)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredsvmtest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredsvm)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredsvmtest)
print("R2 TEST : "+str(err))
print()
print()
print("Bayesian")
print()
err=mean_squared_error(y_train, ypredbayesian)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredbayesiantest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredbayesian)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredbayesiantest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredbayesian)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredbayesiantest)
print("R2 TEST : "+str(err))
print()
print()
print("SGD")
print()
err=mean_squared_error(y_train, ypredsgd)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredsgdtest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredsgd)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredsgdtest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredsgd)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredsgdtest)
print("R2 TEST : "+str(err))
print()
print()
print("Decision Tree")
print()
err=mean_squared_error(y_train, ypreddectree)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypreddectreetest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypreddectree)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypreddectreetest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypreddectree)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypreddectreetest)
print("R2 TEST : "+str(err))
print()
print()
print("Neural Network")
print()
err=mean_squared_error(y_train, ypredneural)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredneuraltest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredneural)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredneuraltest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredneural)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredneuraltest)
print("R2 TEST : "+str(err))
print()
print()
print("Lasso")
print()
err=mean_squared_error(y_train, ypredlaso)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredlasotest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredlaso)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredlasotest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredlaso)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredlasotest)
print("R2 TEST : "+str(err))
print()
print()
- The current question title, which states your concerns about the code, applies to too many questions on this site to be useful. The site standard is for the title to simply state the task accomplished by the code. Please see How do I ask a good question? (BCdotWEB, Jan 15, 2021)
- What exactly does your code do? (pacmaninbw, Jan 16, 2021)
- Next time, please add a description of what the code is supposed to be doing to the question. (Mast, Jan 17, 2021)
2 Answers
I would add some iteration to this, to reduce the parts where you repeat yourself. Note that two things vary across the repeated lines: the statistic you output (and its name) and whether it is computed on the train or the test results. The set of statistics is the same for every algorithm, while the data depends on your results, so I would pull the former into a global constant and build the latter within the function. Something like this:
from sklearn.metrics import mean_squared_error, explained_variance_score, r2_score
import numpy as np

def sqrt_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

STATISTICS = {"RMSE": sqrt_mean_squared_error,
              "EXPLAINED_VARIANCE": explained_variance_score,
              "R2": r2_score}

def print_performance(algo_name, y_train, y_test, y_train_pred, y_test_pred):
    print(algo_name + '\n')
    results = [("TRAIN", y_train, y_train_pred),
               ("TEST", y_test, y_test_pred)]
    for stat_name, stat in STATISTICS.items():
        for name, y, y_pred in results:
            print(f"{stat_name} {name} : {stat(y, y_pred)}")
    print('\n')
Note that I used an f-string for concise output, which in this case is equivalent to:
print(stat_name, name, ":", stat(y, y_pred))
Also, Python's official style guide, PEP 8, recommends using four spaces for indentation.
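With that in place, here is a sketch of how you might call it for every model; this assumes the prediction variables from the question (y_predlr, y_predtestlr, and so on) are already in scope:

models = [
    ("LINEAR REGRESSION", y_predlr, y_predtestlr),
    ("RANDOM FOREST REGRESSION", y_predrfr, y_predrfrtest),
    ("XGBOOST", y_predxgb, y_predxgbtest),
    # the remaining models follow the same (name, train predictions, test predictions) pattern
]

for algo_name, y_train_pred, y_test_pred in models:
    print_performance(algo_name, y_train, y_test, y_train_pred, y_test_pred)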
The thing you're doing isn't OOP (object-oriented programming), and that's probably ok. OOP is just one way of structuring complicated programs, and I don't personally like the pure form of it much anyway.
Your idea of making a function to handle repetitive code is good, but you're still basically writing imperative code. The function you've defined is a good example of what used to be called a "subroutine"; you can still make it into a "function".
- Functions take arguments and return something that depends on those arguments.
- In their pure form they don't do anything else. As you'll see, there's a lot of room for grey here.
- Ideally, it should only be "possible" to call a function with valid arguments. (Scare quotes because this is Python, which is basically PHP in a fancy suit.)
- Handle printing separately from computation; a minimal sketch of this follows the list. You're going to violate this all the time while you troubleshoot stuff, but remember to clean up after yourself.
- There are just plain a lot of detailed language features in any good language that can help you write better. Keep learning👍
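As a minimal sketch of that separation, and only a sketch (the NamedTuple version below is the actual suggestion; the y_train and y_predlr names are assumed from the question), the computation returns a value and the printing happens at the call site:

import numpy as np
from sklearn.metrics import mean_squared_error

def rmse(y_true, y_pred):
    # pure: takes arguments, returns a value, prints nothing
    return np.sqrt(mean_squared_error(y_true, y_pred))

# printing stays at the call site, separate from the computation
print(f"RMSE TRAIN : {rmse(y_train, y_predlr)}")
print(f"RMSE TEST : {rmse(y_test, y_predtestlr)}")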
I'm using NamedTuple below because I haven't gotten around to learning the ins and outs of dataclasses yet, but I understand dataclass is actually better for most situations. NamedTuple requires introducing some type hints, which are good, but which only do anything if you're using a type-checker like mypy.
from typing import NamedTuple

class ErrorScores(NamedTuple):
    root_mean_square: float  # I assume?
    explained_variance: float
    r2: float

def error_scores(data, predicate):
    return ErrorScores(
        root_mean_square=np.sqrt(mean_squared_error(data, predicate)),
        explained_variance=explained_variance_score(data, predicate),
        r2=r2_score(data, predicate)
    )
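(As an aside, since dataclass came up above: a sketch of the same record written as a frozen dataclass, purely for comparison; the name ErrorScoresDC is invented here.)

from dataclasses import dataclass

@dataclass(frozen=True)  # frozen=True gives NamedTuple-like immutability
class ErrorScoresDC:
    root_mean_square: float
    explained_variance: float
    r2: float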
class Algorithm(NamedTuple):
    name: str  # I'm just assuming these are strings, IDK
    predicate_name: str
    predicate_test_name: str

# There are a lot of variables here that IDK where they're coming from. It's suspiciously repetitive.
LINEAR_REGRESSION = Algorithm("LINEAR REGRESSION", y_predlr, y_predtestlr)
RANDOM_FOREST_REGRESSION = Algorithm("RANDOM FOREST REGRESSION", y_predrfr, y_predrfrtest)
RANDOM_FOREST_REGRESSION_2 = Algorithm("RANDOM FOREST REGRESSION 2", y_predrfr2, y_predrfr2test)
XGBOOST = Algorithm("XGBOOST", y_predxgb, y_predxgbtest)
SVM = Algorithm("SVM", ypredsvm, ypredsvmtest)
BAYESIAN = Algorithm("Bayesian", ypredbayesian, ypredbayesiantest)
SGD = Algorithm("SGD", ypredsgd, ypredsgdtest)
DECISION_TREE = Algorithm("Decision Tree", ypreddectree, ypreddectreetest)
NEURAL_NETWORK = Algorithm("Neural Network", ypredneural, ypredneuraltest)
LASSO = Algorithm("Lasso", ypredlaso, ypredlasotest)
def print_errors(algorithm: Algorithm, y_train, y_test):
    training_errors = error_scores(y_train, algorithm.predicate_name)
    testing_errors = error_scores(y_test, algorithm.predicate_test_name)
    print('\n'.join((
        f'{algorithm.name}',
        f'RMSE TRAIN : {training_errors.root_mean_square}',
        f'RMSE TEST : {testing_errors.root_mean_square}',
        f'EXPLAINED VARIANCE TRAIN : {training_errors.explained_variance}',
        f'EXPLAINED VARIANCE TEST : {testing_errors.explained_variance}',
        f'R2 TRAIN : {training_errors.r2}',
        f'R2 TEST : {testing_errors.r2}'
    )))
def print_errors_for_all(y_train, y_test):
    algorithms = (LINEAR_REGRESSION,
                  RANDOM_FOREST_REGRESSION,
                  RANDOM_FOREST_REGRESSION_2,
                  XGBOOST,
                  SVM,
                  BAYESIAN,
                  SGD,
                  DECISION_TREE,
                  NEURAL_NETWORK,
                  LASSO)
    for algorithm in algorithms:
        print_errors(algorithm, y_train, y_test)
Without knowing the surrounding context, this is probably good enough.
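As a usage sketch, assuming y_train, y_test, and all the prediction variables from the question are defined, the whole report then collapses to one call:

print_errors_for_all(y_train, y_test)

If you want the blank lines between algorithms that the original output had, you could add a print() after each print_errors call inside the loop.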