2
\$\begingroup\$

I have a different function for each task, shown below. To reduce the duplicated code, I have written the following function.

Is this the best way to apply OOP, or is there a better way to do it?

Function

def ML_algorithm(algo_name, y_train, y_test, y_pred_algoname, y_predtest_algoname):
    """Print regression metrics (RMSE, explained variance, R2) for one model.

    Parameters:
        algo_name: display name printed as the section header.
        y_train, y_test: ground-truth targets for the train and test splits.
        y_pred_algoname, y_predtest_algoname: the model's predictions on the
            train and test splits respectively.

    Prints each metric for TRAIN then TEST, followed by two blank lines,
    exactly matching the original hand-written output.
    """
    print(algo_name)
    print()
    # Table of (label, scoring callable); each callable takes (y_true, y_pred).
    # Iterating this table replaces three near-identical pairs of statements.
    metrics = [
        ("RMSE", lambda y, p: np.sqrt(mean_squared_error(y, p))),
        ("EXPLAINED VARIANCE", explained_variance_score),
        ("R2", r2_score),
    ]
    for label, metric in metrics:
        print(f"{label} TRAIN : {metric(y_train, y_pred_algoname)}")
        print(f"{label} TEST : {metric(y_test, y_predtest_algoname)}")
    print()
    print()

Actual code

# Report metrics for every fitted model by reusing the ML_algorithm helper
# defined above, instead of repeating the same six metric computations and
# prints once per model. Output (content and order) is identical to the
# original inline version.
# NOTE(review): the y_pred* variables are assumed to be produced earlier in
# the file by the corresponding model-fitting code — confirm they all exist.
MODEL_PREDICTIONS = [
    ("LINEAR REGRESSION", y_predlr, y_predtestlr),
    ("RANDOM FOREST REGRESSION", y_predrfr, y_predrfrtest),
    ("RANDOM FOREST REGRESSION 2", y_predrfr2, y_predrfr2test),
    ("XGBOOST", y_predxgb, y_predxgbtest),
    ("SVM", ypredsvm, ypredsvmtest),
    ("Bayesian", ypredbayesian, ypredbayesiantest),
    ("SGD", ypredsgd, ypredsgdtest),
    ("Decision Tree", ypreddectree, ypreddectreetest),
    ("Neural Network", ypredneural, ypredneuraltest),
    ("Lasso", ypredlaso, ypredlasotest),
]

for model_name, train_pred, test_pred in MODEL_PREDICTIONS:
    ML_algorithm(model_name, y_train, y_test, train_pred, test_pred)
Reinderien
71k5 gold badges76 silver badges256 bronze badges
asked Jan 15, 2021 at 13:41
\$\endgroup\$
3
  • 3
    \$\begingroup\$ The current question title, which states your concerns about the code, applies to too many questions on this site to be useful. The site standard is for the title to simply state the task accomplished by the code. Please see How do I ask a good question?. \$\endgroup\$ Commented Jan 15, 2021 at 17:27
  • 1
    \$\begingroup\$ What exactly does your code do? \$\endgroup\$ Commented Jan 16, 2021 at 12:42
  • \$\begingroup\$ Next time, please add a description of what the code is supposed to be doing to the question. \$\endgroup\$ Commented Jan 17, 2021 at 19:12

2 Answers 2

3
\$\begingroup\$

I would add some iteration to this, to reduce parts where you repeat yourself. Note that there are two things changing, the statistic you output (and its name) and whether you show it with the train or test results. The former is always constant, while the latter depends on your results, so I would pull the former into a global constant and build the latter within the function. Something like this:

def sqrt_mean_squared_error(y_true, y_pred):
    """Root-mean-squared error: sqrt of sklearn's mean_squared_error.

    Takes (y_true, y_pred) so it has the same call signature as the other
    entries in STATISTICS — the original one-argument version could never
    be called successfully from the loop below.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))


# Metric display name -> scoring callable(y_true, y_pred).
# Defined AFTER sqrt_mean_squared_error: a dict literal evaluates its values
# immediately, so the original order raised NameError at import time.
STATISTICS = {"RMSE": sqrt_mean_squared_error,
              "EXPLAINED_VARIANCE": explained_variance_score,
              "R2": r2_score}


def print_performance(algo_name, y_train, y_test, y_train_pred, y_test_pred):
    """Print every statistic in STATISTICS for the train and test predictions."""
    print(algo_name + '\n')
    results = [("TRAIN", y_train, y_train_pred),
               ("TEST", y_test, y_test_pred)]
    for stat_name, stat in STATISTICS.items():
        for name, y, y_pred in results:
            print(f"{stat_name} {name} : {stat(y, y_pred)}")
    print('\n')

Note that I used an f-string to concisely output, which in this case is equivalent to:

print(stat_name, name, ":", stat(y, y_pred))

Also, Python's official style-guide, PEP8, recommends using four spaces as indentation.

answered Jan 15, 2021 at 16:18
\$\endgroup\$
0
1
\$\begingroup\$

The thing you're doing isn't OOP (object-oriented programming), and that's probably ok. OOP is just one way of structuring complicated programs, and I don't personally like the pure form of it much anyway.

Your idea of making a function to handle repetitive code is good, but you're still basically writing imperative code. The function you've defined is a good example of what used to be called a "subroutine"; you can still make it into a "function".

  • Functions take arguments and return something that depends on those arguments.
  • In their pure form they don't do anything else. As you'll see, there's a lot of room for grey here.
  • Ideally, it should only be "possible" to call a function with valid arguments. (Scare quotes because this is python, which is basically php in a fancy suit.)
  • Handle printing separately from computation. You're going to violate this all the time while you troubleshoot stuff, but remember to clean up after yourself.
  • There's just plain a lot of detailed language features in any good language that can help you write better. Keep learning👍

I'm using NamedTuple below because I haven't gotten around to learning the ins and outs of dataclass yet, but I understand dataclass is actually better for most situations. NamedTuple requires introducing some type-hints, which are good, but which only do anything if you're using a type-checker like mypy.

from typing import NamedTuple
class ErrorScores(NamedTuple):
 # Container for the three regression error metrics computed by
 # error_scores() below.
 root_mean_square: float # I assume?
 explained_variance: float
 r2: float
def error_scores(data, predicate):
    """Compute RMSE, explained variance and R2 for one (truth, prediction) pair.

    Returns an ErrorScores named tuple holding the three metrics.
    """
    rmse = np.sqrt(mean_squared_error(data, predicate))
    variance = explained_variance_score(data, predicate)
    determination = r2_score(data, predicate)
    return ErrorScores(rmse, variance, determination)
class Algorithm(NamedTuple):
    """One model's display name plus its train/test prediction arrays."""
    name: str
    # These two fields hold the prediction arrays themselves (the y_pred*
    # values passed at every construction site), NOT names — so they are
    # annotated as object rather than the original, misleading str.
    predicate_name: object
    predicate_test_name: object
# One Algorithm record per fitted model. The y_pred* variables are assumed
# to be defined earlier in the file by the model-fitting code — TODO confirm.
LINEAR_REGRESSION = Algorithm("LINEAR REGRESSION", y_predlr, y_predtestlr)
RANDOM_FOREST_REGRESSION = Algorithm("RANDOM FOREST REGRESSION", y_predrfr, y_predrfrtest)
# Fixed: "RANDOM_FOREST_REGRESSION 2" (with a space) was a SyntaxError, and
# print_errors_for_all refers to RANDOM_FOREST_REGRESSION_2.
RANDOM_FOREST_REGRESSION_2 = Algorithm("RANDOM FOREST REGRESSION 2", y_predrfr2, y_predrfr2test)
XGBOOST = Algorithm("XGBOOST", y_predxgb, y_predxgbtest)
SVM = Algorithm("SVM", ypredsvm, ypredsvmtest)
BAYESIAN = Algorithm("Bayesian", ypredbayesian, ypredbayesiantest)
SGD = Algorithm("SGD", ypredsgd, ypredsgdtest)
DECISION_TREE = Algorithm("Decision Tree", ypreddectree, ypreddectreetest)
NEURAL_NETWORK = Algorithm("Neural Network", ypredneural, ypredneuraltest)
LASSO = Algorithm("Lasso", ypredlaso, ypredlasotest)
def print_errors(algorithm: Algorithm, y_train, y_test):
    """Print the name plus train/test error metrics for one algorithm."""
    train = error_scores(y_train, algorithm.predicate_name)
    test = error_scores(y_test, algorithm.predicate_test_name)
    print(algorithm.name)
    # Metric-major order: TRAIN then TEST for each statistic, exactly as
    # the original joined-string version printed them.
    labelled_fields = (("RMSE", "root_mean_square"),
                       ("EXPLAINED VARIANCE", "explained_variance"),
                       ("R2", "r2"))
    for label, field in labelled_fields:
        print(f"{label} TRAIN : {getattr(train, field)}")
        print(f"{label} TEST : {getattr(test, field)}")
def print_errors_for_all(y_train, y_test):
    """Report error metrics for every configured algorithm, in order."""
    for algorithm in (LINEAR_REGRESSION,
                      RANDOM_FOREST_REGRESSION,
                      RANDOM_FOREST_REGRESSION_2,
                      XGBOOST,
                      SVM,
                      BAYESIAN,
                      SGD,
                      DECISION_TREE,
                      NEURAL_NETWORK,
                      LASSO):
        print_errors(algorithm, y_train, y_test)

Without knowing the surrounding context, this is probably good enough.

answered Jan 15, 2021 at 20:46
\$\endgroup\$
0

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.