I have a separate block of code for each model's evaluation (shown below the function). To reduce the repetition, I have written the following function.
Is this the best way to apply OOP, or is there a better approach?
Function
def ML_algorithm(algo_name, y_train, y_test, y_pred_algoname, y_predtest_algoname):
    print(algo_name)
    print()
    err=mean_squared_error(y_train, y_pred_algoname)
    print("RMSE TRAIN : "+str(np.sqrt(err)))
    err=mean_squared_error(y_test, y_predtest_algoname)
    print("RMSE TEST : "+str(np.sqrt(err)))
    err=explained_variance_score(y_train, y_pred_algoname)
    print("EXPLAINED VARIANCE TRAIN : "+str(err))
    err=explained_variance_score(y_test, y_predtest_algoname)
    print("EXPLAINED VARIANCE TEST : "+str(err))
    err=r2_score(y_train,y_pred_algoname)
    print("R2 TRAIN : "+str(err))
    err=r2_score(y_test,y_predtest_algoname)
    print("R2 TEST : "+str(err))
    print()
    print()
Actual code
print("LINEAR REGRESSION")
print()
err=mean_squared_error(y_train, y_predlr)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, y_predtestlr)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, y_predlr)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, y_predtestlr)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train,y_predlr)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test,y_predtestlr)
print("R2 TEST : "+str(err))
print()
print()
print("RANDOM FOREST REGRESSION")
print()
err=mean_squared_error(y_train, y_predrfr)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, y_predrfrtest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, y_predrfr)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, y_predrfrtest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, y_predrfr)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, y_predrfrtest)
print("R2 TEST : "+str(err))
print()
print()
print("RANDOM FOREST REGRESSION 2")
print()
err=mean_squared_error(y_train, y_predrfr2)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, y_predrfr2test)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, y_predrfr2)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, y_predrfr2test)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, y_predrfr2)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, y_predrfr2test)
print("R2 TEST : "+str(err))
print()
print()
print("XGBOOST")
print()
err=mean_squared_error(y_train, y_predxgb)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, y_predxgbtest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, y_predxgb)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, y_predxgbtest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, y_predxgb)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, y_predxgbtest)
print("R2 TEST : "+str(err))
print()
print()
print("SVM")
print()
err=mean_squared_error(y_train, ypredsvm)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredsvmtest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredsvm)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredsvmtest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredsvm)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredsvmtest)
print("R2 TEST : "+str(err))
print()
print()
print("Bayesian")
print()
err=mean_squared_error(y_train, ypredbayesian)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredbayesiantest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredbayesian)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredbayesiantest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredbayesian)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredbayesiantest)
print("R2 TEST : "+str(err))
print()
print()
print("SGD")
print()
err=mean_squared_error(y_train, ypredsgd)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredsgdtest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredsgd)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredsgdtest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredsgd)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredsgdtest)
print("R2 TEST : "+str(err))
print()
print()
print("Decision Tree")
print()
err=mean_squared_error(y_train, ypreddectree)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypreddectreetest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypreddectree)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypreddectreetest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypreddectree)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypreddectreetest)
print("R2 TEST : "+str(err))
print()
print()
print("Neural Network")
print()
err=mean_squared_error(y_train, ypredneural)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredneuraltest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredneural)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredneuraltest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredneural)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredneuraltest)
print("R2 TEST : "+str(err))
print()
print()
print("Lasso")
print()
err=mean_squared_error(y_train, ypredlaso)
print("RMSE TRAIN : "+str(np.sqrt(err)))
err=mean_squared_error(y_test, ypredlasotest)
print("RMSE TEST : "+str(np.sqrt(err)))
err=explained_variance_score(y_train, ypredlaso)
print("EXPLAINED VARIANCE TRAIN : "+str(err))
err=explained_variance_score(y_test, ypredlasotest)
print("EXPLAINED VARIANCE TEST : "+str(err))
err=r2_score(y_train, ypredlaso)
print("R2 TRAIN : "+str(err))
err=r2_score(y_test, ypredlasotest)
print("R2 TEST : "+str(err))
print()
print()
- The current question title, which states your concerns about the code, applies to too many questions on this site to be useful. The site standard is for the title to simply state the task accomplished by the code. Please see How do I ask a good question? (BCdotWEB, Jan 15, 2021)
- What exactly does your code do? (pacmaninbw, Jan 16, 2021)
- Next time, please add a description of what the code is supposed to be doing to the question. (Mast, Jan 17, 2021)
2 Answers
I would add some iteration to this, to reduce the parts where you repeat yourself. Note that two things vary across the repeated lines: the statistic you output (and its name) and whether it is computed on the train or the test results. The set of statistics is the same for every algorithm, while the data depends on your results, so I would pull the former into a global constant and build the latter within the function. Something like this:
from sklearn.metrics import mean_squared_error, explained_variance_score, r2_score
import numpy as np

def sqrt_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

STATISTICS = {"RMSE": sqrt_mean_squared_error,
              "EXPLAINED_VARIANCE": explained_variance_score,
              "R2": r2_score}

def print_performance(algo_name, y_train, y_test, y_train_pred, y_test_pred):
    print(algo_name + '\n')
    results = [("TRAIN", y_train, y_train_pred),
               ("TEST", y_test, y_test_pred)]
    for stat_name, stat in STATISTICS.items():
        for name, y, y_pred in results:
            print(f"{stat_name} {name} : {stat(y, y_pred)}")
    print('\n')
Note that I used an f-string for concise output, which in this case is equivalent to:
print(stat_name, name, ":", stat(y, y_pred))
Also, Python's official style guide, PEP 8, recommends using four spaces for indentation.
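With that in place, here is a sketch of how you might call it for every model; this assumes the prediction variables from the question (y_predlr, y_predtestlr, and so on) are already in scope:

models = [
    ("LINEAR REGRESSION", y_predlr, y_predtestlr),
    ("RANDOM FOREST REGRESSION", y_predrfr, y_predrfrtest),
    ("XGBOOST", y_predxgb, y_predxgbtest),
    # the remaining models follow the same (name, train predictions, test predictions) pattern
]

for algo_name, y_train_pred, y_test_pred in models:
    print_performance(algo_name, y_train, y_test, y_train_pred, y_test_pred)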
The thing you're doing isn't OOP (object-oriented programming), and that's probably ok. OOP is just one way of structuring complicated programs, and I don't personally like the pure form of it much anyway.
Your idea of making a function to handle repetitive code is good, but you're still basically writing imperative code. The function you've defined is a good example of what used to be called a "subroutine"; you can still make it into a "function".
- Functions take arguments and return something that depends on those arguments.
- In their pure form they don't do anything else. As you'll see, there's a lot of room for grey here.
- Ideally, it should only be "possible" to call a function with valid arguments. (Scare quotes because this is Python, which is basically PHP in a fancy suit.)
- Handle printing separately from computation; a minimal sketch of this follows the list. You're going to violate this all the time while you troubleshoot stuff, but remember to clean up after yourself.
- There are just plain a lot of detailed language features in any good language that can help you write better. Keep learning👍
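As a minimal sketch of that separation, and only a sketch (the NamedTuple version below is the actual suggestion; the y_train and y_predlr names are assumed from the question), the computation returns a value and the printing happens at the call site:

import numpy as np
from sklearn.metrics import mean_squared_error

def rmse(y_true, y_pred):
    # pure: takes arguments, returns a value, prints nothing
    return np.sqrt(mean_squared_error(y_true, y_pred))

# printing stays at the call site, separate from the computation
print(f"RMSE TRAIN : {rmse(y_train, y_predlr)}")
print(f"RMSE TEST : {rmse(y_test, y_predtestlr)}")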
I'm using NamedTuple below because I haven't gotten around to learning the ins and outs of dataclasses yet, but I understand dataclass is actually better for most situations. NamedTuple requires introducing some type hints, which are good, but which only do anything if you're using a type-checker like mypy.
from typing import NamedTuple

class ErrorScores(NamedTuple):
    root_mean_square: float  # I assume?
    explained_variance: float
    r2: float

def error_scores(data, predicate):
    return ErrorScores(
        root_mean_square=np.sqrt(mean_squared_error(data, predicate)),
        explained_variance=explained_variance_score(data, predicate),
        r2=r2_score(data, predicate)
    )
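(As an aside, since dataclass came up above: a sketch of the same record written as a frozen dataclass, purely for comparison; the name ErrorScoresDC is invented here.)

from dataclasses import dataclass

@dataclass(frozen=True)  # frozen=True gives NamedTuple-like immutability
class ErrorScoresDC:
    root_mean_square: float
    explained_variance: float
    r2: float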
class Algorithm(NamedTuple):
    name: str  # I'm just assuming these are strings, IDK
    predicate_name: str
    predicate_test_name: str

# There are a lot of variables here that IDK where they're coming from. It's suspiciously repetitive.
LINEAR_REGRESSION = Algorithm("LINEAR REGRESSION", y_predlr, y_predtestlr)
RANDOM_FOREST_REGRESSION = Algorithm("RANDOM FOREST REGRESSION", y_predrfr, y_predrfrtest)
RANDOM_FOREST_REGRESSION_2 = Algorithm("RANDOM FOREST REGRESSION 2", y_predrfr2, y_predrfr2test)
XGBOOST = Algorithm("XGBOOST", y_predxgb, y_predxgbtest)
SVM = Algorithm("SVM", ypredsvm, ypredsvmtest)
BAYESIAN = Algorithm("Bayesian", ypredbayesian, ypredbayesiantest)
SGD = Algorithm("SGD", ypredsgd, ypredsgdtest)
DECISION_TREE = Algorithm("Decision Tree", ypreddectree, ypreddectreetest)
NEURAL_NETWORK = Algorithm("Neural Network", ypredneural, ypredneuraltest)
LASSO = Algorithm("Lasso", ypredlaso, ypredlasotest)
def print_errors(algorithm: Algorithm, y_train, y_test):
    training_errors = error_scores(y_train, algorithm.predicate_name)
    testing_errors = error_scores(y_test, algorithm.predicate_test_name)
    print('\n'.join((
        f'{algorithm.name}',
        f'RMSE TRAIN : {training_errors.root_mean_square}',
        f'RMSE TEST : {testing_errors.root_mean_square}',
        f'EXPLAINED VARIANCE TRAIN : {training_errors.explained_variance}',
        f'EXPLAINED VARIANCE TEST : {testing_errors.explained_variance}',
        f'R2 TRAIN : {training_errors.r2}',
        f'R2 TEST : {testing_errors.r2}'
    )))
def print_errors_for_all(y_train, y_test):
    algorithms = (LINEAR_REGRESSION,
                  RANDOM_FOREST_REGRESSION,
                  RANDOM_FOREST_REGRESSION_2,
                  XGBOOST,
                  SVM,
                  BAYESIAN,
                  SGD,
                  DECISION_TREE,
                  NEURAL_NETWORK,
                  LASSO)
    for algorithm in algorithms:
        print_errors(algorithm, y_train, y_test)
Without knowing the surrounding context, this is probably good enough.
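As a usage sketch, assuming y_train, y_test, and all the prediction variables from the question are defined, the whole report then collapses to one call:

print_errors_for_all(y_train, y_test)

If you want the blank lines between algorithms that the original output had, you could add a print() after each print_errors call inside the loop.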