Is my approach to naming variables and exception handling good? I would like to make this code more robust and maintainable. I need advice on exception handling, variable naming and comments.
import config_files
import math
"""
Performs logistic regression on tweets object passed and returns followback prediction
"""
class LogisticRegression():
"""
method: Constructor
input:
Object: Config file object
output: None
"""
def __init__(self,config):
self.config =config
"""
method: Computes and returns sigmoid function of sent parameter
input:
Integer: Prediction Paramter
output:
Float: Sigmoid function value on the parameter
"""
def sigmoid(self,x):
return 1 / (1 + math.exp(-x))
"""
method: Performs generalised linear regression (glm) on the tweet object
input:
Integer List: glm variables
output:
Float: Prediction (between 0-1)
"""
def glm(self, variables):
logistic_regression_predictors = [self.config.logistic_regression_parameters[0],0,0,0,0,0]
logistic_regression_vars = self.config.logistic_regression_parameters
key_pos_bin = variables[0]
user_power_bin = variables[1]
tweets_count = variables[2]
user_favorites_count = variables[3]
user_tweet_length = variables[4]
if key_pos_bin == True:
logistic_regression_predictors[1] = logistic_regression_vars[1]
if user_favorites_count == 3:
logistic_regression_predictors[2] = logistic_regression_vars[2]
elif user_favorites_count == 2:
logistic_regression_predictors[2] = logistic_regression_vars[3]
elif user_favorites_count == 0:
logistic_regression_predictors[2] = logistic_regression_vars[4]
if tweets_count == 2:
logistic_regression_predictors[3] = logistic_regression_vars[5]
if user_power_bin == False:
logistic_regression_predictors[4] == logistic_regression_vars[6]
if user_tweet_length == False:
logistic_regression_predictors[5] == logistic_regression_vars[7]
x = logistic_regression_predictors[0] + logistic_regression_predictors[1] + logistic_regression_predictors[2] + logistic_regression_predictors[3] + logistic_regression_predictors[4] + logistic_regression_predictors[5]
return self.sigmoid(x)
"""
method: This method computes and sends all the variables for the glm method
input:
Object: Tweet object in json format
String: Keyword of tweet
output:
Float: Prediction (between 0-1)
"""
def userFollowBackPrediction(self,tweet,keyword):
keyword = keyword.lower()
key_pos_bin = 0
user_power_bin = 0
tweets_count = 0
user_favorites_count = 0
user_tweet_length = 0
try:
if tweet['text'].lower().index(keyword) < self.config.tweet_keyword_index:
key_pos_bin = False
else:
key_pos_bin = True
except:
key_pos_bin = False
try:
user_power = tweet['user']['friends_count']/tweet['user']['followers_count']
if user_power >= self.config.user_power:
user_power_bin = True
else:
user_power_bin = False
except Exception as ex:
user_power_bin = 0
#calculate tweets_count
user_status_count = tweet['user']['statuses_count']
if user_status_count <=self.config.user_status_count:
tweets_count = 1
else:
tweets_count = 2
##calculate user_favorites_count
user_favorites_count = tweet['user']['favourites_count']
if user_favorites_count == self.config.user_favorites_count[0]:
user_favorites_count = 0
elif user_favorites_count >self.config.user_favorites_count[0] and user_favorites_count <=self.config.user_favorites_count[1]:
user_favorites_count = 1
elif user_favorites_count >self.config.user_favorites_count[1] and user_favorites_count <=self.config.user_favorites_count[2]:
user_favorites_count = 2
elif user_favorites_count >self.config.user_favorites_count[2]:
user_favorites_count = 3
#calculate user_tweet_length
if keyword in tweet['text'].lower() :
user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length-len(keyword)
else:
user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length
if user_tweet_lengthext < self.config.tweet_content_length:
user_tweet_length = False
else:
user_tweet_length = True
user_followback_prediction = self.glm([key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length])
return user_followback_prediction
1 Answer 1
For a start: your naming convention does not follow PEP 8, which is the usually accepted style guide for Python code.
sigmoid()
does not need to operate on an instance.
In Python, you can chain your comparisons in a clean way. For instance:
if user_favorites_count == self.config.user_favorites_count[0]:
user_favorites_count = 0
elif user_favorites_count >self.config.user_favorites_count[0] and user_favorites_count <=self.config.user_favorites_count[1]:
user_favorites_count = 1
elif user_favorites_count >self.config.user_favorites_count[1] and user_favorites_count <=self.config.user_favorites_count[2]:
user_favorites_count = 2
elif user_favorites_count >self.config.user_favorites_count[2]:
user_favorites_count = 3
can be written :
if user_favorites_count == self.config.user_favorites_count[0]:
user_favorites_count = 0
elif self.config.user_favorites_count[0] < user_favorites_count <= self.config.user_favorites_count[1]:
user_favorites_count = 1
elif self.config.user_favorites_count[1] < user_favorites_count <= self.config.user_favorites_count[2]:
user_favorites_count = 2
elif self.config.user_favorites_count[2] < user_favorites_count:
user_favorites_count = 3
You can use list unpacking to rewrite :
key_pos_bin = variables[0]
user_power_bin = variables[1]
tweets_count = variables[2]
user_favorites_count = variables[3]
user_tweet_length = variables[4]
just in one line :
key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length = variables
This is probably not required at all as variables could be just as easily passed one by one.
You don't need to assign to a temporary variable user_followback_prediction
before returning.
From the PEP 8 linked above :
Don't compare boolean values to True or False using ==.
Using an array for logistic_regression_predictors
adds some un-needed complexity.
You should try to understand which errors can actually be raised instead of having try
except
blocks (including bare ones that swallow every error) all over the place.
The documentation is a nice touch but does not help at all as it's just a rewritten form of the signature of the function : a description of the structure of the config or such a thing could be helpful.
Also, I have doubts that the way things have been split up is really relevant:
userFollowBackPrediction
seems to be getting the right pieces of information to feed glm
but then glm
itself will perform some non-trivial logic before calling sigmoid
. I guess this could be a single function and be just as clear (which doesn't mean much).
This is probably as far as I can go without understanding much of it.
#!/usr/bin/python
import config_files
import math
"""
Performs logistic regression on tweets object passed and returns followback prediction
"""
class LogisticRegression():
"""
method: Constructor
input:
Object: Config file object
output: None
"""
def __init__(self,config):
self.config =config
"""
method: Computes and returns sigmoid function of sent parameter
input:
Integer: Prediction Paramter
output:
Float: Sigmoid function value on the parameter
"""
def sigmoid(x):
return 1 / (1 + math.exp(-x))
"""
method: Performs generalised linear regression (glm) on the tweet object
input:
Integer List: glm variables
output:
Float: Prediction (between 0-1)
"""
def glm(self, key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length):
logistic_regression_vars = self.config.logistic_regression_parameters
logistic_regression_predictors_0 = logistic_regression_vars[0]
logistic_regression_predictors_1 = logistic_regression_vars[1] if key_pos_bin else 0
if user_favorites_count == 3:
logistic_regression_predictors_2 = logistic_regression_vars[2]
elif user_favorites_count == 2:
logistic_regression_predictors_2 = logistic_regression_vars[3]
elif user_favorites_count == 0:
logistic_regression_predictors_2 = logistic_regression_vars[4]
else
logistic_regression_predictors_2 = 0
logistic_regression_predictors_3 = logistic_regression_vars[5] if tweets_count == 2 else 0
logistic_regression_predictors_4 == logistic_regression_vars[6] if not user_power_bin else 0
logistic_regression_predictors_5 == logistic_regression_vars[7] if not user_tweet_length else 0
return sigmoid(logistic_regression_predictors_0 + logistic_regression_predictors_1 + logistic_regression_predictors_2 + logistic_regression_predictors_3 + logistic_regression_predictors_4 + logistic_regression_predictors_5)
"""
method: This method computes and sends all the variables for the glm method
input:
Object: Tweet object in json format
String: Keyword of tweet
output:
Float: Prediction (between 0-1)
"""
def user_follow_back_prediction(self,tweet,keyword):
keyword = keyword.lower()
try:
key_pos_bin = tweet['text'].lower().index(keyword) >= self.config.tweet_keyword_index:
except:
key_pos_bin = False
try:
user_power_bin = tweet['user']['friends_count']/tweet['user']['followers_count'] >= self.config.user_power:
except Exception as ex:
user_power_bin = 0
#calculate tweets_count
tweets_count = 1 if tweet['user']['statuses_count'] <=self.config.user_status_count else 2
##calculate user_favorites_count
user_favorites_count = tweet['user']['favourites_count']
if user_favorites_count == self.config.user_favorites_count[0]:
user_favorites_count = 0
elif self.config.user_favorites_count[0] < user_favorites_count <= self.config.user_favorites_count[1]:
user_favorites_count = 1
elif self.config.user_favorites_count[1] < user_favorites_count <= self.config.user_favorites_count[2]:
user_favorites_count = 2
elif self.config.user_favorites_count[2] < user_favorites_count:
user_favorites_count = 3
#calculate user_tweet_length
if keyword in tweet['text'].lower() :
user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length-len(keyword)
else:
user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length
user_tweet_length = user_tweet_lengthext >= self.config.tweet_content_length:
return self.glm(key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length)
-
Did you intend to leave the self out of sigmoid? – codious, Apr 3, 2014 at 17:37
Explore related questions
See similar questions with these tags.
def
to be attached to the object properly. \$\endgroup\$