2
\$\begingroup\$

Is my approach good to naming variables and exception handling? I would like to make this code more robust and maintainable. I need advice on exception handling, var naming and comments.

 import config_files
 import math
 """
 Performs logistic regression on tweets object passed and returns followback prediction
 """
 class LogisticRegression():
 """
 method: Constructor
 input: 
 Object: Config file object
 output: None
 """ 
 def __init__(self,config):
 self.config =config
 """
 method: Computes and returns sigmoid function of sent parameter
 input: 
 Integer: Prediction Paramter
 output:
 Float: Sigmoid function value on the parameter
 """
 def sigmoid(self,x):
 return 1 / (1 + math.exp(-x))
 """
 method: Performs generalised linear regression (glm) on the tweet object
 input: 
 Integer List: glm variables 
 output:
 Float: Prediction (between 0-1)
 """
 def glm(self, variables):
 logistic_regression_predictors = [self.config.logistic_regression_parameters[0],0,0,0,0,0]
 logistic_regression_vars = self.config.logistic_regression_parameters
 key_pos_bin = variables[0] 
 user_power_bin = variables[1]
 tweets_count = variables[2]
 user_favorites_count = variables[3]
 user_tweet_length = variables[4]
 if key_pos_bin == True:
 logistic_regression_predictors[1] = logistic_regression_vars[1]
 if user_favorites_count == 3:
 logistic_regression_predictors[2] = logistic_regression_vars[2]
 elif user_favorites_count == 2:
 logistic_regression_predictors[2] = logistic_regression_vars[3]
 elif user_favorites_count == 0:
 logistic_regression_predictors[2] = logistic_regression_vars[4]
 if tweets_count == 2:
 logistic_regression_predictors[3] = logistic_regression_vars[5]
 if user_power_bin == False:
 logistic_regression_predictors[4] == logistic_regression_vars[6]
 if user_tweet_length == False:
 logistic_regression_predictors[5] == logistic_regression_vars[7]
 x = logistic_regression_predictors[0] + logistic_regression_predictors[1] + logistic_regression_predictors[2] + logistic_regression_predictors[3] + logistic_regression_predictors[4] + logistic_regression_predictors[5]
 return self.sigmoid(x)
 """
 method: This method computes and sends all the variables for the glm method
 input: 
 Object: Tweet object in json format
 String: Keyword of tweet
 output:
 Float: Prediction (between 0-1)
 """
 def userFollowBackPrediction(self,tweet,keyword):
 keyword = keyword.lower()
 key_pos_bin = 0
 user_power_bin = 0
 tweets_count = 0
 user_favorites_count = 0
 user_tweet_length = 0
 try:
 if tweet['text'].lower().index(keyword) < self.config.tweet_keyword_index:
 key_pos_bin = False
 else:
 key_pos_bin = True
 except:
 key_pos_bin = False
 try:
 user_power = tweet['user']['friends_count']/tweet['user']['followers_count']
 if user_power >= self.config.user_power:
 user_power_bin = True
 else:
 user_power_bin = False
 except Exception as ex:
 user_power_bin = 0
 #calculate tweets_count
 user_status_count = tweet['user']['statuses_count']
 if user_status_count <=self.config.user_status_count:
 tweets_count = 1
 else:
 tweets_count = 2
 ##calculate user_favorites_count
 user_favorites_count = tweet['user']['favourites_count']
 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif user_favorites_count >self.config.user_favorites_count[0] and user_favorites_count <=self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif user_favorites_count >self.config.user_favorites_count[1] and user_favorites_count <=self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif user_favorites_count >self.config.user_favorites_count[2]:
 user_favorites_count = 3
 #calculate user_tweet_length
 if keyword in tweet['text'].lower() :
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length-len(keyword) 
 else:
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length
 if user_tweet_lengthext < self.config.tweet_content_length:
 user_tweet_length = False
 else:
 user_tweet_length = True
 user_followback_prediction = self.glm([key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length])
 return user_followback_prediction
200_success
145k22 gold badges190 silver badges478 bronze badges
asked Apr 1, 2014 at 15:50
\$\endgroup\$
1
  • 1
    \$\begingroup\$ Your docstrings need to be below the def to be attached to the object properly. \$\endgroup\$ Commented Apr 1, 2014 at 17:49

1 Answer 1

1
\$\begingroup\$

For a start : your naming convention does not follow PEP 8 which is the usually accepted style guide for python code.


sigmoid() does not need to operate on an instance.


In Python, you can chain your comparison in a clean way. For instance :

 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif user_favorites_count >self.config.user_favorites_count[0] and user_favorites_count <=self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif user_favorites_count >self.config.user_favorites_count[1] and user_favorites_count <=self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif user_favorites_count >self.config.user_favorites_count[2]:
 user_favorites_count = 3

can be written :

 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif self.config.user_favorites_count[0] < user_favorites_count <= self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif self.config.user_favorites_count[1] < user_favorites_count <= self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif self.config.user_favorites_count[2] < user_favorites_count:
 user_favorites_count = 3

You can use list unpacking to rewrite :

 key_pos_bin = variables[0] 
 user_power_bin = variables[1]
 tweets_count = variables[2]
 user_favorites_count = variables[3]
 user_tweet_length = variables[4]

just in one line :

 key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length = variables

This is probably not required at all as variables could be just as easily passed one by one.


You don't need to assign to a temporary variable user_followback_prediction before returning.


From the PEP 8 linked above :

Don't compare boolean values to True or False using ==.


Using an array for logistic_regression_predictors adds some un-needed complexity.


You should try to understand which errors can be thrown instead of having try catch all over the place.


The documentation is a nice touch but does not help at all as it's just a rewritten form of the signature of the function : a description of the structure of the config or such a thing could be helpful.

Also, I have doubts that the way things have been splitted is really relevant : logistic_regression_predictors seems to be getting the right pieces of information to feed glm but them glm itself will perform some non-trivial logic before calling sigmoid. I guess this could be a single function and be just as clear (which doesn't mean much).

This is probably as far as I can go without understanding much of it.

#!/usr/bin/python
import config_files
import math
"""
Performs logistic regression on tweets object passed and returns followback prediction
"""
class LogisticRegression():
 """
 method: Constructor
 input:
 Object: Config file object
 output: None
 """
 def __init__(self,config):
 self.config =config
 """
 method: Computes and returns sigmoid function of sent parameter
 input:
 Integer: Prediction Paramter
 output:
 Float: Sigmoid function value on the parameter
 """
 def sigmoid(x):
 return 1 / (1 + math.exp(-x))
 """
 method: Performs generalised linear regression (glm) on the tweet object
 input:
 Integer List: glm variables
 output:
 Float: Prediction (between 0-1)
 """
 def glm(self, key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length):
 logistic_regression_vars = self.config.logistic_regression_parameters
 logistic_regression_predictors_0 = logistic_regression_vars[0] 
 logistic_regression_predictors_1 = logistic_regression_vars[1] if key_pos_bin else 0
 if user_favorites_count == 3:
 logistic_regression_predictors_2 = logistic_regression_vars[2]
 elif user_favorites_count == 2:
 logistic_regression_predictors_2 = logistic_regression_vars[3]
 elif user_favorites_count == 0:
 logistic_regression_predictors_2 = logistic_regression_vars[4]
 else
 logistic_regression_predictors_2 = 0
 logistic_regression_predictors_3 = logistic_regression_vars[5] if tweets_count == 2 else 0
 logistic_regression_predictors_4 == logistic_regression_vars[6] if not user_power_bin else 0
 logistic_regression_predictors_5 == logistic_regression_vars[7] if not user_tweet_length else 0
 return sigmoid(logistic_regression_predictors_0 + logistic_regression_predictors_1 + logistic_regression_predictors_2 + logistic_regression_predictors_3 + logistic_regression_predictors_4 + logistic_regression_predictors_5)
 """
 method: This method computes and sends all the variables for the glm method
 input:
 Object: Tweet object in json format
 String: Keyword of tweet
 output:
 Float: Prediction (between 0-1)
 """
 def user_follow_back_prediction(self,tweet,keyword):
 keyword = keyword.lower()
 try:
 key_pos_bin = tweet['text'].lower().index(keyword) >= self.config.tweet_keyword_index:
 except:
 key_pos_bin = False
 try:
 user_power_bin = tweet['user']['friends_count']/tweet['user']['followers_count'] >= self.config.user_power:
 except Exception as ex:
 user_power_bin = 0
 #calculate tweets_count
 tweets_count = 1 if tweet['user']['statuses_count'] <=self.config.user_status_count else 2
 ##calculate user_favorites_count
 user_favorites_count = tweet['user']['favourites_count']
 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif self.config.user_favorites_count[0] < user_favorites_count <= self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif self.config.user_favorites_count[1] < user_favorites_count <= self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif self.config.user_favorites_count[2] < user_favorites_count:
 user_favorites_count = 3
 #calculate user_tweet_length
 if keyword in tweet['text'].lower() :
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length-len(keyword)
 else:
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length
 user_tweet_length = user_tweet_lengthext >= self.config.tweet_content_length:
 return self.glm(key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length)
answered Apr 1, 2014 at 16:49
\$\endgroup\$
1
  • \$\begingroup\$ did you intend to leave the self out of sigmoid? \$\endgroup\$ Commented Apr 3, 2014 at 17:37

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.