Compute logistic regression on tweet objects

Question 1

Is my approach good to naming variables and exception handling? I would like to make this code more robust and maintainable. I need advice on exception handling, var naming and comments.

 import config_files
 import math
 """
 Performs logistic regression on tweets object passed and returns followback prediction
 """
 class LogisticRegression():
 """
 method: Constructor
 input: 
 Object: Config file object
 output: None
 """ 
 def __init__(self,config):
 self.config =config
 """
 method: Computes and returns sigmoid function of sent parameter
 input: 
 Integer: Prediction Paramter
 output:
 Float: Sigmoid function value on the parameter
 """
 def sigmoid(self,x):
 return 1 / (1 + math.exp(-x))
 """
 method: Performs generalised linear regression (glm) on the tweet object
 input: 
 Integer List: glm variables 
 output:
 Float: Prediction (between 0-1)
 """
 def glm(self, variables):
 logistic_regression_predictors = [self.config.logistic_regression_parameters[0],0,0,0,0,0]
 logistic_regression_vars = self.config.logistic_regression_parameters
 key_pos_bin = variables[0] 
 user_power_bin = variables[1]
 tweets_count = variables[2]
 user_favorites_count = variables[3]
 user_tweet_length = variables[4]
 if key_pos_bin == True:
 logistic_regression_predictors[1] = logistic_regression_vars[1]
 if user_favorites_count == 3:
 logistic_regression_predictors[2] = logistic_regression_vars[2]
 elif user_favorites_count == 2:
 logistic_regression_predictors[2] = logistic_regression_vars[3]
 elif user_favorites_count == 0:
 logistic_regression_predictors[2] = logistic_regression_vars[4]
 if tweets_count == 2:
 logistic_regression_predictors[3] = logistic_regression_vars[5]
 if user_power_bin == False:
 logistic_regression_predictors[4] == logistic_regression_vars[6]
 if user_tweet_length == False:
 logistic_regression_predictors[5] == logistic_regression_vars[7]
 x = logistic_regression_predictors[0] + logistic_regression_predictors[1] + logistic_regression_predictors[2] + logistic_regression_predictors[3] + logistic_regression_predictors[4] + logistic_regression_predictors[5]
 return self.sigmoid(x)
 """
 method: This method computes and sends all the variables for the glm method
 input: 
 Object: Tweet object in json format
 String: Keyword of tweet
 output:
 Float: Prediction (between 0-1)
 """
 def userFollowBackPrediction(self,tweet,keyword):
 keyword = keyword.lower()
 key_pos_bin = 0
 user_power_bin = 0
 tweets_count = 0
 user_favorites_count = 0
 user_tweet_length = 0
 try:
 if tweet['text'].lower().index(keyword) < self.config.tweet_keyword_index:
 key_pos_bin = False
 else:
 key_pos_bin = True
 except:
 key_pos_bin = False
 try:
 user_power = tweet['user']['friends_count']/tweet['user']['followers_count']
 if user_power >= self.config.user_power:
 user_power_bin = True
 else:
 user_power_bin = False
 except Exception as ex:
 user_power_bin = 0
 #calculate tweets_count
 user_status_count = tweet['user']['statuses_count']
 if user_status_count <=self.config.user_status_count:
 tweets_count = 1
 else:
 tweets_count = 2
 ##calculate user_favorites_count
 user_favorites_count = tweet['user']['favourites_count']
 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif user_favorites_count >self.config.user_favorites_count[0] and user_favorites_count <=self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif user_favorites_count >self.config.user_favorites_count[1] and user_favorites_count <=self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif user_favorites_count >self.config.user_favorites_count[2]:
 user_favorites_count = 3
 #calculate user_tweet_length
 if keyword in tweet['text'].lower() :
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length-len(keyword) 
 else:
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length
 if user_tweet_lengthext < self.config.tweet_content_length:
 user_tweet_length = False
 else:
 user_tweet_length = True
 user_followback_prediction = self.glm([key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length])
 return user_followback_prediction

Question 2

Your docstrings need to be below the def to be attached to the object properly.

Question 3

For a start : your naming convention does not follow PEP 8 which is the usually accepted style guide for python code.

sigmoid() does not need to operate on an instance.

In Python, you can chain your comparison in a clean way. For instance :

 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif user_favorites_count >self.config.user_favorites_count[0] and user_favorites_count <=self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif user_favorites_count >self.config.user_favorites_count[1] and user_favorites_count <=self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif user_favorites_count >self.config.user_favorites_count[2]:
 user_favorites_count = 3

can be written :

 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif self.config.user_favorites_count[0] < user_favorites_count <= self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif self.config.user_favorites_count[1] < user_favorites_count <= self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif self.config.user_favorites_count[2] < user_favorites_count:
 user_favorites_count = 3

You can use list unpacking to rewrite :

 key_pos_bin = variables[0] 
 user_power_bin = variables[1]
 tweets_count = variables[2]
 user_favorites_count = variables[3]
 user_tweet_length = variables[4]

just in one line :

 key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length = variables

This is probably not required at all as variables could be just as easily passed one by one.

You don't need to assign to a temporary variable user_followback_prediction before returning.

From the PEP 8 linked above :

Don't compare boolean values to True or False using ==.

Using an array for logistic_regression_predictors adds some un-needed complexity.

You should try to understand which errors can be thrown instead of having try catch all over the place.

The documentation is a nice touch but does not help at all as it's just a rewritten form of the signature of the function : a description of the structure of the config or such a thing could be helpful.

Also, I have doubts that the way things have been splitted is really relevant : logistic_regression_predictors seems to be getting the right pieces of information to feed glm but them glm itself will perform some non-trivial logic before calling sigmoid. I guess this could be a single function and be just as clear (which doesn't mean much).

This is probably as far as I can go without understanding much of it.

#!/usr/bin/python
import config_files
import math
"""
Performs logistic regression on tweets object passed and returns followback prediction
"""
class LogisticRegression():
 """
 method: Constructor
 input:
 Object: Config file object
 output: None
 """
 def __init__(self,config):
 self.config =config
 """
 method: Computes and returns sigmoid function of sent parameter
 input:
 Integer: Prediction Paramter
 output:
 Float: Sigmoid function value on the parameter
 """
 def sigmoid(x):
 return 1 / (1 + math.exp(-x))
 """
 method: Performs generalised linear regression (glm) on the tweet object
 input:
 Integer List: glm variables
 output:
 Float: Prediction (between 0-1)
 """
 def glm(self, key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length):
 logistic_regression_vars = self.config.logistic_regression_parameters
 logistic_regression_predictors_0 = logistic_regression_vars[0] 
 logistic_regression_predictors_1 = logistic_regression_vars[1] if key_pos_bin else 0
 if user_favorites_count == 3:
 logistic_regression_predictors_2 = logistic_regression_vars[2]
 elif user_favorites_count == 2:
 logistic_regression_predictors_2 = logistic_regression_vars[3]
 elif user_favorites_count == 0:
 logistic_regression_predictors_2 = logistic_regression_vars[4]
 else
 logistic_regression_predictors_2 = 0
 logistic_regression_predictors_3 = logistic_regression_vars[5] if tweets_count == 2 else 0
 logistic_regression_predictors_4 == logistic_regression_vars[6] if not user_power_bin else 0
 logistic_regression_predictors_5 == logistic_regression_vars[7] if not user_tweet_length else 0
 return sigmoid(logistic_regression_predictors_0 + logistic_regression_predictors_1 + logistic_regression_predictors_2 + logistic_regression_predictors_3 + logistic_regression_predictors_4 + logistic_regression_predictors_5)
 """
 method: This method computes and sends all the variables for the glm method
 input:
 Object: Tweet object in json format
 String: Keyword of tweet
 output:
 Float: Prediction (between 0-1)
 """
 def user_follow_back_prediction(self,tweet,keyword):
 keyword = keyword.lower()
 try:
 key_pos_bin = tweet['text'].lower().index(keyword) >= self.config.tweet_keyword_index:
 except:
 key_pos_bin = False
 try:
 user_power_bin = tweet['user']['friends_count']/tweet['user']['followers_count'] >= self.config.user_power:
 except Exception as ex:
 user_power_bin = 0
 #calculate tweets_count
 tweets_count = 1 if tweet['user']['statuses_count'] <=self.config.user_status_count else 2
 ##calculate user_favorites_count
 user_favorites_count = tweet['user']['favourites_count']
 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif self.config.user_favorites_count[0] < user_favorites_count <= self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif self.config.user_favorites_count[1] < user_favorites_count <= self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif self.config.user_favorites_count[2] < user_favorites_count:
 user_favorites_count = 3
 #calculate user_tweet_length
 if keyword in tweet['text'].lower() :
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length-len(keyword)
 else:
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length
 user_tweet_length = user_tweet_lengthext >= self.config.tweet_content_length:
 return self.glm(key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length)

Question 4

did you intend to leave the self out of sigmoid?

SylvainD SylvainD 29.7k1 gold badge49 silver badges93 bronze badges · Accepted Answer · 2014-04-01 16:49:51Z

For a start : your naming convention does not follow PEP 8 which is the usually accepted style guide for python code.

sigmoid() does not need to operate on an instance.

In Python, you can chain your comparison in a clean way. For instance :

 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif user_favorites_count >self.config.user_favorites_count[0] and user_favorites_count <=self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif user_favorites_count >self.config.user_favorites_count[1] and user_favorites_count <=self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif user_favorites_count >self.config.user_favorites_count[2]:
 user_favorites_count = 3

can be written :

 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif self.config.user_favorites_count[0] < user_favorites_count <= self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif self.config.user_favorites_count[1] < user_favorites_count <= self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif self.config.user_favorites_count[2] < user_favorites_count:
 user_favorites_count = 3

You can use list unpacking to rewrite :

 key_pos_bin = variables[0] 
 user_power_bin = variables[1]
 tweets_count = variables[2]
 user_favorites_count = variables[3]
 user_tweet_length = variables[4]

just in one line :

 key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length = variables

This is probably not required at all as variables could be just as easily passed one by one.

You don't need to assign to a temporary variable user_followback_prediction before returning.

From the PEP 8 linked above :

Don't compare boolean values to True or False using ==.

Using an array for logistic_regression_predictors adds some un-needed complexity.

You should try to understand which errors can be thrown instead of having try catch all over the place.

The documentation is a nice touch but does not help at all as it's just a rewritten form of the signature of the function : a description of the structure of the config or such a thing could be helpful.

Also, I have doubts that the way things have been splitted is really relevant : logistic_regression_predictors seems to be getting the right pieces of information to feed glm but them glm itself will perform some non-trivial logic before calling sigmoid. I guess this could be a single function and be just as clear (which doesn't mean much).

This is probably as far as I can go without understanding much of it.

#!/usr/bin/python
import config_files
import math
"""
Performs logistic regression on tweets object passed and returns followback prediction
"""
class LogisticRegression():
 """
 method: Constructor
 input:
 Object: Config file object
 output: None
 """
 def __init__(self,config):
 self.config =config
 """
 method: Computes and returns sigmoid function of sent parameter
 input:
 Integer: Prediction Paramter
 output:
 Float: Sigmoid function value on the parameter
 """
 def sigmoid(x):
 return 1 / (1 + math.exp(-x))
 """
 method: Performs generalised linear regression (glm) on the tweet object
 input:
 Integer List: glm variables
 output:
 Float: Prediction (between 0-1)
 """
 def glm(self, key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length):
 logistic_regression_vars = self.config.logistic_regression_parameters
 logistic_regression_predictors_0 = logistic_regression_vars[0] 
 logistic_regression_predictors_1 = logistic_regression_vars[1] if key_pos_bin else 0
 if user_favorites_count == 3:
 logistic_regression_predictors_2 = logistic_regression_vars[2]
 elif user_favorites_count == 2:
 logistic_regression_predictors_2 = logistic_regression_vars[3]
 elif user_favorites_count == 0:
 logistic_regression_predictors_2 = logistic_regression_vars[4]
 else
 logistic_regression_predictors_2 = 0
 logistic_regression_predictors_3 = logistic_regression_vars[5] if tweets_count == 2 else 0
 logistic_regression_predictors_4 == logistic_regression_vars[6] if not user_power_bin else 0
 logistic_regression_predictors_5 == logistic_regression_vars[7] if not user_tweet_length else 0
 return sigmoid(logistic_regression_predictors_0 + logistic_regression_predictors_1 + logistic_regression_predictors_2 + logistic_regression_predictors_3 + logistic_regression_predictors_4 + logistic_regression_predictors_5)
 """
 method: This method computes and sends all the variables for the glm method
 input:
 Object: Tweet object in json format
 String: Keyword of tweet
 output:
 Float: Prediction (between 0-1)
 """
 def user_follow_back_prediction(self,tweet,keyword):
 keyword = keyword.lower()
 try:
 key_pos_bin = tweet['text'].lower().index(keyword) >= self.config.tweet_keyword_index:
 except:
 key_pos_bin = False
 try:
 user_power_bin = tweet['user']['friends_count']/tweet['user']['followers_count'] >= self.config.user_power:
 except Exception as ex:
 user_power_bin = 0
 #calculate tweets_count
 tweets_count = 1 if tweet['user']['statuses_count'] <=self.config.user_status_count else 2
 ##calculate user_favorites_count
 user_favorites_count = tweet['user']['favourites_count']
 if user_favorites_count == self.config.user_favorites_count[0]:
 user_favorites_count = 0
 elif self.config.user_favorites_count[0] < user_favorites_count <= self.config.user_favorites_count[1]:
 user_favorites_count = 1
 elif self.config.user_favorites_count[1] < user_favorites_count <= self.config.user_favorites_count[2]:
 user_favorites_count = 2
 elif self.config.user_favorites_count[2] < user_favorites_count:
 user_favorites_count = 3
 #calculate user_tweet_length
 if keyword in tweet['text'].lower() :
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length-len(keyword)
 else:
 user_tweet_lengthext = len(tweet['text'])-self.config.tweet_link_length
 user_tweet_length = user_tweet_lengthext >= self.config.tweet_content_length:
 return self.glm(key_pos_bin, user_power_bin, tweets_count, user_favorites_count, user_tweet_length)

\$\begingroup\$ did you intend to leave the self out of sigmoid? \$\endgroup\$

codious
– codious

2014年04月03日 17:37:31 +00:00
Commented Apr 3, 2014 at 17:37

Stack Exchange Network

Compute logistic regression on tweet objects

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Compute logistic regression on tweet objects

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions