I'm working on a project where I need to get all followers from a specific user and store this info in a database. I have done this and so far it suits my needs, but I'd like to make it more "professional" and would love to hear your suggestions.
The first file is twitter.py and the second one is app.py.
import requests
class TwitterAPI():
    """Minimal wrapper around two Twitter REST API v1.1 endpoints.

    The bearer token is read from the first line of ``token.ini`` (looked up
    relative to the current working directory) when the object is created.
    """

    # Maps the ``action`` accepted by ``get_follower`` to its endpoint URL.
    ENDPOINTS = {
        'list': 'https://api.twitter.com/1.1/followers/ids.json',
        'info': 'https://api.twitter.com/1.1/users/show.json',
    }

    def __init__(self):
        with open('token.ini') as ini:
            # readline() keeps the trailing newline, which would otherwise
            # end up inside the Authorization header value.
            self.token = ini.readline().strip()

    def get_follower(self, action, **kwargs):
        """Call the endpoint selected by *action* and return its result.

        Args:
            action: ``'list'`` (follower ids) or ``'info'`` (user details).
            **kwargs: Query parameters sent to the API unchanged; must
                contain ``user_id`` or ``screen_name``.

        Returns:
            The decoded JSON body when the response status is 200,
            otherwise the integer HTTP status code. Callers must check
            which of the two they received.

        Raises:
            TypeError: If neither ``user_id`` nor ``screen_name`` is given.
            KeyError: If *action* is not a known endpoint name.
        """
        if 'user_id' not in kwargs and 'screen_name' not in kwargs:
            raise TypeError('Incorrect parameters, expecting `user_id` or `screen_name`')

        r = requests.get(
            self.ENDPOINTS[action],
            params=kwargs,
            headers={'Authorization': 'Bearer ' + self.token},
        )
        return r.json() if r.status_code == 200 else r.status_code
import twitter
import sqlite3
from datetime import datetime
# Store every follower of one Twitter account in a local SQLite database.
#
# ``TwitterAPI.get_follower`` returns the decoded JSON (a dict) on success and
# the bare HTTP status code (an int) on failure, so every result is checked
# before it is used.
import sys

conn = sqlite3.connect('twitter_analytics.sqlite3')
try:
    c = conn.cursor()
    api = twitter.TwitterAPI()

    followers = api.get_follower(action='list', screen_name='Croves')
    if not isinstance(followers, dict):
        # Nothing useful can be stored without the follower list; stop before
        # a user row is written.
        raise SystemExit('Could not fetch follower ids (HTTP status %s)' % followers)

    c.execute("INSERT INTO user(twitter_screen_name, created_at) VALUES(?, ?)", ('Croves', datetime.now()))
    conn.commit()
    user_id = c.lastrowid

    for count, follower in enumerate(followers['ids']):
        info = api.get_follower(action='info', user_id=follower)
        if not isinstance(info, dict):
            # A single failing lookup should not abort the whole run.
            print(
                'Skipping follower %s (HTTP status %s)' % (follower, info),
                file=sys.stderr,
            )
            continue

        c.execute("""
            INSERT INTO user_followers(user_id, id_str, name, screen_name, location, url, description, protected, verified, followers_count, friends_count, profile_banner_url, profile_image_url_https, created_at)
            VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            user_id,
            info.get('id_str'),
            info.get('name'),
            info.get('screen_name'),
            info.get('location'),
            info.get('url'),
            info.get('description'),
            info.get('protected'),
            info.get('verified'),
            info.get('followers_count'),
            info.get('friends_count'),
            info.get('profile_banner_url'),
            info.get('profile_image_url_https'),
            datetime.now()
        ))

        # Commit in small batches so an interrupted run keeps most rows.
        if count % 10 == 0:
            conn.commit()

    conn.commit()
finally:
    # Close the connection even when an API call or an insert fails.
    conn.close()
1 Answer 1
A few suggestions:
PEP-8
Some selected excerpts from pep-8:
Avoid extraneous whitespace immediately inside parentheses, brackets or braces:
Don't use spaces around the = sign when used to indicate a keyword argument, or when used to indicate a default value for an unannotated function parameter
Surround top-level function and class definitions with two blank lines.
Use blank lines in functions, sparingly, to indicate logical sections.
Imports should be grouped in the following order:
- Standard library imports.
- Related third party imports.
- Local application/library specific imports.
You should put a blank line between each group of imports.
Token
Reading the token inside the constructor of your API wrapper couples the class to a specific file on disk, which is bad practice. Instead, the constructor should accept the token as an argument, and the caller should be responsible for providing a valid token.
Functions
You currently have a single function performing two different tasks/actions. Instead, split it into separate functions that each do one thing.
Return values/error handling
The return type of your API wrapper is inconsistent. In case of an error it returns just an integer value, whereas the caller (app.py) always assumes that the result will never be an integer (no errors, ever?).
Either raise an exception in the wrapper itself when the API call fails, or have the caller check the result before using it.
sqlite3 bulk operation
The sqlite3 package has a method, executemany, to perform a bulk insert in a single pass. Check the sqlite3 documentation for details.
if __name__ block
Put the execution logic of your script inside an if __name__ == "__main__" block. A more detailed explanation can be found on Stack Overflow.
Example rewrite
twitter.py
import requests
class TwitterAPIError(Exception):
    """Raised when the Twitter API answers with an unsuccessful status.

    Attributes are set so callers can react to specific failures:
    ``status_code`` is the HTTP status and ``url`` the requested endpoint.
    """

    def __init__(self, status_code, url):
        super().__init__(
            f"API returned invalid response (HTTP {status_code}) for {url}"
        )
        self.status_code = status_code
        self.url = url


class TwitterAPI:
    """Thin client for the Twitter REST API v1.1 using bearer-token auth."""

    API_URL = "https://api.twitter.com/1.1"

    def __init__(self, api_token: str):
        """Store *api_token* and prepare the Authorization header."""
        self.token = api_token
        self.headers = {
            "Authorization": f"Bearer {self.token}"
        }

    def _request(self, url, **params) -> dict:
        """GET *url* with *params* as the query string and decode the JSON.

        Raises:
            TwitterAPIError: If the response status is not successful. It is
                a subclass of ``Exception``, so existing ``except Exception``
                handlers keep working.
        """
        response = requests.get(
            url,
            params=params,
            headers=self.headers,
        )
        if not response.ok:
            # Keep the status code and URL so the failure can be diagnosed.
            raise TwitterAPIError(response.status_code, url)
        return response.json()

    def get_user_info(self, user_id: str) -> dict:
        """Return the ``users/show`` payload for *user_id*."""
        return self._request(
            f"{self.API_URL}/users/show.json",
            user_id=user_id
        )

    def get_followers(self, screen_name: str) -> dict:
        """Return the ``followers/ids`` payload for *screen_name*."""
        return self._request(
            f"{self.API_URL}/followers/ids.json",
            screen_name=screen_name,
        )
app.py
from datetime import datetime
import sqlite3
import twitter
# Name of the SQLite database file that main() opens.
SQLITE_DB = "twitter_analytics.sqlite3"
# Screen name of the account whose followers main() fetches and stores.
TWITTER_SCREEN_NAME = "Croves"
def initialise_sqlite(db_name: str):
    """Open the SQLite database *db_name*.

    Returns:
        A ``(connection, cursor)`` pair; the cursor belongs to the returned
        connection.
    """
    db = sqlite3.connect(db_name)
    return db, db.cursor()
def get_api_token() -> str:
    """Return the API token stored on the first line of ``token.ini``.

    The file is looked up relative to the current working directory.
    Surrounding whitespace is removed because ``readline()`` keeps the
    trailing newline, which must not become part of the bearer token.
    """
    with open('token.ini') as ini:
        return ini.readline().strip()
def get_followers_info(api: twitter.TwitterAPI, follower_ids: list):
    """Yield one list of profile fields for every id in *follower_ids*.

    Each id is looked up with ``api.get_user_info``. The yielded list holds
    the values of the fields named in ``columns`` below, in that order; a
    field missing from the response is yielded as ``None``.
    """
    columns = (
        'id_str',
        'name',
        'screen_name',
        'location',
        'url',
        'description',
        'protected',
        'verified',
        'followers_count',
        'friends_count',
        'profile_banner_url',
        'profile_image_url_https',
    )
    for user_id in follower_ids:
        profile = api.get_user_info(user_id=user_id)
        yield [profile.get(column) for column in columns]
def main():
    """Fetch the followers of TWITTER_SCREEN_NAME and store them in SQLite.

    All API calls are made before the database is opened. The user row and
    its follower rows are then written in a single transaction, so a failure
    never leaves a user row without followers, and the connection is closed
    whether or not the inserts succeed.
    """
    token = get_api_token()
    api = twitter.TwitterAPI(token)

    followers = api.get_followers(screen_name=TWITTER_SCREEN_NAME)
    followers_info = list(get_followers_info(api, followers["ids"]))

    connection, cursor = initialise_sqlite(SQLITE_DB)
    try:
        # A sqlite3 connection used as a context manager commits when the
        # body succeeds and rolls back when it raises; it does not close.
        with connection:
            cursor.execute(
                "INSERT INTO user(twitter_screen_name, created_at) VALUES (?, ?)",
                (TWITTER_SCREEN_NAME, datetime.now())
            )
            user_id = cursor.lastrowid

            cursor.executemany(
                """
                INSERT INTO
                    user_followers(
                        user_id, id_str, name, screen_name, location, url,
                        description, protected, verified, followers_count,
                        friends_count, profile_banner_url, profile_image_url_https,
                        created_at
                    )
                VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                [
                    (user_id, *follower_info, datetime.now())
                    for follower_info in followers_info
                ],
            )
    finally:
        connection.close()


# Run the script only when executed directly, not when imported.
if __name__ == "__main__":
    main()
Note
The above is only an example rewrite. You can (and should) modify it further, for example by adding:
- aggregation of the followers' information
- a wrapper for interaction with sqlite3
- threaded calls for gathering information about followers in parallel
etc.
-
Comment by Croves (Nov 19, 2020 at 22:19): Thanks a lot for your comment. Your syntax is very different from mine:
def get_user_info(self, user_id: str) -> dict:
What do the arrow, dict and str mean? How can I learn this?
Comment by hjpotter92 (Nov 20, 2020 at 12:04): @Croves That's type hinting: devdocs.io/python~3.8/library/typing