I wrote a class for some MongoDB operations. So far I'm self-taught and mostly I follow the style from already existing code at my workplace (a startup).
We don't have a code-review culture, so I'm pretty new to the process, but I'm open fortips for everything from code optimization, better logging, better documentation, etc.
import logging
from logging.handlers import TimedRotatingFileHandler
from pymongo import MongoClient
from pymongo import errors
from warnings import warn
HOST = 'MONGODB_HOST'
DEFAULT_HOST = 'localhost'
PORT = 'MONGODB_PORT'
DEFAULT_PORT = '27017'
TIMEOUT = 'MONGODB_CONN_TIMEOUT'
DATABASE = 'MONGODB_DB'
COLLECTION = 'MONGODB_DB_CLIENT'
USERNAME = 'MONGODB_USERNAME'
PASSWORD = 'MONGODB_PASSWORD'
LOG_FILE = 'LOG_FILE'
class MongoDB:
"""
A class used to manage connections to MongoDB
...
Attributes
----------
conf : dict
a dictionary that has the configuration for class instance.
client : MongoClient
the MongoClient from pymongo
database : pymongo.database.Database
the database instance from MongoClient
collection : pymongo.collection.Collection
the collection instance from database
verbose: int
the verbosity of the class (default 5)
logger: logging.logger
an logger to log all database operations
Methods
-------
__init_logger__()
Initializes the logger.
connect_to_client( host=None, port=None, timeout=3000, username=None, password=None)
Connects to the Mongodb instance using MongoClient from PyMongo.
connect_to_database(database_name=None)
Connects to the collection in the mongodb instance.
connect_to_collection(collection_name=None)
Connects to the collection in the mongodb database.
perform_bulk_operations(list_operations_to_perform_in_bulk=None)
Executes the operations against MongoDB in bulk.
"""
def __init__(self, conf, verbose=5):
self.conf = conf
self.client = None
self.database = None
self.collection = None
self.verbose = verbose
self.logger = self.__init_logger__()
def __init_logger__(self) -> logging.getLogger():
"""This function initializes the logger."""
logging_file = self.conf.get(LOG_FILE, 'db.log')
logger = logging.getLogger(__name__)
log_formatter = logging.Formatter('%(asctime)s|%(name)-12s|%(levelname)-8s|%(message)s')
log_handler = TimedRotatingFileHandler(filename=f'{logging_file}', when='s', interval=10)
log_handler.setFormatter(log_formatter)
logger.addHandler(log_handler)
logger.setLevel(logging.DEBUG)
return logger
def connect_to_client(self, host=None, port=None, timeout=3000, username=None, password=None):
"""This function connects to the Mongodb instance using MongoClient from PyMongo.
1. If the parameters are not passed they are set to their default value.
2. Connects to the database. The parameters determine the connection profile.
Raises OperationFailure if authentication fails.
3. Testing connection.
Raises ServerSelectionTimeoutError, if cannot connect to the database in a timely manner.
Parameters
----------
host: str, optional
The ip address of the mongodb address. The default 'localhost'.
port: str, optional
The port of the mongodb instance. Default is '27017'.
timeout: int, optional
The number of seconds to try connecting to the MongoDB instance before timing out. Default is 3 seconds.
username: str, optional
The username for authentication. Default is None.
password: str, optional
The password for authentication. Default is None.
Raises
------
errors.ServerSelectionTimeoutError
If attempt to connect to the server times out.
errors.OperationFailure
If authentication with the server fails.
"""
# 1. If the parameters are not passed they are set to their default value.
host = self.conf.get(HOST, host)
port = self.conf.get(PORT, port)
timeout = self.conf.get(TIMEOUT, timeout)
username = self.conf.get(USERNAME, username)
password = self.conf.get(PASSWORD, password)
if host is None:
self.logger.warning(f"No \'{host}\' defined in configuration. Connecting to {DEFAULT_HOST}.")
host = DEFAULT_HOST
if port is None:
self.logger.warning(f"No \'{port}\' defined in configuration. Connecting to {DEFAULT_PORT}.")
port = DEFAULT_PORT
connection_host_and_port = f'{host}:{port}'
try:
# 2. Connects to the database. The parameters determine the connection profile.
# Raises OperationFailure if authentication fails.
if username is not None and password is not None:
self.logger.warning(f"Username and password are defined in configuration. "
f"Connecting with login credentials.")
self.client = MongoClient(
host=connection_host_and_port, # <-- IP and port go here
serverSelectionTimeoutMS=timeout, # 3 se+cond timeout
username=f"{username}",
password=f"{password}",
authSource='admin',
)
else:
self.logger.warning(f"No username or password defined in configuration. "
f"Connecting without login credentials.")
self.client = MongoClient(
host=connection_host_and_port, # <-- IP and port go here
serverSelectionTimeoutMS=timeout, # 3 se+cond timeout
)
# 3. Testing connection.
# Raises ServerSelectionTimeoutError, if cannot connect to the database in a timely manner.
self.client.server_info()
except errors.ServerSelectionTimeoutError as err:
self.logger.error(f'Connection to \'{connection_host_and_port}\' timed out.')
raise err
except errors.OperationFailure as err:
self.logger.error(f'Authentication to \'{connection_host_and_port}\' failed.')
print(err)
raise err
else:
self.logger.debug(f'Created connection to {connection_host_and_port}')
def connect_to_database(self, database_name=None):
"""This function connects to the database in the mongodb instance.
Parameters
----------
database_name: str, optional
The name of the database. The default 'None'.
Raises
------
ValueError
If database name is None.
"""
database_name = self.conf.get(DATABASE, database_name)
if database_name is not None:
if self.conf[DATABASE] not in self.client.database_names():
msg = f'Database \'{database_name}\' does not exist. Creating database.'
self.logger.warning(msg)
if self.verbose >= 1:
warn(msg)
self.database = self.client[database_name]
self.logger.debug(f'Connected to database: \'{database_name}\'')
else:
msg = 'No Database specified.'
self.logger.error(msg)
if self.verbose >= 1:
warn(msg)
raise ValueError(msg)
def connect_to_collection(self, collection_name=None):
"""This function connects to the collection in the mongodb database.
Parameters
----------
collection_name: str, optional
The name of the collection. The default 'None'.
Raises
------
ValueError
If collection name is None.
"""
collection_name = self.conf.get(COLLECTION, collection_name)
if collection_name is not None:
if collection_name not in self.database.collection_names():
msg = f'Collection \'{collection_name}\' does not exist. Creating collection.'
self.logger.warning(msg)
if self.verbose >= 1:
warn(msg)
self.collection = self.database[collection_name]
self.logger.debug(f'Connected to Collection: \'{collection_name}\'')
else:
msg = 'No Collection specified.'
self.logger.error(msg)
if self.verbose >= 1:
warn(msg)
raise ValueError(msg)
def perform_bulk_operations(self, list_operations_to_perform_in_bulk=None):
"""This function executes the operations against MongoDB in bulk.
Parameters
----------
list_operations_to_perform_in_bulk: list, optional
The list of operations to perform. The default 'None'.
Raises
------
ValueError
If requests is empty.
"""
if list_operations_to_perform_in_bulk is None:
list_operations_to_perform_in_bulk = []
if len(list_operations_to_perform_in_bulk) > 0:
try:
res = self.collection.bulk_write(list_operations_to_perform_in_bulk, ordered=False)
except errors.BulkWriteError as bwe:
self.logger.error(bwe.details['writeErrors'])
if self.verbose >= 1:
warn(bwe.details)
raise bwe
else:
self.logger.info(res.bulk_api_result)
return res
else:
msg = 'No operations to perform.'
self.logger.error(msg)
raise ValueError(msg)
-
\$\begingroup\$ @Zeta Thank you helping to edit my question. I appreciate the help. \$\endgroup\$Vaibhav yB Shah– Vaibhav yB Shah2020年07月11日 15:53:20 +00:00Commented Jul 11, 2020 at 15:53
2 Answers 2
Highlevel review
Dunder methods are reserved by the Python language and reserves the right to make any unofficial dunders not work in a future Python version. Don't name things
__init_logger__
when you can use a sunder,_init_logger_
, or private,_init_logger
, name instead.Your type hint
logging.getLogger
is neither a type nor does it return a type. You should uselogging.Logger
instead.A lot of your documentation is documenting the types of the values. Any document generator worth their salt will have a way to add the type hints to the output.
Your style is clear and mostly consistent. Your code looks clean. Nice job!
Lowlevel review
Logging
- Warning that a default value is used is unhelpful. You can write a decorator that logs
self.conf
and the arguments if this information is needed.
msg = 'No Database specified.' self.logger.error(msg) if self.verbose >= 1: warn(msg) raise ValueError(msg)
Logging exceptions without handling them is terrible. There are two ways this can go:
-
- The exception propagates and is unhandled by the rest of the code.
- Your code now errors and exits giving you a complete traceback of the problem.
- You don't look at the logs because you have the full traceback.
-
- The exception propagates and is handled by the rest of the code.
- You don't look at these in the logs because they've been handled.
This is bad as it clogs up your logs with useless information. After a while users of these logs will recognize that information at the error level is predominantly useless and will start to ignore the level completely. This will mean that your logs become redundant and fail to notify you / your team of actual problems.
This problem is also explained in the boy who cried wolf fable from around 500 BCE.
-
Don't use
warnings.warn
andraise
an exception at the same time is not constructive. Much like logging andraise
ing,raise
makes the other obsolete.Using
warnings.warn
andlogging.error
at the same time is not constructive. You're basically just callinglogging.error
twice.
The times you should use these are:
Exceptions: If you have entered a breaking state.
Warnings: If you need to notify the user of your library of potentially breaking state.
- If you are in the process of renaming a function, the old name should warn the user.
- If you have ambiguity in your syntax,
[b]foo [i]bar[/b] baz[/i]
. Is that foo bar baz, foo bar baz or foo bar baz?
Logging: To help track down bugs.
Other
Your try in
connect_to_client
is very large that it's unclear what you're even testing. You can just build a dictionary to pass with the extra keywords.Personally I think
connect_to_client
shouldn't even be defined on the class.You can have a key error in
connect_to_database
.if self.conf[DATABASE] not in self.client.database_names():
In
perform_bulk_operations
you take a valuelist_operations_to_perform_in_bulk
which defaults toNone
and then, which you change to[]
and then you error if it's empty. Change it from a default to a required argument.
import logging
from logging.handlers import TimedRotatingFileHandler
from pymongo import MongoClient
from pymongo import errors
from warnings import warn
HOST = 'MONGODB_HOST'
DEFAULT_HOST = 'localhost'
PORT = 'MONGODB_PORT'
DEFAULT_PORT = '27017'
TIMEOUT = 'MONGODB_CONN_TIMEOUT'
DATABASE = 'MONGODB_DB'
COLLECTION = 'MONGODB_DB_CLIENT'
USERNAME = 'MONGODB_USERNAME'
PASSWORD = 'MONGODB_PASSWORD'
LOG_FILE = 'LOG_FILE'
def _init_logger(log_file) -> logging.Logger:
log_formatter = logging.Formatter('%(asctime)s|%(name)-12s|%(levelname)-8s|%(message)s')
log_handler = TimedRotatingFileHandler(filename=f'{log_file}', when='s', interval=10)
log_handler.setFormatter(log_formatter)
logger = logging.getLogger(__name__)
logger.addHandler(log_handler)
logger.setLevel(logging.DEBUG)
return logger
def connect_to_client(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=3000, username=None, password=None):
kwargs = {}
if username is not None and password is not None:
kwargs = {
'username': f"{username}",
'password': f"{password}",
'authSource': 'admin'.
}
client = MongoClient(
host=f'{host}:{port}',
serverSelectionTimeoutMS=timeout,
**kwargs
)
client.server_info()
return client
class MongoDB:
def __init__(self, conf, verbose=5):
self.conf = conf
self.client = None
self.database = None
self.collection = None
self.verbose = verbose
self.logger = _init_logger(self.conf.get(LOG_FILE, 'db.log'))
def connect_to_client(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=3000, username=None, password=None):
host = self.conf.get(HOST, host)
port = self.conf.get(PORT, port)
self.client = connect_to_client(
host=host,
port=port,
timeout=self.conf.get(TIMEOUT, timeout),
username=self.conf.get(USERNAME, username),
password=self.conf.get(PASSWORD, password),
)
self.logger.debug(f'Created connection to {host}:{port}')
def connect_to_database(self, database_name=None):
database_name = self.conf.get(DATABASE, database_name)
if database_name is None:
raise ValueError('No Database specified.')
if self.conf[DATABASE] not in self.client.database_names():
self.logger.debug(f'Database \'{database_name}\' does not exist. Creating database.')
self.database = self.client[database_name]
self.logger.debug(f'Connected to database: \'{database_name}\'')
def connect_to_collection(self, collection_name=None):
collection_name = self.conf.get(COLLECTION, collection_name)
if collection_name is None:
raise ValueError('No Collection specified.')
if collection_name not in self.database.collection_names():
self.logger.debug(f'Collection \'{collection_name}\' does not exist. Creating collection.')
self.collection = self.database[collection_name]
self.logger.debug(f'Connected to Collection: \'{collection_name}\'')
def perform_bulk_operations(self, list_operations_to_perform_in_bulk):
if not list_operations_to_perform_in_bulk:
raise ValueError('No operations to perform.')
res = self.collection.bulk_write(list_operations_to_perform_in_bulk, ordered=False)
self.logger.debug(res.bulk_api_result)
return res
Conclusion
You are overzealous with your logging. And to be honest I see no reason to use a level above debug here.
Your class should probably not exist.
- The methods would be better in every way if you just changed them to functions.
- You are abusing classes, it's not fully initialized upon exiting
__init__
and you're abusing state.
-
\$\begingroup\$ Thank you so much for your feedback. I really appreciate this and will implement the necessary changes. +1. \$\endgroup\$Vaibhav yB Shah– Vaibhav yB Shah2020年07月11日 15:54:30 +00:00Commented Jul 11, 2020 at 15:54
Configurations should be stored in a configuration file(.cfg /.ini /.env) .
You can use Python Decouple for simple usecase without namespaces. If you want to use namespaces check out the inbuilt ConfigParser module. This is because if you share your code for review or to some forum to get your errors resolved you might accidentally give your secret information which can allow other users to access your data. This has happened a few times with me when I was sharing error logs and code on IRC chat.
-
\$\begingroup\$ Thank you for posting your insight. However I disagree 1 you can just move the constant (config values) into a .py and import it. 2 The OP clearly knows this and is defaulting to
None
and checking againstNone
asNone
is intended to be used. \$\endgroup\$2020年07月11日 10:39:38 +00:00Commented Jul 11, 2020 at 10:39 -
\$\begingroup\$ For 1) I 'm sure you can. I use config files in every django project. 2)I think the OP is comparing some objects to None. I can't say if exactly those objects are equated to None. Therefore I added a
can
. \$\endgroup\$Vishesh Mangla– Vishesh Mangla2020年07月11日 10:43:54 +00:00Commented Jul 11, 2020 at 10:43 -
\$\begingroup\$ pypi.org/project/python-decouple/#id4. This is a popular package which people use to store django's SECURITY_KEY and database username, passwords etc to be read by python. \$\endgroup\$Vishesh Mangla– Vishesh Mangla2020年07月11日 10:46:53 +00:00Commented Jul 11, 2020 at 10:46
-
\$\begingroup\$ ok I just removed what I was not sure about. \$\endgroup\$Vishesh Mangla– Vishesh Mangla2020年07月11日 10:48:38 +00:00Commented Jul 11, 2020 at 10:48
-
\$\begingroup\$ Thanks @Peilonrayz for making me help my answers more valuable to the site. \$\endgroup\$Vishesh Mangla– Vishesh Mangla2020年07月11日 10:56:48 +00:00Commented Jul 11, 2020 at 10:56