4
\$\begingroup\$

I wrote a class for some MongoDB operations. So far I'm self-taught and mostly I follow the style from already existing code at my workplace (a startup).

We don't have a code-review culture, so I'm pretty new to the process, but I'm open fortips for everything from code optimization, better logging, better documentation, etc.

import logging
from logging.handlers import TimedRotatingFileHandler
from pymongo import MongoClient
from pymongo import errors
from warnings import warn
HOST = 'MONGODB_HOST'
DEFAULT_HOST = 'localhost'
PORT = 'MONGODB_PORT'
DEFAULT_PORT = '27017'
TIMEOUT = 'MONGODB_CONN_TIMEOUT'
DATABASE = 'MONGODB_DB'
COLLECTION = 'MONGODB_DB_CLIENT'
USERNAME = 'MONGODB_USERNAME'
PASSWORD = 'MONGODB_PASSWORD'
LOG_FILE = 'LOG_FILE'
class MongoDB:
 """
 A class used to manage connections to MongoDB
 ...
 Attributes
 ----------
 conf : dict
 a dictionary that has the configuration for class instance.
 client : MongoClient
 the MongoClient from pymongo
 database : pymongo.database.Database
 the database instance from MongoClient
 collection : pymongo.collection.Collection
 the collection instance from database
 verbose: int
 the verbosity of the class (default 5)
 logger: logging.logger
 an logger to log all database operations
 Methods
 -------
 __init_logger__()
 Initializes the logger.
 connect_to_client( host=None, port=None, timeout=3000, username=None, password=None)
 Connects to the Mongodb instance using MongoClient from PyMongo.
 connect_to_database(database_name=None)
 Connects to the collection in the mongodb instance.
 connect_to_collection(collection_name=None)
 Connects to the collection in the mongodb database.
 perform_bulk_operations(list_operations_to_perform_in_bulk=None)
 Executes the operations against MongoDB in bulk.
 """
 def __init__(self, conf, verbose=5):
 self.conf = conf
 self.client = None
 self.database = None
 self.collection = None
 self.verbose = verbose
 self.logger = self.__init_logger__()
 def __init_logger__(self) -> logging.getLogger():
 """This function initializes the logger."""
 logging_file = self.conf.get(LOG_FILE, 'db.log')
 logger = logging.getLogger(__name__)
 log_formatter = logging.Formatter('%(asctime)s|%(name)-12s|%(levelname)-8s|%(message)s')
 log_handler = TimedRotatingFileHandler(filename=f'{logging_file}', when='s', interval=10)
 log_handler.setFormatter(log_formatter)
 logger.addHandler(log_handler)
 logger.setLevel(logging.DEBUG)
 return logger
 def connect_to_client(self, host=None, port=None, timeout=3000, username=None, password=None):
 """This function connects to the Mongodb instance using MongoClient from PyMongo.
 1. If the parameters are not passed they are set to their default value.
 2. Connects to the database. The parameters determine the connection profile.
 Raises OperationFailure if authentication fails.
 3. Testing connection.
 Raises ServerSelectionTimeoutError, if cannot connect to the database in a timely manner.
 Parameters
 ----------
 host: str, optional
 The ip address of the mongodb address. The default 'localhost'.
 port: str, optional
 The port of the mongodb instance. Default is '27017'.
 timeout: int, optional
 The number of seconds to try connecting to the MongoDB instance before timing out. Default is 3 seconds.
 username: str, optional
 The username for authentication. Default is None.
 password: str, optional
 The password for authentication. Default is None.
 Raises
 ------
 errors.ServerSelectionTimeoutError
 If attempt to connect to the server times out.
 errors.OperationFailure
 If authentication with the server fails.
 """
 # 1. If the parameters are not passed they are set to their default value.
 host = self.conf.get(HOST, host)
 port = self.conf.get(PORT, port)
 timeout = self.conf.get(TIMEOUT, timeout)
 username = self.conf.get(USERNAME, username)
 password = self.conf.get(PASSWORD, password)
 
 if host is None:
 self.logger.warning(f"No \'{host}\' defined in configuration. Connecting to {DEFAULT_HOST}.")
 host = DEFAULT_HOST
 if port is None:
 self.logger.warning(f"No \'{port}\' defined in configuration. Connecting to {DEFAULT_PORT}.")
 port = DEFAULT_PORT
 connection_host_and_port = f'{host}:{port}'
 
 try:
 # 2. Connects to the database. The parameters determine the connection profile.
 # Raises OperationFailure if authentication fails.
 if username is not None and password is not None:
 self.logger.warning(f"Username and password are defined in configuration. "
 f"Connecting with login credentials.")
 self.client = MongoClient(
 host=connection_host_and_port, # <-- IP and port go here
 serverSelectionTimeoutMS=timeout, # 3 se+cond timeout
 username=f"{username}",
 password=f"{password}",
 authSource='admin',
 )
 else:
 self.logger.warning(f"No username or password defined in configuration. "
 f"Connecting without login credentials.")
 self.client = MongoClient(
 host=connection_host_and_port, # <-- IP and port go here
 serverSelectionTimeoutMS=timeout, # 3 se+cond timeout
 )
 # 3. Testing connection.
 # Raises ServerSelectionTimeoutError, if cannot connect to the database in a timely manner.
 self.client.server_info()
 except errors.ServerSelectionTimeoutError as err:
 self.logger.error(f'Connection to \'{connection_host_and_port}\' timed out.')
 raise err
 except errors.OperationFailure as err:
 self.logger.error(f'Authentication to \'{connection_host_and_port}\' failed.')
 print(err)
 raise err
 else:
 self.logger.debug(f'Created connection to {connection_host_and_port}')
 def connect_to_database(self, database_name=None):
 """This function connects to the database in the mongodb instance.
 Parameters
 ----------
 database_name: str, optional
 The name of the database. The default 'None'.
 Raises
 ------
 ValueError
 If database name is None.
 """
 database_name = self.conf.get(DATABASE, database_name)
 if database_name is not None:
 if self.conf[DATABASE] not in self.client.database_names():
 msg = f'Database \'{database_name}\' does not exist. Creating database.'
 self.logger.warning(msg)
 if self.verbose >= 1:
 warn(msg)
 self.database = self.client[database_name]
 self.logger.debug(f'Connected to database: \'{database_name}\'')
 else:
 msg = 'No Database specified.'
 self.logger.error(msg)
 if self.verbose >= 1:
 warn(msg)
 raise ValueError(msg)
 
 def connect_to_collection(self, collection_name=None):
 """This function connects to the collection in the mongodb database.
 Parameters
 ----------
 collection_name: str, optional
 The name of the collection. The default 'None'.
 Raises
 ------
 ValueError
 If collection name is None.
 """
 collection_name = self.conf.get(COLLECTION, collection_name)
 if collection_name is not None:
 if collection_name not in self.database.collection_names():
 msg = f'Collection \'{collection_name}\' does not exist. Creating collection.'
 self.logger.warning(msg)
 if self.verbose >= 1:
 warn(msg)
 self.collection = self.database[collection_name]
 self.logger.debug(f'Connected to Collection: \'{collection_name}\'')
 else:
 msg = 'No Collection specified.'
 self.logger.error(msg)
 if self.verbose >= 1:
 warn(msg)
 raise ValueError(msg)
 def perform_bulk_operations(self, list_operations_to_perform_in_bulk=None):
 """This function executes the operations against MongoDB in bulk.
 Parameters
 ----------
 list_operations_to_perform_in_bulk: list, optional 
 The list of operations to perform. The default 'None'.
 Raises
 ------
 ValueError
 If requests is empty.
 """
 if list_operations_to_perform_in_bulk is None:
 list_operations_to_perform_in_bulk = []
 if len(list_operations_to_perform_in_bulk) > 0:
 try:
 res = self.collection.bulk_write(list_operations_to_perform_in_bulk, ordered=False)
 except errors.BulkWriteError as bwe:
 self.logger.error(bwe.details['writeErrors'])
 if self.verbose >= 1:
 warn(bwe.details)
 raise bwe
 else:
 self.logger.info(res.bulk_api_result)
 return res
 else:
 msg = 'No operations to perform.'
 self.logger.error(msg)
 raise ValueError(msg)
Zeta
19.6k2 gold badges57 silver badges90 bronze badges
asked Jul 11, 2020 at 2:30
\$\endgroup\$
1
  • \$\begingroup\$ @Zeta Thank you helping to edit my question. I appreciate the help. \$\endgroup\$ Commented Jul 11, 2020 at 15:53

2 Answers 2

5
\$\begingroup\$

Highlevel review

  • Dunder methods are reserved by the Python language and reserves the right to make any unofficial dunders not work in a future Python version. Don't name things __init_logger__ when you can use a sunder, _init_logger_, or private, _init_logger, name instead.

  • Your type hint logging.getLogger is neither a type nor does it return a type. You should use logging.Logger instead.

  • A lot of your documentation is documenting the types of the values. Any document generator worth their salt will have a way to add the type hints to the output.

  • Your style is clear and mostly consistent. Your code looks clean. Nice job!

Lowlevel review

Logging

  • Warning that a default value is used is unhelpful. You can write a decorator that logs self.conf and the arguments if this information is needed.
msg = 'No Database specified.'
self.logger.error(msg)
if self.verbose >= 1:
 warn(msg)
raise ValueError(msg)
  • Logging exceptions without handling them is terrible. There are two ways this can go:

      • The exception propagates and is unhandled by the rest of the code.
      • Your code now errors and exits giving you a complete traceback of the problem.
      • You don't look at the logs because you have the full traceback.
      • The exception propagates and is handled by the rest of the code.
      • You don't look at these in the logs because they've been handled.

    This is bad as it clogs up your logs with useless information. After a while users of these logs will recognize that information at the error level is predominantly useless and will start to ignore the level completely. This will mean that your logs become redundant and fail to notify you / your team of actual problems.

    This problem is also explained in the boy who cried wolf fable from around 500 BCE.

  • Don't use warnings.warn and raise an exception at the same time is not constructive. Much like logging and raiseing, raise makes the other obsolete.

  • Using warnings.warn and logging.error at the same time is not constructive. You're basically just calling logging.error twice.

The times you should use these are:

  • Exceptions: If you have entered a breaking state.

  • Warnings: If you need to notify the user of your library of potentially breaking state.

    • If you are in the process of renaming a function, the old name should warn the user.
    • If you have ambiguity in your syntax, [b]foo [i]bar[/b] baz[/i]. Is that foo bar baz, foo bar baz or foo bar baz?
  • Logging: To help track down bugs.

Other

  • Your try in connect_to_client is very large that it's unclear what you're even testing. You can just build a dictionary to pass with the extra keywords.

  • Personally I think connect_to_client shouldn't even be defined on the class.

  • You can have a key error in connect_to_database.

    if self.conf[DATABASE] not in self.client.database_names():
    
  • In perform_bulk_operations you take a value list_operations_to_perform_in_bulk which defaults to None and then, which you change to [] and then you error if it's empty. Change it from a default to a required argument.

import logging
from logging.handlers import TimedRotatingFileHandler
from pymongo import MongoClient
from pymongo import errors
from warnings import warn
HOST = 'MONGODB_HOST'
DEFAULT_HOST = 'localhost'
PORT = 'MONGODB_PORT'
DEFAULT_PORT = '27017'
TIMEOUT = 'MONGODB_CONN_TIMEOUT'
DATABASE = 'MONGODB_DB'
COLLECTION = 'MONGODB_DB_CLIENT'
USERNAME = 'MONGODB_USERNAME'
PASSWORD = 'MONGODB_PASSWORD'
LOG_FILE = 'LOG_FILE'
def _init_logger(log_file) -> logging.Logger:
 log_formatter = logging.Formatter('%(asctime)s|%(name)-12s|%(levelname)-8s|%(message)s')
 log_handler = TimedRotatingFileHandler(filename=f'{log_file}', when='s', interval=10)
 log_handler.setFormatter(log_formatter)
 logger = logging.getLogger(__name__)
 logger.addHandler(log_handler)
 logger.setLevel(logging.DEBUG)
 return logger
def connect_to_client(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=3000, username=None, password=None):
 kwargs = {}
 if username is not None and password is not None:
 kwargs = {
 'username': f"{username}",
 'password': f"{password}",
 'authSource': 'admin'.
 }
 client = MongoClient(
 host=f'{host}:{port}',
 serverSelectionTimeoutMS=timeout,
 **kwargs
 )
 client.server_info()
 return client
class MongoDB:
 def __init__(self, conf, verbose=5):
 self.conf = conf
 self.client = None
 self.database = None
 self.collection = None
 self.verbose = verbose
 self.logger = _init_logger(self.conf.get(LOG_FILE, 'db.log'))
 def connect_to_client(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=3000, username=None, password=None):
 host = self.conf.get(HOST, host)
 port = self.conf.get(PORT, port)
 self.client = connect_to_client(
 host=host,
 port=port,
 timeout=self.conf.get(TIMEOUT, timeout),
 username=self.conf.get(USERNAME, username),
 password=self.conf.get(PASSWORD, password),
 )
 self.logger.debug(f'Created connection to {host}:{port}')
 def connect_to_database(self, database_name=None):
 database_name = self.conf.get(DATABASE, database_name)
 if database_name is None:
 raise ValueError('No Database specified.')
 
 if self.conf[DATABASE] not in self.client.database_names():
 self.logger.debug(f'Database \'{database_name}\' does not exist. Creating database.')
 self.database = self.client[database_name]
 self.logger.debug(f'Connected to database: \'{database_name}\'')
 
 def connect_to_collection(self, collection_name=None):
 collection_name = self.conf.get(COLLECTION, collection_name)
 if collection_name is None:
 raise ValueError('No Collection specified.')
 if collection_name not in self.database.collection_names():
 self.logger.debug(f'Collection \'{collection_name}\' does not exist. Creating collection.')
 self.collection = self.database[collection_name]
 self.logger.debug(f'Connected to Collection: \'{collection_name}\'')
 def perform_bulk_operations(self, list_operations_to_perform_in_bulk):
 if not list_operations_to_perform_in_bulk:
 raise ValueError('No operations to perform.')
 
 res = self.collection.bulk_write(list_operations_to_perform_in_bulk, ordered=False)
 self.logger.debug(res.bulk_api_result)
 return res

Conclusion

  • You are overzealous with your logging. And to be honest I see no reason to use a level above debug here.

  • Your class should probably not exist.

    1. The methods would be better in every way if you just changed them to functions.
    2. You are abusing classes, it's not fully initialized upon exiting __init__ and you're abusing state.
answered Jul 11, 2020 at 12:15
\$\endgroup\$
1
  • \$\begingroup\$ Thank you so much for your feedback. I really appreciate this and will implement the necessary changes. +1. \$\endgroup\$ Commented Jul 11, 2020 at 15:54
1
\$\begingroup\$

Configurations should be stored in a configuration file(.cfg /.ini /.env) .

You can use Python Decouple for simple usecase without namespaces. If you want to use namespaces check out the inbuilt ConfigParser module. This is because if you share your code for review or to some forum to get your errors resolved you might accidentally give your secret information which can allow other users to access your data. This has happened a few times with me when I was sharing error logs and code on IRC chat.

answered Jul 11, 2020 at 4:54
\$\endgroup\$
6
  • \$\begingroup\$ Thank you for posting your insight. However I disagree 1 you can just move the constant (config values) into a .py and import it. 2 The OP clearly knows this and is defaulting to None and checking against None as None is intended to be used. \$\endgroup\$ Commented Jul 11, 2020 at 10:39
  • \$\begingroup\$ For 1) I 'm sure you can. I use config files in every django project. 2)I think the OP is comparing some objects to None. I can't say if exactly those objects are equated to None. Therefore I added a can. \$\endgroup\$ Commented Jul 11, 2020 at 10:43
  • \$\begingroup\$ pypi.org/project/python-decouple/#id4. This is a popular package which people use to store django's SECURITY_KEY and database username, passwords etc to be read by python. \$\endgroup\$ Commented Jul 11, 2020 at 10:46
  • \$\begingroup\$ ok I just removed what I was not sure about. \$\endgroup\$ Commented Jul 11, 2020 at 10:48
  • \$\begingroup\$ Thanks @Peilonrayz for making me help my answers more valuable to the site. \$\endgroup\$ Commented Jul 11, 2020 at 10:56

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.