I am writing code to sync data from multiple sources. I want the code to be maintainable, so that more data sources can be added when required.
Here is the pattern I wish to follow:
class Source1Client:
    """Client for the "source1" data source.

    The ``get_*`` methods are unimplemented placeholders that return
    ``None``. The ``make_*_request`` methods are thin wrappers around the
    ``requests`` library and return its response object unchanged.
    """

    def get_domain(self):
        """Return the domain of this source (placeholder, returns ``None``)."""

    def get_endpoint(self):
        """Return the endpoint of this source (placeholder, returns ``None``)."""

    def get_headers(self):
        """Return the request headers of this source (placeholder, returns ``None``)."""

    def make_post_request(self, url, data, headers):
        """Send a POST request and return the ``requests`` response.

        Args:
            url: Target URL.
            data: Request body, forwarded as the ``data`` argument.
            headers: HTTP headers to send with the request.
        """
        return requests.post(url, data=data, headers=headers)

    def make_get_request(self, url, data, headers=None):
        """Send a GET request and return the ``requests`` response.

        Args:
            url: Target URL.
            data: Query-string parameters, forwarded as the ``params``
                argument (the second positional argument of ``requests.get``).
            headers: Optional HTTP headers. Defaults to ``None`` so callers
                that pass only ``url`` and ``data`` keep working, while GET
                requests can now carry headers just like POST requests.
        """
        return requests.get(url, params=data, headers=headers)
class Source2Client:
    """Client for the "source2" data source.

    Offers the same HTTP helpers as the other client plus two database
    hooks. The ``get_*`` methods, ``call_stored_procedure`` and
    ``run_query`` are unimplemented placeholders that return ``None``.
    """

    def get_domain(self):
        """Return the domain of this source (placeholder, returns ``None``)."""

    def get_endpoint(self):
        """Return the endpoint of this source (placeholder, returns ``None``)."""

    def get_headers(self):
        """Return the request headers of this source (placeholder, returns ``None``)."""

    def make_post_request(self, url, data, headers):
        """Send a POST request and return the ``requests`` response.

        Args:
            url: Target URL.
            data: Request body, forwarded as the ``data`` argument.
            headers: HTTP headers to send with the request.
        """
        return requests.post(url, data=data, headers=headers)

    def make_get_request(self, url, data, headers=None):
        """Send a GET request and return the ``requests`` response.

        Args:
            url: Target URL.
            data: Query-string parameters, forwarded as the ``params``
                argument (the second positional argument of ``requests.get``).
            headers: Optional HTTP headers. Defaults to ``None`` so callers
                that pass only ``url`` and ``data`` keep working, while GET
                requests can now carry headers just like POST requests.
        """
        return requests.get(url, params=data, headers=headers)

    def call_stored_procedure(self, query):
        """Execute a stored-procedure call (placeholder, returns ``None``).

        Args:
            query: The already formatted procedure call.
        """

    def run_query(self, query):
        """Execute a query (placeholder, returns ``None``).

        Args:
            query: The query to run.
        """
class BaseSync:
    """Shared fetch-and-serialize logic for the data sync classes."""

    # Maps a source name to a zero-argument factory for its client.
    # Supporting a new source means adding one entry here instead of another
    # ``if`` branch. The lambdas defer the class lookup until that source is
    # actually requested.
    _CLIENT_FACTORIES = {
        "source1": lambda: Source1Client(),
        "source2": lambda: Source2Client(),
    }

    def _get_client(self, source):
        """Return a new client for *source*; raise ``ValueError`` if unknown."""
        try:
            factory = self._CLIENT_FACTORIES[source]
        except KeyError:
            raise ValueError("Unknown data source: {!r}".format(source)) from None
        return factory()

    def convert_data_to_serialized_objects(self, data, serializer, many=True):
        """Serialize an iterable of row mappings with *serializer*.

        Args:
            data: Iterable of mappings, one per row (column -> value).
            serializer: Schema class; instantiated as ``serializer(many=many)``.
            many: Forwarded to the schema constructor.

        Returns:
            The ``data`` attribute of whatever ``schema.dump`` returns.
        """
        # Copy each row on its own. Reusing one accumulator across rows would
        # leak keys of earlier rows into later rows that lack them.
        rows = [dict(row) for row in data]
        schema = serializer(many=many)
        # NOTE(review): ``.data`` looks like a marshmallow 2.x-style dump
        # result - confirm the serializer library and version in use.
        return schema.dump(rows).data

    def get_serialized_data(
        self, source=None, query=None, proc=None, serializer=None, many=True,
        proc_kwargs=None, endpoint=None, method=None, data=None, headers=None,
    ):
        """Fetch raw data from *source* and return it serialized.

        Exactly one way of fetching should be requested. They are checked in
        this order: stored procedure (``proc``), query (``query``), HTTP
        request (``method`` of ``"get"`` or ``"post"``).

        Args:
            source: Name of the data source; must be a registered client name.
            query: Query passed to the client's ``run_query``.
            proc: Stored-procedure template, filled in with ``proc_kwargs``
                via ``str.format``.
            serializer: Schema class used to serialize the raw rows.
            many: Forwarded to the serializer.
            proc_kwargs: Mapping of values for the ``proc`` template.
            endpoint: URL for HTTP requests.
            method: ``"get"`` or ``"post"`` for HTTP requests.
            data: Query parameters (GET) or request body (POST).
            headers: HTTP headers, used for POST requests.

        Returns:
            The serialized data.

        Raises:
            ValueError: If *source* is unknown, or if none of ``proc``,
                ``query`` or a supported ``method`` was given.
        """
        client = self._get_client(source)

        if proc:
            # ``None`` default avoids sharing one mutable dict between calls.
            formatted_query = proc.format(**(proc_kwargs or {}))
            raw_data = client.call_stored_procedure(formatted_query)
        elif query:
            raw_data = client.run_query(query)
        elif method == "get":
            # NOTE(review): the clients in this file return the raw
            # ``requests`` response object; it probably needs decoding (e.g.
            # ``.json()``) into row mappings before serialization - confirm
            # the payload format.
            raw_data = client.make_get_request(endpoint, data)
        elif method == "post":
            raw_data = client.make_post_request(endpoint, data, headers)
        else:
            raise ValueError(
                "Nothing to fetch: pass proc, query, or method='get'/'post' "
                "(got method={!r})".format(method)
            )

        return self.convert_data_to_serialized_objects(
            raw_data, serializer, many=many
        )
class DataSync(BaseSync):
    """Sync entry points, one method per data source."""

    def sync_source1_data(self):
        """Fetch the serialized data of "source1" for further processing."""
        # ``self`` must be declared: the method calls ``self.get_serialized_data``.
        source1_data = self.get_serialized_data(source="source1", endpoint="source1url/path")
        # other operations

    def sync_source2_data(self):
        """Fetch the serialized data of "source2" for further processing."""
        # NOTE(review): the endpoint still reads "source1url/path" although the
        # source is "source2" - looks like a copy/paste leftover; confirm.
        source2_data = self.get_serialized_data(source="source2", endpoint="source1url/path")
        # other operations
In my code, get_serialized_data uses multiple if/else cases, which will make the code messy and hard to read as more sources are added. It also takes too many parameters, which goes against clean-code principles.
What would be a better implementation that makes the code cleaner, more maintainable and more readable?
1 Answer 1
Use __init__ to take the bunch of arguments and store them on the instance; self will then take care of passing them to the respective methods - that's a good way to use OOP in Python.
class Foo:
    """Example class that keeps its constructor arguments on the instance."""

    def __init__(self, value_a, value_b):
        """Store *value_a* as ``a`` and *value_b* as ``b``."""
        self.a, self.b = value_a, value_b

    def func(self):
        """Placeholder method; does nothing and returns ``None``."""
        return None
To avoid multiple if/else branches, you can keep the clients in a list or dict and look up the right one from there. Hope this helps!