3
\$\begingroup\$

I have a set of XML documents that have a rather long list of tags and corresponding values that I am interested in. For simplicity, I decided to create a class that would let me get an object with all the relevant attributes parsed from the XML file. Having little experience with OOP in Python, I guess I might have made mistakes or suboptimal choices in terms of class design. Probably, my decision to have a separate utility class for XMLParser was a bad idea, but at least I couldn't come up with a better one.

To sum up, I would appreciate any type of criticism, advice or suggestions with regard to how the classes are designed, as well as code quality in general. Even though it seems that everything works as I expected, I feel like it's a good opportunity to learn new approaches, concepts and level-up my understanding of OOP in Python. Thanks in advance!

from xml.dom import minidom
class XMLDoc:
 def __init__(self, path_to_xml):
 self.tree = minidom.parse(path_to_xml)
 self.parser = XMLParser(self.tree)
 self.goods = self._make_goods_list()
 self.language = self.parser._get('language')
 self.service_type = self.parser._get('serviceType')
 self.trader_declaration_number = self.parser._get('traderDeclarationNumber')
 self.trader_reference = self.parser._get('traderReference')
 self.clearance_location = self.parser._get('clearanceLocation')
 self.declaration_time = self.parser._get('declarationTime')
 self.declaration_type = self.parser._get('declarationType')
 self.correction_code = self.parser._get('correctionCode')
 self.customs_office_number = self.parser._get('customsOfficeNumber')
 self.dispatch_country = self.parser._get('dispatchCountry')
 self.transport_in_container = self.parser._get("transportInContainer")
 self.previous_document = PreviousDocument(self.tree, self.parser)
 self.consignor = Consignor(self.tree,self.parser)
 self.consignee = Consignee(self.tree, self.parser)
 self.importer = Importer(self.tree, self.parser)
 self.transport_means = Transport(self.tree,self.parser)
 self.business = BusinessData(self.tree, self.parser)
 def __repr__(self):
 return f"XMLDoc {self.trader_reference}"
 def _make_goods_list(self):
 goods_list = []
 all_goods = self.parser.get_goods()
 for g in all_goods:
 fields_list = self.parser.filter_nested_items(g)
 goods_item = GoodsItem(fields_list,self.parser)
 goods_list.append(goods_item)
 return goods_list
class Consignor:
 def __init__(self, tree, parser):
 self.tree = tree
 self.parser = parser
 self.name = self.parser._get("consignor", "name")
 self.street = self.parser._get("consignor", "street")
 self.postal_code = self.parser._get("consignor", "postalCode")
 self.city = self.parser._get("consignor", "city")
 self.country = self.parser._get("consignor", "country")
 def __repr__(self):
 return f"Consignor object: {self.name}"
class Importer:
 def __init__(self, tree, parser):
 self.tree = tree
 self.parser = parser
 self.name = self.parser._get("importer", "name")
 self.street = self.parser._get("importer", "street")
 self.postal_code = self.parser._get("importer", "postalCode")
 self.city = self.parser._get("importer", "city")
 self.country = self.parser._get("importer", "country")
 self.trader_id = self.parser._get("importer", "traderIdentificationNumber")
 self.reference = self.parser._get("importer", "importerReference")
 def __repr__(self):
 return f"Importer object: {self.name}"
class Consignee:
 def __init__(self, tree, parser):
 self.tree = tree
 self.parser = parser
 self.name = self.parser._get("consignee", "name")
 self.street = self.parser._get("consignee", "street")
 self.postal_code = self.parser._get("consignee", 
 "postalCode")
 self.city = self.parser._get("consignee", "city")
 self.country = self.parser._get("consignee", "country")
 self.trader_id = self.parser._get("consignee", 
 "traderIdentificationNumber")
 def __repr__(self):
 return f"Consignee object: {self.name}"
class Transport:
 def __init__(self, tree, parser):
 self.tree = tree
 self.parser = parser
 self.mode = self.parser._get("transportMeans", 
 "transportMode")
 self.transportation_type = self.parser._get("transportMeans", 
 "transportationType")
 self.transportation_country = self.parser._get("transportMeans", 
 "transportationCountry")
 self.transportation_number = self.parser._get("transportMeans", 
 "transportationNumber")
class GoodsItem:
 def __init__(self, fields_list, parser):
 self.fields_list = fields_list
 self.parser = parser
 self.trader_item_id = self._get("traderItemID")
 self.description = self._get("description")
 self.commodity_code = self._get("commodityCode") 
 self.gross_mass = self._get("grossMass")
 self.net_mass = self._get("netMass")
 self.permit_obligation = self._get("permitObligation") 
 self.non_customs_law_obligation = self._get("nonCustomsLawObligation")
 self.origin_country = self._get("origin", "originCountry")
 self.origin_preference = self._get("origin", "preference")
 self.package_type = self._get("packaging", "packagingType")
 self.package_quantity = self._get("packaging", "quantity")
 self.package_reference_number = self._get("packaging", 
 "packagingReferenceNumber")
 # self.document_type = self._get("producedDocument","documentType")
 # self.document_reference_number = self._get("producedDocument","packagingReferenceNumber")
 # self.document_issue_date = self._get("producedDocument","issueDate")
 self.vat_value = self._get("valuation", "VATValue")
 self.vat_code = self._get("valuation", "VATCode")
 def _get_inner_field(self, external, field):
 matching_tags = self.parser.search_dom_list(self.fields_list, external)
 filtered_matching_tags = self.parser.filter_nested_items(matching_tags)
 inner_tag = self.parser.search_dom_list(filtered_matching_tags, field)
 return inner_tag
 def _get(self, field_name, inner_field=None):
 if not inner_field:
 return self.parser.search_dom_list(self.fields_list, 
 field_name).firstChild.nodeValue
 else:
 return self._get_inner_field(field_name, 
 inner_field).firstChild.nodeValue
class BusinessData:
 def __init__(self,tree,parser):
 self.tree = tree
 self.parser = parser
 self.incoterms = self.parser._get("business", "incoterms")
 self.customs_account = self.parser._get("business", 
 "customsAccount")
 self.vat_account = self.parser._get("business", "VATAccount")
 self.vat_number = self.parser._get("business", "VATNumber")
 self.vat_suffix = self.parser._get("business", "VATSuffix")
 self.invoice_currency_type = self.parser._get("business", 
 "invoiceCurrencyType")
class PreviousDocument:
 def __init__(self, tree, parser):
 self.tree = tree
 self.parser = parser
 self.doctype = self.parser._get("previousDocument", 
 "previousDocumentType")
 self.reference = self.parser._get("previousDocument", 
 "previousDocumentReference")
class XMLParser:
 def __init__(self, tree):
 self.tree = tree
 def get_nested_tag(self, parent_tag, element):
 reference = [item.firstChild.nodeValue for item in
 self.tree.getElementsByTagName(parent_tag)[0].childNodes 
 if item.nodeType != 3 and item.tagName == element]
 try:
 return reference[0]
 except IndexError:
 return None
 def get_single_tag(self, element):
 reference = self.tree.getElementsByTagName(element)[0].firstChild.nodeValue
 return reference
 def get_goods(self):
 goods = self.tree.getElementsByTagName('goodsItem')
 return goods
 def search_dom_list(self, dom_list, name):
 dom_list = self.filter_items(dom_list)
 for item in dom_list:
 if item.tagName == name:
 return item
 def filter_nested_items(self, goods_item):
 values = [item for item in goods_item.childNodes if item.nodeType != 3]
 return values
 def filter_items(self, items):
 return [item for item in items if item.nodeType != 3]
 def _get(self, field_name, inner_field=None):
 if not inner_field:
 return self.get_single_tag(field_name)
 else:
 return self.get_nested_tag(field_name, inner_field)
asked Mar 8, 2020 at 15:45
\$\endgroup\$

1 Answer 1

1
\$\begingroup\$

You could save yourself a lot of repetition by defining a generic class that takes elements of the tree and saves them. This way you can separate the class from the data needed to build it.

In one file you can have all your fields defined:

XMLDOC_FIELDS = {"language": ("language",),
 "service_type": ("serviceType",),
 "trader_declaration_number": ("traderDeclarationNumber",),
 "trader_reference": ("traderReference",),
 "clearance_location": ("clearanceLocation",),
 "declaration_time": ("declarationTime",),
 "declaration_type": ("declarationType",),
 "correction_code": ("correctionCode",),
 "customs_office_number": ("customsOfficeNumber",),
 "dispatch_country": ("dispatchCountry",),
 "transport_in_container": ("transportInContainer",)}
CONSIGNOR_FIELDS = {"name": ("consignor", "name"),
 "street": ("consignor", "street"),
 "postal_code": ("consignor", "postalCode"),
 "city": ("consignor", "city"),
 "country": ("consignor", "country")}
IMPORTER_FIELDS = {"name": ("importer", "name"),
 "street": ("importer", "street"),
 "postal_code": ("importer", "postalCode"),
 "city": ("importer", "city"),
 "country": ("importer", "country"),
 "trader_id": ("importer", "traderIdentificationNumber"),
 "reference": ("importer", "importerReference")}
CONSIGNEE_FIELDS = {"name": ("consignee", "name"),
 "street": ("consignee", "street"),
 "postal_code": ("consignee", "postalCode"),
 "city": ("consignee", "city"),
 "country": ("consignee", "country"),
 "trader_id": ("consignee", "traderIdentificationNumber")}
...

Which you can then use like this in your main file:

from fields import *
class Entity:
 def __init__(self, cls_name, parser, fields):
 self.cls_name = cls_name
 self.name = ""
 for field_name, path in fields.items():
 setattr(self, field_name, parser._get(*path))
 def __repr__(self):
 return f"{self.cls_name}: {self.name}"
class XMLDoc(Entity):
 def __init__(self, path_to_xml):
 self.tree = minidom.parse(path_to_xml)
 self.parser = XMLParser(self.tree)
 self.goods = self._make_goods_list()
 super().__init__("XMLDoc", self.parser, XMLDOC_FIELDS)
 self.name = self.trader_reference
 self.consignor = Entity("Consignor", self.parser, CONSIGNOR_FIELDS)
 self.importer = Entity("Importer", self.parser, IMPORTER_FIELDS)
 self.consignee = Entity("Consignee", self.parser, CONSIGNEE_FIELDS)
 ...

Only the GoodsItem class cannot be replaced with this, for that you probably still need a separate class.

answered Mar 9, 2020 at 14:10
\$\endgroup\$
3
  • \$\begingroup\$ Thank you. That definitely helps get rid of duplication. You added the name attribute to XMLDoc for some reason as self.trader_reference. Was that intentional? \$\endgroup\$ Commented Mar 9, 2020 at 14:50
  • 1
    \$\begingroup\$ @DonDraper: It was, because it will be picked up by the Entity.__repr__ method, in order to (almost) replicate your current different implementations of __repr__ for the different classes. \$\endgroup\$ Commented Mar 9, 2020 at 14:59
  • 1
    \$\begingroup\$ I got it. At first, I thought that you had renamed the attribute, but you just set the name attribute to avoid duplication in __repr__ implementation. Other than that, do you think that the code is clean enough and, well, Pythonic? \$\endgroup\$ Commented Mar 9, 2020 at 15:06

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.