preparing for data extraction

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-25 18:13:50 +01:00
parent c9d3bb6eec
commit 7b04e1edc7

View file

@ -44,8 +44,8 @@ def get_house_number(address: str) -> str | None:
class XmlParser:
epc = None
additional_data = None
epc = {}
additional_data = {}
uprn = None
# heating/emissions information
@ -72,12 +72,6 @@ class XmlParser:
floor_dimensions = None
# The value of the UPRN tells us about the file type that we're parsing
UPRN_FILETYPE_MAP = {
0: "EPR",
-1: "RDSAP_EPR"
}
RATINGS_MAP = {
"0": "N/A",
"1": "Very Poor",
@ -122,14 +116,11 @@ class XmlParser:
self.filekey = filekey
self.surveyor_company = surveyor_company
# The xml parser is used to parse the EPC and EPR xmls and different file types will contain different
# information
# In order to identify the file type, we can look for the presence of the 'UPRN' tag
# If the UPRN tag is present, we can assume that the file is an EPC
# If the UPRN tag is not present, we can assume that the file is an EPR
self.get_uprn(uprn)
# We check if we have a lig xml or rdsap xml
# We look for the presence of the Schema-Version-Original tag
self.is_lig = len(self.xml.getElementsByTagName("Schema-Version-Original")) > 0
self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC")
self.get_uprn(uprn)
@staticmethod
def get_node(node):
@ -145,10 +136,10 @@ class XmlParser:
return node_first_child.nodeValue
def run(self):
if self.file_type == "RDSAP_EPR":
# This file type contains just limited information compared to a regular EPR/EPC, and so we just exit
# unless we learn something else that determines that we need information from this file
if not self.is_lig:
return
self.get_assessor_details()
self.get_heating_and_emissions_data()