completed extraction of data

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-25 18:09:37 +01:00
parent bc84ed2c2a
commit c9d3bb6eec
2 changed files with 24 additions and 2 deletions

View file

@ -115,11 +115,12 @@ class XmlParser:
"2": "Single"
}
def __init__(self, file, filekey, uprn=None):
def __init__(self, file, filekey, surveyor_company, uprn=None):
file.seek(0) # Ensure the file pointer is at the beginning
xml_string = file.read().decode('utf-8')
self.xml = parseString(xml_string)
self.filekey = filekey
self.surveyor_company = surveyor_company
# The XML parser is used to parse both EPC and EPR XMLs; different file types will contain different
# information
@ -349,9 +350,21 @@ class XmlParser:
self.insulation_wall_area = self.get_insulation_wall_area()
boolean_lookup = {
"true": True,
"false": False,
"Y": True,
"N": False
}
cylinder_insulation_type = {
"1": "Foam",
}
self.additional_data = {
"file_location": self.filekey,
"surveyor_name": self.surveyor_name,
"surveyor_company": self.surveyor_company,
"space_heating_kwh": self.space_heating_kwh,
"water_heating_kwh": self.water_heating_kwh,
# "heating_system": self.heating_system,
@ -367,6 +380,10 @@ class XmlParser:
"rooms_with_mixer_shower_no_bath": self.get_node_value('Rooms-With-Mixer-Shower-No-Bath'),
"room_with_bath_and_mixer_shower": self.get_node_value('Rooms-With-Bath-And-Mixer-Shower'),
"percent_draftproofed": self.get_node_value('Percent-Draughtproofed'),
"has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')],
"cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')],
"cylinder_insulation_thickness": self.get_node_value('Cylinder-Insulation-Thickness'),
"cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')],
}
def get_node_value(self, tag_name):

View file

@ -48,7 +48,12 @@ def main():
for xml in xmls:
xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
xml_data_io = BytesIO(xml_data)
xml_parser = XmlParser(file=xml_data_io, filekey=os.path.join(f"s3://{BUCKET}", xml), uprn=uprn)
xml_parser = XmlParser(
file=xml_data_io,
filekey=os.path.join(f"s3://{BUCKET}", xml),
uprn=uprn,
surveyor_company=SURVEYORS,
)
xml_parser.run()
logger.info(f"Extracted data from {xml}")