diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py index 53f7e859..d14dafc4 100644 --- a/etl/xml_survey_extraction/XmlParser.py +++ b/etl/xml_survey_extraction/XmlParser.py @@ -115,11 +115,12 @@ class XmlParser: "2": "Single" } - def __init__(self, file, filekey, uprn=None): + def __init__(self, file, filekey, surveyor_company, uprn=None): file.seek(0) # Ensure the file pointer is at the beginning xml_string = file.read().decode('utf-8') self.xml = parseString(xml_string) self.filekey = filekey + self.surveyor_company = surveyor_company # The xml parser is use to parse the EPC and EPR xmls and different file types will contain different # information @@ -349,9 +350,21 @@ class XmlParser: self.insulation_wall_area = self.get_insulation_wall_area() + boolean_lookup = { + "true": True, + "false": False, + "Y": True, + "N": False + } + + cylinder_insulation_type = { + "1": "Foam", + } + self.additional_data = { "file_location": self.filekey, "surveyor_name": self.surveyor_name, + "surveyor_company": self.surveyor_company, "space_heating_kwh": self.space_heating_kwh, "water_heating_kwh": self.water_heating_kwh, # "heating_system": self.heating_system, @@ -367,6 +380,10 @@ class XmlParser: "rooms_with_mixer_shower_no_bath": self.get_node_value('Rooms-With-Mixer-Shower-No-Bath'), "room_with_bath_and_mixer_shower": self.get_node_value('Rooms-With-Bath-And-Mixer-Shower'), "percent_draftproofed": self.get_node_value('Percent-Draughtproofed'), + "has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')], + "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')], + "cylinder_insulation_thickness": self.get_node_value('Cylinder-Insulation-Thickness'), + "cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')], } def get_node_value(self, tag_name): diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index b3500e71..92048a68 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -48,7 +48,12 @@ def main(): for xml in xmls: xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml) xml_data_io = BytesIO(xml_data) - xml_parser = XmlParser(file=xml_data_io, filekey=os.path.join(f"s3://{BUCKET}", xml), uprn=uprn) + xml_parser = XmlParser( + file=xml_data_io, + filekey=os.path.join(f"s3://{BUCKET}", xml), + uprn=uprn, + surveyor_company=SURVEYORS, + ) xml_parser.run() logger.info(f"Extracted data from {xml}")