diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index dccc0a9f..ef18c6db 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -198,9 +198,6 @@ class XmlParser:
             # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit
             # unless we learn something else that determines that we need information from this file
             return
-        self.get_sap()
-        self.get_property_address()
-        self.get_dates()
         self.get_assessor_details()
         self.get_heating_and_emissions_data()
 
@@ -239,6 +236,10 @@ class XmlParser:
             floor_level = "NO DATA!"
 
         self.epc = {
+            "uprn": self.uprn,
+            "property-type": property_type,
+            **self.get_sap(),
+            **self.get_property_address(),
             "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
             # TODO: Needs to be done more carefully
             # "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'),
@@ -260,9 +261,8 @@ class XmlParser:
             "sheating-energy-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating'),
             ],
-            # TODO: Doesn't seem to be included in the xml
-            # "local-authority": self.get_node_value('Local-Authority'),
-            "local-authority-label": self.get_node_value('Local-Authority-Label'),
+            "local-authority": "",  # Not included in the xml
+            "local-authority-label": "",
             "fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
             # TODO: Doesn't seem to be included in the xml
             # "energy-tariff": self.get_node_value('Energy-Tariff'),
@@ -346,6 +346,13 @@ class XmlParser:
             "low-energy-lighting": self.get_node_value('Low-Energy-Lighting'),
             "walls-description": self.get_property_summary_value('Wall', 'Description'),
             "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'),
+            "co2-emissions-current": self.get_node_value('CO2-Emissions-Current'),
+            "heating-cost-current": self.get_node_value('Heating-Cost-Current'),
+            "hot-water-cost-current": self.get_node_value('Hot-Water-Cost-Current'),
+            "lighting-cost-current": self.get_node_value('Lighting-Cost-Current'),
+            "energy-consumption-current": self.get_node_value('Energy-Consumption-Current'),
+            "lodgement-date": self.get_node_value('Inspection-Date'),
+
         }
 
     def get_node_value(self, tag_name):
@@ -410,8 +417,11 @@ class XmlParser:
         sap_score = self.xml.getElementsByTagName('Energy-Rating-Current')
         sap_score = int(sap_score[0].firstChild.nodeValue)
         epc_rating = sap_to_epc(sap_score)
-        self.current_energy_efficiency = str(sap_score)
-        self.current_energy_rating = epc_rating
+
+        return {
+            "current-energy-efficiency": str(sap_score),
+            "current-energy-rating": epc_rating
+        }
 
     def get_heating_and_emissions_data(self):
         """
@@ -431,19 +441,6 @@ class XmlParser:
 
         self.water_heating_kwh = self.xml.getElementsByTagName('Water-Heating')[0].firstChild.nodeValue
 
-        self.co2_emissions_current = self.xml.getElementsByTagName('CO2-Emissions-Current')[0].firstChild.nodeValue
-        self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue
-        self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue
-        self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue
-
-        # Energy consumption
-        self.energy_consumption_current = (
-            self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue
-        )
-        self.energy_consumption_potential = (
-            self.xml.getElementsByTagName("Energy-Consumption-Potential")[0].firstChild.nodeValue
-        )
-
     def get_detailed_heating_specs(self):
         """
         Given the heating data that is found in the tag, we extract the detailed about the heating
@@ -668,19 +665,24 @@ class XmlParser:
 
         property_tag = self.xml.getElementsByTagName("Property")[0]
 
-        self.address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0])
-        self.address2 = self.get_node(property_tag.getElementsByTagName("Address-Line-2")[0])
-        self.address3 = self.get_node(property_tag.getElementsByTagName("Address-Line-3")[0])
-        self.posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0])
-        self.postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0])
-        self.address = ", ".join(
-            [x for x in [self.address1, self.address2, self.address3, self.posttown, self.postcode] if x is not None]
+        address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0])
+        address2 = self.get_node(property_tag.getElementsByTagName("Address-Line-2")[0])
+        address3 = self.get_node(property_tag.getElementsByTagName("Address-Line-3")[0])
+        posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0])
+        postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0])
+        # Join the locals parsed above: the self.address* attributes are no longer assigned anywhere.
+        address = ", ".join(
+            [x for x in [address1, address2, address3, posttown, postcode] if x is not None]
         )
 
-    def get_dates(self):
-        self.survey_date = (
-            self.xml.getElementsByTagName("Inspection-Date")[0].firstChild.nodeValue
-        )
+        return {
+            "address1": address1,
+            "address2": address2,
+            "address3": address3,
+            "posttown": posttown,
+            "postcode": postcode,
+            "address": address
+        }
 
     def get_property_dimensions(self):
         """