From 50fa3f7ad2c3923b2944b85c70d552fce3af9fcf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Wed, 4 Sep 2024 12:08:46 +0100
Subject: [PATCH] handling parsing of fields in flats

---
Reviewer note: the added helper methods were revised by hand after export
(lookup fallback instead of KeyError, guarded DOM access, docstrings), so the
blob hashes on the index line no longer describe the post-image.

 etl/xml_survey_extraction/XmlParser.py | 68 ++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 9 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index c39e8f95..2ea8659a 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -180,6 +180,53 @@ class XmlParser:
         # Put together all of the additional data we capture
         self.extract_additional_data()
 
+    def _parse_heat_loss_corridor(self):
+        """Map the raw corridor tag value to its description, or "NO DATA!"."""
+        hlc_lookup = {"2": "unheated corridor", "Unheated": "unheated corridor"}
+        if self.is_lig:
+            heat_loss_corridor = self.get_node_value('Heat-Loss-Corridor')
+        else:
+            # For some reason, this tag is spelt incorrectly in the rdsap xml
+            heat_loss_corridor = self.get_node_value('FlatCoridor')
+        # Values missing from the lookup fall back to the same "NO DATA!"
+        # placeholder used for non-flats instead of raising KeyError
+        return hlc_lookup.get(heat_loss_corridor, "NO DATA!")
+
+    def _parse_heat_loss_corridor_length(self):
+        """Return the corridor length read from the format-specific tag."""
+        if self.is_lig:
+            return self.get_node_value('Unheated-Corridor-Length')
+        return self.get_node_value('FlatShelteredWallLength')
+
+    def _parse_flat_storey_count(self):
+        """Return the 'Storeys' tag value, or None for LIG files."""
+        # in the EPR the tag is Storeys
+        if self.is_lig:
+            storeys = None
+        else:
+            storeys = self.get_node_value('Storeys')
+        return storeys
+
+    def _parse_flat_top_storey(self):
+        """Return the 'Top-Storey' tag value for LIG files, otherwise None."""
+        if self.is_lig:
+            return self.get_node_value('Top-Storey')
+        return None
+
+    def _parse_floor_level(self):
+        """Return the flat's 'Level' node text for LIG files, otherwise None."""
+        if not self.is_lig:
+            return None
+        # A missing or empty element yields None rather than an
+        # IndexError / AttributeError
+        flat_details = self.xml.getElementsByTagName('SAP-Flat-Details')
+        if not flat_details:
+            return None
+        levels = flat_details[0].getElementsByTagName("Level")
+        if not levels or levels[0].firstChild is None:
+            return None
+        return levels[0].firstChild.nodeValue
+
     def extract_epc(self):
 
         if self.floor_dimensions is None:
@@ -191,15 +238,18 @@ class XmlParser:
 
         property_type = self.get_property_type()
         if property_type == "Flat":
-            raise NotImplementedError(
-                "Need to handle: heat-loss-corridor, unheated-corridor-length, flat-storey-count, flat-top-storey, "
-                "floor-level"
-            )
-        heat_loss_corridor = "NO DATA!"
-        unheated_corridor_length = ""
-        flat_storey_count = ""
-        flat_top_storey = ""
-        floor_level = "NO DATA!"
+            heat_loss_corridor = self._parse_heat_loss_corridor()
+            unheated_corridor_length = self._parse_heat_loss_corridor_length()
+            flat_storey_count = self._parse_flat_storey_count()
+            flat_top_storey = self._parse_flat_top_storey()
+            floor_level = self._parse_floor_level()
+
+        else:
+            heat_loss_corridor = "NO DATA!"
+            unheated_corridor_length = ""
+            flat_storey_count = ""
+            flat_top_storey = ""
+            floor_level = "NO DATA!"
 
         floor_height = np.mean([
             float(x['room_height']) for x in self.floor_dimensions if