From 40372c70d504005d50c317da5d09edaf0c602b28 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 13 Jun 2023 09:49:45 +0100 Subject: [PATCH] Take most recent epc if multiple found in the Property class --- epc_data/Property.py | 8 +- epc_data/app.py | 6 +- epc_data/attributes/WallAttributes.py | 192 ++++++++++++++++++++++++++ epc_data/requirements.txt | 3 +- epc_data/temp_inputs.py | 24 ++++ 5 files changed, 228 insertions(+), 5 deletions(-) create mode 100644 epc_data/attributes/WallAttributes.py diff --git a/epc_data/Property.py b/epc_data/Property.py index da963ef1..6706fce8 100644 --- a/epc_data/Property.py +++ b/epc_data/Property.py @@ -26,6 +26,12 @@ class Property: response = self.epc_client.domestic.search(params={"address": self.address1, "postcode": self.postcode}) if len(response["rows"]) > 1: - raise Exception("More than one result found for this address - investigate me") + newest_response = [ + r for r in response["rows"] if + r["inspection-date"] == max([x["inspection-date"] for x in response["rows"]]) + ] + if len(newest_response) > 1: + raise Exception("More than one result found for this address - investigate me") + response["rows"] = newest_response self.data = response["rows"][0] diff --git a/epc_data/app.py b/epc_data/app.py index 4ed726c0..ace7243f 100644 --- a/epc_data/app.py +++ b/epc_data/app.py @@ -40,11 +40,11 @@ def handler(): cleaner.clean() # For testing: - from epc_data.attributes.FloorAttributes import FloorAttributes - descriptions = {x["floor-description"] for x in data} + from epc_data.attributes.WallAttributes import WallAttributes + descriptions = {x["walls-description"] for x in data} out = [] for description in descriptions: - res = FloorAttributes(description).clean() + res = WallAttributes(description).clean() out.append( { "original_description": description, diff --git a/epc_data/attributes/WallAttributes.py b/epc_data/attributes/WallAttributes.py new file mode 100644 index 00000000..4b93f894 --- /dev/null +++ 
# Module: epc_data/attributes/WallAttributes.py
from typing import Optional


class WallAttributes:
    """Turn a free-text EPC ``walls-description`` into structured attributes.

    ``clean()`` lower-cases the description, detects the wall construction
    from fixed phrases and then either extracts a thermal transmittance or
    characterises the insulation as a qualitative thickness plus, where
    stated, whether it is internal or external.
    """

    IGNORE_STOP_WORDS = ["no"]

    # Position of the insulation, looked up from the last word that remains
    # once the known phrases have been stripped from the description.
    _CHARACTERISATION_MAP = {
        "external": "external",
        "internal": "internal",
    }

    # Qualitative thickness implied by a keyword found in the description.
    _THICKNESS_MAP = {
        "external": "average",
        "internal": "average",
        "partial": "below average",
        "no": "none",
        # TODO: CHECK IF ADDITIONAL = ABOVE AVERAGE
        "additional": "above average",
    }

    # Raw English stop words, loaded on first construction and then shared so
    # that the nltk download call is not repeated for every instance.
    _english_stop_words: Optional[list] = None

    def __init__(self, description: str):
        """
        :param description: Description of the walls.
        """
        self.description: str = description

        # TODO: Remove this out of here
        self.stop_words = [
            word for word in self._load_english_stop_words()
            if word not in self.IGNORE_STOP_WORDS
        ]

    @staticmethod
    def _load_english_stop_words():
        """Return nltk's English stop words, downloading the corpus on first use."""
        if WallAttributes._english_stop_words is None:
            # Imported here so that importing this module does not require
            # nltk; only constructing an instance does.
            import nltk
            from nltk.corpus import stopwords

            nltk.download('stopwords', quiet=True)
            WallAttributes._english_stop_words = list(stopwords.words('english'))
        return WallAttributes._english_stop_words

    def clean(self) -> dict:
        """Parse ``self.description`` into a dictionary of wall attributes.

        :return: dict with keys ``thermal_transmittence``,
            ``thermal_transmittence_unit``, ``is_solid_brick``,
            ``insulation_thickness`` and ``insulation_characteristic``.
        :raises ValueError: if the description mentions neither thermal
            transmittance nor insulation and is not a filled cavity wall, or
            if the insulation wording cannot be matched to one keyword.
        """
        description_lower = self.description.lower().strip()

        thermal_transmittence: Optional[float] = None
        thermal_transmittence_unit: Optional[str] = None
        insulation_thickness: Optional[str] = None
        insulation_characteristic: Optional[str] = None

        is_cavity_wall: bool = "cavity wall" in description_lower
        has_filled_cavity: bool = "filled cavity" in description_lower
        is_solid_brick: bool = "solid brick" in description_lower
        # TODO: Find out what this means - is_system_built
        is_system_built: bool = "system built" in description_lower
        is_timber_frame: bool = "timber frame" in description_lower
        is_granite_or_whinstone: bool = "granite or whinstone" in description_lower
        # The "as built" description indicates that these factors are based on
        # the original construction specifications and materials.
        as_built: bool = "as built" in description_lower
        assumed: bool = "assumed" in description_lower

        if "thermal transmittance" in description_lower:
            # Imported at point of use: only this branch needs the helper.
            from epc_data.attributes.attribute_utils import extract_thermal_transmittence

            thermal_transmittence, thermal_transmittence_unit = extract_thermal_transmittence(
                description_lower
            )
        elif "insulation" in description_lower or "insulated" in description_lower:
            # _characterise_insulation returns a (thickness, characteristic)
            # pair; unpack it so "insulation_thickness" holds only the
            # thickness rather than the whole tuple.
            insulation_thickness, insulation_characteristic = self._characterise_insulation(
                description_lower,
                is_cavity_wall,
                has_filled_cavity,
                is_solid_brick,
                as_built,
                assumed,
                is_system_built,
                is_timber_frame,
                is_granite_or_whinstone,
            )
        elif is_cavity_wall and has_filled_cavity:
            # A filled cavity with no further wording: nothing more to extract.
            # We can likely remove this branch of the if statement
            pass
        else:
            raise ValueError(f"Unrecognised walls description: {self.description!r}")

        return self._make_output(
            thermal_transmittence=thermal_transmittence,
            thermal_transmittence_unit=thermal_transmittence_unit,
            is_solid_brick=is_solid_brick,
            insulation_thickness=insulation_thickness,
            insulation_characteristic=insulation_characteristic,
        )

    def _characterise_insulation(
        self,
        description_lower,
        is_cavity_wall,
        has_filled_cavity,
        is_solid_brick,
        as_built,
        assumed,
        is_system_built,
        is_timber_frame,
        is_granite_or_whinstone
    ):
        """Derive the insulation thickness and position from a description.

        :param description_lower: lower-cased, stripped wall description that
            contains "insulation" or "insulated".
        :param is_cavity_wall: "cavity wall" appears in the description.
        :param has_filled_cavity: "filled cavity" appears in the description.
        :param is_solid_brick: "solid brick" appears in the description.
        :param as_built: "as built" appears in the description.
        :param assumed: "assumed" appears in the description.
        :param is_system_built: "system built" appears in the description.
        :param is_timber_frame: "timber frame" appears in the description.
        :param is_granite_or_whinstone: "granite or whinstone" appears in the
            description.
        :return: ``(insulation_thickness, insulation_characteristic)``; the
            characteristic is "internal", "external" or None.
        :raises ValueError: if the remaining text does not contain exactly one
            thickness keyword (solid brick falls back to "average" instead).
        """
        # Strip every recognised phrase, in this fixed order, so that only
        # the wording that qualifies the insulation is left.
        phrases_to_strip = (
            (assumed, "(assumed)"),
            (as_built, "as built,"),
            (is_system_built, "system built,"),
            (is_timber_frame, "timber frame,"),
            (is_granite_or_whinstone, "granite or whinstone,"),
            (is_cavity_wall, "cavity wall,"),
            (has_filled_cavity, "filled cavity"),
        )
        search_description = description_lower
        for present, phrase in phrases_to_strip:
            if present:
                search_description = search_description.replace(phrase, "").strip()

        insulation_term = "insulation" if "insulation" in search_description else "insulated"
        search_description = search_description.replace(insulation_term, "").strip()

        # Cavity wall takes precedence over solid brick when both are present.
        if is_solid_brick and not is_cavity_wall:
            return self._characterise_solid_brick(search_description, insulation_term)

        # For these wall types a bare "insulated"/"insulation" means average.
        # NOTE(review): granite or whinstone is stripped above but is not
        # given this default, so an empty remainder for it raises below -
        # confirm whether it should default like the other wall types.
        defaults_to_average = is_cavity_wall or is_system_built or is_timber_frame
        if defaults_to_average and search_description == "":
            return "average", None

        # Substring match: exactly one thickness keyword must be present.
        keywords = [k for k in self._THICKNESS_MAP if k in search_description]
        if len(keywords) != 1:
            raise ValueError(
                f"Expected exactly one insulation keyword in {search_description!r}, "
                f"found {keywords}"
            )

        insulation_characteristic = self._CHARACTERISATION_MAP.get(search_description.split(" ")[-1])
        return self._THICKNESS_MAP[keywords[0]], insulation_characteristic

    def _characterise_solid_brick(self, search_description, insulation_term):
        """Characterise insulation for a solid brick wall; unknown wording means average."""
        remainder = search_description.split("solid brick,")[-1].strip().split("as built,")[-1]
        if remainder == "":
            return "average", None

        # Exact lookup here (not substring): anything unrecognised is average.
        keyword = remainder.split(insulation_term)[0].strip()
        insulation_thickness = self._THICKNESS_MAP.get(keyword) or "average"
        insulation_characteristic = self._CHARACTERISATION_MAP.get(search_description.split(" ")[-1])
        return insulation_thickness, insulation_characteristic

    @staticmethod
    def _make_output(
        thermal_transmittence,
        thermal_transmittence_unit,
        is_solid_brick,
        insulation_thickness,
        insulation_characteristic=None,
    ):
        """Assemble the result dictionary returned by ``clean()``.

        :param thermal_transmittence: value extracted from the description, or None.
        :param thermal_transmittence_unit: unit of that value, or None.
        :param is_solid_brick: whether the description mentions solid brick.
        :param insulation_thickness: qualitative thickness, or None.
        :param insulation_characteristic: "internal", "external" or None;
            optional so existing four-argument callers keep working.
        :return: dict of the values above keyed by parameter name.
        """
        return {
            "thermal_transmittence": thermal_transmittence,
            "thermal_transmittence_unit": thermal_transmittence_unit,
            "is_solid_brick": is_solid_brick,
            "insulation_thickness": insulation_thickness,
            "insulation_characteristic": insulation_characteristic,
        }