diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..3a3ec5a2 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..605a6457 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 3bd2c84e..ff603cae 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -134,7 +134,7 @@ class SearchEpc:
# Finally, we identify the newest epc and the rest, and then return
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
- return newest_epc, older_epcs
+ return newest_epc, older_epcs, full_sap_epc
@staticmethod
def filter_newest_epc(list_of_epcs: List):
@@ -142,9 +142,13 @@ class SearchEpc:
r for r in list_of_epcs if
r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in list_of_epcs])
]
- if len(newest_response) > 1:
+
+ if not newest_response:
+ return {}, []
+
+ if len(newest_response) != 1:
raise Exception("More than one result found for this address - investigate me")
- older_epcs = [epc for epc in ["rows"] if epc["lmk-key"] != newest_response[0]["lmk-key"]]
+ older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]]
return newest_response[0], older_epcs
diff --git a/etl/eligibility/MeasureSuitibility.py b/etl/eligibility/MeasureSuitibility.py
index a9368f67..54deec4d 100644
--- a/etl/eligibility/MeasureSuitibility.py
+++ b/etl/eligibility/MeasureSuitibility.py
@@ -1,3 +1,8 @@
+from recommendations.recommendation_utils import convert_thickness_to_numeric
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
+
+
class MeasureSuitibility:
"""
Given the epc data about a property, this class holds the logic for determining if the home
@@ -14,7 +19,65 @@ class MeasureSuitibility:
self.epc = epc
self.cleaned = cleaned
- walls_description = self.epc["walls-description"]
- # Get the cleaned version of the description
+ self.walls = self.parse_fabric("walls-description")
+ self.roof = self.parse_fabric("roof-description")
- # def loft_insulation(self):
+ def parse_fabric(self, key):
+
+ if "thermal transmittance" in self.epc[key]:
+ if key == "walls-description":
+ return WallAttributes(self.epc["walls-description"]).process()
+
+ if key == "roof-description":
+ return RoofAttributes(self.epc["roof-description"]).process()
+
+ raise ValueError("Invalid Key")
+
+ # Get the cleaned version of the description
+ return [
+ data for data in self.cleaned[key] if
+ data["original_description"] == self.epc[key]
+ ][0]
+
+ def loft_insulation(self, loft_thickness_threshold: int = None):
+ """
+ Given the description of the roof, this function determines whether or not the property is suitable for loft
+ insulation. A loft whose existing insulation thickness does not exceed loft_thickness_threshold is deemed to
+ be suitable for loft insulation
+ :param loft_thickness_threshold: Integer, Optional. If provided, any loft found with insulation no thicker than
+ this threshold is deemed to be suitable for loft insulation. If this
+ parameter is not provided, this method will default to the variable specified
+ in LOFT_INSULATION_THRESHOLD
+ """
+
+ loft_thickness_threshold = (
+ self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
+ )
+
+ # We firstly check if the roof is a loft
+ is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])
+
+ if not is_loft:
+ return {
+ "suitablility": False,
+ "thickness": None
+ }
+
+ # If it is a loft, we'll convert the textual thickness to a numerical value we can easily use
+ insulation_thickness = convert_thickness_to_numeric(
+ string_thickness=self.roof["insulation_thickness"],
+ is_pitched=self.roof["is_pitched"],
+ is_flat=self.roof["is_flat"]
+ )
+
+ if insulation_thickness > loft_thickness_threshold:
+ # Insulation is already thick enough
+ return {
+ "suitablility": False,
+ "thickness": insulation_thickness
+ }
+
+ return {
+ "suitablility": True,
+ "thickness": insulation_thickness
+ }
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index 34204aad..47426d5d 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -13,6 +13,9 @@ from utils.s3 import read_from_s3
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from backend.Property import Property
+from etl.eligibility.MeasureSuitibility import MeasureSuitibility
+from etl.epc.DataProcessor import DataProcessor
+from backend.app.utils import read_parquet_from_s3
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@@ -335,34 +338,68 @@ def app():
)
cleaned = msgpack.unpackb(cleaned, raw=False)
+ cleaning_data = read_parquet_from_s3(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
# We want to retrieve EPCs for every single property
+ ha_data = ha32
+ house_number_key = "Dwelling num"
+ address_key = "Street"
+ postcode_key = "Postcode"
- ha32_scoring_data = []
- for _, house in tqdm(ha32.iterrows(), total=len(ha32)):
- searcher = SearchEpc(
- address1=" ".join([house["No."], house["Address"]]),
- postcode=house["Postcode"]
- )
+ def get_data(ha_data, house_number_key, address_key, postcode_key):
+ ha_scoring_data = []
+ for _, house in tqdm(ha_data.iterrows(), total=len(ha_data)):
+ searcher = SearchEpc(
+ address1=" ".join([house[house_number_key], house[address_key]]),
+ postcode=house[postcode_key]
+ )
- searcher.search()
+ searcher.search()
- newest_epc, older_epcs = searcher.retrieve()
- # We also want to get the penultimate epc
- penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+ newest_epc, older_epcs, _ = searcher.retrieve()
+ # We also want to get the penultimate epc
+ penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+ if not penultimate_epc:
+ penultimate_epc = newest_epc
- from etl.eligibility.MeasureSuitibility import MeasureSuitibility
- suitability = MeasureSuitibility(
- epc=newest_epc, cleaned=cleaned
- )
+ suitability = MeasureSuitibility(
+ epc=newest_epc, cleaned=cleaned
+ )
+ suitable = suitability.loft_insulation()
- from pprint import pprint
- len(searcher.data["rows"])
+ modelling_epc = newest_epc.copy()
+ if not suitable["suitablility"]:
+ # If the newest EPC is not suitable for loft insulation, try the penultimate EPC instead
+ suitability = MeasureSuitibility(
+ epc=penultimate_epc, cleaned=cleaned
+ )
+ suitable = suitability.loft_insulation()
+ modelling_epc = penultimate_epc.copy()
- # TODO: Integegrate SearchEPC into the Property class
- p = Property(
- id=house["row_id"],
- postcode=house["postcode"],
- address1=house["address1"],
- epc_client=None,
- data=searcher.data
- )
+ if not suitable["suitablility"]:
+ raise ValueError("DO SOMETHING")
+
+ p = Property(
+ id=house["row_id"],
+ postcode=modelling_epc["postcode"],
+ address1=modelling_epc["address1"],
+ epc_client=None,
+ data=modelling_epc
+ )
+ ################################################################################
+ # Prepare the data for modelling, in the same fashion as the engine
+ ################################################################################
+
+ p.get_components(cleaned)
+ # This is temporary - this should happen after scoring
+ cleaned_property_data = DataProcessor.apply_averages_cleaning(
+ data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+ )
+ p.set_number_lighting_outlets(cleaned_property_data)
+
+ from pprint import pprint
+ len(searcher.data["rows"])
diff --git a/etl/eligibility/ha_15_32/requirements.txt b/etl/eligibility/ha_15_32/requirements.txt
index 390d7de9..74fcd97f 100644
--- a/etl/eligibility/ha_15_32/requirements.txt
+++ b/etl/eligibility/ha_15_32/requirements.txt
@@ -1,3 +1,9 @@
pandas
pydantic==1.10.11
epc-api-python==1.0.2
+msgpack
+tqdm
+python-dotenv
+boto3
+textblob
+pyarrow==12.0.1