diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/BaseUtility.py b/BaseUtility.py
index bd2f091e..e799144d 100644
--- a/BaseUtility.py
+++ b/BaseUtility.py
@@ -45,7 +45,9 @@ class Definitions:
# contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases.
"NULL",
# We sometimes see fields populated with just an empty string.
- ""
+ "",
+ # An older value which rarely shows up but has been seen in the data.
+ "UNKNOWN",
}
DATA_ANOMALY_SUBSTRINGS = {
diff --git a/backend/Property.py b/backend/Property.py
index 98325b15..c9cad22f 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -13,7 +13,7 @@ from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
-from BaseUtility import Definitions
+from etl.epc.settings import DATA_ANOMALY_MATCHES
from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
from recommendations.recommendation_utils import (
estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
@@ -25,7 +25,7 @@ DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT =
logger = setup_logger()
-class Property(Definitions):
+class Property:
ATTRIBUTE_MAP = {
"floor-description": "floor",
"hotwater-description": "hotwater",
@@ -51,6 +51,8 @@ class Property(Definitions):
spatial = None
base_difference_record = None
+ DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES
+
def __init__(self, id, postcode, address, epc_record):
self.epc_record = epc_record
@@ -68,7 +70,7 @@ class Property(Definitions):
self.in_conservation_area, self.is_listed, self.is_heritage = None, None, None
self.restricted_measures = False
self.year_built = epc_record.get("year_built")
- self.number_of_rooms = epc_record.prepared_epc.get("number_of_rooms")
+ self.number_of_rooms = epc_record.prepared_epc.get("number_habitable_rooms")
self.age_band = epc_record.get("age_band")
self.construction_age_band = epc_record.get("construction_age_band")
self.number_of_floors = epc_record.get("number_of_floors")
@@ -88,22 +90,24 @@ class Property(Definitions):
}
self.solar_hot_water = {
"solar_hot_water": epc_record.get("solar_water_heating_flag"),
+ "solar_hot_water_boolean": epc_record.get("solar_water_heating_flag_bool"),
}
self.wind_turbine = {
"wind_turbine": epc_record.prepared_epc.get("wind_turbine_count"),
}
self.number_of_open_fireplaces = {
- "number_of_open_fireplaces": epc_record.prepared_epc.get("number_of_open_fireplaces"),
+ "number_of_open_fireplaces": epc_record.prepared_epc.get("number_open_fireplaces"),
}
self.number_of_extensions = {
- "number_of_extensions": epc_record.prepared_epc.get("number_of_extensions"),
+ "number_of_extensions": epc_record.prepared_epc.get("extension_count"),
}
self.number_of_storeys = {
- "number_of_storeys": epc_record.prepared_epc.get("number_of_storeys"),
+ "number_of_storeys": epc_record.prepared_epc.get("flat_storey_count"),
}
self.heat_loss_corridor = {
"heat_loss_corridor": epc_record.prepared_epc.get("heat_loss_corridor"),
"length": epc_record.prepared_epc.get("unheated_corridor_length"),
+ "heat_loss_corridor_boolean": epc_record.get("heat_loss_corridor_bool"),
}
self.mains_gas = epc_record.prepared_epc.get('mains_gas_flag')
self.floor_height = epc_record.prepared_epc.get('floor_height')
@@ -222,7 +226,10 @@ class Property(Definitions):
proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
- recommendation_record["roof_energy_eff_ending"] = "Very Good"
+ if recommendation["type"] == "loft_insulation":
+ recommendation_record["roof_energy_eff_ending"] = "Good"
+ else:
+ recommendation_record["roof_energy_eff_ending"] = "Very Good"
else:
# Fill missing roof u-values - this fill is not based on recommended upgrades
if recommendation_record["roof_thermal_transmittance_ending"] is None:
@@ -297,6 +304,7 @@ class Property(Definitions):
self.set_basic_property_dimensions()
for description, attribute in cleaned.items():
+
if self.data[description] in self.DATA_ANOMALY_MATCHES:
template = cleaned[description][0]
fill_dict = dict(zip(template.keys(), [None] * len(template)))
@@ -314,6 +322,7 @@ class Property(Definitions):
attributes = [
x for x in cleaned[description] if x["original_description"] == self.data[description]
]
+
if len(attributes) > 1:
raise ValueError("Either No attributes or multiple found for %s" % description)
@@ -433,10 +442,10 @@ class Property(Definitions):
"mainfuel": self.main_fuel["clean_description"],
"ventilation": self.ventilation["ventilation"],
"solar_pv": self.solar_pv["solar_pv"],
- "solar_hot_water": self.solar_hot_water["solar_hot_water"],
+ "solar_hot_water": self.solar_hot_water["solar_hot_water_boolean"],
"wind_turbine": self.wind_turbine["wind_turbine"],
"floor_height": self.floor_height,
- "heat_loss_corridor": self.heat_loss_corridor["heat_loss_corridor"],
+ "heat_loss_corridor": self.heat_loss_corridor["heat_loss_corridor_boolean"],
"unheated_corridor_length": self.heat_loss_corridor["length"],
"number_of_open_fireplaces": self.number_of_open_fireplaces["number_of_open_fireplaces"],
"number_of_extensions": self.number_of_extensions["number_of_extensions"],
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index d69d8d86..4f6fd33d 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -472,7 +472,7 @@ class SearchEpc:
if not epc_data.empty:
# Further processing of the EPC data
- epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], format='mixed')
+ epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], format='mixed', errors='coerce')
epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
epc_data["numeric_house_number"] = epc_data["house_number"].apply(
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 8c199145..b3d1c623 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -28,8 +28,6 @@ from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_e
from backend.ml_models.api import ModelApi
from backend.Property import Property
-from etl.epc.DataProcessor import EPCDataProcessor
-from etl.epc.settings import COLUMNS_TO_MERGE_ON
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.optimiser.CostOptimiser import CostOptimiser
@@ -68,7 +66,6 @@ async def trigger_plan(body: PlanTriggerRequest):
)
input_properties = []
-
for config in plan_input:
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
@@ -96,13 +93,16 @@ async def trigger_plan(body: PlanTriggerRequest):
)
epc_records = {
- 'original_epc': epc_searcher.newest_epc,
- 'full_sap_epc': epc_searcher.full_sap_epc,
- 'old_data': epc_searcher.older_epcs,
+ 'original_epc': epc_searcher.newest_epc.copy(),
+ 'full_sap_epc': epc_searcher.full_sap_epc.copy(),
+ 'old_data': epc_searcher.older_epcs.copy(),
}
- prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata",
- cleaning_data=cleaning_data) # This uses all the epc records to clean the data
+ prepared_epc = EPCRecord(
+ epc_records=epc_records,
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
input_properties.append(
Property(
@@ -173,8 +173,6 @@ async def trigger_plan(body: PlanTriggerRequest):
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
}
)
- # all_predictions["heat_demand_predictions"]= all_predictions["sap_change_predictions"].copy()
- # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy()
# Insert the predictions into the recommendations and run the optimiser
logger.info("Optimising recommendations")
@@ -310,10 +308,6 @@ async def trigger_plan(body: PlanTriggerRequest):
}
)
- # all_combined_predictions["heat_demand_predictions"]= all_combined_predictions["sap_change_predictions"].copy()
- # all_combined_predictions["carbon_change_predictions"] = all_combined_predictions[
- # "sap_change_predictions"].copy()
-
# We update the carbon and heat demand predictions
for property_id, property_recommendations in recommendations.items():
combined_heat_demand = all_combined_predictions["heat_demand_predictions"]
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 018b4678..ff771252 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -22,6 +22,8 @@ class PropertyValuation:
100021192109: 650000, # Based on Zoopla
766249482: 358000, # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
100120703802: 277000, # Based on Zoopla
+ 10014469685: 286000, # Based on Zoopla
+ 10001328782: 196000, # Based on Zoopla
}
# We base our valuation uplifts on a number of sources
@@ -96,11 +98,11 @@ class PropertyValuation:
if not value:
return {
- "current_value": None,
- "lower_bound_increased_value": None,
- "upper_bound_increased_value": None,
- "average_increased_value": None,
- "average_increase": None
+ "current_value": 0,
+ "lower_bound_increased_value": 0,
+ "upper_bound_increased_value": 0,
+ "average_increased_value": 0,
+ "average_increase": 0
}
current_epc = property_instance.data["current-energy-rating"]
diff --git a/backend/tests/test_property.py b/backend/tests/test_property.py
index 09594a40..43149791 100644
--- a/backend/tests/test_property.py
+++ b/backend/tests/test_property.py
@@ -1,9 +1,9 @@
import pandas as pd
import pytest
from unittest.mock import Mock
-from epc_api.client import EpcClient
from backend.Property import Property
from etl.epc_clean.EpcClean import EpcClean
+from etl.epc.Record import EPCRecord
# Define some test data
mock_epc_response = {
@@ -196,12 +196,21 @@ class TestProperty:
@pytest.fixture(autouse=True)
def property_instance(self, mock_cleaner):
- property_instance = Property(id=1, postcode="AB12CD", address="Test Address", data=mock_epc_response["rows"][0])
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = mock_epc_response["rows"][0]
+
+ property_instance = Property(id=1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
+ property_instance.number_of_floors = 2
+ property_instance.number_of_rooms = 5
+ property_instance.floor_area = 100
+ property_instance.floor_height = 2.5
return property_instance
@pytest.fixture(autouse=True)
def property_instance_dupe_data(self):
- property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = mock_epc_response_dupe["rows"][0]
+ property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address", epc_record=epc_record)
return property_instance_dupe_data
# @pytest.fixture
@@ -271,15 +280,17 @@ class TestProperty:
return mock_cleaner
def test_init(self):
- inst1 = Property(0, postcode="AB12CD", address="Test Address")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"uprn": 1}
+ inst1 = Property(0, postcode="AB12CD", address="Test Address", epc_record=epc_record)
- assert inst1.data is None
+ assert inst1.data is not None
- inst2 = Property(3, "AB12CD", "Test Address")
+ inst2 = Property(3, "AB12CD", "Test Address", epc_record=epc_record)
assert inst2.id == 3
- inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data", "uprn": 123})
- assert inst3.data == {"some": "data", "uprn": 123}
+ inst3 = Property(4, "AB12CD", "Test Address", epc_record=epc_record)
+ assert inst3.data == {"uprn": 1}
def test_get_components(
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
@@ -372,7 +383,9 @@ class TestProperty:
property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)
def test_set_spatial(self):
- prop = Property(1, postcode="AB12CD", address="Test Address")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = mock_epc_response["rows"][0]
+ prop = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
spatial1 = pd.DataFrame([{
'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@@ -386,7 +399,7 @@ class TestProperty:
assert prop.is_heritage
assert prop.restricted_measures
- prop2 = Property(1, "AB12CD", "Test Address")
+ prop2 = Property(1, "AB12CD", "Test Address", epc_record=epc_record)
spatial2 = pd.DataFrame([{
'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@@ -403,8 +416,9 @@ class TestProperty:
def test_set_floor_level(self):
# In this case, we have a flat which looks looks it's on the first floor, but it's actually on the ground
# floor, so we should set floor_level to 0
- prop = Property(1, postcode="AB12CD", address="Test Address")
- prop.data = {'floor-level': '01', 'property-type': 'Flat'}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {'floor-level': '01', 'property-type': 'Flat'}
+ prop = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
prop.floor = {
'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation',
'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': True,
@@ -419,8 +433,9 @@ class TestProperty:
# This property is labelled as being on the ground floor but actually has another property below
# so we set floor level to 1
- prop2 = Property(1, postcode="AB12CD", address="Test Address")
- prop2.data = {'floor-level': 'Ground', 'property-type': 'Flat'}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {'floor-level': 'Ground', 'property-type': 'Flat'}
+ prop2 = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
prop2.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
@@ -434,8 +449,9 @@ class TestProperty:
assert prop2.floor_level == 1
# this property is correctly labelled as being on the 2nd floor
- prop3 = Property(1, postcode="AB12CD", address="Test Address")
- prop3.data = {'floor-level': '02', 'property-type': 'Flat'}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {'floor-level': '02', 'property-type': 'Flat'}
+ prop3 = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
prop3.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
@@ -449,8 +465,9 @@ class TestProperty:
assert prop3.floor_level == 2
# Example of a house
- prop4 = Property(1, postcode="AB12CD", address="Test Address")
- prop4.data = {'floor-level': '', 'property-type': 'House'}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {'floor-level': '', 'property-type': 'House'}
+ prop4 = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
prop4.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
diff --git a/backend/tests/test_sap_model_prep.py b/backend/tests/test_sap_model_prep.py
deleted file mode 100644
index 89c436ce..00000000
--- a/backend/tests/test_sap_model_prep.py
+++ /dev/null
@@ -1,1000 +0,0 @@
-from backend.Property import Property
-from etl.epc.DataProcessor import DataProcessor
-from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
-from etl.epc.settings import COLUMNS_TO_MERGE_ON
-import pandas as pd
-import pytest
-import msgpack
-
-from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
-
-
-# Handy code for selecting testing data
-# import pickle
-#
-# with open("sap_dataset.pickle", "rb") as f:
-# sap_change_dataset = pickle.load(f)
-#
-# search_from = sap_change_dataset[
-# (sap_change_dataset["walls_thermal_transmittance_ENDING"] == sap_change_dataset["walls_thermal_transmittance"]) &
-# sap_change_dataset["is_to_unheated_space"]
-# ]
-# search_from = search_from[
-# (search_from["roof_thermal_transmittance_ENDING"] == search_from["roof_thermal_transmittance"]) &
-# (search_from["floor_thermal_transmittance_ENDING"] != search_from["floor_thermal_transmittance"]) &
-# (search_from["MECHANICAL_VENTILATION_ENDING"] == search_from["MECHANICAL_VENTILATION_STARTING"]) &
-# (search_from["SECONDHEAT_DESCRIPTION_ENDING"] == search_from["SECONDHEAT_DESCRIPTION_STARTING"]) &
-# (search_from["GLAZED_TYPE_ENDING"] == search_from["GLAZED_TYPE_STARTING"])
-# ]
-#
-# # Find a record where the only difference is cavity wall getting filled
-# ending_cols = [c for c in search_from.columns if "_ENDING" in c]
-#
-# ignore = [
-# "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING", "TRANSACTION_TYPE_ENDING", "FLOOR_HEIGHT_ENDING",
-# "DAYS_TO_ENDING", "TOTAL_FLOOR_AREA_ENDING"
-# ]
-#
-# ending_cols = [c for c in ending_cols if c not in ignore]
-#
-# for _, row in tqdm(search_from.iterrows(), total=search_from.shape[0]):
-#
-# same = True
-# starting_cols = []
-# for c in ending_cols:
-#
-# starting_col = c.replace("_ENDING", "")
-# if starting_col not in search_from.columns:
-# starting_col = c.replace("_ENDING", "_STARTING")
-# if starting_col not in search_from.columns:
-# raise Exception("something went wrong")
-#
-# starting_cols.append(starting_col)
-#
-# # We want them to be different
-# if c == "floor_thermal_transmittance_ENDING":
-# if (row[c] == row[starting_col]) | (row[starting_col] != "natural"):
-# same = False
-# break
-# else:
-# continue
-#
-# # We now check if the starting and ending values are the same
-# if row[c] != row[starting_col]:
-# same = False
-# break
-#
-# if same:
-# raise Exception("We found one!")
-#
-# fixed_cols = [c for c in search_from.columns if c not in starting_cols + ending_cols]
-#
-# import pandas as pd
-#
-# start = row[["SAP_STARTING"] + starting_cols]
-# start.index = [c.replace("_STARTING", "") for c in start.index]
-# end = row[["SAP_ENDING"] + ending_cols]
-# end.index = [c.replace("_ENDING", "") for c in end.index]
-# start["type"] = "starting"
-# end["type"] = "ending"
-#
-# compare = pd.concat([start, end], axis=1)
-#
-# ending_lmk = "1252008839062019090910572351658131"
-# starting_lmk = "1252008819542014122308482236142128"
-#
-# client = EpcClient(auth_token=EPC_AUTH_TOKEN)
-# result = client.domestic.search(params={"address": "Flat 14 Charles House, Freemens Way", "postcode": "CT14 9DL"})
-# starting_epc = [x for x in result["rows"] if x["lmk-key"] == starting_lmk][0]
-# ending_epc = [x for x in result["rows"] if x["lmk-key"] == ending_lmk][0]
-
-
-# with open(
-# os.path.abspath(os.path.dirname(__file__)) + "/backend/tests/test_data/cleaned.pickle", "rb"
-# ) as f:
-# cleaned = pickle.load(f)
-
-# with open(
-# os.path.abspath(os.path.dirname(__file__)) + "/backend/tests/test_data/cleaning_data.pickle", "rb"
-# ) as f:
-# cleaning_data = pickle.load(f)
-
-# TODO: Need to do floors, suspended and solid and to unheated space
-
-
-class TestSapModelPrep:
-
- @pytest.fixture
- def cleaning_data(self):
- return read_dataframe_from_s3_parquet(
- bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
- )
-
- @pytest.fixture
- def cleaned(self):
- cleaned = read_from_s3(
- s3_file_name="cleaned_epc_data/cleaned.bson",
- bucket_name="retrofit-data-dev"
- )
-
- cleaned = msgpack.unpackb(cleaned, raw=False)
- return cleaned
-
- @pytest.fixture
- def photo_supply_lookup(self):
- photo_supply_lookup = read_dataframe_from_s3_parquet(
- bucket_name="retrofit-data-dev", file_key="solar_pv_supply/photo_supply_lookup.parquet",
- )
- return photo_supply_lookup
-
- @pytest.fixture
- def floor_area_decile_thresholds(self):
- floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
- bucket_name="retrofit-data-dev", file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
- )
- return floor_area_decile_thresholds
-
- def test_fill_cavity_wall(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
- """
- We ensure that the process that prepares the data in the engine code results in the same data as
- the model is trained on
- """
-
- # This is an actual starting EPC
- starting_epc = {
- 'low-energy-fixed-light-count': '', 'address': '26, Vicarage Lane, Eaton',
- 'uprn-source': 'Address Matched', 'floor-height': '2.39', 'heating-cost-potential': '942',
- 'unheated-corridor-length': '', 'hot-water-cost-potential': '97',
- 'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'D',
- 'mainheat-energy-eff': 'Average', 'windows-env-eff': 'Good', 'lighting-energy-eff': 'Average',
- 'environment-impact-potential': '53',
- 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '1475',
- 'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
- 'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Melton',
- 'fixed-lighting-outlets-count': '', 'energy-tariff': 'Single',
- 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '96', 'county': 'Leicestershire',
- 'postcode': 'NG32 1SP', 'solar-water-heating-flag': 'Y', 'constituency': 'E14000909',
- 'co2-emissions-potential': '5.7', 'number-heated-rooms': '7',
- 'floor-description': 'Suspended, no insulation (assumed)',
- 'energy-consumption-potential': '177', 'local-authority': 'E07000133', 'built-form': 'Detached',
- 'number-open-fireplaces': '1', 'windows-description': 'Fully double glazed',
- 'glazed-area': 'Normal', 'inspection-date': '2016-09-22', 'mains-gas-flag': 'N',
- 'co2-emiss-curr-per-floor-area': '87', 'address1': '26, Vicarage Lane',
- 'heat-loss-corridor': 'NO DATA!', 'flat-storey-count': '',
- 'constituency-label': 'Rutland and Melton', 'roof-energy-eff': 'Very Poor',
- 'total-floor-area': '116.0', 'building-reference-number': '4940047478',
- 'environment-impact-current': '29', 'co2-emissions-current': '10.0',
- 'roof-description': 'Pitched, limited insulation (assumed)', 'floor-energy-eff': 'NO DATA!',
- 'number-habitable-rooms': '7', 'address2': 'Eaton', 'hot-water-env-eff': 'Good',
- 'posttown': 'GRANTHAM', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'oil (not community)',
- 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
- 'sheating-env-eff': 'N/A',
- 'lighting-description': 'Low energy lighting in 31% of fixed outlets',
- 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Poor', 'photo-supply': '',
- 'lighting-cost-potential': '69', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
- 'main-heating-controls': '2106', 'lodgement-datetime': '2016-09-23 20:29:01',
- 'flat-top-storey': '', 'current-energy-rating': 'F',
- 'secondheat-description': 'Room heaters, dual fuel (mineral and wood)', 'walls-env-eff': 'Poor',
- 'transaction-type': 'marketed sale', 'uprn': '100030534042', 'current-energy-efficiency': '34',
- 'energy-consumption-current': '343', 'mainheat-description': 'Boiler and radiators, oil',
- 'lighting-cost-current': '117', 'lodgement-date': '2016-09-23', 'extension-count': '2',
- 'mainheatc-env-eff': 'Good', 'lmk-key': '1481856849902016092320290148762028',
- 'wind-turbine-count': '0', 'tenure': 'owner-occupied', 'floor-level': 'NODATA!',
- 'potential-energy-efficiency': '64', 'hot-water-energy-eff': 'Good',
- 'low-energy-lighting': '31',
- 'walls-description': 'Cavity wall, as built, no insulation (assumed)',
- 'hotwater-description': 'From main system, plus solar'
- }
-
- # This is the training data as we prepare it in the engine
- # This is an actual record from the training data
- row = {
- 'UPRN': '100030534042', 'RDSAP_CHANGE': 12, 'HEAT_DEMAND_CHANGE': -72,
- 'CARBON_CHANGE': -2.0999999999999996, 'SAP_STARTING': 34, 'SAP_ENDING': 46, 'HEAT_DEMAND_STARTING': 343,
- 'HEAT_DEMAND_ENDING': 271, 'CARBON_STARTING': 10.0, 'CARBON_ENDING': 7.9, 'PROPERTY_TYPE': 'House',
- 'BUILT_FORM': 'Detached', 'CONSTITUENCY': 'E14000909', 'NUMBER_HABITABLE_ROOMS': 7.0,
- 'NUMBER_HEATED_ROOMS': 7.0, 'FIXED_LIGHTING_OUTLETS_COUNT': 21.0,
- 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1967-1975', 'TRANSACTION_TYPE_STARTING': 'marketed sale',
- 'MECHANICAL_VENTILATION_STARTING': 'natural',
- 'SECONDHEAT_DESCRIPTION_STARTING': 'Room heaters, dual fuel (mineral and wood)',
- 'ENERGY_TARIFF_STARTING': 'Single', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'Y',
- 'PHOTO_SUPPLY_STARTING': 0.0, 'GLAZED_TYPE_STARTING': 'double glazing installed during or after 2002',
- 'MULTI_GLAZE_PROPORTION_STARTING': 100.0, 'LOW_ENERGY_LIGHTING_STARTING': 31.0,
- 'NUMBER_OPEN_FIREPLACES_STARTING': 1.0, 'EXTENSION_COUNT_STARTING': 2.0,
- 'TOTAL_FLOOR_AREA_STARTING': 116.0, 'FLOOR_HEIGHT_STARTING': 2.39,
- 'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'natural',
- 'SECONDHEAT_DESCRIPTION_ENDING': 'Room heaters, dual fuel (mineral and wood)',
- 'ENERGY_TARIFF_ENDING': 'Single', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'Y', 'PHOTO_SUPPLY_ENDING': 0.0,
- 'GLAZED_TYPE_ENDING': 'double glazing installed during or after 2002',
- 'MULTI_GLAZE_PROPORTION_ENDING': 100.0, 'LOW_ENERGY_LIGHTING_ENDING': 31.0,
- 'NUMBER_OPEN_FIREPLACES_ENDING': 1.0, 'EXTENSION_COUNT_ENDING': 2.0, 'TOTAL_FLOOR_AREA_ENDING': 116.0,
- 'FLOOR_HEIGHT_ENDING': 2.41, 'DAYS_TO_STARTING': 784, 'DAYS_TO_ENDING': 867,
- 'walls_thermal_transmittance': 1.5, 'is_cavity_wall': True, 'is_filled_cavity': False,
- 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False,
- 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False,
- 'is_sandstone_or_limestone': False, 'is_park_home': False, 'walls_insulation_thickness': 'none',
- 'external_insulation': False, 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 0.7,
- 'is_park_home_ENDING': False, 'walls_insulation_thickness_ENDING': 'average',
- 'external_insulation_ENDING': False, 'internal_insulation_ENDING': False,
- 'floor_thermal_transmittance': 0.52, 'is_to_unheated_space': False, 'is_to_external_air': False,
- 'is_suspended': True, 'is_solid': False, 'another_property_below': False,
- 'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.52,
- 'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 1.5, 'is_pitched': True,
- 'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
- 'has_dwelling_above': False, 'roof_insulation_thickness': 'below average',
- 'roof_thermal_transmittance_ENDING': 1.5, 'roof_insulation_thickness_ENDING': 'below average',
- 'heater_type': 'Unknown', 'system_type': 'from main system', 'thermostat_characteristics': 'Unknown',
- 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown', 'hotwater_tariff_type': 'Unknown',
- 'extra_features': 'plus solar', 'chp_systems': 'Unknown', 'distribution_system': 'Unknown',
- 'no_system_present': 'Unknown', 'appliance': 'Unknown', 'heater_type_ENDING': 'Unknown',
- 'system_type_ENDING': 'from main system', 'thermostat_characteristics_ENDING': 'Unknown',
- 'heating_scope_ENDING': 'Unknown', 'energy_recovery_ENDING': 'Unknown',
- 'hotwater_tariff_type_ENDING': 'Unknown', 'extra_features_ENDING': 'plus solar',
- 'chp_systems_ENDING': 'Unknown', 'distribution_system_ENDING': 'Unknown',
- 'no_system_present_ENDING': 'Unknown', 'appliance_ENDING': 'Unknown', 'has_radiators': True,
- 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
- 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True,
- 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
- 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
- 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
- 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
- 'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
- 'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
- 'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': True,
- 'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
- 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
- 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
- 'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
- 'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
- 'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
- 'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
- 'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
- 'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
- 'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
- 'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
- 'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
- 'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
- 'has_electric_ENDING': False, 'has_mains_gas_ENDING': False, 'has_wood_logs_ENDING': False,
- 'has_coal_ENDING': False, 'has_oil_ENDING': True, 'has_wood_pellets_ENDING': False,
- 'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
- 'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
- 'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
- 'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'room thermostat',
- 'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
- 'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
- 'auxiliary_systems': 'Unknown', 'trvs': 'trvs', 'rate_control': 'Unknown',
- 'thermostatic_control_ENDING': 'room thermostat', 'charging_system_ENDING': 'Unknown',
- 'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
- 'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
- 'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'trvs', 'rate_control_ENDING': 'Unknown',
- 'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'oil',
- 'main-fuel_tariff_type': 'Unknown', 'is_community': False,
- 'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
- 'fuel_type_ENDING': 'oil', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
- 'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
- 'estimated_perimeter_STARTING': 30.531014675946444, 'estimated_perimeter_ENDING': 30.531014675946444,
- 'HOT_WATER_ENERGY_EFF_STARTING': "Good",
- "FLOOR_ENERGY_EFF_STARTING": "Unknown",
- "WINDOWS_ENERGY_EFF_STARTING": "Good",
- "WALLS_ENERGY_EFF_STARTING": "Poor",
- "SHEATING_ENERGY_EFF_STARTING": "Unknown",
- "ROOF_ENERGY_EFF_STARTING": "Very Poor",
- "MAINHEAT_ENERGY_EFF_STARTING": "Average",
- "MAINHEATC_ENERGY_EFF_STARTING": "Good",
- "LIGHTING_ENERGY_EFF_STARTING": "Average",
- "POTENTIAL_ENERGY_EFFICIENCY": 64,
- "ENVIRONMENT_IMPACT_POTENTIAL": 53,
- "ENERGY_CONSUMPTION_POTENTIAL": 177.0,
- "CO2_EMISSIONS_POTENTIAL": 5.7,
- "HOT_WATER_ENERGY_EFF_ENDING": "Good",
- "FLOOR_ENERGY_EFF_ENDING": "Unknown",
- "WINDOWS_ENERGY_EFF_ENDING": "Good",
- "WALLS_ENERGY_EFF_ENDING": "Good",
- "SHEATING_ENERGY_EFF_ENDING": "Unknown",
- "ROOF_ENERGY_EFF_ENDING": "Very Poor",
- "MAINHEAT_ENERGY_EFF_ENDING": "Average",
- "MAINHEATC_ENERGY_EFF_ENDING": "Good",
- "LIGHTING_ENERGY_EFF_ENDING": "Average",
- }
-
- home = Property(
- id=0,
- postcode=starting_epc["postcode"],
- address=starting_epc["address1"],
- data=starting_epc
- )
- home.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
-
- data_processor = DataProcessor(None, newdata=True)
- data_processor.insert_data(pd.DataFrame([home.get_model_data()]))
-
- data_processor.pre_process()
-
- starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
- ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
- fixed_data = data_processor.get_fixed_features()
-
- ending_lodgement_date = '2016-12-15'
-
- ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(ending_lodgement_date)
-
- recommendation = {
- "recommendation_id": 0,
- "new_u_value": 0.7,
- "type": "cavity_wall_insulation"
- }
-
- test_record = create_recommendation_scoring_data(
- property=home,
- recommendation=recommendation,
- starting_epc_data=starting_epc_data,
- ending_epc_data=ending_epc_data,
- fixed_data=fixed_data,
- )
- test_record = pd.DataFrame([test_record])
-
- # Test the final cleaning:
- test_record = DataProcessor.apply_averages_cleaning(
- data_to_clean=test_record,
- cleaning_data=cleaning_data,
- cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
- ).drop(columns=["LOCAL_AUTHORITY"])
-
- test_record = DataProcessor.clean_missings_after_description_process(
- test_record, [
- c for c in test_record.columns if
- ("thermal_transmittance" in c) or ("insulation_thickness" in c)
- ]
- )
-
- # Test that the data has been set up correctly
-
- # Things to fix:
- # [] Filled cavity should have an average insulation thickness in the cleaned data
-
- for c in test_record.columns:
- if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
- continue
-
- if c == "FLOOR_HEIGHT_ENDING":
- assert (row[c] - test_record[c].values[0]) <= 0.020001
- continue
-
- if c == "walls_insulation_thickness_ENDING":
- assert row[c] == "average"
- assert test_record[c].values[0] == "above average"
- continue
-
- assert test_record[c].values[0] == row[c]
-
- def test_internal_wall_insulation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
-
- starting_epc2 = {
- 'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
- 'uprn-source': 'Energy Assessor', 'floor-height': '3.64', 'heating-cost-potential': '465',
- 'unheated-corridor-length': '', 'hot-water-cost-potential': '185',
- 'construction-age-band': 'England and Wales: 1900-1929', 'potential-energy-rating': 'C',
- 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Poor',
- 'environment-impact-potential': '51', 'glazed-type': 'double glazing installed during or after 2002',
- 'heating-cost-current': '1223', 'address3': '3 WESTERN GATEWAY',
- 'mainheatcont-description': 'Programmer and appliance thermostats', 'sheating-energy-eff': 'N/A',
- 'property-type': 'Flat', 'local-authority-label': 'Newham', 'fixed-lighting-outlets-count': '12',
- 'energy-tariff': 'off-peak 7 hour', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '342',
- 'county': '', 'postcode': 'E16 1BD', 'solar-water-heating-flag': 'N', 'constituency': 'E14001032',
- 'co2-emissions-potential': '3.6', 'number-heated-rooms': '2', 'floor-description': '(other premises below)',
- 'energy-consumption-potential': '307', 'local-authority': 'E09000025', 'built-form': 'Mid-Terrace',
- 'number-open-fireplaces': '0', 'windows-description': 'Partial double glazing', 'glazed-area': 'Normal',
- 'inspection-date': '2020-10-14', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '66',
- 'address1': 'FLAT 12', 'heat-loss-corridor': 'heated corridor', 'flat-storey-count': '',
- 'constituency-label': 'West Ham', 'roof-energy-eff': 'N/A', 'total-floor-area': '70.0',
- 'building-reference-number': '10000539740', 'environment-impact-current': '42',
- 'co2-emissions-current': '4.6', 'roof-description': '(another dwelling above)', 'floor-energy-eff': 'N/A',
- 'number-habitable-rooms': '2', 'address2': 'WAREHOUSE W', 'hot-water-env-eff': 'Poor', 'posttown': 'LONDON',
- 'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Poor',
- 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
- 'lighting-description': 'Low energy lighting in 17% of fixed outlets', 'roof-env-eff': 'N/A',
- 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '67',
- 'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '61', 'main-heating-controls': '',
- 'lodgement-datetime': '2020-10-14 00:00:00', 'flat-top-storey': 'N', 'current-energy-rating': 'F',
- 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor', 'transaction-type': 'marketed sale',
- 'uprn': '10012839482', 'current-energy-efficiency': '33', 'energy-consumption-current': '393',
- 'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '110',
- 'lodgement-date': '2020-10-14', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
- 'lmk-key': 'b0d82f468273bec55ec5676a809b8e36b55db940ffa92f482a482f6aaa38eb1d', 'wind-turbine-count': '0',
- 'tenure': 'Owner-occupied', 'floor-level': '01', 'potential-energy-efficiency': '71',
- 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '17',
- 'walls-description': 'Solid brick, as built, no insulation (assumed)',
- 'hotwater-description': 'Electric immersion, standard tariff'
- }
-
- row2 = {
- 'UPRN': '10012839482', 'RDSAP_CHANGE': 8, 'HEAT_DEMAND_CHANGE': -59,
- 'CARBON_CHANGE': -0.5999999999999996, 'SAP_STARTING': 33, 'SAP_ENDING': 41, 'HEAT_DEMAND_STARTING': 393,
- 'HEAT_DEMAND_ENDING': 334, 'CARBON_STARTING': 4.6, 'CARBON_ENDING': 4.0, 'PROPERTY_TYPE': 'Flat',
- 'BUILT_FORM': 'Mid-Terrace', 'CONSTITUENCY': 'E14001032', 'NUMBER_HABITABLE_ROOMS': 2.0,
- 'NUMBER_HEATED_ROOMS': 2.0, 'FIXED_LIGHTING_OUTLETS_COUNT': 12.0,
- 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1996-2002', 'TRANSACTION_TYPE_STARTING': 'marketed sale',
- 'MECHANICAL_VENTILATION_STARTING': 'natural', 'SECONDHEAT_DESCRIPTION_STARTING': 'None',
- 'ENERGY_TARIFF_STARTING': 'off-peak 7 hour', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N',
- 'PHOTO_SUPPLY_STARTING': 0.0, 'GLAZED_TYPE_STARTING': 'double glazing installed during or after 2002',
- 'MULTI_GLAZE_PROPORTION_STARTING': 61.0, 'LOW_ENERGY_LIGHTING_STARTING': 17.0,
- 'NUMBER_OPEN_FIREPLACES_STARTING': 0.0, 'EXTENSION_COUNT_STARTING': 0.0,
- 'TOTAL_FLOOR_AREA_STARTING': 70.0, 'FLOOR_HEIGHT_STARTING': 3.64,
- 'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'natural',
- 'SECONDHEAT_DESCRIPTION_ENDING': 'None', 'ENERGY_TARIFF_ENDING': 'off-peak 7 hour',
- 'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 0.0,
- 'GLAZED_TYPE_ENDING': 'double glazing installed during or after 2002',
- 'MULTI_GLAZE_PROPORTION_ENDING': 61.0, 'LOW_ENERGY_LIGHTING_ENDING': 17.0,
- 'NUMBER_OPEN_FIREPLACES_ENDING': 0.0, 'EXTENSION_COUNT_ENDING': 0.0, 'TOTAL_FLOOR_AREA_ENDING': 70.0,
- 'FLOOR_HEIGHT_ENDING': 3.64, 'DAYS_TO_STARTING': 2266, 'DAYS_TO_ENDING': 2307,
- 'walls_thermal_transmittance': 1.7, 'is_cavity_wall': False, 'is_filled_cavity': False,
- 'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False,
- 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False,
- 'is_sandstone_or_limestone': False, 'is_park_home': False, 'walls_insulation_thickness': 'none',
- 'external_insulation': False, 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 0.21,
- 'is_park_home_ENDING': False, 'walls_insulation_thickness_ENDING': 'average',
- 'external_insulation_ENDING': False, 'internal_insulation_ENDING': False,
- 'floor_thermal_transmittance': 0.0, 'is_to_unheated_space': False, 'is_to_external_air': False,
- 'is_suspended': False, 'is_solid': False, 'another_property_below': True,
- 'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.0,
- 'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 0.0, 'is_pitched': False,
- 'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
- 'has_dwelling_above': True, 'roof_insulation_thickness': 'none',
- 'roof_thermal_transmittance_ENDING': 0.0, 'roof_insulation_thickness_ENDING': 'none',
- 'heater_type': 'electric immersion', 'system_type': 'Unknown', 'thermostat_characteristics': 'Unknown',
- 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown', 'hotwater_tariff_type': 'standard tariff',
- 'extra_features': 'Unknown', 'chp_systems': 'Unknown', 'distribution_system': 'Unknown',
- 'no_system_present': 'Unknown', 'appliance': 'Unknown', 'heater_type_ENDING': 'electric immersion',
- 'system_type_ENDING': 'Unknown', 'thermostat_characteristics_ENDING': 'Unknown',
- 'heating_scope_ENDING': 'Unknown', 'energy_recovery_ENDING': 'Unknown',
- 'hotwater_tariff_type_ENDING': 'standard tariff', 'extra_features_ENDING': 'Unknown',
- 'chp_systems_ENDING': 'Unknown', 'distribution_system_ENDING': 'Unknown',
- 'no_system_present_ENDING': 'Unknown', 'appliance_ENDING': 'Unknown', 'has_radiators': False,
- 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
- 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
- 'has_air_source_heat_pump': False, 'has_room_heaters': True, 'has_electric_storage_heaters': False,
- 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
- 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
- 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
- 'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
- 'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': True,
- 'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
- 'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
- 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
- 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': False,
- 'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
- 'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
- 'has_boiler_ENDING': False, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': True,
- 'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
- 'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
- 'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
- 'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
- 'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
- 'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
- 'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
- 'has_electric_ENDING': True, 'has_mains_gas_ENDING': False, 'has_wood_logs_ENDING': False,
- 'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
- 'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
- 'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
- 'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
- 'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'appliance thermostats',
- 'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
- 'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
- 'auxiliary_systems': 'Unknown', 'trvs': 'Unknown', 'rate_control': 'Unknown',
- 'thermostatic_control_ENDING': 'appliance thermostats', 'charging_system_ENDING': 'Unknown',
- 'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
- 'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
- 'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'Unknown', 'rate_control_ENDING': 'Unknown',
- 'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'electricity',
- 'main-fuel_tariff_type': 'Unknown', 'is_community': False,
- 'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
- 'fuel_type_ENDING': 'electricity', 'main-fuel_tariff_type_ENDING': 'Unknown',
- 'is_community_ENDING': False, 'no_individual_heating_or_community_network_ENDING': False,
- 'complex_fuel_type_ENDING': 'Unknown', 'estimated_perimeter_STARTING': 35.4964786985977,
- 'estimated_perimeter_ENDING': 35.4964786985977,
- 'HOT_WATER_ENERGY_EFF_STARTING': "Very Poor",
- "FLOOR_ENERGY_EFF_STARTING": "Unknown",
- "WINDOWS_ENERGY_EFF_STARTING": "Average",
- "WALLS_ENERGY_EFF_STARTING": "Very Poor",
- "SHEATING_ENERGY_EFF_STARTING": "Unknown",
- "ROOF_ENERGY_EFF_STARTING": "Unknown",
- "MAINHEAT_ENERGY_EFF_STARTING": "Very Poor",
- "MAINHEATC_ENERGY_EFF_STARTING": "Good",
- "LIGHTING_ENERGY_EFF_STARTING": "Poor",
- "POTENTIAL_ENERGY_EFFICIENCY": 71,
- "ENVIRONMENT_IMPACT_POTENTIAL": 51,
- "ENERGY_CONSUMPTION_POTENTIAL": 307,
- "CO2_EMISSIONS_POTENTIAL": 3.6,
- 'HOT_WATER_ENERGY_EFF_ENDING': "Very Poor",
- "FLOOR_ENERGY_EFF_ENDING": "Unknown",
- "WINDOWS_ENERGY_EFF_ENDING": "Average",
- "WALLS_ENERGY_EFF_ENDING": "Good",
- "SHEATING_ENERGY_EFF_ENDING": "Unknown",
- "ROOF_ENERGY_EFF_ENDING": "Unknown",
- "MAINHEAT_ENERGY_EFF_ENDING": "Very Poor",
- "MAINHEATC_ENERGY_EFF_ENDING": "Good",
- "LIGHTING_ENERGY_EFF_ENDING": "Poor",
- }
-
- home2 = Property(
- id=0,
- postcode=starting_epc2["postcode"],
- address=starting_epc2["address1"],
- data=starting_epc2
- )
- home2.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
- home2.set_number_lighting_outlets(None)
-
- data_processor2 = DataProcessor(None, newdata=True)
- data_processor2.insert_data(pd.DataFrame([home2.get_model_data()]))
-
- data_processor2.pre_process()
-
- starting_epc_data2 = data_processor2.get_component_features(suffix="_STARTING")
- ending_epc_data2 = data_processor2.get_component_features(suffix="_ENDING")
- fixed_data2 = data_processor2.get_fixed_features()
-
- ending_lodgement_date2 = '2020-11-24'
-
- ending_epc_data2["DAYS_TO_ENDING"] = data_processor2.calculate_days_to(ending_lodgement_date2)
-
- recommendation2 = {
- "recommendation_id": 0,
- "new_u_value": 0.21,
- "type": "internal_wall_insulation"
- }
-
- test_record2 = create_recommendation_scoring_data(
- property=home2,
- recommendation=recommendation2,
- starting_epc_data=starting_epc_data2,
- ending_epc_data=ending_epc_data2,
- fixed_data=fixed_data2,
- )
- test_record2 = pd.DataFrame([test_record2])
-
- # Test the final cleaning:
- test_record2 = DataProcessor.apply_averages_cleaning(
- data_to_clean=test_record2,
- cleaning_data=cleaning_data,
- cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
- ).drop(columns=["LOCAL_AUTHORITY"])
-
- test_record2 = DataProcessor.clean_missings_after_description_process(
- test_record2, [
- c for c in test_record2.columns if
- ("thermal_transmittance" in c) or ("insulation_thickness" in c)
- ]
- )
-
- for c in test_record2.columns:
- if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
- continue
-
- if c == "FLOOR_HEIGHT_ENDING":
- assert (row2[c] - test_record2[c].values[0]) <= 0.020001
- continue
-
- if c == "walls_insulation_thickness_ENDING":
- assert row2[c] == "average"
- assert test_record2[c].values[0] == "above average"
- continue
-
- if c == "CONSTRUCTION_AGE_BAND":
- # For this, we have different values in the original data
- assert row2[c] == "England and Wales: 1996-2002"
- assert test_record2[c].values[0] == "England and Wales: 1900-1929"
- continue
-
- assert test_record2[c].values[0] == row2[c]
-
- def test_ventilation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
-
- starting_epc3 = {
- 'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
- 'floor-height': '1.87', 'heating-cost-potential': '645', 'unheated-corridor-length': '',
- 'hot-water-cost-potential': '69', 'construction-age-band': 'England and Wales: 1900-1929',
- 'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
- 'lighting-energy-eff': 'Average', 'environment-impact-potential': '75',
- 'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '1028', 'address3': '',
- 'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
- 'property-type': 'House', 'local-authority-label': 'Sheffield', 'fixed-lighting-outlets-count': '21',
- 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '96',
- 'county': '', 'postcode': 'S6 4FG', 'solar-water-heating-flag': 'N', 'constituency': 'E14000921',
- 'co2-emissions-potential': '2.9', 'number-heated-rooms': '5',
- 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '152',
- 'local-authority': 'E08000019', 'built-form': 'Enclosed Mid-Terrace', 'number-open-fireplaces': '0',
- 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-06-13',
- 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '59', 'address1': '45 Shepperson Road',
- 'heat-loss-corridor': '', 'flat-storey-count': '',
- 'constituency-label': 'Sheffield, Brightside and Hillsborough', 'roof-energy-eff': 'Very Poor',
- 'total-floor-area': '107.0', 'building-reference-number': '10002892085', 'environment-impact-current': '46',
- 'co2-emissions-current': '6.3', 'roof-description': 'Pitched, no insulation (assumed)',
- 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good',
- 'posttown': 'SHEFFIELD', 'mainheatc-energy-eff': 'Average', 'main-fuel': 'mains gas (not community)',
- 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
- 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 43% of fixed outlets',
- 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0',
- 'lighting-cost-potential': '83', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
- 'main-heating-controls': '', 'lodgement-datetime': '2023-05-27 12:15:21', 'flat-top-storey': '',
- 'current-energy-rating': 'E', 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor',
- 'transaction-type': 'marketed sale', 'uprn': '100051073214', 'current-energy-efficiency': '54',
- 'energy-consumption-current': '335', 'mainheat-description': 'Boiler and radiators, mains gas',
- 'lighting-cost-current': '131', 'lodgement-date': '2023-05-27', 'extension-count': '1',
- 'mainheatc-env-eff': 'Average',
- 'lmk-key': 'dc1a4da246562656132b8e36e0534cd90b09fa40fc584e25e644e2d9ab86a247', 'wind-turbine-count': '0',
- 'tenure': 'Not defined - use in the case of a new dwelling for which the intended tenure in not known. It '
- 'is not to be used for an existing dwelling',
- 'floor-level': '', 'potential-energy-efficiency': '80', 'hot-water-energy-eff': 'Good',
- 'low-energy-lighting': '43',
- 'walls-description': 'Sandstone or limestone, as built, no insulation (assumed)',
- 'hotwater-description': 'From main system'
- }
-
- row3 = {
- 'UPRN': '100051073214', 'RDSAP_CHANGE': 2, 'HEAT_DEMAND_CHANGE': -22, 'CARBON_CHANGE': -0.39999999999999947,
- 'SAP_STARTING': 54, 'SAP_ENDING': 56, 'HEAT_DEMAND_STARTING': 335, 'HEAT_DEMAND_ENDING': 313,
- 'CARBON_STARTING': 6.3, 'CARBON_ENDING': 5.9, 'PROPERTY_TYPE': 'House', 'BUILT_FORM': 'Mid-Terrace',
- 'CONSTITUENCY': 'E14000921', 'NUMBER_HABITABLE_ROOMS': 5.0, 'NUMBER_HEATED_ROOMS': 5.0,
- 'FIXED_LIGHTING_OUTLETS_COUNT': 21.0, 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1900-1929',
- 'TRANSACTION_TYPE_STARTING': 'marketed sale', 'MECHANICAL_VENTILATION_STARTING': 'natural',
- 'SECONDHEAT_DESCRIPTION_STARTING': 'None', 'ENERGY_TARIFF_STARTING': 'Single',
- 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N', 'PHOTO_SUPPLY_STARTING': 0.0,
- 'GLAZED_TYPE_STARTING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_STARTING': 100.0,
- 'LOW_ENERGY_LIGHTING_STARTING': 43.0, 'NUMBER_OPEN_FIREPLACES_STARTING': 0.0,
- 'EXTENSION_COUNT_STARTING': 1.0, 'TOTAL_FLOOR_AREA_STARTING': 107.0, 'FLOOR_HEIGHT_STARTING': 1.87,
- 'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'mechanical, extract only',
- 'SECONDHEAT_DESCRIPTION_ENDING': 'None', 'ENERGY_TARIFF_ENDING': 'Single',
- 'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 0.0,
- 'GLAZED_TYPE_ENDING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_ENDING': 100.0,
- 'LOW_ENERGY_LIGHTING_ENDING': 43.0, 'NUMBER_OPEN_FIREPLACES_ENDING': 0.0, 'EXTENSION_COUNT_ENDING': 1.0,
- 'TOTAL_FLOOR_AREA_ENDING': 107.0, 'FLOOR_HEIGHT_ENDING': 1.87, 'DAYS_TO_STARTING': 3221,
- 'DAYS_TO_ENDING': 2874, 'walls_thermal_transmittance': 2.0, 'is_cavity_wall': False,
- 'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False,
- 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_sandstone_or_limestone': True,
- 'is_park_home': False, 'walls_insulation_thickness': 'none', 'external_insulation': False,
- 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 2.0, 'is_park_home_ENDING': False,
- 'walls_insulation_thickness_ENDING': 'none', 'external_insulation_ENDING': False,
- 'internal_insulation_ENDING': False, 'floor_thermal_transmittance': 0.51, 'is_to_unheated_space': False,
- 'is_to_external_air': False, 'is_suspended': True, 'is_solid': False, 'another_property_below': False,
- 'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.51,
- 'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 2.3, 'is_pitched': True,
- 'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
- 'has_dwelling_above': False, 'roof_insulation_thickness': 'none', 'roof_thermal_transmittance_ENDING': 2.3,
- 'roof_insulation_thickness_ENDING': 'none', 'heater_type': 'Unknown', 'system_type': 'from main system',
- 'thermostat_characteristics': 'Unknown', 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown',
- 'hotwater_tariff_type': 'Unknown', 'extra_features': 'Unknown', 'chp_systems': 'Unknown',
- 'distribution_system': 'Unknown', 'no_system_present': 'Unknown', 'appliance': 'Unknown',
- 'heater_type_ENDING': 'Unknown', 'system_type_ENDING': 'from main system',
- 'thermostat_characteristics_ENDING': 'Unknown', 'heating_scope_ENDING': 'Unknown',
- 'energy_recovery_ENDING': 'Unknown', 'hotwater_tariff_type_ENDING': 'Unknown',
- 'extra_features_ENDING': 'Unknown', 'chp_systems_ENDING': 'Unknown',
- 'distribution_system_ENDING': 'Unknown', 'no_system_present_ENDING': 'Unknown',
- 'appliance_ENDING': 'Unknown', 'has_radiators': True, 'has_fan_coil_units': False,
- 'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
- 'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False,
- 'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
- 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
- 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
- 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
- 'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
- 'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
- 'has_mains_gas': True, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
- 'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
- 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
- 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
- 'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
- 'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
- 'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
- 'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
- 'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
- 'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
- 'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
- 'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
- 'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
- 'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
- 'has_electric_ENDING': False, 'has_mains_gas_ENDING': True, 'has_wood_logs_ENDING': False,
- 'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
- 'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
- 'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
- 'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
- 'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'Unknown', 'charging_system': 'Unknown',
- 'switch_system': 'programmer', 'no_control': 'Unknown', 'dhw_control': 'Unknown',
- 'community_heating': 'Unknown', 'multiple_room_thermostats': False, 'auxiliary_systems': 'bypass',
- 'trvs': 'trvs', 'rate_control': 'Unknown', 'thermostatic_control_ENDING': 'Unknown',
- 'charging_system_ENDING': 'Unknown', 'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown',
- 'dhw_control_ENDING': 'Unknown', 'community_heating_ENDING': 'Unknown',
- 'multiple_room_thermostats_ENDING': False, 'auxiliary_systems_ENDING': 'bypass', 'trvs_ENDING': 'trvs',
- 'rate_control_ENDING': 'Unknown', 'glazing_type': 'double', 'glazing_type_ENDING': 'double',
- 'fuel_type': 'mains gas', 'main-fuel_tariff_type': 'Unknown', 'is_community': False,
- 'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
- 'fuel_type_ENDING': 'mains gas', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
- 'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
- 'estimated_perimeter_STARTING': 30.06908711617298, 'estimated_perimeter_ENDING': 30.06908711617298,
- 'HOT_WATER_ENERGY_EFF_STARTING': "Good",
- "FLOOR_ENERGY_EFF_STARTING": "Unknown",
- "WINDOWS_ENERGY_EFF_STARTING": "Average",
- "WALLS_ENERGY_EFF_STARTING": "Very Poor",
- "SHEATING_ENERGY_EFF_STARTING": "Unknown",
- "ROOF_ENERGY_EFF_STARTING": "Very Poor",
- "MAINHEAT_ENERGY_EFF_STARTING": "Good",
- "MAINHEATC_ENERGY_EFF_STARTING": "Average",
- "LIGHTING_ENERGY_EFF_STARTING": "Average",
- "POTENTIAL_ENERGY_EFFICIENCY": 80,
- "ENVIRONMENT_IMPACT_POTENTIAL": 75,
- "ENERGY_CONSUMPTION_POTENTIAL": 152,
- "CO2_EMISSIONS_POTENTIAL": 2.9,
- 'HOT_WATER_ENERGY_EFF_ENDING': "Good",
- "FLOOR_ENERGY_EFF_ENDING": "Unknown",
- "WINDOWS_ENERGY_EFF_ENDING": "Average",
- "WALLS_ENERGY_EFF_ENDING": "Very Poor",
- "SHEATING_ENERGY_EFF_ENDING": "Unknown",
- "ROOF_ENERGY_EFF_ENDING": "Very Poor",
- "MAINHEAT_ENERGY_EFF_ENDING": "Good",
- "MAINHEATC_ENERGY_EFF_ENDING": "Average",
- "LIGHTING_ENERGY_EFF_ENDING": "Average",
- }
-
- home3 = Property(
- id=0,
- postcode=starting_epc3["postcode"],
- address=starting_epc3["address1"],
- data=starting_epc3
- )
- home3.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
- home3.set_number_lighting_outlets(None)
-
- data_processor3 = DataProcessor(None, newdata=True)
- data_processor3.insert_data(pd.DataFrame([home3.get_model_data()]))
-
- data_processor3.pre_process()
-
- starting_epc_data3 = data_processor3.get_component_features(suffix="_STARTING")
- ending_epc_data3 = data_processor3.get_component_features(suffix="_ENDING")
- fixed_data3 = data_processor3.get_fixed_features()
-
- ending_lodgement_date3 = '2022-06-14'
-
- ending_epc_data3["DAYS_TO_ENDING"] = data_processor3.calculate_days_to(ending_lodgement_date3)
-
- recommendation3 = {
- "recommendation_id": 0,
- "type": "mechanical_ventilation"
- }
-
- test_record3 = create_recommendation_scoring_data(
- property=home3,
- recommendation=recommendation3,
- starting_epc_data=starting_epc_data3,
- ending_epc_data=ending_epc_data3,
- fixed_data=fixed_data3,
- )
- test_record3 = pd.DataFrame([test_record3])
-
- # Test the final cleaning:
- test_record3 = DataProcessor.apply_averages_cleaning(
- data_to_clean=test_record3,
- cleaning_data=cleaning_data,
- cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
- ).drop(columns=["LOCAL_AUTHORITY"])
-
- test_record3 = DataProcessor.clean_missings_after_description_process(
- test_record3, [
- c for c in test_record3.columns if
- ("thermal_transmittance" in c) or ("insulation_thickness" in c)
- ]
- )
-
- for c in test_record3.columns:
- if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
- continue
-
- assert test_record3[c].values[0] == row3[c]
-
- def test_fireplaces(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
-
- starting_epc4 = {
- 'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
- 'uprn-source': 'Energy Assessor', 'floor-height': '2.4', 'heating-cost-potential': '501',
- 'unheated-corridor-length': '', 'hot-water-cost-potential': '70',
- 'construction-age-band': 'England and Wales: 1930-1949', 'potential-energy-rating': 'C',
- 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
- 'environment-impact-potential': '76', 'glazed-type': 'double glazing, unknown install date',
- 'heating-cost-current': '723', 'address3': '',
- 'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
- 'property-type': 'House', 'local-authority-label': 'Melton',
- 'fixed-lighting-outlets-count': '14', 'energy-tariff': 'dual',
- 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '98',
- 'county': 'Leicestershire', 'postcode': 'LE14 3QT', 'solar-water-heating-flag': 'N',
- 'constituency': 'E14000909', 'co2-emissions-potential': '2.4', 'number-heated-rooms': '5',
- 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '153',
- 'local-authority': 'E07000133', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '1',
- 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
- 'inspection-date': '2022-06-27', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '46',
- 'address1': '9 Glebe Road', 'heat-loss-corridor': '', 'flat-storey-count': '',
- 'constituency-label': 'Rutland and Melton', 'roof-energy-eff': 'Good',
- 'total-floor-area': '87.0', 'building-reference-number': '10002396876',
- 'environment-impact-current': '60', 'co2-emissions-current': '4.0',
- 'roof-description': 'Pitched, 200 mm loft insulation', 'floor-energy-eff': 'N/A',
- 'number-habitable-rooms': '5', 'address2': 'Asfordby Hill', 'hot-water-env-eff': 'Good',
- 'posttown': 'MELTON MOWBRAY', 'mainheatc-energy-eff': 'Average',
- 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Average',
- 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
- 'lighting-description': 'Low energy lighting in 29% of fixed outlets', 'roof-env-eff': 'Good',
- 'walls-energy-eff': 'Very Poor', 'photo-supply': '15.0', 'lighting-cost-potential': '79',
- 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
- 'lodgement-datetime': '2022-06-27 15:28:18', 'flat-top-storey': '',
- 'current-energy-rating': 'D',
- 'secondheat-description': 'Room heaters, dual fuel (mineral and wood)',
- 'walls-env-eff': 'Very Poor', 'transaction-type': 'ECO assessment', 'uprn': '100030539619',
- 'current-energy-efficiency': '66', 'energy-consumption-current': '256',
- 'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '135',
- 'lodgement-date': '2022-06-27', 'extension-count': '1', 'mainheatc-env-eff': 'Average',
- 'lmk-key': '736b6f4803a11d9e45b49bf98f36eb8a7f357b0dd24f3e7cddef5295518e5bef',
- 'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
- 'potential-energy-efficiency': '78', 'hot-water-energy-eff': 'Good',
- 'low-energy-lighting': '29',
- 'walls-description': 'Solid brick, as built, no insulation (assumed)',
- 'hotwater-description': 'From main system'
- }
-
- row4 = {
- 'UPRN': '100030539619', 'RDSAP_CHANGE': 7, 'HEAT_DEMAND_CHANGE': -41, 'CARBON_CHANGE': -0.5,
- 'SAP_STARTING': 66, 'SAP_ENDING': 73, 'HEAT_DEMAND_STARTING': 256, 'HEAT_DEMAND_ENDING': 215,
- 'CARBON_STARTING': 4.0, 'CARBON_ENDING': 3.5, 'PROPERTY_TYPE': 'House', 'BUILT_FORM': 'Semi-Detached',
- 'CONSTITUENCY': 'E14000909', 'NUMBER_HABITABLE_ROOMS': 5.0, 'NUMBER_HEATED_ROOMS': 5.0,
- 'FIXED_LIGHTING_OUTLETS_COUNT': 14.0, 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1930-1949',
- 'TRANSACTION_TYPE_STARTING': 'eco assessment', 'MECHANICAL_VENTILATION_STARTING': 'natural',
- 'SECONDHEAT_DESCRIPTION_STARTING': 'Room heaters, dual fuel (mineral and wood)',
- 'ENERGY_TARIFF_STARTING': 'dual', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N', 'PHOTO_SUPPLY_STARTING': 15.0,
- 'GLAZED_TYPE_STARTING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_STARTING': 100.0,
- 'LOW_ENERGY_LIGHTING_STARTING': 29.0, 'NUMBER_OPEN_FIREPLACES_STARTING': 1.0,
- 'EXTENSION_COUNT_STARTING': 1.0, 'TOTAL_FLOOR_AREA_STARTING': 87.0, 'FLOOR_HEIGHT_STARTING': 2.4,
- 'TRANSACTION_TYPE_ENDING': 'eco assessment', 'MECHANICAL_VENTILATION_ENDING': 'natural',
- 'SECONDHEAT_DESCRIPTION_ENDING': 'Room heaters, dual fuel (mineral and wood)',
- 'ENERGY_TARIFF_ENDING': 'dual', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 15.0,
- 'GLAZED_TYPE_ENDING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_ENDING': 100.0,
- 'LOW_ENERGY_LIGHTING_ENDING': 29.0, 'NUMBER_OPEN_FIREPLACES_ENDING': 0, 'EXTENSION_COUNT_ENDING': 1.0,
- 'TOTAL_FLOOR_AREA_ENDING': 87.0, 'FLOOR_HEIGHT_ENDING': 2.4, 'DAYS_TO_STARTING': 2887,
- 'DAYS_TO_ENDING': 2960, 'walls_thermal_transmittance': 1.7, 'is_cavity_wall': False,
- 'is_filled_cavity': False, 'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False,
- 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_sandstone_or_limestone': False,
- 'is_park_home': False, 'walls_insulation_thickness': 'none', 'external_insulation': False,
- 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 1.7, 'is_park_home_ENDING': False,
- 'walls_insulation_thickness_ENDING': 'none', 'external_insulation_ENDING': False,
- 'internal_insulation_ENDING': False, 'floor_thermal_transmittance': 0.53, 'is_to_unheated_space': False,
- 'is_to_external_air': False, 'is_suspended': False, 'is_solid': True, 'another_property_below': False,
- 'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.53,
- 'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 0.21, 'is_pitched': True,
- 'is_roof_room': False, 'is_loft': True, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
- 'has_dwelling_above': False, 'roof_insulation_thickness': '200', 'roof_thermal_transmittance_ENDING': 0.21,
- 'roof_insulation_thickness_ENDING': '200', 'heater_type': 'Unknown', 'system_type': 'from main system',
- 'thermostat_characteristics': 'Unknown', 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown',
- 'hotwater_tariff_type': 'Unknown', 'extra_features': 'Unknown', 'chp_systems': 'Unknown',
- 'distribution_system': 'Unknown', 'no_system_present': 'Unknown', 'appliance': 'Unknown',
- 'heater_type_ENDING': 'Unknown', 'system_type_ENDING': 'from main system',
- 'thermostat_characteristics_ENDING': 'Unknown', 'heating_scope_ENDING': 'Unknown',
- 'energy_recovery_ENDING': 'Unknown', 'hotwater_tariff_type_ENDING': 'Unknown',
- 'extra_features_ENDING': 'Unknown', 'chp_systems_ENDING': 'Unknown',
- 'distribution_system_ENDING': 'Unknown', 'no_system_present_ENDING': 'Unknown',
- 'appliance_ENDING': 'Unknown', 'has_radiators': True, 'has_fan_coil_units': False,
- 'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
- 'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False,
- 'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
- 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
- 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
- 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
- 'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
- 'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
- 'has_mains_gas': True, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
- 'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
- 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
- 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
- 'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
- 'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
- 'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
- 'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
- 'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
- 'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
- 'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
- 'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
- 'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
- 'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
- 'has_electric_ENDING': False, 'has_mains_gas_ENDING': True, 'has_wood_logs_ENDING': False,
- 'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
- 'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
- 'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
- 'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
- 'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'room thermostat',
- 'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
- 'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
- 'auxiliary_systems': 'Unknown', 'trvs': 'Unknown', 'rate_control': 'Unknown',
- 'thermostatic_control_ENDING': 'room thermostat', 'charging_system_ENDING': 'Unknown',
- 'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
- 'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
- 'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'Unknown', 'rate_control_ENDING': 'Unknown',
- 'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'mains gas',
- 'main-fuel_tariff_type': 'Unknown', 'is_community': False,
- 'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
- 'fuel_type_ENDING': 'mains gas', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
- 'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
- 'estimated_perimeter_STARTING': 27.113649698998472, 'estimated_perimeter_ENDING': 27.113649698998472,
- 'HOT_WATER_ENERGY_EFF_STARTING': "Good",
- "FLOOR_ENERGY_EFF_STARTING": "Unknown",
- "WINDOWS_ENERGY_EFF_STARTING": "Average",
- "WALLS_ENERGY_EFF_STARTING": "Very Poor",
- "SHEATING_ENERGY_EFF_STARTING": "Unknown",
- "ROOF_ENERGY_EFF_STARTING": "Good",
- "MAINHEAT_ENERGY_EFF_STARTING": "Good",
- "MAINHEATC_ENERGY_EFF_STARTING": "Average",
- "LIGHTING_ENERGY_EFF_STARTING": "Average",
- "POTENTIAL_ENERGY_EFFICIENCY": 78,
- "ENVIRONMENT_IMPACT_POTENTIAL": 76,
- "ENERGY_CONSUMPTION_POTENTIAL": 153,
- "CO2_EMISSIONS_POTENTIAL": 2.4,
- 'HOT_WATER_ENERGY_EFF_ENDING': "Good",
- "FLOOR_ENERGY_EFF_ENDING": "Unknown",
- "WINDOWS_ENERGY_EFF_ENDING": "Average",
- "WALLS_ENERGY_EFF_ENDING": "Very Poor",
- "SHEATING_ENERGY_EFF_ENDING": "Unknown",
- "ROOF_ENERGY_EFF_ENDING": "Good",
- "MAINHEAT_ENERGY_EFF_ENDING": "Good",
- "MAINHEATC_ENERGY_EFF_ENDING": "Average",
- "LIGHTING_ENERGY_EFF_ENDING": "Average",
- }
-
- home4 = Property(
- id=0,
- postcode=starting_epc4["postcode"],
- address=starting_epc4["address1"],
- data=starting_epc4
- )
- home4.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
- home4.set_number_lighting_outlets(None)
-
- data_processor4 = DataProcessor(None, newdata=True)
- data_processor4.insert_data(pd.DataFrame([home4.get_model_data()]))
-
- data_processor4.pre_process()
-
- starting_epc_data4 = data_processor4.get_component_features(suffix="_STARTING")
- ending_epc_data4 = data_processor4.get_component_features(suffix="_ENDING")
- fixed_data4 = data_processor4.get_fixed_features()
-
- ending_lodgement_date4 = '2022-09-08'
-
- ending_epc_data4["DAYS_TO_ENDING"] = data_processor4.calculate_days_to(ending_lodgement_date4)
-
- recommendation4 = {
- "recommendation_id": 0,
- "type": "sealing_open_fireplace"
- }
-
- test_record4 = create_recommendation_scoring_data(
- property=home4,
- recommendation=recommendation4,
- starting_epc_data=starting_epc_data4,
- ending_epc_data=ending_epc_data4,
- fixed_data=fixed_data4,
- )
- test_record4 = pd.DataFrame([test_record4])
-
- # Test the final cleaning:
- test_record4 = DataProcessor.apply_averages_cleaning(
- data_to_clean=test_record4,
- cleaning_data=cleaning_data,
- cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
- ).drop(columns=["LOCAL_AUTHORITY"])
-
- test_record4 = DataProcessor.clean_missings_after_description_process(
- test_record4, [
- c for c in test_record4.columns if
- ("thermal_transmittance" in c) or ("insulation_thickness" in c)
- ]
- )
-
- for c in test_record4.columns:
- if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
- continue
-
- assert test_record4[c].values[0] == row4[c]
diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index 13966655..906ff594 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -114,7 +114,8 @@ class Eligibility:
self.loft = {
"suitability": False,
"thickness": None,
- "reason": "roof not loft"
+ "reason": "roof not loft",
+ "thickness_classification": None
}
return
@@ -125,29 +126,34 @@ class Eligibility:
is_flat=self.roof["is_flat"]
)
+ if insulation_thickness <= 100:
+ thickness_classification = "0-100mm"
+ elif insulation_thickness <= high_loft_thickness_threshold:
+ thickness_classification = "100-270mm"
+ else:
+ thickness_classification = "270mm+"
+
if insulation_thickness <= loft_thickness_threshold:
+ # We produce a thickness classification for the loft
+ # 0 - 100mm insulation
+ # 100 - 270mm insulation
+ # 270mm+ insulation
+
self.loft = {
"suitability": True,
"thickness": insulation_thickness,
- "reason": None
+ "reason": None,
+ "thickness_classification": thickness_classification
}
- if insulation_thickness <= high_loft_thickness_threshold:
- self.loft = {
- "suitability": True,
- "thickness": insulation_thickness,
- "reason": "high loft thickness but below regulation"
- }
- return
-
- if insulation_thickness > high_loft_thickness_threshold:
- # Insulation is already thick enough
- self.loft = {
- "suitability": False,
- "thickness": insulation_thickness,
- "reason": "existing insulation"
- }
- return
+ # Insulation is already thick enough
+ self.loft = {
+ "suitability": False,
+ "thickness": insulation_thickness,
+ "reason": "existing insulation",
+ "thickness_classification": thickness_classification
+ }
+ return
def cavity_insulation(self):
@@ -161,15 +167,13 @@ class Eligibility:
is_empty = (not self.walls["is_filled_cavity"]) or (
self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
)
- is_partial_filled = (
- self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"]
- )
+ is_partial_filled = "partial" in self.walls["clean_description"].lower()
# We look for potentially under performing cavities - anything that is assumed, as built and insulated
is_underperforming = (
self.walls["is_as_built"] and self.walls["insulation_thickness"] in ["average"] and self.walls["is_assumed"]
)
- is_unfilled_cavity = is_cavity and is_empty
+ is_unfilled_cavity = is_cavity and (is_empty and not is_partial_filled)
is_partial_filled_cavity = is_cavity and is_partial_filled
is_underperforming_cavity = is_cavity and is_underperforming
@@ -233,6 +237,13 @@ class Eligibility:
def room_roof_insulation(self):
is_room_roof = self.roof["is_roof_room"]
+ if not is_room_roof:
+ self.room_roof = {
+ "suitability": False,
+ "thickness": None
+ }
+ return
+
insulation_thickness = convert_thickness_to_numeric(
self.roof["insulation_thickness"],
self.roof["is_pitched"],
@@ -246,6 +257,14 @@ class Eligibility:
def flat_roof_insulation(self):
is_flat = self.roof["is_flat"]
+
+ if not is_flat:
+ self.flat_roof = {
+ "suitability": False,
+ "thickness": None
+ }
+ return
+
insulation_thickness = convert_thickness_to_numeric(
self.roof["insulation_thickness"],
self.roof["is_pitched"],
@@ -356,20 +375,21 @@ class Eligibility:
"""
current_sap = int(self.epc["current-energy-efficiency"])
-
- if current_sap >= 69:
- self.eco4_warmfront = {
- "eligible": False,
- "message": "sap too high"
- }
- return
-
self.cavity_insulation()
self.loft_insulation()
# make sure conditions 2 and 3 are true
is_eligible = self.cavity["suitability"] & self.loft["suitability"]
+ if current_sap >= 69:
+ self.eco4_warmfront = {
+ "eligible": False,
+ "message": "sap too high",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
+
if post_retrofit_sap is None:
if current_sap >= 55:
@@ -386,7 +406,9 @@ class Eligibility:
self.eco4_warmfront = {
"eligible": is_eligible,
- "message": message
+ "message": message,
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
}
return
@@ -394,7 +416,9 @@ class Eligibility:
self.eco4_warmfront = {
"eligible": is_eligible,
- "message": None
+ "message": None,
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
}
return
diff --git a/etl/eligibility/ha_15_32/WFT Sales data analysis.py b/etl/eligibility/ha_15_32/WFT Sales data analysis.py
new file mode 100644
index 00000000..a088fe43
--- /dev/null
+++ b/etl/eligibility/ha_15_32/WFT Sales data analysis.py
@@ -0,0 +1,665 @@
+import numpy as np
+import pandas as pd
+
+ECO4_NEW_RATES = 1710
+GBIS_NEW_RATES = 600
+
+
+def app():
+ # Load in the excel
+ nov_ha_data = pd.read_excel(
+ 'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
+ )
+ # Drop rows where HA name is null
+ nov_ha_data = nov_ha_data.dropna(subset=["HA Name"])
+ nov_ha_data["ha_number"] = nov_ha_data["HA Name"].str.extract(r"(\d+)").astype(int)
+ nov_ha_data = nov_ha_data.sort_values("ha_number", ascending=True)
+
+ variance_explanations = pd.read_excel(
+ 'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
+ sheet_name="Variance explanations"
+ )
+
+ september_figures = pd.read_excel(
+ "etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS SEP 23 UPDATE (2).xlsx",
+ sheet_name="HA Stats"
+ )
+
+ historical_invoices = pd.read_excel(
+ "etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx",
+ sheet_name="Jul 22 to Oct 23"
+ )
+ # Drop rows where installer rates is null
+ historical_invoices = historical_invoices[~pd.isnull(historical_invoices["INSTALLER RATES"])]
+ historical_invoices = historical_invoices[historical_invoices["INSTALLER RATES"] != "NA "]
+ # By Scheme, take a weighted mean of the INSTALLER RATES, weighted on the number of rows
+ n_invoices = historical_invoices.groupby(["Scheme", "INSTALLER RATES"])["Invoice number"].count().reset_index()
+ n_invoices = n_invoices[n_invoices["Scheme"].isin(["Eco 4", "GBIS"])]
+ historical_scheme_rates = n_invoices.groupby("Scheme").apply(
+ lambda x: np.average(x["INSTALLER RATES"], weights=x["Invoice number"])
+ ).reset_index().rename(columns={0: "Historical rates"})
+
+ # We keep only the sales data entries that have sales > 0
+ sales_data = nov_ha_data[nov_ha_data["Sales"] > 0]
+
+ # We now need to adjust sales data depending on the variance explanations
+ sales_data = sales_data.merge(
+ variance_explanations[["HA", 'Which figure is correct']],
+ how="left",
+ left_on="ha_number",
+ right_on="HA"
+ )
+
+ def adjust_sales(row):
+ if pd.isnull(row["Which figure is correct"]):
+ return row["Sales"]
+
+ if row["Which figure is correct"] == "HA facts & figures":
+ return row['No. of Tech surveys complete']
+
+ if row["Which figure is correct"] == "Billed amount":
+ return row["Sales"]
+
+ if row["Which figure is correct"] in ["Both correct, HA facts and figures includes November", "Both correct"]:
+ return row["Sales"]
+
+ raise ValueError(f"Unknown value for 'Which figure is correct': {row['Which figure is correct']}")
+
+ # We now need to adjust sales data depending on the variance explanations
+ sales_data["adjusted_sales"] = sales_data.apply(lambda row: adjust_sales(row), axis=1)
+
+ # We therefore adjust GBIS and ECO4 sales data based on adjusted sales
+ sales_data["adjusted_eco4_sales"] = sales_data["No. of Tech surveys complete - Eco 4"] / sales_data["Sales"] * \
+ sales_data["adjusted_sales"]
+
+ sales_data["adjusted_gbis_sales"] = sales_data["No. of Tech surveys complete - GBIS"] / sales_data["Sales"] * \
+ sales_data["adjusted_sales"]
+
+ sales_data["cancellation_rate"] = (sales_data["Sales"] - sales_data["adjusted_sales"]) / sales_data["Sales"]
+
+ # The difference between the adjusted sales and the actual sales is the cancellation
+ cancellations = (sales_data["adjusted_sales"].sum() - sales_data["Sales"].sum()) / sales_data["Sales"].sum()
+
+ # Given the cancellations, we can now adjust the expected remaining surveys
+ sales_data["No. of Tech surveys remaining"] = sales_data["No. of Tech surveys remaining"] * (
+ 1 - sales_data["cancellation_rate"]
+ )
+
+ # We now merge on the expected values for September
+ sales_data = sales_data.merge(
+ september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+ columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+ ),
+ how="left",
+ on="HA Name",
+ )
+
+ sales_data["Sept Expected ECO4"] = sales_data["Sept Expected ECO4"].fillna(0)
+ sales_data["Sept Expected GBIS"] = sales_data["Sept Expected GBIS"].fillna(0)
+
+ # We calculate the ECO4 and GBIS conversion rates with the adjusted numbers
+ sales_data["ECO4 Conversion"] = sales_data["adjusted_eco4_sales"] / sales_data["adjusted_sales"]
+ sales_data["GBIS Conversion"] = sales_data["adjusted_gbis_sales"] / sales_data["adjusted_sales"]
+
+ # We now calculate the expected remaining ECO4 and GBIS sales
+ # We take the number of remaining surveys and multiply by the conversion rate for each scheme, which tells us
+ # how many more we should expect to see
+ sales_data["Expected Remaining ECO4"] = sales_data["No. of Tech surveys remaining"] * sales_data["ECO4 Conversion"]
+ sales_data["Expected Remaining GBIS"] = sales_data["No. of Tech surveys remaining"] * sales_data["GBIS Conversion"]
+
+ # We now produce a forecasted ECO4 and GBIS sales figure
+ sales_data["Forecasted ECO4 Sales"] = sales_data["adjusted_eco4_sales"] + sales_data["Expected Remaining ECO4"]
+ sales_data["Forecasted GBIS Sales"] = sales_data["adjusted_gbis_sales"] + sales_data["Expected Remaining GBIS"]
+
+ # Take the columns we're interested in
+ # HA # Properties Sept ECO4 Figures Sept GBIS Figures Nov Total Sales Nov ECO4 Sales Nov GBIS Sales
+ # Remaining Surveys ECO4 conversion GBIS conversion Forecasted ECO4 Sales Forecasted GBIS sales ECO4 Change
+ # GBIS Change
+ sales_data_formatted = sales_data[[
+ "HA Name",
+ "ASSET LIST no.",
+ "Sept Expected ECO4",
+ "Sept Expected GBIS",
+ "adjusted_sales",
+ "adjusted_eco4_sales",
+ "adjusted_gbis_sales",
+ "No. of Tech surveys remaining",
+ "ECO4 Conversion",
+ "GBIS Conversion",
+ "Forecasted ECO4 Sales",
+ "Forecasted GBIS Sales"
+ ]].rename(
+ columns={
+ "adjusted_sales": "Oct Total Sales (adjusted for variance)",
+ "adjusted_eco4_sales": "Oct ECO4 Sales (adjusted for variance)",
+ "adjusted_gbis_sales": "Oct GBIS Sales (adjusted for variance)",
+ "No. of Tech surveys remaining": "Remaining Surveys",
+ }
+ )
+
+ # Convert columns which should be integers to integers
+ for col in ["ASSET LIST no.", "Remaining Surveys", "Sept Expected ECO4", "Sept Expected GBIS",
+ "Oct Total Sales (adjusted for variance)", "Oct ECO4 Sales (adjusted for variance)",
+ "Oct GBIS Sales (adjusted for variance)", "Forecasted ECO4 Sales", "Forecasted GBIS Sales"]:
+ sales_data_formatted[col] = sales_data_formatted[col].fillna(0)
+ sales_data_formatted[col] = sales_data_formatted[col].astype(int)
+
+ # Remove HA 17 because this was EPCs only. We also remove HA33 because they do not have access to the full portfolio
+ sales_data_formatted = sales_data_formatted[
+ ~sales_data_formatted["HA Name"].isin(["HA 17", "HA 33"])
+ ]
+
+ # September expected ECO4 and GBIS
+ sept_expected_eco4 = sales_data_formatted["Sept Expected ECO4"].sum()
+ sept_expected_gbis = sales_data_formatted["Sept Expected GBIS"].sum()
+
+ # Completed so far
+ oct_eco4_sales = sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"].sum()
+ oct_gbis_sales = sales_data_formatted["Oct GBIS Sales (adjusted for variance)"].sum()
+
+ # Forecasted figures
+ forecasted_eco4_sales = sales_data_formatted["Forecasted ECO4 Sales"].sum()
+ forecasted_gbis_sales = sales_data_formatted["Forecasted GBIS Sales"].sum()
+
+ # Expected remaining sales
+ expected_remaining_eco4_sales = forecasted_eco4_sales - oct_eco4_sales
+ expected_remaining_gbis_sales = forecasted_gbis_sales - oct_gbis_sales
+
+ # Forecast change vs September
+ forecasted_eco4_change = 100 * (forecasted_eco4_sales - sept_expected_eco4) / sept_expected_eco4
+ forecasted_gbis_change = 100 * (forecasted_gbis_sales - sept_expected_gbis) / sept_expected_gbis
+
+ aggregates = pd.DataFrame(
+ columns=["Scheme", "Sept Expected", "Oct Completed", "Forecasted Remaining Sales", "Forecasted Total Sales",
+ "Forecasted Change vs Sept"],
+ data=[
+ ["ECO4", sept_expected_eco4, oct_eco4_sales, expected_remaining_eco4_sales, forecasted_eco4_sales,
+ forecasted_eco4_change],
+ ["GBIS", sept_expected_gbis, oct_gbis_sales, expected_remaining_gbis_sales, forecasted_gbis_sales,
+ forecasted_gbis_change],
+ ]
+ )
+
+ # Multiply by historical rates to get revenue
+ # For ECO4, this is ~£1456 and for GBIS it's ~£432
+ historical_gbis_price = historical_scheme_rates[
+ historical_scheme_rates["Scheme"] == "GBIS"
+ ]["Historical rates"].iloc[0]
+
+ historical_eco4_price = historical_scheme_rates[
+ historical_scheme_rates["Scheme"] == "Eco 4"
+ ]["Historical rates"].iloc[0]
+
+ aggregates["Sept Expected Revenue"] = np.where(
+ aggregates["Scheme"] == "ECO4",
+ aggregates["Sept Expected"] * historical_eco4_price,
+ aggregates["Sept Expected"] * historical_gbis_price
+ )
+
+ aggregates["Completed Revenue"] = np.where(
+ aggregates["Scheme"] == "ECO4",
+ aggregates["Oct Completed"] * historical_eco4_price,
+ aggregates["Oct Completed"] * historical_gbis_price
+ )
+
+ # We use the new rates for the forecasted revenue
+ aggregates["Forecasted Remaining Revenue"] = np.where(
+ aggregates["Scheme"] == "ECO4",
+ aggregates["Forecasted Remaining Sales"] * ECO4_NEW_RATES,
+ aggregates["Forecasted Remaining Sales"] * GBIS_NEW_RATES
+ )
+
+ # We also calculate the forecasted remaining revenue at the original price
+ aggregates["Forecasted Remaining Revenue (original price)"] = np.where(
+ aggregates["Scheme"] == "ECO4",
+ aggregates["Forecasted Remaining Sales"] * historical_eco4_price,
+ aggregates["Forecasted Remaining Sales"] * historical_gbis_price
+ )
+
+ aggregates["Forecasted Revenue"] = aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue"]
+
+ # Forecasted revenue with original price
+ aggregates["Forecasted Revenue (original price)"] = (
+ aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue (original price)"]
+ )
+
+ # Create a totals row which sums up the two rows
+
+ forecasted_change_vs_sept = 100 * (
+ aggregates["Forecasted Total Sales"].sum() - aggregates["Sept Expected"].sum()
+ ) / aggregates["Sept Expected"].sum()
+
+ aggregates = pd.concat(
+ [
+ aggregates,
+ pd.DataFrame(
+ [
+ ["Total", aggregates["Sept Expected"].sum(), aggregates["Oct Completed"].sum(),
+ aggregates["Forecasted Remaining Sales"].sum(), aggregates["Forecasted Total Sales"].sum(),
+ forecasted_change_vs_sept,
+ aggregates["Sept Expected Revenue"].sum(), aggregates["Completed Revenue"].sum(),
+ aggregates["Forecasted Remaining Revenue"].sum(),
+ aggregates["Forecasted Remaining Revenue (original price)"].sum(),
+ aggregates["Forecasted Revenue"].sum(),
+ aggregates["Forecasted Revenue (original price)"].sum(),
+ ]
+ ],
+ columns=aggregates.columns
+ )
+ ]
+ )
+
+ # For each property in the asset list, we now calculate an average conversion rate to ECO4 and GBIS
+ # We do this by taking the forecasted sales values for each scheme and dividing by the number of properties
+
+ number_properties = sales_data_formatted["ASSET LIST no."].sum()
+ eco4_conversion_rate = forecasted_eco4_sales / number_properties
+ gbis_conversion_rate = forecasted_gbis_sales / number_properties
+
+ # We also attribute a future value per property
+ future_eco4_value = ECO4_NEW_RATES * eco4_conversion_rate
+ future_gbis_value = GBIS_NEW_RATES * gbis_conversion_rate
+
+ # We also calculate a revenue figure for the old rates
+ historical_eco4_value = historical_eco4_price * eco4_conversion_rate
+ historical_gbis_value = historical_gbis_price * gbis_conversion_rate
+
+ # For the HAs that have not begun selling, we estimate the value of the projects
+ # We start with some problem HAs
+
+ # HA 7, HA 24, HA 25
+ # These HAs have no sales data, so we use the expected figures
+
+ problem_has_data = nov_ha_data[
+ (nov_ha_data["HA Name"].isin(["HA 7", "HA 24", "HA 25"]))
+ ].copy()
+ # Merge on the september expected figures
+ problem_has_data = problem_has_data.merge(
+ september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+ columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+ ),
+ how="left",
+ on="HA Name",
+ )
+ # Fill NAs
+ problem_has_data["Sept Expected ECO4"] = problem_has_data["Sept Expected ECO4"].fillna(0)
+ problem_has_data["Sept Expected GBIS"] = problem_has_data["Sept Expected GBIS"].fillna(0)
+
+ # We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
+ problem_has_data["Expected ECO4 Sales"] = problem_has_data["ASSET LIST no."] * eco4_conversion_rate
+ problem_has_data["Expected GBIS Sales"] = problem_has_data["ASSET LIST no."] * gbis_conversion_rate
+
+ # Filter just on columns we're interested in
+ problem_has_data = problem_has_data[[
+ "HA Name",
+ "ASSET LIST no.",
+ "Sept Expected ECO4",
+ "Sept Expected GBIS",
+ "ECO4",
+ "GBIS",
+ "Expected ECO4 Sales",
+ "Expected GBIS Sales"
+ ]].rename(
+ columns={
+ "ECO4": "Nov Expected ECO4",
+ "GBIS": "Nov Expected GBIS",
+ }
+ )
+
+ # Fill NAs
+ problem_has_data["Nov Expected ECO4"] = problem_has_data["Nov Expected ECO4"].fillna(0)
+ problem_has_data["Nov Expected GBIS"] = problem_has_data["Nov Expected GBIS"].fillna(0)
+
+ # We calculate HA level Sept, Nov expected revenue, based on historical rates and then forecasted revenue
+ problem_has_data["Sept Expected ECO4 Value"] = problem_has_data["Sept Expected ECO4"] * historical_eco4_price
+ problem_has_data["Sept Expected GBIS Value"] = problem_has_data["Sept Expected GBIS"] * historical_gbis_price
+
+ problem_has_data["Nov Expected ECO4 Value"] = problem_has_data["Nov Expected ECO4"] * historical_eco4_price
+ problem_has_data["Nov Expected GBIS Value"] = problem_has_data["Nov Expected GBIS"] * historical_gbis_price
+
+ problem_has_data["Forecasted ECO4 Revenue"] = problem_has_data["ASSET LIST no."] * future_eco4_value
+ problem_has_data["Forecasted GBIS Revenue"] = problem_has_data["ASSET LIST no."] * future_gbis_value
+
+ # Totals
+ problem_has_data["Sept Expected Total Value"] = problem_has_data["Sept Expected ECO4 Value"] + \
+ problem_has_data["Sept Expected GBIS Value"]
+ problem_has_data["Nov Expected Total Value"] = problem_has_data["Nov Expected ECO4 Value"] + \
+ problem_has_data["Nov Expected GBIS Value"]
+ problem_has_data["Forecasted Total Revenue"] = problem_has_data["Forecasted ECO4 Revenue"] + \
+ problem_has_data["Forecasted GBIS Revenue"]
+
+ # We calculate a total expected value for September, November and then forecasted
+ problem_has_expected_eco4_value = problem_has_data["Sept Expected ECO4"].sum() * historical_eco4_price
+ problem_has_expected_gbis_value = problem_has_data["Sept Expected GBIS"].sum() * historical_gbis_price
+ problem_has_expected_total_value = problem_has_expected_eco4_value + problem_has_expected_gbis_value
+
+ problem_has_nov_eco4_value = problem_has_data["Nov Expected ECO4"].sum() * historical_eco4_price
+ problem_has_nov_gbis_value = problem_has_data["Nov Expected GBIS"].sum() * historical_gbis_price
+ problem_has_nov_total_value = problem_has_nov_eco4_value + problem_has_nov_gbis_value
+
+ forecasted_eco4_value = problem_has_data["ASSET LIST no."].sum() * future_eco4_value
+ forecasted_gbis_value = problem_has_data["ASSET LIST no."].sum() * future_gbis_value
+ problem_has_forecasted_total_value = forecasted_eco4_value + forecasted_gbis_value
+
+ problem_has_summary = pd.DataFrame(
+ columns=["Scheme", "Sept Expected", "Nov Expected", "Forecasted"],
+ data=[
+ ["ECO4", problem_has_expected_eco4_value, problem_has_nov_eco4_value, forecasted_eco4_value],
+ ["GBIS", problem_has_expected_gbis_value, problem_has_nov_gbis_value, forecasted_gbis_value],
+ ["Total", problem_has_expected_total_value, problem_has_nov_total_value, problem_has_forecasted_total_value]
+ ]
+ )
+
+ # We now also estimate the value of the remaining HAs based on historical sales performance and new rates
+ # We take the HAs that are not in the sales data
+ remaining_has = nov_ha_data[
+ ~nov_ha_data["HA Name"].isin(sales_data_formatted["HA Name"])
+ ].copy()
+
+ # Merge on the september expected figures
+ remaining_has = remaining_has.merge(
+ september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+ columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+ ),
+ how="left",
+ on="HA Name",
+ )
+
+ # We update the asset list size for HA 33, because they do not have access to the full portfolio
+ remaining_has.loc[remaining_has["HA Name"] == "HA 33", "ASSET LIST no."] = 20699
+ # We also remove HA 17
+ remaining_has = remaining_has[~remaining_has["HA Name"].isin(["HA 17"])]
+
+ # We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
+ remaining_has["Expected ECO4 Sales"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
+ remaining_has["Expected GBIS Sales"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
+
+ # Filter just on columns we're interested in
+ remaining_has = remaining_has[[
+ "HA Name",
+ "ASSET LIST no.",
+ "Sept Expected ECO4",
+ "Sept Expected GBIS",
+ "ECO4",
+ "GBIS",
+ ]].rename(
+ columns={
+ "ECO4": "Nov Expected ECO4",
+ "GBIS": "Nov Expected GBIS",
+ }
+ )
+
+ remaining_has = remaining_has.fillna(0)
+
+ # We take just HAs that had an initial september expectation for ECO4 or GBIS, or that now have a Nov expectation
+ remaining_has = remaining_has[
+ (remaining_has["Sept Expected ECO4"] > 0) | (remaining_has["Sept Expected GBIS"] > 0) |
+ (remaining_has["Nov Expected ECO4"] > 0) | (remaining_has["Nov Expected GBIS"] > 0)
+ ]
+
+ # Expected sales based on asset list size and conversion rate
+ remaining_has["Forecasted Sales ECO4"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
+ remaining_has["Forecasted Sales GBIS"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
+
+ # Calculate the total expected value for September and November
+ remaining_has["Sept Expected ECO4 Value"] = remaining_has["Sept Expected ECO4"] * historical_eco4_price
+ remaining_has["Sept Expected GBIS Value"] = remaining_has["Sept Expected GBIS"] * historical_gbis_price
+
+ remaining_has["Nov Expected ECO4 Value"] = remaining_has["Nov Expected ECO4"] * historical_eco4_price
+ remaining_has["Nov Expected GBIS Value"] = remaining_has["Nov Expected GBIS"] * historical_gbis_price
+
+ # Calculate forecasted revenue
+ remaining_has["Forecasted ECO4 Revenue"] = remaining_has["ASSET LIST no."] * future_eco4_value
+ remaining_has["Forecasted GBIS Revenue"] = remaining_has["ASSET LIST no."] * future_gbis_value
+
+ # We also calculate forecasted revenue with the original price
+ remaining_has["Forecasted ECO4 Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_eco4_value
+ remaining_has["Forecasted GBIS Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_gbis_value
+
+ # Calculate totals for each scheme
+ remaining_has_september_eco4_sales = remaining_has["Sept Expected ECO4"].sum()
+ remaining_has_september_gbis_sales = remaining_has["Sept Expected GBIS"].sum()
+
+ remaining_has_november_eco4_sales = remaining_has["Nov Expected ECO4"].sum()
+ remaining_has_november_gbis_sales = remaining_has["Nov Expected GBIS"].sum()
+
+ remaining_has_forecasted_eco4_sales = remaining_has["Forecasted Sales ECO4"].sum()
+ remaining_has_forecasted_gbis_sales = remaining_has["Forecasted Sales GBIS"].sum()
+
+ remaining_has_september_eco4_value = remaining_has["Sept Expected ECO4 Value"].sum()
+ remaining_has_september_gbis_value = remaining_has["Sept Expected GBIS Value"].sum()
+
+ remaining_has_november_eco4_value = remaining_has["Nov Expected ECO4 Value"].sum()
+ remaining_has_november_gbis_value = remaining_has["Nov Expected GBIS Value"].sum()
+
+ remaining_has_forecasted_eco4_value = remaining_has["Forecasted ECO4 Revenue"].sum()
+ remaining_has_forecasted_gbis_value = remaining_has["Forecasted GBIS Revenue"].sum()
+
+ remaining_has_forecasted_eco4_value_original_price = remaining_has["Forecasted ECO4 Revenue (original price)"].sum()
+ remaining_has_forecasted_gbis_value_original_price = remaining_has["Forecasted GBIS Revenue (original price)"].sum()
+
+ # Calculate the change in forecasted sales against the September expected sales
+ remaining_has_foecast_change_eco4 = 100 * (
+ remaining_has["Forecasted Sales ECO4"].sum() - remaining_has["Sept Expected ECO4"].sum()
+ ) / remaining_has["Sept Expected ECO4"].sum()
+
+ remaining_has_foecast_change_gbis = 100 * (
+ remaining_has["Forecasted Sales GBIS"].sum() - remaining_has["Sept Expected GBIS"].sum()
+ ) / remaining_has["Sept Expected GBIS"].sum()
+
+ # Total change
+ remaining_has_foecast_change_total = 100 * (
+ remaining_has["Forecasted Sales ECO4"].sum() + remaining_has["Forecasted Sales GBIS"].sum() -
+ remaining_has["Sept Expected ECO4"].sum() - remaining_has["Sept Expected GBIS"].sum()
+ ) / (remaining_has["Sept Expected ECO4"].sum() + remaining_has["Sept Expected GBIS"].sum())
+
+ asset_list_size = remaining_has["ASSET LIST no."].sum()
+
+ # Create a summary table of the rest with the totals for ECO4, GBIS and then a total row
+ remaining_has_aggregate = pd.DataFrame(
+ columns=["Scheme", "Asset List Size", "Sept Expected Sales", "Nov Expected Sales", "Forecasted Sales",
+ "Forecasted Change vs Sept",
+ "Sept Expected Value", "Nov Expected Value", "Forecasted Value", "Forecasted Value (original price)"],
+ data=[
+ [
+ "ECO4", asset_list_size, remaining_has_september_eco4_sales, remaining_has_november_eco4_sales,
+ remaining_has_forecasted_eco4_sales, remaining_has_foecast_change_eco4,
+ remaining_has_september_eco4_value,
+ remaining_has_november_eco4_value, remaining_has_forecasted_eco4_value,
+ remaining_has_forecasted_eco4_value_original_price
+ ],
+ [
+ "GBIS", asset_list_size, remaining_has_september_gbis_sales, remaining_has_november_gbis_sales,
+ remaining_has_forecasted_gbis_sales, remaining_has_foecast_change_gbis,
+ remaining_has_september_gbis_value,
+ remaining_has_november_gbis_value, remaining_has_forecasted_gbis_value,
+ remaining_has_forecasted_gbis_value_original_price
+ ],
+ [
+ "Total", asset_list_size, remaining_has_september_eco4_sales + remaining_has_september_gbis_sales,
+ remaining_has_november_eco4_sales + remaining_has_november_gbis_sales,
+ remaining_has_forecasted_eco4_sales + remaining_has_forecasted_gbis_sales,
+ remaining_has_foecast_change_total,
+ remaining_has_september_eco4_value + remaining_has_september_gbis_value,
+ remaining_has_november_eco4_value + remaining_has_november_gbis_value,
+ remaining_has_forecasted_eco4_value + remaining_has_forecasted_gbis_value,
+ remaining_has_forecasted_eco4_value_original_price +
+ remaining_has_forecasted_gbis_value_original_price
+ ]
+ ]
+ )
+
+ # Calculate pipeline value
+ pipeline_value = aggregates[["Scheme", "Completed Revenue", "Forecasted Remaining Revenue"]].merge(
+ remaining_has_aggregate[["Scheme", "Forecasted Value"]].rename(
+ columns={"Forecasted Value": "Forecasted Revenue, Unconfirmed HAs"}
+ ), how="inner", on="Scheme"
+ )
+
+ # Calculate the total
+ pipeline_value["Total Value"] = (
+ pipeline_value["Completed Revenue"] + pipeline_value["Forecasted Remaining Revenue"] + pipeline_value[
+ "Forecasted Revenue, Unconfirmed HAs"]
+ )
+
+ # TODO: Insert model figures
+ model_results = pd.DataFrame(
+ [
+ {
+ # This one, we don't have sales data
+ "HA Name": "HA 15",
+ "Model Expected Additional ECO4 (unit level)": None,
+ "Model Expected Total ECO4 (unit level)": 296,
+ "Model Expected Additional GBIS (unit level)": None,
+ "Model Expected Total GBIS (unit level)": 209,
+ },
+ {
+ "HA Name": "HA 16",
+ # Old before re-run
+ # "Model Expected Additional ECO4 (unit level)": 418,
+ # "Model Expected Total ECO4 (unit level)": 1820,
+ # "Model Expected Additional GBIS (unit level)": 576,
+ # "Model Expected Total GBIS (unit level)": 612,
+
+ # In the partial sales data, WFT have completed 1407 ECO4, 36 GBIS
+ "Model Expected Additional ECO4 (unit level)": 411 + 342 + 235,
+ "Model Expected Total ECO4 (unit level)": 1407 + 411 + 342 + 235,
+ "Model Expected Additional GBIS (unit level)": 223,
+ "Model Expected Total GBIS (unit level)": 36 + 223,
+ },
+ {
+ "HA Name": "HA 24",
+ "Model Expected Additional ECO4 (unit level)": 224,
+ "Model Expected Total ECO4 (unit level)": 848,
+ "Model Expected Additional GBIS (unit level)": 552,
+ "Model Expected Total GBIS (unit level)": 552,
+ },
+ {
+ "HA Name": "HA 25",
+ "Model Expected Additional ECO4 (unit level)": None,
+ "Model Expected Total ECO4 (unit level)": 1709 + 59,
+ "Model Expected Additional GBIS (unit level)": None,
+ "Model Expected Total GBIS (unit level)": 2004 + 107,
+ }
+ ]
+ )
+
+ sales_data_formatted["Remaining ECO4 Sales"] = (
+ sales_data_formatted["Forecasted ECO4 Sales"] - sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"]
+ )
+
+ sales_data_formatted["Remaining GBIS Sales"] = (
+ sales_data_formatted["Forecasted GBIS Sales"] - sales_data_formatted["Oct GBIS Sales (adjusted for variance)"]
+ )
+
+ sales_data_formatted["Completed ECO4 Revenue"] = (sales_data_formatted[
+ "Oct ECO4 Sales (adjusted for variance)"] *
+ historical_eco4_price)
+ sales_data_formatted["Completed GBIS Revenue"] = (sales_data_formatted[
+ "Oct GBIS Sales (adjusted for variance)"] *
+ historical_gbis_price)
+
+ ha_subset_with_sales = ["HA 15", "HA 16", "HA 24"]
+
+ has_subset_with_sales_value = sales_data_formatted[
+ sales_data_formatted["HA Name"].isin(ha_subset_with_sales)
+ ].copy()[
+ [
+ "HA Name",
+ "Oct ECO4 Sales (adjusted for variance)",
+ "Oct GBIS Sales (adjusted for variance)",
+ "Remaining ECO4 Sales",
+ "Remaining GBIS Sales",
+ "Forecasted ECO4 Sales",
+ "Forecasted GBIS Sales",
+ "Completed ECO4 Revenue",
+ "Completed GBIS Revenue"
+ ]
+ ]
+
+ has_subset_with_sales_value["Remaining ECO4 Revenue"] = has_subset_with_sales_value[
+ "Remaining ECO4 Sales"] * ECO4_NEW_RATES
+ has_subset_with_sales_value["Remaining GBIS Revenue"] = has_subset_with_sales_value[
+ "Remaining GBIS Sales"] * GBIS_NEW_RATES
+
+ has_subset_with_sales_value["Remaining Total Revenue"] = (
+ has_subset_with_sales_value["Remaining ECO4 Revenue"] + has_subset_with_sales_value["Remaining GBIS Revenue"]
+ )
+
+ model_results["Model Expected Additional ECO4 Revenue"] = (
+ model_results["Model Expected Additional ECO4 (unit level)"] * ECO4_NEW_RATES
+ )
+
+ model_results["Model Expected Additional GBIS revenue"] = (
+ model_results["Model Expected Additional GBIS (unit level)"] * GBIS_NEW_RATES
+ )
+
+ model_results["Model Expected Additional Total Revenue"] = (
+ model_results["Model Expected Additional ECO4 Revenue"] + model_results[
+ "Model Expected Additional GBIS revenue"]
+ )
+
+ # Show more columns with pandas
+ pd.set_option('display.max_rows', 500)
+ pd.set_option('display.max_columns', 500)
+ pd.set_option('display.width', 1000)
+
+ # Look at HA 16
+ ha16_model = model_results[model_results["HA Name"] == "HA 16"]
+ has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 16"]
+
+ # WFT: For HA 16: 4,598,190 ECO4, 57,000 GBIS
+ # Model:
+
+ # Look at HA 24
+ ha24_model = model_results[model_results["HA Name"] == "HA 24"]
+ has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 24"]
+
+ # Look at HA 15
+ ha15_data = has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 15"]
+ ha15_portfolio_value = ha15_data["Completed ECO4 Revenue"] + ha15_data[
+ "Completed GBIS Revenue"] + ha15_data["Remaining Total Revenue"]
+ # # This doesn't have sales data so in the model analysis, we just value the ha as a whole
+ ha15_model = model_results[model_results["HA Name"] == "HA 15"]
+ ha15_value = ha15_model["Model Expected Total ECO4 (unit level)"].iloc[0] * ECO4_NEW_RATES + \
+ ha15_model["Model Expected Total GBIS (unit level)"].iloc[0] * GBIS_NEW_RATES
+
+ model_results["Expected ECO4 Revenue"] = model_results["Model Expected Total ECO4 (unit level)"] * ECO4_NEW_RATES
+ model_results["Expected GBIS Revenue"] = model_results["Model Expected Total GBIS (unit level)"] * GBIS_NEW_RATES
+ model_results["Expected Total Revenue"] = model_results["Expected ECO4 Revenue"] + model_results[
+ "Expected GBIS Revenue"]
+ model_results[model_results["HA Name"].isin(["HA 15"])]
+
+ # We now create a final excel with all of the data
+ # We want:
+ # 1) aggregates
+ # 2) sales_data_formatted
+ # 3) remaining_has_aggregate
+ # 4) remaining_has
+ # 5) problem_has_summary
+
+ # Function to get the maximum column width
+ def get_col_widths(dataframe):
+     """
+     Work out a display width for every column of a dataframe
+
+     The sheets are written with index=False, so no width is returned for the index: the first width
+     belongs to the first worksheet column, the second width to the second column, and so on.
+     :param dataframe: dataframe that is about to be written to a worksheet
+     :return: list with one width per dataframe column, in column order; each width is the longer of the
+         column name and the longest stringified value in that column
+     """
+     widths = []
+     for col in dataframe.columns:
+         value_lengths = dataframe[col].astype(str).map(len)
+         # A column without rows has no longest value, so only its name determines the width
+         longest_value = int(value_lengths.max()) if len(value_lengths) else 0
+         widths.append(max(longest_value, len(str(col))))
+     return widths
+
+ # Create a Pandas Excel writer using XlsxWriter as the engine
+ with pd.ExcelWriter('HA Pipeline Analysis.xlsx', engine='xlsxwriter') as writer:
+ # Write each dataframe to a different worksheet without the index
+ for df, sheet in [(aggregates, 'Forecasted Sales'),
+ (sales_data_formatted, 'Sales Data'),
+ (remaining_has_aggregate, 'Remaining HAs Value'),
+ (remaining_has, 'Remaining HAs data'),
+ (pipeline_value, 'Pipeline Value'),
+ (problem_has_summary, 'Problem HAs Analysis'),
+ (problem_has_data, 'Problem HAs Data')
+
+ ]:
+
+ df.to_excel(writer, sheet_name=sheet, index=False)
+
+ # Auto-adjust columns' width
+ for i, width in enumerate(get_col_widths(df)):
+ writer.sheets[sheet].set_column(i, i, width)
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index 76aadcc4..a68bf272 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -4,6 +4,7 @@ used by the Warmfront team, to identify which properties are eligible for ECO4 a
work is being done in December 2023, prior to completion of acquisition
"""
import pickle
+from etl.epc.Record import EPCRecord
from pathlib import Path
from tqdm import tqdm
import pandas as pd
@@ -16,8 +17,6 @@ from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from backend.Property import Property
from etl.eligibility.Eligibility import Eligibility
-from etl.epc.DataProcessor import DataProcessor
-from backend.app.plan.utils import create_recommendation_scoring_data
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
@@ -347,48 +346,31 @@ def prepare_model_data_row(
:param modelling_epc:
:return:
"""
+
+ epc_records = {
+ 'original_epc': modelling_epc.copy(),
+ 'full_sap_epc': full_sap_epc.copy(),
+ 'old_data': old_data.copy(),
+ }
+
+ prepared_epc = EPCRecord(
+ epc_records=epc_records,
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
p = Property(
id=property_id,
postcode=modelling_epc["postcode"],
address=modelling_epc["address1"],
- data=modelling_epc,
- old_data=old_data,
- full_sap_epc=full_sap_epc
+ epc_record=prepared_epc
)
- p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup,
- floor_area_decile_thresholds=floor_area_decile_thresholds)
-
- # THIS IS TEMP AND SHOULDN'T BE HERE
- data_to_clean = p.get_model_data()
- if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]:
- data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
- p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
-
- # This is temp - this should happen after scoring
- cleaned_property_data = DataProcessor.apply_averages_cleaning(
- data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]),
- cleaning_data=cleaning_data,
- cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+ p.get_components(
+ cleaned, photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds
)
- p.set_number_lighting_outlets(cleaned_property_data)
- data_processor = DataProcessor(None, newdata=True)
- data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
-
- data_processor.pre_process()
-
- starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
- ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
- fixed_data = data_processor.get_fixed_features()
-
- # We update the ending record with the recommended updates and we set lodgement date to today
- ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)
-
- # We simulate the impact of the retrofit using expected performance of the wall and roof,
- # after retrofit. We use the minimal u-values required to meet building regulations part L
- # TODO: Check the performance of the materials warmfront's installers use, particularly for
- # cavity
+ p.create_base_difference_epc_record(cleaned_lookup=cleaned)
cavity_simulation = {
"recommendation_id": "-".join([property_id, "cavity"]),
@@ -404,21 +386,16 @@ def prepare_model_data_row(
"parts": [{"depth": 270}]
}
- cavity_scoring = create_recommendation_scoring_data(
- property=p,
- recommendation=cavity_simulation,
- starting_epc_data=starting_epc_data,
- ending_epc_data=ending_epc_data,
- fixed_data=fixed_data,
- )
+ simulations = [
+ [cavity_simulation],
+ [loft_simulation]
+ ]
- loft_scoring = create_recommendation_scoring_data(
- property=p,
- recommendation=loft_simulation,
- starting_epc_data=starting_epc_data,
- ending_epc_data=ending_epc_data,
- fixed_data=fixed_data,
- )
+ p.adjust_difference_record_with_recommendations(simulations)
+
+ # Make sure we definitely have the correct data
+ cavity_scoring = [x for x in p.recommendations_scoring_data if "cavity" in x["id"]][0]
+ loft_scoring = [x for x in p.recommendations_scoring_data if "loft" in x["id"]][0]
return [cavity_scoring, loft_scoring]
diff --git a/etl/eligibility/ha_15_32/cancellation.py b/etl/eligibility/ha_15_32/cancellation.py
new file mode 100644
index 00000000..849add45
--- /dev/null
+++ b/etl/eligibility/ha_15_32/cancellation.py
@@ -0,0 +1,113 @@
+import openpyxl
+import pandas as pd
+import numpy as np
+
+
+def get_excel_survey_list(workbook_path, worksheet_name=None):
+    """
+    Read a survey list worksheet into a dataframe, keeping track of the fill colour of each row
+    :param workbook_path: path of the Excel workbook to open
+    :param worksheet_name: worksheet to read; the active worksheet is used when this is None
+    :return: dataframe with one row per worksheet row from row 2 onwards, with columns named after the cells
+        of row 1, plus a "row_colour" column
+    """
+    workbook = openpyxl.load_workbook(workbook_path)
+    sheet = workbook.active if worksheet_name is None else workbook[worksheet_name]
+
+    values_by_row = []
+    colour_by_row = []
+    # Row 1 is taken as the header row, so the data starts on row 2
+    for cells in sheet.iter_rows(min_row=2, values_only=False):
+        values_by_row.append([cell.value for cell in cells])
+        # The colour of a row is read from its first cell; '00000000' is recorded as None
+        fill_index = cells[0].fill.start_color.index
+        colour_by_row.append(None if fill_index == '00000000' else fill_index)
+
+    header = [cell.value for cell in sheet[1]]
+    survey_list = pd.DataFrame(values_by_row, columns=header)
+    survey_list["row_colour"] = colour_by_row
+
+    return survey_list
+
+
+def load_data():
+    """
+    Load the HA 16, HA 24 and HA 25 survey lists and combine them into one cancellation dataset
+    :return: dataframe with the columns HA, survey_status and cancellation_reason, without the rows whose
+        status is "no update" or "loft remaining"
+    """
+    # HA 16 and HA 24 are ECO4 survey lists; HA 25 is an ECO3 list and we read its "CAVITY" worksheet
+    ha16_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
+    ha24_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
+    ha25_survey_list = get_excel_survey_list(
+        'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx', worksheet_name="CAVITY"
+    )
+
+    # Drop the HA 25 columns that do not contain a single value
+    ha25_survey_list = ha25_survey_list.dropna(axis=1, how='all')
+
+    # HA 16 and HA 24 record the installation status as text, which we map onto a shared vocabulary
+    ha16_status_labels = {
+        "NO UPDATE - CHECKED 2.10.23": "no update",
+        "NO UPDATE - CHECKED 18.12.23": "no update",
+        "INSTALLED": "installed",
+        "CANCELLED": "cancelled",
+        "LOFT STILL TO BE INSTALLED": "loft remaining",
+    }
+    ha16_survey_list["survey_status"] = ha16_survey_list["INSTALLED OR CANCELLED"].replace(ha16_status_labels)
+
+    ha24_status_labels = {
+        "NO UPDATE - CHECKED 21.11.23": "no update",
+        "NO UPDATE - CHECKED 18.12.23": "no update",
+        "INSTALLED": "installed",
+        "CANCELLED": "cancelled",
+        "LOFT STILL TO BE INSTALLED": "loft remaining",
+        "SEE NOTES >>": "see notes",
+    }
+    ha24_survey_list["survey_status"] = ha24_survey_list["INSTALLED OR CANCELLED"].replace(ha24_status_labels)
+
+    # HA 25 has no status column: the status is encoded in the fill colour of the row instead.
+    # Any colour that is not listed here ends up as "unknown".
+    ha25_colour_status = {
+        "FF7030A0": "installed",
+        "FF92D050": "installed",
+        "FFFF0000": "cancelled",
+        "FFFFFF00": "filler row - drop",
+        "FF38FD23": "installed",
+    }
+    ha25_survey_list["survey_status"] = ha25_survey_list["row_colour"].map(ha25_colour_status).fillna("unknown")
+    ha25_survey_list = ha25_survey_list[ha25_survey_list["survey_status"] != "filler row - drop"]
+
+    # The cancellation reason is a straight copy of the installer notes; HA 25 does not provide any
+    installer_notes_column = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
+    ha16_survey_list["cancellation_reason"] = ha16_survey_list[installer_notes_column].copy()
+    ha24_survey_list["cancellation_reason"] = ha24_survey_list[installer_notes_column].copy()
+    ha25_survey_list["cancellation_reason"] = "No reason provided"
+
+    # Label every list with its HA and stack the shared columns
+    shared_columns = ["HA", "survey_status", "cancellation_reason"]
+    frames = []
+    for ha_label, survey_list in (
+        ("HA 16", ha16_survey_list), ("HA 24", ha24_survey_list), ("HA 25", ha25_survey_list)
+    ):
+        survey_list["HA"] = ha_label
+        frames.append(survey_list[shared_columns])
+    cancellation_data = pd.concat(frames)
+
+    # Keep only the rows that have a confirmed status
+    is_unconfirmed = cancellation_data["survey_status"].isin(["no update", "loft remaining"])
+    return cancellation_data[~is_unconfirmed]
+
+
+def app():
+    """
+    Entry point for the analysis of the cancellation data provided by warmfront
+    :return: None
+    """
+
+    # These are the jobs that completed invasive surveys but where the installer could not conclude the work
+    cancellations = load_data()
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
new file mode 100644
index 00000000..e94babcd
--- /dev/null
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -0,0 +1,1400 @@
+import os
+import openpyxl
+from pathlib import Path
+import msgpack
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+
+class DataLoader:
+ # Fill colour codes per housing association and per list type ("asset_list" / "survey_list").
+ # The loaders compare these codes with the fill colour index of the first cell of each spreadsheet row
+ # to give the row a colour name. Only ha_6 has a "survey_list" entry.
+ COLOUR_CONFIG = {
+ "ha_1": {
+ "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
+ },
+ "ha_6": {
+ "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
+ "survey_list": {
+ "green": "FF92D050", "purple": "FF7030A0", "red": "FFFF0000", "blue": "FF00B0F0"
+ }
+ },
+ "ha_14": {
+ "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
+ },
+ "ha_39": {
+ "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
+ },
+ "ha_107": {
+ "asset_list": {"red": "FFFF0000", "green": "FF00B050"},
+ }
+ }
+
+ # First spreadsheet row that holds data in each asset list; load_asset_list reads the column names
+ # from the row directly above it.
+ MIN_ROWS = {
+ "ha_1": 2,
+ "ha_6": 2,
+ "ha_14": 3, # The spreadsheet starts from the third row
+ "ha_39": 2,
+ "ha_107": 2,
+ }
+
+ def __init__(self, files, use_cache):
+     """
+     Keep hold of the loader settings and start without any loaded data
+     :param files: stored unchanged as self.files
+     :param use_cache: stored unchanged as self.use_cache
+     """
+     self.data = {}
+     self.files, self.use_cache = files, use_cache
+
+ def load_asset_list(self, file_path, ha_name, sheet_name=None):
+ """
+ Load a housing association asset list from Excel and prepare it for address matching
+ :param file_path: path of the Excel workbook holding the asset list
+ :param ha_name: key into MIN_ROWS and COLOUR_CONFIG (e.g. "ha_6"); it also selects the address columns
+ :param sheet_name: worksheet to read; the active worksheet is used when this is None
+ :return: dataframe of the non-empty rows, with row_color, row_colour_name, row_meaning, asset_list_row_id,
+ matching_address, matching_postcode and HouseNo added (plus built_form for ha_6)
+ :raises NotImplementedError: when there is no address handling for ha_name
+ """
+ workbook = openpyxl.load_workbook(file_path)
+ if sheet_name is not None:
+ sheet = workbook[sheet_name]
+ else:
+ sheet = workbook.active
+ # The column names sit on the row directly above the first data row
+ sheet_colnames = [cell.value for cell in sheet[self.MIN_ROWS[ha_name] - 1]]
+
+ rows_data = []
+ rows_colors = []
+ for row in tqdm(
+ sheet.iter_rows(min_row=self.MIN_ROWS[ha_name], values_only=False)
+ ): # Data starts at MIN_ROWS[ha_name]; the row above it holds the headers
+
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ # The colour of a row is read from its first cell; '00000000' is recorded as None
+ row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+ # row_color = COLOR_INDEX[row_color]
+ rows_data.append(row_data)
+ rows_colors.append(row_color)
+
+ asset_list = pd.DataFrame(rows_data, columns=sheet_colnames)
+ # Drop the columns whose header cell is empty
+ asset_list = asset_list.loc[:, asset_list.columns.notnull()]
+
+ asset_list['row_color'] = rows_colors
+
+ # Remove entirely empty rows - consider all columns apart from row_color
+ asset_list = asset_list.loc[asset_list.loc[:, asset_list.columns != 'row_color'].notnull().any(axis=1)]
+
+ asset_list_colours = self.COLOUR_CONFIG[ha_name]["asset_list"]
+
+ # Anything that is neither the configured red nor the configured green is labelled "yellow",
+ # which includes rows without a fill colour
+ asset_list["row_colour_name"] = np.where(
+ asset_list["row_color"] == asset_list_colours["red"], "red",
+ np.where(asset_list["row_color"] == asset_list_colours["green"], "green", "yellow")
+ )
+
+ asset_list["row_meaning"] = np.where(
+ asset_list["row_colour_name"] == "red", "does not meet criteria",
+ np.where(
+ asset_list["row_colour_name"] == "green", "identified potential eco works (CWI)", "maybe in the future"
+ )
+ )
+
+ # Add in asset_list_row_id
+ # NOTE(review): there is no separator between ha_name and the row number, so ids can collide across
+ # HAs (e.g. "ha_1" row 45 and "ha_14" row 5 both give "ha_145") - confirm ids are never mixed across HAs
+ asset_list["asset_list_row_id"] = [ha_name + str(i) for i in range(0, len(asset_list))]
+
+ # Prepare the asset list
+ # Depending on the HA, we need to rename some columns
+ if ha_name == "ha_1":
+ asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Address - Postcode"].str.lower().str.strip()
+ elif ha_name == "ha_6":
+ asset_list["matching_address"] = asset_list["propertyaddress"].str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Post Code"].str.lower().str.strip()
+ elif ha_name == "ha_14":
+ # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
+ asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \
+ asset_list["Address 2"].str.lower().str.strip() + ", " + \
+ asset_list["Address 3"].str.lower().str.strip() + ", " + \
+ asset_list["Address 4"].str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+ elif ha_name == "ha_39":
+ # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
+ asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["add_2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["add_3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["post_code"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip()
+ elif ha_name == "ha_107":
+ # Create matching_address by concatenating House No, Street, Town, District, Postcode
+ asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Street"].str.lower().str.strip() + ", " + \
+ asset_list["Town"].str.lower().str.strip() + ", " + \
+ asset_list["District"].str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+ else:
+ raise NotImplementedError("implement me")
+
+ if ha_name in ["ha_107"]:
+ # This HA has the house number in its own column already
+ asset_list["HouseNo"] = asset_list["House No"].copy()
+ else:
+ # Otherwise the house number is the first space-separated token of the first comma-separated
+ # part of matching_address
+ split_addresses = asset_list['matching_address'].str.split(',', expand=True)
+ house_numbers = split_addresses[0].str.split(' ', expand=True)
+ # The first column should be HouseNo - we aren't interested in the other columns, but we don't know how
+ # many columns there might be
+ house_numbers = house_numbers.iloc[:, 0:1]
+ house_numbers.columns = ['HouseNo']
+
+ asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1)
+
+ # Finally, we process property_type or built form, where needed
+ if ha_name == "ha_6":
+ asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6)
+
+ return asset_list
+
+ def load_survey_list(self, file_path, ha_name, asset_list, sheet_name=None):
+     """
+     Load a survey list from Excel, label each row by its fill colour and match it to the asset list
+     :param file_path: path of the Excel workbook holding the survey list
+     :param ha_name: housing association key; only "ha_6" is supported
+     :param asset_list: asset list of the same housing association, passed on to the matching step
+     :param sheet_name: worksheet to read; the active worksheet is used when this is None
+     :return: tuple of (survey_list, matched_lookup), where matched_lookup is the result of merge_ha_6
+     :raises NotImplementedError: when ha_name is anything other than "ha_6"
+     """
+     # Fail before opening the workbook. Without this guard an unsupported HA hits a KeyError on the
+     # "survey_list" colour config lookup further down and never reaches this error.
+     if ha_name != "ha_6":
+         raise NotImplementedError("Only HA 6 has surveys")
+
+     survey_workbook = openpyxl.load_workbook(file_path)
+     if sheet_name is not None:
+         survey_sheet = survey_workbook[sheet_name]
+     else:
+         survey_sheet = survey_workbook.active
+
+     survey_rows = []
+     survey_colors = []
+
+     # Row 1 is taken as the header row, so the data starts on row 2
+     for row in tqdm(survey_sheet.iter_rows(min_row=2, values_only=False)):
+         survey_rows.append([cell.value for cell in row])
+         # The colour of a row is read from its first cell; '00000000' is recorded as None
+         fill_index = row[0].fill.start_color.index
+         survey_colors.append(fill_index if fill_index != '00000000' else None)
+
+     survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+     # Remove columns whose header cell is None
+     survey_list = survey_list.loc[:, survey_list.columns.notnull()]
+
+     survey_list["row_colour"] = survey_colors
+     survey_list_colours = self.COLOUR_CONFIG[ha_name]["survey_list"]
+
+     # The survey list has 4 possible colours:
+     # PURPLE - Installer advised install complete and a complimentary post works EPC has been completed.
+     # GREEN - Installer advised install complete.
+     # RED - Cancelled
+     # BLUE - Loft Only Installed
+     # NO FILL - No official update from installer (could be installed or cancelled)
+
+     # Listed from lowest to highest priority, so that "red" wins if two names ever share a colour code
+     colour_names = {survey_list_colours[name]: name for name in ("blue", "purple", "green", "red")}
+     # Any colour that is not configured (including no colour at all) is treated as "no fill"
+     survey_list["row_colour_name"] = survey_list["row_colour"].map(colour_names).fillna("no fill")
+
+     colour_meanings = {
+         "red": "Cancelled",
+         "green": "Installer advised install complete",
+         "purple": "Installer advised install complete and a complimentary post works EPC has been completed",
+         "blue": "Loft Only Installed",
+     }
+     survey_list["row_meaning"] = survey_list["row_colour_name"].map(colour_meanings).fillna(
+         "No official update from installer (could be installed or cancelled)"
+     )
+
+     # Add in survey_list_row_id
+     survey_list["survey_list_row_id"] = [ha_name + "_surveys_" + str(i) for i in range(0, len(survey_list))]
+
+     # We now do the matching between the asset list and the survey list.
+     # What we'll get from this is a lookup table from the asset list to the survey list
+     matched_lookup = self.merge_ha_6(asset_list, survey_list)
+
+     return survey_list, matched_lookup
+
@staticmethod
def merge_ha_6(asset_list, survey_list):
    """
    Build the lookup from HA 6 survey rows to asset list rows.

    Known address and postcode errors are first corrected IN PLACE on both frames (so the caller sees the
    corrected values). Each survey row is then narrowed down to a single asset row by street name, house
    number, the "HouseNo" column and finally the postcode.

    :param asset_list: DataFrame; "propertyaddress" and "matching_address" are corrected in place,
        "matching_postcode", "HouseNo" and "asset_list_row_id" are read
    :param survey_list: DataFrame; "Street / Block Name" and "Post Code" are corrected in place, "NO." and
        "survey_list_row_id" are read
    :return: DataFrame with one record per survey row holding "survey_list_row_id" and "asset_list_row_id".
        The asset id is None when no unique match exists and the survey postcode is absent from the asset list
    :raises ValueError: if no unique match exists although the survey postcode is present in the asset list
    """
    street_col = "Street / Block Name"

    def replace_literal(frame, column, old, new):
        # regex=False: every correction is a plain substring, so "." (and any other regex metacharacter)
        # must be taken literally regardless of the pandas default for `regex`.
        frame[column] = frame[column].str.replace(old, new, regex=False)

    def replace_streets(corrections):
        # Corrections are applied strictly in the given order - later ones rely on earlier ones
        # (e.g. "HILARY RD" -> "HILARY Road" -> "Hillary Road").
        for old, new in corrections:
            replace_literal(survey_list, street_col, old, new)

    # Correct the asset list across propertyaddress and matching_address. matching_address holds the
    # lower-cased form, so the same correction is applied there in lower case.
    for old, new in (
        ("Baggott Place", "Baggotts Place"),
        ("Cherry Tree", "Cherrytree"),
        ("Maryhill Close", "Mary Hill Close"),
        ("Moffat Way", "Moffatt Way"),
    ):
        replace_literal(asset_list, "propertyaddress", old, new)
        replace_literal(asset_list, "matching_address", old.lower(), new.lower())

    # Correct the survey list
    replace_streets((
        ("Seabridge Road", "Seabridge Lane"),
        # Strip out /KNUTTON from the street name
        ("/KNUTTON", ""),
        ("Clevend Road", "Cleveland Road"),
        ("TURNERS AVENUE", "Turner Avenue"),
        ("WEDGEWWOD AVENUE", "Wedgwood Avenue"),
    ))

    # The cherrytree record has wrong postcode
    survey_list.loc[survey_list[street_col] == "Cherrytree road", "Post Code"] = "ST5 7BP"

    replace_streets((
        ("MONUMENT RD", "Monument Road"),
        # Generally replace " RD" with " Road"
        (" RD", " Road"),
        ("HILARY Road", "Hillary Road"),
        # Remove full stops from the street name
        (".", ""),
        ("Chatworth road", "Chatsworth Place"),
        ("Wood Croft", "Woodcroft"),
        ("Milstone Avenue", "Millstone Avenue"),
        # Strip out /TALKE from the street name
        ("/TALKE", ""),
        ("Woodcutts Street", "Woodshutts Street"),
        ("HILLARY AVENUE", "Hillary Road"),
        # Replace " Rd" with " Road"
        (" Rd", " Road"),
    ))

    # We have a record listed as 19, MAPLE AVENUE ST7 1JX, when it should be 19, Hollins Crescent ST7 1JX
    survey_list.loc[
        (survey_list[street_col] == "MAPLE AVENUE") &
        (survey_list["NO."].isin([19])) &
        (survey_list["Post Code"] == "ST7 1JX"),
        street_col
    ] = "Hollins Crescent"

    # However, some of the maple avenue records, are indeed Maple avenue, but are listed with the wrong postcode.
    # E.g. number 26
    survey_list.loc[
        (survey_list[street_col] == "MAPLE AVENUE") &
        (survey_list["NO."].isin([26])) &
        (survey_list["Post Code"] == "ST7 1JX"),
        "Post Code"
    ] = "ST7 1JW"

    replace_streets((
        ("BURSLEY Road", "Bursley Way"),
        ("Brittania Avenue", "Brittain Avenue"),
        ("Hawthorn Road", "Hawthorne Road"),
        ("Eastdale Place", "Easdale Place"),
        ("Wedgewood Road", "Wedgwood Road"),
        ("Droitwich Drive", "Droitwich Close"),
        ("Longdale Road", "Langdale Road"),
    ))

    # We have 2 addresses in the survey list that don't have postcodes. We'll manually add them in
    for street, postcode in (("Rogers Avenue", "ST5 9AT"), ("Cedar Road", "ST5 7BY")):
        survey_list.loc[
            (survey_list[street_col] == street) & pd.isnull(survey_list["Post Code"]),
            "Post Code"
        ] = postcode

    # Survey postcodes that never occur in the asset list; sets give O(1) membership tests in the loop below
    known_postcodes = set(asset_list["matching_postcode"])
    missed_postcodes = {
        postcode.lower() for postcode in survey_list["Post Code"] if postcode.lower() not in known_postcodes
    }

    matching_lookup = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):

        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower().strip()
        house_number = str(house_number)

        # Filter on the first line of the address. Matching is literal (regex=False) so that street names
        # or house numbers containing regex metacharacters cannot break or distort the match.
        street = row[street_col].lower().strip()
        candidates = asset_list[asset_list["matching_address"].str.contains(street, regex=False)]
        candidates = candidates[candidates["matching_address"].str.contains(house_number, regex=False)]

        # Progressively tighten the filter until exactly one asset row is left
        if len(candidates) != 1:
            candidates = candidates[candidates["HouseNo"] == house_number]
        if len(candidates) != 1:
            candidates = candidates[
                candidates["matching_postcode"].str.lower().str.contains(row["Post Code"].lower(), regex=False)
            ]

        if len(candidates) != 1:
            postcode_lower = row["Post Code"].lower()
            if postcode_lower not in missed_postcodes:
                raise ValueError(
                    "Investigate: expected exactly one asset list match but found {} for street={!r}, "
                    "house number={!r}, postcode={!r}".format(
                        len(candidates), row[street_col], house_number, postcode_lower
                    )
                )
            # The postcode does not exist in the asset list at all, so the survey row is recorded as unmatched
            asset_list_row_id = None
        else:
            asset_list_row_id = candidates["asset_list_row_id"].values[0]

        matching_lookup.append(
            {
                "survey_list_row_id": row["survey_list_row_id"],
                "asset_list_row_id": asset_list_row_id,
            }
        )

    return pd.DataFrame(matching_lookup)
+
@staticmethod
def identify_built_form_ha6(property_string):
    """
    Identify the built form of a property from the given string.

    Matching is case-insensitive and treats hyphens and repeated whitespace as a single space, so
    "Semi-Detached" and "semi  detached" are both recognised as 'Semi-Detached'.

    :param property_string: The string describing the property. Non-string values (e.g. None or NaN from a
        missing cell) are accepted and yield None
    :return: The identified built form, or None if it cannot be identified
    """
    if not isinstance(property_string, str):
        return None

    # Keywords for each built form. Order matters: 'semi detached' must be tested before 'detached',
    # because every semi detached description also contains the word 'detached'.
    built_forms = (
        ('Semi-Detached', ('semi detached',)),
        ('Detached', ('detached',)),
        ('Mid-Terrace', ('mid terrace', 'mid town house')),
        ('End-Terrace', ('end terrace', 'end town house')),
    )

    # Normalize the input: lower case, hyphens to spaces, runs of whitespace collapsed
    property_string_normalized = " ".join(property_string.lower().replace("-", " ").split())

    for built_form, keywords in built_forms:
        if any(keyword in property_string_normalized for keyword in keywords):
            return built_form

    # No built form is identified
    return None
+
def load(self):
    """
    Populate self.data with the asset list, survey list and matched lookup of every configured HA.

    When self.use_cache is set, the previously pickled inputs are read back from S3 and nothing else
    happens. Otherwise every entry of self.files is loaded from its source file and the combined result is
    pickled to S3 for later cached runs.
    """
    cache_bucket = "retrofit-datalake-dev"
    cache_key = "ha-analysis/batch3-inputs.pickle"

    if self.use_cache:
        self.data = read_pickle_from_s3(bucket_name=cache_bucket, s3_file_name=cache_key)
        return

    loaded = {}
    for ha_name, file_config in self.files.items():
        logger.info("Loading asset list for {}".format(ha_name))
        asset_config = file_config["asset_list"]
        asset_list = self.load_asset_list(
            file_path=asset_config["filepath"],
            ha_name=ha_name,
            sheet_name=asset_config["sheetname"]
        )

        # An HA without a survey list simply carries None for the survey data
        survey_list, matched_lookup = None, None
        survey_config = file_config.get("survey_list")
        if survey_config:
            logger.info("Loading survey list for {}".format(ha_name))
            survey_list, matched_lookup = self.load_survey_list(
                asset_list=asset_list,
                file_path=survey_config["filepath"],
                ha_name=ha_name,
                sheet_name=survey_config["sheetname"]
            )

        loaded[ha_name] = dict(
            asset_list=asset_list,
            survey_list=survey_list,
            matched_lookup=matched_lookup,
        )

    self.data = loaded

    # Pickle the freshly loaded inputs to S3 so that the next run can use the cache
    save_pickle_to_s3(data=self.data, bucket_name=cache_bucket, s3_file_name=cache_key)
+
+
# Bucket holding the processed per-HA results
_RESULTS_BUCKET = "retrofit-datalake-dev"

# Per-HA mapping from the raw labels found in the asset lists to EPC style property types / built forms
_PROPERTY_TYPE_LOOKUP = {
    "ha_1": {
        "built_form": {
            'Mid Terrace': 'Mid-Terrace',
            'Semi-Detached': 'Semi-Detached',
            'End Terrace': 'End-Terrace',
            'Detached': 'Detached',
            'Enclosed Mid': 'Mid-Terrace',
            'Detached Local Connect': 'Detached',
        }
    },
    "ha_6": {
        "property_type": {
            'HOUSE': "House",
            'GROUND FLOOR FLAT': "Flat",
            'UPPER FLOOR FLAT': "Flat",
            'MAISONETTE': "Maisonette",
            'BUNGALOW': "Bungalow",
            'WARDEN BUNGALOW': "Bungalow",
            'WARDEN FLAT': "Flat",
            'EXTRACARE SCHEME': "Flat",
        }
    },
    "ha_14": {
        "property_type": {
            "House": "House",
            "Flat": "Flat",
            "Bungalow": "Bungalow",
            "Maisonette": "Maisonette",
        }
    },
    "ha_39": {
        "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
        "1st floor flat": {"property_type": "Flat", "built_form": None},
        "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
        "Ground floor flat": {"property_type": "Flat", "built_form": None},
        "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
        "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
        "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
        "2nd floor flat": {"property_type": "Flat", "built_form": None},
        "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
        "3rd floor flat": {"property_type": "Flat", "built_form": None},
        "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
        "Maisonette": {"property_type": "Maisonette", "built_form": None},
        "Detached house": {"property_type": "House", "built_form": "Detached"},
        "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
        "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
        "Basement flat": {"property_type": "Flat", "built_form": None},
        "Cluster House": {"property_type": "House", "built_form": "Detached"},
        "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
        "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
        "4th floor flat": {"property_type": "Flat", "built_form": None},
        "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
        "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
    },
    "ha_107": {
        "property_type": {
            "HOUSE": "House",
            "BUNGALOW": "Bungalow",
            "GRD FLOOR FLAT": "Flat",
            "FIRST FLOOR FLAT": "Flat",
            "SHELTERED BUNGALOW": "Bungalow",
            "MAISONETTE": "Maisonette",
            "SECOND FLOOR FLAT": "Flat",
            "SHELTERED FIRST FLR": "Flat",
            "SHELTERED GROUND FLR": "Flat",
            "GRD FLOOR BED SIT": "House"
        },
        "built_form": {
            "Semi Detached": "Semi-Detached",
            "Mid Terrace": "Mid-Terrace",
            "End Terrace": "End-Terrace",
            "Detached": "Detached",
            "Detatched": "Detached",
        }
    }
}


def _resolve_property_type(ha_name, property_meta):
    """
    Derive (property_type, built_form) for one asset list row using the columns specific to its HA.

    :param ha_name: key of the housing association, e.g. "ha_6"
    :param property_meta: the asset list row (supports item access by column name)
    :return: tuple (property_type, built_form); either element may be None
    :raises NotImplementedError: if no rules exist for ha_name
    """
    if ha_name == "ha_1":
        property_type = property_meta["Asset Type"]
        # We correct a small error
        if property_type == "a":
            property_type = "House"

        # Remap bedsits to flats
        if property_type in ["Bedsit", "Room"]:
            property_type = "Flat"

        built_form = _PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None)

    elif ha_name == "ha_6":
        property_type = _PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
        built_form = property_meta["built_form"]

    elif ha_name == "ha_14":
        if property_meta["Asset Type Description"] == "Block - Repair":
            # We try and deduce if it's a flat or house, depending on if it has "room" in the address
            property_type = "House" if "room" in property_meta["Address 1"].lower() else "Flat"
        else:
            property_type = _PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][
                property_meta["Asset Type Description"]
            ]
        built_form = None

    elif ha_name == "ha_39":
        property_type_config = _PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
        property_type = property_type_config.get("property_type", None)
        built_form = property_type_config.get("built_form", None)

        if property_type is None:
            # Unknown construction style: fall back on the presence of "flat" in the address
            property_type = "Flat" if "flat" in property_meta["matching_address"] else "House"

    elif ha_name == "ha_107":
        dwelling_style = property_meta["Dwelling Style"]
        if isinstance(dwelling_style, str):
            dwelling_style = dwelling_style.strip()

        property_type = _PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["DwellingType"])
        built_form = _PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(dwelling_style, None)

        if property_type is None:
            # The dwelling type is not in the lookup, so fall back on other columns. Later rules override
            # earlier ones.
            if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
                property_type = "House"

            if "flat" in property_meta["Wall Construction"].lower():
                property_type = "Flat"

            if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
                # Handle a few specific cases
                property_type = "Bungalow"

            if property_meta["Street"] == "School View":
                property_type = "Bungalow"

    else:
        raise NotImplementedError("Implement me")

    return property_type, built_form


def _row_id_as_uprn(property_meta, ha_name):
    """Return the numeric suffix of the asset_list_row_id, used as a proxy when no real UPRN is available."""
    return int(property_meta["asset_list_row_id"].split(ha_name)[1])


def _build_result_row(property_meta, eligibility, searcher, cavity_age):
    """Flatten the eligibility outcome of one property into the record stored in results_df."""
    return {
        "row_id": property_meta["asset_list_row_id"],
        "uprn": eligibility.epc["uprn"],
        "property_type": eligibility.epc["property-type"],
        "gbis_eligible": eligibility.gbis_warmfront,
        "eco4_eligible": eligibility.eco4_warmfront["eligible"],
        "eco4_message": eligibility.eco4_warmfront["message"],
        "sap": float(eligibility.epc["current-energy-efficiency"]),
        "gbis_eligible_future": eligibility.gbis["eligible"],
        "gbis_eligible_future_message": eligibility.gbis["message"],
        "eco4_eligible_future": eligibility.eco4["eligible"],
        "eco4_eligible_future_message": eligibility.eco4["message"],
        # Property components
        "roof": eligibility.roof["clean_description"],
        "walls": eligibility.walls["clean_description"],
        "cavity_type": eligibility.cavity["type"],
        "heating": eligibility.epc["mainheat-description"],
        "tenure": eligibility.tenure,
        "date_epc": eligibility.epc["lodgement-date"],
        "loft_thickness": eligibility.roof["insulation_thickness"],
        "cavity_age": cavity_age,
        # The raw walls / roof dictionaries are merged in here; their keys win over equally named keys above
        **eligibility.walls,
        **eligibility.roof,
        "is_estimated": searcher.newest_epc.get("estimated") is not None,
        "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
        "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
    }


def _classify_eco4_confidence(sap, post_install_sap):
    """
    Grade how likely a property reaches its required post-install SAP score.

    Properties with a current SAP of 38 or below (F or G) are measured against a target of 55, all others
    against 69 (presumably the lower bounds of bands D and C - confirm against the scheme rules). Reaching the
    target is "high confidence", two points above it "highest confidence", up to two points below it
    "medium confidence" and anything else "unlikely".
    """
    target = 55 if sap <= 38 else 69
    if post_install_sap >= target + 2:
        return "highest confidence"
    if post_install_sap >= target:
        return "high confidence"
    if post_install_sap >= target - 2:
        return "medium confidence"
    return "unlikely"


def _score_eligible_properties(ha_name, created_at, results, scoring_data):
    """
    Run the SAP change model for the ECO4 candidates and attach uplift and confidence to the results.

    :return: tuple (results_df, scoring_df)
    """
    scoring_df = pd.DataFrame(scoring_data)
    scoring_df = scoring_df.drop(
        columns=[
            "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
            "carbon_ending"
        ]
    )

    model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    results_df = pd.DataFrame(results)

    # The uplift of a property is the sum over its prediction rows of (predicted SAP - current SAP)
    predictions = all_predictions["sap_change_predictions"].copy()
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # The upgrade requirements are dependent on the current SAP
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row["row_id"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in results_df[results_df["eco4_eligible"] == True].iterrows()
        ]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )

    return results_df, scoring_df


def get_epc_data(
    loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=True
):
    """
    Search the EPC of every asset list property, assess its eligibility and score the ECO4 candidates.

    For every HA in loader.data the asset list is walked row by row: the property type is derived, the EPC is
    searched, the Warmfront and full GBIS / ECO4 checks are run and ECO4 candidates are prepared for the model.
    The per-HA results are stored in S3 as a pickle. With pull_data=False nothing is computed and the
    previously stored pickle is read back instead.

    :param loader: object whose `data` attribute maps ha_name to a dict holding "asset_list" and "survey_list"
    :param cleaned: passed through to Eligibility, calculate_cavity_age and prepare_model_data_row
    :param cleaning_data: passed through to prepare_model_data_row
    :param created_at: timestamp handed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: passed through to prepare_model_data_row
    :param pull_data: when False, read the stored results from S3 instead of recomputing them
    :return: dict mapping ha_name to {"results_df", "scoring_df", "nodata"}
    :raises ValueError: if loader.data is empty
    :raises NotImplementedError: if an HA has no property type rules
    """
    if not loader.data:
        raise ValueError("Data not found - please run loader.load() first")

    outputs = {}
    for ha_name, data_assets in loader.data.items():
        results_key = f"ha-analysis/{ha_name}/processed_results.pickle"

        if not pull_data:
            # Then we retrieve the data from S3
            # NOTE(review): this unpickles whatever is stored under the key - the bucket must be trusted
            processed_ha_results = read_pickle_from_s3(bucket_name=_RESULTS_BUCKET, s3_file_name=results_key)
            outputs[ha_name] = {
                "results_df": processed_ha_results["results_df"],
                "scoring_df": processed_ha_results["scoring_df"],
                "nodata": processed_ha_results["nodata"]
            }
            continue

        # For each HA, we pull in the data required, and store in S3
        asset_list = data_assets["asset_list"].copy()

        # If the survey list is missing, it means we have not yet completed any surveys and therefore should
        # only consider the most recent EPC
        consider_penultimate_epc = data_assets["survey_list"] is not None

        # We iterate through the asset list and pull what we need
        results, scoring_data, nodata = [], [], []
        for _, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):

            if property_meta["matching_postcode"] is None:
                continue

            property_type, built_form = _resolve_property_type(ha_name, property_meta)

            searcher = SearchEpc(
                address1=str(property_meta["HouseNo"]),
                postcode=property_meta["matching_postcode"],
                auth_token=EPC_AUTH_TOKEN,
                os_api_key=None,
                full_address=property_meta["matching_address"]
            )
            searcher.ordnance_survey_client.property_type = property_type
            searcher.ordnance_survey_client.built_form = built_form
            searcher.find_property(skip_os=True)

            if searcher.newest_epc is None:
                nodata.append(property_meta)
                continue

            if searcher.newest_epc.get("estimated"):
                # We insert the row ID as our proxy for UPRN
                searcher.newest_epc["uprn"] = _row_id_as_uprn(property_meta, ha_name)

            newest_epc = searcher.newest_epc
            older_epcs = searcher.older_epcs
            full_sap_epc = searcher.full_sap_epc

            # If we have a survey list, we check the penultimate, because the property might have been installed
            penultimate_epc = newest_epc
            if consider_penultimate_epc:
                penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
                if not penultimate_epc:
                    penultimate_epc = newest_epc

            eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()

            newest_is_ineligible = (
                (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront)
            )
            if newest_is_ineligible and consider_penultimate_epc:
                # We check the penultimate epc
                eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
                eligibility.check_gbis_warmfront()
                eligibility.check_eco4_warmfront()
                # The modelling EPC is now the penultimate one, so the "older" EPCs become every other
                # record. An estimated EPC has no lodged record, so nothing is updated in that case.
                if penultimate_epc.get("estimated") is None:
                    older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

            # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
            # Loft MUST be suitable
            cavity_age = None
            if (
                eligibility.walls["is_cavity_wall"] and
                eligibility.walls["is_filled_cavity"] and
                eligibility.loft["suitability"] and
                eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
            ):
                # We check the age of the cavity and if it's particularly old, we flag it
                cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

            # Full checks
            eligibility.check_gbis()
            eligibility.check_eco4()

            if eligibility.eco4_warmfront["eligible"]:
                if eligibility.epc["uprn"] == "":
                    eligibility.epc["uprn"] = _row_id_as_uprn(property_meta, ha_name)

                scoring_dictionary = prepare_model_data_row(
                    property_id=property_meta["asset_list_row_id"],
                    modelling_epc=eligibility.epc,
                    cleaned=cleaned,
                    cleaning_data=cleaning_data,
                    created_at=created_at,
                    old_data=older_epcs,
                    full_sap_epc=full_sap_epc,
                    photo_supply_lookup=photo_supply_lookup,
                    floor_area_decile_thresholds=floor_area_decile_thresholds
                )
                scoring_data.extend(scoring_dictionary)

            results.append(_build_result_row(property_meta, eligibility, searcher, cavity_age))

        results_df, scoring_df = _score_eligible_properties(ha_name, created_at, results, scoring_data)

        ha_output = {
            "results_df": results_df,
            "scoring_df": scoring_df,
            "nodata": nodata
        }

        # We store the results in S3 as a pickle
        save_pickle_to_s3(data=dict(ha_output), bucket_name=_RESULTS_BUCKET, s3_file_name=results_key)

        outputs[ha_name] = ha_output

    return outputs
+
+
def get_col_widths(dataframe):
    """
    Compute the character width needed to display the index and every column of a dataframe.

    The width of a column is the longest string representation among its cells and its label. Column labels
    do not have to be strings, duplicate labels are supported, and a column without rows is as wide as its
    label.

    :param dataframe: the pandas DataFrame to measure
    :return: list of widths; the first entry belongs to the index, the rest follow the column order
    """
    # First we find the maximum length of the index column (its values and its name)
    index_width = max(
        [len(str(label)) for label in dataframe.index.values] + [len(str(dataframe.index.name))]
    )

    widths = [index_width]
    # Columns are addressed by position so that duplicate labels still yield exactly one Series each
    for position, label in enumerate(dataframe.columns):
        cell_lengths = dataframe.iloc[:, position].astype(str).map(len)
        # An empty column has no maximum (pandas returns NaN), so it contributes a width of 0
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        widths.append(max(longest_cell, len(str(label))))

    return widths
+
+
+def analyse_ha_data(outputs, loader):
+ """
+ The approach we take within this function is the following:
+ For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
+ characterisation can be broken down as the following:
+ 1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
+ 2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
+ a CIGA check
+ 3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
+ insulation
+ 4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
+ any cirsumstances, given the available data
+
+ Then, for any property that has NOT been identifid by Warmfront, we identify properties that look like they would
+ qualify under the strictest criteria, and mark these as potential additional opportunities.
+
+ :return:
+ """
+
+ eco4_rate = 1710
+ gbis_rate = 600
+
+ ha_analysis_results = []
+ ha_revenue_results = []
+ for ha_name, datasets in outputs.items():
+
+ inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+ # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for
+ # yet
+ #
+ import random
+ randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0])
+ inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes
+ inputs["asset_list"]["funding_scheme"] = None
+ inputs["asset_list"]["funding_scheme"] = np.where(
+ inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)",
+ inputs["asset_list"]["randomly_allocated_schemes"],
+ inputs["asset_list"]["funding_scheme"]
+ )
+
+ # TODO: Also temp, just for HA 6
+ if ha_name == "ha_6":
+ inputs["survey_list"]["funding_scheme"] = None
+ inputs["survey_list"]["funding_scheme"] = np.where(
+ inputs["survey_list"][
+ 'AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH",
+ "ECO4",
+ "GBIS"
+ )
+
+ # End placholder
+
+ results_df = datasets["results_df"].copy()
+
+ analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename(
+ columns={"row_meaning": "asset_identification_status"}
+ ).merge(
+ results_df,
+ how="left",
+ right_on="row_id",
+ left_on="asset_list_row_id"
+ )
+
+ # We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is
+ # remaining
+
+ if inputs["matched_lookup"] is not None:
+ analysis_data = analysis_data.merge(
+ inputs["matched_lookup"], how="left", on="asset_list_row_id"
+ )
+ # Drop any rows that have a survey_list_row_id
+ analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])]
+
+ # If we have a survey list, we merge this onto the results
+ n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique()
+
+ properties_sold = (
+ inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if
+ inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"])
+ )
+ properties_sold_eco4 = (
+ properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if
+ (not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0
+ )
+ properties_sold_gbis = (
+ properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if
+ (not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0
+ )
+
+ # We now calculate the number of remaining properties, by scheme
+ remaining_properties = analysis_data[
+ analysis_data["asset_identification_status"] == "identified potential eco works (CWI)"
+ ].copy()
+ remaining_properties["prospect_type"] = None
+
+ remaining_properties_by_scheme = (
+ remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index()
+ )
+
+ n_remaining_properties_eco4 = remaining_properties_by_scheme[
+ remaining_properties_by_scheme["funding_scheme"] == "ECO4"
+ ]["asset_list_row_id"].values[0]
+
+ n_remaining_properties_gbis = remaining_properties_by_scheme[
+ remaining_properties_by_scheme["funding_scheme"] == "GBIS"
+ ]["asset_list_row_id"].values[0]
+
+ # For the remaining properties, we use the results of the eligibility process to classify the property into
+ # one of multiple categories
+ #
+ # For properties that have been identified as ECO4
+ # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because
+ # Warmfront regularly re-surveys properties which then fall within the SAP requirement
+ # - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties
+ # here and re-surveying is a common practicce by Warmfront. Additionally, many of the social homes have
+ # very old EPCs which may score lower when re-done
+ # 2) Meets Fabric requirements, not SAP
+ # Warmfront has identified the property as eligible, but the EPC is not D or below. We consider this but
+ # label is separately as not a strict
+ # 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity.
+ # - we don't have a SAP constraint here because the EPC is (currently) showing what the property might
+ # actually look like after retrofit and so the EPC currently being a C or above means little, because
+ # the updated EPC, showing an empty cavity, could bring the property within
+ # 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation.
+ # - No SAP constraint, for the same reason as in category 2)
+ # 5) Looks like GBIS instead
+ # 6) Does not look like ECO4 candidate
+ #
+ # For properties that have been identified as GBIS
+ # 1) Strict GBIS candidates
+ # 2) Properties that actually look like strict GBIS candidates
+ # 3) Subject to CIGA check - Filled cavity
+ # 4) Does not look like a GBIS candidate
+
+ remaining_eco4_df = remaining_properties[
+ remaining_properties["funding_scheme"] == "ECO4"
+ ].copy()
+
+ ####################################
+ # ECO4
+ ####################################
+
+ # 1) We identify this if:
+ # - remaining_properties["eco4_eligible"] == True
+
+ remaining_eco4_df["prospect_type"] = np.where(
+ (remaining_eco4_df["eco4_eligible"] == True),
+ "strict ECO4",
+ remaining_eco4_df["prospect_type"]
+ )
+
+ # 2) Meets fabric requirements
+ remaining_eco4_df["prospect_type"] = np.where(
+ (
+ (remaining_eco4_df["eco4_message"] == "sap too high") &
+ remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
+ remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
+ pd.isnull(remaining_eco4_df["prospect_type"])
+ ),
+ "ECO4 if SAP downgrade",
+ remaining_eco4_df["prospect_type"]
+ )
+
+ # 3) We identify this if it has a filled cavity but meets the loft conditions
+ # TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 150mm)
+ # to account for measurement error
+ remaining_eco4_df["prospect_type"] = np.where(
+ (
+ remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
+ remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
+ ),
+ "ECO4 - Filled cavity - subject to CIGA check",
+ remaining_eco4_df["prospect_type"]
+ )
+
+ # 4) We identify this by ensuring the cavity if empty or partial, and the loft has between 101 and 270mm
+ remaining_eco4_df["prospect_type"] = np.where(
+ (
+ remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) &
+ remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"])
+ ),
+ "ECO4 prospect - empty cavity, loft insulation below regulation",
+ remaining_eco4_df["prospect_type"]
+ )
+
+ # 5) Looks like GBIS instead
+ remaining_eco4_df["prospect_type"] = np.where(
+ (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
+ "Looks like GBIS",
+ remaining_eco4_df["prospect_type"]
+ )
+
+ # 6) This is everything else (i.e. both the cavity is full and the loft insulation is above 100mm)
+ remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna(
+ "Does not look like ECO4 candidate"
+ )
+
+ ####################################
+ # GBIS
+ ####################################
+
+ remaining_gbis = remaining_properties[
+ remaining_properties["funding_scheme"] == "GBIS"
+ ].copy()
+
+ # 1) Strict GBIS candidates
+ remaining_gbis["prospect_type"] = np.where(
+ (
+ (remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False)
+ ),
+ "strict GBIS",
+ remaining_gbis["prospect_type"]
+ )
+
+ # 2) GBIS candidates that look like strict ECO4 candidates
+ remaining_gbis["prospect_type"] = np.where(
+ (remaining_gbis["eco4_eligible"] == True),
+ "GBIS - Upgradable to ECO4",
+ remaining_gbis["prospect_type"]
+ )
+
+ # 3) Subject to CIGA check - Filled cavity
+ remaining_gbis["prospect_type"] = np.where(
+ (
+ remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
+ pd.isnull(remaining_gbis["prospect_type"])
+ ),
+ "GBIS - Filled cavity - subject to CIGA check",
+ remaining_gbis["prospect_type"]
+ )
+
+ # 4) Everything else
+ remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna(
+ "Does not look like GBIS candidate"
+ )
+
+ ####################################
+ # Surplus properties
+ ####################################
+
+ # Take properties that were not identified by Warmfront and identify those that look like they would qualify
+ # under the strictest criteria
+ surplus_df = analysis_data[
+ analysis_data["asset_identification_status"] != "identified potential eco works (CWI)"
+ ].copy()
+
+ eco4_surplus = surplus_df[
+ (
+ (surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") &
+ (
+ surplus_df["eligibility_classification"].isin(
+ ["high confidence", "highest confidence", "medium confidence"]
+ )
+ )
+ )
+ ].copy()
+
+ gbis_surplus = surplus_df[
+ (
+ (surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & (
+ surplus_df["eligibility_cavity_type"].isin(["empty", "partial"])
+ )
+ )
+ ].copy()
+
+ # Perform some checks to make sure we have all of the values
+ remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
+ if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
+ raise ValueError(
+ "Number of remaining properties does not match the number of properties in remaining ECO4 dict"
+ )
+
+ remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
+ if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
+ raise ValueError(
+ "Number of remaining properties does not match the number of properties in remaining GBIS dict"
+ )
+
+ to_append = {
+ "ha_name": ha_name,
+ "n_properties_in_asset_list": n_properties_in_asset_list,
+ ############
+ # ECO4
+ ############
+ "properties_sold_eco4": properties_sold_eco4,
+ "n_remaining_properties_eco4": n_remaining_properties_eco4,
+ **remaining_eco4_dict,
+ ############
+ # GBIS
+ ############
+ "properties_sold_gbis": properties_sold_gbis,
+ "n_remaining_properties_gbis": n_remaining_properties_gbis,
+ **remaining_gbis_dict,
+ ############
+ # Surplus
+ ############
+ "n_eco4_surplus": eco4_surplus.shape[0],
+ "n_gbis_surplus": gbis_surplus.shape[0],
+ }
+
+ ha_analysis_results.append(to_append)
+
+ revenue_to_append = {
+ "ha_name": ha_name,
+ "£ Remaining from asset list": (
+ n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
+ ),
+ "Of which: Strict": (
+ to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
+ to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
+ ),
+ "Of which: Subject to CIGA": (
+ to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
+ to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
+ ),
+ "Of which: Prospect, not perfect strict prospect": (
+ to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
+ to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
+ ),
+ "Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate,
+ "Of which: Does not look like prospect": (
+ to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
+ to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
+ ),
+ "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
+ "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
+ }
+
+ # Perform a quick check:
+ if revenue_to_append["£ Remaining from asset list"] - (
+ revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] +
+ revenue_to_append["Of which: Prospect, not perfect strict prospect"] +
+ revenue_to_append["Of which: Potential downgrade to GBIS"] +
+ revenue_to_append["Of which: Does not look like prospect"]
+ ) > 1:
+ raise ValueError("Error between top level revenue figures and breakdown - investigate me")
+
+ ha_revenue_results.append(revenue_to_append)
+
+ ha_analysis_results = pd.DataFrame(ha_analysis_results)
+ ha_revenue_results = pd.DataFrame(ha_revenue_results)
+
+ # Automate creation of the excel
+ # Create a Pandas Excel writer using XlsxWriter as the engine
+ with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
+ # Write each dataframe to a different worksheet without the index
+ for df, sheet in [(ha_revenue_results, 'Total Revenue'),
+ (ha_analysis_results, 'By ECO4 and GBIS')]:
+
+ df.to_excel(writer, sheet_name=sheet, index=False)
+
+ # Auto-adjust columns' width
+ for i, width in enumerate(get_col_widths(df)):
+ writer.sheets[sheet].set_column(i, i, width)
+
+
+def app():
+ """
+ This app contains the housing association analysis for HAs 1, 6, 14, 39 and 107.
+ Only HA 6 has a survey list; the other HAs only have an asset list.
+ :return: None
+ """
+
+ use_cache = True
+
+ files = {
+ "ha_1": {
+ "asset_list": {
+ "filepath": "etl/eligibility/ha_15_32/HA 1 - ASSET LIST.xlsx",
+ "sheetname": "HA 1"
+ }
+ },
+ "ha_6": {
+ "asset_list": {
+ "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
+ "sheetname": "HA 6"
+ },
+ "survey_list": {
+ "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
+ "sheetname": "HA 6"
+ }
+ },
+ "ha_14": {
+ "asset_list": {
+ "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx",
+ "sheetname": "HA 14"
+ }
+ },
+ "ha_39": {
+ "asset_list": {
+ "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx",
+ "sheetname": "Sheet1"
+ }
+ },
+ "ha_107": {
+ "asset_list": {
+ "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx",
+ "sheetname": "HA 107"
+ }
+ }
+ }
+
+ loader = DataLoader(files, use_cache)
+ loader.load()
+
+ # TODO: We probably need to make sure that we have all of the columns that we need
+
+ # We load in the additional data required to perform the analysis
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ # Patch to handle floor descriptions that are missing from the cleaned lookup
+ cleaned["floor-description"].extend(
+ [
+ {'original_description': 'To external air, uninsulated (assumed)',
+ 'clean_description': 'To external air, no insulation', 'thermal_transmittance': None,
+ 'thermal_transmittance_unit': None, 'is_assumed': True, 'is_to_unheated_space': False,
+ 'is_to_external_air': True, 'is_suspended': False, 'is_solid': False, 'another_property_below': False,
+ 'insulation_thickness': 'none'},
+ {'original_description': 'To unheated space, uninsulated (assumed)',
+ 'clean_description': 'To unheated space, uninsulated', 'thermal_transmittance': None,
+ 'thermal_transmittance_unit': None, 'is_assumed': True, 'is_to_unheated_space': True,
+ 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, 'another_property_below': False,
+ 'insulation_thickness': 'average'}
+ ]
+ )
+
+ # We treat unknown loft insulation as no insulation
+ cleaned["roof-description"].extend(
+ [
+ {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
+ 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True,
+ 'is_roof_room': False,
+ 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True,
+ 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'}
+ ]
+ )
+
+ # Patch mainheatcont-description
+ cleaned["mainheatcont-description"].extend(
+ [
+ {'original_description': 'None', 'clean_description': 'None', 'thermostatic_control': None,
+ 'charging_system': None, 'switch_system': None, 'no_control': None, 'dhw_control': None,
+ 'community_heating': None, 'multiple_room_thermostats': False, 'auxiliary_systems': None, 'trvs': None,
+ 'rate_control': None}
+ ]
+ )
+
+ # We patch this record because there is another property below
+ for x in cleaned["floor-description"]:
+ if x["original_description"] == '(Same dwelling below) insulated (assumed)':
+ x["another_property_below"] = True
+ x["thermal_transmittance"] = 0
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ created_at = datetime.now().isoformat()
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ outputs = get_epc_data(
+ loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=False
+ )
+
+ # for ha_name, datasets in outputs.items():
+ # datasets["results_df"] = datasets["results_df"].drop(
+ # columns=["eligibility_cavity_type", "eligibility_loft_type"]
+ # )
+ #
+ # # Re-do
+ # res = []
+ # for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]):
+ # epc = {
+ # "walls-description": row["walls"],
+ # "roof-description": row["roof"],
+ # "floor-description": "",
+ # "tenure": "",
+ # "current-energy-efficiency": row["sap"],
+ # }
+ # eligibility = Eligibility(epc=epc, cleaned=cleaned)
+ # eligibility.check_eco4_warmfront()
+ # res.append(
+ # {
+ # "row_id": row["row_id"],
+ # "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
+ # "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
+ # }
+ # )
+ #
+ # # Merge back on
+ # res = pd.DataFrame(res)
+ # datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id")
+ #
+ # # Re-save in s3
+ # save_pickle_to_s3(
+ # data={
+ # "results_df": datasets["results_df"],
+ # "scoring_df": datasets["scoring_df"],
+ # "nodata": datasets["nodata"]
+ # },
+ # bucket_name="retrofit-datalake-dev",
+ # s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle"
+ # )
diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py
index 4987a23e..4615d2c4 100644
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@@ -33,7 +33,6 @@ NO_SUFFIX_COMPONENT_COLS = [x.lower() for x in NO_SUFFIX_COMPONENT_COLS]
ENDING_SUFFIX_COMPONENT_COLS = [x.lower() for x in ENDING_SUFFIX_COMPONENT_COLS]
POTENTIAL_COLUMNS = [x.lower() for x in POTENTIAL_COLUMNS]
-
# These lookups are used to clean the construction age band
construction_age_bounds_map = {
"England and Wales: before 1900": {"l": 0, "u": 1899},
@@ -74,7 +73,8 @@ class EPCDataProcessor:
Handle data loading and data preprocessing
"""
- def __init__(self, data: pd.DataFrame | None = None, cleaning_averages: pd.DataFrame | None = None, run_mode: str = "training", violation_mode: bool = False) -> None:
+ def __init__(self, data: pd.DataFrame | None = None, cleaning_averages: pd.DataFrame | None = None,
+ run_mode: str = "training", violation_mode: bool = False) -> None:
"""
:param filepath: If specified, is the physical location of the data
:param is_newdata: Indicates if we are processing new, testing data.
@@ -82,23 +82,23 @@ class EPCDataProcessor:
want to perform, such as confine_data()
"""
is_data_a_dataframe = isinstance(data, pd.DataFrame)
- self.data : pd.DataFrame = data if is_data_a_dataframe else pd.DataFrame()
+ self.data: pd.DataFrame = data if is_data_a_dataframe else pd.DataFrame()
is_cleaning_averages_a_dataframe = isinstance(cleaning_averages, pd.DataFrame)
- self.cleaning_averages : pd.DataFrame = cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
+ self.cleaning_averages: pd.DataFrame = cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
# FOR NOW IF VIOLATION MODE IS ON, WE USE RUN MODE AS NEWDATA
self.violation_mode = violation_mode
if run_mode not in ["training", "newdata"]:
raise ValueError("Run mode must be either training or newdata")
self.run_mode = run_mode if not violation_mode else "newdata"
-
+
def prepare_data(self, filepath: Path | str | None = None) -> None:
"""
Given the run mode, we apply the relevant pipeline steps
Ignore step is used to highlight which steps are not needed in newdata
"""
-
+
ignore_step = True if self.run_mode == "newdata" else False
if filepath is not None:
@@ -126,7 +126,7 @@ class EPCDataProcessor:
self.fill_na_fields()
self.sort_data_by_uprn_lodgement_date(ignore_step=ignore_step)
-
+
# Final re-casting after data transformed and prepared
self.recast_df_columns(column_mappings=COLUMNTYPES, auto_subset_columns=True)
self.recast_all_data(column_mappings=COLUMNTYPES, auto_subset_columns=True)
@@ -138,31 +138,35 @@ class EPCDataProcessor:
self.add_local_authority_to_cleaning_average(ignore_step=ignore_step)
# TODO: check if this has impact on training dataset
- cleaned_data = self.apply_averages_cleaning(
- data_to_clean=self.data,
- cleaning_data=self.cleaning_averages,
- cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
- colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
- )
+ # cleaned_data = self.apply_averages_cleaning(
+ # data_to_clean=self.data,
+ # cleaning_data=self.cleaning_averages,
+ # cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+ # colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+ # )
+
+ # When running in newdata mode, cleaning_averages has lower-case column names, so we coerce them back to upper case
+ cleaning_averages = self.cleaning_averages.copy()
+ if self.run_mode == "newdata":
+ cleaning_averages.columns = cleaning_averages.columns.str.upper()
cleaned_data = self.apply_averages_cleaning(
- data_to_clean=self.data,
- cleaning_data=self.cleaning_averages,
- cols_to_merge_on=COLUMNS_TO_MERGE_ON,
- )
-
+ data_to_clean=self.data,
+ cleaning_data=cleaning_averages,
+ cols_to_merge_on=COLUMNS_TO_MERGE_ON,
+ )
+
self.data = self.data if cleaned_data is None else cleaned_data
self.cast_cleaning_averages_columns_to_lower(ignore_step=ignore_step)
self.cast_data_columns_to_lower()
-
def cast_data_columns_to_lower(self):
"""
Convert all columns names to lower
"""
self.data.columns = self.data.columns.str.lower()
-
+
def cast_cleaning_averages_columns_to_lower(self, ignore_step: bool = False):
"""
Convert all column names to lower
@@ -171,9 +175,9 @@ class EPCDataProcessor:
if ignore_step:
return
-
+
self.cleaning_averages.columns = self.cleaning_averages.columns.str.lower()
-
+
def add_local_authority_to_cleaning_average(self, ignore_step: bool = False):
"""
Add the Local authority column to the cleaning averages
@@ -182,7 +186,7 @@ class EPCDataProcessor:
if ignore_step:
return
-
+
self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[0]
def fill_invalid_constituency_fields(self, ignore_step: bool = False):
@@ -195,7 +199,7 @@ class EPCDataProcessor:
if ignore_step:
return
-
+
self.data = self.data.fillna({"CONSTITUENCY": self.data["CONSTITUENCY"].mode().values[0]})
def sort_data_by_uprn_lodgement_date(self, ignore_step: bool = False):
@@ -218,7 +222,6 @@ class EPCDataProcessor:
for col in convert_to_lower:
self.data[col] = self.data[col].str.lower()
-
def remap_build_form(self):
"""
Remap build form to standard values
@@ -226,7 +229,6 @@ class EPCDataProcessor:
"""
self.data["BUILT_FORM"] = self.data["BUILT_FORM"].replace(BUILT_FORM_REMAP)
-
def remap_anomalies(self):
"""
Remap anomalies to None
@@ -258,7 +260,7 @@ class EPCDataProcessor:
if ignore_step:
return
-
+
self.data["FLOOR_LEVEL"] = self.data["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP)
def load_data(self, filepath, low_memory=False) -> None:
@@ -404,7 +406,8 @@ class EPCDataProcessor:
# self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)
# # Final re-casting after data transformed and prepared
- # coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.is_newdata else COLUMNTYPES
+ # coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.is_newdata else
+ # COLUMNTYPES
# for k, v in coltypes.items():
# self.data[k] = self.data[k].astype(v)
# self.data = self.data.astype(coltypes)
@@ -423,7 +426,7 @@ class EPCDataProcessor:
# cleaning_data=self.cleaning_averages,
# cols_to_merge_on=COLUMNS_TO_MERGE_ON
# )
-
+
# self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[0]
# self.cleaning_averages.columns = self.cleaning_averages.columns.str.lower()
@@ -431,7 +434,6 @@ class EPCDataProcessor:
# return self.data, self.cleaning_averages
-
def na_remapping(self, auto_subset_columns: bool = False):
fill_na_map_apply = {
@@ -578,7 +580,7 @@ class EPCDataProcessor:
if self.violation_mode:
# TODO: to fill in
return
-
+
if ignore_step:
return
@@ -604,15 +606,15 @@ class EPCDataProcessor:
self.data[key] = self.data[key].astype(value)
else:
self.data[key] = self.data[key].astype(values)
-
+
def recast_all_data(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
"""
Using a dictionary to recast all columns at once
- """
+ """
if auto_subset_columns:
column_mappings = {k: v for k, v in column_mappings.items() if k in self.data.columns}
-
+
self.data = self.data.astype(column_mappings)
def confine_data(self, ignore_step: bool = False):
@@ -642,7 +644,7 @@ class EPCDataProcessor:
violation_missing_hotwater_description,
violation_missing_roof_description,
violation_invalid_property_type,
- ], axis=1,
+ ], axis=1,
keys=[
"violation_uprn_missing",
"violation_old_lodgment_date",
@@ -654,8 +656,8 @@ class EPCDataProcessor:
"violation_missing_roof_description",
"violation_invalid_property_type",
]
- )
-
+ )
+
self.data = pd.concat([self.data, violation_df], axis=1)
if ignore_step:
@@ -703,7 +705,7 @@ class EPCDataProcessor:
if self.violation_mode:
# TODO:
return
-
+
if ignore_step:
return
@@ -721,7 +723,9 @@ class EPCDataProcessor:
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
@staticmethod
- def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False):
+ def apply_averages_cleaning(
+ data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False
+ ):
"""
Clean the input DataFrame using averages from a cleaning DataFrame.
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index 217c65b5..322f3238 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -15,6 +15,37 @@ from recommendations.recommendation_utils import (
get_wall_type,
)
+# Known boolean columns (NaN -> False). TODO: Can probably produce this in the property change app and store in S3
+BOOLEAN_VARIABLES = [
+ 'is_cavity_wall', 'is_filled_cavity', 'is_solid_brick', 'is_system_built', 'is_timber_frame',
+ 'is_granite_or_whinstone', 'is_as_built', 'is_cob', 'is_sandstone_or_limestone', 'is_park_home',
+ 'external_insulation', 'internal_insulation', 'is_park_home_ending', 'external_insulation_ending',
+ 'internal_insulation_ending', 'is_to_unheated_space', 'is_to_external_air', 'is_suspended', 'is_solid',
+ 'another_property_below', 'is_pitched', 'is_roof_room', 'is_loft', 'is_flat', 'is_thatched', 'is_at_rafters',
+ 'has_dwelling_above', 'has_radiators', 'has_fan_coil_units', 'has_pipes_in_screed_above_insulation',
+ 'has_pipes_in_insulated_timber_floor', 'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump',
+ 'has_room_heaters', 'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
+ 'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump', 'has_no_system_present',
+ 'has_portable_electric_heaters', 'has_water_source_heat_pump', 'has_electric_heat_pump', 'has_micro-cogeneration',
+ 'has_solar_assisted_heat_pump', 'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric',
+ 'has_mains_gas', 'has_wood_logs', 'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite',
+ 'has_dual_fuel_mineral_and_wood', 'has_smokeless_fuel', 'has_lpg', 'has_b30k', 'has_electricaire',
+ 'has_assumed_for_most_rooms', 'has_underfloor_heating', 'has_radiators_ending', 'has_fan_coil_units_ending',
+ 'has_pipes_in_screed_above_insulation_ending', 'has_pipes_in_insulated_timber_floor_ending',
+ 'has_pipes_in_concrete_slab_ending', 'has_boiler_ending', 'has_air_source_heat_pump_ending',
+ 'has_room_heaters_ending', 'has_electric_storage_heaters_ending', 'has_warm_air_ending',
+ 'has_electric_underfloor_heating_ending', 'has_electric_ceiling_heating_ending', 'has_community_scheme_ending',
+ 'has_ground_source_heat_pump_ending', 'has_no_system_present_ending', 'has_portable_electric_heaters_ending',
+ 'has_water_source_heat_pump_ending', 'has_electric_heat_pump_ending', 'has_micro-cogeneration_ending',
+ 'has_solar_assisted_heat_pump_ending', 'has_exhaust_source_heat_pump_ending', 'has_community_heat_pump_ending',
+ 'has_electric_ending', 'has_mains_gas_ending', 'has_wood_logs_ending', 'has_coal_ending', 'has_oil_ending',
+ 'has_wood_pellets_ending', 'has_anthracite_ending', 'has_dual_fuel_mineral_and_wood_ending',
+ 'has_smokeless_fuel_ending', 'has_lpg_ending', 'has_b30k_ending', 'has_electricaire_ending',
+ 'has_assumed_for_most_rooms_ending', 'has_underfloor_heating_ending', 'multiple_room_thermostats',
+ 'multiple_room_thermostats_ending', 'is_community', 'no_individual_heating_or_community_network',
+ 'is_community_ending', 'no_individual_heating_or_community_network_ending'
+]
+
class BaseDataset:
"""
@@ -616,7 +647,7 @@ class TrainingDataset(BaseDataset):
for col in missings.index:
unique_values = self.df[col].unique()
- if True in unique_values or False in unique_values:
+ if (True in unique_values) or (False in unique_values) or (col in BOOLEAN_VARIABLES):
self.df[col] = self.df[col].fillna(False)
if "none" in unique_values:
self.df[col] = self.df[col].fillna("none")
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index ac86a636..79e36d5b 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -105,6 +105,8 @@ class EPCRecord:
year_built: int = None
number_of_floors: int = None
number_of_open_fireplaces: int = None
+ heat_loss_corridor_bool: bool = None
+ solar_water_heating_flag_bool: bool = None
def __post_init__(self):
# We can have validation and cleaning steps for each of the fields
@@ -378,9 +380,8 @@ class EPCRecord:
raise ValueError("EPC Recrod doesn not contain epc data")
self.prepared_epc["floor-level"] = (
- FLOOR_LEVEL_MAP[self.prepared_epc["floor-level"]]
- if self.prepared_epc["floor-level"] not in DATA_ANOMALY_MATCHES
- else None
+ FLOOR_LEVEL_MAP[self.prepared_epc["floor-level"]] if
+ self.prepared_epc["floor-level"] not in DATA_ANOMALY_MATCHES else None
)
def _clean_number_lighting_outlets(self):
@@ -390,7 +391,7 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
- if self.prepared_epc["fixed-lighting-outlets-count"] == "":
+ if self.prepared_epc["fixed-lighting-outlets-count"] in DATA_ANOMALY_MATCHES:
# We check old EPCs and the full SAP EPC
lighting_data = []
@@ -415,18 +416,19 @@ class EPCRecord:
np.median(lighting_data)
)
else:
- # Use averages from the cleaning dataset, based on the property type, built form, construction age band and local authority
+ # Use averages from the cleaning dataset, based on the property type, built form, construction age
+ # band and local authority
+
+ cleaning_data = self.cleaning_data.copy()
+ # When running in new-data mode, the columns will have been coerced to lower case so we push them
+ # back to upper case
+ if self.run_mode == "newdata":
+ cleaning_data.columns = [x.upper() for x in cleaning_data.columns]
+
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
- data_to_clean=self.epc_record_as_dataframe(
- "prepared_epc", replace_empty_string=True
- ),
- cleaning_data=self.cleaning_data,
- cols_to_merge_on=[
- "PROPERTY_TYPE",
- "BUILT_FORM",
- "CONSTRUCTION_AGE_BAND",
- "LOCAL_AUTHORITY",
- ],
+ data_to_clean=self.epc_record_as_dataframe("prepared_epc", replace_empty_string=True),
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
)
self.prepared_epc["fixed-lighting-outlets-count"] = round(
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]
@@ -535,19 +537,14 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
- map = {
+ mains_gas_map = {
"Y": True,
"N": False,
}
- self.prepared_epc["mains-gas-flag"] = (
- None
- if (
- self.prepared_epc["mains-gas-flag"] == ""
- or self.prepared_epc["mains-gas-flag"] in DATA_ANOMALY_MATCHES
- )
- else map[self.prepared_epc["mains-gas-flag"]]
- )
+ self.prepared_epc["mains-gas-flag"] = None if (
+ self.prepared_epc["mains-gas-flag"] == "" or self.prepared_epc["mains-gas-flag"] in DATA_ANOMALY_MATCHES
+ ) else mains_gas_map[self.prepared_epc["mains-gas-flag"]]
def _clean_heat_loss_corridor(self):
"""
@@ -556,24 +553,33 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
- map = {
+ valid_values = [
+ "no corridor",
+ "unheated corridor",
+ "heated corridor"
+ ]
+
+ boolean_map = {
"no corridor": False,
"unheated corridor": True,
"heated corridor": False,
}
self.prepared_epc["heat-loss-corridor"] = (
- False
- if self.prepared_epc["heat-loss-corridor"] in DATA_ANOMALY_MATCHES
- else map[self.prepared_epc["heat-loss-corridor"]]
+ "no corridor" if self.prepared_epc["heat-loss-corridor"] in DATA_ANOMALY_MATCHES else
+ self.prepared_epc["heat-loss-corridor"]
)
+ if self.prepared_epc["heat-loss-corridor"] not in valid_values:
+ self.prepared_epc["heat-loss-corridor"] = "no corridor"
self.prepared_epc["unheated-corridor-length"] = (
- float(self.prepared_epc["unheated-corridor-length"])
- if self.prepared_epc["unheated-corridor-length"] != ""
- else None
+ float(self.prepared_epc["unheated-corridor-length"]) if
+ self.prepared_epc["unheated-corridor-length"] not in ["", None] else None
)
+ # We create boolean versions of heat-loss-corridor
+ self.heat_loss_corridor_bool = boolean_map[self.prepared_epc["heat-loss-corridor"]]
+
def _clean_count_variables(self):
"""
This method will clean the count variables, if empty or invalid
@@ -581,26 +587,24 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
- fields = {
- "number_of_open_fireplaces": "number-open-fireplaces",
- "number_of_extensions": "extension-count",
- "number_of_storeys": "flat-storey-count",
- "number_of_rooms": "number-habitable-rooms",
- }
+ fields = [
+ "number-open-fireplaces",
+ "extension-count",
+ "flat-storey-count",
+ "number-habitable-rooms"
+ ]
- null_attributes = ["number_of_storeys", "number_of_rooms"]
+ null_attributes = ["flat-storey-count", "number-habitable-rooms"]
- for attribute, epc_field in fields.items():
- # TODO: check this
- # value = self.data["extension-count"]
- value = self.prepared_epc[epc_field]
- if value == "" or value in DATA_ANOMALY_MATCHES:
+ for attribute in fields:
+ value = self.prepared_epc[attribute]
+ if value in DATA_ANOMALY_MATCHES:
if attribute in null_attributes:
value = None
else:
value = 0
else:
- value = int(value)
+ value = int(float(value))
self.prepared_epc[attribute] = value
@@ -611,11 +615,9 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
- self.prepared_epc["wind-turbine-count"] = (
- int(self.prepared_epc["wind-turbine-count"])
- if self.prepared_epc["wind-turbine-count"] != ""
- else None
- )
+ self.prepared_epc['wind-turbine-count'] = int(
+ self.prepared_epc['wind-turbine-count']
+ ) if self.prepared_epc['wind-turbine-count'] not in DATA_ANOMALY_MATCHES else None
def _clean_solar_hot_water(self):
"""
@@ -625,15 +627,24 @@ class EPCRecord:
raise ValueError("EPC Recrod doesn not contain epc data")
value_map = {
+ "Y": "Y",
+ "N": "N",
+ "": "N",
+ None: "N"
+ }
+
+ boolean_map = {
"Y": True,
"N": False,
- "": None,
}
self.prepared_epc["solar-water-heating-flag"] = value_map[
self.prepared_epc["solar-water-heating-flag"]
]
+ # Create a boolean version for storage in the database
+ self.solar_water_heating_flag_bool = boolean_map[self.prepared_epc['solar-water-heating-flag']]
+
def _clean_solar_pv(self):
"""
This method will clean the solar pv, if empty or invalid
@@ -641,11 +652,8 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
- self.prepared_epc["photo-supply"] = (
- float(self.prepared_epc["photo-supply"])
- if self.prepared_epc["photo-supply"] != ""
- else None
- )
+ self.prepared_epc['photo-supply'] = float(self.prepared_epc['photo-supply']) if (
+ self.prepared_epc['photo-supply'] not in DATA_ANOMALY_MATCHES) else None
def _clean_energy(self):
"""
@@ -668,12 +676,13 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
- self.prepared_epc["built-form"] = BUILT_FORM_REMAP.get(
+ self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(
self.prepared_epc["built-form"], self.prepared_epc["built-form"]
)
+
if self.prepared_epc["built-form"] in DATA_ANOMALY_MATCHES:
- if self.prepared_epc["property-type"] == "Flat":
- self.prepared_epc["built-form"] = "Semi-Detached"
+ if self.prepared_epc["property-type"] in ["Flat", "Maisonette"]:
+ self.prepared_epc["built-form"] = "End-Terrace"
def _clean_age_band(self):
"""
@@ -682,10 +691,11 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
- self.construction_age_band = EPCDataProcessor.clean_construction_age_band(
+ self.prepared_epc["construction-age-band"] = EPCDataProcessor.clean_construction_age_band(
self.prepared_epc["construction-age-band"]
)
- if self.construction_age_band in DATA_ANOMALY_MATCHES:
+
+ if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES:
if self.old_data:
# Take the most recent
max_datetime = max(
@@ -693,31 +703,31 @@ class EPCRecord:
old_record["lodgement-datetime"]
for old_record in self.old_data
if old_record["construction-age-band"]
- not in DATA_ANOMALY_MATCHES
+ not in DATA_ANOMALY_MATCHES
]
)
- most_recent = [
- old_record
- for old_record in self.old_data
- if old_record["lodgement-datetime"] == max_datetime
- ]
- self.construction_age_band = (
- EPCDataProcessor.clean_construction_age_band(
- most_recent[0]["construction-age-band"]
- )
+ most_recent = [old_record for old_record in self.old_data if
+ old_record["lodgement-datetime"] == max_datetime]
+
+ self.prepared_epc["construction-age-band"] = EPCDataProcessor.clean_construction_age_band(
+ most_recent[0]["construction-age-band"]
)
+ self.construction_age_band = self.prepared_epc["construction-age-band"]
self.age_band = england_wales_age_band_lookup.get(self.construction_age_band)
if (self.prepared_epc["transaction-type"] == "new dwelling") and (
self.age_band is None
):
self.age_band = "L"
- self.construction_age_band = "England and Wales: 2012 onwards"
+ self.construction_age_band = 'England and Wales: 2012 onwards'
+ self.prepared_epc["construction-age-band"] = self.construction_age_band
if self.age_band is None:
- raise ValueError("age_band is missing")
+ self.age_band = "C"
+ self.construction_age_band = "England and Wales: 1930-1949"
+ self.prepared_epc["construction-age-band"] = self.construction_age_band
def _clean_year_built(self):
"""
@@ -750,13 +760,10 @@ class EPCRecord:
"""
This method will clean the ventilation, if empty or invalid
"""
- self.prepared_epc["mechanical-ventilation"] = (
- None
- if (
- self.mechanical_ventilation == ""
- or self.mechanical_ventilation in DATA_ANOMALY_MATCHES
- )
- else self.mechanical_ventilation
+ self.prepared_epc['mechanical-ventilation'] = None if (
+ self.prepared_epc['mechanical-ventilation'] in DATA_ANOMALY_MATCHES
+ ) else (
+ self.prepared_epc['mechanical-ventilation']
)
def _field_validation(self):
@@ -793,13 +800,15 @@ class EPCRecord:
validation_config["function"](field_value)
except:
raise ValueError(
- f"Field {record_key} has value {field_value} which does not pass the validation function {validation_config['function']}"
+ f"Field {record_key} has value {field_value} which does not pass the validation function "
+ f"{validation_config['function']}"
)
if validation_config["acceptable_values"] is not None:
if field_value not in validation_config["acceptable_values"]:
raise ValueError(
- f"Field {record_key} has value {field_value} which is not in the acceptable values of {validation_config['acceptable_values']}"
+ f"Field {record_key} has value {field_value} which is not in the acceptable values of "
+ f"{validation_config['acceptable_values']}"
)
def _validate_float(
@@ -818,7 +827,8 @@ class EPCRecord:
validation_config["function"](field_value)
except:
raise ValueError(
- f"Field {record_key} has value {field_value} which does not pass the validation function {validation_config['function']}"
+ f"Field {record_key} has value {field_value} which does not pass the validation function "
+ f"{validation_config['function']}"
)
if validation_config["range"] is not None:
@@ -827,7 +837,8 @@ class EPCRecord:
or field_value > validation_config["range"][1]
):
raise ValueError(
- f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}"
+ f"Field {record_key} has value {field_value} which is not in the acceptable range of "
+ f"{validation_config['range']}"
)
def __sub__(self, other):
@@ -1045,7 +1056,8 @@ class EPCDifferenceRecord:
def ensure_adequate_data(self) -> bool:
"""
- This method will ensure that the difference record has adequate data, to keep record, even if rdsap change is zero
+ This method will ensure that the difference record has adequate data, to keep record, even if rdsap change is
+ zero
Can move into the initiation of the difference record
"""
wall_check = self.record1.walls_description == self.record2.walls_description
diff --git a/etl/epc/settings.py b/etl/epc/settings.py
index 24c23ebc..87f27972 100644
--- a/etl/epc/settings.py
+++ b/etl/epc/settings.py
@@ -43,7 +43,11 @@ DATA_ANOMALY_MATCHES = {
# contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases.
"NULL",
# We sometimes see fields populated with just an empty string.
- ""
+ "",
+ # We sometimes find None values - particularly when we produce an estimated EPC
+ None,
+ # An older value which rarely shows up but has been seen in the data.
+ "UNKNOWN",
}
DATA_ANOMALY_SUBSTRINGS = {
diff --git a/etl/epc/tests/test_epcrecord.py b/etl/epc/tests/test_epcrecord.py
new file mode 100644
index 00000000..cf0361b1
--- /dev/null
+++ b/etl/epc/tests/test_epcrecord.py
@@ -0,0 +1,358 @@
+import pytest
+from utils.s3 import read_dataframe_from_s3_parquet
+from etl.epc.Record import EPCRecord
+from etl.epc.settings import DATA_ANOMALY_MATCHES
+import random
+
+
+class TestEpcRecord:
+
+ @pytest.fixture()
+ def cleaning_data(self):
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ return cleaning_data
+
+ @pytest.fixture()
+ def epc_records_1(self):
+ epc_records_1 = {
+ 'original_epc': {
+ 'low-energy-fixed-light-count': '', 'address': '139 School Road, Hall Green',
+ 'uprn-source': 'Energy Assessor', 'floor-height': '2.6', 'heating-cost-potential': '1138',
+ 'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
+ 'construction-age-band': 'England and Wales: 1900-1929', 'potential-energy-rating': 'B',
+ 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
+ 'environment-impact-potential': '82', 'glazed-type': 'double glazing, unknown install date',
+ 'heating-cost-current': '2711', 'address3': '',
+ 'mainheatcont-description': 'Programmer, TRVs and bypass',
+ 'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Birmingham',
+ 'fixed-lighting-outlets-count': '11', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+ 'hot-water-cost-current': '310', 'county': '', 'postcode': 'B28 8JF', 'solar-water-heating-flag': 'N',
+ 'constituency': 'E14000562', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
+ 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '107',
+ 'local-authority': 'E08000025', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2023-07-05',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '65', 'address1': '139 School Road',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Birmingham, Hall Green',
+ 'roof-energy-eff': 'Average', 'total-floor-area': '103.0', 'building-reference-number': '10004697322',
+ 'environment-impact-current': '43', 'co2-emissions-current': '6.7',
+ 'roof-description': 'Pitched, 100 mm loft insulation', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '4', 'address2': 'Hall Green', 'hot-water-env-eff': 'Good',
+ 'posttown': 'BIRMINGHAM', 'mainheatc-energy-eff': 'Average', 'main-fuel': 'mains gas (not community)',
+ 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
+ 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 82% of fixed outlets',
+ 'roof-env-eff': 'Average', 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0',
+ 'lighting-cost-potential': '182', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+ 'main-heating-controls': '', 'lodgement-datetime': '2023-07-13 08:23:07', 'flat-top-storey': '',
+ 'current-energy-rating': 'E', 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor',
+ 'transaction-type': 'rental', 'uprn': '100070505235', 'current-energy-efficiency': '51',
+ 'energy-consumption-current': '366', 'mainheat-description': 'Boiler and radiators, mains gas',
+ 'lighting-cost-current': '182', 'lodgement-date': '2023-07-13', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Average',
+ 'lmk-key': 'c1d137711da433fb3cced74b1a6848da8bbc1159d076455d26d7b4668982601e',
+ 'wind-turbine-count': '0',
+ 'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '84',
+ 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '82',
+ 'walls-description': 'Solid brick, as built, no insulation (assumed)',
+ 'hotwater-description': 'From main system'}, 'full_sap_epc': {}, 'old_data': []
+ }
+ return epc_records_1
+
+ def test_clean_mechanical_ventilation(self, cleaning_data, epc_records_1):
+ # We have an epc with Natural ventilation - the resulting epc should also have natural ventilation
+
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "mechanical-ventilation": "natural"
+ }
+ record._clean_ventilation()
+
+ assert record.prepared_epc["mechanical-ventilation"] == "natural"
+
+ record2 = EPCRecord(cleaning_data=cleaning_data)
+ record2.prepared_epc = {
+ "mechanical-ventilation": ""
+ }
+
+ record2._clean_ventilation()
+
+ assert record2.prepared_epc["mechanical-ventilation"] is None
+
+ record3 = EPCRecord(cleaning_data=cleaning_data)
+ record3.prepared_epc = {
+ "mechanical-ventilation": None
+ }
+
+ record3._clean_ventilation()
+
+ assert record3.prepared_epc["mechanical-ventilation"] is None
+
+ record4 = EPCRecord(cleaning_data=cleaning_data)
+ record4.prepared_epc = {
+ "mechanical-ventilation": "INVALID"
+ }
+
+ record4._clean_ventilation()
+
+ assert record4.prepared_epc["mechanical-ventilation"] is None
+
+ def test_clean_energy_valid_values(self, cleaning_data, epc_records_1):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "energy-consumption-current": "200",
+ "co2-emissions-current": "5.5"
+ }
+ record._clean_energy()
+
+ assert record.prepared_epc["energy-consumption-current"] == 200.0
+ assert record.prepared_epc["co2-emissions-current"] == 5.5
+
+ def test_clean_energy_empty_values(self, cleaning_data):
+ # We cannot have invalid values so this should raise an exception
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "energy-consumption-current": "",
+ "co2-emissions-current": ""
+ }
+
+ with pytest.raises(ValueError):
+ record._clean_energy()
+
+ def test_clean_built_form_valid_remap(self, cleaning_data, epc_records_1):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ # A built form that is already "Semi-Detached" is expected to come back unchanged
+ record.prepared_epc = {
+ "built-form": "Semi-Detached",
+ "property-type": "Flat" # Assuming this affects the remapping
+ }
+ record._clean_built_form()
+
+ assert record.prepared_epc["built-form"] == "Semi-Detached"
+
+ def test_clean_built_form_anomaly(self, cleaning_data, epc_records_1):
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "built-form": "",
+ "property-type": "Flat"
+ }
+ record._clean_built_form()
+
+ assert record.prepared_epc["built-form"] == "End-Terrace"
+
+ def test_clean_floor_area_valid(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "total-floor-area": "120.5"
+ }
+ record._clean_floor_area()
+
+ assert record.prepared_epc["total-floor-area"] == 120.5
+
+ def test_clean_floor_area_empty(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "total-floor-area": ""
+ }
+ # We have no known case of missing floor area
+ with pytest.raises(ValueError):
+ record._clean_floor_area()
+
+ def test_clean_heat_loss_corridor_valid(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "heat-loss-corridor": "unheated corridor",
+ "unheated-corridor-length": ""
+ }
+ record._clean_heat_loss_corridor()
+
+ assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor"
+
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "heat-loss-corridor": "unheated corridor",
+ "unheated-corridor-length": None
+ }
+ record._clean_heat_loss_corridor()
+
+ assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor"
+ assert record.prepared_epc["unheated-corridor-length"] is None
+
+ def test_clean_heat_loss_corridor_anomaly(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ # Assuming "InvalidCorridor" is an anomaly
+ record.prepared_epc = {
+ "heat-loss-corridor": "InvalidCorridor",
+ "unheated-corridor-length": ""
+ }
+ record._clean_heat_loss_corridor()
+
+ assert record.prepared_epc["heat-loss-corridor"] == "no corridor"
+
+ def test_clean_mains_gas_valid(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "mains-gas-flag": "Y"
+ }
+ record._clean_mains_gas()
+
+ assert record.prepared_epc["mains-gas-flag"] is True
+
+ def test_clean_mains_gas_anomaly(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "mains-gas-flag": "InvalidValue"
+ }
+ # It should always be Y or N or an anomaly value
+ with pytest.raises(KeyError):
+ record._clean_mains_gas()
+
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "mains-gas-flag": random.choice(list(DATA_ANOMALY_MATCHES))
+ }
+ record._clean_mains_gas()
+
+ assert record.prepared_epc["mains-gas-flag"] is None
+
+ def test_clean_solar_hot_water_valid(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "solar-water-heating-flag": "Y"
+ }
+ record._clean_solar_hot_water()
+
+ assert record.prepared_epc["solar-water-heating-flag"] == "Y"
+ assert record.solar_water_heating_flag_bool is True
+
+ def test_clean_solar_hot_water_empty(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.prepared_epc = {
+ "solar-water-heating-flag": ""
+ }
+ record._clean_solar_hot_water()
+
+ assert record.prepared_epc["solar-water-heating-flag"] == "N"
+ assert record.solar_water_heating_flag_bool is False
+
+ def test_clean_number_lighting_outlets_valid(self, cleaning_data, epc_records_1):
+ record = EPCRecord(cleaning_data=cleaning_data, epc_records=epc_records_1)
+ record.prepared_epc = {
+ "fixed-lighting-outlets-count": "5"
+ }
+ record._clean_number_lighting_outlets()
+
+ assert record.prepared_epc["fixed-lighting-outlets-count"] == 5.0
+
+ def test_clean_number_lighting_outlets_empty(self, cleaning_data, epc_records_1):
+ record = EPCRecord(cleaning_data=cleaning_data)
+ record.run_mode = "newdata"
+ record.prepared_epc = {
+ "fixed-lighting-outlets-count": "",
+ "property-type": "Flat",
+ "built-form": "Semi-Detached",
+ "construction-age-band": "England and Wales: 1900-1929",
+ "local-authority": "E08000025",
+ "number-habitable-rooms": "4",
+ "number-heated-rooms": "4",
+ }
+ record.old_data = []
+ record.full_sap_epc = []
+ record._clean_number_lighting_outlets()
+
+ assert record.prepared_epc["fixed-lighting-outlets-count"] == 8.0
+
+ def test_clean_count_variables(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "number-open-fireplaces": "1",
+ "extension-count": None,
+ "flat-storey-count": "",
+ "number-habitable-rooms": "INVALID!",
+ }
+
+ record._clean_count_variables()
+
+ assert record.prepared_epc["number-open-fireplaces"] == 1.0
+ assert record.prepared_epc["extension-count"] == 0
+ assert record.prepared_epc["flat-storey-count"] is None
+ assert record.prepared_epc["number-habitable-rooms"] is None
+
+ def test_clean_floor_level(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "floor-level": "1",
+ }
+
+ record._clean_floor_level()
+
+ assert record.prepared_epc["floor-level"] == 1.0
+
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "floor-level": "",
+ }
+
+ record._clean_floor_level()
+
+ assert record.prepared_epc["floor-level"] is None
+
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "floor-level": None,
+ }
+
+ record._clean_floor_level()
+
+ assert record.prepared_epc["floor-level"] is None
+
+ def test_clean_solar_hot_water(self, cleaning_data):
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "solar-water-heating-flag": "Y",
+ }
+
+ record._clean_solar_hot_water()
+
+ assert record.prepared_epc["solar-water-heating-flag"] == "Y"
+ assert record.solar_water_heating_flag_bool is True
+
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "solar-water-heating-flag": "N",
+ }
+
+ record._clean_solar_hot_water()
+
+ assert record.prepared_epc["solar-water-heating-flag"] == "N"
+ assert record.solar_water_heating_flag_bool is False
+
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "solar-water-heating-flag": "",
+ }
+
+ record._clean_solar_hot_water()
+
+ assert record.prepared_epc["solar-water-heating-flag"] == "N"
+ assert record.solar_water_heating_flag_bool is False
+
+ record = EPCRecord(cleaning_data=cleaning_data)
+
+ record.prepared_epc = {
+ "solar-water-heating-flag": None,
+ }
+
+ record._clean_solar_hot_water()
+
+ assert record.prepared_epc["solar-water-heating-flag"] == "N"
+ assert record.solar_water_heating_flag_bool is False
diff --git a/etl/testing_data/livewest_pilot.py b/etl/testing_data/livewest_pilot.py
new file mode 100644
index 00000000..580c16d0
--- /dev/null
+++ b/etl/testing_data/livewest_pilot.py
@@ -0,0 +1,38 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import os
+
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
+USER_ID = 8
+PORTFOLIO_ID = 61
+
+
+def app():
+ pilot_file = pd.DataFrame(
+ [
+ {"address": "42, Foxes Field", "postcode": "TR18 3RJ", "Notes": None},
+ {"address": "11, Cranley Gardens", "postcode": "TQ13 8UT", "Notes": None},
+ ]
+ )
+
+ # Store the data in s3
+ filename = f"{USER_ID}/{PORTFOLIO_ID}/livewest_pilot_file.csv"
+ save_csv_to_s3(
+ dataframe=pilot_file,
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=filename
+ )
+
+ body = {
+ "portfolio_id": str(PORTFOLIO_ID),
+ "housing_type": "Social",
+ "goal": "Increase EPC",
+ "goal_value": "C",
+ "trigger_file_path": filename
+ }
+ print(body)
diff --git a/etl/testing_data/the_guiness_partnership_pilot.py b/etl/testing_data/the_guiness_partnership_pilot.py
new file mode 100644
index 00000000..496ea7ea
--- /dev/null
+++ b/etl/testing_data/the_guiness_partnership_pilot.py
@@ -0,0 +1,38 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import os
+
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
+USER_ID = 8
+PORTFOLIO_ID = 59
+
+
+def app():
+ pilot_file = pd.DataFrame(
+ [
+ {"address": "10 Elm Close", "postcode": "CV37 8XL", "Notes": None},
+ {"address": "21, Spring Lane", "postcode": "MK17 0QP", "Notes": None},
+ ]
+ )
+
+ # Store the data in s3
+ filename = f"{USER_ID}/{PORTFOLIO_ID}/the_guiness_partnership_pilot_file.csv"
+ save_csv_to_s3(
+ dataframe=pilot_file,
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=filename
+ )
+
+ body = {
+ "portfolio_id": str(PORTFOLIO_ID),
+ "housing_type": "Social",
+ "goal": "Increase EPC",
+ "goal_value": "C",
+ "trigger_file_path": filename
+ }
+ print(body)
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index a246c8cb..2f568264 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -109,6 +109,7 @@ class FloorRecommendations(Definitions):
insulation_thickness=self.property.floor["insulation_thickness"],
wall_type=self.property.wall_type
)
+
self.estimated_u_value = u_value
if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index dc1aff3f..0bbfd69d 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -91,6 +91,7 @@ class RoofRecommendations:
raise NotImplementedError("Implement me")
u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band})
+
self.estimated_u_value = u_value
if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
# The Roof is already compliant
diff --git a/recommendations/tests/test_data/floor_uvalue_test_cases.py b/recommendations/tests/test_data/floor_uvalue_test_cases.py
index 91d3814f..7104fd9d 100644
--- a/recommendations/tests/test_data/floor_uvalue_test_cases.py
+++ b/recommendations/tests/test_data/floor_uvalue_test_cases.py
@@ -29,4 +29,34 @@ floor_uvalue_test_cases = [
"insulation_thickness": None,
"expected": ValueError,
},
+ # 16 Glastonbury road EPR - the EPR has 0.71 due to the property having 320mm wall thickness, but default being 250
+ {
+ "floor_type": "suspended",
+ "area": 34.5,
+ "perimeter": 16.7,
+ "age_band": "D",
+ "wall_type": "cavity",
+ "insulation_thickness": None,
+ "expected": 0.72,
+ },
+ # 31 Loddon Way - the EPR has 0.5 due to the property having 320mm wall thickness, but default being 250
+ {
+ "floor_type": "solid",
+ "area": 52.08,
+ "perimeter": 16.2,
+ "age_band": "E",
+ "wall_type": "cavity",
+ "insulation_thickness": None,
+ "expected": 0.52,
+ },
+ # 62 Pearmain Drive
+ {
+ "floor_type": "solid",
+ "area": 38.64,
+ "perimeter": 18.1,
+ "age_band": "E",
+ "wall_type": "cavity",
+ "insulation_thickness": None,
+ "expected": 0.69,
+ },
]
diff --git a/recommendations/tests/test_data/wall_uvalue_test_cases.py b/recommendations/tests/test_data/wall_uvalue_test_cases.py
index e0c6ebe3..87f1ad3f 100644
--- a/recommendations/tests/test_data/wall_uvalue_test_cases.py
+++ b/recommendations/tests/test_data/wall_uvalue_test_cases.py
@@ -76,5 +76,33 @@ wall_uvalue_test_cases = [
"is_granite_or_whinstone": False,
"is_sandstone_or_limestone": False,
"uvalue": 0
+ },
+ {
+ "clean_description": "Cavity wall, as built, insulated",
+ "age_band": "F",
+ "is_granite_or_whinstone": False,
+ "is_sandstone_or_limestone": False,
+ "uvalue": 0.4
+ },
+ {
+ "clean_description": "Cavity wall, as built, insulated",
+ "age_band": "D",
+ "is_granite_or_whinstone": False,
+ "is_sandstone_or_limestone": False,
+ "uvalue": 0.7
+ },
+ {
+ "clean_description": "Cavity wall, filled cavity",
+ "age_band": "E",
+ "is_granite_or_whinstone": False,
+ "is_sandstone_or_limestone": False,
+ "uvalue": 0.7
+ },
+ {
+ "clean_description": "Cavity wall, as built, no insulation",
+ "age_band": "E",
+ "is_granite_or_whinstone": False,
+ "is_sandstone_or_limestone": False,
+ "uvalue": 1.5
}
]
diff --git a/recommendations/tests/test_fireplace_recommendations.py b/recommendations/tests/test_fireplace_recommendations.py
index a91d6697..f21d6bc3 100644
--- a/recommendations/tests/test_fireplace_recommendations.py
+++ b/recommendations/tests/test_fireplace_recommendations.py
@@ -1,16 +1,18 @@
from backend.Property import Property
-from unittest.mock import Mock
from recommendations.FireplaceRecommendations import FireplaceRecommendations
+from etl.epc.Record import EPCRecord
class TestFirepaceRecommendations:
def test_no_fireplaces(self):
- property_instance = Property(id=0, address="fake", postcode="fake")
- property_instance.data = {
- "number-open-fireplaces": 0
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "number-open-fireplaces": 0,
}
+ property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
+
recommender = FireplaceRecommendations(
property_instance=property_instance
)
@@ -22,10 +24,11 @@ class TestFirepaceRecommendations:
assert recommender.recommendation is None
def test_one_fireplace(self):
- property_instance = Property(id=0, address="fake", postcode="fake")
- property_instance.data = {
- "number-open-fireplaces": 1
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "number-open-fireplaces": 1,
}
+ property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
recommender = FireplaceRecommendations(
property_instance=property_instance
@@ -40,10 +43,11 @@ class TestFirepaceRecommendations:
assert recommender.recommendation[0]["total"] == 300
def test_multiple_fireplaces(self):
- property_instance = Property(id=0, address="fake", postcode="fake")
- property_instance.data = {
- "number-open-fireplaces": 3
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "number-open-fireplaces": 3,
}
+ property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
recommender = FireplaceRecommendations(
property_instance=property_instance
diff --git a/recommendations/tests/test_lighting_recommendations.py b/recommendations/tests/test_lighting_recommendations.py
index 964f1da0..45213d70 100644
--- a/recommendations/tests/test_lighting_recommendations.py
+++ b/recommendations/tests/test_lighting_recommendations.py
@@ -1,5 +1,5 @@
import pytest
-from unittest.mock import Mock
+from etl.epc.Record import EPCRecord
from backend.Property import Property
from recommendations.LightingRecommendations import LightingRecommendations
@@ -9,18 +9,20 @@ from recommendations.tests.test_data.materials import materials
class TestLightingRecommendations:
def test_init_invalid_materials(self):
- input_property0 = Property(id=1, postcode="F4k3 6", address="623 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Greater London Authority"}
+ input_property0 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
input_property0.lighting = {"low_energy_proportion": 0}
- input_property0.data = {"county": "Greater London Authority"}
# Test for invalid materials
with pytest.raises(ValueError):
LightingRecommendations(input_property0, [])
def test_recommend_no_action_needed(self):
# Case where no recommendation is needed
- input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Greater London Authority"}
+ input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
input_property1.lighting = {"low_energy_proportion": 100}
- input_property1.data = {"county": "Greater London Authority"}
lr = LightingRecommendations(input_property1, materials)
lr.recommend()
@@ -28,9 +30,9 @@ class TestLightingRecommendations:
def test_recommend_action_needed(self):
# Case where recommendation is needed
- input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
- input_property1.lighting = {"low_energy_proportion": 100}
- input_property1.data = {"county": "Greater London Authority"}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Greater London Authority"}
+ input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
input_property1.lighting = {"low_energy_proportion": 0.80}
input_property1.number_lighting_outlets = 20
diff --git a/recommendations/tests/test_roof_recommendations.py b/recommendations/tests/test_roof_recommendations.py
index 75b7ddb2..3d555a4f 100644
--- a/recommendations/tests/test_roof_recommendations.py
+++ b/recommendations/tests/test_roof_recommendations.py
@@ -1,12 +1,17 @@
from backend.Property import Property
from recommendations.RoofRecommendations import RoofRecommendations
from recommendations.tests.test_data.materials import materials
+from etl.epc.Record import EPCRecord
class TestRoofRecommendations:
def test_loft_insulation_recommendation_no_insulation(self):
- property_instance = Property(id=0, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Cambridgeshire",
+ }
+ property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
property_instance.age_band = "F"
property_instance.insulation_floor_area = 100
property_instance.roof = {
@@ -18,9 +23,6 @@ class TestRoofRecommendations:
'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True,
'insulation_thickness': 'none', 'roof_thermal_transmittance': None, 'roof_insulation_thickness': 'none'
}
- property_instance.data = {
- "county": "Cambridgeshire",
- }
roof_recommender = RoofRecommendations(property_instance=property_instance, materials=materials)
@@ -31,7 +33,9 @@ class TestRoofRecommendations:
assert len(roof_recommender.recommendations)
def test_loft_insulation_recommendation_50mm_insulation(self):
- property_instance2 = Property(id=0, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Kent"}
+ property_instance2 = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
property_instance2.age_band = "F"
property_instance2.insulation_floor_area = 100
property_instance2.roof = {
@@ -43,7 +47,6 @@ class TestRoofRecommendations:
'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True,
'insulation_thickness': '50', 'roof_thermal_transmittance': None, 'roof_insulation_thickness': 'none'
}
- property_instance2.data = {"county": "Kent"}
roof_recommender2 = RoofRecommendations(property_instance=property_instance2, materials=materials)
@@ -57,7 +60,9 @@ class TestRoofRecommendations:
assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14
assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68
- property_instance3 = Property(id=0, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Greater London Authority"}
+ property_instance3 = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
property_instance3.age_band = "F"
property_instance3.insulation_floor_area = 100
property_instance3.roof = {
@@ -69,7 +74,6 @@ class TestRoofRecommendations:
'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True,
'insulation_thickness': '50', 'roof_thermal_transmittance': None, 'roof_insulation_thickness': 'none'
}
- property_instance3.data = {"county": "Greater London Authority"}
roof_recommender3 = RoofRecommendations(property_instance=property_instance3, materials=materials)
@@ -82,7 +86,9 @@ class TestRoofRecommendations:
assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270
def test_loft_insulation_recommendation_150mm_insulation(self):
- property_instance4 = Property(id=0, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "North East Lincolnshire"}
+ property_instance4 = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
property_instance4.age_band = "F"
property_instance4.insulation_floor_area = 100
property_instance4.roof = {
@@ -94,7 +100,6 @@ class TestRoofRecommendations:
'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True,
'insulation_thickness': '150', 'roof_thermal_transmittance': None, 'roof_insulation_thickness': 'none'
}
- property_instance4.data = {"county": "North East Lincolnshire"}
roof_recommender4 = RoofRecommendations(property_instance=property_instance4, materials=materials)
@@ -109,7 +114,9 @@ class TestRoofRecommendations:
assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3
assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150
- property_instance5 = Property(id=0, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Somerset"}
+ property_instance5 = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
property_instance5.age_band = "F"
property_instance5.insulation_floor_area = 100
property_instance5.roof = {
@@ -121,7 +128,6 @@ class TestRoofRecommendations:
'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True,
'insulation_thickness': '150', 'roof_thermal_transmittance': None, 'roof_insulation_thickness': 'none'
}
- property_instance5.data = {"county": "Somerset"}
roof_recommender5 = RoofRecommendations(property_instance=property_instance5, materials=materials)
@@ -136,7 +142,9 @@ class TestRoofRecommendations:
def test_loft_insulation_recommendation_270mm_insulation(self):
# We shouldn't recommend anything in this case
- property_instance6 = Property(id=0, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Portsmouth"}
+ property_instance6 = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
property_instance6.age_band = "F"
property_instance6.insulation_floor_area = 100
property_instance6.roof = {
@@ -148,7 +156,6 @@ class TestRoofRecommendations:
'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True,
'insulation_thickness': '270', 'roof_thermal_transmittance': None, 'roof_insulation_thickness': 'none'
}
- property_instance6.data = {"county": "Portsmouth"}
roof_recommender6 = RoofRecommendations(property_instance=property_instance6, materials=materials)
@@ -277,7 +284,9 @@ class TestRoofRecommendations:
# "Insulate your room roof with 270mm of Example room roof insulation"
def test_flat_no_insulation(self):
- property_instance11 = Property(id=11, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Swindon"}
+ property_instance11 = Property(id=11, address="fake", postcode="fake", epc_record=epc_record)
property_instance11.age_band = "D"
property_instance11.insulation_floor_area = 33.5
property_instance11.perimeter = 24
@@ -288,7 +297,6 @@ class TestRoofRecommendations:
'is_roof_room': False, 'is_loft': False, 'is_flat': True, 'is_thatched': False, 'is_at_rafters': False,
'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'
}
- property_instance11.data = {"county": "Swindon"}
roof_recommender11 = RoofRecommendations(property_instance=property_instance11, materials=materials)
@@ -306,7 +314,9 @@ class TestRoofRecommendations:
"Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
def test_flat_insulated(self):
- property_instance12 = Property(id=12, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Thurrock"}
+ property_instance12 = Property(id=12, address="fake", postcode="fake", epc_record=epc_record)
property_instance12.age_band = "D"
property_instance12.insulation_floor_area = 40
property_instance12.perimeter = 30
@@ -319,7 +329,6 @@ class TestRoofRecommendations:
'is_loft': False, 'is_flat': True, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True,
'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'average'
}
- property_instance12.data = {"county": "Thurrock"}
roof_recommender12 = RoofRecommendations(property_instance=property_instance12, materials=materials)
@@ -330,7 +339,9 @@ class TestRoofRecommendations:
assert not roof_recommender12.recommendations
def test_flat_limited_insulation(self):
- property_instance13 = Property(id=12, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Tyne and Wear"}
+ property_instance13 = Property(id=12, address="fake", postcode="fake", epc_record=epc_record)
property_instance13.age_band = "D"
property_instance13.insulation_floor_area = 40
property_instance13.perimeter = 40
@@ -342,7 +353,6 @@ class TestRoofRecommendations:
'is_loft': False, 'is_flat': True, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True,
'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'below average'
}
- property_instance13.data = {"county": "Tyne and Wear"}
roof_recommender13 = RoofRecommendations(property_instance=property_instance13, materials=materials)
@@ -362,7 +372,9 @@ class TestRoofRecommendations:
"Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
def test_property_above(self):
- property_instance14 = Property(id=0, address="fake", postcode="fake")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Suffolk"}
+ property_instance14 = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
property_instance14.age_band = "F"
property_instance14.insulation_floor_area = 100
property_instance14.roof = {
@@ -373,7 +385,6 @@ class TestRoofRecommendations:
'is_assumed': False, 'has_dwelling_above': True, 'is_valid': True,
'insulation_thickness': None
}
- property_instance14.data = {"county": "Suffolk"}
roof_recommender14 = RoofRecommendations(property_instance=property_instance14, materials=materials)
diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py
index f2436cb1..5481cb17 100644
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@@ -1,45 +1,50 @@
import pytest
from recommendations.SolarPvRecommendations import SolarPvRecommendations
from backend.Property import Property
+from etl.epc.Record import EPCRecord
class TestSolarPvRecommendations:
@pytest.fixture
def property_instance_invalid_type(self):
# Setup the property_instance with an invalid property type
- property_instance_invalid_type = Property(id=1, address="", postcode="")
- property_instance_invalid_type.data = {
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
"property-type": "InvalidType", "county": "Broxbourne", "photo-supply": None
}
+ property_instance_invalid_type = Property(id=1, address="", postcode="", epc_record=epc_record)
property_instance_invalid_type.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False}
return property_instance_invalid_type
@pytest.fixture
def property_instance_invalid_roof(self):
# Setup the property_instance with invalid roof type
- property_instance_invalid_roof = Property(id=1, address="", postcode="")
- property_instance_invalid_roof.data = {
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
"county": "Huntingdonshire", "property-type": "House", "photo-supply": None
}
+ property_instance_invalid_roof = Property(id=1, address="", postcode="", epc_record=epc_record)
property_instance_invalid_roof.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False}
return property_instance_invalid_roof
@pytest.fixture
def property_instance_has_solar_pv(self):
# Setup the property_instance without existing solar pv
- property_instance_has_solar_pv = Property(id=1, address="", postcode="")
- property_instance_has_solar_pv.data = {"photo-supply": "40", "county": "Huntingdonshire",
- "property-type": "House"}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"photo-supply": "40", "county": "Huntingdonshire",
+ "property-type": "House"}
+ property_instance_has_solar_pv = Property(id=1, address="", postcode="", epc_record=epc_record)
property_instance_has_solar_pv.roof = {"is_flat": True}
return property_instance_has_solar_pv
@pytest.fixture
def property_instance_valid_all(self):
# Setup a valid property_instance that passes all conditions
- property_instance_valid_all = Property(id=1, address="", postcode="")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"property-type": "House", "photo-supply": None, "county": "Huntingdonshire"}
+ property_instance_valid_all = Property(id=1, address="", postcode="", epc_record=epc_record)
property_instance_valid_all.solar_pv_roof_area = 20
property_instance_valid_all.solar_pv_percentage = 40
- property_instance_valid_all.data = {"property-type": "House", "photo-supply": None, "county": "Huntingdonshire"}
property_instance_valid_all.roof = {"is_flat": True}
return property_instance_valid_all
diff --git a/recommendations/tests/test_ventilation_recommendations.py b/recommendations/tests/test_ventilation_recommendations.py
index 3242b1d1..aa992253 100644
--- a/recommendations/tests/test_ventilation_recommendations.py
+++ b/recommendations/tests/test_ventilation_recommendations.py
@@ -1,13 +1,15 @@
from backend.Property import Property
from recommendations.VentilationRecommendations import VentilationRecommendations
from recommendations.tests.test_data.materials import materials
+from etl.epc.Record import EPCRecord
class TestVentilationRecommendations:
def test_natural_ventilation(self):
- input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
- input_property1.data = {"mechanical-ventilation": "natural"}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"mechanical-ventilation": "natural"}
+ input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
recommender = VentilationRecommendations(
property_instance=input_property1,
@@ -27,8 +29,9 @@ class TestVentilationRecommendations:
assert recommender.recommendation[0]["parts"][0]["quantity"] == 2
def test_missing_ventilation(self):
- input_property2 = Property(id=1, postcode="F4k3 6", address="623 fake street")
- input_property2.data = {"mechanical-ventilation": None}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"mechanical-ventilation": None}
+ input_property2 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
recommender2 = VentilationRecommendations(
property_instance=input_property2,
@@ -48,8 +51,9 @@ class TestVentilationRecommendations:
assert recommender2.recommendation[0]["parts"][0]["quantity"] == 2
def test_nodata_ventilation(self):
- input_property3 = Property(id=1, postcode="F4k3 6", address="623 fake street")
- input_property3.data = {"mechanical-ventilation": "NO DATA!!"}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"mechanical-ventilation": "NO DATA!!"}
+ input_property3 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
recommender3 = VentilationRecommendations(
property_instance=input_property3,
@@ -69,8 +73,9 @@ class TestVentilationRecommendations:
assert recommender3.recommendation[0]["parts"][0]["quantity"] == 2
def test_existing_ventilation_1(self):
- input_property4 = Property(id=1, postcode="F4k3 6", address="623 fake street")
- input_property4.data = {"mechanical-ventilation": 'mechanical, extract only'}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"mechanical-ventilation": "mechanical, extract only"}
+ input_property4 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
recommender4 = VentilationRecommendations(
property_instance=input_property4,
@@ -85,8 +90,9 @@ class TestVentilationRecommendations:
assert recommender4.has_ventilaion
def test_existing_ventilation_2(self):
- input_property5 = Property(id=1, postcode="F4k3 6", address="623 fake street")
- input_property5.data = {"mechanical-ventilation": 'mechanical, supply and extract'}
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"mechanical-ventilation": "mechanical, supply and extract"}
+ input_property5 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
recommender5 = VentilationRecommendations(
property_instance=input_property5,
diff --git a/recommendations/tests/test_wall_recommendations.py b/recommendations/tests/test_wall_recommendations.py
index bfc681f5..580ebb91 100644
--- a/recommendations/tests/test_wall_recommendations.py
+++ b/recommendations/tests/test_wall_recommendations.py
@@ -7,6 +7,7 @@ from recommendations.WallRecommendations import WallRecommendations
from backend.Property import Property
from recommendations.recommendation_utils import is_diminishing_returns
from recommendations.tests.test_data.materials import materials
+from etl.epc.Record import EPCRecord
# with open(
@@ -231,7 +232,9 @@ class TestWallRecommendationsBase:
class TestCavityWallRecommensations:
def test_fill_empty_cavity(self):
- input_property = Property(id=1, postcode="F4k3", address="123 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "Derbyshire"}
+ input_property = Property(id=1, postcode="F4k3", address="123 fake street", epc_record=epc_record)
input_property.walls = {
'original_description': 'Cavity wall, as built, no insulation (assumed)',
'clean_description': 'Cavity wall, as built, no insulation',
@@ -245,7 +248,6 @@ class TestCavityWallRecommensations:
}
input_property.age_band = "C"
input_property.insulation_wall_area = 50
- input_property.data = {"county": "Derbyshire"}
recommender = WallRecommendations(
property_instance=input_property,
@@ -265,7 +267,9 @@ class TestCavityWallRecommensations:
assert np.isclose(recommender.recommendations[1]["total"], 2004.6600000000003)
def test_fill_partial_filled_cavity(self):
- input_property = Property(id=1, postcode="F4k3", address="123 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"county": "County Durham"}
+ input_property = Property(id=1, postcode="F4k3", address="123 fake street", epc_record=epc_record)
input_property.walls = {
'original_description': 'Cavity wall, as built, partial insulation (assumed)',
'clean_description': 'Cavity wall, as built, partial insulation',
@@ -279,7 +283,6 @@ class TestCavityWallRecommensations:
}
input_property.age_band = "C"
input_property.insulation_wall_area = 50
- input_property.data = {"county": "County Durham"}
recommender = WallRecommendations(
property_instance=input_property,
@@ -299,7 +302,9 @@ class TestCavityWallRecommensations:
assert np.isclose(recommender.recommendations[1]["total"], 1999.9350000000002)
def test_system_built_wall(self):
- input_property2 = Property(id=1, postcode="F4k3 2", address="223 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"property-type": "House", "county": "Derbyshire", "built-form": "Detached"}
+ input_property2 = Property(id=1, postcode="F4k3 2", address="223 fake street", epc_record=epc_record)
input_property2.walls = {
'original_description': 'System built, as built, no insulation (assumed)',
'clean_description': 'System built, as built, no insulation',
@@ -314,7 +319,6 @@ class TestCavityWallRecommensations:
input_property2.age_band = "F"
input_property2.insulation_wall_area = 120
input_property2.restricted_measures = False
- input_property2.data = {"property-type": "House", "county": "Derbyshire", "built-form": "Detached"}
assert input_property2.walls["is_system_built"]
@@ -346,7 +350,9 @@ class TestCavityWallRecommensations:
assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5
def test_timber_frame_wall(self):
- input_property3 = Property(id=1, postcode="F4k3 2", address="223 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"property-type": "House", "county": "Derbyshire", "built-form": "Semi-Detached"}
+ input_property3 = Property(id=1, postcode="F4k3 2", address="223 fake street", epc_record=epc_record)
input_property3.walls = {
'original_description': 'Timber frame, as built, no insulation (assumed)',
'clean_description': 'Timber frame, as built, no insulation',
@@ -361,7 +367,6 @@ class TestCavityWallRecommensations:
input_property3.age_band = "B"
input_property3.insulation_wall_area = 99
input_property3.restricted_measures = False
- input_property3.data = {"property-type": "House", "county": "Derbyshire", "built-form": "Semi-Detached"}
assert input_property3.walls["is_timber_frame"]
@@ -388,7 +393,9 @@ class TestCavityWallRecommensations:
assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0
def test_granite_or_whinstone_wall(self):
- input_property4 = Property(id=1, postcode="F4k3 2", address="223 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"property-type": "Bungalow", "county": "Derbyshire", "built-form": "Detached"}
+ input_property4 = Property(id=1, postcode="F4k3 2", address="223 fake street", epc_record=epc_record)
input_property4.walls = {
'original_description': 'Granite or whinstone, as built, no insulation (assumed)',
'clean_description': 'Granite or whinstone, as built, no insulation',
@@ -403,7 +410,6 @@ class TestCavityWallRecommensations:
input_property4.age_band = "A"
input_property4.insulation_wall_area = 223
input_property4.restricted_measures = False
- input_property4.data = {"property-type": "Bungalow", "county": "Derbyshire", "built-form": "Detached"}
assert input_property4.walls["is_granite_or_whinstone"]
@@ -430,7 +436,9 @@ class TestCavityWallRecommensations:
assert recommender4.recommendations[1]["parts"][0]["depth"] == 150
def test_cob_wall(self):
- input_property5 = Property(id=1, postcode="F4k3 2", address="223 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"property-type": "Bungalow", "county": "Derbyshire", "built-form": "Detached"}
+ input_property5 = Property(id=1, postcode="F4k3 2", address="223 fake street", epc_record=epc_record)
input_property5.walls = {
'original_description': 'Cob, as built',
'clean_description': 'Cob, as built',
@@ -445,7 +453,6 @@ class TestCavityWallRecommensations:
input_property5.age_band = "E"
input_property5.insulation_wall_area = 77
input_property5.restricted_measures = False
- input_property5.data = {"property-type": "Bungalow", "county": "Derbyshire", "built-form": "Detached"}
assert input_property5.walls["is_cob"]
@@ -472,7 +479,9 @@ class TestCavityWallRecommensations:
assert recommender5.recommendations[3]["parts"][0]["depth"] == 100
def test_sandstone_or_limestone_wall(self):
- input_property6 = Property(id=1, postcode="F4k3 6", address="623 fake street")
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {"property-type": "House", "county": "Derbyshire", "built-form": "Mid-Terrace"}
+ input_property6 = Property(id=1, postcode="F4k3 6", address="623 fake street", epc_record=epc_record)
input_property6.walls = {
'original_description': 'Sandstone or limestone, as built, no insulation (assumed)',
'clean_description': 'Sandstone or limestone, as built, no insulation',
@@ -487,7 +496,6 @@ class TestCavityWallRecommensations:
input_property6.age_band = "F"
input_property6.insulation_wall_area = 350
input_property6.restricted_measures = False
- input_property6.data = {"property-type": "House", "county": "Derbyshire", "built-form": "Mid-Terrace"}
assert input_property6.walls["is_sandstone_or_limestone"]
diff --git a/recommendations/tests/test_window_recommendations.py b/recommendations/tests/test_window_recommendations.py
index 664a1e39..36e70834 100644
--- a/recommendations/tests/test_window_recommendations.py
+++ b/recommendations/tests/test_window_recommendations.py
@@ -1,6 +1,7 @@
from recommendations.WindowsRecommendations import WindowsRecommendations
from backend.Property import Property
from recommendations.tests.test_data.materials import materials
+from etl.epc.Record import EPCRecord
class TestWindowRecommendations:
@@ -10,16 +11,17 @@ class TestWindowRecommendations:
For this property, we expect all windows to be single glazed and should recommend full double glazing
:return:
"""
-
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Wychavon",
+ "multi-glaze-proportion": 0,
+ "uprn": 0
+ }
property_1 = Property(
id=1,
postcode='1',
address='1',
- data={
- "county": "Wychavon",
- "multi-glaze-proportion": 0,
- "uprn": 0
- }
+ epc_record=epc_record
)
property_1.windows = {
'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': 'full',
@@ -47,16 +49,17 @@ class TestWindowRecommendations:
double glazing
:return:
"""
-
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Wychavon",
+ "multi-glaze-proportion": 33,
+ "uprn": 0
+ }
property_2 = Property(
id=1,
postcode='1',
address='1',
- data={
- "county": "Wychavon",
- "multi-glaze-proportion": 33,
- "uprn": 0
- }
+ epc_record=epc_record
)
property_2.windows = {'original_description': 'Mostly double glazing', 'has_glazing': True,
'glazing_coverage': 'most',
@@ -81,16 +84,17 @@ class TestWindowRecommendations:
This property has full double glazing so we shouldn't recommend anything
:return:
"""
-
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Wychavon",
+ "multi-glaze-proportion": 100,
+ "uprn": 0
+ }
property_3 = Property(
id=1,
postcode='1',
address='1',
- data={
- "county": "Wychavon",
- "multi-glaze-proportion": 80,
- "uprn": 0
- }
+ epc_record=epc_record
)
property_3.windows = {'original_description': 'Fully double glazed', 'has_glazing': True,
'glazing_coverage': 'full',
@@ -106,15 +110,17 @@ class TestWindowRecommendations:
assert not recommender3.recommendation
def test_fully_secondary_glazed(self):
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Wychavon",
+ "multi-glaze-proportion": 100,
+ "uprn": 0
+ }
property_4 = Property(
id=1,
postcode='1',
address='1',
- data={
- "county": "Wychavon",
- "multi-glaze-proportion": 100,
- "uprn": 0
- }
+ epc_record=epc_record
)
property_4.windows = {'original_description': 'Full secondary glazing', 'has_glazing': True,
'glazing_coverage': 'full',
@@ -130,15 +136,17 @@ class TestWindowRecommendations:
assert not recommender4.recommendation
def test_partial_secondary_glazing(self):
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Wychavon",
+ "multi-glaze-proportion": 50,
+ "uprn": 0
+ }
property_5 = Property(
id=1,
postcode='1',
address='1',
- data={
- "county": "Wychavon",
- "multi-glaze-proportion": 50,
- "uprn": 0
- }
+ epc_record=epc_record
)
property_5.windows = {'original_description': 'Partial secondary glazing', 'has_glazing': True,
'glazing_coverage': 'partial',
@@ -160,15 +168,18 @@ class TestWindowRecommendations:
'labour_days': 0.8125, 'is_secondary_glazing': True}]
def test_single_glazed_restricted_measures(self):
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Wychavon",
+ "multi-glaze-proportion": 0,
+ "uprn": 0
+ }
+
property_6 = Property(
id=1,
postcode='1',
address='1',
- data={
- "county": "Wychavon",
- "multi-glaze-proportion": 0,
- "uprn": 0
- }
+ epc_record=epc_record
)
property_6.windows = {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
'glazing_type': 'single',
@@ -195,15 +206,17 @@ class TestWindowRecommendations:
]
def test_full_triple_glazed(self):
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Wychavon",
+ "multi-glaze-proportion": 100,
+ "uprn": 0
+ }
property_7 = Property(
id=1,
postcode='1',
address='1',
- data={
- "county": "Wychavon",
- "multi-glaze-proportion": 100,
- "uprn": 0
- }
+ epc_record=epc_record
)
property_7.windows = {'original_description': 'Fully triple glazed', 'has_glazing': True,
'glazing_coverage': 'full',
@@ -222,16 +235,17 @@ class TestWindowRecommendations:
"""
We should just recommend double glazing to the remaining windows, since it's a cheaper option
"""
-
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Wychavon",
+ "multi-glaze-proportion": 80,
+ "uprn": 1
+ }
property_8 = Property(
id=1,
postcode='1',
address='1',
- data={
- "county": "Wychavon",
- "multi-glaze-proportion": 80,
- "uprn": 1
- }
+ epc_record=epc_record
)
property_8.windows = {'original_description': 'Mostly triple glazing', 'has_glazing': True,
'glazing_coverage': 'most',
diff --git a/utils/s3.py b/utils/s3.py
index e63b7192..3d6cf038 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -1,3 +1,4 @@
+import pickle
import boto3
from io import BytesIO, StringIO
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
@@ -141,5 +142,56 @@ def save_csv_to_s3(dataframe, bucket_name, file_name):
s3.put_object(Body=csv_buffer.getvalue(), Bucket=bucket_name, Key=file_name)
return True
except Exception as e:
- print(f"An error occurred: {e}")
+ logger.error(f"An error occurred: {e}")
return False
+
+
+def save_pickle_to_s3(data, bucket_name, s3_file_name):
+    """
+    Pickle an object and upload the bytes to S3; logs and returns None if pickling fails.
+
+    :param data: The data to save
+    :param bucket_name: The name of the S3 bucket
+    :param s3_file_name: The file name to use for the saved data in S3 (should end in .pkl)
+    """
+    # Serialize data to a pickle format
+    try:
+        serialized_data = pickle.dumps(data)
+    except Exception as e:
+        logger.error(f'Failed to serialize data: {str(e)}')
+        return
+
+    # Use save_data_to_s3 function to upload the serialized data to S3
+    save_data_to_s3(serialized_data, bucket_name, s3_file_name)
+
+
+def read_pickle_from_s3(bucket_name, s3_file_name):
+    """
+    Read a pickle file from an S3 bucket and return the data.
+
+    :param bucket_name: The name of the S3 bucket
+    :param s3_file_name: The file name of the pickle file in S3
+    :return: The unpickled data, or None if the download or the deserialization failed
+    """
+    try:
+        s3 = boto3.client('s3')
+        s3_response = s3.get_object(Bucket=bucket_name, Key=s3_file_name)
+        serialized_data = s3_response['Body'].read()
+    except NoCredentialsError:
+        logger.error("Credentials not available.")
+        return None
+    except PartialCredentialsError:
+        logger.error("Incomplete credentials provided.")
+        return None
+    except Exception as e:
+        logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
+        return None
+
+    # SECURITY: pickle.loads can execute arbitrary code; only read objects this project wrote itself.
+    try:
+        data = pickle.loads(serialized_data)
+    except Exception as e:
+        logger.error(f'Failed to deserialize data: {str(e)}')
+        return None
+
+    return data