diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
new file mode 100644
index 00000000..2ba82e77
--- /dev/null
+++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
@@ -0,0 +1,78 @@
+import pandas as pd
+from tqdm import tqdm
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from etl.epc.settings import EARLIEST_EPC_DATE
+
+logger = setup_logger()
+
+
+class AirSourceHeatPumpEfficiency:
+    """Count the heating-system configurations of EPC certificates that mention an air source heat pump."""
+
+    # Tenure values excluded from the analysis; the strings are matched verbatim against the TENURE column
+    EXCLUDED_TENURES = [
+        "unknown",
+        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
+        "It is not to be used for an existing dwelling",
+    ]
+    # Columns describing the heating system; the counts are grouped by all of them
+    HEATING_COLUMNS = [
+        "MAINHEAT_DESCRIPTION",
+        "MAINHEAT_ENERGY_EFF",
+        "MAINHEATCONT_DESCRIPTION",
+        "MAINHEATC_ENERGY_EFF",
+        "MAIN_FUEL",
+        "HOTWATER_DESCRIPTION",
+        "HOT_WATER_ENERGY_EFF",
+        "MAINS_GAS_FLAG",
+    ]
+    # Rows missing any of these columns are dropped before de-duplication
+    REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]
+
+    def __init__(self, file_directories, cleaned_lookup):
+        """
+        :param file_directories: A list of directories where files are stored.
+        :param cleaned_lookup: A dictionary containing cleaned lookup data.
+        """
+        self.file_directories = file_directories
+        self.cleaned_lookup = cleaned_lookup
+
+        # One counts dataframe per processed directory, appended by create_dataset
+        self.results = []
+
+    def create_dataset(self):
+        """
+        Read certificates.csv from every directory and count the heating-system combinations found on
+        air source heat pump certificates. The per-directory counts are appended to self.results.
+
+        The dataframe is no longer overwritten with the heating-column projection, because the later
+        filters still need PROPERTY_TYPE, LODGEMENT_DATE and UPRN (this previously raised a KeyError).
+        :return: None
+        """
+        logger.info("Creating air source heat pump efficiency dataset")
+        for directory in tqdm(self.file_directories):
+            filepath = directory / "certificates.csv"
+            df = pd.read_csv(filepath, low_memory=False)
+            # Rows without a UPRN cannot be de-duplicated per property below
+            df = df.dropna(subset=["UPRN"])
+            df["UPRN"] = df["UPRN"].astype(int).astype(str)
+            # Take entries after SAP12
+            df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
+            df = df[df["LODGEMENT_DATE"] > EARLIEST_EPC_DATE]
+
+            df = df[~df["TENURE"].isin(self.EXCLUDED_TENURES)]
+
+            # Take entries that contain an air source heat pump
+            df = df[df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)]
+
+            # Count each distinct heating configuration without discarding the other columns of df
+            # (groupby drops rows with a null grouping key by default)
+            counts = df.groupby(self.HEATING_COLUMNS).size().reset_index(name="count")
+            self.results.append(counts)
+
+            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+            df = df.dropna(subset=self.REQUIRED_COLUMNS)
+            # Take newest LODGEMENT_DATE per UPRN
+            # NOTE(review): the de-duplicated frame is not consumed yet - confirm the intended use
+            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
diff --git a/etl/air_source_heat_pump/app.py b/etl/air_source_heat_pump/app.py
new file mode 100644
index 00000000..ac87b34b
--- /dev/null
+++ b/etl/air_source_heat_pump/app.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+from backend.app.plan.utils import get_cleaned
+from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    Read in the EPC dataset and look at the efficiency values for heating systems that include air source
+    heat pumps. This dataset is then used to inform the recommendations for the air source heat pump, so we
+    know how to set the simulation
+    :return: None
+    """
+    # Every sub-directory of the data directory is handed to the client
+    epc_directories = [path for path in DATA_DIRECTORY.iterdir() if path.is_dir()]
+
+    lookup = get_cleaned()
+
+    client = AirSourceHeatPumpEfficiency(
+        file_directories=epc_directories,
+        cleaned_lookup=lookup,
+    )
+    client.create_dataset()
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 9756e00b..0da8f885 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -19,6 +19,40 @@ council_tax_bands = [
]
council_tax_bands = pd.DataFrame(council_tax_bands)
+# This is information we need to override on the EPC itself, for instance if a new survey has been conducted
+# but its results have not yet reached the API
+patches = [
+ {
+ 'address': '6 Beech Road', 'postcode': 'DY1 4BP',
+ 'walls-description': 'Mixed: Filled cavity and external insulated solid brick',
+ 'walls-energy-eff': 'Good',
+ 'roof-description': 'Pitched, 12 mm loft insulation',
+ 'roof-energy-eff': 'Very Poor',
+ 'windows-description': 'Fully double glazed',
+ 'windows-energy-eff': 'Good',
+ 'mainheat-description': 'Room heaters, electric',
+ 'mainheat-energy-eff': 'Very Poor',
+ 'mainheatcont-description': 'Appliance thermostats',
+ 'mainheatc-energy-eff': 'Good',
+ 'lighting-description': 'Low energy lighting in 25% of fixed outlets',
+ 'lighting-energy-eff': 'Good',
+ 'floor-description': 'Mixed: Solid no insulation and suspended no insulation',
+ 'secondheat-description': 'None',
+ 'current-energy-efficiency': '32',
+ }
+]
+
+# This is information found as a result of the non-invasives, which means that certain measures
+# have already been installed. To reflect this in the front end, the measure is still included in the
+# recommendation, but the cost is removed and a message says that the measure is already installed.
+overrides = [
+ {
+ 'address': '5 Oaklands',
+ 'postcode': 'B62 0JA',
+ "overrides": ["windows_glazing"]
+ }
+]
+
def app():
raw_asset_list = read_excel_from_s3(
@@ -41,7 +75,7 @@ def app():
}
)
- # Store the data in s3
+ # Store the asset list in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
save_csv_to_s3(
dataframe=asset_list,
@@ -49,12 +83,44 @@ def app():
file_name=filename
)
+ # Store overrides in s3
+ overrides_filename = f"{USER_ID}/{PORTFOLIO_ID}/overrides.json"
+ save_csv_to_s3(
+ dataframe=pd.DataFrame(overrides),
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=overrides_filename
+ )
+
+ # Store patches in s3
+ patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+ save_csv_to_s3(
+ dataframe=pd.DataFrame(patches),
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=patches_filename
+ )
+
+ # EPC C portfolio
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increase EPC",
- "goal_value": "A",
+ "goal_value": "C",
"trigger_file_path": filename,
+ "overrides_file_path": overrides_filename,
+ "patches_file_path": patches_filename,
+ "budget": None,
+ }
+ print(body)
+
+ # EPC B portfolio
+ body = {
+ "portfolio_id": str(PORTFOLIO_ID + 1),
+ "housing_type": "Private",
+ "goal": "Increase EPC",
+ "goal_value": "B",
+ "trigger_file_path": filename,
+ "overrides_file_path": overrides_filename,
+ "patches_file_path": patches_filename,
"budget": None,
}
print(body)