# Reconstructed from a whitespace-collapsed unified diff. Each section below holds the added ("+") content of one
# file touched by that diff.
#
# .idea/Model.iml (hunk @@ -7,7 +7,7 @@) and .idea/misc.xml (hunk @@ -3,7 +3,7 @@) each change a single line; the
# removed/added lines themselves are blank in the diff text, so there is nothing to reproduce for them here.

# ----------------------------------------------------------------------------------------------------------------
# etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py (new file)
# ----------------------------------------------------------------------------------------------------------------
import pandas as pd
from tqdm import tqdm
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
from etl.epc.settings import EARLIEST_EPC_DATE

logger = setup_logger()


class AirSourceHeatPumpEfficiency:
    """
    Counts the heating-system attribute combinations found on EPC certificates whose main heating description
    mentions an air source heat pump.

    One ``certificates.csv`` is read per directory; the per-directory count tables are collected in
    ``self.results``.
    """

    # Tenure values whose certificates are excluded
    _EXCLUDED_TENURES = [
        "unknown",
        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
        "It is not to be used for an existing dwelling",
    ]

    # Columns that must be populated for a certificate to be kept
    _REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]

    # Heating-system attributes whose distinct combinations are counted
    _SYSTEM_COLUMNS = [
        "MAINHEAT_DESCRIPTION",
        "MAINHEAT_ENERGY_EFF",
        "MAINHEATCONT_DESCRIPTION",
        "MAINHEATC_ENERGY_EFF",
        "MAIN_FUEL",
        "HOTWATER_DESCRIPTION",
        "HOT_WATER_ENERGY_EFF",
        "MAINS_GAS_FLAG",
    ]

    def __init__(self, file_directories, cleaned_lookup):
        """
        :param file_directories: A list of directories where files are stored.
        :param cleaned_lookup: A dictionary containing cleaned lookup data.
        """
        self.file_directories = file_directories
        self.cleaned_lookup = cleaned_lookup

        self.results = []

    def create_dataset(self):
        """
        Read ``certificates.csv`` from every directory and append one table of attribute-combination counts per
        directory to ``self.results``.
        :return:
        """
        logger.info("Creating air source heat pump efficiency dataset")
        for directory in tqdm(self.file_directories):
            filepath = directory / "certificates.csv"
            certificates = pd.read_csv(filepath, low_memory=False)
            self.results.append(self._count_system_combinations(certificates))

    def _count_system_combinations(self, df):
        """
        Filter one certificates table and count each combination of the heating-system columns.

        :param df: The raw contents of one ``certificates.csv``.
        :return: A DataFrame with the heating-system columns plus a ``count`` column.
        """
        # Copy so the column assignments below never write into a view of the caller's frame
        df = df.dropna(subset=["UPRN"]).copy()
        df["UPRN"] = df["UPRN"].astype(int).astype(str)
        # Take entries after SAP12
        df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
        df = df[df["LODGEMENT_DATE"] > EARLIEST_EPC_DATE]

        df = df[~df["TENURE"].isin(self._EXCLUDED_TENURES)]

        # Take entries that contain an air source heat pump
        df = df[df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)]

        # All row-level cleaning happens BEFORE counting: it needs columns (UPRN, LODGEMENT_DATE and the required
        # columns) that are not among the counted columns, and counting should see one certificate per property.
        # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
        df = df.dropna(subset=self._REQUIRED_COLUMNS)
        # Take newest LODGEMENT_DATE per UPRN
        df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])

        # NOTE(review): groupby drops rows with a missing value in any key column by default - confirm this is
        # wanted, otherwise pass dropna=False.
        return df.groupby(self._SYSTEM_COLUMNS).size().reset_index(name="count")


# ----------------------------------------------------------------------------------------------------------------
# etl/air_source_heat_pump/app.py (new file)
# ----------------------------------------------------------------------------------------------------------------
from pathlib import Path
from backend.app.plan.utils import get_cleaned
from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency

DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"


def app():
    """
    This code reads in the EPC dataset and looks at the efficiency values for heating systems that include air
    source heat pumps. This dataset is then used to inform the recommendations for the air source heat pump, so we
    know how to set the simulation
    :return:
    """

    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
    cleaned_lookup = get_cleaned()

    ashp_data_client = AirSourceHeatPumpEfficiency(
        file_directories=directories,
        cleaned_lookup=cleaned_lookup
    )

    ashp_data_client.create_dataset()


# ----------------------------------------------------------------------------------------------------------------
# etl/customers/immo/pilot/asset_list.py (hunk @@ -19,6 +19,40 @@)
# The additions below are inserted after the existing statement
#     council_tax_bands = pd.DataFrame(council_tax_bands)
# ----------------------------------------------------------------------------------------------------------------

# This is information we need to override on the EPC itself, for instance if a new survey has been conducted and
# that has not reached the API
patches = [
    {
        'address': '6 Beech Road', 'postcode': 'DY1 4BP',
        'walls-description': 'Mixed: Filled cavity and external insulated solid brick',
        'walls-energy-eff': 'Good',
        'roof-description': 'Pitched, 12 mm loft insulation',
        'roof-energy-eff': 'Very Poor',
        'windows-description': 'Fully double glazed',
        'windows-energy-eff': 'Good',
        'mainheat-description': 'Room heaters, electric',
        'mainheat-energy-eff': 'Very Poor',
        'mainheatcont-description': 'Appliance thermostats',
        'mainheatc-energy-eff': 'Good',
        'lighting-description': 'Low energy lighting in 25% of fixed outlets',
        'lighting-energy-eff': 'Good',
        'floor-description': 'Mixed: Solid no insulation and suspended no insulation',
        'secondheat-description': 'None',
        'current-energy-efficiency': '32',
    }
]

# This is information that is found as a result of the non-invasives, that mean that certain measures
# have been installed already. To reflect this in the front end, it is included in the recommendation, however
# the cost is removed and instead, a message is presented saying that the measure is already installed.
+overrides = [ + { + 'address': '5 Oaklands', + 'postcode': 'B62 0JA', + "overrides": ["windows_glazing"] + } +] + def app(): raw_asset_list = read_excel_from_s3( @@ -41,7 +75,7 @@ def app(): } ) - # Store the data in s3 + # Store the asset list in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" save_csv_to_s3( dataframe=asset_list, @@ -49,12 +83,44 @@ def app(): file_name=filename ) + # Store overrides in s3 + overrides_filename = f"{USER_ID}/{PORTFOLIO_ID}/overrides.json" + save_csv_to_s3( + dataframe=pd.DataFrame(overrides), + bucket_name="retrofit-plan-inputs-dev", + file_name=overrides_filename + ) + + # Store patches in s3 + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json" + save_csv_to_s3( + dataframe=pd.DataFrame(patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + + # EPC C portfolio body = { "portfolio_id": str(PORTFOLIO_ID), "housing_type": "Private", "goal": "Increase EPC", - "goal_value": "A", + "goal_value": "C", "trigger_file_path": filename, + "overrides_file_path": overrides_filename, + "patches_file_path": patches_filename, + "budget": None, + } + print(body) + + # EPC B portfolio + body = { + "portfolio_id": str(PORTFOLIO_ID + 1), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "overrides_file_path": overrides_filename, + "patches_file_path": patches_filename, "budget": None, } print(body)