diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
new file mode 100644
index 00000000..526c34a0
--- /dev/null
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -0,0 +1,152 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 67
+
+
+def app():
+    """
+    Build a small demo portfolio of social-rented properties in Croydon from EPC data.
+
+    The newest certificate per UPRN is kept, then filtered to social tenure, an energy efficiency
+    score below 69 and a lodgement date within roughly the last 2.5 years. Four archetype samples are
+    taken from what is left, combined into one asset list and passed to save_csv_to_s3; a settings
+    dict pointing at the saved file is printed.
+    :return: None
+    """
+
+    # Firstly, read in the EPC data for Croydon
+    epc_data = pd.read_csv(
+        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
+        low_memory=False
+    )
+
+    # Filter on entries where we have a UPRN
+    # .copy() so the LODGEMENT_DATE assignment below writes to its own frame rather than a slice
+    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])].copy()
+
+    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
+
+    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+
+    # Now filter on social properties
+    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]
+    # There are 17337 properties with a registered EPC in Croydon
+    # Take below EPC C properties
+    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]
+    # 7994 properties are below EPC C (46%)
+
+    # 79% D, 19% E, 1% F, 0.2% G (CURRENT_ENERGY_RATING split) - it probably makes the most sense to focus on E and D properties
+
+    # For the purpose of the sample, take the properties that had surveys done in the last 2.5 years
+    # This gives us 1023 remaining properties
+    survey_cutoff = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365))
+    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= survey_cutoff]
+
+    # Archetype 1: defined below:
+    # 1) House
+    # 2) Unfilled cavity
+    # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
+    # 4) EPC E
+    # Different buckets of properties
+    archetype_1_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["House"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
+        epc_data["ROOF_DESCRIPTION"].isin(
+            [
+                "Pitched, 12 mm loft insulation",
+                "Pitched, 0 mm loft insulation",
+                "Pitched, no insulation",
+                "Pitched, 50 mm loft insulation",
+                "Flat, no insulation (assumed)",
+                "Pitched, no insulation (assumed)"
+            ]
+        )
+    ]
+    archetype_1_sample_asset_list = archetype_1_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_1_sample_asset_list["ARCHETYPE"] = "Archetype 1"
+
+    # Archetype 2: defined below:
+    # 1) Flat
+    # 2) Unfilled cavity
+    # 3) Another property above
+    # 4) EPC E
+    archetype_2_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "E") &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
+        epc_data["ROOF_DESCRIPTION"].isin(
+            [
+                "(another dwelling above)"
+            ]
+        )
+    ]
+    archetype_2_sample_asset_list = archetype_2_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2"
+
+    # Archetype 3: defined below:
+    # 1) EPC F
+    # 2) Solid brick wall
+    # 3) House
+    # 4) Pitched roof with no insulation
+    # Just 1 property (more expensive to retrofit)
+    # NOTE(review): the filter below does not check WALLS_DESCRIPTION, so criterion 2 is not enforced
+    archetype_3_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["House"]) &
+        (epc_data["CURRENT_ENERGY_RATING"] == "F") &
+        epc_data["ROOF_DESCRIPTION"].isin(["Pitched, no insulation"])
+    ]
+    archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3"
+
+    # Archetype 4: defined below:
+    # 1) Maisonette
+    # 2) Empty cavity
+    # 3) EPC E
+    # 14 properties here
+    # NOTE(review): the filter below does not check CURRENT_ENERGY_RATING, so criterion 3 is not enforced
+    archetype_4_sample = epc_data[
+        epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) &
+        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"])
+    ]
+    archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
+
+    asset_list = pd.concat(
+        [
+            archetype_1_sample_asset_list,
+            archetype_2_sample_asset_list,
+            archetype_3_sample_asset_list,
+            archetype_4_sample_asset_list
+        ]
+    )
+
+    asset_list = asset_list.rename(
+        columns={
+            "UPRN": "uprn",
+            "ADDRESS1": "address",
+            "POSTCODE": "postcode",
+            "ARCHETYPE": "archetype"
+        }
+    )
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "budget": None,
+        "exclusions": ["floor_insulation"]
+    }
+    print(body)
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e414cd00..b4b82d0b 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -6692,6 +6692,92 @@ def create_final_report():
     revenue.to_csv("HA Analysis Final - revenue.csv")
 
 
+def identify_eco_works(loader):
+    """
+    Export the remaining ECO4-eligible properties for a fixed set of housing associations.
+
+    For each selected HA in loader.data, properties that have an installation_status in the survey
+    list are dropped; the remainder is split on its "ECO Eligibility" value into three groups, and
+    each non-empty group is written to a CSV under local_data/.
+    :param loader: object whose .data maps an HA code to a dict with "asset_list" and "survey_list"
+        DataFrames
+    :return: DataFrame with one row per selected HA holding the size of each group
+    """
+    # ha_names = [
+    #     "HA16",  # For Housing
+    #     "HA39",  # Rooftop
+    #     "HA41",  # Settle
+    #     "HA23",  # Lambeth
+    #     "HA14",  # EMH
+    #     "HA7",  # Believe
+    #     "HA102",  # Thrive
+    # ]
+
+    # Unitas, fairhive, acis, LHP
+    # HA code -> name used in the output file names; also defines which HAs are processed
+    names = {
+        "HA50": "Unitas",
+        "HA15": "Fairhive",
+        "HA107": "ACIS",
+        "HA24": "LHP"
+    }
+
+    breakdowns = []
+    for ha, data_assets in loader.data.items():
+        if ha not in names:
+            continue
+
+        asset_list = data_assets["asset_list"].copy()
+        survey_list = data_assets["survey_list"].copy()
+        # Remove things that have sold
+        if not survey_list.empty:
+            # NOTE(review): presumably one survey row per asset_list_row_id; duplicates would repeat
+            # asset rows after the merge - confirm against the survey data
+            asset_list = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
+            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+            asset_list = asset_list.drop(columns=["installation_status"])
+
+        # Needing a CIGA check
+        needs_ciga = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"
+        ].copy()
+
+        eco4 = asset_list[
+            asset_list["ECO Eligibility"] == "eco4"
+        ].copy()
+
+        eco4_passed_ciga = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 - passed ciga"
+        ].copy()
+
+        # Store the data
+        if not needs_ciga.empty:
+            needs_ciga.to_csv(f"local_data/{names[ha]} - needs ciga.csv")
+
+        if not eco4.empty:
+            eco4.to_csv(f"local_data/{names[ha]} - eco4.csv")
+
+        if not eco4_passed_ciga.empty:
+            eco4_passed_ciga.to_csv(f"local_data/{names[ha]} - eco4 passed ciga.csv")
+
+        summary = {
+            "HA Name": ha,
+            "n_needing_ciga": needs_ciga.shape[0],
+            "eco4": eco4.shape[0],
+            "eco4_passed_ciga": eco4_passed_ciga.shape[0]
+        }
+
+        breakdowns.append(summary)
+    breakdowns = pd.DataFrame(breakdowns)
+    breakdowns = breakdowns.fillna(0)
+    return breakdowns
+
+
 def app():
     """
     This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
@@ -6739,29 +6825,8 @@ def app():
     loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
     loader.load()
     loader.ha_facts_and_figures()
-    forecast_remaining_sales(loader)
-    # gbis rate
-    # breakdowns = []
-    # for ha, data_assets in loader.data.items():
-    #     asset_list = data_assets["asset_list"].copy()
-    #     breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
-    #     breakdowns.append(breakdown)
-    # breakdowns = pd.DataFrame(breakdowns)
-    #
-    # installer = []
-    # for ha, data_assets in loader.data.items():
-    #     survey_list = data_assets["survey_list"]
-    #     if survey_list.empty:
-    #         continue
-    #     if "INSTALLER" not in survey_list.columns:
-    #         continue
-    #
-    #     installers = survey_list["INSTALLER"].value_counts().to_dict()
-    #     installers["ha_name"] = ha
-    #     installer.append(installers)
-    # installer = pd.DataFrame(installer)
-    # installer.drop(columns=["ha_name"]).sum().sum()
 
+    forecast_remaining_sales(loader)
 
     # Adhoc - for HA16, get the properties that still need a CIGA check
     asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()