diff --git a/etl/customers/orbit/archetypes.py b/etl/customers/orbit/archetypes.py index 73665bcb..988da74f 100644 --- a/etl/customers/orbit/archetypes.py +++ b/etl/customers/orbit/archetypes.py @@ -21,7 +21,7 @@ def clean_colnames(df): return df -def main(): +def lesney_farms(): """ Some rough and ready analysis to get a view of what the achetypes could be, ahead of a meeting with Wates on the 28th Aug 2024 @@ -150,16 +150,25 @@ def main(): ].drop_duplicates() system_build_data_comparison = system_builds.merge( - epc_data[["Asset Reference", "walls-description", "roof-description", "current-energy-rating"]], + epc_data[ + ["Asset Reference", "walls-description", "roof-description", "current-energy-rating", "lodgement-date", + "current-energy-efficiency"]], left_on='Asset Reference', right_on='Asset Reference', how="left" ) - system_build_data_comparison["PRE CALCULATED EPC"].value_counts() - system_build_data_comparison["current-energy-rating"].value_counts() + # Apply patches + patches = { + 25847: {"Property Type": "Semi Detached House"}, + } - epc_cs_system_builds = system_build_data_comparison[system_build_data_comparison["current-energy-rating"] == "C"] + for asset_ref, patch in patches.items(): + for k, v in patch.items(): + system_build_data_comparison.loc[ + system_build_data_comparison["Asset Reference"] == asset_ref, + k + ] = v archetype_columns = [ ["Asset Type", "Property Type", "Wall Type", "Location"], @@ -194,53 +203,34 @@ def main(): ) counts = archetyped_data["archetype ID"].value_counts() - # Archetype 0: Semi D, Uninsulated system built, Pre calculated EPC D, flat insulated roof, (Lesney-0) + # Archetype 0: Semi D, As built system built, Pre calculated EPC D, flat insulated roof, (Lesney-0) # Archetype 1: Semi D, Externally insulated system built, Pre calculated EPC D, flat insulated roof (Lesney-1) - # Archetype 5: Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated (Lesney-2) + # Archetype 4: Semi D, System 
built with unknown insulation, Pre calculated EPC D, flat roof insulated (Lesney-2) # Archetype 3: Semi D, Externally insulated system built, Pre calculated EPC D, flat roof uninsulated (assumed) ( # Lesney-3) - # 0 21 - # 1 10 - # 5 10 - # 3 3 - # 2 1 - # 4 1 - # 6 1 - # 7 1 - # 8 1 - # 9 1 - # 10 1 - # 11 1 + # 0 21 + # 1 11 + # 4 11 + # 3 3 + # 2 1 + # 5 1 + # 6 1 + # 7 1 + # 8 1 + # 9 1 # This archetype is the same as 0, apart from the pre calculate EPC being an E. The registry says this is a D # This has been added to additonal units eg1 = archetyped_data[archetyped_data["archetype ID"] == 2] - # This archetype is the same as 3, apart from it having limited flat roof insulation. - # TODO: The insulation status of this property should be confirmed - eg2 = archetyped_data[archetyped_data["archetype ID"] == 4] - eg2["roof-description"] - z = epc_data[epc_data["Asset Reference"] == eg2["Asset Reference"].values[0]] + # Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated + # This looks like it would fit either in archetype + eg2 = archetyped_data[archetyped_data["archetype ID"] == 5] - # This is the one mid-terrace - the EPC data indicates that this is Semi-detached - # Otherwise this is archetype 5 - # this should be semi-detached eg3 = archetyped_data[archetyped_data["archetype ID"] == 6] - eg3_epc_data = epc_data[epc_data["Asset Reference"] == eg3["Asset Reference"].values[0]] - # This warrants its own archetype - # Semi D, System built with unknown insulation, Pre calculated EPC D, flat uninsulated roof - eg4 = archetyped_data[archetyped_data["archetype ID"] == 7] - - # This property stands out due to the mixed cavity and system built wall, but besides that it's similar to - # archetype 0 - # The latest EPC agrees that this is a mixed wall type but the EPC suggests solid and cavity, with an assumed - # insulated cavity, as built - eg5 = archetyped_data[archetyped_data["archetype ID"] == 8] - - # Archetypes 9, 10, 11 are all 
similar, Semi D, Uninsulated system built, with pitched lofts with up to 200mm + # Archetypes 7, 8, 9 are all similar, Semi D, Uninsulated system built, with pitched lofts with up to 200mm # insulation in the lofts - eg6 = archetyped_data[archetyped_data["archetype ID"] == 9] # It's just the three units # They're all labelled as @@ -266,6 +256,164 @@ def main(): } ] - patches = { - 25847: {"Property Type": "Semi Detached House", "archetype ID": 5}, - } + # These are As Built, System Built + system_built_streets = ( + archetyped_data["Address"].str.split(",").str[0].str.split(" ").str[1].unique() + ) + + all_assets_w_epcs = all_assets.merge(epc_data, on="Asset Reference", how="left") + + # Grab all of the properties on this street that aren't system built + streets_not_system_builds = all_assets_w_epcs[ + all_assets_w_epcs["Address"].str.split(",").str[0].str.split(" ").str[1].isin(system_built_streets) & + ~all_assets_w_epcs["Wall Type"].str.contains("SystemBuilt") + ] + + system_builds = archetyped_data[ + archetyped_data["Wall Type"].str.contains("SystemBuilt") + ][["Asset Reference", "Address", "Wall Type", "walls-description"]].sort_values("Address") + + birling_street_system_builds = system_builds[system_builds["Address"].str.contains("Birling")] + halstead_street_system_builds = system_builds[system_builds["Address"].str.contains("Halstead")] + brasted_street_system_builds = system_builds[system_builds["Address"].str.contains("Brasted")] + frinstead_street_system_builds = system_builds[ + system_builds["Address"].str.contains("Frinstead") | system_builds["Address"].str.contains("Frinsted") + ] + + pd.set_option('display.max_rows', 500) + pd.set_option('display.max_columns', 500) + pd.set_option('display.width', 1000) + streets_not_system_builds[["Asset Reference", "Address", "Wall Type", "walls-description"]] + + system_builds[system_builds["Address"].str.contains("Birling")] + + # Possible System Builds + + # Create the proposed sample + # lesney-0 + 
archetyped_data["lodgement-date"] = pd.to_datetime(archetyped_data["lodgement-date"])
+
+    lesney_0 = archetyped_data[archetyped_data["archetype ID"] == 0].copy()
+    # Get the oldest EPC per postcode
+    lesney_0 = lesney_0.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_0[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_1 = archetyped_data[archetyped_data["archetype ID"] == 1].copy()
+    lesney_1 = lesney_1.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_1[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_2 = archetyped_data[archetyped_data["archetype ID"] == 4].copy()
+    lesney_2 = lesney_2.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_2[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_3 = archetyped_data[archetyped_data["archetype ID"] == 3].copy()
+    lesney_3 = lesney_3.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_3[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]
+
+    # Get the pitched roof properties, which are lesney-4
+    lesney_4 = archetyped_data[archetyped_data["archetype ID"].isin([7, 8, 9])].copy()
+    lesney_4 = lesney_4.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_4[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]
+
+    assigned_archetypes = archetyped_data[
+        ["Asset Reference", "archetype ID", "Address"] + chosen_combination +
+        ["lodgement-date", "current-energy-rating", "current-energy-efficiency", "walls-description"]
+    ].copy()
+    # Map the archetype ID to their string representation
+    assigned_archetypes["archetype ID"] = assigned_archetypes["archetype ID"].replace(
+        {
+            0: "Lesney-0",
+            1: "Lesney-1",
+            4: "Lesney-2",
+            3: "Lesney-3",
+            7: "Lesney-4",
+            8: "Lesney-4",
+            9: "Lesney-4",
+            2: "Lesney-0",
+            5: "Lesney-2",
+            6: "Lesney-0",
+        }
+    )
+
+    assigned_archetypes["Asset Reference"] = assigned_archetypes["Asset Reference"].astype(int)
+
+    assigned_archetypes.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/assigned_archetypes.csv", index=False
+    )
+
+
+def culworth_court(
+    asset_list_path="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/001 - EPC CULWORTH COURT.xlsx",
+):
+    """
+    Some rough works on Culworth Court, where they're looking at an ASHP/GSHP
+
+    Reads the asset list, attaches the newest EPC found for every asset and derives the
+    distinct combinations of asset type, property type, built form and floor level.
+
+    :param asset_list_path: Excel workbook holding the asset list on its "EPC C" sheet
+    :return: DataFrame with one row per distinct archetype combination
+    :raises LookupError: if no EPC can be found for an asset
+    """
+    # Asset list labels -> the property_type set on the searcher; loop invariant, so built once
+    asset_type_map = {
+        "HOUSE": "House",
+        "BUNGALOWS": "Bungalow",
+        "FLATS": "Flat",
+        "MAISONETTES": "Maisonette",
+    }
+
+    asset_list = pd.read_excel(asset_list_path, sheet_name="EPC C", header=1)
+    asset_list = clean_colnames(asset_list)
+
+    # Get the EPC data
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        # Spelling error in the asset list; str.replace is a no-op for every other address
+        address = home["Address"].replace("Frinstead", "Frinsted")
+
+        searcher = SearchEpc(
+            address1=address.split(",")[0],
+            postcode=home["Address - Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            full_address=address,
+        )
+        searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            raise LookupError(
+                f"Couldn't find an EPC for asset {home['Asset Reference']} ({address})"
+            )
+
+        epc_data.append(
+            {
+                "Asset Reference": home["Asset Reference"],
+                **searcher.newest_epc,
+            }
+        )
+    epc_data = pd.DataFrame(epc_data)
+
+    asset_list = asset_list.merge(epc_data, on="Asset Reference", how="left")
+    # Blank out the "NODATA!" floor-level marker
+    asset_list["floor-level"] = np.where(
+        asset_list["floor-level"] == "NODATA!",
+        "",
+        asset_list["floor-level"]
+    )
+
+    # Fold enclosed end-terraces into end-terraces so both land in the same combination
+    asset_list["built-form"] = np.where(
+        asset_list["built-form"] == "Enclosed End-Terrace",
+        "End-Terrace",
+        asset_list["built-form"]
+    )
+
+    archetype_combinations = asset_list[
+        ["Asset Type", "Property Type", "built-form", "floor-level"]
+    ].drop_duplicates()
+
+    # Hand the combinations back to the caller
+    return archetype_combinations
diff --git 
a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py
index 0656e917..8bc43efb 100644
--- a/recommendations/tests/test_data/heating_recommendations_data.py
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@@ -55,11 +55,17 @@ testing_examples = [
             'fixed-lighting-outlets-count': 10.0, 'low-energy-fixed-light-count': 7.0,
             'uprn': 100110195416.0, 'uprn-source': 'Address Matched'
         },
-        "kwh": {
-
-        },
-        "recommendation_descripptions": [
-
-        ]
+        "heating_recommendation_descriptions": [
+            "Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and "
+            "smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade "
+            "scheme grant",
+        ],
+        "heating_controls_recommendation_descriptions": [
+            "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
+            "temperature zone control)"
+        ],
+        "notes": "This property has a boiler, radiators & mains gas with good efficiency so the only recommendation "
+                 "we expect here is for an air source heat pump. The heating controls are a programmer, room thermostat "
+                 "and TRVs and so we should expect a TTZC recommendation"
     }
 ]
diff --git a/recommendations/tests/test_heating_recommendations.py b/recommendations/tests/test_heating_recommendations.py
index 76927702..35373729 100644
--- a/recommendations/tests/test_heating_recommendations.py
+++ b/recommendations/tests/test_heating_recommendations.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 import pandas as pd
 import msgpack
 from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
@@ -29,7 +30,18 @@ class TestHeatingRecommendations:
 
     @pytest.fixture
     def kwh_client(self):
-        return KwhData(bucket="retrofit-data-dev", read_consumption_data=True)
+        client = KwhData(bucket="retrofit-data-dev", read_consumption_data=False)
+        # We fix this pricing table for these tests
+        client.retail_price_comparison = pd.DataFrame(
+            [
+                {
+                    "Date": datetime.today().strftime("%Y-%m-%d"),
+                    'Average standard variable tariff (Large legacy suppliers)': 1
+                }
+            ]
+        )
+        client.retail_price_comparison["Date"] = pd.to_datetime(client.retail_price_comparison["Date"])
+        return client
 
     @pytest.mark.parametrize(
         "test_case",
@@ -60,8 +72,21 @@ class TestHeatingRecommendations:
                 "energy_assessment_is_newer": False
             }
         )
-        # TODO: Implement me
-        kwh_predictions = test_case["kwhs"]
+
+        # For these tests, this can be fixed
+        kwh_predictions = {
+            "heating_kwh_predictions": pd.DataFrame(
+                [
+                    {"id": p.uprn, "predictions": 12000}
+                ]
+            ),
+            "hotwater_kwh_predictions": pd.DataFrame(
+                [
+                    {"id": p.uprn, "predictions": 3000}
+                ]
+            ),
+        }
+
         p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions)
 
         recommender = HeatingRecommender(property_instance=p)
@@ -71,4 +96,19 @@ class TestHeatingRecommendations:
 
         recommender.recommend(has_cavity_or_loft_recommendations=False)
 
-        # TODO: We check results against expected behaviour
+        assert len(recommender.heating_recommendations) == len(test_case["heating_recommendation_descriptions"])
+        assert (
+            len(recommender.heating_control_recommendations) ==
+            len(test_case["heating_controls_recommendation_descriptions"])
+        )
+
+        # Check the exact descriptions
+        assert (
+            {x["description"] for x in recommender.heating_recommendations} ==
+            set(test_case["heating_recommendation_descriptions"])
+        )
+
+        assert (
+            {x["description"] for x in recommender.heating_control_recommendations} ==
+            set(test_case["heating_controls_recommendation_descriptions"])
+        )