diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 44178792..db9ec4ff 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -193,33 +193,32 @@ class SearchEpc: @classmethod def get_house_number(cls, address: str) -> str | None: """ - This method will use the usaddress library to parse an address and extract the house number - :return: + This method uses the usaddress library to parse an address and extract the primary house or flat number. """ + try: - parsed = usaddress.parse(address) - parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")] - parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None - - if parsed_house_number is None: - # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat', - # we also add a custom approach - - # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning + # Custom regex to catch a broad range of cases pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)' - match = re.search(pattern, address) - if match: - # Return the first non-None group found return next(g for g in match.groups() if g is not None) - else: - return None - # Remove training commas - parsed_house_number = parsed_house_number.replace(",", "") + parsed = usaddress.parse(address) + # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected + for part, type_ in parsed: + if type_ == 'OccupancyIdentifier': + return part # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary + # number - return parsed_house_number + # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found + address_number = next((part 
for part, type_ in parsed if type_ == 'AddressNumber'), None) + if address_number: + return address_number.replace(",", "") # Remove any trailing commas + + except Exception as e: + print(f"Error parsing address: {e}") + + return None @staticmethod def extract_numeric_housenumber_part(house_number: str | None) -> int | None: diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index ebaf482d..06d1aadf 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest): property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) - # if not is_new: - # continue - # - # create_property_targets( - # session, - # property_id=property_id, - # portfolio_id=body.portfolio_id, - # epc_target=body.goal_value, - # heat_demand_target=None - # ) + if not is_new: + continue + + create_property_targets( + session, + property_id=property_id, + portfolio_id=body.portfolio_id, + epc_target=body.goal_value, + heat_demand_target=None + ) epc_records = { 'original_epc': epc_searcher.newest_epc.copy(), diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 39ea5a98..5c781979 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -71,6 +71,14 @@ class PropertyValuation: 90013980: 148_000, # Based on Zoopla 90087154: 184_000, # Based on Zoopla 90046817: 167_000, # Based on Zoopla + # Goldman Sachs Pilot for inrto - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/ + 100070358888: 153_000, # Based on Zoopla + 10090436544: 282_000, # Based on Zoopla + 100070365751: 177_000, # Based on Zoopla + 10095952767: 168_000, # Based on Zoopla + 100070520130: 177_000, # Based on Zoopla + 100070333957: 185_000, # Based on Zoopla + 100070543258: 211_000, # Based on Zoopla } # We base our valuation uplifts on a number of sources @@ -108,6 +116,29 @@ class 
PropertyValuation: # {"start": "D", "end": "A", "increase_percentage": 0.017}, ] + # Found here: https://www.rightmove.co.uk/news/articles/property-news/green-premium-epc-ratings/ + # F -> C is + 15% + # E -> C is +7% + # D -> C is +3% + RIGHTMOVE_MAPPING = [ + {"start": "G", "end": "C", "increase_percentage": 0.15}, + {"start": "G", "end": "B", "increase_percentage": 0.15}, + {"start": "G", "end": "A", "increase_percentage": 0.15}, + + {"start": "F", "end": "C", "increase_percentage": 0.15}, + {"start": "F", "end": "B", "increase_percentage": 0.15}, + {"start": "F", "end": "A", "increase_percentage": 0.15}, + + {"start": "E", "end": "C", "increase_percentage": 0.07}, + {"start": "E", "end": "B", "increase_percentage": 0.07}, + {"start": "E", "end": "A", "increase_percentage": 0.07}, + + {"start": "D", "end": "C", "increase_percentage": 0.03}, + {"start": "D", "end": "B", "increase_percentage": 0.03}, + {"start": "D", "end": "A", "increase_percentage": 0.03}, + + ] + EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"] @classmethod @@ -159,14 +190,18 @@ class PropertyValuation: msm_increase, lloyds_increase = cls.get_increase(epc_band_range) - # We now use the knight frank and nationwide data to get further valuation evidence, if we have it + # We now use the knight frank, nationwide and Rightmove data to get further valuation evidence, if we have it kf_increase = [x for x in cls.KNIGHT_FRANK_MAPPING if x["start"] == current_epc and x["end"] == target_epc] nw_increase = [x for x in cls.NATIONWIDE_MAPPING if x["start"] == current_epc and x["end"] == target_epc] + rm_increase = [x for x in cls.RIGHTMOVE_MAPPING if x["start"] == current_epc and x["end"] == target_epc] kf_increase = kf_increase[0]["increase_percentage"] if kf_increase else None nw_increase = nw_increase[0]["increase_percentage"] if nw_increase else None + rm_increase = rm_increase[0]["increase_percentage"] if rm_increase else None - all_increases = [x for x in [msm_increase, lloyds_increase, kf_increase, 
nw_increase] if x is not None] + all_increases = [ + x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase, rm_increase] if x is not None + ] max_increase = max(all_increases) min_increase = min(all_increases) diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py index 2ba82e77..044cc830 100644 --- a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py +++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py @@ -21,6 +21,8 @@ class AirSourceHeatPumpEfficiency: def create_dataset(self): logger.info("Creating solar photo supply dataset") + + all_counts = [] for dir in tqdm(self.file_directories): filepath = dir / "certificates.csv" df = pd.read_csv(filepath, low_memory=False) @@ -44,9 +46,15 @@ class AirSourceHeatPumpEfficiency: df = df[ df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False) ] + + # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA + for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]: + df = df[~pd.isnull(df[col])] # Get the columns we're interested in df = df[ [ + "PROPERTY_TYPE", + "BUILT_FORM", "MAINHEAT_DESCRIPTION", "MAINHEAT_ENERGY_EFF", "MAINHEATCONT_DESCRIPTION", @@ -60,6 +68,8 @@ class AirSourceHeatPumpEfficiency: counts = df.groupby( [ + "PROPERTY_TYPE", + "BUILT_FORM", "MAINHEAT_DESCRIPTION", "MAINHEAT_ENERGY_EFF", "MAINHEATCONT_DESCRIPTION", @@ -71,8 +81,34 @@ class AirSourceHeatPumpEfficiency: ] ).size().reset_index(name="count") - # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA - for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]: - df = df[~pd.isnull(df[col])] - # Take newest LODGEMENT_DATE per UPRN - df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"]) + all_counts.append(counts) + + all_counts = 
pd.concat(all_counts) + + all_counts_agg = all_counts.groupby( + [ + "PROPERTY_TYPE", + "BUILT_FORM", + "MAINHEAT_DESCRIPTION", + "MAINHEAT_ENERGY_EFF", + "MAINHEATCONT_DESCRIPTION", + "MAINHEATC_ENERGY_EFF", + "MAIN_FUEL", + "HOTWATER_DESCRIPTION", + "HOT_WATER_ENERGY_EFF", + "MAINS_GAS_FLAG" + ] + )["count"].sum().reset_index() + + all_counts_agg.groupby("PROPERTY_TYPE")["count"].sum() + # In houses, 68% of the cases where we see air source heat pumps are in detached and semi-detached houses + all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "House"]["BUILT_FORM"].value_counts(normalize=True) + + all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Flat"]["BUILT_FORM"].value_counts() + + # In Bungalows, 74% of cases where we see air source heat pumps are in detached and semi-detached houses + all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Bungalow"]["BUILT_FORM"].value_counts(normalize=True) + + # TODO: Research options for mid and end-terrace houses + # TODO: Research the options for flats - we see them appear in flats, but practically speaking, how does the + # install process work? 
def app():
    """
    Build the Goldman pilot asset list, upload it to S3 and print the plan-trigger request body.

    The seven pilot addresses are hard-coded below. They are written as a CSV to the
    ``retrofit-plan-inputs-dev`` bucket under ``{USER_ID}/{PORTFOLIO_ID}/pilot.csv``, and the printed
    request body points at that key through ``trigger_file_path``.
    """
    pilot_rows = [
        ("19 Emily Gardens", "B16 0ED"),
        ("Flat 6 41 Bradford Street", "B5 6HX"),
        ("197 FIELD LANE", "B32 4HL"),
        ("FLAT 4 108 SUMMER ROAD", "B23 6DY"),
        ("1, St. Benedicts Road", "B10 9DP"),
        ("29 COOKSEY LANE", "B44 9QL"),
        ("40 TRITTIFORD ROAD", "B13 0HG"),
    ]
    pilot_assets = pd.DataFrame(pilot_rows, columns=["address", "postcode"])

    # Store the asset list in s3
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=pilot_assets,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=s3_key
    )

    # Plan-trigger request body; the goal set here is an EPC increase to band "B"
    request_body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increase EPC",
        "goal_value": "B",
        "trigger_file_path": s3_key,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": "",
        "budget": None,
    }
    print(request_body)
def aggregate_matches(matching_lookup, company_ownership, properties):
    """
    Summarise matched properties per owning company, broken down by local authority.

    :param matching_lookup: DataFrame of matches with at least "UPRN" and "Title Number" columns.
    :param company_ownership: DataFrame keyed on "Title Number" that carries the
        "Company Registration No. (1)" and "Proprietor Name (1)" columns.
    :param properties: DataFrame with "UPRN" and "LOCAL_AUTHORITY_LABEL" columns.
    :return: One row per (company registration number, proprietor name), with one count column per
        local authority, "total_number_of_properties", "approx_value"
        (PROPERTY_VALUE_ESTIMATE * total) and "cumulative_value" (running sum of approx_value),
        sorted by total number of properties, largest first.
    """
    owner_keys = ["Company Registration No. (1)", "Proprietor Name (1)"]

    # Attach the owner details to each match, then the local authority of each matched property
    with_owner = matching_lookup.merge(company_ownership, how="left", on="Title Number")
    enriched = with_owner.merge(properties[["UPRN", "LOCAL_AUTHORITY_LABEL"]], how="left", on="UPRN")

    # Number of matched properties per owner and local authority
    per_authority = enriched.groupby(
        ["Company Registration No. (1)", "Proprietor Name (1)", "LOCAL_AUTHORITY_LABEL"]
    )["UPRN"].count()
    per_authority = per_authority.reset_index(name="number_of_properties")
    per_authority = per_authority.sort_values("number_of_properties", ascending=False)

    # Spread the local authorities out into columns; owners with no property in an authority get 0
    summary = per_authority.pivot_table(
        index=owner_keys,
        columns="LOCAL_AUTHORITY_LABEL",
        values="number_of_properties",
        fill_value=0
    )
    summary = summary.reset_index()

    # Overall number of matched properties per owner, regardless of local authority
    per_owner = enriched.groupby(owner_keys)["UPRN"].count()
    per_owner = per_owner.reset_index(name="total_number_of_properties")

    summary = summary.merge(per_owner, how="left", on=owner_keys)
    summary = summary.sort_values("total_number_of_properties", ascending=False)

    # The running total is taken in the sorted order, i.e. starting from the largest owner
    summary["approx_value"] = PROPERTY_VALUE_ESTIMATE * summary["total_number_of_properties"]
    summary["cumulative_value"] = summary["approx_value"].cumsum()

    return summary
def _normalise_address(text):
    """Strip punctuation and then spaces from an address so that formatting differences do not count as edits."""
    return re.sub(r'[^\w\s]', '', text).replace(" ", "")


def levenstein_match(matching_string, df, address_col):
    """
    Return the single row of ``df`` whose address is closest to ``matching_string``.

    Both the query and every candidate address are normalised the same way (punctuation and spaces
    removed) before the Levenshtein distance is computed. Previously only the candidates were
    normalised, so every comma or space in the query was counted as an edit.

    :param matching_string: The address to match against the candidates.
    :param df: DataFrame holding the candidate rows.
    :param address_col: Name of the column in ``df`` that holds the candidate address strings.
    :return: A one-row slice of ``df`` for the best match (the first one on ties), or an empty slice
        of ``df`` when there are no candidates.
    """
    candidates = [_normalise_address(x) for x in df[address_col].tolist()]
    if not candidates:
        # Nothing to compare against; min() on an empty list would raise
        return df.iloc[0:0]

    query = _normalise_address(matching_string)
    distances = [Levenshtein.distance(query, candidate) for candidate in candidates]
    best_match_index = distances.index(min(distances))

    # No distance threshold is applied for the moment: the closest candidate always wins,
    # however far away it is.
    return df.iloc[best_match_index:best_match_index + 1]
def remove_duplicate_matches(matching_lookup, properties, company_ownership):
    """
    Keep only the best-matching UPRN for every title number that was matched to several UPRNs.

    For each duplicated "Title Number", the EPC address of every matched UPRN is compared with the
    title's "Property Address" via ``levenstein_match``; all but the closest UPRN are removed from
    the lookup.

    :param matching_lookup: DataFrame with at least "UPRN" and "Title Number" columns.
    :param properties: DataFrame with "UPRN" and "ADDRESS" columns.
    :param company_ownership: DataFrame with "Title Number" and "Property Address" columns.
    :return: ``matching_lookup`` itself when nothing has to be removed; otherwise a new DataFrame
        without the rejected (UPRN, Title Number) pairs.
    """
    titles = matching_lookup["Title Number"]
    duplicated_titles = titles[titles.duplicated()].unique()
    if len(duplicated_titles) == 0:
        # pd.concat raises on an empty list, so the no-duplicate case is handled up front
        return matching_lookup

    epc_addresses = properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"})
    title_addresses = company_ownership[["Title Number", "Property Address"]]

    to_drop = []
    for dupe_title in duplicated_titles:
        dupe_data = matching_lookup[titles == dupe_title].copy()
        matched_addresses = dupe_data.merge(
            epc_addresses, how="left", on="UPRN"
        ).merge(
            title_addresses, how="left", on="Title Number"
        )
        # We perform levenstein to get the best match
        best_match = levenstein_match(
            matching_string=matched_addresses["Property Address"].values[0],
            df=matched_addresses,
            address_col="epc_address"
        )
        rejected = matched_addresses[~matched_addresses["UPRN"].isin(best_match["UPRN"].values)]
        to_drop.append(rejected[["UPRN", "Title Number"]].copy())

    # De-duplicate so the anti-join below cannot multiply rows of matching_lookup
    to_drop = pd.concat(to_drop).drop_duplicates()
    if to_drop.empty:
        return matching_lookup

    # Anti-join: keep only the rows that have no counterpart in to_drop. The filtered frame is
    # returned (it used to be computed and thrown away) and the helper "_merge" column is dropped.
    merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
    return merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
Birmingam, for Goldman Sachs + """ + # paths = [ + # "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv", + # # + # "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv", + # 
"local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv", + # # East midlands + # "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv", + # ] + # paths = list(set(paths)) + # find_f_g_properties(paths) + + properties = pd.read_excel("EPC F & G Properties.xlsx") + company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv") + company_ownership["is_overseas"] = False + overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv") + overseas_company_ownership["is_overseas"] = True + + company_ownership = pd.concat([company_ownership, overseas_company_ownership]) + + # FIlter on relevant postcodes + company_ownership = company_ownership[ + company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())] + + # Now we filter properties the other way around + properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())] + # We end up with 7.4k entires on a postcode match, however we need to now do a direct address match + # Take just private rentals + properties = properties[ + 
properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"]) + ] + + # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the + # the property itself + starting_terms = [ + "land adjoining", "land on the", "land to the rear of", "land and buildings on the", + "garage adjoining", "car park adjoining", "the land adjoining", "land and buildings adjoining", + "all royal mines" + ] + for starting_term in starting_terms: + company_ownership = company_ownership[ + ~company_ownership["Property Address"].str.lower().str.startswith(starting_term) + ] + + freehold_matching_lookup = [] # 634 + leasehold_matching_lookup = [] # 86 + shared_leasehold_match = [] + shared_freehold_match = [] + for _, address in tqdm(properties.iterrows(), total=len(properties)): + match_type = "exact" + filtered = company_ownership[ + company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower() + ].copy() + + # Remove postcode and remove trailing commas + filtered["house_number"] = ( + filtered["Property Address"] + .apply(remove_text_in_brackets) + .apply(SearchEpc.get_house_number) + .str.lower() + .str.replace(",", "") + ) + house_no = SearchEpc.get_house_number(address["ADDRESS1"]) + if house_no is not None: + house_no = house_no.replace(",", "") + + if house_no is None: + # It's hard for us to get a reliable match + # filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])] + # if filtered.shape[0] > 1: + # raise Exception("No valid - maybe we should do levenstein?") + continue + + else: + + if house_no not in filtered["house_number"].values: + # If this happens, we check house_number for a x-y range of addresses + filtered["house_number_range"] = filtered["house_number"].apply(extract_range_from_house_number) + # If we have found a house number range, we check if the house number is in the range and if not, + # we drop the row + filtered['is_in_range'] = 
filtered['house_number_range'].apply(lambda x: is_in_range(x, house_no)) + + if filtered['is_in_range'].any(): + # If house_no is found in any range, keep only rows where it is in range + filtered = filtered[filtered['is_in_range']] + else: + # If house_no is not found in any range, filter out rows where 'house_number_range' is not None + filtered = filtered[filtered['house_number_range'].isnull()] + + # Strip out letters from house_no and house_number + house_no = extract_numeric_part(house_no) + filtered["house_number"] = filtered["house_number"].astype(str).apply(extract_numeric_part) + match_type = "approximate" + + filtered = filtered[filtered["house_number"] == house_no] + + if filtered.empty: + continue + + filtered_freehold = filtered[filtered["Tenure"] == "Freehold"] + filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"] + + if filtered_freehold.shape[0] > 1: + matched = filtered_leasehold[["Title Number"]].copy() + matched.insert(0, "UPRN", address["UPRN"]) + shared_freehold_match.append(matched) + elif not filtered_freehold.empty: + freehold_matching_lookup.append( + { + "UPRN": address["UPRN"], + "Title Number": filtered_freehold["Title Number"].values[0], + "match_type": match_type, + } + ) + + if filtered_leasehold.shape[0] > 1: + matched = filtered_leasehold[["Title Number"]].copy() + matched.insert(0, "UPRN", address["UPRN"]) + shared_leasehold_match.append(matched) + elif not filtered_leasehold.empty: + leasehold_matching_lookup.append( + { + "UPRN": address["UPRN"], + "Title Number": filtered_leasehold["Title Number"].values[0], + "match_type": match_type, + } + ) + + freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup) + leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup) + shared_leasehold_match = pd.concat(shared_leasehold_match) + + # The approximate matches aren't very good + freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"] + 
def company_aggregation(
    ccod_path="/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv",
    output_path="Company ownership aggregation.xlsx",
):
    """
    Count the number of property addresses held by each proprietor and save the result as an Excel file.

    :param ccod_path: Location of the company ownership CSV. It must contain the columns
        "Proprietor Name (1)", "Company Registration No. (1)" and "Property Address". The default is
        the path the script was originally run with.
    :param output_path: Where the Excel workbook is written.
    :return: None. The aggregation is written to ``output_path``, largest owners first.
    """
    company_ownership = pd.read_csv(ccod_path)

    # count() only counts rows with a non-null "Property Address"
    aggregation = (
        company_ownership
        .groupby(["Proprietor Name (1)", "Company Registration No. (1)"])
        ["Property Address"]
        .count()
        .reset_index(name="Number of Properties")
    )
    aggregation = aggregation.sort_values("Number of Properties", ascending=False)

    aggregation.to_excel(output_path)
def _find_newest_epc(address1, postcode, full_address):
    """Run a single SearchEpc lookup and return its ``newest_epc`` result (None when nothing was found)."""
    searcher = SearchEpc(
        address1=address1,
        postcode=postcode,
        auth_token=EPC_AUTH_TOKEN,
        os_api_key="",
        property_type=None,
        fast=True,
        full_address=full_address
    )
    # Force the skipping of estimating the EPC
    searcher.ordnance_survey_client.property_type = None
    searcher.ordnance_survey_client.built_form = None

    searcher.find_property(skip_os=True)
    return searcher.newest_epc


def route_march_may_2024():
    """
    This code pulls supplementary data for a route march that is expected to happen in May 2024. This code
    was authored on the 30th April 2024.

    The Livewest asset list is read from S3, an EPC lookup is run for every unit, and the selected EPC
    fields are joined back onto the asset list, which is then saved locally as "Livewest EPC data.xlsx".
    Units without an EPC match stay in the output with empty EPC columns.
    """
    asset_key = ["NO", "ADDRESS 1", "POSTCODE"]
    epc_key = ["asset_list_house_no", "asset_list_address1", "asset_list_postcode"]

    asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
        header_row=0
    )

    epc_data = []
    for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)):
        # Full address built from whichever address parts are populated for this unit
        address_parts = [unit["NO"], unit["ADDRESS 1"], unit["ADDRESS 2"], unit["ADDRESS 3"], unit["POSTCODE"]]
        full_address = ", ".join(str(x).strip() for x in address_parts if not pd.isnull(x))

        newest_epc = _find_newest_epc(str(unit["NO"]), unit["POSTCODE"], full_address)
        if newest_epc is None:
            # We try with a different address 1: the lower-cased "NO" value with "flat", "ft" and "t"
            # removed. NOTE(review): the last replace strips every remaining "t" - presumably aimed at
            # flat prefixes; confirm this is intended.
            add1 = (
                str(unit["NO"]).lower()
                .replace("flat", "")
                .replace("ft", "")
                .replace("t", "").strip()
            )
            newest_epc = _find_newest_epc(add1, unit["POSTCODE"], full_address)

        if newest_epc is None:
            continue

        epc_data.append(
            {
                "asset_list_house_no": unit["NO"],
                "asset_list_address1": unit["ADDRESS 1"],
                "asset_list_postcode": unit["POSTCODE"],
                **newest_epc.copy()
            }
        )

    epc_df = pd.DataFrame(epc_data)

    # Retrieve just the data we need
    epc_df = epc_df[
        epc_key + [
            "uprn",
            "address",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type"
        ]
    ].rename(columns={"address": "Matched EPC Address"})

    asset_list = asset_list.merge(epc_df, how="left", left_on=asset_key, right_on=epc_key)

    # The join can multiply rows when the same unit appears more than once, so de-dupe on the unit key
    asset_list = asset_list.drop_duplicates(subset=asset_key)
    asset_list = asset_list.drop(columns=epc_key)

    # Rename the columns
    asset_list = asset_list.rename(columns={
        "property-type": "Property Type",
        "built-form": "Archetype",
        "inspection-date": "Last EPC Inspection Date",
        "current-energy-rating": "Last survey EPC Rating",
        "current-energy-efficiency": "Last survey SAP Score",
        "roof-description": "Roof Construction",
        "walls-description": "Wall Construction",
        "transaction-type": "Last EPC Reason"
    })

    # Store as an excel
    filename = "Livewest EPC data.xlsx"
    asset_list.to_excel(filename, index=False)
epc_data = [] + for _, pfp_property in tqdm(asset_list.iterrows(), total=len(asset_list)): + + lst = [ + pfp_property["ADDRESS"], + pfp_property["ADDRESS.1"], + pfp_property["ADDRESS.2"], + pfp_property["POSTCODE"] + ] + lst = [str(x).strip() for x in lst if not pd.isnull(x)] + + full_address = ", ".join(lst) + + searcher = SearchEpc( + address1=str(pfp_property["ADDRESS"]), + postcode=pfp_property["POSTCODE"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + # We try with a different address 1 + add1 = str(pfp_property["ADDRESS"]).lower() + add1 = add1.replace("ft", "").replace("t", "").strip() + + searcher = SearchEpc( + address1=add1, + postcode=pfp_property["POSTCODE"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + continue + + epc = { + "asset_list_address": pfp_property["ADDRESS"], + "asset_list_address1": pfp_property["ADDRESS.1"], + "asset_list_postcode": pfp_property["POSTCODE"], + **searcher.newest_epc.copy() + } + + epc_data.append(epc) + + epc_df = pd.DataFrame(epc_data) + + # 702 + + # Retrieve just the data we need + epc_df = epc_df[ + [ + "asset_list_address", + "asset_list_address1", + "asset_list_postcode", + "uprn", + "address", + "property-type", + "built-form", + "inspection-date", + "current-energy-rating", + "current-energy-efficiency", + "roof-description", + "walls-description", + "transaction-type" + ] + ].rename(columns={"address": "Matched EPC Address"}) + + 
asset_list = asset_list.merge( + epc_df, + how="left", + left_on=["ADDRESS", "ADDRESS.1", "POSTCODE"], + right_on=["asset_list_address", "asset_list_address1", "asset_list_postcode"] + ) + + # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated + asset_list = asset_list.drop_duplicates(subset=["ADDRESS", "ADDRESS.1", "POSTCODE"]) + asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_address1", "asset_list_postcode"]) + + # Rename the columns + asset_list = asset_list.rename(columns={ + "property-type": "Property Type", + "built-form": "Archetype", + "inspection-date": "Last EPC Inspection Date", + "current-energy-rating": "Last survey EPC Rating", + "current-energy-efficiency": "Last survey SAP Score", + "roof-description": "Roof Construction", + "walls-description": "Wall Construction", + "transaction-type": "Last EPC Reason" + }) + + # Store as an excel + filename = "Places For People EPC data.xlsx" + asset_list.to_excel(filename, index=False) diff --git a/etl/customers/vander_elliot/__init__.py b/etl/customers/vander_elliot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/etl/customers/vander_elliot/single_property_pilot.py b/etl/customers/vander_elliot/single_property_pilot.py new file mode 100644 index 00000000..99624dfc --- /dev/null +++ b/etl/customers/vander_elliot/single_property_pilot.py @@ -0,0 +1,56 @@ +import pandas as pd +from utils.s3 import read_excel_from_s3 +from utils.s3 import save_csv_to_s3 + +PORTFOLIO_ID = 77 +USER_ID = 8 + +patches = [ + { + "address": "79 Perryn Road", + "postcode": "W3 7LT", + "roof-description": "Pitched, no insulation (assumed)" + } +] + + +def app(): + asset_list = [ + { + 'uprn': 12103117, + "address": "79 Perryn Road", + "postcode": "W3 7LT", + }, + + ] + + asset_list = pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" + save_csv_to_s3( + dataframe=asset_list, + 
bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + # Store patches in s3 + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json" + save_csv_to_s3( + dataframe=pd.DataFrame(patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": "", + "budget": None, + } + print(body) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index d7a8ad2f..fd3c1692 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -37,6 +37,25 @@ MCS_SOLAR_PV_COST_DATA = { "average_cost_per_kwh-Northern Ireland": 2126.09, } +# This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average, +# to be conservative +MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = { + "Outer London": 13220, + "Inner London": 13220, + "South East England": 13547, + "South West England": 12776, + "East of England": 12585, + "East Midlands": 12239, + "West Midlands": 13182, + "North East England": 11829, + "North West England": 11714, + "Yorkshire and the Humber": 11919, + "Wales": 13701, + "Scotland": 12586, + "Northern Ireland": 12000, # There are hardly any air source heat pump installs going on in Northern Ireland +} +BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500 + # This is based on quotes from installers BATTERY_COST = 3500 @@ -1240,3 +1259,29 @@ class Costs: "labour_hours": labour_hours, "labour_days": labour_days, } + + def air_source_heat_pump(self): + """ + Based on the region and type of property, this function will produce a cost estimation for an air source heat + pump. 
This cost will include the boiler upgrade scheme grant + + """ + + # This is the average cost of a project, we'll add some additional contingency + regional_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region] + + total_cost = regional_cost * (1 + self.CONTINGENCY) - BOILER_UPGRADE_SCHEME_ASHP_VALUE + subtotal_before_vat = total_cost / (1 + self.VAT_RATE) + vat = total_cost - subtotal_before_vat + + # We assume 3 days installation + labour_days = 3 + labour_hours = labour_days * 8 + + return { + "total": total_cost, + "subtotal": subtotal_before_vat, + "vat": vat, + "labour_hours": labour_hours, + "labour_days": labour_days, + } diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index d24ad811..76da6c37 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -35,6 +35,9 @@ class HeatingControlRecommender: return + if heating_description in ["Air source heat pump, radiators, electric"]: + self.recommend_time_temperature_zone_controls() + def recommend_room_heaters_electric_controls(self): """ If the home has Room heaters, electric, we start by identifying potential heating controls that could diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 537125a1..a51803f2 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -1,6 +1,4 @@ -import pandas as pd - -from recommendations.Costs import Costs +from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE from recommendations.recommendation_utils import check_simulation_difference, override_costs from backend.Property import Property from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes @@ -18,7 +16,14 @@ class HeatingRecommender: self.heating_recommendations = [] self.heating_control_recommendations = [] - def recommend(self, phase=0): + def recommend(self, 
has_cavity_or_loft_recommendations, phase=0): + """ + Produces heating recommendations + :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation + recommendation. If there are cavity or loft recommendations, the property would need to complete those measures + before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to + :param phase: indicates the phase of the retrofit programme + """ # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace # the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this @@ -81,8 +86,122 @@ class HeatingRecommender: phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters ) + # We recommend air source heat pumps + # Heat pumps are suitable for all property types: + # https://energysavingtrust.org.uk/from-flats-to-terraced-houses-heat-pumps-are-suitable-for-all-property-types/ + # Just seems least probable for flats, so we'll allow houses and bungalows + # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions + # and either allow or prevent the recommendation of an air source heat pump + + suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"] + has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"] + + if suitable_property_type and not has_air_source_heat_pump: + self.recommend_air_source_heat_pump( + phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations + ) + return + def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations): + """ + This method will implement the recommendation for an air source heat pump + This is ultimately an overhaul to the heating system and so is recommended as an alternative to other + heating system recommendations + :return: + """ + 
+ controls_recommender = HeatingControlRecommender(self.property) + controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric") + + ashp_costs = self.costs.air_source_heat_pump() + # We add the costs of the heating controls, onto each key in the costs dictionary + if controls_recommender.recommendation: + for key in ashp_costs: + ashp_costs[key] += controls_recommender.recommendation[0][key] + + already_installed = "air_source_heat_pump" in self.property.already_installed + if already_installed: + ashp_costs = override_costs(ashp_costs) + description = "The property already has an air source heat pump, no further action needed." + else: + if controls_recommender.recommendation: + description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, " + "room sensors and smart radiator valves (time & temperature zone control).") + else: + description = "Install an air source heat pump." + + # If the property does not have existing cavity and loft insulation, we include a note that the cost + # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access + # to the funding + if has_cavity_or_loft_recommendations: + description = description + (f" The cost includes the £" + f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. 
" + f"You must ensure that the property has an insulated cavity and " + f"270mm+ loft insulation to qualify for the grant") + else: + description = description + (f" The cost includes the £" + f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant") + + simulation_config = { + "mainheat_energy_eff_ending": "Good", + "hot_water_energy_eff_ending": "Good" + } + # Installation of a boiler improves the hot water system so we need to reflect this in + # the outcome of the recommendation + heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process() + hotwater_ending_config = HotWaterAttributes("From main system").process() + + # If the property does not currently have electric main fuel, we'll simulate the change + fuel_ending_config = {} + if self.property.main_fuel["fuel_type"] != "electricity": + fuel_ending_config = MainFuelAttributes("electricity (not community)").process() + + # Check the simulation differences + heating_simulation_config = check_simulation_difference( + new_config=heating_ending_config, old_config=self.property.main_heating + ) + hotwater_simulation_config = check_simulation_difference( + new_config=hotwater_ending_config, old_config=self.property.hotwater + ) + fuel_simulation_config = check_simulation_difference( + new_config=fuel_ending_config, old_config=self.property.main_fuel + ) + + simulation_config = { + **simulation_config, + **heating_simulation_config, + **hotwater_simulation_config, + **fuel_simulation_config, + } + + if controls_recommender.recommendation: + # We should have just the single recommendation for heat controls, which is time + # and temperature zone controls + if len(controls_recommender.recommendation) != 1: + raise NotImplementedError("More than one heat controls recommendation for air source heat pump") + simulation_config = { + **simulation_config, + **controls_recommender.recommendation[0]["simulation_config"] + } + + ashp_recommendation = { + "phase": phase, + 
"parts": [ + # TODO + ], + "type": "heating", + "description": description, + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + "already_installed": already_installed, + "simulation_config": simulation_config, + **ashp_costs + } + + self.heating_recommendations.append(ashp_recommendation) + @staticmethod def check_simulation_difference(old_config, new_config): """ @@ -146,7 +265,7 @@ class HeatingRecommender: recommendation_description = f"{description} and {controls_description}" - already_installed = "cavity_wall_insulation" in self.property.already_installed + already_installed = "heating_controls" in self.property.already_installed if already_installed: total_costs = override_costs(total_costs) recommendation_description = "Heating system has already been upgraded, no further action needed." @@ -359,7 +478,6 @@ class HeatingRecommender: **heating_simulation_config, **hotwater_simulation_config, **fuel_simulation_config, - "hot_water_energy_eff_ending": "Good" } boiler_costs = self.costs.boiler( diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 06dc2d61..c8113cdc 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -109,16 +109,42 @@ class Recommendations: # Heating and Electical systems if "heating" not in self.exclusions: - self.heating_recommender.recommend(phase=phase) + + cavity_or_loft_recommendations = [ + r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations + if r["type"] in ["cavity_wall_insulation", "loft_insulation"] + ] + has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0 + + self.heating_recommender.recommend( + phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations + ) if ( self.heating_recommender.heating_recommendations or self.heating_recommender.heating_control_recommendations ): - if self.heating_recommender.heating_recommendations: - 
property_recommendations.append(self.heating_recommender.heating_recommendations) - if self.heating_recommender.heating_control_recommendations: - property_recommendations.append(self.heating_recommender.heating_control_recommendations) + # We split into first and second phase recommendations + first_phase_recommendations = [ + r for r in ( + self.heating_recommender.heating_recommendations + + self.heating_recommender.heating_control_recommendations + ) + if r["phase"] == phase + ] + second_phase_recommendations = [ + r for r in ( + self.heating_recommender.heating_recommendations + + self.heating_recommender.heating_control_recommendations + ) + if r["phase"] == phase + 1 + ] + + if first_phase_recommendations: + property_recommendations.append(first_phase_recommendations) + + if second_phase_recommendations: + property_recommendations.append(second_phase_recommendations) # We check if we have distinct heating and heating controls recommendations # If so, we increment by 2 (one of the heating system, one for the heating controls) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index b44557ab..58d4b123 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -44,7 +44,7 @@ class SolarPvRecommendations: :return: """ - is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"] + is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"] is_valid_roof_type = ( self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"] ) diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py new file mode 100644 index 00000000..0d69b10d --- /dev/null +++ b/recommendations/tests/test_air_source_heat_pump.py @@ -0,0 +1,944 @@ +import pandas as pd +import msgpack +from datetime import datetime + +from utils.s3 import 
read_dataframe_from_s3_parquet, read_from_s3 +from backend.Property import Property +from recommendations.HeatingRecommender import HeatingRecommender +from recommendations.Recommendations import Recommendations +from etl.epc.Record import EPCRecord +from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from backend.ml_models.api import ModelApi + + +def find_examples(): + """ Some scrappy helper code to find EPC examples""" + # Let's look for some testing data, where the only thing different pre and post is the installation of an + # air source heat pump + data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", + file_key="sap_change_model/2024-03-24-15-51-13/dataset_no_cleaning.parquet" + ) + + # Firstly, take records where before there was no air source heat pump and afterwards there was + data = data[ + data["has_air_source_heat_pump_ending"] & ~data["has_air_source_heat_pump"] + ] + + # Start with a property that has a boiler + data = data[data["has_boiler"]] + + static_columns = [ + # Walls + 'walls_thermal_transmittance_ending', + 'is_filled_cavity_ending', + 'is_park_home_ending', + 'walls_insulation_thickness_ending', + 'external_insulation_ending', + 'internal_insulation_ending', + # Floors + # 'floor_thermal_transmittance_ending', # Don't subset on this, because it changes based on floor area + 'floor_insulation_thickness_ending', + # Roof + 'roof_thermal_transmittance_ending', + 'is_at_rafters_ending', + 'roof_insulation_thickness_ending', + # Hot water - air source heat pump will shange the hot water system (probably from whatever it was -> main) + # 'heater_type_ending', + # 'system_type_ending', + # 'thermostat_characteristics_ending', + # 'heating_scope_ending', + # 'energy_recovery_ending', + # 'hotwater_tariff_type_ending', + # 'extra_features_ending', + # 'chp_systems_ending', + # 'distribution_system_ending', + # 'no_system_present_ending', + # 'appliance_ending', + # Heating - Will change when installing an ASHP + # 
'has_radiators_ending', + # 'has_fan_coil_units_ending', + # 'has_pipes_in_screed_above_insulation_ending', + # 'has_pipes_in_insulated_timber_floor_ending', + # 'has_pipes_in_concrete_slab_ending', + # 'has_boiler_ending', + # 'has_air_source_heat_pump_ending', # We want the air source heat pump to change + # 'has_room_heaters_ending', + # 'has_electric_storage_heaters_ending', + # 'has_warm_air_ending', + # 'has_electric_underfloor_heating_ending', + # 'has_electric_ceiling_heating_ending', + # 'has_community_scheme_ending', + # 'has_ground_source_heat_pump_ending', + # 'has_no_system_present_ending', + # 'has_portable_electric_heaters_ending', + # 'has_water_source_heat_pump_ending', + # 'has_electric_heat_pump_ending', + # 'has_micro-cogeneration_ending', + # 'has_solar_assisted_heat_pump_ending', + # 'has_exhaust_source_heat_pump_ending', + # 'has_community_heat_pump_ending', + # 'has_electric_ending', + # 'has_mains_gas_ending', + # 'has_wood_logs_ending', 'has_coal_ending', 'has_oil_ending', + # 'has_wood_pellets_ending', 'has_anthracite_ending', 'has_dual_fuel_mineral_and_wood_ending', + # 'has_smokeless_fuel_ending', 'has_lpg_ending', 'has_b30k_ending', 'has_electricaire_ending', + # 'has_assumed_for_most_rooms_ending', 'has_underfloor_heating_ending', + # 'thermostatic_control_ending', + # 'charging_system_ending', + # 'switch_system_ending', + # 'no_control_ending', + # 'dhw_control_ending', + # 'community_heating_ending', + # 'multiple_room_thermostats_ending', + # 'auxiliary_systems_ending', + # 'trvs_ending', + # 'rate_control_ending', + # Window + 'glazing_type_ending', + # Fuel - could change with ASHP + # 'fuel_type_ending', + # 'main-fuel_tariff_type_ending', + # 'is_community_ending', + # 'no_individual_heating_or_community_network_ending', + # 'complex_fuel_type_ending', + + 'mechanical_ventilation_ending', 'secondheat_description_ending', 'glazed_type_ending', + 'multi_glaze_proportion_ending', 'low_energy_lighting_ending', 
'number_open_fireplaces_ending', + 'solar_water_heating_flag_ending', + 'photo_supply_ending', + 'energy_tariff_ending', + 'extension_count_ending', + 'total_floor_area_ending', + # 'hot_water_energy_eff_ending', + 'floor_energy_eff_ending', + 'windows_energy_eff_ending', + 'walls_energy_eff_ending', + 'sheating_energy_eff_ending', + 'roof_energy_eff_ending', + # 'mainheat_energy_eff_ending', + # 'mainheatc_energy_eff_ending', + 'lighting_energy_eff_ending', + 'number_habitable_rooms_ending', + 'number_heated_rooms_ending', + ] + + for col in static_columns: + + base_starting = col.split("_ending")[0] + if base_starting + "_starting" in data.columns: + starting_col = base_starting + "_starting" + else: + starting_col = base_starting + # Filter + print("Column: %s" % col) + print("Starting size: %s" % data.shape[0]) + data = data[data[starting_col] == data[col]] + print("Ending size: %s" % data.shape[0]) + + z = data[['uprn', col, starting_col]] + + # Great example UPRNs + # 100030969273 + # 10034685399 - Completely transforms the heating and hot water systems in the home (goes from oil -> electricity) + # 100091200828 - goes from a liquid petroleum gas boiler to ashp + + # Look for starting with a gas boiler + data[ + data["has_boiler"] & data["has_radiators"] & data["has_mains_gas"] & ~data["has_boiler_ending"] + ] + + # UPRN: 100011776843 + + +class TestAirSourceHeatPump: + + def test_eligible(self): + # This tests a house, which will be suitable for an air source heat pump + epc_record = EPCRecord() + epc_record.prepared_epc = { + "county": "Broxbourne", + "mainheat-energy-eff": "Good", + "hot-water-energy-eff": "Good", + "mainheatc-energy-eff": "Good", + "number-heated-rooms": 5, + "property-type": "House", + "built-form": "Semi-Detached" + } + + property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record) + property_instance.main_heating = { + 'original_description': 'Boiler and radiators, mains gas', + "clean_description": 
"Boiler and radiators, mains gas", + 'has_radiators': True, + 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True, + 'has_air_source_heat_pump': False, + 'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, + 'has_electric_underfloor_heating': False, + 'has_electric_ceiling_heating': False, 'has_community_scheme': False, + 'has_ground_source_heat_pump': False, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, + 'has_water_source_heat_pump': False, 'has_electric': False, + 'has_mains_gas': True, 'has_wood_logs': False, + 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, + 'has_anthracite': False, + 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, + 'has_lpg': False, 'has_assumed': False, + 'has_electricaire': False, 'has_assumed_for_most_rooms': False, + 'has_underfloor_heating': False, + "has_electric_heat_pumps": False, + "has_micro-cogeneration": False + } + property_instance.main_fuel = { + 'original_description': 'mains gas (not community)', 'fuel_type': 'mains gas', + 'tariff_type': None, + 'is_community': False, 'no_individual_heating_or_community_network': False, + 'complex_fuel_type': None + } + property_instance.hotwater = { + 'original_description': 'From main system', + 'clean_description': 'From main system', + 'heater_type': None, + 'system_type': 'from main system', + 'thermostat_characteristics': None, 'heating_scope': None, + 'energy_recovery': None, 'tariff_type': None, + 'extra_features': None, 'chp_systems': None, 'distribution_system': None, + 'no_system_present': None, + 'assumed': False, "appliance": None + } + property_instance.main_heating_controls = { + 'original_description': 'Programmer, room thermostat and TRVs', + 'thermostatic_control': 'room thermostat', 'charging_system': None, 'switch_system': 'programmer', + 
'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, + 'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': None + + } + + recommender = HeatingRecommender(property_instance=property_instance) + + assert not recommender.heating_recommendations + + recommender.recommend(phase=0) + + assert recommender.recommendation is None + + def test_air_source_heat_pump_gas_boiler_starting(self): + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.62', 'heating-cost-potential': '599', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '67', 'construction-age-band': 'England and Wales: 1950-1966', + 'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Good', + 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '72', + 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '913', + 'address3': '', 'mainheatcont-description': 'Programmer, no room thermostat', 'sheating-energy-eff': 'N/A', + 'property-type': 'House', 'local-authority-label': 'Wigan', 'fixed-lighting-outlets-count': '9', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '210', + 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', 'constituency': 'E14001039', + 'co2-emissions-potential': '2.6', 'number-heated-rooms': '4', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '180', + 'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-02-15', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '78', 'address1': '430 Gidlow Lane', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan', + 'roof-energy-eff': 'Very Poor', 
'total-floor-area': '80.0', 'building-reference-number': '10002334112', + 'environment-impact-current': '38', 'co2-emissions-current': '6.2', + 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN', + 'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'mains gas (not community)', + 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', + 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets', + 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0', + 'lighting-cost-potential': '67', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', + 'main-heating-controls': '', 'lodgement-datetime': '2022-02-23 16:39:41', 'flat-top-storey': '', + 'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas', + 'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843', + 'current-energy-efficiency': '45', 'energy-consumption-current': '441', + 'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '67', + 'lodgement-date': '2022-02-23', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor', + 'lmk-key': '46cb404438a6d88ddff8965cab8b3027ec15c32d93e0b6a5f0381a5109b9bb0d', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '77', + 'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '100', + 'walls-description': 'Cavity wall, filled cavity', + 'hotwater-description': 'From main system, no cylinder thermostat' + } + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.62', 'heating-cost-potential': '803', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '292', 'construction-age-band': 'England and Wales: 1950-1966', + 
'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Good', + 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '78', + 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '861', + 'address3': '', 'mainheatcont-description': 'Time and temperature zone control', + 'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Wigan', + 'fixed-lighting-outlets-count': '9', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', + 'hot-water-cost-current': '434', 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', + 'constituency': 'E14001039', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '147', + 'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-05-11', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '43', 'address1': '430 Gidlow Lane', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112', + 'environment-impact-current': '63', 'co2-emissions-current': '3.4', + 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN', + 'mainheatc-energy-eff': 'Very Good', 'main-fuel': 'electricity (not community)', + 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', + 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets', + 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0', + 'lighting-cost-potential': '67', 'mainheat-env-eff': 'Very 
Good', 'multi-glaze-proportion': '100', + 'main-heating-controls': '', 'lodgement-datetime': '2022-06-06 13:01:20', 'flat-top-storey': '', + 'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas', + 'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843', + 'current-energy-efficiency': '53', 'energy-consumption-current': '252', + 'mainheat-description': 'Air source heat pump, radiators, electric', 'lighting-cost-current': '67', + 'lodgement-date': '2022-06-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Good', + 'lmk-key': '672d5947f3d4a55d97255af71651d6127a939418fa66a687070af77e0ba90df2', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '70', + 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '100', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + # differences = [] + # for k, v in ending_epc.items(): + # if v != starting_epc[k]: + # differences.append( + # { + # "variable": k, + # "starting_value": starting_epc[k], + # "ending_value": v + # } + # ) + # differences = pd.DataFrame(differences) + # + # diffs = differences[ + # differences["variable"].isin( + # [ + # "mainheat-energy-eff", + # "mainheatcont-description", + # "mainheatc-energy-eff", + # "main-fuel", + # "mainheat-env-eff", + # "mainheat-description", + # "hot-water-energy-eff", + # "hotwater-description" + # ] + # ) + # ] + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': 
{}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = HeatingRecommender(property_instance=home) + recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False) + + # Patch - for this property, the hot water energy efficiency is very poor. it's not clear why this is, + # but we insert this for this test + recommender.heating_recommendations[0]["simulation_config"]["hot_water_energy_eff_ending"] = "Very Poor" + + property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations]) + + assert len(recommender.heating_recommendations) == 1 + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 52.2 + + def test_air_source_heat_pump_gas_boiler_starting_2(self): + """ + This property seems to have miniscule movement in SAP - just 2 poins + :return: + """ 
+ + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.3', 'heating-cost-potential': '394', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '48', 'construction-age-band': 'England and Wales: 1967-1975', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', + 'lighting-energy-eff': 'Good', 'environment-impact-potential': '87', + 'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '487', 'address3': '', + 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A', + 'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', 'fixed-lighting-outlets-count': '5', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '86', + 'county': '', 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614', + 'co2-emissions-potential': '0.8', 'number-heated-rooms': '2', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '105', + 'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-25', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '56', 'address1': '31 Whinney Hill Park', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Calder Valley', + 'roof-energy-eff': 'Good', 'total-floor-area': '44.0', 'building-reference-number': '10001772583', + 'environment-impact-current': '62', 'co2-emissions-current': '2.5', + 'roof-description': 'Pitched, 250 mm loft insulation', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '2', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'BRIGHOUSE', + 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 
'Good', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good', + 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2021-11-25 11:39:35', 'flat-top-storey': '', 'current-energy-rating': 'D', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '62', + 'energy-consumption-current': '322', 'mainheat-description': 'Boiler and radiators, mains gas', + 'lighting-cost-current': '56', 'lodgement-date': '2021-11-25', 'extension-count': '0', + 'mainheatc-env-eff': 'Good', 'lmk-key': '077f70657e9c3f1f0ce5392798398398616b159493b2a8ca2338961596631c27', + 'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '', + 'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '60', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', + 'uprn-source': 'Energy Assessor', 'floor-height': '2.3', 'heating-cost-potential': '277', + 'unheated-corridor-length': '', 'hot-water-cost-potential': '266', + 'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B', + 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Good', + 'environment-impact-potential': '90', 'glazed-type': 'double glazing, unknown install date', + 'heating-cost-current': '331', 'address3': '', + 'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A', + 'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', + 
'fixed-lighting-outlets-count': '5', 'energy-tariff': 'Single', + 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '404', 'county': '', + 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614', + 'co2-emissions-potential': '0.7', 'number-heated-rooms': '2', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '92', + 'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', + 'inspection-date': '2021-11-25', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '48', + 'address1': '31 Whinney Hill Park', 'heat-loss-corridor': '', 'flat-storey-count': '', + 'constituency-label': 'Calder Valley', 'roof-energy-eff': 'Good', 'total-floor-area': '44.0', + 'building-reference-number': '10001772583', 'environment-impact-current': '68', + 'co2-emissions-current': '2.1', 'roof-description': 'Pitched, 250 mm loft insulation', + 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '2', 'address2': '', + 'hot-water-env-eff': 'Poor', 'posttown': 'BRIGHOUSE', 'mainheatc-energy-eff': 'Average', + 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good', + 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40', + 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2022-03-23 16:06:21', 'flat-top-storey': '', 'current-energy-rating': 'D', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '64', + 'energy-consumption-current': '283', + 'mainheat-description': 'Air source heat pump, radiators, 
electric', + 'lighting-cost-current': '57', 'lodgement-date': '2022-03-23', 'extension-count': '0', + 'mainheatc-env-eff': 'Average', + 'lmk-key': '6296248141447b53426a40f1c39da17dad5f4786485db55ee38737891111a4d4', + 'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '', + 'potential-energy-efficiency': '89', 'hot-water-energy-eff': 'Very Poor', + 'low-energy-lighting': '60', 'walls-description': 'Cavity wall, filled cavity', + 'hotwater-description': 'From main system' + } + + # differences = [] + # for k, v in ending_epc.items(): + # if v != starting_epc[k]: + # differences.append( + # { + # "variable": k, + # "starting_value": starting_epc[k], + # "ending_value": v + # } + # ) + # differences = pd.DataFrame(differences) + # + # diffs = differences[ + # differences["variable"].isin( + # [ + # "mainheat-energy-eff", + # "mainheatcont-description", + # "mainheatc-energy-eff", + # "main-fuel", + # "mainheat-env-eff", + # "mainheat-description", + # "hot-water-energy-eff", + # "hotwater-description" + # ] + # ) + # ] + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + 
floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = HeatingRecommender(property_instance=home) + recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False) + property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations]) + + assert len(recommender.heating_recommendations) == 1 + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 69.3 + + # In actuality with this property, the heating controls get downgraded, so we test a manual patch of this + patched_simulation_config = { + 'mainheat_energy_eff_ending': "Very Good", + 'hot_water_energy_eff_ending': 'Very Poor', + 'has_boiler_ending': False, + 'has_air_source_heat_pump_ending': True, + 'has_electric_ending': True, + 'has_mains_gas_ending': False, + 'fuel_type_ending': 'electricity', + 'trvs_ending': None, + "mainheatc_energy_eff_ending": 'Average' + } + + # PATCHING + property_recommendations_patch = Recommendations.insert_temp_recommendation_id( + [recommender.heating_recommendations] + ) + property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + 
property_recommendations_patch, [] + ) + + scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict_patch = model_api.predict_all( + df=scoring_data_patch, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + # The error is only 0.3, so the model is working + assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 64.3 + assert ending_epc["current-energy-efficiency"] == '64' + + def test_air_source_heat_pump_lpg_boiler(self): + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry', + 'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '1628', + 'unheated-corridor-length': '', 'hot-water-cost-potential': '175', + 'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'D', + 'mainheat-energy-eff': 'Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average', + 'environment-impact-potential': '70', 'glazed-type': 'double glazing, unknown install date', + 'heating-cost-current': '2158', 'address3': 'Perry', + 'mainheatcont-description': 'No time or thermostatic control of room temperature', + 'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire', + 'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', + 'hot-water-cost-current': '257', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX', + 'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '3.3', + 'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation 
(assumed)', + 'energy-consumption-potential': '128', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached', + 'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', + 'inspection-date': '2023-08-31', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '51', + 'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '', + 'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0', + 'building-reference-number': '10005199915', 'environment-impact-current': '50', + 'co2-emissions-current': '5.9', 'roof-description': 'Pitched, 270 mm loft insulation', + 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive', + 'hot-water-env-eff': 'Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Very Poor', + 'main-fuel': 'LPG (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', + 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good', + 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '166', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2023-10-30 13:46:54', 'flat-top-storey': '', 'current-energy-rating': 'F', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '32', + 'energy-consumption-current': '243', 'mainheat-description': 'Boiler and radiators, LPG', + 'lighting-cost-current': '277', 'lodgement-date': '2023-10-30', 'extension-count': '0', + 'mainheatc-env-eff': 'Very Poor', + 'lmk-key': 'f1d3bd4b8b50bc9b006231ccb158537c408523b748b3f4ef7e98cd03b144afa5', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '56', + 'hot-water-energy-eff': 
'Poor', 'low-energy-lighting': '33', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry', + 'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '917', + 'unheated-corridor-length': '', 'hot-water-cost-potential': '328', + 'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'A', + 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average', + 'environment-impact-potential': '96', 'glazed-type': 'double glazing, unknown install date', + 'heating-cost-current': '1098', 'address3': 'Perry', + 'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A', + 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire', + 'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', + 'hot-water-cost-current': '328', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX', + 'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '0.3', + 'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)', + 'energy-consumption-potential': '16', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached', + 'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', + 'inspection-date': '2023-10-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '6', + 'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '', + 'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0', + 'building-reference-number': '10005199915', 'environment-impact-current': '92', + 'co2-emissions-current': '0.7', 'roof-description': 'Pitched, 270 mm loft insulation', + 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 
'address2': 'The Drive', + 'hot-water-env-eff': 'Very Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Average', + 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', + 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good', + 'walls-energy-eff': 'Average', 'photo-supply': '', 'lighting-cost-potential': '166', + 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2023-11-01 16:29:16', 'flat-top-storey': '', 'current-energy-rating': 'A', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '92', + 'energy-consumption-current': '37', 'mainheat-description': 'Air source heat pump, radiators, electric', + 'lighting-cost-current': '277', 'lodgement-date': '2023-11-01', 'extension-count': '0', + 'mainheatc-env-eff': 'Average', + 'lmk-key': 'cb7f2838b727907767c8c2a385cd22f722b1e4745463391d910d228e52124515', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '95', + 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '33', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + 
cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = HeatingRecommender(property_instance=home) + recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False) + property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations]) + + assert len(recommender.heating_recommendations) == 1 + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + # We predict a huge uplift but not quite as much as the EPC, due to some distinct differences between our + # recommendation and the EPC + assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 81.3 + assert ending_epc['current-energy-efficiency'] == '92' + + # PATCH + # We patch the simulation config, to reflect the ending EPC, to see if we get the ending EPC's config + patched_simulation_config = { + 'mainheat_energy_eff_ending': "Very Good", + 'hot_water_energy_eff_ending': 'Good', + 'has_boiler_ending': False, 
+ 'has_air_source_heat_pump_ending': True, + 'has_electric_ending': True, + 'has_lpg_ending': False, + 'fuel_type_ending': 'electricity', + 'switch_system_ending': 'programmer', + 'no_control_ending': None, + 'auxiliary_systems_ending': 'bypass', + 'trvs_ending': 'trvs', + "mainheatc_energy_eff_ending": 'Average' + } + + # PATCHING + property_recommendations_patch = Recommendations.insert_temp_recommendation_id( + [recommender.heating_recommendations] + ) + property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations_patch, [] + ) + + scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict_patch = model_api.predict_all( + df=scoring_data_patch, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + + assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 88.9 + # We still underpredict but the improvement is notable + + def test_offgrid(self): + """ + We test on a property we've worked with before, where we compare two options + a) Upgrading to a boiler + b) Upgrading to a heat pump + :return: + """ + + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '6 Beech Road', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.4', 'heating-cost-potential': '612', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '123', 'construction-age-band': 'England and Wales: 1930-1949', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Good', + 
'lighting-energy-eff': 'Good', 'environment-impact-potential': '87', + 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '2278', + 'address3': '', 'mainheatcont-description': 'Appliance thermostats', 'sheating-energy-eff': 'N/A', + 'property-type': 'House', 'local-authority-label': 'Dudley', 'fixed-lighting-outlets-count': '9', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '604', + 'county': '', 'postcode': 'DY1 4BP', 'solar-water-heating-flag': 'N', 'constituency': 'E14000671', + 'co2-emissions-potential': '1.0', 'number-heated-rooms': '4', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '93', + 'local-authority': 'E08000027', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2024-03-13', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '83', 'address1': '6 Beech Road', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Dudley North', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '60.0', 'building-reference-number': '10005780080', + 'environment-impact-current': '41', 'co2-emissions-current': '5.0', + 'roof-description': 'Pitched, 12 mm loft insulation', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'DUDLEY', + 'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good', + 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'roof-env-eff': 'Very Poor', + 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '113', + 'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2024-03-13 11:29:11', 
'flat-top-storey': '', 'current-energy-rating': 'F', + 'secondheat-description': 'None', 'walls-env-eff': 'Average', 'transaction-type': 'rental', + 'uprn': '90055152', 'current-energy-efficiency': '32', 'energy-consumption-current': '491', + 'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '113', + 'lodgement-date': '2024-03-13', 'extension-count': '1', 'mainheatc-env-eff': 'Good', + 'lmk-key': '78ddf851b660e599a0894924d0e6b503980f5e0ad1aa711f8411718dc2989c44', 'wind-turbine-count': '0', + 'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '87', + 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '67', + 'walls-description': 'Cavity wall, filled cavity', + 'hotwater-description': 'Electric immersion, standard tariff' + } + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = HeatingRecommender(property_instance=home) + recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False) + recommender.recommend_boiler_upgrades(phase=0, 
system_change=True, exising_room_heaters=False) + + assert len(recommender.heating_recommendations) == 3 + + property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations]) + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + + # The ASHP isn't better under SAP, compared to a gas boiler with good heat controls + assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [66.9, 65.5, 65.9] diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py index 5481cb17..fbbfe3a1 100644 --- a/recommendations/tests/test_solar_pv_recommendations.py +++ b/recommendations/tests/test_solar_pv_recommendations.py @@ -2,6 +2,13 @@ import pytest from recommendations.SolarPvRecommendations import SolarPvRecommendations from backend.Property import Property from etl.epc.Record import EPCRecord +import pandas as pd +from datetime import datetime +from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3 +from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.Recommendations import Recommendations +from backend.ml_models.api import ModelApi +import msgpack class TestSolarPvRecommendations: @@ -82,3 +89,321 @@ class TestSolarPvRecommendations: 'photo_supply': 4000 } ] + + def test_model(self): + """ + This 
function tests the recommendation engine, in conjunction with the model + :return: + """ + + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', + 'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '85', + 'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '', + 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A', + 'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79', + 'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N', + 'constituency': 'E14000707', 'co2-emissions-potential': '1.5', 'number-heated-rooms': '5', + 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '92', + 'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-17', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '61', 'address1': '27 Cromwell Street', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430', + 'environment-impact-current': '47', 'co2-emissions-current': '5.4', + 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 
'GAINSBOROUGH', + 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor', + 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '72', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2021-12-01 10:12:23', 'flat-top-storey': '', 'current-energy-rating': 'E', + 'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor', + 'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '54', + 'energy-consumption-current': '346', 'mainheat-description': 'Boiler and radiators, mains gas', + 'lighting-cost-current': '144', 'lodgement-date': '2021-12-01', 'extension-count': '2', + 'mainheatc-env-eff': 'Good', 'lmk-key': '3ec5533af02ec78361c1f9bea8dd2e878c2c6fa6cf59e5cc505c3eeb038e0f91', + 'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '', + 'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0', + 'walls-description': 'Solid brick, as built, no insulation (assumed)', + 'hotwater-description': 'From main system' + } + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', + 'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '86', + 'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '', + 'mainheatcont-description': 'Programmer, room thermostat 
and TRVs', 'sheating-energy-eff': 'N/A', + 'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79', + 'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N', + 'constituency': 'E14000707', 'co2-emissions-potential': '1.4', 'number-heated-rooms': '5', + 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '84', + 'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-12-21', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '49', 'address1': '27 Cromwell Street', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430', + 'environment-impact-current': '55', 'co2-emissions-current': '4.4', + 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', + 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor', + 'walls-energy-eff': 'Very Poor', 'photo-supply': '50.0', 'lighting-cost-potential': '72', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2021-12-21 17:33:09', 'flat-top-storey': '', 'current-energy-rating': 'D', + 'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor', + 'transaction-type': 'ECO assessment', 'uprn': '100030949912', 
'current-energy-efficiency': '65', + 'energy-consumption-current': '277', 'mainheat-description': 'Boiler and radiators, mains gas', + 'lighting-cost-current': '144', 'lodgement-date': '2021-12-21', 'extension-count': '2', + 'mainheatc-env-eff': 'Good', 'lmk-key': 'b0b19583c59afbc69db12f4d6c98cd8837e80da3214d577c426eb3e672d424fc', + 'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '', + 'potential-energy-efficiency': '88', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0', + 'walls-description': 'Solid brick, as built, no insulation (assumed)', + 'hotwater-description': 'From main system' + } + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = SolarPvRecommendations(property_instance=home) + recommender.recommend(phase=0) + + coverage_50_percent = [x for x in recommender.recommendation if x["photo_supply"] == 50] + assert len(coverage_50_percent) == 2 + + property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_50_percent]) + + 
home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + + assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [65.9, 65.9] + assert ending_epc["current-energy-efficiency"] == '65' + + def test_model2(self): + data[["uprn", "sap_ending"]] + # + + searcher = SearchEpc( + address1="", + postcode="", + auth_token="a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=", + os_api_key="", + full_address="", + uprn=100030952942, + ) + searcher.find_property(False) + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent', + 'uprn-source': 'Energy Assessor', 'floor-height': '2.49', 'heating-cost-potential': '464', + 'unheated-corridor-length': '', 'hot-water-cost-potential': '46', + 'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B', + 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good', + 'environment-impact-potential': '91', 'glazed-type': 'not defined', 'heating-cost-current': '535', + 'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs', + 'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow', + 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 
'hot-water-cost-current': '69', + 'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N', + 'constituency': 'E14000707', 'co2-emissions-potential': '0.7', 'number-heated-rooms': '3', + 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '56', + 'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical', + 'inspection-date': '2022-08-24', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '18', + 'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '', + 'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0', + 'building-reference-number': '10002845316', 'environment-impact-current': '85', + 'co2-emissions-current': '1.2', 'roof-description': 'Pitched, 300 mm loft insulation', + 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '', + 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good', + 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Good', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in all fixed outlets', 'roof-env-eff': 'Very Good', + 'walls-energy-eff': 'Average', 'photo-supply': '40.0', 'lighting-cost-potential': '65', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2022-08-24 15:39:42', 'flat-top-storey': '', 'current-energy-rating': 'B', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'ECO assessment', 'uprn': '100030952942', 'current-energy-efficiency': '87', + 'energy-consumption-current': '100', 'mainheat-description': 'Boiler and radiators, mains gas', + 'lighting-cost-current': '65', 'lodgement-date': 
'2022-08-24', 'extension-count': '0', + 'mainheatc-env-eff': 'Good', + 'lmk-key': 'e20be883431b1fed15db7fa1f52634fb7655d2b80c2fdad37df779f93ec4dafd', + 'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '', + 'potential-energy-efficiency': '91', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.49', 'heating-cost-potential': '464', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '46', 'construction-age-band': 'England and Wales: 1967-1975', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', + 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '85', 'glazed-type': 'not defined', + 'heating-cost-current': '535', 'address3': '', + 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A', + 'property-type': 'Bungalow', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69', + 'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N', + 'constituency': 'E14000707', 'co2-emissions-potential': '1.2', 'number-heated-rooms': '3', + 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '102', + 'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical', + 'inspection-date': '2022-05-31', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '40', + 'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '', + 'constituency-label': 'Gainsborough', 'roof-energy-eff': 
'Very Good', 'total-floor-area': '66.0', + 'building-reference-number': '10002845316', 'environment-impact-current': '68', + 'co2-emissions-current': '2.6', 'roof-description': 'Pitched, 300 mm loft insulation', + 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '', 'hot-water-env-eff': 'Good', + 'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', + 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', + 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets', + 'roof-env-eff': 'Very Good', 'walls-energy-eff': 'Average', 'photo-supply': '0.0', + 'lighting-cost-potential': '65', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', + 'main-heating-controls': '', 'lodgement-datetime': '2022-06-15 08:38:02', 'flat-top-storey': '', + 'current-energy-rating': 'D', 'secondheat-description': 'Room heaters, electric', + 'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100030952942', + 'current-energy-efficiency': '68', 'energy-consumption-current': '227', + 'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '65', + 'lodgement-date': '2022-06-15', 'extension-count': '0', 'mainheatc-env-eff': 'Good', + 'lmk-key': 'ce181970b7077cb9b4626242bfb010b30a0e48541b5f22427e81f1adbeeec4f2', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '85', + 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + 
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = SolarPvRecommendations(property_instance=home) + recommender.recommend(phase=0) + + coverage_40_percent = [x for x in recommender.recommendation if x["photo_supply"] == 40] + assert len(coverage_40_percent) == 2 + + property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_40_percent]) + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + + assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [87.1, 87.1] + assert ending_epc["current-energy-efficiency"] == '87' + assert starting_epc["current-energy-efficiency"] == '68'