diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 44178792..db9ec4ff 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -193,33 +193,32 @@ class SearchEpc:
@classmethod
def get_house_number(cls, address: str) -> str | None:
"""
- This method will use the usaddress library to parse an address and extract the house number
- :return:
+ This method uses the usaddress library to parse an address and extract the primary house or flat number.
"""
+ try:
- parsed = usaddress.parse(address)
- parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
- parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
-
- if parsed_house_number is None:
- # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
- # we also add a custom approach
-
- # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+ # Custom regex to catch a broad range of cases
pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
-
match = re.search(pattern, address)
-
if match:
- # Return the first non-None group found
return next(g for g in match.groups() if g is not None)
- else:
- return None
- # Remove training commas
- parsed_house_number = parsed_house_number.replace(",", "")
+ parsed = usaddress.parse(address)
+ # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
+ for part, type_ in parsed:
+ if type_ == 'OccupancyIdentifier':
+ return part # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
+ # number
- return parsed_house_number
+ # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found
+ address_number = next((part for part, type_ in parsed if type_ == 'AddressNumber'), None)
+ if address_number:
+ return address_number.replace(",", "") # Remove any trailing commas
+
+ except Exception as e:
+ print(f"Error parsing address: {e}")
+
+ return None
@staticmethod
def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index ebaf482d..06d1aadf 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest):
property_id, is_new = create_property(
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
)
- # if not is_new:
- # continue
- #
- # create_property_targets(
- # session,
- # property_id=property_id,
- # portfolio_id=body.portfolio_id,
- # epc_target=body.goal_value,
- # heat_demand_target=None
- # )
+ if not is_new:
+ continue
+
+ create_property_targets(
+ session,
+ property_id=property_id,
+ portfolio_id=body.portfolio_id,
+ epc_target=body.goal_value,
+ heat_demand_target=None
+ )
epc_records = {
'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 39ea5a98..5c781979 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -71,6 +71,14 @@ class PropertyValuation:
90013980: 148_000, # Based on Zoopla
90087154: 184_000, # Based on Zoopla
90046817: 167_000, # Based on Zoopla
+ # Goldman Sachs Pilot for intro - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+ 100070358888: 153_000, # Based on Zoopla
+ 10090436544: 282_000, # Based on Zoopla
+ 100070365751: 177_000, # Based on Zoopla
+ 10095952767: 168_000, # Based on Zoopla
+ 100070520130: 177_000, # Based on Zoopla
+ 100070333957: 185_000, # Based on Zoopla
+ 100070543258: 211_000, # Based on Zoopla
}
# We base our valuation uplifts on a number of sources
@@ -108,6 +116,29 @@ class PropertyValuation:
# {"start": "D", "end": "A", "increase_percentage": 0.017},
]
+ # Found here: https://www.rightmove.co.uk/news/articles/property-news/green-premium-epc-ratings/
+ # F -> C is + 15%
+ # E -> C is +7%
+ # D -> C is +3%
+ RIGHTMOVE_MAPPING = [
+ {"start": "G", "end": "C", "increase_percentage": 0.15},
+ {"start": "G", "end": "B", "increase_percentage": 0.15},
+ {"start": "G", "end": "A", "increase_percentage": 0.15},
+
+ {"start": "F", "end": "C", "increase_percentage": 0.15},
+ {"start": "F", "end": "B", "increase_percentage": 0.15},
+ {"start": "F", "end": "A", "increase_percentage": 0.15},
+
+ {"start": "E", "end": "C", "increase_percentage": 0.07},
+ {"start": "E", "end": "B", "increase_percentage": 0.07},
+ {"start": "E", "end": "A", "increase_percentage": 0.07},
+
+ {"start": "D", "end": "C", "increase_percentage": 0.03},
+ {"start": "D", "end": "B", "increase_percentage": 0.03},
+ {"start": "D", "end": "A", "increase_percentage": 0.03},
+
+ ]
+
EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
@classmethod
@@ -159,14 +190,18 @@ class PropertyValuation:
msm_increase, lloyds_increase = cls.get_increase(epc_band_range)
- # We now use the knight frank and nationwide data to get further valuation evidence, if we have it
+ # We now use the knight frank, nationwide and Rightmove data to get further valuation evidence, if we have it
kf_increase = [x for x in cls.KNIGHT_FRANK_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
nw_increase = [x for x in cls.NATIONWIDE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
+ rm_increase = [x for x in cls.RIGHTMOVE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
kf_increase = kf_increase[0]["increase_percentage"] if kf_increase else None
nw_increase = nw_increase[0]["increase_percentage"] if nw_increase else None
+ rm_increase = rm_increase[0]["increase_percentage"] if rm_increase else None
- all_increases = [x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase] if x is not None]
+ all_increases = [
+ x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase, rm_increase] if x is not None
+ ]
max_increase = max(all_increases)
min_increase = min(all_increases)
diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
index 2ba82e77..044cc830 100644
--- a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
+++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
@@ -21,6 +21,8 @@ class AirSourceHeatPumpEfficiency:
def create_dataset(self):
logger.info("Creating solar photo supply dataset")
+
+ all_counts = []
for dir in tqdm(self.file_directories):
filepath = dir / "certificates.csv"
df = pd.read_csv(filepath, low_memory=False)
@@ -44,9 +46,15 @@ class AirSourceHeatPumpEfficiency:
df = df[
df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)
]
+
+ # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+ for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
+ df = df[~pd.isnull(df[col])]
# Get the columns we're interested in
df = df[
[
+ "PROPERTY_TYPE",
+ "BUILT_FORM",
"MAINHEAT_DESCRIPTION",
"MAINHEAT_ENERGY_EFF",
"MAINHEATCONT_DESCRIPTION",
@@ -60,6 +68,8 @@ class AirSourceHeatPumpEfficiency:
counts = df.groupby(
[
+ "PROPERTY_TYPE",
+ "BUILT_FORM",
"MAINHEAT_DESCRIPTION",
"MAINHEAT_ENERGY_EFF",
"MAINHEATCONT_DESCRIPTION",
@@ -71,8 +81,34 @@ class AirSourceHeatPumpEfficiency:
]
).size().reset_index(name="count")
- # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
- for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
- df = df[~pd.isnull(df[col])]
- # Take newest LODGEMENT_DATE per UPRN
- df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
+ all_counts.append(counts)
+
+ all_counts = pd.concat(all_counts)
+
+ all_counts_agg = all_counts.groupby(
+ [
+ "PROPERTY_TYPE",
+ "BUILT_FORM",
+ "MAINHEAT_DESCRIPTION",
+ "MAINHEAT_ENERGY_EFF",
+ "MAINHEATCONT_DESCRIPTION",
+ "MAINHEATC_ENERGY_EFF",
+ "MAIN_FUEL",
+ "HOTWATER_DESCRIPTION",
+ "HOT_WATER_ENERGY_EFF",
+ "MAINS_GAS_FLAG"
+ ]
+ )["count"].sum().reset_index()
+
+ all_counts_agg.groupby("PROPERTY_TYPE")["count"].sum()
+ # In houses, 68% of the cases where we see air source heat pumps are in detached and semi-detached houses
+ all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "House"]["BUILT_FORM"].value_counts(normalize=True)
+
+ all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Flat"]["BUILT_FORM"].value_counts()
+
+ # In Bungalows, 74% of cases where we see air source heat pumps are in detached and semi-detached houses
+ all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Bungalow"]["BUILT_FORM"].value_counts(normalize=True)
+
+ # TODO: Research options for mid and end-terrace houses
+ # TODO: Research the options for flats - we see them appear in flats, but practically speaking, how does the
+ # install process work?
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 1655979b..52e9422c 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,10 +34,6 @@ def app():
low_memory=False
)
- z = epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
- z["HOTWATER_DESCRIPTION"].value_counts()
- z["MAIN_FUEL"].value_counts()
-
# Filter on entries where we have a UPRN
epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
diff --git a/etl/customers/goldman/asset_list.py b/etl/customers/goldman/asset_list.py
new file mode 100644
index 00000000..afe3c64c
--- /dev/null
+++ b/etl/customers/goldman/asset_list.py
@@ -0,0 +1,63 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 75
+USER_ID = 8
+
+
+def app():
+ asset_list = [
+ {
+ "address": "19 Emily Gardens",
+ "postcode": "B16 0ED",
+ },
+ {
+ "address": "Flat 6 41 Bradford Street",
+ "postcode": "B5 6HX",
+ },
+ {
+ "address": "197 FIELD LANE",
+ "postcode": "B32 4HL",
+ },
+ {
+ "address": "FLAT 4 108 SUMMER ROAD",
+ "postcode": "B23 6DY",
+ },
+ {
+ "address": "1, St. Benedicts Road",
+ "postcode": "B10 9DP",
+ },
+ {
+ "address": "29 COOKSEY LANE",
+ "postcode": "B44 9QL",
+ },
+ {
+ "address": "40 TRITTIFORD ROAD",
+ "postcode": "B13 0HG",
+ }
+ ]
+
+ asset_list = pd.DataFrame(asset_list)
+
+ # Store the asset list in s3
+ filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+ save_csv_to_s3(
+ dataframe=asset_list,
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=filename
+ )
+
+ # EPC C portfolio (NOTE(review): goal_value below is "B" - confirm which band is intended)
+ body = {
+ "portfolio_id": str(PORTFOLIO_ID),
+ "housing_type": "Private",
+ "goal": "Increase EPC",
+ "goal_value": "B",
+ "trigger_file_path": filename,
+ "already_installed_file_path": "",
+ "patches_file_path": "",
+ "non_invasive_recommendations_file_path": "",
+ "budget": None,
+ }
+ print(body)
diff --git a/etl/customers/goldman/epc_f_g_properties.py b/etl/customers/goldman/epc_f_g_properties.py
new file mode 100644
index 00000000..28197126
--- /dev/null
+++ b/etl/customers/goldman/epc_f_g_properties.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+
+def app():
+ """
+ Pulling the list of EPC G & F properties in Birmingham for Goldman Sachs
+ """
+ epc_data = pd.read_csv(
+ "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+ low_memory=False
+ )
+
+ epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+ epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
+
+ # Get the newest EPC for each UPRN. We use LODGEMENT_DATETIME as a proxy for this
+ epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+
+ epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+
+ # Get G & F properties
+ epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
+
+ # Save as an excel
+ epc_data.to_excel("Birmingham EPC F & G Properties.xlsx", index=False)
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
new file mode 100644
index 00000000..24922f68
--- /dev/null
+++ b/etl/customers/goldman/property_ownership.py
@@ -0,0 +1,407 @@
+import re
+import pandas as pd
+from tqdm import tqdm
+import Levenshtein
+from backend.SearchEpc import SearchEpc
+
+# Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
+# £213,165 since they trade at a discount. This is based on the rightmove study where moving from an EPC F/G -> C has a
+# +15% impact on valuation and D -> C has a +3% impact on valuation.
+# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
+# Therefore value_of_F * 1.15 = value_of_D * 1.03
+# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
+PROPERTY_VALUE_ESTIMATE = 213_165
+
+
+def aggregate_matches(matching_lookup, company_ownership, properties):
+    """
+    Summarise matched properties per owning company.
+
+    The lookup is left-joined to the ownership records on "Title Number" and to the EPC properties on "UPRN".
+    The result has one row per (registration number, proprietor), one count column per LOCAL_AUTHORITY_LABEL,
+    the overall count, an approximate value (count * PROPERTY_VALUE_ESTIMATE) and its running total, ordered
+    from the largest owner to the smallest.
+    """
+    owner_keys = ["Company Registration No. (1)", "Proprietor Name (1)"]
+    authority_col = "LOCAL_AUTHORITY_LABEL"
+
+    with_owner = matching_lookup.merge(company_ownership, how="left", on="Title Number")
+    df = with_owner.merge(properties[["UPRN", authority_col]], how="left", on="UPRN")
+
+    # Properties per owner and local authority, largest first
+    per_authority = df.groupby(owner_keys + [authority_col])["UPRN"].count()
+    per_authority = per_authority.reset_index(name="number_of_properties")
+    per_authority = per_authority.sort_values("number_of_properties", ascending=False)
+
+    # One column per local authority; 0 where an owner holds nothing in that authority
+    wide = per_authority.pivot_table(
+        index=owner_keys, columns=authority_col, values="number_of_properties", fill_value=0
+    ).reset_index()
+
+    # Overall number of matched properties per owner
+    totals = df.groupby(owner_keys)["UPRN"].count().reset_index(name="total_number_of_properties")
+
+    summary = wide.merge(totals, how="left", on=owner_keys)
+    summary = summary.sort_values("total_number_of_properties", ascending=False)
+
+    # Rough portfolio value per owner, plus the running total down the ranking
+    summary["approx_value"] = PROPERTY_VALUE_ESTIMATE * summary["total_number_of_properties"]
+    summary["cumulative_value"] = summary["approx_value"].cumsum()
+
+    return summary
+
+
+def find_f_g_properties(paths):
+    """
+    Collect the newest EPC per UPRN from every certificates CSV in ``paths``, keep the F and G rated ones and
+    write them all to "EPC F & G Properties.xlsx" in the working directory.
+    """
+    data = []
+    for path in tqdm(paths):
+        epc_data = pd.read_csv(path, low_memory=False)
+        # Rows without a UPRN cannot be de-duplicated; .copy() so the column writes below hit a real frame
+        # rather than a filtered view (avoids SettingWithCopyWarning)
+        epc_data = epc_data[epc_data["UPRN"].notnull()].copy()
+        epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
+        # The newest EPC per UPRN is the row with the latest LODGEMENT_DATETIME
+        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+        epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+        data.append(epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])])
+
+    # pd.concat raises ValueError when ``paths`` is empty; an empty workbook is never written in that case
+    data = pd.concat(data)
+
+    data.to_excel("EPC F & G Properties.xlsx", index=False)
+
+
+def remove_text_in_brackets(address: str) -> str:
+    """
+    Strip every parenthesised fragment from an address.
+
+    Each "(...)" group is deleted together with any whitespace directly in front of it.
+
+    Parameters:
+    - address (str): The address string to clean.
+
+    Returns:
+    - str: The address without its parenthesised fragments.
+    """
+    return re.sub(r'\s*\([^)]*\)', '', address)
+
+
+def extract_numeric_part(house_number: str) -> str:
+    """
+    Keep only the digits of a house number.
+
+    Every non-digit character (letters, spaces, punctuation) is dropped; the remaining digits are joined as-is.
+
+    Parameters:
+    - house_number (str): The house number string possibly containing letters.
+
+    Returns:
+    - str: The digits of the house number, or an empty string when it contains none.
+    """
+    return re.compile(r'\D').sub('', house_number)
+
+
+def levenstein_match(matching_string, df, address_col):
+    """
+    Return the one-row slice of ``df`` whose ``address_col`` is the closest Levenshtein match to ``matching_string``.
+    Punctuation and spaces are ignored on both sides, no distance threshold is applied, an empty ``df`` comes back as-is.
+    """
+    if df.empty:
+        return df
+
+    # Normalise the query like the candidates (else its spaces/commas inflate every distance); str() tolerates NaN
+    key = re.sub(r'[^\w\s]', '', str(matching_string)).replace(" ", "")
+    match_to = [re.sub(r'[^\w\s]', '', str(x)).replace(" ", "") for x in df[address_col].tolist()]
+    distances = [Levenshtein.distance(key, s) for s in match_to]
+    best_match_index = distances.index(min(distances))
+    return df.iloc[best_match_index:best_match_index + 1]
+
+
+def extract_range_from_house_number(house_number_range: str):
+    """
+    Expand a numeric house-number range written as 'x-y' into every number it covers.
+
+    Anything that is not exactly two digit-only parts around a single hyphen is not treated as a range.
+
+    Parameters:
+    - house_number_range (str): The house number string that might contain a range.
+
+    Returns:
+    - list of str: The numbers from x to y inclusive (empty when x > y) if the input is a range; otherwise None.
+    """
+    # Empty / missing input and hyphen-free input can never be a range
+    if not house_number_range or '-' not in house_number_range:
+        return None
+
+    bounds = house_number_range.split('-')
+    if len(bounds) != 2:
+        return None
+
+    low, high = bounds
+    if not (low.isdigit() and high.isdigit()):
+        return None
+
+    # Inclusive on both ends, returned as strings
+    first, last = int(low), int(high)
+    return [str(number) for number in range(first, last + 1)]
+
+
+def is_in_range(row, house_no):
+    """ Return True when ``row`` is a non-empty collection containing ``house_no``, otherwise False. """
+    if not row:
+        return False
+    return any(house_no == candidate for candidate in row)
+
+
+def remove_duplicate_matches(matching_lookup, properties, company_ownership):
+    """Keep one UPRN per duplicated "Title Number": the one whose EPC address is closest to the title's address.
+
+    Returns ``matching_lookup`` without the losing (UPRN, Title Number) pairs; columns are unchanged.
+    """
+    duplicated_titles = matching_lookup[matching_lookup["Title Number"].duplicated()]["Title Number"].unique()
+
+    to_drop = []
+    for dupe_title in duplicated_titles:
+        dupe_data = matching_lookup[matching_lookup["Title Number"] == dupe_title].copy()
+        matched_addresses = dupe_data.merge(
+            properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+            how="left", on="UPRN"
+        ).merge(
+            company_ownership[["Title Number", "Property Address"]],
+            how="left", on="Title Number"
+        )
+        # Levenshtein distance picks the EPC address that best matches the registered title address
+        best_match = levenstein_match(
+            matching_string=matched_addresses["Property Address"].values[0],
+            df=matched_addresses,
+            address_col="epc_address"
+        )
+        matches_to_drop = matched_addresses[
+            ~matched_addresses["UPRN"].isin(best_match["UPRN"].values)
+        ]
+        to_drop.append(matches_to_drop[["UPRN", "Title Number"]].copy())
+
+    # pd.concat raises ValueError on an empty list, so only concatenate when a duplicated title was found
+    to_drop = pd.concat(to_drop) if to_drop else pd.DataFrame()
+    if to_drop.empty:
+        return matching_lookup
+
+    # Anti-join: keep only the rows with no partner in to_drop (the filtered frame must be the return value)
+    merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
+    return merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+
+
+def app():
+ """
+ This script is for scoping property ownership for EPC F & G rated properties in Birmingham, for Goldman Sachs
+ """
+ # paths = [
+ # "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
+ # #
+ # "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
+ # # East midlands
+ # "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
+ # ]
+ # paths = list(set(paths))
+ # find_f_g_properties(paths)
+
+ properties = pd.read_excel("EPC F & G Properties.xlsx")
+ company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+ company_ownership["is_overseas"] = False
+ overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv")
+ overseas_company_ownership["is_overseas"] = True
+
+ company_ownership = pd.concat([company_ownership, overseas_company_ownership])
+
+ # Filter on relevant postcodes
+ company_ownership = company_ownership[
+ company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
+
+ # Now we filter properties the other way around
+ properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
+ # We end up with 7.4k entries on a postcode match, however we need to now do a direct address match
+ # Keep private rentals and owner-occupied properties (both TENURE spellings of each)
+ properties = properties[
+ properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
+ ]
+
+ # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
+ # property itself
+ starting_terms = [
+ "land adjoining", "land on the", "land to the rear of", "land and buildings on the",
+ "garage adjoining", "car park adjoining", "the land adjoining", "land and buildings adjoining",
+ "all royal mines"
+ ]
+ for starting_term in starting_terms:
+ company_ownership = company_ownership[
+ ~company_ownership["Property Address"].str.lower().str.startswith(starting_term)
+ ]
+
+ freehold_matching_lookup = [] # 634
+ leasehold_matching_lookup = [] # 86
+ shared_leasehold_match = []
+ shared_freehold_match = []
+ for _, address in tqdm(properties.iterrows(), total=len(properties)):
+ match_type = "exact"
+ filtered = company_ownership[
+ company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower()
+ ].copy()
+
+ # Parse the house number from each title address (bracketed text removed first), lower-cased and without commas
+ filtered["house_number"] = (
+ filtered["Property Address"]
+ .apply(remove_text_in_brackets)
+ .apply(SearchEpc.get_house_number)
+ .str.lower()
+ .str.replace(",", "")
+ )
+ house_no = SearchEpc.get_house_number(address["ADDRESS1"])
+ if house_no is not None:
+ house_no = house_no.replace(",", "")
+
+ if house_no is None:
+ # It's hard for us to get a reliable match
+ # filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
+ # if filtered.shape[0] > 1:
+ # raise Exception("No valid - maybe we should do levenstein?")
+ continue
+
+ else:
+
+ if house_no not in filtered["house_number"].values:
+ # If this happens, we check house_number for a x-y range of addresses
+ filtered["house_number_range"] = filtered["house_number"].apply(extract_range_from_house_number)
+ # If we have found a house number range, we check if the house number is in the range and if not,
+ # we drop the row
+ filtered['is_in_range'] = filtered['house_number_range'].apply(lambda x: is_in_range(x, house_no))
+
+ if filtered['is_in_range'].any():
+ # If house_no is found in any range, keep only rows where it is in range
+ filtered = filtered[filtered['is_in_range']]
+ else:
+ # If house_no is not found in any range, filter out rows where 'house_number_range' is not None
+ filtered = filtered[filtered['house_number_range'].isnull()]
+
+ # Strip out letters from house_no and house_number
+ house_no = extract_numeric_part(house_no)
+ filtered["house_number"] = filtered["house_number"].astype(str).apply(extract_numeric_part)
+ match_type = "approximate"
+
+ filtered = filtered[filtered["house_number"] == house_no]
+
+ if filtered.empty:
+ continue
+
+ filtered_freehold = filtered[filtered["Tenure"] == "Freehold"]
+ filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"]
+
+ if filtered_freehold.shape[0] > 1:
+ matched = filtered_leasehold[["Title Number"]].copy()
+ matched.insert(0, "UPRN", address["UPRN"])
+ shared_freehold_match.append(matched)
+ elif not filtered_freehold.empty:
+ freehold_matching_lookup.append(
+ {
+ "UPRN": address["UPRN"],
+ "Title Number": filtered_freehold["Title Number"].values[0],
+ "match_type": match_type,
+ }
+ )
+
+ if filtered_leasehold.shape[0] > 1:
+ matched = filtered_leasehold[["Title Number"]].copy()
+ matched.insert(0, "UPRN", address["UPRN"])
+ shared_leasehold_match.append(matched)
+ elif not filtered_leasehold.empty:
+ leasehold_matching_lookup.append(
+ {
+ "UPRN": address["UPRN"],
+ "Title Number": filtered_leasehold["Title Number"].values[0],
+ "match_type": match_type,
+ }
+ )
+
+ freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
+ leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
+ shared_leasehold_match = pd.concat(shared_leasehold_match)
+
+ # The approximate matches aren't very good
+ freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
+ leasehold_matching_lookup = leasehold_matching_lookup[leasehold_matching_lookup["match_type"] == "exact"]
+
+ # There are some cases where we have duplicates
+ freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
+ leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
+
+ matched_addresses = freehold_matching_lookup.merge(
+ properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+ how="left", on="UPRN"
+ ).merge(
+ company_ownership[["Title Number", "Property Address"]],
+ how="left", on="Title Number"
+ )
+
+ # shared_freehold_match = pd.DataFrame(shared_freehold_match)
+ # Store these files (NOTE(review): shared_freehold_match rows are taken from filtered_leasehold above - confirm)
+ freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
+ leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx")
+ shared_leasehold_match.to_excel("shared_leasehold_match.xlsx")
+ # shared_freehold_match.to_excel("shared_freehold_match.xlsx")
+
+ freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
+ leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
+
+ combined_aggregate = aggregate_matches(
+ pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties
+ )
+
+ df = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
+
+ investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
+ investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
+
+ properties["WALLS_DESCRIPTION"].value_counts(normalize=True)
+
+
+def company_aggregation():
+    """Count CCOD "Property Address" entries per proprietor/registration number and save the ranking to Excel."""
+    ccod = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+
+    owner_keys = ["Proprietor Name (1)", "Company Registration No. (1)"]
+    per_owner = ccod.groupby(owner_keys)["Property Address"].count()
+    ranking = per_owner.reset_index(name="Number of Properties")
+
+    # Largest owners first
+    ranking = ranking.sort_values("Number of Properties", ascending=False)
+
+    ranking.to_excel("Company ownership aggregation.xlsx")
diff --git a/etl/customers/guiness/route_march.py b/etl/customers/guiness/route_march.py
new file mode 100644
index 00000000..28f350d3
--- /dev/null
+++ b/etl/customers/guiness/route_march.py
@@ -0,0 +1,98 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+ """
+ This app is satisfying an ad hoc request to retrieve EPC data for properties owned by Guiness, to help plan the
+ route march
+
+ These properties were provided to us by Ecosurv
+ :return:
+ """
+ asset_list = read_excel_from_s3(
+ bucket_name="retrofit-datalake-dev",
+ file_key="customers/guiness/TGP CW Properties PV.xlsx",
+ header_row=0
+ )
+
+ epc_data = []
+ for _, guiness_property in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+ searcher = SearchEpc(
+ address1=str(guiness_property["Address"]),
+ postcode=guiness_property["POSTCODES"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key="",
+ property_type=None,
+ fast=True
+ )
+ # Force the skipping of estimating the EPC
+ searcher.ordnance_survey_client.property_type = None
+ searcher.ordnance_survey_client.built_form = None
+
+ searcher.find_property(skip_os=True)
+ if searcher.newest_epc is None:
+ continue
+
+ epc = {
+ "asset_list_address": guiness_property["Address"],
+ "asset_list_postcode": guiness_property["POSTCODES"],
+ **searcher.newest_epc.copy()
+ }
+
+ epc_data.append(epc)
+
+ epc_df = pd.DataFrame(epc_data)
+
+ # Retrieve just the data we need
+ epc_df = epc_df[
+ [
+ "asset_list_address",
+ "asset_list_postcode",
+ "uprn",
+ "property-type",
+ "built-form",
+ "inspection-date",
+ "current-energy-rating",
+ "current-energy-efficiency",
+ "roof-description",
+ "walls-description",
+ "transaction-type"
+ ]
+ ]
+
+ asset_list = asset_list.merge(
+ epc_df, how="left", left_on=["Address", "POSTCODES"], right_on=["asset_list_address", "asset_list_postcode"]
+ )
+
+ # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated
+ asset_list = asset_list.drop_duplicates(subset=["Address", "POSTCODES"])
+ asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_postcode"])
+
+ # Rename the columns
+ asset_list = asset_list.rename(columns={
+ "property-type": "Property Type",
+ "built-form": "Archetype",
+ "inspection-date": "Last EPC Inspection Date",
+ "current-energy-rating": "Last survey EPC Rating",
+ "current-energy-efficiency": "Last survey SAP Score",
+ "roof-description": "Roof Construction",
+ "walls-description": "Wall Construction",
+ "transaction-type": "Last EPC Reason"
+ })
+
+ # Store as an excel
+ filename = "Guiness EPC data.xlsx"
+ asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/livewest/route_march.py b/etl/customers/livewest/route_march.py
new file mode 100644
index 00000000..9e69fd43
--- /dev/null
+++ b/etl/customers/livewest/route_march.py
@@ -0,0 +1,134 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+# Load environment variables from backend/.env (the path is relative to the current working directory)
+load_dotenv(dotenv_path="backend/.env")
+# Token handed to SearchEpc as auth_token. os.getenv returns None when the variable is not set
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def _find_newest_epc(address1, postcode, full_address):
+    """
+    Run a single EPC search and return the searcher's newest EPC.
+
+    :param address1: value passed to SearchEpc as address1
+    :param postcode: postcode of the unit
+    :param full_address: comma separated full address, passed to SearchEpc as full_address
+    :return: searcher.newest_epc after find_property(skip_os=True); the caller treats None as "no EPC found"
+    """
+    searcher = SearchEpc(
+        address1=address1,
+        postcode=postcode,
+        auth_token=EPC_AUTH_TOKEN,
+        os_api_key="",
+        property_type=None,
+        fast=True,
+        full_address=full_address
+    )
+    # Force the skipping of estimating the EPC
+    searcher.ordnance_survey_client.property_type = None
+    searcher.ordnance_survey_client.built_form = None
+
+    searcher.find_property(skip_os=True)
+    return searcher.newest_epc
+
+
+def route_march_may_2024():
+    """
+    This code pulls supplementary EPC data for a route march that is expected to happen in May 2024. This code
+    was authored on the 30th April 2024.
+
+    The asset list is read from S3, the newest EPC is searched for every unit, and a selection of EPC fields is
+    left-joined back onto the asset list (units without a match keep empty EPC columns). The result is written
+    to "Livewest EPC data.xlsx" in the current working directory.
+    """
+
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
+        header_row=0
+    )
+
+    # Columns identifying a unit in the asset list, and their counterparts carried on each EPC row
+    asset_keys = ["NO", "ADDRESS 1", "POSTCODE"]
+    epc_keys = ["asset_list_house_no", "asset_list_address1", "asset_list_postcode"]
+    # The EPC fields we need in the output
+    epc_fields = [
+        "uprn",
+        "address",
+        "property-type",
+        "built-form",
+        "inspection-date",
+        "current-energy-rating",
+        "current-energy-efficiency",
+        "roof-description",
+        "walls-description",
+        "transaction-type",
+    ]
+
+    epc_data = []
+    for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        address_parts = [unit["NO"], unit["ADDRESS 1"], unit["ADDRESS 2"], unit["ADDRESS 3"], unit["POSTCODE"]]
+        full_address = ", ".join(str(x).strip() for x in address_parts if not pd.isnull(x))
+
+        newest_epc = _find_newest_epc(
+            address1=str(unit["NO"]), postcode=unit["POSTCODE"], full_address=full_address
+        )
+        if newest_epc is None:
+            # We try with a different address 1, with the flat style prefixes stripped out
+            # NOTE(review): replace("t", "") removes every "t" in the value, not just a leading prefix - confirm
+            # this is intended for all values of the NO column
+            add1 = (
+                str(unit["NO"]).lower()
+                .replace("flat", "")
+                .replace("ft", "")
+                .replace("t", "").strip()
+            )
+            newest_epc = _find_newest_epc(address1=add1, postcode=unit["POSTCODE"], full_address=full_address)
+
+        if newest_epc is None:
+            continue
+
+        epc_data.append({
+            "asset_list_house_no": unit["NO"],
+            "asset_list_address1": unit["ADDRESS 1"],
+            "asset_list_postcode": unit["POSTCODE"],
+            **newest_epc
+        })
+
+    if epc_data:
+        # Retrieve just the data we need
+        epc_df = pd.DataFrame(epc_data)[epc_keys + epc_fields].rename(columns={"address": "Matched EPC Address"})
+
+        asset_list = asset_list.merge(
+            epc_df,
+            how="left",
+            left_on=asset_keys,
+            right_on=epc_keys
+        )
+        asset_list = asset_list.drop(columns=epc_keys)
+    else:
+        # No unit matched an EPC. Selecting the EPC columns from an empty DataFrame would raise a KeyError, so
+        # we add the (empty) EPC columns directly, which keeps the layout of the spreadsheet stable
+        empty_fields = ["Matched EPC Address" if field == "address" else field for field in epc_fields]
+        asset_list = asset_list.reindex(columns=[*asset_list.columns, *empty_fields])
+
+    # Keep a single row per unit (repeated units in the asset list are multiplied by the merge)
+    asset_list = asset_list.drop_duplicates(subset=asset_keys)
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an excel
+    filename = "Livewest EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/places_for_people/route_march.py b/etl/customers/places_for_people/route_march.py
new file mode 100644
index 00000000..c38c71d3
--- /dev/null
+++ b/etl/customers/places_for_people/route_march.py
@@ -0,0 +1,137 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+# Load environment variables from backend/.env (the path is relative to the current working directory)
+load_dotenv(dotenv_path="backend/.env")
+# Token handed to SearchEpc as auth_token. os.getenv returns None when the variable is not set
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def _find_newest_epc(address1, postcode, full_address):
+    """
+    Run a single EPC search and return the searcher's newest EPC.
+
+    :param address1: value passed to SearchEpc as address1
+    :param postcode: postcode of the property
+    :param full_address: comma separated full address, passed to SearchEpc as full_address
+    :return: searcher.newest_epc after find_property(skip_os=True); the caller treats None as "no EPC found"
+    """
+    searcher = SearchEpc(
+        address1=address1,
+        postcode=postcode,
+        auth_token=EPC_AUTH_TOKEN,
+        os_api_key="",
+        property_type=None,
+        fast=True,
+        full_address=full_address
+    )
+    # Force the skipping of estimating the EPC
+    searcher.ordnance_survey_client.property_type = None
+    searcher.ordnance_survey_client.built_form = None
+
+    searcher.find_property(skip_os=True)
+    return searcher.newest_epc
+
+
+def app():
+    """
+    This app is satisfying an adhoc request to retrieve EPC data for the properties in the Places For People
+    "route march phase 1" asset list, to help plan the route march.
+
+    The asset list is read from S3, the newest EPC is searched for every property, and a selection of EPC fields
+    is left-joined back onto the asset list (properties without a match keep empty EPC columns). The result is
+    written to "Places For People EPC data.xlsx" in the current working directory.
+
+    NOTE(review): the previous docstring was copied from the Guinness script and said the list was provided by
+    Ecosurv - confirm whether that also holds for this list.
+    :return:
+    """
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Places For People/PFP ROUTE MARCH PHASE 1.xlsx",
+        header_row=1
+    )
+
+    # Columns identifying a property in the asset list, and their counterparts carried on each EPC row
+    asset_keys = ["ADDRESS", "ADDRESS.1", "POSTCODE"]
+    epc_keys = ["asset_list_address", "asset_list_address1", "asset_list_postcode"]
+    # The EPC fields we need in the output
+    epc_fields = [
+        "uprn",
+        "address",
+        "property-type",
+        "built-form",
+        "inspection-date",
+        "current-energy-rating",
+        "current-energy-efficiency",
+        "roof-description",
+        "walls-description",
+        "transaction-type",
+    ]
+
+    epc_data = []
+    for _, pfp_property in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        address_parts = [
+            pfp_property["ADDRESS"],
+            pfp_property["ADDRESS.1"],
+            pfp_property["ADDRESS.2"],
+            pfp_property["POSTCODE"]
+        ]
+        full_address = ", ".join(str(x).strip() for x in address_parts if not pd.isnull(x))
+
+        newest_epc = _find_newest_epc(
+            address1=str(pfp_property["ADDRESS"]), postcode=pfp_property["POSTCODE"], full_address=full_address
+        )
+        if newest_epc is None:
+            # We try with a different address 1, with the flat style prefixes stripped out
+            # NOTE(review): replace("t", "") removes every "t" in the value, not just a leading prefix - confirm
+            # this is intended for all values of the ADDRESS column
+            add1 = str(pfp_property["ADDRESS"]).lower()
+            add1 = add1.replace("ft", "").replace("t", "").strip()
+            newest_epc = _find_newest_epc(
+                address1=add1, postcode=pfp_property["POSTCODE"], full_address=full_address
+            )
+
+        if newest_epc is None:
+            continue
+
+        epc_data.append({
+            "asset_list_address": pfp_property["ADDRESS"],
+            "asset_list_address1": pfp_property["ADDRESS.1"],
+            "asset_list_postcode": pfp_property["POSTCODE"],
+            **newest_epc
+        })
+
+    if epc_data:
+        # Retrieve just the data we need
+        epc_df = pd.DataFrame(epc_data)[epc_keys + epc_fields].rename(columns={"address": "Matched EPC Address"})
+
+        asset_list = asset_list.merge(
+            epc_df,
+            how="left",
+            left_on=asset_keys,
+            right_on=epc_keys
+        )
+        asset_list = asset_list.drop(columns=epc_keys)
+    else:
+        # No property matched an EPC. Selecting the EPC columns from an empty DataFrame would raise a KeyError,
+        # so we add the (empty) EPC columns directly, which keeps the layout of the spreadsheet stable
+        empty_fields = ["Matched EPC Address" if field == "address" else field for field in epc_fields]
+        asset_list = asset_list.reindex(columns=[*asset_list.columns, *empty_fields])
+
+    # Keep a single row per property (repeated properties in the asset list are multiplied by the merge)
+    asset_list = asset_list.drop_duplicates(subset=asset_keys)
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an excel
+    filename = "Places For People EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/vander_elliot/__init__.py b/etl/customers/vander_elliot/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/customers/vander_elliot/single_property_pilot.py b/etl/customers/vander_elliot/single_property_pilot.py
new file mode 100644
index 00000000..99624dfc
--- /dev/null
+++ b/etl/customers/vander_elliot/single_property_pilot.py
@@ -0,0 +1,56 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+# Identifiers used by app() to build the S3 keys "<USER_ID>/<PORTFOLIO_ID>/..." and the "portfolio_id" of the
+# printed request body
+PORTFOLIO_ID = 77
+USER_ID = 8
+
+# Manual patches for the pilot property. app() uploads these and references the uploaded file as
+# "patches_file_path". Each entry carries the address and postcode of the property plus the patched value(s),
+# here the roof description
+patches = [
+ {
+ "address": "79 Perryn Road",
+ "postcode": "W3 7LT",
+ "roof-description": "Pitched, no insulation (assumed)"
+ }
+]
+
+
+def app():
+    """
+    Upload the inputs for the single property pilot to S3 and print the request body that references them.
+
+    Two files are written to the "retrofit-plan-inputs-dev" bucket, under "<USER_ID>/<PORTFOLIO_ID>/":
+    the one-row asset list (pilot.csv) and the module level patches (patches.json). Nothing is returned; the
+    request body is only printed.
+    """
+    inputs_bucket = "retrofit-plan-inputs-dev"
+
+    # The pilot covers a single property
+    pilot_property = {
+        'uprn': 12103117,
+        "address": "79 Perryn Road",
+        "postcode": "W3 7LT",
+    }
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(dataframe=pd.DataFrame([pilot_property]), bucket_name=inputs_bucket, file_name=filename)
+
+    # Store patches in s3
+    # NOTE(review): the key ends in .json but the file is written with the CSV helper - confirm the consumer of
+    # patches_file_path expects CSV content
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(dataframe=pd.DataFrame(patches), bucket_name=inputs_bucket, file_name=patches_filename)
+
+    print({
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    })
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index d7a8ad2f..fd3c1692 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -37,6 +37,25 @@ MCS_SOLAR_PV_COST_DATA = {
"average_cost_per_kwh-Northern Ireland": 2126.09,
}
+# Average cost of an air source heat pump project per region, used by Costs.air_source_heat_pump (looked up by
+# self.region). This data is based on the MCS database. We use the larger figure between the 2023 and 2024
+# averages, to be conservative
+MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = {
+ "Outer London": 13220,
+ "Inner London": 13220,
+ "South East England": 13547,
+ "South West England": 12776,
+ "East of England": 12585,
+ "East Midlands": 12239,
+ "West Midlands": 13182,
+ "North East England": 11829,
+ "North West England": 11714,
+ "Yorkshire and the Humber": 11919,
+ "Wales": 13701,
+ "Scotland": 12586,
+ "Northern Ireland": 12000, # There are hardly any air source heat pump installs going on in Northern Ireland
+}
+# Value of the boiler upgrade scheme grant for an air source heat pump, in pounds. It is subtracted from the
+# estimated cost and quoted in the recommendation description
+BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
+
# This is based on quotes from installers
BATTERY_COST = 3500
@@ -1240,3 +1259,29 @@ class Costs:
"labour_hours": labour_hours,
"labour_days": labour_days,
}
+
+ def air_source_heat_pump(self):
+     """
+     Produce a cost estimation for an air source heat pump, based on the region of the property.
+
+     The regional average project cost is uplifted by the contingency and the boiler upgrade scheme grant is
+     then subtracted, so the returned figures are net of the grant.
+
+     :return: dictionary with the keys total, subtotal (total before VAT), vat, labour_hours and labour_days
+     """
+
+     # We assume 3 days installation, of 8 working hours each
+     labour_days = 3
+
+     # This is the average cost of a project, with some additional contingency, less the grant
+     total_cost = (
+         MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region] * (1 + self.CONTINGENCY)
+         - BOILER_UPGRADE_SCHEME_ASHP_VALUE
+     )
+     # The total is treated as VAT inclusive, so we back the VAT out of it
+     subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+
+     return dict(
+         total=total_cost,
+         subtotal=subtotal_before_vat,
+         vat=total_cost - subtotal_before_vat,
+         labour_hours=labour_days * 8,
+         labour_days=labour_days,
+     )
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index d24ad811..76da6c37 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -35,6 +35,9 @@ class HeatingControlRecommender:
return
+ if heating_description in ["Air source heat pump, radiators, electric"]:
+ self.recommend_time_temperature_zone_controls()
+
def recommend_room_heaters_electric_controls(self):
"""
If the home has Room heaters, electric, we start by identifying potential heating controls that could
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 537125a1..a51803f2 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -1,6 +1,4 @@
-import pandas as pd
-
-from recommendations.Costs import Costs
+from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE
from recommendations.recommendation_utils import check_simulation_difference, override_costs
from backend.Property import Property
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
@@ -18,7 +16,14 @@ class HeatingRecommender:
self.heating_recommendations = []
self.heating_control_recommendations = []
- def recommend(self, phase=0):
+ def recommend(self, has_cavity_or_loft_recommendations, phase=0):
+ """
+ Produces heating recommendations
+ :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
+ recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
+ before being able to get the boiler upgrade scheme benefits. The front-end messaging tells the user that the
+ cavity and loft must be insulated in order to qualify for the grant
+ :param phase: indicates the phase of the retrofit programme
+ """
# TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
# the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
@@ -81,8 +86,122 @@ class HeatingRecommender:
phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters
)
+ # We recommend air source heat pumps
+ # Heat pumps are suitable for all property types:
+ # https://energysavingtrust.org.uk/from-flats-to-terraced-houses-heat-pumps-are-suitable-for-all-property-types/
+ # Just seems least probable for flats, so we'll allow houses and bungalows
+ # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
+ # and either allow or prevent the recommendation of an air source heat pump
+
+ suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+ has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+
+ if suitable_property_type and not has_air_source_heat_pump:
+ self.recommend_air_source_heat_pump(
+ phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+ )
+
return
+ def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations):
+ """
+ Builds the recommendation for an air source heat pump and appends it to self.heating_recommendations.
+ This is ultimately an overhaul to the heating system and so is recommended as an alternative to other
+ heating system recommendations
+
+ The cost comes from self.costs.air_source_heat_pump(), plus the cost of the heating controls recommendation
+ (if one is produced for the heat pump).
+
+ :param phase: phase of the retrofit programme, stored on the recommendation under "phase"
+ :param has_cavity_or_loft_recommendations: when truthy, the description also states that the cavity and loft
+ must be insulated to qualify for the boiler upgrade scheme grant
+ :raises NotImplementedError: if the controls recommender produced more than one recommendation
+ :return: None, the recommendation is appended to self.heating_recommendations
+ """
+
+ # Ask the controls recommender which heating controls would go with an air source heat pump
+ controls_recommender = HeatingControlRecommender(self.property)
+ controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
+
+ ashp_costs = self.costs.air_source_heat_pump()
+ # We add the costs of the heating controls, onto each key in the costs dictionary
+ # NOTE(review): this indexes recommendation[0] before the "exactly one recommendation" check further down,
+ # and presumably relies on the controls recommendation carrying every key of the costs dictionary - confirm
+ if controls_recommender.recommendation:
+ for key in ashp_costs:
+ ashp_costs[key] += controls_recommender.recommendation[0][key]
+
+ already_installed = "air_source_heat_pump" in self.property.already_installed
+ if already_installed:
+ ashp_costs = override_costs(ashp_costs)
+ description = "The property already has an air source heat pump, no further action needed."
+ else:
+ if controls_recommender.recommendation:
+ description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
+ "room sensors and smart radiator valves (time & temperature zone control).")
+ else:
+ description = "Install an air source heat pump."
+
+ # The cost includes the boiler upgrade scheme grant, so the description always says so. If we have also
+ # produced cavity or loft insulation recommendations, we add a note that the cavity and loft need to be
+ # treated, to ensure access to the funding
+ # NOTE(review): indentation is not visible here - confirm whether this note is also meant to be appended to
+ # the "already installed" description
+ if has_cavity_or_loft_recommendations:
+ description = description + (f" The cost includes the £"
+ f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
+ f"You must ensure that the property has an insulated cavity and "
+ f"270mm+ loft insulation to qualify for the grant")
+ else:
+ description = description + (f" The cost includes the £"
+ f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
+
+ # Baseline of the simulated end state: main heating and hot water efficiency both end as "Good"
+ simulation_config = {
+ "mainheat_energy_eff_ending": "Good",
+ "hot_water_energy_eff_ending": "Good"
+ }
+ # The heat pump also takes over the hot water (simulated as "From main system"), so we need to reflect this
+ # in the outcome of the recommendation
+ heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process()
+ hotwater_ending_config = HotWaterAttributes("From main system").process()
+
+ # If the property does not currently have electric main fuel, we'll simulate the change
+ fuel_ending_config = {}
+ if self.property.main_fuel["fuel_type"] != "electricity":
+ fuel_ending_config = MainFuelAttributes("electricity (not community)").process()
+
+ # Check the simulation differences
+ heating_simulation_config = check_simulation_difference(
+ new_config=heating_ending_config, old_config=self.property.main_heating
+ )
+ hotwater_simulation_config = check_simulation_difference(
+ new_config=hotwater_ending_config, old_config=self.property.hotwater
+ )
+ fuel_simulation_config = check_simulation_difference(
+ new_config=fuel_ending_config, old_config=self.property.main_fuel
+ )
+
+ # Later entries win on duplicate keys, so the computed differences override the baseline above
+ simulation_config = {
+ **simulation_config,
+ **heating_simulation_config,
+ **hotwater_simulation_config,
+ **fuel_simulation_config,
+ }
+
+ if controls_recommender.recommendation:
+ # We should have just the single recommendation for heat controls, which is time
+ # and temperature zone controls
+ if len(controls_recommender.recommendation) != 1:
+ raise NotImplementedError("More than one heat controls recommendation for air source heat pump")
+ simulation_config = {
+ **simulation_config,
+ **controls_recommender.recommendation[0]["simulation_config"]
+ }
+
+ # The cost keys are spread onto the recommendation itself
+ ashp_recommendation = {
+ "phase": phase,
+ "parts": [
+ # TODO
+ ],
+ "type": "heating",
+ "description": description,
+ "starting_u_value": None,
+ "new_u_value": None,
+ "sap_points": None,
+ "already_installed": already_installed,
+ "simulation_config": simulation_config,
+ **ashp_costs
+ }
+
+ self.heating_recommendations.append(ashp_recommendation)
+
@staticmethod
def check_simulation_difference(old_config, new_config):
"""
@@ -146,7 +265,7 @@ class HeatingRecommender:
recommendation_description = f"{description} and {controls_description}"
- already_installed = "cavity_wall_insulation" in self.property.already_installed
+ already_installed = "heating_controls" in self.property.already_installed
if already_installed:
total_costs = override_costs(total_costs)
recommendation_description = "Heating system has already been upgraded, no further action needed."
@@ -359,7 +478,6 @@ class HeatingRecommender:
**heating_simulation_config,
**hotwater_simulation_config,
**fuel_simulation_config,
- "hot_water_energy_eff_ending": "Good"
}
boiler_costs = self.costs.boiler(
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 06dc2d61..c8113cdc 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -109,16 +109,42 @@ class Recommendations:
# Heating and Electical systems
if "heating" not in self.exclusions:
- self.heating_recommender.recommend(phase=phase)
+
+ cavity_or_loft_recommendations = [
+ r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations
+ if r["type"] in ["cavity_wall_insulation", "loft_insulation"]
+ ]
+ has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0
+
+ self.heating_recommender.recommend(
+ phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+ )
if (
self.heating_recommender.heating_recommendations or
self.heating_recommender.heating_control_recommendations
):
- if self.heating_recommender.heating_recommendations:
- property_recommendations.append(self.heating_recommender.heating_recommendations)
- if self.heating_recommender.heating_control_recommendations:
- property_recommendations.append(self.heating_recommender.heating_control_recommendations)
+ # We split into first and second phase recommendations
+ first_phase_recommendations = [
+ r for r in (
+ self.heating_recommender.heating_recommendations +
+ self.heating_recommender.heating_control_recommendations
+ )
+ if r["phase"] == phase
+ ]
+ second_phase_recommendations = [
+ r for r in (
+ self.heating_recommender.heating_recommendations +
+ self.heating_recommender.heating_control_recommendations
+ )
+ if r["phase"] == phase + 1
+ ]
+
+ if first_phase_recommendations:
+ property_recommendations.append(first_phase_recommendations)
+
+ if second_phase_recommendations:
+ property_recommendations.append(second_phase_recommendations)
# We check if we have distinct heating and heating controls recommendations
# If so, we increment by 2 (one of the heating system, one for the heating controls)
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index b44557ab..58d4b123 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -44,7 +44,7 @@ class SolarPvRecommendations:
:return:
"""
- is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+ is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
is_valid_roof_type = (
self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
)
diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py
new file mode 100644
index 00000000..0d69b10d
--- /dev/null
+++ b/recommendations/tests/test_air_source_heat_pump.py
@@ -0,0 +1,944 @@
+import pandas as pd
+import msgpack
+from datetime import datetime
+
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
+from backend.Property import Property
+from recommendations.HeatingRecommender import HeatingRecommender
+from recommendations.Recommendations import Recommendations
+from etl.epc.Record import EPCRecord
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from backend.ml_models.api import ModelApi
+
+
+def find_examples():
+ """
+ Some scrappy helper code to find EPC examples.
+
+ Reads the SAP change dataset from S3 and narrows it down to records where an air source heat pump was
+ installed on a property that had a boiler, while the listed "static" attributes stayed the same between the
+ starting and ending records. Nothing is returned; the function only prints the size of the dataset as it is
+ filtered, so it is meant to be stepped through interactively.
+ """
+ # Let's look for some testing data, where the only thing different pre and post is the installation of an
+ # air source heat pump
+ data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev",
+ file_key="sap_change_model/2024-03-24-15-51-13/dataset_no_cleaning.parquet"
+ )
+
+ # Firstly, take records where before there was no air source heat pump and afterwards there was
+ data = data[
+ data["has_air_source_heat_pump_ending"] & ~data["has_air_source_heat_pump"]
+ ]
+
+ # Start with a property that has a boiler
+ data = data[data["has_boiler"]]
+
+ # Columns that must be unchanged between the starting and the ending record. The commented out entries are
+ # attributes that are expected to change when an air source heat pump is installed
+ static_columns = [
+ # Walls
+ 'walls_thermal_transmittance_ending',
+ 'is_filled_cavity_ending',
+ 'is_park_home_ending',
+ 'walls_insulation_thickness_ending',
+ 'external_insulation_ending',
+ 'internal_insulation_ending',
+ # Floors
+ # 'floor_thermal_transmittance_ending', # Don't subset on this, because it changes based on floor area
+ 'floor_insulation_thickness_ending',
+ # Roof
+ 'roof_thermal_transmittance_ending',
+ 'is_at_rafters_ending',
+ 'roof_insulation_thickness_ending',
+ # Hot water - air source heat pump will change the hot water system (probably from whatever it was -> main)
+ # 'heater_type_ending',
+ # 'system_type_ending',
+ # 'thermostat_characteristics_ending',
+ # 'heating_scope_ending',
+ # 'energy_recovery_ending',
+ # 'hotwater_tariff_type_ending',
+ # 'extra_features_ending',
+ # 'chp_systems_ending',
+ # 'distribution_system_ending',
+ # 'no_system_present_ending',
+ # 'appliance_ending',
+ # Heating - Will change when installing an ASHP
+ # 'has_radiators_ending',
+ # 'has_fan_coil_units_ending',
+ # 'has_pipes_in_screed_above_insulation_ending',
+ # 'has_pipes_in_insulated_timber_floor_ending',
+ # 'has_pipes_in_concrete_slab_ending',
+ # 'has_boiler_ending',
+ # 'has_air_source_heat_pump_ending', # We want the air source heat pump to change
+ # 'has_room_heaters_ending',
+ # 'has_electric_storage_heaters_ending',
+ # 'has_warm_air_ending',
+ # 'has_electric_underfloor_heating_ending',
+ # 'has_electric_ceiling_heating_ending',
+ # 'has_community_scheme_ending',
+ # 'has_ground_source_heat_pump_ending',
+ # 'has_no_system_present_ending',
+ # 'has_portable_electric_heaters_ending',
+ # 'has_water_source_heat_pump_ending',
+ # 'has_electric_heat_pump_ending',
+ # 'has_micro-cogeneration_ending',
+ # 'has_solar_assisted_heat_pump_ending',
+ # 'has_exhaust_source_heat_pump_ending',
+ # 'has_community_heat_pump_ending',
+ # 'has_electric_ending',
+ # 'has_mains_gas_ending',
+ # 'has_wood_logs_ending', 'has_coal_ending', 'has_oil_ending',
+ # 'has_wood_pellets_ending', 'has_anthracite_ending', 'has_dual_fuel_mineral_and_wood_ending',
+ # 'has_smokeless_fuel_ending', 'has_lpg_ending', 'has_b30k_ending', 'has_electricaire_ending',
+ # 'has_assumed_for_most_rooms_ending', 'has_underfloor_heating_ending',
+ # 'thermostatic_control_ending',
+ # 'charging_system_ending',
+ # 'switch_system_ending',
+ # 'no_control_ending',
+ # 'dhw_control_ending',
+ # 'community_heating_ending',
+ # 'multiple_room_thermostats_ending',
+ # 'auxiliary_systems_ending',
+ # 'trvs_ending',
+ # 'rate_control_ending',
+ # Window
+ 'glazing_type_ending',
+ # Fuel - could change with ASHP
+ # 'fuel_type_ending',
+ # 'main-fuel_tariff_type_ending',
+ # 'is_community_ending',
+ # 'no_individual_heating_or_community_network_ending',
+ # 'complex_fuel_type_ending',
+
+ 'mechanical_ventilation_ending', 'secondheat_description_ending', 'glazed_type_ending',
+ 'multi_glaze_proportion_ending', 'low_energy_lighting_ending', 'number_open_fireplaces_ending',
+ 'solar_water_heating_flag_ending',
+ 'photo_supply_ending',
+ 'energy_tariff_ending',
+ 'extension_count_ending',
+ 'total_floor_area_ending',
+ # 'hot_water_energy_eff_ending',
+ 'floor_energy_eff_ending',
+ 'windows_energy_eff_ending',
+ 'walls_energy_eff_ending',
+ 'sheating_energy_eff_ending',
+ 'roof_energy_eff_ending',
+ # 'mainheat_energy_eff_ending',
+ # 'mainheatc_energy_eff_ending',
+ 'lighting_energy_eff_ending',
+ 'number_habitable_rooms_ending',
+ 'number_heated_rooms_ending',
+ ]
+
+ # For every static column, keep only the records where the starting value equals the ending value
+ for col in static_columns:
+
+ # The starting counterpart is either "<base>_starting" or just "<base>", depending on what the dataset has
+ base_starting = col.split("_ending")[0]
+ if base_starting + "_starting" in data.columns:
+ starting_col = base_starting + "_starting"
+ else:
+ starting_col = base_starting
+ # Filter
+ print("Column: %s" % col)
+ print("Starting size: %s" % data.shape[0])
+ data = data[data[starting_col] == data[col]]
+ print("Ending size: %s" % data.shape[0])
+
+ # NOTE(review): z is not read again in this function - presumably a view to inspect in a debugger
+ z = data[['uprn', col, starting_col]]
+
+ # Great example UPRNs
+ # 100030969273
+ # 10034685399 - Completely transforms the heating and hot water systems in the home (goes from oil -> electricity)
+ # 100091200828 - goes from a liquid petroleum gas boiler to ashp
+
+ # Look for starting with a gas boiler
+ # NOTE(review): the result of this filter is neither assigned nor returned, so it only shows something when
+ # run interactively
+ data[
+ data["has_boiler"] & data["has_radiators"] & data["has_mains_gas"] & ~data["has_boiler_ending"]
+ ]
+
+ # UPRN: 100011776843
+
+ # UPRN: 100011776843
+
+
+class TestAirSourceHeatPump:
+
+ def test_eligible(self):
+     """
+     A house with a mains gas boiler and no air source heat pump should receive an air source heat pump
+     recommendation.
+     """
+     # This tests a house, which will be suitable for an air source heat pump
+     epc_record = EPCRecord()
+     epc_record.prepared_epc = {
+         "county": "Broxbourne",
+         "mainheat-energy-eff": "Good",
+         "hot-water-energy-eff": "Good",
+         "mainheatc-energy-eff": "Good",
+         "number-heated-rooms": 5,
+         "property-type": "House",
+         "built-form": "Semi-Detached"
+     }
+
+     property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
+     property_instance.main_heating = {
+         'original_description': 'Boiler and radiators, mains gas',
+         "clean_description": "Boiler and radiators, mains gas",
+         'has_radiators': True,
+         'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
+         'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True,
+         'has_air_source_heat_pump': False,
+         'has_room_heaters': False, 'has_electric_storage_heaters': False,
+         'has_warm_air': False,
+         'has_electric_underfloor_heating': False,
+         'has_electric_ceiling_heating': False, 'has_community_scheme': False,
+         'has_ground_source_heat_pump': False, 'has_no_system_present': False,
+         'has_portable_electric_heaters': False,
+         'has_water_source_heat_pump': False, 'has_electric': False,
+         'has_mains_gas': True, 'has_wood_logs': False,
+         'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
+         'has_anthracite': False,
+         'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False,
+         'has_lpg': False, 'has_assumed': False,
+         'has_electricaire': False, 'has_assumed_for_most_rooms': False,
+         'has_underfloor_heating': False,
+         "has_electric_heat_pumps": False,
+         "has_micro-cogeneration": False
+     }
+     property_instance.main_fuel = {
+         'original_description': 'mains gas (not community)', 'fuel_type': 'mains gas',
+         'tariff_type': None,
+         'is_community': False, 'no_individual_heating_or_community_network': False,
+         'complex_fuel_type': None
+     }
+     property_instance.hotwater = {
+         'original_description': 'From main system',
+         'clean_description': 'From main system',
+         'heater_type': None,
+         'system_type': 'from main system',
+         'thermostat_characteristics': None, 'heating_scope': None,
+         'energy_recovery': None, 'tariff_type': None,
+         'extra_features': None, 'chp_systems': None, 'distribution_system': None,
+         'no_system_present': None,
+         'assumed': False, "appliance": None
+     }
+     property_instance.main_heating_controls = {
+         'original_description': 'Programmer, room thermostat and TRVs',
+         'thermostatic_control': 'room thermostat', 'charging_system': None, 'switch_system': 'programmer',
+         'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False,
+         'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': None
+
+     }
+
+     recommender = HeatingRecommender(property_instance=property_instance)
+
+     assert not recommender.heating_recommendations
+
+     # recommend() requires has_cavity_or_loft_recommendations, so it has to be passed explicitly. We pass
+     # False, as no wall or roof recommendations were produced for this property
+     recommender.recommend(has_cavity_or_loft_recommendations=False, phase=0)
+
+     # The recommendations are collected on heating_recommendations, and the air source heat pump
+     # recommendation is identified by its description
+     ashp_recommendations = [
+         r for r in recommender.heating_recommendations
+         if "air source heat pump" in r.get("description", "").lower()
+     ]
+     assert ashp_recommendations
+
+ def test_air_source_heat_pump_gas_boiler_starting(self):
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.62', 'heating-cost-potential': '599', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '67', 'construction-age-band': 'England and Wales: 1950-1966',
+ 'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Good',
+ 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '72',
+ 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '913',
+ 'address3': '', 'mainheatcont-description': 'Programmer, no room thermostat', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'House', 'local-authority-label': 'Wigan', 'fixed-lighting-outlets-count': '9',
+ 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '210',
+ 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', 'constituency': 'E14001039',
+ 'co2-emissions-potential': '2.6', 'number-heated-rooms': '4',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '180',
+ 'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-02-15',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '78', 'address1': '430 Gidlow Lane',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
+ 'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
+ 'environment-impact-current': '38', 'co2-emissions-current': '6.2',
+ 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
+ 'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'mains gas (not community)',
+ 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
+ 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+ 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+ 'lighting-cost-potential': '67', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+ 'main-heating-controls': '', 'lodgement-datetime': '2022-02-23 16:39:41', 'flat-top-storey': '',
+ 'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
+ 'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
+ 'current-energy-efficiency': '45', 'energy-consumption-current': '441',
+ 'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '67',
+ 'lodgement-date': '2022-02-23', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor',
+ 'lmk-key': '46cb404438a6d88ddff8965cab8b3027ec15c32d93e0b6a5f0381a5109b9bb0d', 'wind-turbine-count': '0',
+ 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '77',
+ 'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '100',
+ 'walls-description': 'Cavity wall, filled cavity',
+ 'hotwater-description': 'From main system, no cylinder thermostat'
+ }
+
+ ending_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.62', 'heating-cost-potential': '803', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '292', 'construction-age-band': 'England and Wales: 1950-1966',
+ 'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Good',
+ 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '78',
+ 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '861',
+ 'address3': '', 'mainheatcont-description': 'Time and temperature zone control',
+ 'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Wigan',
+ 'fixed-lighting-outlets-count': '9', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+ 'hot-water-cost-current': '434', 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N',
+ 'constituency': 'E14001039', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '147',
+ 'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-05-11',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '43', 'address1': '430 Gidlow Lane',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
+ 'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
+ 'environment-impact-current': '63', 'co2-emissions-current': '3.4',
+ 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
+ 'mainheatc-energy-eff': 'Very Good', 'main-fuel': 'electricity (not community)',
+ 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
+ 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+ 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+ 'lighting-cost-potential': '67', 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100',
+ 'main-heating-controls': '', 'lodgement-datetime': '2022-06-06 13:01:20', 'flat-top-storey': '',
+ 'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
+ 'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
+ 'current-energy-efficiency': '53', 'energy-consumption-current': '252',
+ 'mainheat-description': 'Air source heat pump, radiators, electric', 'lighting-cost-current': '67',
+ 'lodgement-date': '2022-06-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Good',
+ 'lmk-key': '672d5947f3d4a55d97255af71651d6127a939418fa66a687070af77e0ba90df2', 'wind-turbine-count': '0',
+ 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '70',
+ 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '100',
+ 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+ }
+
+ # differences = []
+ # for k, v in ending_epc.items():
+ # if v != starting_epc[k]:
+ # differences.append(
+ # {
+ # "variable": k,
+ # "starting_value": starting_epc[k],
+ # "ending_value": v
+ # }
+ # )
+ # differences = pd.DataFrame(differences)
+ #
+ # diffs = differences[
+ # differences["variable"].isin(
+ # [
+ # "mainheat-energy-eff",
+ # "mainheatcont-description",
+ # "mainheatc-energy-eff",
+ # "main-fuel",
+ # "mainheat-env-eff",
+ # "mainheat-description",
+ # "hot-water-energy-eff",
+ # "hotwater-description"
+ # ]
+ # )
+ # ]
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = HeatingRecommender(property_instance=home)
+ recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+
+ # Patch - for this property, the hot water energy efficiency in the ending EPC is 'Very Poor'. It's not clear
+ # why this is, but we insert it into the simulation config for this test
+ recommender.heating_recommendations[0]["simulation_config"]["hot_water_energy_eff_ending"] = "Very Poor"
+
+ property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+ assert len(recommender.heating_recommendations) == 1
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+ assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 52.2
+
+ def test_air_source_heat_pump_gas_boiler_starting_2(self):
+ """
+ This property seems to have minuscule movement in SAP - just 2 points
+ :return:
+ """
+
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.3', 'heating-cost-potential': '394', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '48', 'construction-age-band': 'England and Wales: 1967-1975',
+ 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+ 'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
+ 'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '487', 'address3': '',
+ 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', 'fixed-lighting-outlets-count': '5',
+ 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '86',
+ 'county': '', 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
+ 'co2-emissions-potential': '0.8', 'number-heated-rooms': '2',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '105',
+ 'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-25',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '56', 'address1': '31 Whinney Hill Park',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Calder Valley',
+ 'roof-energy-eff': 'Good', 'total-floor-area': '44.0', 'building-reference-number': '10001772583',
+ 'environment-impact-current': '62', 'co2-emissions-current': '2.5',
+ 'roof-description': 'Pitched, 250 mm loft insulation', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '2', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'BRIGHOUSE',
+ 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Good',
+ 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
+ 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
+ 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2021-11-25 11:39:35', 'flat-top-storey': '', 'current-energy-rating': 'D',
+ 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+ 'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '62',
+ 'energy-consumption-current': '322', 'mainheat-description': 'Boiler and radiators, mains gas',
+ 'lighting-cost-current': '56', 'lodgement-date': '2021-11-25', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Good', 'lmk-key': '077f70657e9c3f1f0ce5392798398398616b159493b2a8ca2338961596631c27',
+ 'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
+ 'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '60',
+ 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+ }
+
+ ending_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park',
+ 'uprn-source': 'Energy Assessor', 'floor-height': '2.3', 'heating-cost-potential': '277',
+ 'unheated-corridor-length': '', 'hot-water-cost-potential': '266',
+ 'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
+ 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Good',
+ 'environment-impact-potential': '90', 'glazed-type': 'double glazing, unknown install date',
+ 'heating-cost-current': '331', 'address3': '',
+ 'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'Bungalow', 'local-authority-label': 'Calderdale',
+ 'fixed-lighting-outlets-count': '5', 'energy-tariff': 'Single',
+ 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '404', 'county': '',
+ 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
+ 'co2-emissions-potential': '0.7', 'number-heated-rooms': '2',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '92',
+ 'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+ 'inspection-date': '2021-11-25', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '48',
+ 'address1': '31 Whinney Hill Park', 'heat-loss-corridor': '', 'flat-storey-count': '',
+ 'constituency-label': 'Calder Valley', 'roof-energy-eff': 'Good', 'total-floor-area': '44.0',
+ 'building-reference-number': '10001772583', 'environment-impact-current': '68',
+ 'co2-emissions-current': '2.1', 'roof-description': 'Pitched, 250 mm loft insulation',
+ 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '2', 'address2': '',
+ 'hot-water-env-eff': 'Poor', 'posttown': 'BRIGHOUSE', 'mainheatc-energy-eff': 'Average',
+ 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
+ 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
+ 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
+ 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2022-03-23 16:06:21', 'flat-top-storey': '', 'current-energy-rating': 'D',
+ 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+ 'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '64',
+ 'energy-consumption-current': '283',
+ 'mainheat-description': 'Air source heat pump, radiators, electric',
+ 'lighting-cost-current': '57', 'lodgement-date': '2022-03-23', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Average',
+ 'lmk-key': '6296248141447b53426a40f1c39da17dad5f4786485db55ee38737891111a4d4',
+ 'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
+ 'potential-energy-efficiency': '89', 'hot-water-energy-eff': 'Very Poor',
+ 'low-energy-lighting': '60', 'walls-description': 'Cavity wall, filled cavity',
+ 'hotwater-description': 'From main system'
+ }
+
+ # differences = []
+ # for k, v in ending_epc.items():
+ # if v != starting_epc[k]:
+ # differences.append(
+ # {
+ # "variable": k,
+ # "starting_value": starting_epc[k],
+ # "ending_value": v
+ # }
+ # )
+ # differences = pd.DataFrame(differences)
+ #
+ # diffs = differences[
+ # differences["variable"].isin(
+ # [
+ # "mainheat-energy-eff",
+ # "mainheatcont-description",
+ # "mainheatc-energy-eff",
+ # "main-fuel",
+ # "mainheat-env-eff",
+ # "mainheat-description",
+ # "hot-water-energy-eff",
+ # "hotwater-description"
+ # ]
+ # )
+ # ]
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = HeatingRecommender(property_instance=home)
+ recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+ property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+ assert len(recommender.heating_recommendations) == 1
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+ assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 69.3
+
+ # In actuality with this property, the heating controls get downgraded, so we test a manual patch of this
+ patched_simulation_config = {
+ 'mainheat_energy_eff_ending': "Very Good",
+ 'hot_water_energy_eff_ending': 'Very Poor',
+ 'has_boiler_ending': False,
+ 'has_air_source_heat_pump_ending': True,
+ 'has_electric_ending': True,
+ 'has_mains_gas_ending': False,
+ 'fuel_type_ending': 'electricity',
+ 'trvs_ending': None,
+ "mainheatc_energy_eff_ending": 'Average'
+ }
+
+ # PATCHING
+ property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
+ [recommender.heating_recommendations]
+ )
+ property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations_patch, []
+ )
+
+ scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict_patch = model_api.predict_all(
+ df=scoring_data_patch,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+ # The error is only 0.3 (64.3 predicted vs. 64 on the ending EPC), so the model is working
+ assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 64.3
+ assert ending_epc["current-energy-efficiency"] == '64'
+
+ def test_air_source_heat_pump_lpg_boiler(self):
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
+ 'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '1628',
+ 'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
+ 'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'D',
+ 'mainheat-energy-eff': 'Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
+ 'environment-impact-potential': '70', 'glazed-type': 'double glazing, unknown install date',
+ 'heating-cost-current': '2158', 'address3': 'Perry',
+ 'mainheatcont-description': 'No time or thermostatic control of room temperature',
+ 'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
+ 'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+ 'hot-water-cost-current': '257', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
+ 'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '3.3',
+ 'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
+ 'energy-consumption-potential': '128', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
+ 'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+ 'inspection-date': '2023-08-31', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '51',
+ 'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
+ 'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
+ 'building-reference-number': '10005199915', 'environment-impact-current': '50',
+ 'co2-emissions-current': '5.9', 'roof-description': 'Pitched, 270 mm loft insulation',
+ 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
+ 'hot-water-env-eff': 'Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Very Poor',
+ 'main-fuel': 'LPG (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
+ 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
+ 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '166',
+ 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2023-10-30 13:46:54', 'flat-top-storey': '', 'current-energy-rating': 'F',
+ 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+ 'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '32',
+ 'energy-consumption-current': '243', 'mainheat-description': 'Boiler and radiators, LPG',
+ 'lighting-cost-current': '277', 'lodgement-date': '2023-10-30', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Very Poor',
+ 'lmk-key': 'f1d3bd4b8b50bc9b006231ccb158537c408523b748b3f4ef7e98cd03b144afa5', 'wind-turbine-count': '0',
+ 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '56',
+ 'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '33',
+ 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+ }
+
+ ending_epc = {
+ 'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
+ 'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '917',
+ 'unheated-corridor-length': '', 'hot-water-cost-potential': '328',
+ 'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'A',
+ 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
+ 'environment-impact-potential': '96', 'glazed-type': 'double glazing, unknown install date',
+ 'heating-cost-current': '1098', 'address3': 'Perry',
+ 'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
+ 'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+ 'hot-water-cost-current': '328', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
+ 'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '0.3',
+ 'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
+ 'energy-consumption-potential': '16', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
+ 'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+ 'inspection-date': '2023-10-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '6',
+ 'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
+ 'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
+ 'building-reference-number': '10005199915', 'environment-impact-current': '92',
+ 'co2-emissions-current': '0.7', 'roof-description': 'Pitched, 270 mm loft insulation',
+ 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
+ 'hot-water-env-eff': 'Very Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Average',
+ 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
+ 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
+ 'walls-energy-eff': 'Average', 'photo-supply': '', 'lighting-cost-potential': '166',
+ 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2023-11-01 16:29:16', 'flat-top-storey': '', 'current-energy-rating': 'A',
+ 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+ 'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '92',
+ 'energy-consumption-current': '37', 'mainheat-description': 'Air source heat pump, radiators, electric',
+ 'lighting-cost-current': '277', 'lodgement-date': '2023-11-01', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Average',
+ 'lmk-key': 'cb7f2838b727907767c8c2a385cd22f722b1e4745463391d910d228e52124515', 'wind-turbine-count': '0',
+ 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '95',
+ 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '33',
+ 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+ }
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = HeatingRecommender(property_instance=home)
+ recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+ property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+ assert len(recommender.heating_recommendations) == 1
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+ # We predict a huge uplift but not quite as much as the EPC, due to some distinct differences between our
+ # recommendation and the EPC
+ assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 81.3
+ assert ending_epc['current-energy-efficiency'] == '92'
+
+ # PATCH
+ # We patch the simulation config to reflect the ending EPC, to see if we get closer to the ending EPC's SAP score
+ patched_simulation_config = {
+ 'mainheat_energy_eff_ending': "Very Good",
+ 'hot_water_energy_eff_ending': 'Good',
+ 'has_boiler_ending': False,
+ 'has_air_source_heat_pump_ending': True,
+ 'has_electric_ending': True,
+ 'has_lpg_ending': False,
+ 'fuel_type_ending': 'electricity',
+ 'switch_system_ending': 'programmer',
+ 'no_control_ending': None,
+ 'auxiliary_systems_ending': 'bypass',
+ 'trvs_ending': 'trvs',
+ "mainheatc_energy_eff_ending": 'Average'
+ }
+
+ # PATCHING
+ property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
+ [recommender.heating_recommendations]
+ )
+ property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations_patch, []
+ )
+
+ scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict_patch = model_api.predict_all(
+ df=scoring_data_patch,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+
+ assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 88.9
+ # We still underpredict but the improvement is notable
+
+ def test_offgrid(self):
+ """
+ We test on a property we've worked with before, where we compare two options
+ a) Upgrading to a boiler
+ b) Upgrading to a heat pump
+ :return:
+ """
+
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '6 Beech Road', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.4', 'heating-cost-potential': '612', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '123', 'construction-age-band': 'England and Wales: 1930-1949',
+ 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Good',
+ 'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
+ 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '2278',
+ 'address3': '', 'mainheatcont-description': 'Appliance thermostats', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'House', 'local-authority-label': 'Dudley', 'fixed-lighting-outlets-count': '9',
+ 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '604',
+ 'county': '', 'postcode': 'DY1 4BP', 'solar-water-heating-flag': 'N', 'constituency': 'E14000671',
+ 'co2-emissions-potential': '1.0', 'number-heated-rooms': '4',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '93',
+ 'local-authority': 'E08000027', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2024-03-13',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '83', 'address1': '6 Beech Road',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Dudley North',
+ 'roof-energy-eff': 'Very Poor', 'total-floor-area': '60.0', 'building-reference-number': '10005780080',
+ 'environment-impact-current': '41', 'co2-emissions-current': '5.0',
+ 'roof-description': 'Pitched, 12 mm loft insulation', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'DUDLEY',
+ 'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
+ 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'roof-env-eff': 'Very Poor',
+ 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '113',
+ 'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2024-03-13 11:29:11', 'flat-top-storey': '', 'current-energy-rating': 'F',
+ 'secondheat-description': 'None', 'walls-env-eff': 'Average', 'transaction-type': 'rental',
+ 'uprn': '90055152', 'current-energy-efficiency': '32', 'energy-consumption-current': '491',
+ 'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '113',
+ 'lodgement-date': '2024-03-13', 'extension-count': '1', 'mainheatc-env-eff': 'Good',
+ 'lmk-key': '78ddf851b660e599a0894924d0e6b503980f5e0ad1aa711f8411718dc2989c44', 'wind-turbine-count': '0',
+ 'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '87',
+ 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '67',
+ 'walls-description': 'Cavity wall, filled cavity',
+ 'hotwater-description': 'Electric immersion, standard tariff'
+ }
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = HeatingRecommender(property_instance=home)
+ recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+ recommender.recommend_boiler_upgrades(phase=0, system_change=True, exising_room_heaters=False)
+
+ assert len(recommender.heating_recommendations) == 3
+
+ property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+
+ # The ASHP isn't better under SAP, compared to a gas boiler with good heat controls
+ assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [66.9, 65.5, 65.9]
diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py
index 5481cb17..fbbfe3a1 100644
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@@ -2,6 +2,13 @@ import pytest
from recommendations.SolarPvRecommendations import SolarPvRecommendations
from backend.Property import Property
from etl.epc.Record import EPCRecord
+import pandas as pd
+from datetime import datetime
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.Recommendations import Recommendations
+from backend.ml_models.api import ModelApi
+import msgpack
class TestSolarPvRecommendations:
@@ -82,3 +89,321 @@ class TestSolarPvRecommendations:
'photo_supply': 4000
}
]
+
+    def test_model(self):
+        """
+        Integration test: score the recommended solar PV options (photo_supply == 50) for 27 Cromwell Street
+        with the SAP change model. Reads lookup data from the retrofit-data-dev S3 bucket and calls the model.
+        """
+        # EPC the property is built from: SAP 54, photo-supply 0.0, lodged 2021-12-01.
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '85',
+            'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.5', 'number-heated-rooms': '5',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '92',
+            'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-17',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '61', 'address1': '27 Cromwell Street',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
+            'environment-impact-current': '47', 'co2-emissions-current': '5.4',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
+            'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '72',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2021-12-01 10:12:23', 'flat-top-storey': '', 'current-energy-rating': 'E',
+            'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
+            'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '54',
+            'energy-consumption-current': '346', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '144', 'lodgement-date': '2021-12-01', 'extension-count': '2',
+            'mainheatc-env-eff': 'Good', 'lmk-key': '3ec5533af02ec78361c1f9bea8dd2e878c2c6fa6cf59e5cc505c3eeb038e0f91',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)',
+            'hotwater-description': 'From main system'
+        }
+        # Later EPC for the same UPRN (photo-supply 50.0, SAP 65); not fed to the model, only checked at the end.
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '86',
+            'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.4', 'number-heated-rooms': '5',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '84',
+            'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-12-21',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '49', 'address1': '27 Cromwell Street',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
+            'environment-impact-current': '55', 'co2-emissions-current': '4.4',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
+            'walls-energy-eff': 'Very Poor', 'photo-supply': '50.0', 'lighting-cost-potential': '72',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2021-12-21 17:33:09', 'flat-top-storey': '', 'current-energy-rating': 'D',
+            'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
+            'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '65',
+            'energy-consumption-current': '277', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '144', 'lodgement-date': '2021-12-21', 'extension-count': '2',
+            'mainheatc-env-eff': 'Good', 'lmk-key': 'b0b19583c59afbc69db12f4d6c98cd8837e80da3214d577c426eb3e672d424fc',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '88', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)',
+            'hotwater-description': 'From main system'
+        }
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = SolarPvRecommendations(property_instance=home)
+        recommender.recommend(phase=0)
+
+        coverage_50_percent = [x for x in recommender.recommendation if x["photo_supply"] == 50]
+        assert len(coverage_50_percent) == 2
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_50_percent])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+        # Drop the *_change / *_ending columns before scoring (presumably the model targets - TODO confirm).
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        # Compare with a tolerance: the predictions are floats, so exact list equality is brittle.
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == pytest.approx([65.9, 65.9])
+        assert ending_epc["current-energy-efficiency"] == '65'
+
+    def test_model2(self):
+        """
+        Integration test: score the recommended solar PV options (photo_supply == 40) for 6 Kenmare Crescent.
+
+        The property is built from `starting_epc` (lodged 2022-06-15, SAP 68, photo-supply 0.0), solar PV
+        recommendations are generated, and the two recommendations whose photo_supply is 40 are scored by
+        the SAP change model. `ending_epc` is a later EPC for the same UPRN (lodged 2022-08-24, SAP 87,
+        photo-supply 40.0); it is not passed to the model and only records the score the predictions
+        are compared against.
+
+        Reads lookup data from the retrofit-data-dev S3 bucket and calls the prediction model, so it
+        presumably needs network access and credentials for both.
+        """
+
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.49', 'heating-cost-potential': '464',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '46',
+            'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
+            'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
+            'environment-impact-potential': '91', 'glazed-type': 'not defined', 'heating-cost-current': '535',
+            'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
+            'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow',
+            'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '0.7', 'number-heated-rooms': '3',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '56',
+            'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
+            'inspection-date': '2022-08-24', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '18',
+            'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
+            'building-reference-number': '10002845316', 'environment-impact-current': '85',
+            'co2-emissions-current': '1.2', 'roof-description': 'Pitched, 300 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '',
+            'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good',
+            'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Good',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in all fixed outlets', 'roof-env-eff': 'Very Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '40.0', 'lighting-cost-potential': '65',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2022-08-24 15:39:42', 'flat-top-storey': '', 'current-energy-rating': 'B',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'ECO assessment', 'uprn': '100030952942', 'current-energy-efficiency': '87',
+            'energy-consumption-current': '100', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '65', 'lodgement-date': '2022-08-24', 'extension-count': '0',
+            'mainheatc-env-eff': 'Good',
+            'lmk-key': 'e20be883431b1fed15db7fa1f52634fb7655d2b80c2fdad37df779f93ec4dafd',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '91', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.49', 'heating-cost-potential': '464', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '46', 'construction-age-band': 'England and Wales: 1967-1975',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '85', 'glazed-type': 'not defined',
+            'heating-cost-current': '535', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.2', 'number-heated-rooms': '3',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '102',
+            'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
+            'inspection-date': '2022-05-31', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '40',
+            'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
+            'building-reference-number': '10002845316', 'environment-impact-current': '68',
+            'co2-emissions-current': '2.6', 'roof-description': 'Pitched, 300 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '', 'hot-water-env-eff': 'Good',
+            'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)',
+            'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
+            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+            'roof-env-eff': 'Very Good', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+            'lighting-cost-potential': '65', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+            'main-heating-controls': '', 'lodgement-datetime': '2022-06-15 08:38:02', 'flat-top-storey': '',
+            'current-energy-rating': 'D', 'secondheat-description': 'Room heaters, electric',
+            'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100030952942',
+            'current-energy-efficiency': '68', 'energy-consumption-current': '227',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '65',
+            'lodgement-date': '2022-06-15', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
+            'lmk-key': 'ce181970b7077cb9b4626242bfb010b30a0e48541b5f22427e81f1adbeeec4f2', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '85',
+            'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = SolarPvRecommendations(property_instance=home)
+        recommender.recommend(phase=0)
+
+        coverage_40_percent = [x for x in recommender.recommendation if x["photo_supply"] == 40]
+        assert len(coverage_40_percent) == 2
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_40_percent])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        # Compare with a tolerance: the predictions are floats, so exact list equality is brittle.
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == pytest.approx([87.1, 87.1])
+        assert ending_epc["current-energy-efficiency"] == '87'
+        assert starting_epc["current-energy-efficiency"] == '68'