diff --git a/.gitignore b/.gitignore index 63884ad7..5e247d77 100644 --- a/.gitignore +++ b/.gitignore @@ -268,4 +268,11 @@ adhoc adhoc/* etl-router-venv/ -refactor_datasets/ \ No newline at end of file +refactor_datasets/ + +etl/eligibility/ha_15_32/ +cache/ +*/.idea + +*.png +*.pptx \ No newline at end of file diff --git a/.idea/Model.iml b/.idea/Model.iml index 762580d9..df6c4faa 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index c916a158..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 306edd99..af5a3faf 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -4,9 +4,11 @@ import re import tiktoken from pprint import pprint from datetime import datetime + from openai import OpenAI import numpy as np import pandas as pd +from tqdm import tqdm from fuzzywuzzy import process from utils.logger import setup_logger from backend.SearchEpc import SearchEpc @@ -15,6 +17,8 @@ import asset_list.mappings.property_type as property_type_mappings import asset_list.mappings.walls as walls_mappings import asset_list.mappings.heating_systems as heating_mappings import asset_list.mappings.exising_pv as existing_pv_mappings +import asset_list.mappings.built_form as built_form_mappings +import asset_list.mappings.roof as roof_mappings from recommendations.recommendation_utils import ( estimate_perimeter, @@ -267,9 +271,12 @@ class AssetList: STANDARD_UPRN = "ordnance_survey_uprn" STANDARD_LANDLORD_PROPERTY_ID = "landlord_property_id" STANDARD_PROPERTY_TYPE = "landlord_property_type" + STANDARD_BUILT_FORM = "landlord_built_form" STANDARD_WALL_CONSTRUCTION = "landlord_wall_construction" + STANDARD_ROOF_CONSTRUCTION = "landlord_roof_construction" STANDARD_HEATING_SYSTEM = "landlord_heating_system" STANDARD_EXISTING_PV = "landlord_existing_pv" + STANDARD_SAP = "landlord_sap_rating" DOMNA_PROPERTY_ID = 
"domna_property_id" @@ -283,6 +290,10 @@ class AssetList: "Any further surveyor notes", 'Surveyors Name' ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" + + OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] + # This SAP threshold is a key search criteria for properties that may be eligible for extraction FILLED_CAVITY_SAP_THRESHOLD = 75 # This SAP the @@ -290,6 +301,9 @@ class AssetList: # Any EPC deemed to have been conducted prior to this year is deemed to be unreliable EPC_YEAR_THRESHOLD = pd.Timestamp.now().year - 5 + # Properties before this year are more likely to have lower EPC ratings and more likely to qualify + EMPTY_CAVITY_YEAR_THRESHOLD = 2002 + # Attributes - these are columns that we produce, calcualted based on other pieces of data ATTRIBUTE_HAS_SOLAR = "attribute_has_solar" ATTRIBUTE_NUMBER_OF_FLOORS = "attribute_est_number_floors" @@ -318,6 +332,14 @@ class AssetList: ", ceiling insulated", ] + # List of strings we look for in the EPC data, where substrings indicate that the cavity is empty + UNINSULATED_CAVITY_SUBSTRINGS = [ + "cavity wall, as built, no insulation (assumed)", + "cavity wall, as built, no insulation", + "cavity wall, as built, partial insulation (assumed)", + "cavity wall, as built, partial insulation", + ] + def __init__( self, local_filepath, @@ -332,24 +354,47 @@ class AssetList: landlord_year_built=None, landlord_uprn=None, landlord_property_type=None, + landlord_built_form=None, landlord_wall_construction=None, + landlord_roof_construction=None, landlord_heating_system=None, landlord_existing_pv=None, + landlord_sap=None, + phase=False, header=0 ): self.local_filepath = local_filepath self.sheet_name = sheet_name # Read in the data - self.raw_asset_list = pd.read_excel(local_filepath, header=header, sheet_name=sheet_name) + if local_filepath.endswith(".xlsx"): + self.raw_asset_list = pd.read_excel(local_filepath, header=header, sheet_name=sheet_name) + else: + self.raw_asset_list 
= pd.read_csv(local_filepath) self.standardised_asset_list = self.raw_asset_list.copy() # Will be used to store aggregated figures against the various work types self.work_type_figures = {} - self.work_type_breakdowns = {} self.flat_data = None self.duplicated_addresses = None + self.contact_details = None + self.contact_detail_fields = None + self.outcomes = None + self.outcomes_no_match = pd.DataFrame() + self.outcomes_for_output = pd.DataFrame() + self.master_surveyed = None + self.unmatched_submissions = pd.DataFrame() + + # When this is True, we intend to break the programme into multiple phases. We may need to review + # how this is structured in the future, as depending on how we get future data, we may need to + # remove some existing phases from the reporting, or specifically highlight the phase (1 to n-1) + # properties, assuming the current phase is n. + self.phase = phase # We detect the presence of the non-intrusive columns - self.non_intrusives_present = True if "CIGA Check Required" in self.raw_asset_list.columns else False + self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns + # We detect if we have the old format of non-intruvies + self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns + + self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns # Names of columns self.landlord_property_id = landlord_property_id @@ -359,9 +404,12 @@ class AssetList: self.landlord_year_built = landlord_year_built self.landlord_uprn = landlord_uprn self.landlord_property_type = landlord_property_type + self.landlord_built_form = landlord_built_form self.landlord_wall_construction = landlord_wall_construction + self.landlord_roof_construction = landlord_roof_construction self.landlord_heating_system = landlord_heating_system self.landlord_existing_pv = landlord_existing_pv + self.landlord_sap = landlord_sap # parameters for cleaning self.full_address_cols_to_concat 
= full_address_cols_to_concat @@ -376,6 +424,7 @@ class AssetList: } self.variable_mappings = {} + self.hubspot_data = None self.rename_map = {} self.keep_variables = [] @@ -386,6 +435,27 @@ class AssetList: # Update the reference to landlord UPRn self.landlord_uprn = self.STANDARD_UPRN + # Handle the case when full address and address 1 are the same + if self.full_address_colname == self.address1_colname: + self.full_address_colname = self.STANDARD_FULL_ADDRESS + self.standardised_asset_list[self.full_address_colname] = ( + self.standardised_asset_list[self.address1_colname].copy() + ) + + # Handle the case where the property type column is the same as the built type + if self.landlord_property_type == self.landlord_built_form: + self.landlord_built_form = self.STANDARD_BUILT_FORM + self.standardised_asset_list[self.landlord_built_form] = ( + self.standardised_asset_list[self.landlord_property_type].copy() + ) + + # If landlord built form is None (which it often is) we use the built for from inspections + if (self.landlord_built_form is None) and self.non_intrusives_present: + self.landlord_built_form = self.STANDARD_BUILT_FORM + self.standardised_asset_list[self.landlord_built_form] = ( + self.standardised_asset_list["Archetype"].copy() + ) + def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): if method not in self.ADDRESS_1_CLEANING_METHODS: @@ -404,6 +474,9 @@ class AssetList: lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]), axis=1 ) + + for _, x in asset_list.iterrows(): + SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]) return asset_list raise ValueError(f"Method {method} not recognized") @@ -471,6 +544,18 @@ class AssetList: return str(int(x)) return x + @staticmethod + def _clean_postcode(postcode): + # Remove double spaces + postcode = postcode.replace(" ", " ") + if " " not in postcode: + # Restructure it + return " ".join( + 
[postcode[:-3], postcode[-3:]] + ) + + return postcode + def init_standardise(self): """ This function is used to standardise the asset list @@ -480,6 +565,10 @@ class AssetList: # Remove rows without a postcode if self.postcode_colname is not None: self.standardised_asset_list = self.standardised_asset_list.dropna(subset=[self.postcode_colname]) + # We also clean postcode columns where if there is not space, we create one + self.standardised_asset_list[self.postcode_colname] = self.standardised_asset_list[ + self.postcode_colname + ].apply(self._clean_postcode) # We clean up portential non-breaking spaces, and double spaces for col in [ @@ -507,7 +596,10 @@ class AssetList: raise ValueError("Missing full address - please specify columns to concatenate") self.full_address_colname = self.STANDARD_FULL_ADDRESS self.standardised_asset_list[self.full_address_colname] = ( - self.standardised_asset_list[self.full_address_cols_to_concat].apply(lambda x: ", ".join(x), axis=1) + self.standardised_asset_list[self.full_address_cols_to_concat].apply( + lambda x: ", ".join([y for y in x if not pd.isnull(y)]), + axis=1 + ) ) else: @@ -538,10 +630,13 @@ class AssetList: self.full_address_colname, self.landlord_uprn, self.landlord_property_type, + self.landlord_built_form, self.landlord_year_built, self.landlord_wall_construction, + self.landlord_roof_construction, self.landlord_heating_system, - self.landlord_existing_pv + self.landlord_existing_pv, + self.landlord_sap, ] # Keep just non-null variables (e.g landlord may not provide uprn self.keep_variables = [v for v in variables if v is not None] @@ -552,21 +647,37 @@ class AssetList: self.full_address_colname: self.STANDARD_FULL_ADDRESS, self.landlord_uprn: self.STANDARD_UPRN, self.landlord_property_type: self.STANDARD_PROPERTY_TYPE, + self.landlord_built_form: self.STANDARD_BUILT_FORM, self.landlord_year_built: self.STANDARD_YEAR_BUILT, self.landlord_wall_construction: self.STANDARD_WALL_CONSTRUCTION, + 
self.landlord_roof_construction: self.STANDARD_ROOF_CONSTRUCTION, self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, - self.landlord_existing_pv: self.STANDARD_EXISTING_PV + self.landlord_existing_pv: self.STANDARD_EXISTING_PV, + self.landlord_sap: self.STANDARD_SAP, } self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} + non_intrusive_columns = [] if self.non_intrusives_present: - self.keep_variables += self.NON_INTRUSIVES_COLNAMES - self.rename_map = { - **self.rename_map, - **dict( - zip(self.NON_INTRUSIVES_COLNAMES, ["non-intrusives: " + c for c in self.NON_INTRUSIVES_COLNAMES]) - ) - } + non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES + + if self.non_intrusives_eligibility: + non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN) + + if self.old_format_non_intrusives_present: + # We check if we have the ECO Eligibility column, which we might not have + non_intrusive_columns = [ + c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in self.standardised_asset_list.columns + ] + + self.keep_variables += non_intrusive_columns + + self.rename_map = { + **self.rename_map, + **dict( + zip(non_intrusive_columns, ["non-intrusives: " + c for c in non_intrusive_columns]) + ) + } # We idenfiy addresses which are likely to be multi-addresses (i.g are rooms x-y) self.standardised_asset_list["is_multi_address"] = self.standardised_asset_list[ @@ -575,13 +686,22 @@ class AssetList: # We handle cleaning for walls, in the instance that the landlord provides us with EPC data and # we see instances of "average thermal transmittance" in the description - self.standardised_asset_list[self.landlord_wall_construction] = np.where( - self.standardised_asset_list[self.landlord_wall_construction].str.lower().str.contains( - "average thermal transmittance" - ) == True, - "new build - average thermal transmittance", - self.standardised_asset_list[self.landlord_wall_construction] - ) + if self.landlord_wall_construction is not None: + 
self.standardised_asset_list[self.landlord_wall_construction] = np.where( + self.standardised_asset_list[self.landlord_wall_construction].str.lower().str.contains( + "average thermal transmittance" + ) == True, + "new build - average thermal transmittance", + self.standardised_asset_list[self.landlord_wall_construction] + ) + else: + # We want to make sure that we have a column for wall construction + self.landlord_wall_construction = self.STANDARD_WALL_CONSTRUCTION + self.standardised_asset_list[self.landlord_wall_construction] = None + + if self.landlord_roof_construction is None: + self.landlord_roof_construction = self.STANDARD_ROOF_CONSTRUCTION + self.standardised_asset_list[self.landlord_roof_construction] = None # Clear our build year column # We attempt to process the year built column @@ -608,24 +728,49 @@ class AssetList: Extracts the year from a date string in the format '01-Jul-YYYY'. Returns the extracted year as an integer or None if the format is incorrect. """ - known_errors = ["#MULTIVALUE"] + known_errors = [ + "#MULTIVALUE", + "This cell has an external reference that can't be shown or edited. 
Editing this cell will " + "remove the external reference.", + "ND", + 'PIMSS EMPTY' + ] - if pd.isnull(date_str) or date_str in known_errors: + if pd.isnull(date_str) or date_str in known_errors or (date_str == 0): return None if isinstance(date_str, str): match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str) if match: return int(match.group(1)) # Extract the year and convert to integer + if "-" in date_str: + + # Count the number of times we have "-", as we've seen double ranges + # (when we have extensions) so the format is like this: + # 'G: 1983-1990, H: 1991-1995' + if date_str.count("-") == 2: + # We have a range + return int(date_str.split("-")[1].split(",")[0]) + # We probably have a range + return int(date_str.split("-")[1].strip()) if isinstance(date_str, datetime): return date_str.year + if isinstance(date_str, float): + if str(int(date_str)).isdigit() & (len(str(int(date_str))) == 4): + return int(date_str) + # Check if date_str is a year itself if str(date_str).isdigit() & (len(str(date_str)) == 4): return int(date_str) - raise NotImplementedError("Unhandled format for year built - implement me") + # Remove any non-numeric characters + date_str = re.sub(r"\D", "", str(date_str)) + if str(date_str).isdigit() & (len(str(date_str)) == 4): + return int(date_str) + + raise NotImplementedError(f"Unhandled format for year built, value is {date_str} - implement me") self.standardised_asset_list[self.landlord_year_built] = self.standardised_asset_list[ self.landlord_year_built @@ -637,6 +782,10 @@ class AssetList: "standard_values": property_type_mappings.STANDARD_PROPERTY_TYPES, "standard_map": property_type_mappings.PROPERTY_MAPPING }, + self.landlord_built_form: { + "standard_values": built_form_mappings.STANDARD_BUILT_FORMS, + "standard_map": built_form_mappings.BUILT_FORM_MAPPINGS + }, self.landlord_wall_construction: { "standard_values": walls_mappings.STANDARD_WALL_CONSTRUCTIONS, "standard_map": walls_mappings.WALL_CONSTRUCTION_MAPPINGS @@ -648,6 
+797,10 @@ class AssetList: self.landlord_existing_pv: { "standard_values": existing_pv_mappings.STANDARD_EXISTING_PV, "standard_map": existing_pv_mappings.EXISTING_PV_MAPPINGS + }, + self.landlord_roof_construction: { + "standard_values": roof_mappings.STANDARD_ROOF_CONSTRUCTIONS, + "standard_map": roof_mappings.ROOF_CONSTRUCTION_MAPPINGS } } # Keep just entries where the key is not None @@ -655,6 +808,8 @@ class AssetList: for variable, config in to_remap.items(): logger.info("Standardising variable: %s", variable) + # Strip each of these columns + self.standardised_asset_list[variable] = self.standardised_asset_list[variable].str.strip() values_to_remap = self.standardised_asset_list[variable].unique() # We want to map this to our standardised list of property types we're interested in remapper = DataRemapper(standard_values=config["standard_values"], standard_map=config["standard_map"]) @@ -677,6 +832,13 @@ class AssetList: if there are no categories which need remapping which is highly unlikely :return: """ + + if self.phase: + # We filter on just the properties that have had an inspection + self.standardised_asset_list = self.standardised_asset_list[ + ~self.standardised_asset_list['Surveyors Name'].isin(["YET TO BE SURVEYED"]) + ] + if not self.variable_mappings and not override_empty_mappings: raise ValueError("Please run init_standardise first") @@ -732,6 +894,11 @@ class AssetList: for v in missing_variables: self.standardised_asset_list[v] = None + # Convert to string + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] = ( + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str) + ) + def merge_data(self, df: pd.DataFrame): """ Used to insert data into the standardised asset list, based on the domna property id @@ -747,12 +914,12 @@ class AssetList: df, how="left", on=self.DOMNA_PROPERTY_ID ) - def extract_attributes(self): + def extract_attributes(self, pull_epc=True): # Used to extracty the typical attributes that 
we use to identify viable work self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR] = ( self.standardised_asset_list[self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]] | - ~self.standardised_asset_list[self.EPC_API_DATA_NAMES["photo-supply"]].isin(["0.0", 0, None, ""]) + ~self.standardised_asset_list[self.EPC_API_DATA_NAMES["photo-supply"]].isin(["0.0", 0, None, "", np.nan]) ) accepted_epc_property_types = ["House", "Flat", "Bungalow", "Maisonette"] @@ -764,8 +931,8 @@ class AssetList: self.standardised_asset_list[self.ATTRIBUTE_NUMBER_OF_FLOORS] = self.standardised_asset_list.apply( lambda x: estimate_number_of_floors( property_type=( - x[self.STANDARD_PROPERTY_TYPE].title() if - x[self.STANDARD_PROPERTY_TYPE].title() in accepted_epc_property_types else ( + str(x[self.STANDARD_PROPERTY_TYPE]).title() if + str(x[self.STANDARD_PROPERTY_TYPE]).title() in accepted_epc_property_types else ( x[self.EPC_API_DATA_NAMES["property-type"]] if not pd.isnull(x[self.EPC_API_DATA_NAMES["property-type"]]) else None ) @@ -812,6 +979,9 @@ class AssetList: x[self.EPC_API_DATA_NAMES["roof-description"]]) else None, axis=1 ) + self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = ( + self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "") + ) # We produce some additional fields # 1) Is the SAP rating below C75 @@ -916,13 +1086,16 @@ class AssetList: age_band = x[self.EPC_API_DATA_NAMES["construction-age-band"]].split(": ")[1] lower_date, upper_date = age_band.split("-") - age_band_matches = ( - "EPC Age Band Matches Year Built" if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date)) and ( - x[self.STANDARD_YEAR_BUILT] <= float(upper_date) + if not x[self.STANDARD_YEAR_BUILT]: + age_band_matches = "No Year Built From Landlord" + else: + age_band_matches = ( + "EPC Age Band Matches Year Built" if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date)) and ( + x[self.STANDARD_YEAR_BUILT] <= float(upper_date) + ) + else "EPC Age Band 
is older than Year Built" if x[self.STANDARD_YEAR_BUILT] > float(upper_date) + else "EPC Age Band is newer than Year Built" ) - else "EPC Age Band is older than Year Built" if x[self.STANDARD_YEAR_BUILT] > float(upper_date) - else "EPC Age Band is newer than Year Built" - ) processed_age_band.append( { @@ -941,402 +1114,614 @@ class AssetList: def identify_worktypes(self, cleaned): - if not self.non_intrusives_present: - raise NotImplementedError("Need to implement the case for non-intrusives") + if self.STANDARD_SAP is not None: + # We add a SAP category for all work type identification + self.standardised_asset_list["SAP Category"] = np.where( + ( + (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68) | + (self.standardised_asset_list[self.STANDARD_SAP] <= 68) + ), + "SAP Rating 68 or less", + np.where( + ( + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= + self.EMPTY_CAVITY_SAP_THRESHOLD + ) | (self.standardised_asset_list[self.STANDARD_SAP] <= self.EMPTY_CAVITY_SAP_THRESHOLD) + ), + f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}", + f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more" + ) + ) + else: + # We add a SAP category for all work type identification + self.standardised_asset_list["SAP Category"] = np.where( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68, + "SAP Rating 68 or less", + np.where( + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= + self.EMPTY_CAVITY_SAP_THRESHOLD + ), + f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}", + f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more" + ) + ) + + # Before we being, we identify if a property has solar already as we use this + # for identifying cavity jobs + if self.non_intrusives_present: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV 
ON ROOF" + ) + elif self.old_format_non_intrusives_present: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( + ["solar pv on roof"] + ) + ) + else: + # We don't have an indication + existing_solar_non_intrusives_check = False + + self.standardised_asset_list["property_has_solar"] = ( + (self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") | + existing_solar_non_intrusives_check | + (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR]) + ) # If we have non-intrusives completed, we can use this to identify work types + ###################################################### + # Empty cavity: + ###################################################### + # 1) Has been flagged on the non-intrusives as being a cavity wall, empty or partially filled + # 2) The age is before 1995 + # 3) We don't remove anything that haas access issues yet if self.non_intrusives_present: - ###################################################### - # Empty cavity: - ###################################################### - # 1) Has been flagged on the non-intrusives as being a cavity wall, empty or partially filled - # 2) The age is before 1995 - # 3) We don't remove anything that haas access issues yet - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter = ( (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & - self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) & - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) & - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ] <= self.EMPTY_CAVITY_SAP_THRESHOLD - ) + self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) ) - # Let's also flag work that looks eligible 
without the SAP filter - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & - self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) & - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) - ) - - # If non_intrusive_indicates_empty_cavity is True, - # set non_intrusive_indicates_empty_cavity_no_sap_filter to False - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"], - False, - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] - ) - - self.standardised_asset_list["epc_indicates_empty_cavity"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( - self.EPC_NO_WALL_INSULATION_DESCRIPTIONS - ) & ( - self.standardised_asset_list["epc_year_upper_bound"] <= 1995 - ) & ( - ~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] - ) & ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= self.EMPTY_CAVITY_SAP_THRESHOLD - ) - ) - - # If the EPC is esimtated, we defer to the non-intrusives - self.standardised_asset_list["epc_indicates_empty_cavity"] = np.where( - ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - self.standardised_asset_list["estimated"] - ), - False, - self.standardised_asset_list["epc_indicates_empty_cavity"] - ) - - ###################################################### - # Extraction - ###################################################### - - # as needing a CIGA check. What is the logic we should be applying here? 
- self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - (self.standardised_asset_list["non-intrusives: Construction"] == "CAVITY") & - (self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) & - (~self.standardised_asset_list['non-intrusives: Material'].isin( - ["GREY LOOSE BEAD", "COMPACTED BEAD", "FIBRE BATT NO CAVITY", "EMPTY NARROW BELOW 30mm"] - ) - ) & ( - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - ) - - # Also include work without the SAP filter as optimistic - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = ( - (self.standardised_asset_list["non-intrusives: Construction"] == "CAVITY") & - (self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) & - (~self.standardised_asset_list['non-intrusives: Material'].isin( - ["GREY LOOSE BEAD", "COMPACTED BEAD", "FIBRE BATT NO CAVITY", "EMPTY NARROW BELOW 30mm"] - ) - ) - ) - - # Adjust - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"], - False, - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] - ) - - ###################################################### - # Solar - ###################################################### - # Criteria: - # Check 1: Does the property have a valid heating system? 
- self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] = ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - ["air source heat pump", "ground source heat pump", "high heat retention storage heaters"] - ) - ) - - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] = ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] - .str.lower().str.contains("air source heat pump|ground source heat pump") + elif self.old_format_non_intrusives_present: + non_intrusives_wall_filter = ( + self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( + ["empty cavity", "partial fill"] ) | ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( - "electric storage heaters" - ) & ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES[ - "mainheatcont-description"]] == "Controls for high heat retention storage heaters" + ( + self.standardised_asset_list['non-intrusives: WFT Findings'] + .str.lower().str.strip().str.contains("empty cavity|partial fill") & + ~self.standardised_asset_list['non-intrusives: WFT Findings'] + .astype(str).str.lower().str.strip().str.contains("major access issues") ) ) ) + else: + # We set the filter to False, as we have no non-intrusives + non_intrusives_wall_filter = False - # Check 2: Does the property have solar already - self.standardised_asset_list["property_has_solar"] = ( - (self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") | - (self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF") | - (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR]) + if self.landlord_year_built is None: + year_built_filter = self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD + else: + year_built_filter = ( + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 
self.EMPTY_CAVITY_YEAR_THRESHOLD) | + (self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) ) - # Check 3: Does the property meet the fabric condition - # Solar PV installs are subject to the minimum insulation requirements which means: - # 1) one of the following insulation measures must be installed as part of the same - # ECO4 project: - # • roof insulation (flat roof, pitched roof, room-in-roof) - # • exterior facing wall insulation (cavity wall, solid wall) - # • party cavity wall insulation - # • floor insulation (solid and underfloor) - # - # OR - # - # all measures (except any exempted measure referred to in paragraph 4.28) - # listed in paragraph a) must already be installed - # - # With this in mind, we look for 2 clases - # 1) The property is fully insulated apart from the loft (<200mm insulation) - # 2) THe property is fully insulated + # Criteria: + # The property isn't a bedsit + # Non-intrusives indicate it needs a fill + # The EPC year is before 2002 + # We also flag where the property has solar on the roof, because this is a signal of a high EPC rating + self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + year_built_filter & + ( + ~self.standardised_asset_list["property_has_solar"] + ) + ) - self.standardised_asset_list["solar_landlord_walls_insulated"] = ( - self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( - ["filled cavity", "insulated solid brick"] + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = ( + pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]) & + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + year_built_filter & + ( + # If the property has solar, there's a chance it won't qualify + 
self.standardised_asset_list["property_has_solar"] + ) + ) + + # We also add a filter on anything that was generally identified by the non-intrusives + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_year_filter"] = ( + pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]) & + pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"]) & + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter + ) + + self.standardised_asset_list["epc_indicates_empty_cavity"] = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( + self.EPC_NO_WALL_INSULATION_DESCRIPTIONS + ) & ( + self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) & ( + ~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] + ) & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) + ) + ) + + self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = ( + self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) & + ( + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) | + (self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) + ) & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) + ) + ) + + # Finally, we create a flag to indicate that the cavity is empty, based on the criteria above + self.standardised_asset_list["cavity_is_empty"] = ( + non_intrusives_wall_filter | + self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( + self.EPC_NO_WALL_INSULATION_DESCRIPTIONS + ) | + self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) + ) + + ###################################################### + # Extraction + 
###################################################### + # as needing a CIGA check. What is the logic we should be applying here? + + if self.non_intrusives_present: + + extraction_wall_filter = ( + (self.standardised_asset_list["non-intrusives: Construction"] == "CAVITY") & + (self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) & + (~self.standardised_asset_list['non-intrusives: Material'].isin( + ["GREY LOOSE BEAD", "COMPACTED BEAD", "FIBRE BATT NO CAVITY", "EMPTY NARROW BELOW 30mm"] + )) + ) + + if self.non_intrusives_eligibility: + # If we have the eligibility column, we check if the wall is eligible + extraction_wall_filter = ( + extraction_wall_filter & + ~self.standardised_asset_list["non-intrusives: Eligibility (Red/Yellow/Green)"].isin( + ["RED"] + ) + ) + + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( + extraction_wall_filter & year_built_filter + ) + + elif self.old_format_non_intrusives_present: + print("Review these categories!!!!") + extraction_wall_filter = ( + self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( + ["retro drilled", "retro filled", "fibre from build", "polybead", "retro drilled and filled", + "retro drilled & filled", "blown in white wool", "blown in yellow wool"] ) ) + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( + extraction_wall_filter + ) + + else: + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = False + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = False + + ###################################################### + # Solar + ###################################################### + # Criteria: + # Check 1: Does the property have a valid heating system? 
+ self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] = ( + self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + [ + "air source heat pump", + "ground source heat pump", + "high heat retention storage heaters", + "electric boiler" + ] + ) + ) + self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] = ( + self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + ["electric storage heaters", "room heaters", "electric radiators", "no heating"] + ) + ) + + self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] = ( + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] + .str.lower().str.contains("air source heat pump|ground source heat pump|boiler and radiators, electric") + ) | ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( + "electric storage heaters" + ) & ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES[ + "mainheatcont-description"]] == "Controls for high heat retention storage heaters" + ) + ) + ) + + self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( + "electric storage heaters|room heaters" + ) & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] != "Controls for high heat retention storage heaters" + ) + ) + + # Basic check - both of the previous two shouldn't be true simultaneously + if ( + self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] & + self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] + ).sum(): + raise ValueError("Both heating system checks are true - this should not be possible") + + # Check 3: Does the property meet the fabric condition + # Solar PV installs are subject to the minimum insulation 
requirements which means: + # 1) one of the following insulation measures must be installed as part of the same + # ECO4 project: + # • roof insulation (flat roof, pitched roof, room-in-roof) + # • exterior facing wall insulation (cavity wall, solid wall) + # • party cavity wall insulation + # • floor insulation (solid and underfloor) + # + # OR + # + # all measures (except any exempted measure referred to in paragraph 4.28) + # listed in paragraph a) must already be installed + # + # With this in mind, we look for 2 clases + # 1) The property is fully insulated apart from the loft (<200mm insulation) + # 2) THe property is fully insulated + + print("Should we include cavity properties where they might be uninsulated?") + self.standardised_asset_list["solar_landlord_walls_insulated"] = ( + self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( + [ + "filled cavity", "insulated solid brick", "insulated timber frame", + ] + ) + ) + + if self.non_intrusives_present: self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = ( self.standardised_asset_list["non-intrusives: Insulated"].isin( ["EWI", "RETRO DRILLED", "FILLED AT BUILD"] ) ) - - # TODO: We don't have information about the roof from this landlord - - # We merge on the u-value for average thermal transmittance - walls_uvalue_data = pd.DataFrame(cleaned["walls-description"]) - walls_uvalue_data = walls_uvalue_data[ - ~pd.isnull(walls_uvalue_data["thermal_transmittance"]) - ][["original_description", "thermal_transmittance"]].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["walls-description"], - "thermal_transmittance": "walls_u_value" - } - ) - self.standardised_asset_list = self.standardised_asset_list.merge( - walls_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["walls-description"] - ) - - self.standardised_asset_list["solar_epc_walls_insulated"] = ( - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES[ - 
"walls-description"]].str.lower().str.contains( - "|".join( - self.EPC_INSULATED_WALLS_SUBSTRINGS) - ) - ) | ( - self.standardised_asset_list[ - "walls_u_value"].apply( - lambda x: x <= 0.7 if not pd.isnull(x) else False - ) + elif self.old_format_non_intrusives_present: + self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = ( + self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( + ["retro drilled", "retro filled", "ewi", "retro drilled/ solid"] ) ) + else: + self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False - # We merge on the u-value for average thermal transmittance - roof_uvalue_data = pd.DataFrame(cleaned["roof-description"]) - roof_uvalue_data = roof_uvalue_data[ - ~pd.isnull(roof_uvalue_data["thermal_transmittance"]) - ][["original_description", "thermal_transmittance"]].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["roof-description"], - "thermal_transmittance": "roof_u_value" - } + # We merge on the u-value for average thermal transmittance + walls_uvalue_data = pd.DataFrame(cleaned["walls-description"]) + walls_uvalue_data = walls_uvalue_data[ + ~pd.isnull(walls_uvalue_data["thermal_transmittance"]) + ][["original_description", "thermal_transmittance"]].rename( + columns={ + "original_description": self.EPC_API_DATA_NAMES["walls-description"], + "thermal_transmittance": "walls_u_value" + } + ) + self.standardised_asset_list = self.standardised_asset_list.merge( + walls_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["walls-description"] + ) + + self.standardised_asset_list["solar_epc_walls_insulated"] = ( + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["walls-description"]].str.lower().str.contains( + "|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS) + ) + ) | ( + self.standardised_asset_list["walls_u_value"].apply(lambda x: x <= 0.7 if not pd.isnull(x) else False) ) + ) - self.standardised_asset_list = 
self.standardised_asset_list.merge( - roof_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"] - ) + # We merge on the u-value for average thermal transmittance + roof_roof_data = pd.DataFrame(cleaned["roof-description"])[ + ["original_description", "thermal_transmittance", "is_pitched", "is_loft"] + ].rename( + columns={ + "original_description": self.EPC_API_DATA_NAMES["roof-description"], + "thermal_transmittance": "roof_u_value", + } + ) - # If the u-value of a roof is less than 0.7 we consider it insulated - self.standardised_asset_list["solar_epc_roof_insulated"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]].str.lower().str.contains( - "|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS), regex=False - ) | ( - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( - lambda x: int(x) >= 200 if str(x).isdigit() else False - ) - ) | ( - self.standardised_asset_list["roof_u_value"].apply( - lambda x: x <= 0.7 if not pd.isnull(x) else False - ) + self.standardised_asset_list = self.standardised_asset_list.merge( + roof_roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"] + ) + + # If the u-value of a roof is less than 0.7 we consider it insulated + self.standardised_asset_list["solar_epc_roof_insulated"] = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]].str.lower().str.contains( + "|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS), + ) | ( + self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( + lambda x: int(x) >= 200 if str(x).isdigit() else False + ) + ) | ( + self.standardised_asset_list["roof_u_value"].apply( + lambda x: x <= 0.7 if not pd.isnull(x) else False ) ) + ) - self.standardised_asset_list["solar_epc_loft_needs_topup"] = self.standardised_asset_list[ + self.standardised_asset_list["solar_epc_loft_needs_topup"] = ( + self.standardised_asset_list[ self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( lambda x: 
int(x) < 200 if str(x).isdigit() else False - ) - - # TODO: Fill with False - should be temp! - self.standardised_asset_list["epc_has_floor_recommendation"] = ( - self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False) - ) - - # We merge on the u-value for average thermal transmittance - floors_uvalue_data = pd.DataFrame(cleaned["floor-description"]) - floors_uvalue_data = floors_uvalue_data[ - ~pd.isnull(floors_uvalue_data["thermal_transmittance"]) - ][["original_description", "thermal_transmittance"]].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["floor-description"], - "thermal_transmittance": "floor_u_value" - } - ) - - # Merge on - self.standardised_asset_list = self.standardised_asset_list.merge( - floors_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["floor-description"] - ) - - # We assume that a U-value of 0.5 or below is indicative of an insulated floor - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] = ( + ) | ( ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["floor-description"]].str - .lower().str.contains("solid") - ) & ( - ~self.standardised_asset_list["epc_has_floor_recommendation"] - ) & ( - # We do not utilise estimated EPCs for this method because we will always find that - # "epc_has_floor_recommendation" is False - (self.standardised_asset_list["estimated"] == False) - ) - ) | ( - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["floor-description"]].str.lower().str.contains("solid") - ) & ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["floor-description"]].str.lower() - .str.contains(", insulated") + self.standardised_asset_list["is_loft"] | self.standardised_asset_list["is_pitched"] + ) & ( + self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].isin( + ["below average", "none"] ) ) ) + ) - # Check for other floor types, insulated - self.standardised_asset_list["solar_epc_floor_is_other_insulated"] = ( - # The 
floor is suspended and insulated - ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["floor-description"]].str - .lower().str.contains("suspended") - ) & ( - ~self.standardised_asset_list["epc_has_floor_recommendation"] - ) & ( - # We do not utilise estimated EPCs for this method because we will always find that - # "epc_has_floor_recommendation" is False - self.standardised_asset_list["estimated"] == False - ) - ) | ( - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["floor-description"] - ].str.lower().str.contains("suspended") - ) & ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["floor-description"] - ].str.lower().str.contains(", insulated") - ) - ) | ( - self.standardised_asset_list["floor_u_value"].apply( - lambda x: x <= 0.5 if not pd.isnull(x) else False - ) - ) + self.standardised_asset_list["epc_has_floor_recommendation"] = ( + self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False) + ) + + # Check if the boiler is electric + # We check if it contains both the terms boiler & electric + self.standardised_asset_list["has_electric_boiler"] = ( + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] + .str.lower().isin( + ["boiler and radiators, electric"]) + ) | ( + self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] == "electric boiler" + ) + ) + + #################################### + # Check solar eligibility + #################################### + + # Set up the filters to stop repetition + correct_heating_system = ( + self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] | + self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] | + self.standardised_asset_list["has_electric_boiler"] + ) + + needs_heating_upgrade = ( + self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] | + self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] + ) + + # The 
requirements for walls are: + # 1) walls are insulated + # 2) property is a cavity (can be done insulated or not) + + walls_meet_solar_requirements = ( + # The landlord is saying the walls are insulated + self.standardised_asset_list["solar_landlord_walls_insulated"] | + # EPC data is saying the walls are insulated + self.standardised_asset_list["solar_epc_walls_insulated"] | + # Non-intrusives are saying the walls are insulated + self.standardised_asset_list["solar_non_intrusives_walls_insulated"] | + # It's empty cavity + self.standardised_asset_list["cavity_is_empty"] | + # It's a cavity wall + (self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].str.contains("cavity")) + ) + + not_a_flat = ( + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat" + ) + + solar_roof_meets_criteria = ( + self.standardised_asset_list["solar_epc_roof_insulated"] | + self.standardised_asset_list["solar_epc_loft_needs_topup"] + ) + + self.standardised_asset_list["solar_eligible"] = ( + # Property isn't a flag + not_a_flat & + # Landlord data or EPC data indicates the heating system is appropriate + correct_heating_system & + # The property doesn't currently have solar + ~self.standardised_asset_list["property_has_solar"] & + # The walls are insulated + walls_meet_solar_requirements & + # Roof meets criteria + solar_roof_meets_criteria + ) + + # With heating upgrade + self.standardised_asset_list["solar_eligible_needs_heating_upgrade"] = ( + not_a_flat & + # Needs heating upgrade + needs_heating_upgrade & + # The property doesn't currently have solar + ~self.standardised_asset_list["property_has_solar"] & + # The walls are insulated + walls_meet_solar_requirements & + # Roof meets criteria + solar_roof_meets_criteria + ) + + # We shouldn't have an overlap + if ( + self.standardised_asset_list["solar_eligible"] & + self.standardised_asset_list["solar_eligible_needs_heating_upgrade"] + ).sum(): + raise ValueError("Both heating upgrade and no heating upgrade are 
true - this should not be possible") + + # We check for a specific sub-set of properties which are uninsulated solid wall properties that are EPC E + # or below (we'll use 57 as a threshold) - These are for a pilot with Net Zero Renewables + self.standardised_asset_list["solar_eligible_solid_wall_uninsulated"] = ( + not_a_flat & + # Landlord data or EPC data indicates the heating system is appropriate - in this case, we can also take + # electric boilers + correct_heating_system & + # The property doesn't currently have solar + ~self.standardised_asset_list["property_has_solar"] & + # The walls are uninsulated solid + ~walls_meet_solar_requirements & + (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 57) + ) + + # Drop anything we don't need + self.standardised_asset_list = self.standardised_asset_list.drop( + columns=["walls_u_value", "roof_u_value"] + ) + + # Adjust flagged extraction jobs to remove anything for solar + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & + ~self.standardised_asset_list["solar_eligible"] + ) + + # Finally, we note why each property has been flagged + self.standardised_asset_list["cavity_reason"] = None + + empty_cavity_map = { + "non_intrusive_indicates_empty_cavity": "Non-Intrusive Data Shows Empty Cavity: ", + "non_intrusive_indicates_empty_cavity_has_solar": "Non-Intrusive Data Shows Empty Cavity - property " + "already has solar: ", + "non_intrusive_indicates_empty_cavity_no_year_filter": f"Non-Intrusive Data Shows Empty Cavity, " + f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", + + } + for variable, description in empty_cavity_map.items(): + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[variable] & + pd.isnull(self.standardised_asset_list["cavity_reason"]), + description + self.standardised_asset_list["SAP Category"], + 
self.standardised_asset_list["cavity_reason"] ) - # We now put together the criteria: - # Flag properties that look eligible for solar, that have solid floors - # TODO: We'll need to revise this - self.standardised_asset_list["solar_eligible_solid_floor"] = ( - # Landlord data or EPC data indicates the heating system is appropriate - ( - self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] | - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] - ) & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - ( - self.standardised_asset_list["solar_landlord_walls_insulated"] | - self.standardised_asset_list["solar_epc_walls_insulated"] | - self.standardised_asset_list["solar_non_intrusives_walls_insulated"] - ) & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] - ) + # We break the cavity reason into a few different categories, when the EPC is different from inspections + self.standardised_asset_list["cavity_reason"] = np.where( + ( + self.standardised_asset_list["epc_indicates_empty_cavity"] & + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & + (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") & + pd.isnull(self.standardised_asset_list["cavity_reason"]) + ), + "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"] + ) - # Solid floor but needs a loft top-up - self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"] = ( - # Landlord data or EPC data indicates the heating system is appropriate - ( - self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] | - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] 
- ) & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - ( - self.standardised_asset_list["solar_landlord_walls_insulated"] | - self.standardised_asset_list["solar_epc_walls_insulated"] | - self.standardised_asset_list["solar_non_intrusives_walls_insulated"] - ) & - # Roof is insulated - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] - ) + self.standardised_asset_list["cavity_reason"] = np.where( + ( + self.standardised_asset_list["epc_indicates_empty_cavity"] & + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & + (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") & + pd.isnull(self.standardised_asset_list["cavity_reason"]) + ), + "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"] + ) - # Other floor type, fully insulated + self.standardised_asset_list["cavity_reason"] = np.where( + ( + self.standardised_asset_list["epc_indicates_empty_cavity"] & + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]) + ), + "EPC Shows Empty Cavity, inspections show non-cavity build: " + self.standardised_asset_list[ + "SAP Category"], + self.standardised_asset_list["cavity_reason"] + ) + # Landlord data: The landlord's data indicates that the wall is an uninsulated cavity wall, but EPC and + # inspections show filled + self.standardised_asset_list["cavity_reason"] = np.where( + ( + self.standardised_asset_list["landlord_data_indicates_empty_cavity"] & + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & + ~self.standardised_asset_list["epc_indicates_empty_cavity"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]) + ), + "Landlord Data Shows Empty 
Cavity, EPC & Inspections Shows Filled: " + self.standardised_asset_list[ + "SAP Category"], + self.standardised_asset_list["cavity_reason"] + ) - self.standardised_asset_list["solar_eligible_other_floor"] = ( - # Landlord data or EPC data indicates the heating system is appropriate - ( - self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] | - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] - ) & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - ( - self.standardised_asset_list["solar_landlord_walls_insulated"] | - self.standardised_asset_list["solar_epc_walls_insulated"] - ) & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - self.standardised_asset_list["solar_epc_floor_is_other_insulated"] - ) - - # Other floor type, needs loft top-up - self.standardised_asset_list["solar_eligible_other_floor_needs_loft"] = ( - # Landlord data or EPC data indicates the heating system is appropriate - ( - self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] | - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] - ) & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - ( - self.standardised_asset_list["solar_landlord_walls_insulated"] | - self.standardised_asset_list["solar_epc_walls_insulated"] - ) & - # Roof need loft top-up - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Floor is not solid, but is insulated - self.standardised_asset_list["solar_epc_floor_is_other_insulated"] - ) - - # Drop anything we don't need - self.standardised_asset_list = self.standardised_asset_list.drop( - columns=["walls_u_value", "roof_u_value", "floor_u_value"] - ) - - # Adjust flagged extraction jobs to remove anything for solar - 
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( + # Flag extraction + self.standardised_asset_list["cavity_reason"] = np.where( + ( self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & - ~self.standardised_asset_list["solar_eligible_solid_floor"] & - ~self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"] - # ~self.standardised_asset_list["solar_eligible_other_floor"] & - # ~self.standardised_asset_list["solar_eligible_other_floor_needs_loft"] + pd.isnull(self.standardised_asset_list["cavity_reason"]) + ), + "Non-Intrusive Data Shows Cavity Extraction: " + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"] + ) + + ###################################################### + # Flag solar + ###################################################### + self.standardised_asset_list["solar_reason"] = None + + # Map of variables and fill values for the solar_reason variable + solar_reason_map = { + "solar_eligible": "Solar Eligible: ", + "solar_eligible_needs_heating_upgrade": ( + "Solar Eligible, Solid Floor, Needs Heating Upgrade: " + ), + "solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below: ", + } + + for variable, reason in solar_reason_map.items(): + self.standardised_asset_list["solar_reason"] = np.where( + self.standardised_asset_list[variable], + reason + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["solar_reason"] + ) + + # Flag anything that has existing outcomes + if (self.outcomes is not None) and ("Surveyed" in self.standardised_asset_list.columns): + + if "Installer Refusal" not in self.standardised_asset_list.columns: + self.standardised_asset_list["cavity_reason"] = np.where( + ( + (self.standardised_asset_list["Surveyed"] > 0) + ), + None, + self.standardised_asset_list["cavity_reason"] + ) + else: + self.standardised_asset_list["cavity_reason"] = np.where( + ( + 
(self.standardised_asset_list["Surveyed"] > 0) | + (self.standardised_asset_list["Installer Refusal"] > 0) + ), + None, + self.standardised_asset_list["cavity_reason"] + ) + + if self.master_surveyed is not None: + self.standardised_asset_list["cavity_reason"] = np.where( + ( + (~pd.isnull(self.standardised_asset_list["submission_date"])) + ), + None, + self.standardised_asset_list["cavity_reason"] ) blocks_of_flats = self.standardised_asset_list[ @@ -1349,136 +1734,26 @@ class AssetList: # Produce some aggregate figures self.work_type_figures = { - # Empty cavity from non-intrusives - "Empty Cavity (non-intrusives)": non_blocks_of_flats["non_intrusive_indicates_empty_cavity"].sum(), - "Empty Cavity (non-intrusives, blocks of flats)": ( - blocks_of_flats["non_intrusive_indicates_empty_cavity"].sum() - ), - "Empty Cavity (non-intrusives, no SAP filter)": ( - non_blocks_of_flats["non_intrusive_indicates_empty_cavity_no_sap_filter"].sum() - ), - "Empty Cavity (non-intrusives, no SAP filter, blocks of flats)": ( - blocks_of_flats["non_intrusive_indicates_empty_cavity_no_sap_filter"].sum() - ), - "Empty Cavity (EPC)": ( - ( - non_blocks_of_flats["epc_indicates_empty_cavity"] & - ~non_blocks_of_flats["non_intrusive_indicates_empty_cavity"] - ).sum() - ), - "Empty Cavity (EPC, blocks of flat)": ( - ( - blocks_of_flats["epc_indicates_empty_cavity"] & - ~blocks_of_flats["non_intrusive_indicates_empty_cavity"] - ).sum() - ), - "Cavity Extraction": ( - ( - ~non_blocks_of_flats["non_intrusive_indicates_empty_cavity"] & - ~non_blocks_of_flats["epc_indicates_empty_cavity"] & - non_blocks_of_flats["non_intrusive_indicates_cavity_extraction"] - ).sum() - ), - "Cavity Extraction (blocks of flats)": ( - ( - ~blocks_of_flats["non_intrusive_indicates_empty_cavity"] & - ~blocks_of_flats["epc_indicates_empty_cavity"] & - blocks_of_flats["non_intrusive_indicates_cavity_extraction"] - ).sum() - ), - "Cavity Extraction (no SAP filter)": ( - ( - 
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - ~self.standardised_asset_list["epc_indicates_empty_cavity"] & - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] - ).sum() - ), - "Solar PV (Solid Floor)": ( - self.standardised_asset_list["solar_eligible_solid_floor"].sum() - ), - "Solar PV (Solid Floor, Needs Loft Top-up)": ( - self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"].sum() - ), - "Solar PV (Other Floor)": ( - self.standardised_asset_list["solar_eligible_other_floor"].sum() - ), - "Solar PV (Other Floor, Needs Loft Top-up)": ( - self.standardised_asset_list["solar_eligible_other_floor_needs_loft"].sum() - ) + **non_blocks_of_flats["cavity_reason"].value_counts().to_dict(), + **{ + k + " (Block of flats)": v for k, v in + blocks_of_flats["solar_reason"].value_counts().to_dict().items() + }, + **self.standardised_asset_list["solar_reason"].value_counts().to_dict() } - # We produce a breakdown of the property types, for cavity fills - cavity_fills = self.standardised_asset_list[ - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] | ( - self.standardised_asset_list["epc_indicates_empty_cavity"] - ) - ] + # We prepare outcomes for output + if self.outcomes is not None: + logger.info("Preparing outcomes for output") + identified_work = self.standardised_asset_list[ + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | + ~pd.isnull(self.standardised_asset_list["solar_reason"]) + ][self.DOMNA_PROPERTY_ID].values - self.work_type_breakdowns = { - "empty_cavity": cavity_fills[self.STANDARD_PROPERTY_TYPE].value_counts() - } - - # Finally, we note why each property has been flagged - self.standardised_asset_list["cavity_reason"] = None - self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"], - "Non-Intrusive Data Showed Empty Cavity", - self.standardised_asset_list["cavity_reason"] - ) 
- self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"], - "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed", - self.standardised_asset_list["cavity_reason"] - ) - self.standardised_asset_list["cavity_reason"] = np.where( - ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] - ), - "EPC Data Showed Empty Cavity", - self.standardised_asset_list["cavity_reason"] - ) - # Flag extraction - self.standardised_asset_list["cavity_reason"] = np.where( - ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) - ), - "Non-Intrusive Data Showed Cavity Extraction", - self.standardised_asset_list["cavity_reason"] - ) - # extraction no sap filter - self.standardised_asset_list["cavity_reason"] = np.where( - ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) - ), - "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed", - self.standardised_asset_list["cavity_reason"] - ) - - # Flag solar - self.standardised_asset_list["solar_reason"] = None - self.standardised_asset_list["solar_reason"] = np.where( - self.standardised_asset_list["solar_eligible_solid_floor"], - "Solid Floor, Insulated, No Solar", - self.standardised_asset_list["solar_reason"] - ) - self.standardised_asset_list["solar_reason"] = np.where( - self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"], - "Solid Floor, Insulated, Needs Loft", - self.standardised_asset_list["solar_reason"] - ) - self.standardised_asset_list["solar_reason"] = np.where( - self.standardised_asset_list["solar_eligible_other_floor"], - "Other Floor, Insulated, No Solar", - self.standardised_asset_list["solar_reason"] - ) - 
self.standardised_asset_list["solar_reason"] = np.where( - self.standardised_asset_list["solar_eligible_other_floor_needs_loft"], - "Other Floor, Insulated, Needs Loft", - self.standardised_asset_list["solar_reason"] - ) + if self.DOMNA_PROPERTY_ID in self.outcomes.columns: + self.outcomes_for_output = self.outcomes[ + self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work) + ] def flat_analysis(self): @@ -1516,3 +1791,646 @@ class AssetList: flat_data = pd.DataFrame(flat_data) self.flat_data = flat_data + + @staticmethod + def split_full_name(x): + if pd.isnull(x): + return None, None, None + x = x.lower() + titles = ["mr", "mrs", "ms", "miss", "dr", "prof"] + # Remove titles + detected_title = [title for title in titles if x.startswith(title)] + if detected_title: + for title in detected_title: + x = x.replace(title, "") + x = x.strip() + first_name, last_name = x.split(" ")[0], x.split(" ")[-1] + title = detected_title[0].title() if detected_title else None + return title, first_name.title(), last_name.title() + + def load_contact_details( + self, + local_filepath, + sheet_name, + landlord_property_id, + phone_number_column=None, + email_column=None, + fullname_column=None, + firstname_column=None, + lastname_column=None + ): + + self.contact_detail_fields = { + "landlord_property_id": landlord_property_id, + "phone_number": phone_number_column, + "email": email_column, + "fullname": fullname_column, + "firstname": firstname_column, + "lastname": lastname_column + } + + details_colnames = [ + phone_number_column, email_column, fullname_column, firstname_column, lastname_column + ] + # We'll fill them + none_details = [x for x in details_colnames if x is None] + details_colnames = [x for x in details_colnames if x is not None] + + contact_details = pd.read_excel( + local_filepath, sheet_name=sheet_name + )[[self.contact_detail_fields["landlord_property_id"]] + details_colnames] + contact_details = contact_details[ + 
def prepare_for_crm(self, company_domain, crm_pipeline_name, first_dealstage, assigned_surveyors):
    """
    This function prepares the data for upload into Hubspot.

    The standardised asset list is filtered to properties with a product, joined to the
    contact details, product data and assigned surveyors, and reshaped into the Hubspot
    import schema. The result is stored on ``self.hubspot_data``.

    :param company_domain: Value written to the "Company Domain Name " column of every row.
    :param crm_pipeline_name: Value written to the "Pipeline " column of every row.
    :param first_dealstage: Value written to the "Deal Stage " column of every row.
    :param assigned_surveyors: DataFrame keyed on ``self.landlord_property_id``; it must
        provide the ``surveyor_email`` column used for "Deal Owner".
    :raises ValueError: If the asset list holds a cavity/solar reason with no product.
    :return: None
    """
    # This maps the opportunities as we reference them, to the product data as stored in Hubspot
    product_lookup_table = {
        "Non-Intrusive Data Showed Cavity Extraction": {
            "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500
        },
        "Non-Intrusive Data Showed Empty Cavity": {
            "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
        },
        "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed": {
            "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
        },
        "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed": {
            "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500
        },
        "EPC Data Showed Empty Cavity": {
            "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
        },
        "Solid Floor, Insulated, No Solar": {
            "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
        },
        "Solid Floor, Insulated, Needs Loft": {
            "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
        },
        "Other Floor, Insulated, No Solar": {
            "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
        },
        "Other Floor, Insulated, Needs Loft": {
            "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
        }
    }

    # We check that every reason in use is covered by the lookup table. Missing values
    # (None / NaN) mean "no opportunity" and are not products, so they are ignored.
    reasons_in_use = pd.concat(
        [self.standardised_asset_list["cavity_reason"], self.standardised_asset_list["solar_reason"]]
    ).dropna().unique()
    unknown_reasons = [reason for reason in reasons_in_use if reason and reason not in product_lookup_table]
    if unknown_reasons:
        raise ValueError(
            f"We have products not referenced in the lookup table - check this: {unknown_reasons}"
        )

    programme_data = self.standardised_asset_list.copy()

    # Exclusions - these are properties we won't treat for the moment
    product_exclusions = [
        "Other Floor, Insulated, No Solar",
        "Other Floor, Insulated, Needs Loft"
    ]
    if product_exclusions:
        logger.warning("Excluding products: %s", product_exclusions)

    programme_data = programme_data[~programme_data["solar_reason"].isin(product_exclusions)]

    # Merge on the contact details
    programme_data = programme_data.merge(
        self.contact_details,
        how="left",
        left_on=self.STANDARD_LANDLORD_PROPERTY_ID,
        right_on=self.landlord_property_id,
    )

    programme_data["Company Domain Name "] = company_domain
    # Append the product data onto the programme data
    programme_data["cavity_product"] = programme_data["cavity_reason"].map(
        lambda x: product_lookup_table.get(x, {"name": None})["name"]
    )
    programme_data["solar_product"] = programme_data["solar_reason"].map(
        lambda x: product_lookup_table.get(x, {"name": None})["name"]
    )

    # NOTE(review): domna_product is seeded from solar_reason only, and the fallback below
    # (solar_product) is null exactly when solar_reason is null, so rows that only have a
    # cavity_reason are filtered out further down. If cavity work should reach the CRM the
    # fallback presumably needs to be cavity_reason - confirm the intended priority.
    programme_data["domna_product"] = programme_data["solar_reason"].copy()
    programme_data["domna_product"] = np.where(
        pd.isnull(programme_data["domna_product"]),
        programme_data["solar_product"],
        programme_data["domna_product"]
    )
    # We filter just on rows where we have a product
    programme_data = programme_data[
        ~pd.isnull(programme_data["domna_product"])
    ]
    programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])

    product_df = (
        pd.DataFrame(product_lookup_table).T[["name", "id", "unit_price"]]
        .reset_index()
        .rename(
            columns={
                "name": "Name ",
                "id": 'Product ID ',
                "unit_price": 'Unit price ',
                "index": "domna_product"
            }
        )
    )

    product_df['Quantity '] = 1

    # Append on the product data
    programme_data = programme_data.merge(
        product_df,
        how="left",
        on="domna_product",
    )

    # Add in deal and pipeline information
    programme_data["dealname"] = programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data[
        "domna_product"]
    programme_data['Pipeline '] = crm_pipeline_name
    programme_data['Deal Stage '] = first_dealstage
    programme_data['Associations: Listing'] = "Property Owner"

    programme_data = programme_data.merge(
        assigned_surveyors.rename(
            columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID}
        ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
    )

    # This maps the hubspot schema (keys) to the columns we hold (values). A value of None
    # means we don't have the information yet and an empty placeholder column is produced.
    schema_mappings = {
        # The product name. This key used to appear twice (first mapped to the domna property
        # id, TODO: maybe change this?); the later product-name entry always won, so only
        # that effective entry is kept.
        'Name ': 'Name ',
        'Company Domain Name ': 'Company Domain Name ',
        'Email ': (
            self.contact_detail_fields["email"] if self.contact_detail_fields["email"] else None
        ),  # TODO: Review
        'First Name ': (
            self.contact_detail_fields["firstname"] if self.contact_detail_fields["firstname"] else None
        ),  # TODO: Review
        'Last Name ': (
            self.contact_detail_fields["lastname"] if self.contact_detail_fields["lastname"] else None
        ),  # TODO: Review
        'Phone ': (
            self.contact_detail_fields["phone_number"] if self.contact_detail_fields["phone_number"] else None
        ),  # TODO: Review
        'Full Address ': self.STANDARD_FULL_ADDRESS,
        'Address 1 ': self.STANDARD_ADDRESS_1,
        'Address 2 ': None,  # TODO: Don't have this for the moment
        'Postcode ': self.STANDARD_POSTCODE,
        'Property Type ': self.STANDARD_PROPERTY_TYPE,
        'Property Sub Type ': None,  # TODO: Don't have this for the moment
        'Bedroom(s) ': None,  # TODO: Don't have this for the moment
        'Domna Property ID ': self.DOMNA_PROPERTY_ID,
        'National UPRN ': (
            self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"]
        ),
        'Owner Property ID ': self.STANDARD_LANDLORD_PROPERTY_ID,
        'Wall Construction ': self.STANDARD_WALL_CONSTRUCTION,
        'Heating System ': self.STANDARD_HEATING_SYSTEM,
        'Year Built ': self.STANDARD_YEAR_BUILT,
        'Boiler Make ': None,  # TODO: Don't have this for the moment
        'Boiler Model ': None,  # TODO: Don't have this for the moment
        'Non-Intrusives: Date Checked ': None,  # TODO: Don't have this for the moment
        'Non-Intrusives: Wall Type ': (
            "non-intrusives: Construction" if self.non_intrusives_present else None
        ),
        'Non-intrusives: Insulation ': (
            "non-intrusives: Insulated" if self.non_intrusives_present else None
        ),
        'Non-intrusives: Insulation Material ': (
            "non-intrusives: Material" if self.non_intrusives_present else None
        ),
        'Non-Intrusives: CIGA Check Required ': (
            'non-intrusives: CIGA Check Required' if self.non_intrusives_present else None
        ),
        'Non-Intrusives: PV Access Issues ': (
            'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' if self.non_intrusives_present else None
        ),
        'Non-Intrusives: Roof Orientation ': (
            'non-intrusives: OFF GAS - ROOF ORIENTATION' if self.non_intrusives_present else None
        ),
        'Non-Intrusives: Surveyor Notes ': (
            'non-intrusives: Any further surveyor notes' if self.non_intrusives_present else None
        ),
        'Non-Intrusives: Surveyor Name ': (
            'non-intrusives: Surveyors Name' if self.non_intrusives_present else None
        ),
        'CIGA: Date Requested ': None,  # TODO: Don't have this for the moment
        'CIGA: Cavity Guarantee Found ': None,
        'Last EPC: Is Estimated ': self.EPC_API_DATA_NAMES["estimated"],
        'Last EPC: EPC Rating ': self.EPC_API_DATA_NAMES["current-energy-rating"],
        'Last EPC: SAP Rating ': self.EPC_API_DATA_NAMES["current-energy-efficiency"],
        'Last EPC: Main Heating Description ': self.EPC_API_DATA_NAMES[
            "mainheat-description"],
        'Last EPC: Heating Controls ': self.EPC_API_DATA_NAMES[
            "mainheatcont-description"],
        'Last EPC: Lodgement Date ': self.EPC_API_DATA_NAMES["inspection-date"],
        'Last EPC: Floor Area ': self.EPC_API_DATA_NAMES["total-floor-area"],
        'Last EPC: Wall ': self.EPC_API_DATA_NAMES["walls-description"],
        'Last EPC: Roof ': self.EPC_API_DATA_NAMES["roof-description"],
        'Last EPC: Floor ': self.EPC_API_DATA_NAMES["floor-description"],
        'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"],
        'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"],
        'Deal Stage ': 'Deal Stage ',
        'Pipeline ': 'Pipeline ',
        'Expected Commencement Date ': None,  # TODO: Need to set this,
        'Deal Name ': "dealname",  # Need to create this,
        'Product ID ': 'Product ID ',
        'Unit price ': 'Unit price ',
        'Quantity ': 'Quantity ',
        'Deal Owner': 'surveyor_email',
        'Amount ': 'Unit price ',
    }

    # We now create the finalised dataset to be uploaded into Hubspot. Each Hubspot column is
    # built directly from its source column: several Hubspot columns may share one source
    # (e.g. 'Unit price ' and 'Amount '), which a select-then-rename cannot express because
    # the inverted rename map keeps only one target per source.
    mapped_columns = {
        target: source for target, source in schema_mappings.items() if source is not None
    }
    # Anything with a None value is information we haven't got right now
    placeholder_columns = [target for target, source in schema_mappings.items() if source is None]

    hubspot_data = pd.DataFrame(
        {target: programme_data[source] for target, source in mapped_columns.items()},
        index=programme_data.index
    )
    # We add placeholder columns for the None variables
    for col in placeholder_columns:
        hubspot_data[col] = None

    self.hubspot_data = hubspot_data
def flag_outcomes(
        self,
        outcomes_filepath,
        outcomes_sheetname,
        outcomes_address,
        outcomes_postcode,
        outcomes_houseno,
        outcomes_id
):
    """
    Match a surveyor outcomes sheet to the asset list and pivot the outcomes onto it.

    Each outcome row is matched to exactly one property by trying, in order: the landlord
    property id, the normalised full address, postcode plus house number, and finally the
    closest fuzzy address among several postcode/house-number candidates.

    Sets ``self.outcomes``, ``self.outcomes_no_match`` and, when anything matched, merges
    one column per outcome plus ``visit_count`` onto ``self.standardised_asset_list``.

    :param outcomes_filepath: Path of the outcomes workbook. Nothing happens when None.
    :param outcomes_sheetname: Sheet to read.
    :param outcomes_address: Name of the address column in the outcomes sheet.
    :param outcomes_postcode: Name of the postcode column in the outcomes sheet.
    :param outcomes_houseno: Name of the house number column, or None to derive it from
        the address and postcode of each row.
    :param outcomes_id: Name of the landlord property id column, or None if there is none.
    :raises Exception: If the pivoted outcomes hold a property id more than once.
    :return: None
    """
    if outcomes_filepath is None:
        return

    def _normalise_addresses(addresses):
        # Lower-case, drop commas and collapse whitespace so that formatting differences
        # between the two sources do not prevent an exact match. Nulls stay null.
        return (
            addresses.str.lower()
            .str.replace(",", "", regex=False)
            .str.split()
            .str.join(" ")
        )

    property_id = self.DOMNA_PROPERTY_ID

    self.outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname)
    self.outcomes["row_id"] = self.outcomes.index
    # Outcomes become column names when pivoted below, so we normalise their case once here.
    # (Doing this inside the matching loop skipped it whenever every row matched on id.)
    self.outcomes["Outcome"] = self.outcomes["Outcome"].str.lower()

    if outcomes_houseno is None:
        outcomes_houseno = "houseno"
        # The house number is derived row by row from that row's own address and postcode
        self.outcomes["houseno"] = [
            None if pd.isnull(address) else SearchEpc.get_house_number(str(address), str(postcode))
            for address, postcode in zip(self.outcomes[outcomes_address], self.outcomes[outcomes_postcode])
        ]

    logger.info("Matching outcomes to asset list")
    assets = self.standardised_asset_list
    # These only depend on the asset list, so they are computed once rather than per row
    asset_ids = (
        assets[self.STANDARD_LANDLORD_PROPERTY_ID].str.strip() if outcomes_id is not None else None
    )
    asset_addresses = _normalise_addresses(assets[self.STANDARD_FULL_ADDRESS])
    asset_postcodes = assets[self.STANDARD_POSTCODE].str.strip()

    # Merge the outcomes onto the asset list - we check we're able to match sufficiently well
    lookup = []
    nomatch = []

    def _record_match(outcome_row, matched_assets):
        lookup.append(
            {
                "row_id": outcome_row["row_id"],
                property_id: matched_assets[property_id].values[0]
            }
        )

    for _, x in tqdm(self.outcomes.iterrows(), total=len(self.outcomes)):

        if pd.isnull(x[outcomes_address]):
            continue

        # 1) Landlord property id, when the sheet has one
        if outcomes_id is not None and not pd.isnull(x[outcomes_id]):
            matched = assets[asset_ids == x[outcomes_id]]
            if matched.shape[0] == 1:
                _record_match(x, matched)
                continue

        # 2) Exact match on the normalised full address
        address_clean = " ".join(str(x[outcomes_address]).lower().replace(",", "").split())
        matched = assets[asset_addresses == address_clean]
        if matched.shape[0] == 1:
            _record_match(x, matched)
            continue

        # 3) Same postcode and same house number
        postcode = x[outcomes_postcode]
        if isinstance(postcode, str):
            postcode = postcode.strip()
        matched = assets[asset_postcodes == postcode].copy()
        if not matched.empty:
            matched["houseno"] = [
                SearchEpc.get_house_number(str(address1), str(asset_postcode))
                for address1, asset_postcode in zip(
                    matched[self.STANDARD_ADDRESS_1], matched[self.STANDARD_POSTCODE]
                )
            ]
            matched = matched[
                matched["houseno"].astype(str) == str(x[outcomes_houseno])
            ]

        if matched.shape[0] == 1:
            _record_match(x, matched)
            continue

        if not matched.empty:
            # 4) Several candidates left - take the closest address (Levenshtein distance)
            best_match = process.extractOne(
                x[outcomes_address], matched[self.STANDARD_FULL_ADDRESS].values
            )[0]
            matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match]
            _record_match(x, matched)
            continue

        nomatch.append(x["row_id"])

    self.outcomes_no_match = self.outcomes[self.outcomes["row_id"].isin(nomatch)]
    lookup = pd.DataFrame(lookup)

    if lookup.empty:
        return

    # We will have duplicated domna property IDs, where a surveyor has been to a property multiple times
    # Where we have multiple rows, we want to make a call on what the action should be. For example,
    # there may be properties that have been visited multiple times where the outcome was "See notes" implying
    # that the surveyor had a detailed explanation as to why they couldn't gain access so if this has
    # happened multiple times, in this case we judge that the work may not be viable

    date_col = "Week Commencing" if "Week Commencing" in self.outcomes else "Survey Date"

    lookup = lookup.merge(
        self.outcomes[["row_id", "Outcome", "Notes", date_col]], how="left", on="row_id"
    )

    visit_counts = (
        lookup.groupby(property_id)["row_id"]
        .count()
        .reset_index()
        .rename(columns={"row_id": "visit_count"})
        .sort_values("visit_count", ascending=False)
    )

    # One row per property, one column per outcome holding how often it was recorded
    pivot_df = lookup.groupby([property_id, "Outcome"]).size().unstack(fill_value=0).reset_index()
    pivot_df = pivot_df.merge(
        visit_counts, how="left", on=property_id
    )

    if pivot_df[property_id].duplicated().sum():
        raise Exception("We have duplicated property IDs in the outcomes data")

    # We merge this data onto outcomes
    self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values)
    self.outcomes = self.outcomes.merge(lookup[["row_id", property_id]], how="left", on="row_id")

    # We merge our pivoted outcomes onto the asset list
    self.standardised_asset_list = self.standardised_asset_list.merge(
        pivot_df, how="left", on=property_id
    )

    self.outcomes = self.outcomes.sort_values(property_id, ascending=False)
def flag_survey_master(
        self,
        master_filepaths,
        master_to_asset_list_filepath=None
):
    """
    Flag properties on the asset list that already appear on survey "master" sheets.

    Masters that carry a ``UPRN`` column (the landlord's property id) are used directly.
    Otherwise each master row is matched to the asset list by postcode, then house number,
    then street name / fuzzy address, then property type.

    Sets ``self.master_surveyed`` (one row per landlord property id with ``survey_status``,
    ``submission_date`` and ``cancelled``), merges it onto ``self.standardised_asset_list``
    and, when any master needed address matching, sets ``self.unmatched_submissions``.

    :param master_filepaths: Paths of the master CSV files. Nothing happens when empty.
    :param master_to_asset_list_filepath: Optional CSV mapping master rows (by 'NO.',
        'Street / Block Name' and 'Post Code') onto asset list ids.
    :raises NotImplementedError: If a master row still matches several properties after
        every narrowing step.
    :return: None
    """
    # TODO: This probably needs further expansion

    if not master_filepaths:
        return

    if master_to_asset_list_filepath is not None:
        id_map = pd.read_csv(master_to_asset_list_filepath)
    else:
        id_map = pd.DataFrame()

    def _text(value):
        # Cell value as stripped text; missing cells become an empty string
        return "" if pd.isnull(value) else str(value).strip()

    def _house_no_text(value):
        # CSV columns containing blanks are read as floats, so 12 arrives as 12.0
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return _text(value)

    logger.info("Getting masters and merging onto asset list")
    master_surveyed = []
    unmatched_submissions = []
    # Working copy of the asset list carrying the matching keys. It is built on first use so
    # that the asset list itself never carries temporary columns, even if matching raises.
    assets = None

    for filepath in master_filepaths:
        master_data = pd.read_csv(filepath)
        # Strip columns
        master_data.columns = [c.strip() for c in master_data.columns]

        if not id_map.empty:
            master_data = master_data.merge(
                id_map, how="left", on=['NO.', 'Street / Block Name', 'Post Code']
            )

        install_col = (
            "INSTALLED OR CANCELLED" if "INSTALLED OR CANCELLED" in master_data.columns
            else "INSTALL / CANCELLATION DATE"
        )

        submission_col = (
            "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
        )

        if "UPRN" in master_data.columns:
            # We just need to check if any were cancelled
            master_to_append = master_data[
                ["UPRN", install_col, submission_col]
            ].rename(
                columns={
                    "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
                    install_col: "survey_status",
                    submission_col: "submission_date"
                }
            )
            master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")

            master_surveyed.append(master_to_append)
            continue

        master_data["row_id"] = master_data.index

        if assets is None:
            assets = self.standardised_asset_list.copy()
            # House numbers are compared as text, because the master may hold them as numbers
            assets["house_no"] = [
                _house_no_text(SearchEpc.get_house_number(str(address1), str(postcode)))
                for address1, postcode in zip(
                    assets[self.STANDARD_ADDRESS_1], assets[self.STANDARD_POSTCODE]
                )
            ]
            assets["postcode_key"] = (
                assets[self.STANDARD_POSTCODE].str.strip().str.lower().str.replace(" ", "", regex=False)
            )

        postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code"
        house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO"

        # Otherwise, we need to match algorithmically
        logger.info("Matching master data to asset list")
        matched = []
        unmatched = []
        for _, row in tqdm(master_data.iterrows(), total=len(master_data)):
            if pd.isnull(row[postcode_col]):
                continue
            postcode_no_space = str(row[postcode_col]).strip().replace(" ", "").lower()

            df = assets[assets["postcode_key"] == postcode_no_space]

            house_no = _house_no_text(row[house_no_col])
            street = _text(row["Street / Block Name"])

            if house_no in df["house_no"].values:
                df = df[df["house_no"] == house_no]

            if df.shape[0] != 1:
                # Street names are matched literally - they may contain regex characters
                street_hits = df[self.STANDARD_FULL_ADDRESS].str.contains(street, regex=False, na=False)
                if street and street_hits.any():
                    df = df[street_hits]
                else:
                    # Levenshtein distance. extractOne scores the query against each *choice*,
                    # so the single address is wrapped in a list; a bare string would be
                    # scored character by character.
                    query = " ".join([house_no, street, _text(row.get("TOWN"))]).lower()
                    scores = df[self.STANDARD_FULL_ADDRESS].str.lower().apply(
                        lambda address: (
                            process.extractOne(query, [address])[1] if isinstance(address, str) else 0
                        )
                    )
                    df = df[scores > 90]

                if df.shape[0] == 0:
                    unmatched.append(row["row_id"])
                    continue

                house_and_street = " ".join([house_no, street]).lower()
                address_hits = df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
                    house_and_street, regex=False, na=False
                )
                if address_hits.any():
                    df = df[address_hits]

                # Last word of the master's property type, e.g. "... house" -> "house"
                type_word = _text(row.get("PROPERTY TYPE As per table emailed")).split(" ")[-1].lower()
                type_hits = df[self.STANDARD_PROPERTY_TYPE].str.contains(type_word, regex=False, na=False)
                if type_word and type_hits.any():
                    # We ignore "block of flats" entries
                    df = df[type_hits & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")]

            if df.shape[0] == 0:
                unmatched.append(row["row_id"])
                continue

            if df.shape[0] != 1:
                # We have multiple matches
                raise NotImplementedError(
                    f"FIX ME - master row {row['row_id']} in {filepath} matches {df.shape[0]} properties"
                )
            matched.append(
                {
                    "row_id": row["row_id"],
                    self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
                }
            )

        # We match the "UPRN" which is the landlords ID, onto the master sheet. The columns are
        # named explicitly so the merge still works when nothing matched.
        matched = pd.DataFrame(matched, columns=["row_id", self.STANDARD_LANDLORD_PROPERTY_ID])
        matched["row_id"] = matched["row_id"].astype(master_data["row_id"].dtype)
        master_to_append = master_data[["row_id", install_col, submission_col]].merge(
            matched, how="left", on="row_id"
        ).rename(
            columns={
                install_col: "survey_status",
                submission_col: "submission_date"
            }
        )
        master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
        master_surveyed.append(master_to_append)
        unmatched_df = master_data[
            master_data["row_id"].isin(unmatched)
        ]

        scheme_col = (
            "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
            "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH"
        )
        # The columns are massively different - we take just a few
        unmatched_df = unmatched_df[
            [
                scheme_col, house_no_col, "Street / Block Name", postcode_col, install_col, submission_col
            ]
        ].rename(
            columns={
                scheme_col: "Funding Scheme",
                house_no_col: "House Number",
                postcode_col: "Postcode",
                install_col: "survey_status",
                submission_col: "submission_date"
            }
        )

        unmatched_submissions.append(unmatched_df)

    master_surveyed = pd.concat(master_surveyed)
    master_surveyed = master_surveyed[~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID])]
    # Masters use these markers in the id column for properties the landlord never listed
    master_surveyed = master_surveyed[
        ~master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID].isin(
            ["NOT ON ASSET LIST", "Missing From Asset List"]
        )
    ]

    master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID] = master_surveyed[
        self.STANDARD_LANDLORD_PROPERTY_ID
    ].astype(str)

    # We de-dupe crudely on landlord property id
    self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])

    self.standardised_asset_list = self.standardised_asset_list.merge(
        self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
    )

    # Finally, we keep a record of the unmatched
    if unmatched_submissions:
        self.unmatched_submissions = pd.concat(
            unmatched_submissions
        )
# OpenAI API Key (set this in your environment variables for security)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


class DataRemapper:
    """
    Remap free-text values onto a fixed set of standard values.

    Values are resolved in order of cost: the predefined mapping, an exact match against the
    standard values, a fuzzy match, and finally a single batched OpenAI request for whatever
    is left. Token usage and an estimated cost are tracked on the instance.
    """

    def __init__(self, standard_values, standard_map=None, max_tokens=1000):
        """
        Initialize the remapper with standard values and a predefined mapping.

        :param standard_values: Set of allowed standardized values.
        :param standard_map: Dictionary of common remappings {raw_value: standard_value}.
            Defaults to an empty mapping.
        :param max_tokens: Limit applied to both the prompt size and the response size.
        """
        self.standard_values = standard_values
        # An absent mapping is stored as an empty dict so membership tests always work
        self.standard_map = standard_map if standard_map is not None else {}
        self.fuzzy_threshold = 90  # Adjust fuzzy matching sensitivity
        self.ai_model = "gpt-4-turbo"  # Use gpt-3.5-turbo for cheaper processing

        # Tokenizer for counting tokens
        self.tokenizer = tiktoken.encoding_for_model(self.ai_model)

        # Track token usage and remap dictionary
        self.total_tokens_used = 0
        self.total_cost = 0
        self.remap_dict = {}  # {original_value: standardized_value}
        self.max_tokens = max_tokens  # Limit for OpenAI API

        # Memoization for AI calls
        self.ai_cache = {}  # {tuple(unmapped_values): {original_value: standardized_value}}
        # Capture the response for debugging
        self.ai_response = None

        # OpenAI pricing (as of Feb 2024)
        self.pricing = {
            "gpt-4-turbo": {"input": 0.01 / 1000, "output": 0.03 / 1000},
            "gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
        }

        self.openai_client = OpenAI(api_key=OPENAI_API_KEY)

    @staticmethod
    def clean_string(text):
        """
        Basic text cleaning: trim, lower-case, remove punctuation and collapse whitespace.

        :param text: Value to clean.
        :return: The cleaned string, or None if ``text`` is not a string.
        """
        if not isinstance(text, str):
            return None
        text = text.strip().lower()
        text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
        # Collapse runs of whitespace into a single space
        text = re.sub(r'\s+', ' ', text)
        return text

    def fuzzy_match(self, text):
        """
        Use fuzzy matching to find the closest standard value.

        :param text: Cleaned value to match.
        :return: The best standard value if it scores at least ``self.fuzzy_threshold``,
            otherwise None.
        """
        if not text:
            return None
        # extractOne returns None when there are no choices to compare against
        result = process.extractOne(text, self.standard_values)
        if result is None:
            return None
        match, score = result[0], result[1]
        return match if score >= self.fuzzy_threshold else None

    def count_tokens(self, text):
        """Estimate the number of tokens in a given text."""
        return len(self.tokenizer.encode(text)) if text else 0

    @staticmethod
    def _parse_ai_mapping(output_text, unmapped_values):
        """
        Turn the model's raw text into ``{original_value: standardized_value}``.

        The text is parsed as data only (JSON first, then a Python literal for responses
        that use single quotes); it is never executed. Every requested value gets an
        entry, falling back to ``"unknown"`` when the response is unusable or omits it.

        :param output_text: Raw text returned by the model.
        :param unmapped_values: The values the model was asked to standardize.
        :return: Dictionary with exactly one entry per requested value.
        """
        import ast
        import json

        text = output_text.strip()
        # Models sometimes wrap the JSON in a markdown code fence despite the instruction
        if text.startswith("```"):
            text = re.sub(r"^```[\w-]*\s*|\s*```$", "", text)

        parsed = None
        for parser in (json.loads, ast.literal_eval):
            try:
                candidate = parser(text)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                continue
            if isinstance(candidate, dict):
                parsed = candidate
                break

        if parsed is None:
            return {val: "unknown" for val in unmapped_values}  # Fallback

        # JSON keys are always strings, so non-string originals are looked up by their text
        return {
            val: parsed.get(val, parsed.get(str(val), "unknown"))
            for val in unmapped_values
        }

    def ai_standardize(self, unmapped_values):
        """
        Call OpenAI API **once** for all unmapped values to minimize cost, with memoization.

        :param unmapped_values: Values that could not be mapped by rules or fuzzy matching.
        :return: Dictionary {original_value: standardized_value}; empty if nothing was passed.
        :raises ValueError: If the prompt exceeds ``self.max_tokens`` tokens.
        """
        if not unmapped_values:
            return {}

        # Sorted by text so the cache key is stable even when value types are mixed
        unmapped_tuple = tuple(sorted(unmapped_values, key=str))
        if unmapped_tuple in self.ai_cache:
            return self.ai_cache[unmapped_tuple]  # Return memoized result

        prompt = f"""
        You are an expert in data classification. Standardize each of these values into one of the categories:
        {list(self.standard_values)}.

        Return only a JSON dictionary where:
        - The keys are the original values.
        - The values are the standardized ones.

        Strictly return JSON **without markdown formatting** or extra text.

        Example Output:
        {{
            "BLKHOUS": "block house",
            "BEDSIT": "bedsit"
        }}

        Values to standardize:
        {unmapped_values}
        """

        # Count input tokens
        input_tokens = self.count_tokens(prompt)
        if input_tokens > self.max_tokens:
            raise ValueError("Input tokens exceed the maximum limit.")

        logger.info("Calling OpenAI API for standardization...")
        response = self.openai_client.chat.completions.create(
            model=self.ai_model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=self.max_tokens,
            temperature=0.1,
        )

        output_text = response.choices[0].message.content.strip()
        output_tokens = self.count_tokens(output_text)  # Count output tokens

        # Track total token usage
        self.total_tokens_used += input_tokens + output_tokens

        # Estimate cost
        input_cost = input_tokens * self.pricing[self.ai_model]["input"]
        output_cost = output_tokens * self.pricing[self.ai_model]["output"]
        self.total_cost += input_cost + output_cost

        # The response is untrusted text, so it is parsed as data rather than evaluated
        mapping = self._parse_ai_mapping(output_text, unmapped_values)

        # Memoize the AI response
        self.ai_cache[unmapped_tuple] = mapping
        # We store the raw AI response for debugging
        logger.debug("AI Response: %s", mapping)
        self.ai_response = output_text

        return mapping

    def standardize_list(self, values_to_remap):
        """
        Standardizes a list of values and returns a dictionary {original_value: standardized_value}.

        :param values_to_remap: List of raw values to standardize.
        :return: Dictionary {original_value: standardized_value}. This is the instance's
            cumulative ``remap_dict``, so it also holds results of earlier calls.
        """
        standard_map = self.standard_map or {}

        unmapped_values = []
        for value in set(values_to_remap):  # Process only unique values
            if pd.isna(value):  # Handle NaN values
                self.remap_dict[value] = "unknown"
                continue

            cleaned_value = self.clean_string(value)
            lowered_value = value.lower() if isinstance(value, str) else None

            # Rule-Based Check (Predefined Mapping): cleaned form first, then raw, then lower-cased
            mapped_key = next(
                (
                    key for key in (cleaned_value, value, lowered_value)
                    if key is not None and key in standard_map
                ),
                None
            )
            if mapped_key is not None:
                self.remap_dict[value] = standard_map[mapped_key]
                continue

            # Exact Match in Standard Values
            if cleaned_value is not None and cleaned_value in self.standard_values:
                self.remap_dict[value] = cleaned_value
                continue

            # Fuzzy Matching
            fuzzy_match = self.fuzzy_match(cleaned_value)
            if fuzzy_match:
                self.remap_dict[value] = fuzzy_match
                continue

            # Capture anything that wasn't mapped
            unmapped_values.append(value)

        # AI Model - remap anything unmapped (batch request)
        ai_mapping = self.ai_standardize(unmapped_values)
        self.remap_dict.update(ai_mapping)

        return self.remap_dict

    def report_usage(self):
        """Prints a summary of token usage and cost."""
        print(f"\n🔹 Total Tokens Used: {self.total_tokens_used}")
        print(f"💰 Estimated Cost: ${self.total_cost:.4f}")
asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS +from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS +from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc -from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") -def get_data( - df, manual_uprn_map, epc_api_only=False, row_id_name="row_id" -): - uprn_column = AssetList.STANDARD_UPRN - fulladdress_column = AssetList.STANDARD_FULL_ADDRESS - address1_column = AssetList.STANDARD_ADDRESS_1 - postcode_column = AssetList.STANDARD_POSTCODE - - # These re-map the standard property types to forms accepted by the EPC api, so we can predict EPCs - property_type_map = { - "house": "House", - "flat": "Flat", - "maisonette": "Maisonette", - "bungalow": "Bungalow", - "block house": "House", - "coach house": "House", - "bedsit": "Flat" - } - - epc_data = [] - errors = [] - no_epc = [] - for _, home in tqdm(df.iterrows(), total=len(df)): - try: - - # If we have a block of flats, we cannot retrieve this data - if home[AssetList.STANDARD_PROPERTY_TYPE] == "block of flats": - no_epc.append(home[row_id_name]) - continue - - postcode = home[postcode_column] - house_number = str(home[address1_column]).strip() - full_address = home[fulladdress_column].strip() - house_no = SearchEpc.get_house_number(address=str(house_number), postcode=postcode) - if house_no is None: - house_no = house_number - uprn = manual_uprn_map.get(full_address, None) - if uprn is None and home.get(uprn_column): - uprn = home[uprn_column] - - if pd.isnull(uprn): - uprn = None - - property_type = property_type_map.get(home[AssetList.STANDARD_PROPERTY_TYPE], None) - - searcher = SearchEpc( - address1=str(house_no), - postcode=postcode, - auth_token=EPC_AUTH_TOKEN, - os_api_key="", - property_type=None, - fast=True, - full_address=full_address, - max_retries=5, - uprn=uprn - ) - # Force the skipping of 
estimating the EPC - searcher.ordnance_survey_client.property_type = None - searcher.ordnance_survey_client.built_form = None - - searcher.find_property(skip_os=True) - - # Check if we have a flat or appartment - if searcher.newest_epc is None and uprn is None: - # Try again: - if SearchEpc.get_house_number(address=str(house_number), postcode=postcode) is None: - # Backup - add1 = full_address.split(",") - if len(add1) > 1: - add1 = add1[1].strip() - else: - # Try splitting on space - add1 = full_address.split(" ")[0].strip() - - else: - add1 = str(house_number) - searcher = SearchEpc( - address1=add1, - postcode=postcode, - auth_token=EPC_AUTH_TOKEN, - os_api_key="", - property_type=None, - fast=True, - full_address=full_address, - max_retries=5 - ) - - if ( - "flat" in house_number.lower() or "apartment" in house_number.lower() or "apt" in - house_number.lower() - ): - searcher.ordnance_survey_client.property_type = "Flat" - - searcher.find_property(skip_os=True) - - # As a final resort, we estimate the EPC - if property_type is not None and searcher.newest_epc is None: - searcher.ordnance_survey_client.property_type = property_type - searcher.find_property(skip_os=True) - - if searcher.newest_epc is None: - no_epc.append(home[row_id_name]) - continue - - if epc_api_only: - epc = { - row_id_name: home[row_id_name], - **searcher.newest_epc.copy() - } - - epc_data.append(epc) - continue - - # Look for EPC recommendatons - try: - property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"]) - except: - property_recommendations = {"rows": []} - - # Retrieve data from FindMyEPC - try: - find_epc_searcher = RetrieveFindMyEpc( - address=searcher.newest_epc["address"], postcode=searcher.newest_epc["postcode"] - ) - find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() - except ValueError as e: - if "No EPC found" in str(e) and "address1" in searcher.newest_epc: - try: - find_epc_searcher = RetrieveFindMyEpc( - 
address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"] - ) - find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() - except ValueError as e: - if "No EPC found" in str(e): - find_epc_data = {} - else: - find_epc_data = {} - except Exception as e: - raise Exception(f"Error retrieving FindMyEPC data: {e}") - time.sleep(np.random.uniform(0.1, 1)) - - epc = { - row_id_name: home[row_id_name], - **searcher.newest_epc.copy(), - "recommendations": property_recommendations["rows"], - "find_my_epc_data": find_epc_data, - } - - epc_data.append(epc) - except Exception as e: - errors.append(home[row_id_name]) - time.sleep(5) - - return epc_data, errors, no_epc - - def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"): if method == "first_two_words": asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ") @@ -246,40 +89,437 @@ def app(): # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid) # - Or the insulation required is loft/cavity (floors should be solid) - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" - data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" + # Bromford + data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme " + "Rebuild/Prepared data/") + data_filename = "asset_list.xlsx" sheet_name = "Sheet1" - postcode_column = 'Full Address.1' - fulladdress_column = "Full Address" + postcode_column = 'PostCode' + fulladdress_column = "FullAddress" address1_column = None - address1_method = "first_word" + address1_method = "house_number_extraction" address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "Build Date" + landlord_year_built = "ConYear" landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_wall_construction = "Wallinsul" - 
landlord_heating_system = "HeatSorc" + landlord_property_type = "AssetTypeDesc" + landlord_built_form = "PropTypeDesc" + landlord_wall_construction = "Construction type" + landlord_roof_construction = None + landlord_heating_system = "Heating Type" landlord_existing_pv = None - landlord_property_id = "Property Reference" + landlord_property_id = "Asset" + landlord_sap = None + outcomes_filename = "outcomes.xlsx" + outcomes_sheetname = "Sheet1" + outcomes_postcode = "Postcode" + outcomes_houseno = "No" + outcomes_id = None + outcomes_address = "Address" + master_filepaths = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO " + "3 submissions.csv", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO " + "4 submissions.csv", + ] + master_to_asset_list_filepath = None + phase = False - # For Westward - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward" - # data_filename = "WESTWARD - completed list..xlsx" - # sheet_name = "Sheet1" - # postcode_column = "WFT EDIT Postcode" + # Torus + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1" + data_filename = "Torus Property Asset List - Phase 1.xlsx" + sheet_name = "TORUS" + postcode_column = 'Postcode' + fulladdress_column = None + address1_column = "AddressLine1" + address1_method = None + address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] + missing_postcodes_method = None + landlord_year_built = "Property Age" + landlord_os_uprn = "NatUPRN" + landlord_property_type = "Property Type" + landlord_built_form = "Built Form" + landlord_wall_construction = "Wall Construction" + landlord_roof_construction = "Roof Construction" + landlord_heating_system = "Space Heating Source" + landlord_existing_pv = "Low Carbon Technology (Solar PV)" + landlord_property_id = "UPRN" + landlord_sap = "SAP Score" + outcomes_filename = None 
+ outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_to_asset_list_filepath = None + phase = True + + # Ealing - houses + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing" + data_filename = "Ealing_rechecked_cleaned_05042025.csv" + sheet_name = None + postcode_column = 'Postcode' + fulladdress_column = "Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Year Built" + landlord_os_uprn = None + landlord_property_type = "Property Type Code" + landlord_built_form = None + landlord_wall_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Property ref" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_to_asset_list_filepath = None + + # Southern Midlands + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" + data_filename = "Southern Housing Midlands Property List - combined.xlsx" + sheet_name = "Sheet 1" + postcode_column = 'Post Code' + fulladdress_column = "Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Age_1" + landlord_os_uprn = None + landlord_property_type = "Prop_Type" + landlord_built_form = "Prop_Type" + landlord_wall_construction = "Walls_P" + landlord_heating_system = "Heating System" + landlord_existing_pv = None + landlord_property_id = "AssetID" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + 
master_to_asset_list_filepath = None + + # Live West (2018 Asset list) + data_folder = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset List" + ) + data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" + sheet_name = "Assets" + postcode_column = 'Postcode' + fulladdress_column = "Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Build Year" + landlord_os_uprn = None + landlord_property_type = "Property Archetype" + landlord_built_form = None + landlord_wall_construction = None + landlord_heating_system = "Heating Fuel Type" + landlord_existing_pv = None + landlord_property_id = "Uprn - DO NOT DELETE" + outcomes_filename = "RT - LiveWest.xlsx" + outcomes_sheetname = "Feedback" + outcomes_postcode = "Poscode" + outcomes_houseno = "No." + outcomes_id = "UPRN" + master_filepaths = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master " + "- redacted for analysis/CAVITY-Table 1.csv" + ] + master_to_asset_list_filepath = None + + # Live West (South West asset list) + data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " + "2025/Livewest Asset List (Original) - csv") + data_filename = "Report-Table 1.csv" + sheet_name = None + postcode_column = 'Postcode' + fulladdress_column = "T1_Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Build Yr" + landlord_os_uprn = None + landlord_property_type = "T1_AssetType" + landlord_built_form = "T1_AssetType" + landlord_wall_construction = "Wall Type Cavity" + landlord_heating_system = "Heating Fuel" + landlord_existing_pv = None + landlord_property_id = "T1_UPRN" + outcomes_filename = "RT - LiveWest.xlsx" + outcomes_sheetname 
= "Feedback" + outcomes_postcode = "Poscode" + outcomes_houseno = "No." + outcomes_id = "UPRN" + master_filepaths = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master " + "- redacted for analysis/CAVITY-Table 1.csv" + ] + master_to_asset_list_filepath = None + + # PFP London + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London" + data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx" + sheet_name = "PFP SURROUNDING LONDON" + postcode_column = 'Postcode' + fulladdress_column = None + address1_column = "AddressLine1" + address1_method = None + address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Archetype (PFP)" + landlord_built_form = "Archetype (PFP)" + landlord_wall_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Uprn" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + master_filepaths = [] + master_to_asset_list_filepath = None + + # PFP North-West + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West" + data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx" + sheet_name = "CHECKED" + postcode_column = 'Postcode' + fulladdress_column = None + address1_column = "AddressLine1" + address1_method = None + address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Archetype (PFP)" + landlord_built_form = "Archetype (PFP)" + landlord_wall_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Uprn" + outcomes_filename = 
None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + master_filepaths = [] + master_to_asset_list_filepath = None + + # PFP North-East + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East" + data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx" + sheet_name = "CHECKED" + postcode_column = 'Postcode' + fulladdress_column = None + address1_column = "AddressLine1" + address1_method = None + address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Archetype (PFP)" + landlord_built_form = "Archetype (PFP)" + landlord_wall_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Uprn" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + master_filepaths = [] + master_to_asset_list_filepath = None + + # PFP East + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East" + data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx" + sheet_name = "PFP EAST" + postcode_column = 'Postcode' + fulladdress_column = None + address1_column = "AddressLine1" + address1_method = None + address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Archetype (PFP)" + landlord_built_form = "Archetype (PFP)" + landlord_wall_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Uprn" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + master_filepaths = [] + master_to_asset_list_filepath = None + + # Wates 
+ data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - " + data_filename = "ECO 4 Wates.xlsx" + sheet_name = "Roadmap Homes" + postcode_column = 'Postcode' + fulladdress_column = None + address1_column = "Address Line 1" + address1_method = None + address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"] + missing_postcodes_method = None + landlord_year_built = "Build Year" + landlord_os_uprn = None + landlord_property_type = "Archetype" + landlord_built_form = "Archetype" + landlord_wall_construction = "Wall" + landlord_heating_system = "Heating Type" + landlord_existing_pv = None + landlord_property_id = "UPRN" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + master_filepaths = [] + master_to_asset_list_filepath = None + + # Ealing + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025" + # data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx" + # sheet_name = "IGNORE - FULL MAIN" + # postcode_column = 'Postcode' # fulladdress_column = "Address" # address1_column = None - # address1_method = "house_number_extraction" + # address1_method = "first_word" # address_cols_to_concat = [] # missing_postcodes_method = None - # landlord_year_built = "Build date" - # landlord_os_uprn = "UPRN" - # landlord_property_type = "Location type" - # landlord_wall_construction = "Wall Construction (EPC)" - # landlord_heating_system = "Heat Source" - # landlord_existing_pv = "PV (Y/N)" - # landlord_property_id = "Place ref" + # landlord_year_built = "Year Built" + # landlord_os_uprn = None + # landlord_property_type = "Property Type Code" + # landlord_wall_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "Property ref" + + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" + # data_filename = "Warmfront data- 
Colchester Borough Homes (Complete).xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Full Address.1' + # fulladdress_column = "Full Address" + # address1_column = None + # address1_method = "first_word" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = "Build Date" + # landlord_os_uprn = None + # landlord_property_type = "Property Type" + # landlord_wall_construction = "Wallinsul" + # landlord_heating_system = "HeatSorc" + # landlord_existing_pv = None + # landlord_property_id = "Property Reference" + # outcomes_filename = None + # outcomes_sheetname = None + # outcomes_postcode = None + # outcomes_houseno = None + # master_filepaths = [] + # master_to_asset_list_filepath = None + + # For Westward + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward" + data_filename = "WESTWARD - completed list - 20.03.2025.xlsx" + sheet_name = "Sheet1" + postcode_column = "WFT EDIT Postcode" + fulladdress_column = "Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Build date" + landlord_os_uprn = "UPRN" + landlord_property_type = "Location type" + landlord_built_form = None + landlord_wall_construction = "Wall Construction (EPC)" + landlord_heating_system = "Heat Source" + landlord_existing_pv = "PV (Y/N)" + landlord_property_id = "Place ref" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + master_filepaths = [] + master_to_asset_list_filepath = None + outcomes_id = None + + # For ACIS - programme re-build + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025" + # data_filename = "ACIS asset list.xlsx" + # sheet_name = "Assets" + # address1_column = "House No" + # postcode_column = "Postcode" + # landlord_property_id = "UPRN" + # fulladdress_column = None + # 
address_cols_to_concat = ["House No", "Street", "Town"] + # missing_postcodes_method = None + # address1_method = None + # landlord_year_built = "YEAR BUILT" + # landlord_os_uprn = None + # landlord_property_type = "Property type" + # landlord_built_form = None + # landlord_wall_construction = "Wall Constuction" + # landlord_heating_system = "Heating" + # landlord_existing_pv = None + # outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx" + # outcomes_sheetname = "Feedback" + # outcomes_postcode = "Postcode" + # outcomes_houseno = "No" + # master_filepaths = [ + # os.path.join(data_folder, "ECO 3 -Table 1.csv"), + # os.path.join(data_folder, "ECO 4 -Table 1.csv"), + # ] + # master_to_asset_list_filepath = None + + # For plus dane + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane" + data_filename = "PLUS DANE Asset List - for analysis.xlsx" + sheet_name = "Asset List" + address1_column = " Address" + postcode_column = " Postcode" + landlord_property_id = "UPRN" + fulladdress_column = " Address" + address_cols_to_concat = [] + missing_postcodes_method = None + address1_method = None + landlord_year_built = "Property Age" + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_wall_construction = "Landlord Wall Full" + landlord_heating_system = "Landlord Heating" + landlord_existing_pv = None + outcomes_filename = "plus dane outcomes.xlsx" + outcomes_sheetname = "EVERYTHING" + outcomes_postcode = "Post Code" + outcomes_houseno = "Numb." 
+ master_filepaths = [ + os.path.join(data_folder, "JJC Rolling Master.csv"), + os.path.join(data_folder, "SCIS Rolling Master.csv"), + ] + master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv") # Maps addresses to uprn in problematic cases manual_uprn_map = {} @@ -298,37 +538,84 @@ def app(): landlord_year_built=landlord_year_built, landlord_uprn=landlord_os_uprn, landlord_property_type=landlord_property_type, + landlord_built_form=landlord_built_form, landlord_wall_construction=landlord_wall_construction, + landlord_roof_construction=landlord_roof_construction, landlord_heating_system=landlord_heating_system, - landlord_existing_pv=landlord_existing_pv + landlord_existing_pv=landlord_existing_pv, + landlord_sap=landlord_sap, + phase=phase ) asset_list.init_standardise() # We produce the new maps, which can be saved for future useage - - new_property_type_map = PROPERTY_MAPPING.copy().update( - asset_list.variable_mappings[asset_list.landlord_property_type] if asset_list.landlord_property_type else {} - ) - new_wall_map = WALL_CONSTRUCTION_MAPPINGS.copy().update( - asset_list.variable_mappings[asset_list.landlord_wall_construction] if - asset_list.landlord_wall_construction else {} - ) - new_heating_map = HEATING_MAPPINGS.copy().update( - asset_list.variable_mappings[asset_list.landlord_heating_system] if asset_list.landlord_heating_system else {} - ) - new_existing_pv_map = EXISTING_PV_MAPPINGS.copy().update( - asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {} - ) + new_property_type_map = { + k: v for k, v in ( + asset_list.variable_mappings[asset_list.landlord_property_type] if + asset_list.landlord_property_type else {} + ).items() + if k not in PROPERTY_MAPPING + } + new_built_form_map = { + k: v for k, v in ( + asset_list.variable_mappings[asset_list.landlord_built_form] if + asset_list.landlord_built_form else {} + ).items() + if k not in BUILT_FORM_MAPPINGS + } + 
new_wall_map = { + k: v for k, v in ( + asset_list.variable_mappings[asset_list.landlord_wall_construction] if + asset_list.landlord_wall_construction else {} + ).items() + if k not in WALL_CONSTRUCTION_MAPPINGS + } + new_heating_map = { + k: v for k, v in ( + asset_list.variable_mappings[asset_list.landlord_heating_system] if + asset_list.landlord_heating_system else {} + ).items() + if k not in HEATING_MAPPINGS + } + new_existing_pv_map = { + k: v for k, v in ( + asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {} + ).items() + if k not in EXISTING_PV_MAPPINGS + } + new_roof_construction_map = { + k: v for k, v in ( + asset_list.variable_mappings[asset_list.landlord_roof_construction] if + asset_list.landlord_roof_construction else {} + ).items() + if k not in ROOF_CONSTRUCTION_MAPPINGS + } asset_list.apply_standardiation() + # We now flag properties that have been treated under existing programmes + asset_list.flag_outcomes( + outcomes_filepath=os.path.join(data_folder, outcomes_filename) if outcomes_filename else None, + outcomes_sheetname=outcomes_sheetname, + outcomes_address=outcomes_address, + outcomes_postcode=outcomes_postcode, + outcomes_houseno=outcomes_houseno, + outcomes_id=outcomes_id + ) + + asset_list.flag_survey_master( + master_filepaths=master_filepaths, + master_to_asset_list_filepath=master_to_asset_list_filepath + ) + ### We retrieve the EPC data # We chunk up this data into 5000 rows at a time # Create the chunks directory + epc_api_only = False force_retrieve_data = False skip = None # Used to skip already completed chunks - chunk_size = 5000 + chunk_size = 1000 filename = "Chunk {i}.csv" download_folder = os.path.join(data_folder, "Chunks") if not os.path.exists(download_folder): @@ -343,6 +630,9 @@ def app(): if all(x in folder_contents for x in downloaded_files): skip = max(chunk_indexes) + if any(x in folder_contents for x in downloaded_files): + skip = max([i for i in chunk_indexes 
if filename.format(i=i) in folder_contents]) + for i in range(0, len(asset_list.standardised_asset_list), chunk_size): print(f"Processing chunk {i} to {i + chunk_size}") if skip is not None and not force_retrieve_data: @@ -352,7 +642,15 @@ def app(): epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, + uprn_column=AssetList.STANDARD_UPRN, + fulladdress_column=AssetList.STANDARD_FULL_ADDRESS, + address1_column=AssetList.STANDARD_ADDRESS_1, + postcode_column=AssetList.STANDARD_POSTCODE, + property_type_column=AssetList.STANDARD_PROPERTY_TYPE, + built_form_column=AssetList.STANDARD_BUILT_FORM, manual_uprn_map=manual_uprn_map, + epc_api_only=epc_api_only, + epc_auth_token=EPC_AUTH_TOKEN ) # We now retrieve any failed properties @@ -360,8 +658,15 @@ def app(): epc_data_failed, _, _ = get_data( df=chunk_failed, row_id_name=asset_list.DOMNA_PROPERTY_ID, + uprn_column=AssetList.STANDARD_UPRN, + fulladdress_column=AssetList.STANDARD_FULL_ADDRESS, + address1_column=AssetList.STANDARD_ADDRESS_1, + postcode_column=AssetList.STANDARD_POSTCODE, + property_type_column=AssetList.STANDARD_PROPERTY_TYPE, + built_form_column=AssetList.STANDARD_BUILT_FORM, manual_uprn_map=manual_uprn_map, - epc_api_only=False + epc_api_only=epc_api_only, + epc_auth_token=EPC_AUTH_TOKEN ) epc_data_chunk.extend(epc_data_failed) @@ -383,7 +688,9 @@ def app(): csv_data = pd.read_csv(os.path.join(download_folder, file)) # We need to convert the recommendations back to a list csv_data["recommendations"] = csv_data["recommendations"].apply(eval) - csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval) + # We don't have this if we didn't run the pulling from find my epc + if "find_my_epc_data" in csv_data.columns: + csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval) epc_data.append(csv_data) epc_df = pd.concat(epc_data) @@ -425,10 +732,27 @@ def app(): ) # Get the find my epc data - find_my_epc_data = 
epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop( - columns=["find_my_epc_data"]).join( - pd.json_normalize(epc_df["find_my_epc_data"]) - ) + if "find_my_epc_data" not in epc_df.columns: + epc_df["find_my_epc_data"] = None + + find_my_epc_data = [] + for _, x in epc_df.iterrows(): + if x["find_my_epc_data"]: + find_my_epc_data.append( + { + asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID], + **x["find_my_epc_data"] + } + ) + else: + find_my_epc_data.append( + { + asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID] + } + ) + + find_my_epc_data = pd.DataFrame(find_my_epc_data) + find_my_epc_data = find_my_epc_data.merge( transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]], how="left", on=asset_list.DOMNA_PROPERTY_ID @@ -445,6 +769,13 @@ def app(): columns=asset_list.EPC_API_DATA_NAMES ) + # Look for columns not in the find my EPC data, which will have happened if we didn't + # retrieve it in the first place + missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns] + if missed_find_epc_cols: + for c in missed_find_epc_cols: + find_my_epc_data[c] = None + epc_df = epc_df.merge( find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) @@ -464,13 +795,143 @@ def app(): ) cleaned = msgpack.unpackb(cleaned, raw=False) - # TODO: We should break out the identification of work types to flag blocks of flats specifically asset_list.identify_worktypes(cleaned) pprint(asset_list.work_type_figures) asset_list.flat_analysis() + ################################################################ + # WESTWARD - comparison between Kieran's method & automated + ################################################################ + + # Check 1) + cavity_fills = pd.read_excel( + os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"), + sheet_name="Straight Fill" + ) + cavity_fills = 
cavity_fills.merge( + asset_list.standardised_asset_list[ + [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"] + ], + how="left", + left_on=asset_list.landlord_property_id, + right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID + ) + cavity_fills["cavity_reason"] = cavity_fills["cavity_reason"].fillna("Not identified") + print(cavity_fills["cavity_reason"].value_counts()) + # Didn't identify 3 properties because they're bedsits + # 4 properties were identified, not based on the non-intrusives but instead because + # Westward said they were built in 2003/2007. Have adjusted this to use the age from the + # epc as well, as EPC says 1975 and they look like 1975 properties + # 37 properties flagged as already having solar - these are all because the landlord said they have solar + # e.g. + # https://earth.google.com/web/search/11+Winsland+Avenue+TOTNES+TQ9+5FT/@50.43354465,-3.71318276,46.57468503a, + # 59.14004365d,35y,0h,0t, + # 0r/data=CpABGmISXAolMHg0ODZkMWQxOGE4NWRiZjdkOjB4YjBhM2E5M2Q3YWVlMWEwYhlZYgp7fzdJQCHFfC9027QNwCohMTEgV2luc2xhbmQgQXZlbnVlIFRPVE5FUyBUUTkgNUZUGAIgASImCiQJbxsQEoo3SUARXQcp_HE3SUAZBmiZGJ6yDcAhCA0fqq63DcBCAggBOgMKATBCAggASg0I____________ARAA + # https://earth.google.com/web/search/15+St+Anne%27s+Ct,+Newton+Abbot+TQ12+1TL/@50.53068337,-3.61611128, + # 11.74908956a,135.73212429d,35y,0h,0t, + # 0r/data=CpUBGmcSYQolMHg0ODZkMDVkMjFhODhjZjgxOjB4MjBmMzE2Zjc3MGI2NGMwYxlCxHLw8UNJQCFZqyzALe4MwComMTUgU3QgQW5uZSdzIEN0LCBOZXd0b24gQWJib3QgVFExMiAxVEwYAiABIiYKJAm-r6U2iDdJQBHS5ICRdDdJQBmYGVpmiLINwCG8wcrtqbYNwEICCAE6AwoBMEICCABKDQj___________8BEAA + + # Check 2) + cavity_fills_with_solar = pd.read_excel( + os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"), + sheet_name="Solar PV - Straight Fill" + ) + cavity_fills_with_solar = cavity_fills_with_solar.merge( + asset_list.standardised_asset_list[ + [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"] + ], + how="left", + left_on=asset_list.landlord_property_id, + 
right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID + ) + cavity_fills_with_solar["cavity_reason"] = cavity_fills_with_solar["cavity_reason"].fillna("Not identified") + print(cavity_fills_with_solar["cavity_reason"].value_counts()) + # 203 properties total + # 140 properties were flagged up based on non-intrusives (Non-Intrusive Data Showed Empty Cavity) + # 63 property already has solar + + # Check 3) RDF + rdf = pd.read_excel( + os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"), + sheet_name="RDF CIGA checks" + ) + rdf = rdf.merge( + asset_list.standardised_asset_list[ + [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"] + ], + how="left", + left_on=asset_list.landlord_property_id, + right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID + ) + rdf["cavity_reason"] = rdf["cavity_reason"].fillna("Not identified") + print(rdf["cavity_reason"].value_counts()) + # 264 properties are not identified, 261 of which are due to the fact they contain materials + # The other 3 were determined to be eligible for solar instead + # Many of these units that were identified for rdf works could be solar jobs + + rdf_with_solar = pd.read_excel( + os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"), + sheet_name="Solar PV - RDF CIGA Checks" + ) + rdf_with_solar = rdf_with_solar.merge( + asset_list.standardised_asset_list[ + [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"] + ], + how="left", + left_on=asset_list.landlord_property_id, + right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID + ) + rdf_with_solar["cavity_reason"] = rdf_with_solar["cavity_reason"].fillna("Not identified") + rdf_with_solar["cavity_reason"].value_counts() + + # All others identified - some flagged as empties due to EPC or landlord data suggesting as much + # 5 not identified due to containing COMPACTED BEAD + + asset_list.standardised_asset_list = asset_list.standardised_asset_list[ + 
asset_list.standardised_asset_list[asset_list.landlord_property_id] + ] + + asset_list.load_contact_details( + local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"), + sheet_name="Report 1", + landlord_property_id=asset_list.landlord_property_id, + phone_number_column='Property Current Tel. Number', + fullname_column='Proeprty Current Occupant', + firstname_column=None, + lastname_column=None, + email_column=None, # TODO - we need this + ) + + # Convert to a format suitable for CRM + # TODO: TEMP + assigned_surveyors = pd.DataFrame( + [ + { + asset_list.landlord_property_id: "02610001", + "week_commencing": "10/10/2025", + "surveyor_name": "Khalim Conn-Kowlessar", + "surveyor_email": "khalim@domna.homes", + } + ] + ) + + # TODO: Sort the output by postcode + + company_domain = "ealing.gov.uk" + crm_pipeline_name = "Survey Management" + first_dealstage = "READY TO BEGIN SCHEDULING" + # TODO - temp, upload to either SharePoint or AWS + + asset_list.prepare_for_crm( + assigned_surveyors=assigned_surveyors, + company_domain=company_domain, + crm_pipeline_name=crm_pipeline_name, + first_dealstage=first_dealstage + ) + hubspot_data = asset_list.hubspot_data + # Store as an excel filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. 
One for the asset list with the EPC data and the second with the flat data @@ -478,3 +939,15 @@ def app(): with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False) + # If we have outcomes, we add a tab with the outcomes + if not asset_list.outcomes_for_output.empty: + asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False) + + if not asset_list.unmatched_submissions.empty: + asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False) + + if not asset_list.outcomes_no_match.empty: + asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False) + + # Store the Hubspot export as a csv + hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False) diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py new file mode 100644 index 00000000..e103f794 --- /dev/null +++ b/asset_list/mappings/built_form.py @@ -0,0 +1,148 @@ +import numpy as np + +STANDARD_BUILT_FORMS = { + "unknown", + # Houses + "end-terrace", "semi-detached", "detached", "mid-terrace", + # Flats + "ground floor", "mid-floor", "top-floor", "basement" +} + +BUILT_FORM_MAPPINGS = { + 'House (End Terrace)': 'end-terrace', + 'Ground Floor Flat General': 'ground floor', + 'House (Semi)': 'semi-detached', + 'House (Mid Terrace)': 'mid-terrace', + 'Bungalow': 'unknown', + 'House (Mid terrace)': 'mid-terrace', + 'Maisonette': 'unknown', + 'Flat': 'unknown', + 'First Floor Flat General': 'mid-floor', + 'Bungalow (Semi)': 'semi-detached', + + 'Detached House': 'detached', + 'End Terraced House': 'end-terrace', + 'Studio (Ground floor)': 'ground floor', + 'Mid Terraced House': 'mid-terrace', + 'Ground Floor Flat': 'ground floor', + 'Semi Detached House': 'semi-detached', + 'Detached Property': 'detached', + 'Level 
not confirmed': 'unknown', + 'Bedsit': 'unknown', + 'Cottage': 'detached', + 'Terraced House': 'mid-terrace', + 'Studio (1st Floor)': 'ground floor', + 'Standard Maisonette': 'unknown', + 'Third Floor Flat or Above': 'top-floor', + 'Town House': 'end-terrace', + 'Guest room in a complex': 'unknown', + 'Back To Back House': 'mid-terrace', + 'PIMSS EMPTY': 'unknown', + 'Flat Basement': 'basement', + 'House': 'unknown', + 'Second Floor Flat': 'mid-floor', + 'First Floor Flat': 'ground floor', + 'Room Only': 'unknown', + + 'End Terrace Housex': 'end-terrace', + 'Mid Terrace Bungalow': 'mid-terrace', + 'End Terrace Bungalow': 'end-terrace', + 'Mid Terrace House': 'mid-terrace', + 'Detached Bungalow': 'detached', + 'End Terrace House': 'end-terrace', + 'Mid Terrace Housekeeping ': 'mid-terrace', + 'Semi Detached Bung': 'semi-detached', + 'Guest Room': 'unknown', + 'Coach House': 'detached', + 'Office Buildings': 'unknown', + 'Maisonnette': 'mid-floor', + 'Bedspace': 'unknown', + 'Studio (3rd floor and above)': 'top-floor', + 'Adapted Property For Disabled': 'unknown', + 'Studio (2nd floor)': 'mid-floor', + np.nan: 'unknown', + 'Third Floor Flat': 'mid-floor', + '2 Ext. 
Wall Flat': 'mid-terrace', + 'Hostel': 'unknown', + 'Flat: Mid Terrace: Mid Floor': 'mid-terrace', + 'Bungalow: SemiDetached': 'semi-detached', + 'Flat: End Terrace: Top Floor': 'end-terrace', + 'Flat: Enclosed End Terrace: Top Floor': 'end-terrace', + 'Maisonette: End Terrace: Ground Floor': 'end-terrace', + 'Flat: End Terrace: Ground Floor': 'end-terrace', + 'Flat: Mid Terrace: Top Floor': 'mid-terrace', + 'House: Detached': 'detached', + 'Flat: End Terrace: Mid Floor': 'end-terrace', + 'House: SemiDetached': 'semi-detached', + 'Flat: Semi Detached: Ground Floor': 'semi-detached', + 'Flat: Semi Detached: Top Floor': 'semi-detached', + 'Flat: Mid Terrace: Ground Floor': 'mid-terrace', + 'House: MidTerrace': 'mid-terrace', + 'House: EndTerrace': 'end-terrace', + 'Bungalow: EndTerrace': 'end-terrace', + 'Bungalow: MidTerrace': 'mid-terrace', + 'Flat: Semi Detached: Mid Floor': 'semi-detached', + 'Maisonette: Mid Terrace: Top Floor': 'mid-terrace', + 'Flat: Enclosed Mid Terrace: Mid Floor': 'mid-terrace', + 'Flat: Enclosed Mid Terrace: Ground Floor': 'mid-terrace', + 'Flat: Detached: Ground Floor': 'detached', + 'Flat: Detached: Mid Floor': 'detached', + 'Flat: Detached: Top Floor': 'detached', + 'Flat: Enclosed End Terrace: Mid Floor': 'end-terrace', + 'Bungalow: Detached': 'detached', + 'Maisonette: End Terrace: Mid Floor': 'end-terrace', + 'Maisonette: Detached: Top Floor': 'detached', + 'Flat: Enclosed End Terrace: Ground Floor': 'end-terrace', + 'Flat: Enclosed Mid Terrace: Top Floor': 'mid-terrace', + 'House: EnclosedEndTerrace': 'end-terrace', + '3 Ext. Wall Flat': 'semi-detached', + 'Bungalow Detached': 'detached', + 'Bungalow End Terrace': 'end-terrace', + 'Bungalow Mid Terrace': 'mid-terrace', + 'Bungalow Semi Detached': 'detached', + 'Maisonette 2 Ext. Wall': 'mid-terrace', + 'Maisonette 3 Ext. 
Wall': 'semi-detached', + 'End-terrace': 'end-terrace', + 'Mid-terrace': 'mid-terrace', + 'Semi-detached': 'semi-detached', + 'Detached': 'detached', + 'Flat / maisonette': 'unknown', + '2014 onwards': 'unknown', + + 'Semi Detached': 'semi-detached', + 'End Terraced': 'end-terrace', + 'Basement': 'basement', + 'No': 'unknown', + 'Mid Terrace': 'mid-terrace', + 'Link Detached': 'detached', + 'Mid Terraced': 'mid-terrace', + 'Ground Floor': 'ground floor', + 'End Terrace': 'end-terrace', + 'Sheltrd Semi Det': 'semi-detached', + 'Shop': 'unknown', + 'Fourth Floor': 'mid-floor', + 'Terraced': 'mid-terrace', + 'Leasehold Terr': 'mid-terrace', + 'Room': 'unknown', + 'Second Floor': 'mid-floor', + 'Third Floor': 'mid-floor', + 'Office': 'unknown', + 'First Floor Over Arch': 'ground floor', + '16-25 IND-PPL': 'unknown', + 'Seventh Floor': 'top-floor', + 'Sheltered': 'unknown', + 'Shelt Bung End': 'end-terrace', + 'Room In Shared Accommodation': 'unknown', + 'Sheltred Bung Terrace': 'mid-terrace', + 'Garage In Block': 'unknown', + 'First Floor': 'ground floor', + 'First Floor Over Garage': 'ground floor', + 'Leasehold': 'unknown', + 'Sheltred Bung': 'unknown', + 'Garage': 'unknown', + 'Sixth Floor': 'top-floor', + 'Sheltered Bung': 'semi-detached', + 'Guest': 'unknown', + 'Fifth Floor': 'mid-floor' + +} diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py index 06e77bba..51f5f922 100644 --- a/asset_list/mappings/exising_pv.py +++ b/asset_list/mappings/exising_pv.py @@ -1,3 +1,5 @@ +import numpy as np + STANDARD_EXISTING_PV = { "already has PV", "no PV", "unknown" } @@ -9,4 +11,10 @@ EXISTING_PV_MAPPINGS = { "yes": "already has PV", True: "already has PV", False: "no PV", + np.nan: 'unknown', + 'PV: 2kWp array': 'already has PV', + 'PV: 25% roof area, PV: 3.6kWp array': 'already has PV', + 'PV: 10% roof area, PV: 2kWp array': 'already has PV', + 'PV: 50% roof area': 'already has PV', + 'Solar PV': 'already has PV' } diff --git 
a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 4879efcc..7f2f81f2 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -16,11 +16,20 @@ STANDARD_HEATING_SYSTEMS = { "unknown", "communal gas boiler", "high heat retention storage heaters", + "room heaters", + 'electric fuel', + 'oil fuel', + 'solid fuel', + 'gas combi boiler', + 'unknown', + "electric ceiling", + "electric underfloor", + "no heating" } HEATING_MAPPINGS = { "Combi - GAS": "gas combi boiler", - "E7 Storage Heaters": "electric storage heaters", + "E7 Storage Heaters": "high heat retention storage heaters", "District heating system": "district heating", "Condensing Boiler - GAS": "gas condensing boiler", "Boiler Oil/other": "oil boiler", @@ -38,7 +47,7 @@ HEATING_MAPPINGS = { "Gas fire": "other", "Backboiler - Solid fuel": "other", 'combi - gas': 'gas combi boiler', - 'e7 storage heaters': 'electric storage heaters', + 'e7 storage heaters': 'high heat retention storage heaters', 'district heating system': 'district heating', 'condensing boiler - gas': 'gas condensing boiler', 'boiler oil/other': 'oil boiler', @@ -64,4 +73,134 @@ HEATING_MAPPINGS = { 'SOLIDFUEL': 'boiler - other fuel', 'STORHTR': 'electric storage heaters', np.nan: 'unknown', + 'Oil': 'boiler - other fuel', + 'Gas': 'gas condensing boiler', + 'Electric': 'electric storage heaters', + 'Solid fuel': 'other', + 'No Heat': 'unknown', + 'GSHP': 'ground source heat pump', + + 'Boiler Oil': 'oil boiler', + 'Boiler Electricity': 'electric boiler', + 'Boiler ND': 'unknown', + 'ND Mains gas': 'unknown', + 'Room heaters Mains gas': "room heaters", + 'Heat pump (air) Electricity': 'air source heat pump', + 'Room heaters Electricity': 'electric radiators', + 'Room heaters Oil': 'room heaters', + 'No heating system ND': 'no heating', + 'Heat pump (wet) Electricity': 'ground source heat pump', + 'Room heaters Biomass': 'room heaters', + 'ND Solid fuel': 'unknown', + 
'Boiler Mains gas': 'gas combi boiler', + 'Boiler LPG': 'boiler - other fuel', + 'Room heaters Solid fuel': 'room heaters', + 'ND ND': 'unknown', + 'Storage heating Electricity': 'electric storage heaters', + 'ND Electricity': 'unknown', + 'Community heating Community (non-gas)': 'district heating', + 'No heating system N/A': 'no heating', + 'Boiler Solid fuel': 'boiler - other fuel', + 'Community heating Community (mains gas)': 'communal gas boiler', + 'Boiler Biomass': 'boiler - other fuel', + 'No heating system Mains gas': 'no heating', + + 'Storage heaters': 'electric storage heaters', + 'Air Source': 'air source heat pump', + 'Ground source': 'ground source heat pump', + 'OIl': 'boiler - other fuel', + 'Quantum storage heaters (old sh on EPC)': 'high heat retention storage heaters', + 'Quanum Storage heaters': 'high heat retention storage heaters', + 'Quantum storage heaters (Old SH on EPC)': 'high heat retention storage heaters', + 'Quantum storage heaters': 'high heat retention storage heaters', + 'Air Source (EPC says SH)': 'air source heat pump', + 'ASHP - Was logged as oil': 'air source heat pump', + 'Ground Source': 'ground source heat pump', + 'District Heating': 'district heating', + 'Mains Gas (Communal)': 'communal gas boiler', + 'LPG': 'boiler - other fuel', + 'Mains Gas': 'gas condensing boiler', + 'ELECTRIC': 'electric fuel', + 'OIL': 'oil fuel', + 'SOLID FUEL': 'solid fuel', + 'GAS': 'gas combi boiler', + 'DO NOT SURVEY': 'unknown', + 'Gas Boiler': 'gas combi boiler', + 'Communal Gas ': 'communal gas boiler', + 'Communal': 'communal gas boiler', + 'Communal Gas': 'communal gas boiler', + 'Wood Burning Boiler': "boiler - other fuel", + 'Oil Fired Boiler': 'oil boiler', + 'Electric (direct acting) room heaters: Panel, convector or radiant heaters Electricity: Electricity': 'room ' + 'heaters', + 'Electric Storage Systems: Integrated storage+direct-acting heater Electricity: Electricity': 'electric storage ' + 'heaters', + 'Community Heating 
Systems: Community CHP and boilers (RdSAP) Gas: Mains Gas (Community)': 'communal gas boiler', + 'Boiler: D rated Regular Boiler Gas: Mains Gas': 'gas boiler', + 'Boiler: C rated Combi Gas: Mains Gas': 'gas combi boiler', + 'Electric Storage Systems: Fan storage heaters Electricity: Electricity': 'electric storage heaters', + ' ': 'unknown', + 'Boiler: G rated Regular Boiler Gas: Mains Gas': 'gas boiler', + 'Electric Storage Systems: Modern (slimline) storage heaters Electricity: Electricity': 'electric storage heaters', + 'Boiler: E rated Regular Boiler Gas: Mains Gas': 'gas boiler', + 'Boiler: A rated Regular Boiler Electricity: Electricity': 'electric boiler', + 'Community Heating Systems: Community boilers only (RdSAP) Gas: Mains Gas (Community)': 'communal gas boiler', + 'Boiler: A rated Combi Gas: Mains Gas': 'gas condensing combi', + 'Boiler: A rated CPSU Electricity: Electricity': 'electric boiler', + 'Heat Pump: Electric Heat pumps: Ground source heat pump with flow temperature <= 35°C': 'ground source heat pump', + 'Heat Pump: Electric Heat pumps: Ground source heat pump in other cases': 'ground source heat pump', + 'Electric Storage Systems: High heat retention storage heaters': 'high heat retention storage heaters', + 'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump', + 'Electric (direct acting) room heaters: Panel, convector or radiant heaters': 'room heaters', + 'Boiler: C rated Combi': 'gas combi boiler', + 'Boiler: B rated Regular Boiler': 'gas condensing boiler', + 'Boiler: E rated Combi': 'gas combi boiler', + 'Boiler: A rated Combi': 'gas combi boiler', + 'Boiler: E rated Regular Boiler': 'gas condensing boiler', + 'Community Heating Systems: Community boilers only (RdSAP)': 'district heating', + 'Boiler: C rated Regular Boiler': 'gas condensing boiler', + 'Boiler: A rated Regular Boiler': 'gas condensing boiler', + 'Electric Storage Systems: Fan storage heaters': 'electric storage 
heaters', + 'Boiler: F rated Combi': 'gas combi boiler', + + 'Room heaters': 'room heaters', + 'Room Heaters': 'room heaters', + 'Boiler': 'gas condensing boiler', + 'Heat Pump (Wet)': 'air source heat pump', + 'Community Heating': 'district heating', + 'Heat pump (wet)': 'air source heat pump', + 'Electric ceiling heating': 'electric ceiling', + 'Electric under floor heating': 'electric underfloor', + 'Community heating': 'district heating', + + 'Wet - Radiators Air Source Heat Pump': 'air source heat pump', + 'Wet - Radiators Electric': 'electric boiler', + 'Storage Heaters': 'high heat retention storage heaters', + 'Wet - Radiators Oil': 'oil boiler', + 'Communal Wet - Radiators Gas': 'communal gas boiler', + 'Electric - Storage/Panel Heaters Electric': 'electric storage heaters', + 'Gas Central Heating': 'gas combi boiler', + 'Wet - Radiators Solar': 'other', + 'Electric - Storage/Panel Heaters LPG': 'electric storage heaters', + 'No Heating Solid': 'no heating', + 'Wet - Underfloor Gas': 'gas condensing boiler', + 'No Heating Electric': 'no heating', + 'Oil Fired Central Heating': 'oil boiler', + 'Warm Air Gas': 'other', + 'Communal Boilers': 'communal gas boiler', + 'Wet - Radiators Gas': 'gas combi boiler', + 'Wet - Radiators Solid': 'solid fuel', + 'Wet - Radiators LPG': 'other', + 'No Heating Gas': 'no heating', + 'No Heating': 'no heating', + 'Panel Heaters': 'electric radiators', + 'Rointe Electric Heating': 'electric storage heaters', + 'Underfloor Heating': 'electric underfloor', + 'Air Source Heating': 'air source heat pump', + 'Warm Air Electric': 'other', + 'Communal Wet - Radiators Electric': 'communal gas boiler', + 'Wet - Underfloor Solar': 'other', + 'No Heating Required Gas': 'unknown', + 'Electric - Storage/Panel Heaters Gas': 'electric storage heaters', + 'Electric - Storage/Panel Heaters Solid': 'electric storage heaters' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 2612f058..dc8dbf21 
100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -1,3 +1,5 @@ +import numpy as np + # These are the standard categories for property types STANDARD_PROPERTY_TYPES = { "house", "flat", "maisonette", "bungalow", "park home", "block house", "bedsit", "coach house", @@ -21,5 +23,160 @@ PROPERTY_MAPPING = { 'Flat': 'flat', 'House': 'house', 'Maisonette': 'maisonette', - 'Stairwell': 'other' + 'Stairwell': 'other', + 'MAISON': 'maisonette', + '3 Bed Semi Detached House': 'house', + '3 Bed Mid Terrace House': 'house', + '2 Bed Semi Detached House': 'house', + '4 Bed Semi Detached House': 'house', + '2 Bed End Terrace House': 'house', + '1 Bed Sheltered Bungalow': 'bungalow', + '1 Bed 1st Floor Sheltered Flat': 'flat', + '2 Bed Second Floor Flat': 'flat', + '1 Bed Mid Terrace House': 'house', + '1 Bed End Terrace House': 'house', + '7 Bed Detached House': 'house', + '4 Bed End Terrace House': 'house', + '1 Bed Link House': 'house', + '1 Bed Second Floor Flat': 'flat', + '2 Bed Detached House': 'house', + '1 Bed Ground Floor Flat': 'flat', + '2 Bed Sheltered Bungalow': 'bungalow', + '4 Bed Mid Terrace House': 'house', + '2 Bed Mid Terrace House': 'house', + '2 Bed First Floor Flat': 'flat', + '3 Bed Detached House': 'house', + 'Ground Floor Bedsit': 'bedsit', + '3 Bed Bungalow': 'bungalow', + np.nan: 'unknown', + '5 Bed End Terrace House': 'house', + '1 Bed Grd Floor Sheltered Flat': 'flat', + '3 Bed End Terrace House': 'house', + '2 Bed Second Floor Maisonette': 'maisonette', + '2 Bed Ground Floor Flat': 'flat', + '2 Bed First Floor Maisonette': 'maisonette', + '4 Bed Detached House': 'house', + '1 Bed Bungalow': 'bungalow', + '2 Bed Bungalow': 'bungalow', + 'First Floor Bedsit': 'bedsit', + '3 Bed First Floor Maisonette': 'maisonette', + '2 Bed 1st Floor Sheltered Flat': 'flat', + '1 Bed First Floor Flat': 'flat', + '3 Bed First Floor Flat': 'flat', + 'ND': 'unknown', + 'House (Mid Terrace)': 'house', + 'First Floor Flat 
General': 'flat', + 'House (End Terrace)': 'house', + 'House (Mid terrace)': 'house', + 'Bungalow (Semi)': 'bungalow', + 'Ground Floor Flat General': 'flat', + 'House (Semi)': 'house', + 'Detached House': 'house', + 'Bedsit': 'bedsit', + 'Terraced House': 'house', + 'Standard Maisonette': 'maisonette', + 'End Terraced House': 'house', + 'Third Floor Flat or Above': 'flat', + 'Town House': 'house', + 'Mid Terraced House': 'house', + 'Back To Back House': 'house', + 'Flat Basement': 'flat', + 'Ground Floor Flat': 'flat', + 'Semi Detached House': 'house', + 'Second Floor Flat': 'flat', + 'First Floor Flat': 'flat', + 'Level not confirmed': 'flat', + 'Cottage': 'house', + 'Studio (1st Floor)': 'flat', + 'Studio (Ground floor)': 'flat', + 'Guest room in a complex': 'other', + 'PIMSS EMPTY': 'bedsit', + 'Room Only': 'other', + 'Detached Property': 'house', + 'End Terrace Housex': 'house', + 'Coach House': 'coach house', + 'Mid Terrace Bungalow': 'bungalow', + 'End Terrace Bungalow': 'bungalow', + 'Mid Terrace House': 'house', + 'Detached Bungalow': 'bungalow', + 'End Terrace House': 'house', + 'Mid Terrace Housekeeping ': 'house', + 'Maisonnette': 'maisonette', + 'Guest Room': 'unknown', + 'Office Buildings': 'unknown', + 'Semi Detached Bung': 'bungalow', + 'Bedspace': 'bedsit', + 'Houses/Bungalows': 'bungalow', + 'Bedsits': 'bedsit', + 'Unknown': 'unknown', + 'Sheltered Flats/besits': 'flat', + 'House/Bungalow ': 'bungalow', + 'Low/Med Rise Flats/Mais': 'flat', + 'Staff/Comm': 'other', + 'A Rooms': 'other', + 'Studio (3rd floor and above)': 'flat', + 'Adapted Property For Disabled': 'unknown', + 'Studio (2nd floor)': 'flat', + 'Third Floor Flat': 'flat', + '2 Ext. 
Wall Flat': 'flat', + 'Hostel': 'other', + 'House: MidTerrace': 'house', + 'House: EndTerrace': 'house', + 'Flat: Mid Terrace: Mid Floor': 'flat', + 'Bungalow: SemiDetached': 'bungalow', + 'Bungalow: EndTerrace': 'bungalow', + 'Flat: End Terrace: Top Floor': 'flat', + 'Maisonette: End Terrace: Ground Floor': 'maisonette', + 'Flat: End Terrace: Ground Floor': 'flat', + 'Flat: Mid Terrace: Top Floor': 'flat', + 'House: Detached': 'house', + 'Flat: End Terrace: Mid Floor': 'flat', + 'House: SemiDetached': 'house', + 'Flat: Semi Detached: Ground Floor': 'flat', + 'Flat: Semi Detached: Top Floor': 'flat', + 'Flat: Mid Terrace: Ground Floor': 'flat', + 'Bungalow: MidTerrace': 'bungalow', + 'Flat: Enclosed End Terrace: Top Floor': 'flat', + 'Flat: Semi Detached: Mid Floor': 'flat', + 'Maisonette: Mid Terrace: Top Floor': 'maisonette', + 'House: EnclosedEndTerrace': 'house', + 'Flat: Detached: Ground Floor': 'flat', + 'Flat: Detached: Mid Floor': 'flat', + 'Flat: Detached: Top Floor': 'flat', + 'Bungalow: Detached': 'bungalow', + 'Maisonette: End Terrace: Mid Floor': 'maisonette', + 'Maisonette: Detached: Top Floor': 'maisonette', + 'Flat: Enclosed Mid Terrace: Mid Floor': 'flat', + 'Flat: Enclosed Mid Terrace: Ground Floor': 'flat', + 'Flat: Enclosed End Terrace: Mid Floor': 'flat', + 'Flat: Enclosed End Terrace: Ground Floor': 'flat', + 'Flat: Enclosed Mid Terrace: Top Floor': 'flat', + '2013 onwards': 'unknown', + + 'House 2 Storey': 'house', + 'Bung': 'bungalow', + 'House 3 Storey': 'house', + 'Shared Flat': 'flat', + 'd': 'unknown', + 'Mais': 'maisonette', + 'e': 'unknown', + 'Shared House': 'house', + 'House 4 Storey': 'house', + 'Shared Bungalow': 'bungalow', + 'Detch': 'house', + 'Shop': 'other', + 'Terr': 'house', + 'Terrace': 'house', + 'Description': 'unknown', + 'Hse': 'house', + 'Room': 'other', + 'Office': 'other', + 'Room In Shared Accommodation': 'other', + 'Apartment': 'flat', + 'm': 'unknown', + 'Garage': 'other', + 'Parking Space': 'other', + 'Community 
Centre': 'other', + 'Communal Facility': 'other', + 'Semi': 'house' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py new file mode 100644 index 00000000..a95f0529 --- /dev/null +++ b/asset_list/mappings/roof.py @@ -0,0 +1,27 @@ +import numpy as np + +STANDARD_ROOF_CONSTRUCTIONS = { + "pitched access to loft", + "pitched no access to loft", + "pitched unknown access to loft", + "piched unknown insulation", + "pitched insulated", + "another dwelling above", + "flat unknown insulation", + "unknown insulated", + "unknown", +} + +ROOF_CONSTRUCTION_MAPPINGS = { + 'Flat': 'flat unknown insulation', + 'Pitched (access to loft)': 'pitched access to loft', + 'Pitched (no access to loft)': 'pitched no access to loft', + 'Another dwelling above': 'another dwelling above', + 'Same dwelling above': 'another dwelling above', + 'As-built': 'unknown', + 'ND (inferred)': 'unknown', + '2018 onwards': 'unknown', + 'Pitched (vaulted ceiling)': 'pitched insulated', + np.nan: "unknown", + None: "unknown" +} diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 78d64988..c327338a 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -1,8 +1,14 @@ +import numpy as np + STANDARD_WALL_CONSTRUCTIONS = { + # Cavity "uninsulated cavity", "filled cavity", "partial insulated cavity", "cavity unknown insulation", + # Solic Brick "uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation", - "timber frame", - "system built", "granite or whinstone", "other", "unknown", "sandstone or limestone", + # Timber Frame + "timber frame unknown insulation", "insulated timber frame", "uninsulated timber frame", + "system built", "granite or whinstone", "other", + "unknown", "sandstone or limestone", "cob", "new build - average thermal transmittance", } @@ -89,4 +95,76 @@ WALL_CONSTRUCTION_MAPPINGS = { 'NONE': 'unknown', 'NOTKNOWN': 'unknown', 'SOLID': 'solid brick unknown insulation', + np.nan: 'unknown', + 
'RENDER/TIMBER FRAME': 'timber frame', + 'SYSTEM BUILT': 'system built', + 'PCC PANELS': 'other', + 'NOT APPLICABLE - FLAT': 'unknown', + 'BRICK/TIMBER FRAME': 'timber frame', + 'BRICK/BLOCK CAVITY': 'cavity unknown insulation', + 'STONE SOLID': 'sandstone or limestone', + 'EXT CLADDING SYSTEM': 'system built', + 'BRICK/BLOCK SOLID': 'solid brick unknown insulation', + + 'Cavity Filled cavity (with internal/external)': 'filled cavity', + 'ND (inferred) Filled cavity': 'filled cavity', + 'Cavity Filled cavity': 'filled cavity', + 'Cavity Unknown insulation': 'cavity unknown insulation', + 'Timber frame As-built': 'timber frame', + 'System build Unknown insulation': 'system built', + 'Cavity As-built': 'uninsulated cavity', + 'System build External': 'system built', + 'ND (inferred) ND (inferred)': 'unknown', + 'Solid brick External': 'insulated solid brick', + 'Cavity External': 'filled cavity', + 'System build As-built': 'system built', + 'Solid brick Internal': 'insulated solid brick', + 'Cavity Internal': 'filled cavity', + 'System build Internal': 'system built', + 'Solid brick As-built': 'solid brick unknown insulation', + + 'Cavity ': 'cavity unknown insulation', + 'Solid brick ': 'solid brick unknown insulation', + 'Timber frame Timber frame (good insulation)': 'insulated timber frame', + ' ': 'unknown', + 'Cavity No data': 'cavity unknown insulation', + 'Non trad ': 'other', + 'Solid brick / Multiple Attributes ': 'solid brick unknown insulation', + 'Cavity Believe CWI done by Dyson': 'filled cavity', + 'Cavity CWI required': 'uninsulated cavity', + 'Solid brick EWI installed': 'insulated solid brick', + 'Cavity Cavity batts': 'filled cavity', + 'Cavity CWI Completed by Dyson': 'filled cavity', + None: "unknown", + "Cavity": "cavity unknown insulation", + 'SolidBrick: Unknown': 'solid brick unknown insulation', + 'Cavity: Unknown': 'cavity unknown insulation', + 'Cavity: AsBuilt (Post 1995)': 'filled cavity', + 'Cavity: AsBuilt (1976-1982)': 'cavity unknown 
insulation', + 'SystemBuilt: AsBuilt': 'system built', + 'TimberFrame: AsBuilt': "timber frame unknown insulation", + 'Cavity: AsBuilt (1983-1995)': 'cavity unknown insulation', + 'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity', + 'SolidBrick: AsBuilt': 'solid brick unknown insulation', + 'Cavity: FilledCavity': 'filled cavity', + 'SolidBrick: Internal': 'insulated solid brick', + 'Cavity: External': 'filled cavity', + 'Sandstone: Internal': 'sandstone or limestone', + 'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation', + 'System build': 'system built', + 'Solid brick': 'solid brick unknown insulation', + 'Stone': 'sandstone or limestone', + 'Timber frame': 'timber frame unknown insulation', + '2017 onwards': 'new build - average thermal transmittance', + 'ND (inferred)': 'unknown', + 'Flat / maisonette': 'other', + + 'Other': 'other', + 'Timber Frame': 'timber frame unknown insulation', + 'Cavity Wall': 'cavity unknown insulation', + 'Non-Traditional': 'system built', + 'PRC': 'system built', + 'Cross Wall': 'system built', + 'Solid Wall': 'solid brick unknown insulation', + 'Traditional': 'other' } diff --git a/asset_list/utils.py b/asset_list/utils.py new file mode 100644 index 00000000..ff9db3f8 --- /dev/null +++ b/asset_list/utils.py @@ -0,0 +1,183 @@ +import time +import numpy as np +import pandas as pd +from backend.SearchEpc import SearchEpc +from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc +from tqdm import tqdm +from utils.logger import setup_logger + +logger = setup_logger() + + +def get_data( + df, + manual_uprn_map, + epc_auth_token, + uprn_column, + fulladdress_column, + address1_column, + postcode_column, + property_type_column, + built_form_column, + epc_api_only=False, + row_id_name="row_id", +): + # These re-map the standard property types to forms accepted by the EPC api, so we can predict EPCs + property_type_map = { + "house": "House", + "flat": "Flat", + "maisonette": "Maisonette", + "bungalow": 
"Bungalow", + "block house": "House", + "coach house": "House", + "bedsit": "Flat" + } + + built_form_map = { + "mid-terrace": "Mid-Terrace", + "end-terrace": "End-Terrace", + "semi-detached": "Semi-Detached", + "detached": "Detached" + } + + epc_data = [] + errors = [] + no_epc = [] + for _, home in tqdm(df.iterrows(), total=len(df)): + try: + + # If we have a block of flats, we cannot retrieve this data + if home.get(property_type_column) == "block of flats": + no_epc.append(home[row_id_name]) + continue + + postcode = home[postcode_column] + house_number = str(home[address1_column]).strip() + full_address = home[fulladdress_column].strip() + house_no = SearchEpc.get_house_number(address=str(house_number), postcode=postcode) + if house_no is None: + house_no = house_number + uprn = manual_uprn_map.get(full_address, None) + if uprn is None and home.get(uprn_column): + uprn = home[uprn_column] + + if pd.isnull(uprn): + uprn = None + + property_type = property_type_map.get(home.get(property_type_column), None) + built_form = built_form_map.get(home.get(built_form_column)) + + searcher = SearchEpc( + address1=str(house_no), + postcode=postcode, + auth_token=epc_auth_token, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address, + max_retries=5, + uprn=uprn + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + + # Check if we have a flat or appartment + if searcher.newest_epc is None and uprn is None: + # Try again: + if SearchEpc.get_house_number(address=str(house_number), postcode=postcode) is None: + # Backup + add1 = full_address.split(",") + if len(add1) > 1: + add1 = add1[1].strip() + else: + # Try splitting on space + add1 = full_address.split(" ")[0].strip() + + else: + add1 = str(house_number) + searcher = SearchEpc( + address1=add1, + postcode=postcode, + auth_token=epc_auth_token, + 
os_api_key="", + property_type=None, + fast=True, + full_address=full_address, + max_retries=5 + ) + + if ( + "flat" in house_number.lower() or "apartment" in house_number.lower() or "apt" in + house_number.lower() + ): + searcher.ordnance_survey_client.property_type = "Flat" + + searcher.find_property(skip_os=True) + + # As a final resort, we estimate the EPC + if property_type is not None and searcher.newest_epc is None: + searcher.ordnance_survey_client.property_type = property_type + searcher.ordnance_survey_client.built_form = built_form + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + no_epc.append(home[row_id_name]) + continue + + # Look for EPC recommendatons + try: + property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"]) + except: + property_recommendations = {"rows": []} + + if epc_api_only: + epc = { + row_id_name: home[row_id_name], + **searcher.newest_epc.copy(), + "recommendations": property_recommendations["rows"] + } + + epc_data.append(epc) + continue + + # Retrieve data from FindMyEPC + try: + find_epc_searcher = RetrieveFindMyEpc( + address=searcher.newest_epc["address"], postcode=searcher.newest_epc["postcode"] + ) + find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() + except ValueError as e: + if "No EPC found" in str(e) and "address1" in searcher.newest_epc: + try: + find_epc_searcher = RetrieveFindMyEpc( + address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"] + ) + find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data() + except ValueError as e: + if "No EPC found" in str(e): + find_epc_data = {} + else: + logger.error(f"Error retrieving FindMyEPC data: {e}") + raise Exception(f"Error retrieving FindMyEPC data: {e}") + else: + find_epc_data = {} + except Exception as e: + raise Exception(f"Error retrieving FindMyEPC data: {e}") + time.sleep(np.random.uniform(0.1, 1)) + + epc = { + row_id_name: home[row_id_name], 
+ **searcher.newest_epc.copy(), + "recommendations": property_recommendations["rows"], + "find_my_epc_data": find_epc_data, + } + + epc_data.append(epc) + except Exception as e: + errors.append(home[row_id_name]) + time.sleep(5) + + return epc_data, errors, no_epc diff --git a/backend/Funding.py b/backend/Funding.py index 2839c7ff..f5f85b9f 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -98,11 +98,14 @@ class Funding: self, scheme: str, eligible: bool, + types: List[str], measure_types: List[str], + project_score: float, estimated_funding: float, notify_tenant_benefits_requirements: bool, notify_council_tax_band_requirements: bool, notify_tenant_low_income_requirements: bool, + innovation_required: bool, ): """" """ @@ -113,11 +116,14 @@ class Funding: return { "scheme": scheme, "eligible": eligible, + "type": types, "measure_types": measure_types, + "project_score": project_score, "estimated_funding": estimated_funding, "requires_benefits": notify_tenant_benefits_requirements, "requires_council_tax_band": notify_council_tax_band_requirements, - "requires_low_income": notify_tenant_low_income_requirements + "requires_low_income": notify_tenant_low_income_requirements, + "innovation_required": innovation_required, } @staticmethod @@ -140,7 +146,7 @@ class Funding: """ pass - def find_best_gbis_measure(self, measures): + def find_gbis_measures(self, measures): """ The best measure is one that: 1) Creates some SAP movement, therefore enables eligiblity @@ -247,21 +253,26 @@ class Funding: ) and (self.council_tax_band in [None, "A", "B", "C", "D"]) ): - # We find the best measure for GBIS - recommended_measure = self.find_best_gbis_measure( + # This function pulls out the various measures that can provide funding under GBIS + recommended_measures = self.find_gbis_measures( measures=[m for m in valid_measures if m not in ["cavity_wall_insulation", "loft_insulation"]] ) # If the council tax band is missing, we nofify the customer that this is a requirement 
that # should be checked - return self.output( - scheme="gbis", - eligible=True, - measure_types=[recommended_measure["measure_type"]], - estimated_funding=recommended_measure["estimated_funding"], - notify_tenant_benefits_requirements=False, - notify_council_tax_band_requirements=self.council_tax_band is None, - notify_tenant_low_income_requirements=False, - ) + return [ + self.output( + scheme="gbis", + eligible=True, + types=[m["type"]], # This is single measure so we only have one type + measure_types=[m["measure_type"]], + project_score=m["project_score"], + estimated_funding=m["estimated_funding"], + notify_tenant_benefits_requirements=False, + notify_council_tax_band_requirements=self.council_tax_band is None, + notify_tenant_low_income_requirements=False, + innovation_required=False + ) for m in recommended_measures + ] # Low income/flex if ( @@ -271,28 +282,83 @@ class Funding: # Find the best measure, and can also include CWI/LI but requires the tenant to be # low inome or on benefits # We find the best measure for GBIS - recommended_measure = self.find_best_gbis_measure(measures=valid_measures) - return self.output( - scheme="gbis", - eligible=True, - measure_types=[recommended_measure["measure_type"]], - estimated_funding=recommended_measure["estimated_funding"], - notify_tenant_benefits_requirements=True, - notify_council_tax_band_requirements=False, - notify_tenant_low_income_requirements=True, - ) + recommended_measures = self.find_gbis_measures(measures=valid_measures) + return [ + self.output( + scheme="gbis", + eligible=True, + types=[m["type"]], # This is single measure so we only have one type + measure_types=[m["measure_type"]], + project_score=m["project_score"], + estimated_funding=m["estimated_funding"], + notify_tenant_benefits_requirements=True, + notify_council_tax_band_requirements=False, + notify_tenant_low_income_requirements=True, + innovation_required=False + ) for m in recommended_measures + ] # Otherwise, no funding availability - 
return self.output( - scheme="gbis", - eligible=False, - measure_types=[], - estimated_funding=0, - notify_tenant_benefits_requirements=False, - notify_council_tax_band_requirements=False, - notify_tenant_low_income_requirements=False + return [] + + def gbis_social(self): + """ + Because this is social housing, we have two typical means for eligibility + 1) EPC D, where an innovation measure is required + 2) EPC G-E, where an innovation measure isn't required + :return: + """ + valid_measures = [ + "internal_wall_insulation", + "external_wall_insulation", + "flat_roof_insulation", + "suspended_floor_insulation", + "room_roof_insulation", + # Not available for every eligiblity type + "cavity_wall_insulation", + "loft_insulation", + "heating_control" + ] + + recommended_measures = self.find_gbis_measures( + measures=valid_measures ) + # All measures are available + if self.starting_sap == "D": + return [ + self.output( + scheme="gbis", + eligible=True, + types=[m["type"]], # This is single measure so we only have one type + measure_types=[m["measure_type"]], + project_score=m["project_score"], + estimated_funding=m["estimated_funding"], + notify_tenant_benefits_requirements=False, + notify_council_tax_band_requirements=False, + notify_tenant_low_income_requirements=False, + innovation_required=True + ) for m in recommended_measures + ] + + if self.starting_sap in ["G", "F", "E"]: + return [ + self.output( + scheme="gbis", + eligible=True, + types=[m["type"]], # This is single measure so we only have one type + measure_types=[m["measure_type"]], + project_score=m["project_score"], + estimated_funding=m["estimated_funding"], + notify_tenant_benefits_requirements=False, + notify_council_tax_band_requirements=False, + notify_tenant_low_income_requirements=False, + innovation_required=False + ) for m in recommended_measures + ] + + return [] + def gbis(self): """ Check if a property is eligible for GBIS @@ -303,24 +369,33 @@ class Funding: self.gbis_eligibiltiy = 
self.gbis_prs() return + if self.tenure == "Social": + self.gbis_eligibiltiy = self.gbis_social() + raise NotImplementedError("Implement social/oo") def whlg(self): if self.tenure == "Social": # We can't do anything for social housing - self.whlg_eligibility = self.output( - scheme="whlg", - eligible=False, - measure_types=[], - estimated_funding=0, - notify_tenant_benefits_requirements=False, - notify_council_tax_band_requirements=False, - notify_tenant_low_income_requirements=False - ) + self.whlg_eligibility = [] return if not self.whlg_eligible_postcodes.empty: - print("Eligible implement me!") + raise Exception("Implement me") + # self.whlg_eligibility = [ + # self.output( + # scheme, + # eligible, + # types, + # measure_types, + # project_score: float, + # estimated_funding: float, + # notify_tenant_benefits_requirements: bool, + # notify_council_tax_band_requirements: bool, + # notify_tenant_low_income_requirements: bool, + # innovation_required: bool, + # ) + # ] def eco4(self): if self.tenure == "Private": diff --git a/backend/Property.py b/backend/Property.py index eaffd54d..52e8c213 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -70,6 +70,10 @@ class Property: # Contains the solar panel optimisation results from the Google Solar API solar_panel_configuration = None + # If true, indicates the floor area has actually been given to us by the owner, and we should use this figure + # instead of the one in the EPC, when we simulate + owner_floor_area = False + def __init__( self, id, @@ -104,7 +108,7 @@ class Property: self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else [] self.non_invasive_recommendations = ( - ast.literal_eval(non_invasive_recommendations['recommendations']) if + non_invasive_recommendations['recommendations'] if non_invasive_recommendations else [] ) # This is a list of measures that have been recommended for the property @@ -226,25 +230,24 @@ class Property: # as we 
collect more data from the energy assessment n_bathrooms = kwargs.get("n_bathrooms", None) - if n_bathrooms not in [None, ""]: - # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5 - n_bathrooms = int(round(float(n_bathrooms) + 1e-5)) + # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5 + n_bathrooms = int(round(float(n_bathrooms) + 1e-5)) if n_bathrooms not in [None, ""] else None n_bedrooms = kwargs.get("n_bedrooms", None) - if n_bedrooms not in [None, ""]: - n_bedrooms = int(round(float(n_bedrooms) + 1e-5)) + n_bedrooms = int(round(float(n_bedrooms) + 1e-5)) if n_bedrooms not in [None, ""] else None number_of_floors = kwargs.get("number_of_floors", None) - if number_of_floors not in [None, ""]: - number_of_floors = int(round(float(number_of_floors) + 1e-5)) + number_of_floors = int(round(float(number_of_floors) + 1e-5)) if number_of_floors not in [None, ""] else None insulation_floor_area = kwargs.get("insulation_floor_area", None) - if insulation_floor_area not in [None, ""]: - insulation_floor_area = float(insulation_floor_area) + insulation_floor_area = float(insulation_floor_area) if insulation_floor_area not in [None, ""] else None insulation_wall_area = kwargs.get("insulation_wall_area", None) - if insulation_wall_area not in [None, ""]: - insulation_wall_area = float(insulation_wall_area) + insulation_wall_area = float(insulation_wall_area) if insulation_wall_area not in [None, ""] else None + + # We allow for the asset owner to provide us with total floor area, in the event of it being incorrect + floor_area = kwargs.get("floor_area", None) + floor_area = float(floor_area) if floor_area not in [None, ""] else None return { "n_bathrooms": n_bathrooms, @@ -253,12 +256,15 @@ class Property: "insulation_floor_area": insulation_floor_area, "insulation_wall_area": insulation_wall_area, "building_id": kwargs.get("building_id", None), + "floor_area": 
floor_area } def parse_kwargs(self, kwargs): # We extract the elements from kwargs that we recognise. Anything additional is ignored for arg, val in kwargs.items(): if val is not None: + if arg == "floor_area": + self.owner_floor_area = True setattr(self, arg, val) def create_base_difference_epc_record(self, cleaned_lookup: dict): @@ -268,14 +274,7 @@ class Property: It will be the same starting and ending EPC, as we don't have the expected EPC yet """ - # difference_record = self.epc_record - self.epc_record - - # TODO: change these lower and replace in the settings file - # print( - # "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING" - # ) fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD - # print("NEED TO CHANGE THE DASH TO LOWER CASE") fixed_data_col_names = [ x.lower().replace("_", "-") for x in fixed_data_col_names ] @@ -286,8 +285,6 @@ class Property: if k in fixed_data_col_names } - # difference_record.append_fixed_data(fixed_data) - difference_record = self.epc_record.create_EPCDifferenceRecord( self.epc_record, fixed_data ) @@ -296,10 +293,11 @@ class Property: datasets=[difference_record], cleaned_lookup=cleaned_lookup ) - # TODO: adjust the base difference record with the previously calculated u values + features - # estimated_perimeter is different to the perimeter in the epc record - - # self.base_difference_record.df + # If we have variables that have been given to us by the landlord that we know are correct, whereas the EPC + # may not be, we use them + if self.owner_floor_area is not None: + self.base_difference_record.df["total_floor_area_ending"] = self.floor_area + self.base_difference_record.df["estimated_perimeter_ending"] = self.perimeter def simulate_all_representative_recommendations( self, property_representative_recommendations, @@ -385,7 +383,7 @@ class Property: for rec in property_recommendations_by_phase: # We simulate the impact of the recommendation at this current phase, and 
all of the prior phases - if rec["type"] in ["mechanical_ventilation", "trickle_vents", "draught_proofing"]: + if rec["type"] in ["trickle_vents", "draught_proofing"]: continue scoring_dict = self.create_recommendation_scoring_data( @@ -393,7 +391,6 @@ class Property: recommendation_record=recommendation_record, recommendations=previous_phase_representatives + [rec], primary_recommendation_id=rec["recommendation_id"], - non_invasive_recommendations=self.non_invasive_recommendations, ) self.recommendations_scoring_data.append(scoring_dict) @@ -465,7 +462,7 @@ class Property: if self.simulation_epcs is None: raise ValueError("Simulation EPCs have not been created") - rec_ids = sorted(list(self.simulation_epcs.keys())) + rec_ids = list(self.simulation_epcs.keys()) updated_simulation_epcs = [] for rec_id in rec_ids: sim_epc = self.simulation_epcs[rec_id].copy() @@ -491,15 +488,12 @@ class Property: # Now we havet this data inthe self.updated_simulation_epcs = updated_simulation_epcs - return updated_simulation_epcs - @staticmethod def create_recommendation_scoring_data( property_id, recommendation_record, recommendations: list, primary_recommendation_id: int, - non_invasive_recommendations: list = None, ): """ This function will iterate through a list of recommendations and apply a simulation for each recommendation @@ -508,7 +502,6 @@ class Property: :param recommendation_record: The record of the property, which will be updated :param recommendations: The list of recommendations to apply :param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record - :param non_invasive_recommendations: The list of non-invasive recommendations :return: The updated recommendation record """ @@ -537,7 +530,7 @@ class Property: "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", "cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation", "solid_floor_insulation", 
"suspended_floor_insulation", "mixed_glazing", - "windows_glazing" + "windows_glazing", "mechanical_ventilation" ]: # We update the data, as defined in the recommendaton for prefix in ["walls", "roof", "floor"]: @@ -563,7 +556,7 @@ class Property: "solid_floor_insulation", "suspended_floor_insulation", "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating", "cylinder_thermostat", "mixed_glazing", - "extension_cavity_wall_insulation", + "extension_cavity_wall_insulation", "mechanical_ventilation", ]: raise NotImplementedError( "Implement me, given type %s" % recommendation["type"] @@ -1262,7 +1255,10 @@ class Property: # If the property is in a conservation area, is listed or is a heriage building, solar panels # become a difficult measure to generally get through planning restrictions and so we do not recommend # solar panels - if self.restricted_measures: + if self.is_listed or self.is_heritage: + # If the property is in a conservation area, we can still recommend solar panels + # but they need to be done in a way that is sympathetic to the building. E.g. 
the panels + # may be installed such that they are not visible from the street return False is_valid_property_type = self.data["property-type"] in ["House", "Bungalow", "Maisonette"] diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 0d921bec..96b7c5de 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -207,12 +207,12 @@ class SearchEpc: try: # Updated regex to catch house numbers including alphanumeric ones - pattern = r'(?i)(?:flat|apartment)\s*(\d+\w*)|^\s*(\d+\w*)' + pattern = r'(?i)(?:flat|apartment|room)\s*(\d+\w*)|^\s*(\d+\w*)' match1 = re.search(pattern, address) if match1: return next(g for g in match1.groups() if g is not None) - pattern2 = r'(?i)(flat|apartment)\s*([a-zA-Z]?\d+[a-zA-Z]?)' + pattern2 = r'(?i)(flat|apartment|room)\s*([a-zA-Z]?\d+[a-zA-Z]?)' match2 = re.search(pattern2, address) if match2: return match2.group(2) @@ -226,8 +226,8 @@ class SearchEpc: continue if part == postcode.split(" ")[1]: continue - return part.rstrip( - ",") # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary + return part.rstrip(",") + # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary # number # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found @@ -308,12 +308,20 @@ class SearchEpc: self.data = output["response"] return output["msg"] + if not self.uprn and not self.address1 and not self.postcode: + raise ValueError("No search parameters provided") + uprn_params = {"uprn": self.uprn} if self.uprn else {} - address_params = {"address": self.address1, "postcode": self.postcode} + address_params = {} + if self.address1: + address_params["address"] = self.address1 + if self.postcode: + address_params["postcode"] = self.postcode # We attempt the search with uprn params data = {"rows": []} + api_response = {} if uprn_params: api_response = self._get_epc(params=uprn_params, size=size) if api_response["msg"]["status"] == 200: @@ -321,14 +329,15 @@ class SearchEpc: # If we 
were unsuccessful, we then make a second attempt to fetch the data. We find that # properties are sometimes listed under the wrong UPRN - api_response = self._get_epc(params=address_params, size=size) - if api_response["msg"]["status"] == 200: - # We update the data with the correct uprn - if self.uprn: - for x in api_response["response"]["rows"]: - x["uprn"] = self.uprn + if address_params: + api_response = self._get_epc(params=address_params, size=size) + if api_response["msg"]["status"] == 200: + # We update the data with the correct uprn + if self.uprn: + for x in api_response["response"]["rows"]: + x["uprn"] = self.uprn - data["rows"].extend(api_response["response"]["rows"]) + data["rows"].extend(api_response["response"]["rows"]) # We no de-dupe on lmk-key to avoid duplicates seen = set() @@ -746,6 +755,10 @@ class SearchEpc: "photo-supply"] ) + estimated_epc["co2-emiss-curr-per-floor-area"] = ( + estimated_epc["co2-emissions-current"] / estimated_epc["total-floor-area"] + ) + estimated_epc["postcode"] = self.postcode if not self.uprn: # Update self.uprn too diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 183503d5..cda32faa 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -9,8 +9,7 @@ from tqdm import tqdm from math import sin, cos, sqrt, atan2, radians from utils.logger import setup_logger -from recommendations.Costs import Costs, MCS_SOLAR_PV_COST_DATA -from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel +from recommendations.Costs import Costs from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.Property import Property from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data @@ -54,6 +53,13 @@ class GoogleSolarApi: # Max area of a roof space we allow panels for PERCENTAGE_OF_ROOF_LIMIT = 0.8 + # If the roof area that comes back from the solar API is more than 25% larger than the estiamted roof area + # that we 
calcualte based on the property dimensions, we will correct the roof area + ROOF_AREA_TOLERANCE = 1.25 + + # Error Messages + ENTITY_NOT_FOUND_ERROR = 'Requested entity was not found.' + def __init__(self, api_key, max_retries=5): """ Initialize the GoogleSolarApi class with the provided API key and maximum retries. @@ -112,6 +118,13 @@ class GoogleSolarApi: response.raise_for_status() # Raise an error for bad status codes return response.json() except requests.exceptions.RequestException as e: + if ( + (e.response.status_code == 404) & + (e.response.json()["error"]["message"] == self.ENTITY_NOT_FOUND_ERROR) + ): + logger.warning("No building insights found for the given location.") + return {"error": self.ENTITY_NOT_FOUND_ERROR} + attempt += 1 print(f"Attempt {attempt} failed: {e}") time.sleep(2 ** attempt) # Exponential backoff @@ -155,6 +168,10 @@ class GoogleSolarApi: # If we have no data in the db, or updated_at is more than 6 months if self.insights_data is None or is_outdated: self.insights_data = self.get_building_insights(longitude, latitude, required_quality) + if self.insights_data.get("error") == self.ENTITY_NOT_FOUND_ERROR: + # We use default performance since in this case, we couldn't retrieve data. 
We don't store + self.panel_performance = self.default_panel_performance(property_instance=property_instance) + return self.need_to_store = True # Extract key data from the insights response @@ -168,7 +185,13 @@ class GoogleSolarApi: ): self.exclude_likely_duplicate_surfaces() + # We constrain the roof area, based on the floor area to be more conservative self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2'] + if ( + self.roof_area > property_instance.roof_area * self.ROOF_AREA_TOLERANCE + ) | (self.roof_area < (2 - self.ROOF_AREA_TOLERANCE) * property_instance.roof_area): + self.roof_area = property_instance.roof_area + self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2'] self.panel_wattage = self.insights_data["solarPotential"]["panelCapacityWatts"] if self.panel_wattage != 400: @@ -265,8 +288,6 @@ class GoogleSolarApi: # minimum is 4 min_panels = self.MIN_BUILDING_PANELS if is_building else self.MIN_UNIT_PANELS - cost_instance = Costs(property_instance=property_instance) if property_instance is not None else None - # Remove any north facing roof segments panel_performance = [] for config in self.insights_data["solarPotential"].get("solarPanelConfigs", []): @@ -300,18 +321,12 @@ class GoogleSolarApi: if roi_summary["n_panels"].sum() < min_panels: continue - if cost_instance is None: - total_cost = Costs.solar_pv( - n_panels=roi_summary["n_panels"].sum(), - has_battery=False, - n_floors=3, # Assume the most amount of scaffolding - )["total"] - else: - total_cost = cost_instance.solar_pv( - n_panels=roi_summary["n_panels"].sum(), - has_battery=False, - n_floors=property_instance.number_of_floors, - )["total"] + total_cost = Costs.solar_pv( + n_panels=roi_summary["n_panels"].sum(), + has_battery=False, + # Assume the most amount of scaffolding + n_floors=3 if property_instance is None else property_instance.number_of_floors + )["total"] weighted_ratio = np.average( roi_summary["ratio"].values, 
weights=roi_summary["generated_dc_energy"].values @@ -820,7 +835,6 @@ class GoogleSolarApi: if unit["longitude"] is None or unit["latitude"] is None: # At this point, we've checked that solar PV is valid, and so we provide some defaults - property_instance.set_solar_panel_configuration( solar_panel_configuration={ "insights_data": None, @@ -875,19 +889,19 @@ class GoogleSolarApi: cost_instance = Costs(property_instance=property_instance) - # We return a 2.4 and 4 kwp system + # We return a 1.6 and 3.2 kwp system panel_performance = pd.DataFrame( [ { - 'n_panels': 10, - 'yearly_dc_energy': 4000 * 0.99, # Assumed 99% efficient wattage -> dc + 'n_panels': 8, + 'yearly_dc_energy': 3200 * assumptions.MEDIAN_WATTAGE_TO_DC, 'total_cost': cost_instance.solar_pv( - n_panels=10, has_battery=False, n_floors=property_instance.number_of_floors + n_panels=8, has_battery=False, n_floors=property_instance.number_of_floors )["total"], 'weighted_ratio': None, - 'panneled_roof_area': 10 * assumptions.RDSAP_AREA_PER_PANEL, - 'array_wattage': 4000, - 'initial_ac_kwh_per_year': 4000 * 0.95, # Assumed 95% efficient wattage -> ac + 'panneled_roof_area': 8 * assumptions.RDSAP_AREA_PER_PANEL, + 'array_wattage': 3200, + 'initial_ac_kwh_per_year': 3200 * assumptions.MEDIAN_WATTAGE_TO_AC, 'lifetime_ac_kwh': None, 'lifetime_dc_kwh': None, 'roi': None, @@ -899,15 +913,15 @@ class GoogleSolarApi: 'rank': None }, { - 'n_panels': 6, - 'yearly_dc_energy': 2400 * 0.99, # Assumed 99% efficient wattage -> dc + 'n_panels': 4, + 'yearly_dc_energy': 1600 * assumptions.MEDIAN_WATTAGE_TO_DC, 'total_cost': cost_instance.solar_pv( n_panels=6, has_battery=False, n_floors=property_instance.number_of_floors )["total"], 'weighted_ratio': None, - 'panneled_roof_area': 6 * assumptions.RDSAP_AREA_PER_PANEL, - 'array_wattage': 2400, - 'initial_ac_kwh_per_year': 2400 * 0.95, # Assumed 95% efficient wattage -> ac + 'panneled_roof_area': 4 * assumptions.RDSAP_AREA_PER_PANEL, + 'array_wattage': 1600, + 
'initial_ac_kwh_per_year': 1600 * assumptions.MEDIAN_WATTAGE_TO_AC, 'lifetime_ac_kwh': None, 'lifetime_dc_kwh': None, 'roi': None, diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 8d0c05be..f1090ef3 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -11,6 +11,9 @@ SOLAR_CONSUMPTION_WITH_BATTERY_PROPORTION = 0.7 # Typically, each solar panel takes up around 3.4 m2 of roof space under RdSAP. This was been verified in Elmhurst RDSAP_AREA_PER_PANEL = 3.4 +# This is a median based on a sample of properties +MEDIAN_WATTAGE_TO_AC = 0.965 +MEDIAN_WATTAGE_TO_DC = 0.99 SOCIAL_TENURES = ["Rented (social)", "rental (social)"] @@ -56,3 +59,9 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85}, "From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85}, } + +# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it +# if one of these has been recommended. 
+measures_needing_ventilation = [ + "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation" +] diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index f0af3343..9f8abbf4 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -19,6 +19,7 @@ class MaterialType(enum.Enum): flat_roof_insulation = "flat_roof_insulation" room_roof_insulation = "room_roof_insulation" windows_glazing = "windows_glazing" + cavity_wall_extraction = "cavity_wall_extraction" iwi_wall_demolition = "iwi_wall_demolition" iwi_vapour_barrier = "iwi_vapour_barrier" diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index d82e774b..80a531bf 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -1,3 +1,4 @@ +import ast import json from datetime import datetime @@ -27,6 +28,7 @@ from backend.app.dependencies import validate_token from backend.app.plan.schemas import PlanTriggerRequest from backend.app.plan.utils import get_cleaned from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc +import backend.app.assumptions as assumptions from backend.ml_models.api import ModelApi from backend.Property import Property @@ -43,6 +45,7 @@ from backend.ml_models.Valuation import PropertyValuation from etl.bill_savings.KwhData import KwhData from etl.spatial.OpenUprnClient import OpenUprnClient +from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc logger = setup_logger() @@ -356,7 +359,6 @@ def extract_property_request_data( ), {}) if isinstance(property_non_invasive_recommendations.get("recommendations"), str): - import ast property_non_invasive_recommendations["recommendations"] = ast.literal_eval( property_non_invasive_recommendations["recommendations"] ) @@ -367,7 +369,7 @@ def extract_property_request_data( else: transformed.append(rec) - property_non_invasive_recommendations["recommendations"] = str(transformed) + 
property_non_invasive_recommendations["recommendations"] = transformed # Check if the valuation data has uprn valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else False @@ -513,6 +515,14 @@ async def trigger_plan(body: PlanTriggerRequest): ) ) + # if we have a remote assment data type, we pull the additional data and include it + if body.event_type == "remote_assessment": + logger.info("Retrieving find my epc data") + property_non_invasive_recommendations = RetrieveFindMyEpc.get_from_epc( + epc_searcher.newest_epc + ) + # TODO: We need to determine if we should make a patch, if the EPC is new + epc_records = patch_epc(patch, epc_records) prepared_epc = EPCRecord( @@ -543,7 +553,8 @@ async def trigger_plan(body: PlanTriggerRequest): model_api = ModelApi( portfolio_id=body.portfolio_id, timestamp=created_at, - prediction_buckets=get_prediction_buckets() + prediction_buckets=get_prediction_buckets(), + max_retries=1 ) await model_api.async_warm_up_lambdas( model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES @@ -683,8 +694,6 @@ async def trigger_plan(body: PlanTriggerRequest): ) # We now insert kwh estimates and costs into the recommendations - # TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in - # Recommendations, but also the Property class logger.info("Calculating tenant savings - kwh and bills") for property_id in tqdm([p.id for p in input_properties]): property_recommendations = recommendations.get(property_id, []) @@ -701,23 +710,67 @@ async def trigger_plan(body: PlanTriggerRequest): property_instance.current_energy_bill = property_current_energy_bill # Insert the predictions into the recommendations and run the optimiser - # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a - # possibility with heating system? 
- for p in input_properties: if not recommendations.get(p.id): continue - input_measures = prepare_input_measures(recommendations[p.id], body.goal) + # we need to double unlist because we have a list of lists + property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} + + property_required_measures = [ + m for m in recommendations[p.id] if m[0]["type"] in body.required_measures + ] + measures_to_optimise = [ + m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures + ] + + # If we have a wall insulation measure, we MUST include mechanical ventilation + # Additionally, if we have required measures, they should also be included. Therefore + # we can discount the number of points required to get to the target SAP band (or increase) + # in the case of ventilation + needs_ventilation = any(x in property_measure_types for x in assumptions.measures_needing_ventilation) + + input_measures = prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation) if not input_measures[0]: # This means that we have no defaults selected_recommendations = {} + solution = [] else: + fixed_gain = 0 + if property_required_measures: + # We get the SAP points for the required measures + if body.goal != "Increasing EPC": + raise NotImplementedError("Only EPC optimisation is currently supported") + sap_by_type = [ + {"type": rec["type"], "sap_points": rec["sap_points"]} for recs in property_required_measures + for rec in recs + ] + # We get a MAX sap points per type + max_per_type = ( + pd.DataFrame(sap_by_type).groupby("type")["sap_points"].max().to_dict() + ) + fixed_gain = sum(max_per_type.values()) + + property_required_measure_types = {rec["type"] for rec in sap_by_type} + + # if the property needs ventilation, but the measure we optimise didn't include + # venilation we add the points for ventilation as a fixed gain + if needs_ventilation and any( + r in property_required_measure_types for r in 
assumptions.measures_needing_ventilation + ): + fixed_gain += next( + (r[0]["sap_points"] for r in recommendations[p.id] if + r[0]["type"] == "mechanical_ventilation"), + 0 + ) + current_sap_points = int(p.data["current-energy-efficiency"]) - target_sap_points = epc_to_sap_lower_bound(body.goal_value) - sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points) + + sap_gain = CostOptimiser.calculate_sap_gain_with_slack( + epc_to_sap_lower_bound(body.goal_value) - current_sap_points + ) - fixed_gain if not body.optimise: if body.goal != "Increasing EPC": @@ -747,10 +800,33 @@ async def trigger_plan(body: PlanTriggerRequest): selected_recommendations = {r["id"] for r in solution} + if property_required_measures: + # We select the cheapest of the required measures, into selected + for recs in property_required_measures: + # We select the cheapest of the required measures + cost_to_id = { + rec["recommendation_id"]: rec["total"] for rec in recs + if rec["recommendation_id"] not in selected_recommendations + } + # Take the recommendation id with the lowers cost + + selected_recommendations.add(min(cost_to_id, key=cost_to_id.get)) + # Update the solution with the selected recommendaitons + solution = [] + for recs in recommendations[p.id]: + for rec in recs: + if rec["recommendation_id"] in selected_recommendations: + solution.append( + { + "id": rec["recommendation_id"], + "cost": rec["total"], + "gain": rec["sap_points"], + "type": rec["type"] + } + ) + # If wall insulation is selected, we also include mechanical ventilation as a best practice measure - if any(x in [r["type"] for r in solution] for x in [ - "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation" - ]): + if any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation): ventilation_rec = next( (r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"), None @@ -779,10 +855,9 @@ async def 
trigger_plan(body: PlanTriggerRequest): ] # We'll also unlist the recommendations so they're a bit easier to handle from here onwards - final_recommendations = [ + recommendations[p.id] = [ rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type ] - recommendations[p.id] = final_recommendations # when we have buildings, we tweak our solar PV recommendations as if one unit needs it, we apply it to all # of them @@ -814,23 +889,23 @@ async def trigger_plan(body: PlanTriggerRequest): # Funding # ~~~~~~~~~~~~~~~~ - for p in input_properties: - funding_calulator = Funding( - tenure=body.housing_type, - starting_epc=p.data["current-energy-rating"], - starting_sap=int(p.data["current-energy-efficiency"]), - postcode=p.postcode, - floor_area=p.floor_area, - council_tax_band=None, # This is seemingly always None at the moment - property_recommendations=recommendations[p.id], - project_scores_matrix=eco_project_scores_matrix, - whlg_eligible_postcodes=whlg_eligible_postcodes, - gbis_abs_rate=15, - eco4_abs_rate=15, - ) - funding_calulator.check_eligibiltiy() - # Insert finding - p.insert_funding(funding_calulator) + # for p in input_properties: + # funding_calulator = Funding( + # tenure=body.housing_type, + # starting_epc=p.data["current-energy-rating"], + # starting_sap=int(p.data["current-energy-efficiency"]), + # postcode=p.postcode, + # floor_area=p.floor_area, + # council_tax_band=None, # This is seemingly always None at the moment + # property_recommendations=recommendations[p.id], + # project_scores_matrix=eco_project_scores_matrix, + # whlg_eligible_postcodes=whlg_eligible_postcodes, + # gbis_abs_rate=15, + # eco4_abs_rate=15, + # ) + # funding_calulator.check_eligibiltiy() + # # Insert finding + # p.insert_funding(funding_calulator) logger.info("Uploading recommendations to the database") # If we have any work to do, we create a new scenario diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 
618bec90..5db3d4d1 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -37,6 +37,7 @@ MEASURE_MAP = { VALID_GOALS = ["Increasing EPC"] VALID_HOUSING_TYPES = ["Social", "Private"] +VALID_EVENT_TYPES = ["remote_assessment"] # Define the validation function for inclusions/exclusions @@ -56,10 +57,16 @@ def check_housing_type(value: str) -> str: return value +def check_event_type(value: str) -> str: + assert value in VALID_EVENT_TYPES, f"{value} is not a valid event type" + return value + + # Use Annotated with BeforeValidator for each list item validation InclusionOrExclusionItem = Annotated[str, BeforeValidator(check_inclusion_or_exclusion)] Goal = Annotated[str, BeforeValidator(check_goals)] HousingType = Annotated[str, BeforeValidator(check_housing_type)] +EventType = Annotated[str, BeforeValidator(check_event_type)] class PlanTriggerRequest(BaseModel): @@ -75,6 +82,9 @@ class PlanTriggerRequest(BaseModel): valuation_file_path: Optional[str] = None exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1) inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1) + # This is a list of measures that we want to be included, if they are options + # Default to empty + required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=[], min_length=1) scenario_name: Optional[str] = "" multi_plan: Optional[bool] = False @@ -82,3 +92,7 @@ class PlanTriggerRequest(BaseModel): default_u_values: Optional[bool] = True ashp_cop: Optional[float] = 2.8 + + # When performing a remote assessment, if this has been set, it will allow the engine to + # pull data from the find my epc website, to utilise as part of a remote assessment + event_type: Optional[float] = "remote_assessment", diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 07d4642d..34fb02e7 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,9 +1,5 @@ -import pandas as pd -from 
backend.Property import Property from utils.s3 import read_from_s3 -from recommendations.recommendation_utils import get_wall_u_value, get_floor_u_value, get_roof_u_value - from backend.app.config import get_settings import msgpack diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index c2f2dcd9..c108f1b7 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -39,6 +39,7 @@ class ModelApi: timestamp, prediction_buckets, base_url="https://api.dev.hestia.homes", + max_retries=2, ): """ This class handles the communication with the Model APIs. These models include SAP change, heat demain change @@ -54,6 +55,8 @@ class ModelApi: self.timestamp = timestamp self.prediction_buckets = prediction_buckets + self.max_retries = max_retries + @staticmethod def predictions_template(): return { @@ -295,15 +298,33 @@ class ModelApi: async def run_batches(): for chunk in tqdm(to_loop_over, total=len(to_loop_over)): - predictions_dict = await self.predict_all_async( - df=data.iloc[chunk:chunk + batch_size], - bucket=bucket, - model_prefixes=model_prefixes, - extract_ids=extract_ids - ) - for key, scored in predictions_dict.items(): - all_predictions[key] = pd.concat([all_predictions[key], scored]) + attempts = 0 + success = False + while attempts <= self.max_retries and not success: + try: + predictions_dict = await self.predict_all_async( + df=data.iloc[chunk:chunk + batch_size], + bucket=bucket, + model_prefixes=model_prefixes, + extract_ids=extract_ids + ) + + for key, scored in predictions_dict.items(): + all_predictions[key] = pd.concat([all_predictions[key], scored]) + + success = True + except Exception as e: + attempts += 1 + logger.error( + f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}/{self.max_retries}). " + f"Error: {e}" + ) + + if attempts > self.max_retries: + logger.error( + f"Skipping batch {chunk}-{chunk + batch_size} after {self.max_retries} failed attempts." 
+ ) # Check if there is an existing event loop try: diff --git a/backend/requirements/requirements.txt b/backend/requirements/requirements.txt index dd5c34ca..577776be 100644 --- a/backend/requirements/requirements.txt +++ b/backend/requirements/requirements.txt @@ -29,3 +29,5 @@ mip==1.15.0 pyarrow==17.0.0 fastparquet==2024.5.0 aiohttp==3.10.10 +# find my epc +beautifulsoup4 diff --git a/etl/costs/app.py b/etl/costs/app.py index 797191d2..f2bf365b 100644 --- a/etl/costs/app.py +++ b/etl/costs/app.py @@ -11,7 +11,7 @@ import inspect src_file_path = inspect.getfile(lambda: None) -DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20240917 Hestia Materials.xlsx" +DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20250316 Domna Materials.xlsx" # Environment file is at the same level as this file ENV_FILE = Path(src_file_path).parent / "etl" / "costs" / ".env" dotenv.load_dotenv(ENV_FILE) @@ -91,6 +91,7 @@ def app(): lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0) flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0) window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0) + rir_insulation_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="room_roof_insulation", header=0) # Form a single table to be uploaded costs = pd.concat( @@ -104,7 +105,8 @@ def app(): ewi_costs, lel_costs, flat_roof_costs, - window_costs + window_costs, + rir_insulation_costs, ] ) diff --git a/etl/customers/benyon/epc_data.py b/etl/customers/benyon/epc_data.py new file mode 100644 index 00000000..9ba71f2f --- /dev/null +++ b/etl/customers/benyon/epc_data.py @@ -0,0 +1,71 @@ +""" +Rough script to get the EPC data for Benyon +""" + +import pandas as pd +import os +from dotenv import load_dotenv +from backend.SearchEpc import SearchEpc +from asset_list.utils import get_data + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +asset_list 
= pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/List of All Properties ecl Grd Rents in " + "Alphabetical Order.xlsx", + header=1 +) +asset_list.columns = ["tennancy", "landlord_id", "landlord_address"] +# Get postcode as the last 2 parts of the address, split on space +asset_list["postcode"] = asset_list["landlord_address"].apply(lambda x: x.split(" ")[-2] + " " + x.split(" ")[-1]) + +asset_list["house_no"] = asset_list.apply( + lambda x: SearchEpc.get_house_number(address=x["landlord_address"], postcode=x["postcode"]), axis=1 +) + +epc_data, errors, no_epc = get_data( + df=asset_list, + manual_uprn_map={}, + epc_auth_token=EPC_AUTH_TOKEN, + uprn_column=None, + fulladdress_column="landlord_address", + address1_column="house_no", + postcode_column="postcode", + property_type_column=None, + built_form_column=None, + epc_api_only=True, + row_id_name="landlord_id", +) + +df = asset_list[asset_list["landlord_id"].isin(no_epc)] +epc_df = pd.DataFrame(epc_data) +epc_df["current-energy-rating"].value_counts() +epc_df["property-type"].value_counts() +epc_df["walls-description"].value_counts(normalize=True) + +asset_list = asset_list.merge( + epc_df[ + [ + "landlord_id", "current-energy-rating", "property-type", "total-floor-area", "roof-description", + "walls-description", "co2-emissions-current" + ] + ], + how="left", + left_on="landlord_id", + right_on="landlord_id" +) +asset_list.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/asset_list.csv", index=False +) + +asset_list_big = asset_list.merge( + epc_df, + how="left", + left_on="landlord_id", + right_on="landlord_id" +) +asset_list_big.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/asset_list_full_data.csv", + index=False +) diff --git a/etl/customers/bromford/data_cleanup.py b/etl/customers/bromford/data_cleanup.py new file mode 100644 index 00000000..45429523 --- /dev/null +++ 
b/etl/customers/bromford/data_cleanup.py @@ -0,0 +1,192 @@ +""" +12th April 2025 +This script attempts to clean up the various pieces of data we have for Bromford, with the intention of producing a +standardised asset list +""" + +import pandas as pd + +# Step 1 +# The inspectons data is spread across three different files. We attempt to produce one finalised asset list, with +# comprehensive inspections + +# Primary asset list +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford Asset " + "List.xlsx", + sheet_name="Asset List" +) + +# +inspections_1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD " + "MDS.xlsx", + sheet_name="Data list" +) +inspections_1["Heating Type"] = (inspections_1["Heating Type"] + " " + inspections_1["Heating fuel"]).str.strip() + +inspections_2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD " + "MERLIN LANE.xlsx", + sheet_name="Report" +) +inspections_2["AssetTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[-1] +inspections_2["PropTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[:-1].str.join(" ") + +inspections_3 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD " + "SEVERN VALE - KLARKE.xlsx", + sheet_name="Asset report" +) + +inspections_3["FullAddress"] = inspections_3["T1_Address1"] + ", " + inspections_3["T1_Address2"] + +# On inspections 3, we have multiple sheets which describe the heating +heating_systems = [] +for sheet_name in [ + "Storage Heaters", "No Heating", "Underfloor Heating", "Rointe Electric Heating", "Air Source Heating", + "Gas Central Heating", "Electric Boiler", "Oil Fired Central Heating", + "Communal Boilers", "Panel Heaters" +]: + df = pd.read_excel( + 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme " + "Rebuild/Inspections/BROMFORD " + "SEVERN VALE - KLARKE.xlsx", + sheet_name=sheet_name + ) + df = df[["UPRN"]] + df["Heating Type"] = sheet_name + heating_systems.append(df) + +heating_systems = pd.concat(heating_systems) +# We have no clue which one is correct, we have some dupes +heating_systems = heating_systems.drop_duplicates("UPRN") +heating_systems = heating_systems.rename(columns={"UPRN": "Asset"}) +heating_systems["Asset"] = heating_systems["Asset"].astype(int) + +inspections_3 = inspections_3.merge(heating_systems, how="left", on="Asset") + +# Create a consolidated inspections sheet +inspections = pd.concat( + [ + inspections_1[["Asset", "Construction type", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]], + inspections_2[["Asset", "Construction type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]], + inspections_3[["Asset", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]], + ] +) + +inspections_address_data = pd.concat( + [ + inspections_1[ + ["Asset", "FullAddress", "PostCode", "ConYear", "Beds", "AssetTypeDesc", "PropTypeDesc", 'ManAreaDesc', ] + ], + inspections_2[ + ['Asset', 'FullAddress', 'AccomType', "AssetTypeDesc", "PropTypeDesc", 'ConYear', 'Postcode'] + ].rename(columns={"Postcode": "PostCode"}), + inspections_3[ + ['Asset', "FullAddress", 'T1_Postcode', 'T1_Build Year', 'T1_AssetType'] + ].rename( + columns={"T1_Postcode": "PostCode", "T1_Build Year": "ConYear", "T1_AssetType": "AssetTypeDesc"} + ), + ] +) + +# Remove some error values +inspections = inspections[~inspections["Asset"].isin( + [ + "They're all green partial fill they're all green this", + "South Staffordshire District Council", + 'Blk Milton Crt F9-10, Perton, Wolverhampton' + ] +)] + +inspections["Asset"] = inspections["Asset"].astype(str) +asset_list["Asset"] = asset_list["Asset"].astype(str) +inspections_address_data["Asset"] = 
inspections_address_data["Asset"].astype(str) +inspections['WFT Findings'] = inspections['WFT Findings'].replace(r'^\s*$', pd.NA, regex=True) + +# We have some cases where the inspetions data has dupes on Asset (the ID column). We take the instance that is +# populated +inspections = inspections.sort_values(by='WFT Findings', na_position='last') +inspections = inspections.drop_duplicates(subset='Asset', keep='first') + +# We have dupes in the asset list +asset_list = asset_list.drop_duplicates("Asset") + +# Merge on +missed_asset_ids = inspections[ + ~inspections["Asset"].isin(asset_list["Asset"].values) +]["Asset"].values + +missed_assets = inspections_address_data[ + inspections_address_data["Asset"].isin(missed_asset_ids) +] +missed_assets = missed_assets.drop_duplicates("Asset") + +# We produce a larger asset list +asset_list = pd.concat([asset_list, missed_assets]) + +asset_list = asset_list.merge( + inspections, how="left", on="Asset" +) +asset_list["WFT Findings"] = asset_list["WFT Findings"].fillna("No Inspections Note") + +# Store +# asset_list.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared " +# "data/asset_list.xlsx" +# ) + +# We now prepare outcomes into a single file +pv_outcomes = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford PV " + "Outcomes.csv", + encoding='cp1252' +) +pv_outcomes["measure_type"] = "solar" + +other_outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/(Bromford) " + "15.04.2024.xlsx", + sheet_name="ECO4 & GBIS", + header=1 +) +other_outcomes["measure_type"] = "cwi" + +combined_outcomes = pd.concat( + [ + other_outcomes[["NO", "ADDRESS", "POSTCODE", "WEEK COMMENCING", "OUTCOMES", "NOTES"]].rename( + columns={ + "NO": "No", "ADDRESS": "Address", "POSTCODE": "Postcode", "WEEK COMMENCING": "Week Commencing", + "OUTCOMES": "Outcome", 
"NOTES": "Notes" + } + ), + pv_outcomes[['No', 'Address', 'Postcode', "Week Commencing", "Outcome", "Notes"]] + ] +) + +# Store +# combined_outcomes.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared " +# "data/outcomes.xlsx" +# ) + +# Submissions sheet - +eco3_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 Submissions.csv", + encoding='cp1252' +) +# Get rid of the unnamed columns +unnamed_columns = [c for c in eco3_submissions.columns if "Unnamed: " in c] +eco3_submissions = eco3_submissions.drop(columns=unnamed_columns) +# Store +eco3_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 submissions.csv", + index=False +) + +eco4_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 4 submissions.csv", +) + +same_cols = [c for c in eco4_submissions.columns if c in eco3_submissions.columns] diff --git a/etl/customers/mod/pilot/1. Create Sample.py b/etl/customers/mod/pilot/1. Create Sample.py new file mode 100644 index 00000000..fd045294 --- /dev/null +++ b/etl/customers/mod/pilot/1. 
Create Sample.py @@ -0,0 +1,205 @@ +import os +import pandas as pd +from tqdm import tqdm +from dotenv import load_dotenv +from backend.SearchEpc import SearchEpc +from etl.spatial.OpenUprnClient import OpenUprnClient +from asset_list.utils import get_data +from utils.s3 import save_csv_to_s3 + +PORTFOLIO_ID = 139 +USER_ID = 8 + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + + +def app(): + """ + Given the sample data and additonal properties, this function prepares the data + :return: + """ + folder_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme" + sample_list = pd.read_excel(f"{folder_path}/20250227_DIO_Accommodation_Sample_Properties.xlsx") + asset_data = pd.read_excel(f"{folder_path}/20250303_DIO_Accommodation_Property_Attribution.xlsx") + + sample_list = sample_list[sample_list["BLDNG_COUNTRY_NAME"].isin(["ENGLAND", "WALES"])] + + # Merge on the UPRN + sample_list = sample_list.merge( + asset_data[["BLDNG_ID", "BLNDG_GOVERMENT_UPRN"]].drop_duplicates(), + how="left", on="BLDNG_ID" + ) + sample_list["BLNDG_GOVERMENT_UPRN"] = sample_list["BLNDG_GOVERMENT_UPRN"].astype("Int64") + + # Use the EPC API to get corrected postcodes + model_asset_list = [] + missed = [] + for _, x in tqdm(sample_list.iterrows(), total=len(sample_list)): + + if pd.isnull(x["BLNDG_GOVERMENT_UPRN"]): + continue + searcher = SearchEpc( + address1="", + postcode="", + uprn=x["BLNDG_GOVERMENT_UPRN"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="" + ) + searcher.find_property(skip_os=True) + newest_epc = searcher.newest_epc + if newest_epc is None: + missed.append(x["BLNDG_GOVERMENT_UPRN"]) + continue + + model_asset_list.append(newest_epc) + + model_asset_list = pd.DataFrame(model_asset_list) + model_asset_list["uprn"] = model_asset_list["uprn"].astype(int) + + spatial_data = OpenUprnClient.get_spatial_data( + uprns=model_asset_list["uprn"].tolist(), bucket_name="retrofit-data-dev" + ) + + # We determine if the 
building is listed, heritage or in a conservation area + + # Merge on the property features + features = asset_data.drop( + columns=["BUILDING_SYSTEM_ITEM_NAME", "OBSERVED_CONDITION_DESCRIPTION"] + ).drop_duplicates() + + df = features.merge( + model_asset_list, how="inner", right_on="uprn", left_on="BLNDG_GOVERMENT_UPRN" + ).merge( + pd.DataFrame(spatial_data).rename(columns={"UPRN": "uprn"}), how="left", on="uprn" + ) + + # Store data locally + # df.to_csv(folder_path + "/MOD property data.csv", index=False) + + # Produce as asset list for analysis + + df["row_id"] = df.index + + epc_data, errors, no_epc = get_data( + df=df, + manual_uprn_map={}, + epc_auth_token=EPC_AUTH_TOKEN, + uprn_column="uprn", + fulladdress_column="address", + address1_column="address1", + postcode_column="postcode", + property_type_column=None, + built_form_column=None, + epc_api_only=False, + row_id_name="row_id", + ) + + non_invasive_recommendations = [] + for x in epc_data: + non_invasive_recommendations.append( + { + "uprn": x["uprn"], + "recommendations": x["find_my_epc_data"]["recommendations"] + } + ) + + # also include the floor area + asset_list = df[ + ["uprn", "address1", "postcode", "NUMBER_OF_BEDROOMS", "BLDNG_STOREYS_QTY", "BLDNG_MSRMNT_VAL"] + ].rename( + columns={ + "address1": "address", + "NUMBER_OF_BEDROOMS": "n_bedrooms", + "BLDNG_STOREYS_QTY": "number_of_floors", + "BLDNG_MSRMNT_VAL": "floor_area" + } + ) + + filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + # Store the non-invasive recommendations in s3 + non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv" + save_csv_to_s3( + dataframe=pd.DataFrame(non_invasive_recommendations), + bucket_name="retrofit-plan-inputs-dev", + file_name=non_invasive_recommendations_filename + ) + + # Scenario 1 - EPC C + body = { + "portfolio_id": str(PORTFOLIO_ID), + 
"housing_type": "Private", + "goal": "Increasing EPC", + "goal_value": "C", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "valuation_file_path": "", + "scenario_name": "Hit EPC C", + "multi_plan": True, + "budget": None, + # "inclusions": [ + # "cavity_wall_insulation", + # "loft_insulation", + # "windows", + # "solar_pv", + # "air_source_heat_pump" + # ] + } + print(body) + + # Scenario 2 - EPC B + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increasing EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "valuation_file_path": "", + "scenario_name": "Hit EPC B", + "multi_plan": True, + "budget": None, + # "inclusions": [ + # "cavity_wall_insulation", + # "loft_insulation", + # "windows", + # "solar_pv", + # "air_source_heat_pump" + # ] + } + print(body) + + # Scenario 3 - EPC B, 3.5 COP ASHP + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increasing EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "valuation_file_path": "", + "scenario_name": "Hit EPC B - 3.5 COP ASHP", + "multi_plan": True, + "budget": None, + "ashp_cop": 3.5 + # "inclusions": [ + # "cavity_wall_insulation", + # "loft_insulation", + # "windows", + # "solar_pv", + # "air_source_heat_pump" + # ] + } + print(body) diff --git a/etl/customers/mod/pilot/2. Create Excel Model.py b/etl/customers/mod/pilot/2. Create Excel Model.py new file mode 100644 index 00000000..9a9eda86 --- /dev/null +++ b/etl/customers/mod/pilot/2. 
Create Excel Model.py @@ -0,0 +1,652 @@ +from pprint import pprint +import pandas as pd +import numpy as np +from backend.app.utils import sap_to_epc +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel + + +def get_data(portfolio_id, scenario_ids): + session = sessionmaker(bind=db_engine)() + session.begin() + + # Get properties and their details for a specific portfolio + properties_query = session.query( + PropertyModel, + PropertyDetailsEpcModel + ).join( + PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id + ).filter( + PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID + ).all() + + # Transform properties data to include all fields dynamically + properties_data = [ + {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, + **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in + PropertyDetailsEpcModel.__table__.columns}} + for prop in properties_query + ] + + # Get property IDs from fetched properties + + # Get plans linked to the fetched properties + plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + + # Transform plans data to include all fields dynamically + plans_data = [ + {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + for plan in plans_query + ] + + # Extract plan IDs for filtering recommendations through PlanRecommendations + plan_ids = [plan['id'] for plan in plans_data] + + # Get recommendations through PlanRecommendations for those plans and that are default + recommendations_query = session.query( + Recommendation, + Plan.scenario_id + ).join( + PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id + ).join( + Plan, Plan.id == 
PlanRecommendations.plan_id # Join with Plan to access scenario_id + ).filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True # Filtering for default recommendations + ).all() + + # Transform recommendations data to include all fields dynamically and include scenario_id + recommendations_data = [ + {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') + else getattr(rec, col.name) for + col in Recommendation.__table__.columns}, + "Scenario ID": rec.scenario_id} + for rec in recommendations_query + ] + + session.close() + + return properties_data, plans_data, recommendations_data + + +def app(): + """ + Given a portfolio and a scenario, this function prepares an excel model to present the data + """ + + # Set the inputs: + portfolio_id = 139 + scenario_ids = [237, 238] + + properties_data, plans_data, recommendations_data = get_data( + portfolio_id=portfolio_id, scenario_ids=scenario_ids + ) + + properties_df = pd.DataFrame(properties_data) + plans_df = pd.DataFrame(plans_data) + recommendations_df = pd.DataFrame(recommendations_data) + + # Merge on the orignal data + mod_property_data = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD property data.csv" + ) + + property_asset_data = properties_df.merge( + mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn" + ) + + property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False) + property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970 + property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip() + property_asset_data["is_insulated"] = ( + property_asset_data["walls"].str.split(",").str[1].str.strip().isin( + ["filled cavity", "with external insulation", "filled cavity and external insulation"] + ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"]) 
+ ) + property_asset_data["is_insulated"] = np.where( + property_asset_data["is_insulated"], "Insulated", "Uninsulated" + ) + property_asset_data["is_pitched"] = np.where( + property_asset_data["is_pitched"], "Pitched roof", "Not Pitched Roof" + ) + property_asset_data["pre_1970"] = np.where( + property_asset_data["pre_1970"], "Pre 1970", "Post 1970" + ) + + archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"] + + assigned_archetypes = ( + property_asset_data.groupby( + archetype_variables + ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False) + ) + + # Make the archetype ID a concatenation of the variables + assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply( + lambda x: "_".join(x.astype(str)), axis=1 + ) + + # Most prominent archetypes + prominent_archetypes = assigned_archetypes.head(6) + other_archetypes = assigned_archetypes.tail(-6) + # 2 or fewer properties in the other archetypes + + property_asset_data = property_asset_data.merge( + assigned_archetypes[archetype_variables + ["archetype_id"]], + how="left", + on=archetype_variables + ) + + # Create age bands: + # 1960-1969 + # 1970-1979 + # 1980-1989 + # 1990-1999 + # 2000+ + property_asset_data["age_band"] = pd.cut( + property_asset_data["BUILD_YEAR"], + bins=[1959, 1969, 1979, 1989, 1999, 2022], + labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"] + ) + + # Create floor area bands + # 0-73 + # 74-97 + # 98-199 + # 200+ + property_asset_data["floor_area_band"] = pd.cut( + property_asset_data["total_floor_area"], + bins=[0, 73, 97, 199, 10000], + labels=["0-73", "74-97", "98-199", "200+"] + ) + + property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy() + property_asset_data["archetype_group"] = np.where( + property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values), + "other", + 
property_asset_data["archetype_group"] + ) + + # For colour + wall_types = ( + property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename( + columns={"wall_type": "Wall Type"} + ) + ) + # Group into age bands + ages = ( + property_asset_data[["age_band"]].value_counts() + .to_frame() + .reset_index().sort_values("age_band", ascending=True) + .rename(columns={"age_band": "Age Band"}) + ) + floor_area_bands = ( + property_asset_data[["floor_area_band"]].value_counts() + .to_frame() + .reset_index().sort_values("floor_area_band", ascending=True) + .rename(columns={"floor_area_band": "Floor Area Band"}) + ) + archetype_counts = ( + property_asset_data[["archetype_group"]]. + value_counts(). + to_frame(). + reset_index() + .rename(columns={"archetype_group": "Archetype"}) + ) + property_types = ( + (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]). + value_counts(). + to_frame(). + reset_index() + .rename(columns={"index": "Property Type", 0: "Count"}) + ) + + # epc breakdown + epc_breakdown = ( + property_asset_data["current_epc_rating"] + .apply(lambda x: x.value) + .value_counts() + .to_frame() + .reset_index() + ) + + # Figures for the deck + # Carbon per property + totals = property_asset_data[ + [ + "Total_household_members", + "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", + "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", + "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + ] + ].copy() + totals["total_cost"] = ( + totals["heating_cost_current"] + + totals["hot_water_cost_current"] + + totals["lighting_cost_current"] + + totals["appliances_cost_current"] + + totals["gas_standing_charge"] + + totals["electricity_standing_charge"] + ) + print( + totals[ + [ + "Total_household_members", + "co2_emissions", + "current_energy_demand", + "total_cost", + ] + ].mean() + ) + + # Store these to an excel + # with 
pd.ExcelWriter( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD archetype breakdowns.xlsx" + # ) as writer: + # wall_types.to_excel(writer, sheet_name="Wall Types", index=False) + # ages.to_excel(writer, sheet_name="Ages", index=False) + # floor_area_bands.to_excel(writer, sheet_name="Floor Area Bands", index=False) + # archetype_counts.to_excel(writer, sheet_name="Archetype Counts", index=False) + # epc_breakdown.to_excel(writer, sheet_name="EPC Rating", index=False) + + contingency = 0.26 + + # We prepare the outputs, by scenario + scenario_data = {} + for scenario in scenario_ids: + + scenario_recommendations_df = recommendations_df[ + recommendations_df["Scenario ID"] == scenario + ].copy() + + scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["total_cost"] = ( + scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] + ) + + recommended_measures_df = scenario_recommendations_df[ + ["property_id", "measure_type", "estimated_cost", "default"] + ] + + recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] + recommended_measures_df = recommended_measures_df.drop(columns=["default"]) + + # Metrics by property ID + aggregated_metrics = scenario_recommendations_df[ + [ + "property_id", "type", "default", "sap_points", + "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency", + "total_cost" + ] + ] + aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]] + aggregated_metrics = aggregated_metrics.groupby("property_id")[ + ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", + "total_cost", "contingency"] + ].sum().reset_index() + + recommendations_measures_pivot = recommended_measures_df.pivot( + index='property_id', + columns='measure_type', + values='estimated_cost' + ) + 
recommendations_measures_pivot = recommendations_measures_pivot.reset_index() + recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) + + # We flag with boolean if the measure is recommended + for c in recommendations_measures_pivot.columns: + if c == "property_id": + continue + recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0 + + # We now create a final output + df = properties_df[ + [ + "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", + "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", + "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", + "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", + "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + ] + ].merge( + recommendations_measures_pivot, how="left", on="property_id" + ).merge( + aggregated_metrics, how="left", on="property_id" + ) + + df["bills_total_cost"] = ( + df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] + + df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"] + ) + + df = df.drop(columns=["property_id"]) + for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]: + df[c] = df[c].fillna(0) + + df = df.rename( + columns={ + "uprn": "UPRN", + "address": "Address", + "postcode": "Postcode", + "walls": "Walls", + "roof": "Roof", + "heating": "Heating", + "windows": "Windows", + "current_epc_rating": "Current EPC Rating", + "current_sap_points": "Current SAP Points", + "total_floor_area": "Total Floor Area", + "number_of_rooms": "Number of Habitable Rooms", + "floor_height": "Floor Height", + } + ) + + # Calculate post SAP + df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"] + df["Predicted Post Works SAP"] = df["Predicted Post Works 
SAP"].round() + df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) + + # Calculate the relative savings on carbon, kwh, and bills + df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"] + df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"] + df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"] + + # Add on the archetype + df = df.merge( + property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn" + ) + + # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it + # the bills go up recommending HHRSH, so it doesn't make it to EPC B + # For mid-terrace units, use the ordnance survey API to check if there is space for a heat pump? + # DO it manually??? + + # Doesn't make it + # misses = df[df["Predicted Post Works EPC"] == "C"] + # # 5 of them are flats and so are difficult to get to EPC B without renewables. 
Possibly not worth it from an + # # ROI perspective + # + # misses[["UPRN", "Address", "Postcode", "property_type"]] + + # UPRN Address Postcode property_type + # 2 100120988937 13 Sidbury Circular Road SP9 7HX Flat No further action + # 3 100120988998 74 Sidbury Circular Road SP9 7JA Flat No further action + # 4 100120989416 47 Zouch Avenue SP9 7LR Flat No further action + # 6 100060585002 42, Muscott Close, Shipton Bellinger SP9 7TX House Can probably take a heat pump + # 37 10000801072 34 Luffenham Place, Chicksands SG17 5XH House Already surveyed as having + # an ASHP - should be looked at + # 121 100120988259 8, Karachi Close SP9 7LW Flat + # 122 100121101217 599, Pepper Place BA12 0DW Flat + # 140 100021455241 33 Blenheim Crescent, Ruislip HA4 7HA House - Solar isnt recommended + # due to bug + # 149 100120915656 10 Bower Green, Shrivenham SN6 8TU House - Solar isn't recommended + # due to bug + + scenario_data[scenario] = df + + printing_scenario_id = scenario_ids[0] + # EPC breakdown + print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts()) + # Cost + # Total cost + print(scenario_data[printing_scenario_id]["total_cost"].sum()) + # Base cost + print(scenario_data[printing_scenario_id]["estimated_cost"].sum()) + # Contingency + print(scenario_data[printing_scenario_id]["contingency"].sum()) + # Costs averaged per unit + print(scenario_data[printing_scenario_id]["total_cost"].mean()) + print(scenario_data[printing_scenario_id]["estimated_cost"].mean()) + print(scenario_data[printing_scenario_id]["contingency"].mean()) + + # Average relative savings + print(scenario_data[printing_scenario_id]["relative_carbon_savings"].mean()) + print(scenario_data[printing_scenario_id]["relative_kwh_savings"].mean()) + print(scenario_data[printing_scenario_id]["relative_bill_savings"].mean()) + + measure_details = {} + for scenario in scenario_ids: + measure_details[scenario] = {} + recommendation_cols = [c for c in 
scenario_data[scenario].columns if "Recommendation:" in c] + measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict() + # Get average cost per measure + measure_columns = [ + c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c + ] + # Take the mean, drop zero columns + measure_costs = {} + for m in measure_columns: + measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()) + measure_details[scenario]["cost_per_measure"] = measure_costs + + pprint(measure_details[scenario_ids[0]]["count"]) + pprint(measure_details[scenario_ids[1]]["count"]) + + # Cost per measures + pprint(measure_details[scenario_ids[0]]["cost_per_measure"]) + pprint(measure_details[scenario_ids[1]]["cost_per_measure"]) + + # Do not get to EPC B: + # 5 are flats + # 1) 34 Luffenham Place, Chicksands SG17 5XH, has been surveyed as having a low performing heat pump - + # should be looked at but several surrounding properties have been surveyed in a similar fashion + # 2) 42, Muscott Close, Shipton Bellinger SP9 7TX, has an oil boiler and the bills go up recommending HHRSH. + # we could non-intrusively recommend a heat pump. 
+ # 3) 33 Blenheim Crescent, Ruislip, HA4 7HA, 100021455241 Solar potential modelling returned nothing - + # manual review indicates that there are multiple trees surrouding the south facing side of the property + # 4) 10 Bower Green, Shrivenham, SN6 8TU - Solar isn't recommended without further survey due to the local + # area being surrounded by trees + + # Scenario adjustments: + # Exclude: boiler_upgrade + # Make ASHP COP 3.5 + + # Metrics we need by scenario: + # Cost + # contingency + # Carbon + # kwh + # bill savings + scenario_metrics = {} + for scenario in scenario_ids: + df = scenario_data[scenario].copy() + + avg_savings = df[ + ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", + "total_cost", "contingency"] + ].mean().to_dict() + avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"] + avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"] + scenario_metrics[scenario] = avg_savings + + pprint(scenario_metrics[scenario_ids[0]]) + pprint(scenario_metrics[scenario_ids[1]]) + + scenario_data[scenario_ids[0]]["loft_insulation"][ + scenario_data[scenario_ids[0]]["loft_insulation"] > 0 + ].mean() + + scenario_data[scenario_ids[0]]["cavity_wall_insulation"][ + scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0 + ].mean() + + # Testing checking floor risk + + import requests + + def get_flood_risk(lat, lon, radius_km=1): + url = "https://environment.data.gov.uk/flood-monitoring/id/floods" + params = { + 'lat': lat, + 'long': lon, + 'dist': radius_km # search radius in km + } + + response = requests.get(url, params=params) + response.raise_for_status() + data = response.json() + + flood_warnings = data.get("items", []) + + if not flood_warnings: + print("No active flood warnings near this location.") + else: + print(f"{len(flood_warnings)} warning(s) found near the location:") + for warning in flood_warnings: + print(f"- Area: 
{warning.get('description')}") + print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})") + print(f" Message changed at: {warning.get('timeMessageChanged')}") + print() + + return flood_warnings + + from shapely.geometry import shape, Point + def get_flood_areas_near_point(lat, lon, radius_km=2): + url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas" + params = { + 'lat': lat, + 'long': lon, + 'dist': radius_km + } + + response = requests.get(url, params=params) + response.raise_for_status() + return response.json().get("items", []) + + def point_in_flood_area(lat, lon): + flood_areas = get_flood_areas_near_point(lat, lon, radius_km=1) + point = Point(lon, lat) # GeoJSON uses (lon, lat) format + + for area in flood_areas: + polygon_url = area.get("polygon") + if not polygon_url: + continue + + polygon_response = requests.get(polygon_url) + polygon_response.raise_for_status() + polygon_geojson = polygon_response.json() + + features = polygon_geojson.get("features", []) + if not features: + continue + + flood_polygon = shape(features[0]['geometry']) + + try: + is_inside = flood_polygon.contains(point) + except: + is_inside = False + + if is_inside: + print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})") + return area + + from tqdm import tqdm + floor_warnings_data = [] + for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)): + # warnings = floor_warnings_data.extend( + # get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1) + # ) + + resp = point_in_flood_area(lat=property["LATITUDE"], lon=property["LONGITUDE"]) + if resp: + floor_warnings_data.append( + { + "uprn": property["uprn"], + "address": property["address"], + "postcode": property["postcode"], + "area": resp + } + ) + continue + + import plotly.graph_objects as go + + labels = [ + "House_Cavity_Insulated_Pitched roof_Pre 1970", + "House_Cavity_Insulated_Pitched roof_Post 1970", + 
"House_Cavity_Uninsulated_Pitched roof_Pre 1970", + "House_Cavity_Uninsulated_Pitched roof_Post 1970", + "other", + "House_System_Uninsulated_Pitched roof_Pre 1970", + "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970" + ] + + values = [62, 36, 21, 16, 16, 4, 2] + + hovertext = [ + "Loft insulation, draft proofing", + "Top-up loft insulation", + "Cavity wall insulation, loft insulation", + "Cavity wall insulation, ventilation", + "Bespoke retrofit measures", + "External wall insulation, roof insulation", + "Flat roof insulation, internal wall insulation" + ] + + fig = go.Figure(go.Treemap( + labels=labels, + parents=[""] * len(labels), # No root + values=values, + hovertext=hovertext, + hoverinfo="text", + textinfo="none", + marker=dict( + line=dict(color="white", width=4), + colors=values, + colorscale="Blues" + ) + )) + + fig.update_layout( + margin=dict(t=10, l=10, r=10, b=10), + plot_bgcolor="white", + paper_bgcolor="white" + ) + + fig.show() + + # Get the recommended measures by scenario id + recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c] + measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[ + recommendation_cols + ].sum().reset_index() + + measure_counts_by_scenario.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv" + ) + + # Estimate average valuation improvment by scenarios + valuation_data = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/property_valuation.csv" + ) + + from backend.ml_models.Valuation import PropertyValuation + + uplift = [] + for _, x in valuation_data.iterrows(): + uprn = x["uprn"] + + to_append = {"uprn": uprn} + for _id in scenario_ids: + scenario = scenario_data[_id][ + scenario_data[_id]["uprn"] == uprn + ].squeeze() + + val = PropertyValuation.estimate_valuation_improvement( + current_value=x["valuation"], + 
current_epc=scenario["Current EPC Rating"].value, + target_epc=scenario["Predicted Post Works EPC"], + total_cost=None + ) + + to_append[_id] = val["average_increase"] + + uplift.append(to_append) + + uplift = pd.DataFrame(uplift) + print(uplift[scenario_ids[0]].mean()) + # £8,161 + print(uplift[scenario_ids[1]].mean()) + # £16,938 diff --git a/etl/customers/mod/pilot/3. Past Project Costs.py b/etl/customers/mod/pilot/3. Past Project Costs.py new file mode 100644 index 00000000..79a0493c --- /dev/null +++ b/etl/customers/mod/pilot/3. Past Project Costs.py @@ -0,0 +1,76 @@ +import pandas as pd + +# Get the wave 2 costing data and produce some breakdowns +costs = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/Measure cost study for MOD.xlsx", + header=2 +) + +# Get the EPC data for these + + +# Cavity +cwi_costs = costs[ + ['Model', 'Total invoiced (including VAT)'] +].copy() +cwi_costs["Model"] = "CWI - " + cwi_costs["Model"] +cwi_costs = cwi_costs[~pd.isnull(cwi_costs["Total invoiced (including VAT)"])] + +# Loft +li_costs = costs[ + ['Model.2', 'Total invoiced (including VAT).2'] +].copy() +li_costs["Model.2"] = "LI - " + li_costs["Model.2"] +li_costs = li_costs[~pd.isnull(li_costs["Total invoiced (including VAT).2"])] +# Rename +li_costs.columns = ["Model", "Total invoiced (including VAT)"] + +# Windows +windows_costs = costs[ + ['Model.3', 'Total invoiced (including VAT).3'] +].copy() +windows_costs["Model.3"] = "Windows - " + windows_costs["Model.3"] +windows_costs = windows_costs[~pd.isnull(windows_costs["Total invoiced (including VAT).3"])] +# Rename +windows_costs.columns = ["Model", "Total invoiced (including VAT)"] + +# Doors +doors_costs = costs[ + ['Model.4', 'Total invoiced (including VAT).4'] +].copy() +doors_costs["Model.4"] = "Doors - " + doors_costs["Model.4"] +doors_costs = doors_costs[~pd.isnull(doors_costs["Total invoiced (including VAT).4"])] +# Rename +doors_costs.columns = ["Model", "Total invoiced 
(including VAT)"] + +# ASHP +ashps_costs = costs[ + ['Model.5', 'Total invoiced (including VAT).5'] +].copy() +ashps_costs["Model.5"] = "ASHP - " + ashps_costs["Model.5"] +ashps_costs = ashps_costs[~pd.isnull(ashps_costs["Total invoiced (including VAT).5"])] +# Rename +ashps_costs.columns = ["Model", "Total invoiced (including VAT)"] + +# Solar +solar_costs = costs[ + ['Model.6', 'Total invoiced (including VAT).6'] +].copy() +solar_costs["Model.6"] = "Solar - " + solar_costs["Model.6"] +solar_costs = solar_costs[~pd.isnull(solar_costs["Total invoiced (including VAT).6"])] +# Rename +solar_costs.columns = ["Model", "Total invoiced (including VAT)"] + +fabric_costing_data = pd.concat([cwi_costs, li_costs]) +windows_doors_costing_data = pd.concat([windows_costs, doors_costs]) + +windows_doors_costing_data.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/windows_doors_costs.csv" +) +fabric_costing_data.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/fabric_costing_data.csv" +) +ashps_costs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/ashps_costs.csv") +solar_costs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/solar_costs.csv") + +project_cost_by_age = costs[["Property age ", "TOTAL Cost of Works"]].groupby("Property age ").mean().reset_index() diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py index fc3b7ec6..a8805a71 100644 --- a/etl/customers/remote_assessments/app.py +++ b/etl/customers/remote_assessments/app.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv from utils.s3 import save_csv_to_s3 from etl.find_my_epc.AssetListEpcData import AssetListEpcData -PORTFOLIO_ID = 134 +PORTFOLIO_ID = 141 USER_ID = 8 load_dotenv(dotenv_path="backend/.env") @@ -19,25 +19,21 @@ def app(): asset_list = [ { - "address": "Flat 2, 42 Malden Road, London NW5 3HG", - "postcode": "NW5 3HG", - 
"uprn": 5117165, + "address": "196 Merrow Street", + "postcode": "SE17 2NP", + "uprn": 200003423454, + "patch": True }, { - "address": "15 Bournville Lane", - "postcode": "B30 2JY", - "uprn": 100070301128 + "address": "65 Liverpool Grove", + "postcode": "SE17 2HP", + "uprn": 200003423194 }, { - "address": "34 Bournville Lane", - "postcode": "B30 2LN", - "uprn": 100070301140 + "address": "2 Brettell Street", + "postcode": "SE17 2NZ", + "uprn": 200003423607 }, - { - "address": "36 Bournville Lane", - "postcode": "B30 2LN", - "uprn": 100070301142 - } ] asset_list = pd.DataFrame(asset_list) @@ -56,6 +52,7 @@ def app(): ) asset_list_epc_client.get_data() asset_list_epc_client.get_non_invasive_recommendations() + asset_list_epc_client.get_patch() # Store non-invasive recommendations in S3 non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv" @@ -65,22 +62,28 @@ def app(): file_name=non_invasive_recommendations_filename ) + # Store patches in S3 + patches_filename = "" + if asset_list_epc_client.patches: + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv" + save_csv_to_s3( + dataframe=pd.DataFrame(asset_list_epc_client.patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + valuation_data = [ { - "uprn": 5117165, - "valuation": 467_000 + "valuation": 339_000, + "uprn": 200003423454, }, { - "uprn": 100070301128, - "valuation": 335_000 + "valuation": 374_000, + "uprn": 200003423194 }, { - "uprn": 100070301140, - "valuation": 276_000 - }, - { - "uprn": 100070301142, - "valuation": 276_000 + "valuation": 719_000, + "uprn": 200003423607 }, ] # Store valuation data to s3 @@ -98,7 +101,7 @@ def app(): "goal_value": "C", "trigger_file_path": filename, "already_installed_file_path": "", - "patches_file_path": "", + "patches_file_path": patches_filename, "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, "valuation_file_path": valuation_filename, "scenario_name": 
"Full package remote assessment", diff --git a/etl/customers/stonewater/data_cleaning.py b/etl/customers/stonewater/data_cleaning.py index a5da0c79..eedae9b9 100644 --- a/etl/customers/stonewater/data_cleaning.py +++ b/etl/customers/stonewater/data_cleaning.py @@ -96,6 +96,7 @@ def download_data_from_sharepoint(): folder for folder in contents["value"] if folder["name"] in folders_to_keep ] for folder_to_pull in folders_to_pull: + # Get the contents folder_contents = sharepoint_client.list_folder_contents( drive_id=sharepoint_client.document_drive["id"], diff --git a/etl/customers/united living/get_data.py b/etl/customers/united living/get_data.py new file mode 100644 index 00000000..bc4ab400 --- /dev/null +++ b/etl/customers/united living/get_data.py @@ -0,0 +1,73 @@ +import os +import pandas as pd +import numpy as np +from asset_list.utils import get_data +from backend.SearchEpc import SearchEpc +from etl.spatial.OpenUprnClient import OpenUprnClient + +from dotenv import load_dotenv + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + + +def app(): + filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/Potential GMCA props 05.03.xlsx" + + df = pd.read_excel(filepath) + df["row_id"] = df.index + + df["house_number"] = df.apply( + lambda x: SearchEpc.get_house_number(x["Address"], x["Postcode"]), + axis=1 + ) + + properties_data, _, _ = get_data( + df=df, + manual_uprn_map={}, + epc_auth_token=EPC_AUTH_TOKEN, + uprn_column=None, + fulladdress_column="Address", + address1_column="house_number", + postcode_column="Postcode", + property_type_column=None, + built_form_column=None, + epc_api_only=True, + row_id_name="row_id", + ) + + no_data = df[df["row_id"].isin(_)] + no_data[["Address", "Postcode"]] + + # 53 108 Alexandra Street OL6 9QP 100011536830 + # 56 301 Whiteacre Road OL6 9QF 100011557437 + # 65 97 Princess Street OL6 9QJ 100011551813 + + data = df.merge( + pd.DataFrame(properties_data)[["uprn", 
"row_id"]], + how="left", left_on="row_id", right_on="row_id" + ) + + # Fill missing UPRNS + data["uprn"] = np.where(data["Address"] == "108 Alexandra Street", 100011536830, data["uprn"]) + data["uprn"] = np.where(data["Address"] == "301 Whiteacre Road", 100011557437, data["uprn"]) + data["uprn"] = np.where(data["Address"] == "97 Princess Street", 100011551813, data["uprn"]) + + # We now get whether the property is listed, heritage or in a conservation area + spatial_data = OpenUprnClient.get_spatial_data(uprns=data["uprn"].tolist(), bucket_name="retrofit-data-dev") + spatial_data = spatial_data.rename(columns={"UPRN": "uprn"}) + + data["uprn"] = data["uprn"].astype(int) + + merged = data.merge( + spatial_data, how="left", on="uprn" + ) + # fill NAs + for c in ['conservation_status', 'is_listed_building', 'is_heritage_building']: + merged[c] = merged[c].fillna(False) + + merged.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/Potential GMCA props 05.03 - data " + "pulled.xlsx", + index=False + ) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index aca36584..76087a76 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1,7 +1,7 @@ import os import re import openpyxl -import Levenshtein +from fuzzywuzzy import fuzz from pathlib import Path import msgpack from datetime import datetime @@ -2771,7 +2771,8 @@ class DataLoader: match_to = [x.replace(" ", "") for x in match_to] # Perform matching between full key and match_to - distances = [Levenshtein.distance(matching_string, s) for s in match_to] + distances = [100 - fuzz.ratio(matching_string, s) for s in match_to] + best_match_index = distances.index(min(distances)) # We might want to consider a threshold for the distance, however for the momeny, # we don't consider this for the moment @@ -2897,6 +2898,17 @@ class DataLoader: # Merge onto the survey list 
survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id") + # TEMP FOR NEWER WORK + # matching_lookup = matching_lookup.merge( + # asset_list[["asset_list_row_id", "UPRN"]], how="left", on="asset_list_row_id" + # ).merge( + # survey_list[["survey_list_row_id", "NO.", "Street / Block Name", "Post Code"]], + # how="left", on="survey_list_row_id" + # ) + # matching_lookup.to_csv( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/surveys_to_assets.csv" + # ) + return survey_list @staticmethod diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 3f2e810e..83a85b78 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset): common_cols = [[col + "_starting", col + "_ending"] for col in common_cols] self.df = self.df.loc[ - :, - no_suffix_cols - + only_ending_cols - + [col for cols in common_cols for col in cols], - ] + :, + no_suffix_cols + + only_ending_cols + + [col for cols in common_cols for col in cols], + ] def _remove_abnormal_change_in_floor_area(self): """ @@ -511,7 +511,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"] ) - ] + ] elif component == "floor": expanded_df = expanded_df[ (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) @@ -528,7 +528,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"] ) - ] + ] elif component == "roof": expanded_df = expanded_df[ (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) @@ -541,7 +541,7 @@ class TrainingDataset(BaseDataset): expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"] ) - ] + ] return expanded_df diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 558dbacb..9ff1de0a 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -139,28 +139,22 @@ class EPCRecord: self._clean_records_using_epc_records() 
self._clean_with_data_processor() - self._expand_prepared_epc_to_attributes() - self._identify_delta_between_prepared_and_original_records() # Process to create uvalues for the single epc record - - # selff.df = self.epc_record_as_dataframe('prepared_epc') - + # self.df = self.epc_record_as_dataframe('prepared_epc') # self._feature_generation() # self._drop_features() return - self._expand_description_to_features() - self._expand_description_to_uvalues() - + # self._expand_description_to_features() + # self._expand_description_to_uvalues() + # # self._generate_uvalues() # self._validate_expanded_description() # self._validate_u_values() - # etc - pass def _drop_features(self): """ @@ -360,6 +354,7 @@ class EPCRecord: self._clean_number_lighting_outlets() self._clean_floor_level() self._clean_floor_height() + self._clean_constituency() # self._clean_potential_energy_efficiency() # self._clean_environment_impact_potential() @@ -402,6 +397,17 @@ class EPCRecord: if self.prepared_epc["floor-height"] <= 1.665: self.prepared_epc["floor-height"] = average + def _clean_constituency(self): + """ + We handle the single case of finding a missing constituency by using the local authority + """ + if pd.isnull(self.prepared_epc["constituency"]) or (self.prepared_epc["constituency"] == ""): + if self.prepared_epc["local-authority"] != "E06000044": + raise NotImplementedError( + "This function is only implemented for Portsmouth, in the single edgecase seen" + ) + self.prepared_epc["constituency"] = "E14000883" + def _clean_floor_level(self): """ This method will clean the floor level, if empty or invalid diff --git a/etl/find_my_epc/AssetListEpcData.py b/etl/find_my_epc/AssetListEpcData.py index 1d2e1472..f085c8fb 100644 --- a/etl/find_my_epc/AssetListEpcData.py +++ b/etl/find_my_epc/AssetListEpcData.py @@ -26,6 +26,7 @@ class AssetListEpcData: self.extracted_data = None self.non_invasive_recommendations = None + self.patches = None @staticmethod def check_asset_list(asset_list): 
@@ -52,6 +53,21 @@ class AssetListEpcData: } for r in self.extracted_data ] + def get_patch(self): + """ + + :return: + """ + if self.extracted_data is None: + raise ValueError("extracted data is missing - run get_data first") + + self.patches = [ + { + "uprn": r.get("uprn"), + **r.get("patch") + } for r in self.extracted_data if r.get("patch") + ] + def get_data(self): logger.info("Retrieving data for given asset list") @@ -67,11 +83,18 @@ class AssetListEpcData: postcode=pc, uprn=home.get("uprn"), auth_token=self.epc_auth_token, - os_api_key="" + os_api_key="", ) + epc_searcher.ordnance_survey_client.property_type = home.get("property_type") + epc_searcher.ordnance_survey_client.built_form = home.get("built_form") epc_searcher.find_property(skip_os=True) + if epc_searcher.newest_epc is None: continue + + if not pd.isnull(home.get("patch")): + epc_searcher.newest_epc["address1"] = add1 + # Attempt both methods: try: find_epc_searcher = RetrieveFindMyEpc( @@ -89,14 +112,22 @@ class AssetListEpcData: time.sleep(0.5) # We need uprn - extracted_data.append( - { - "uprn": home.get("uprn"), - "address": home["address"], - "postcode": home["postcode"], - **find_epc_data, + to_append = { + "uprn": home.get("uprn"), + "address": home["address"], + "postcode": home["postcode"], + **find_epc_data, + } + if not pd.isnull(home.get("patch")): + to_append["patch"] = { + "current-energy-rating": find_epc_data["current_epc_rating"], + "current-energy-efficiency": find_epc_data["current_epc_efficiency"], + "potential-energy-rating": find_epc_data["potential_epc_rating"], + "potential-energy-efficiency": find_epc_data["potential_epc_efficiency"], + **find_epc_data["epc_data"] } - ) + + extracted_data.append(to_append) self.extracted_data = extracted_data logger.info("Data Extrction complete") diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 9852cc0d..86c3fda1 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ 
b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -1,8 +1,13 @@ +import re import pandas as pd import requests from bs4 import BeautifulSoup from datetime import datetime +from utils.logger import setup_logger + +logger = setup_logger() + class RetrieveFindMyEpc: SEARCH_POSTCODE_URL = ( @@ -41,6 +46,85 @@ class RetrieveFindMyEpc: sources = {item.get_text(strip=True): True for item in energy_list.find_all("li")} return sources + @staticmethod + def get_text(elem): + return elem.get_text(strip=True) if elem else None + + def extract_epc_data(self, soup): + + results = {} + + # 1. Total floor area + results['total-floor-area'] = int(self.get_text( + soup.find("dt", string="Total floor area").find_next_sibling("dd") + ).split(" ")[0]) + + # Table with features + rows = soup.select("table.govuk-table tbody tr") + + rating_map = { + "Very poor": "Very Poor", + "Very good": "Very Good" + } + + def get_feature_row_text(feature_name, index=0): + matches = [row for row in rows if row.find("th") and feature_name in row.find("th").text] + if len(matches) > index: + cells = matches[index].find_all("td") + description = self.get_text(cells[0]) + rating = self.get_text(cells[1]) + return description, rating_map.get(rating, rating) + return None, None + + # 2-3. First wall description and rating + results['walls-description'], results['walls-energy-eff'] = get_feature_row_text("Wall", 0) + + # 4-5. First roof description and rating + results['roof-description'], results['roof-energy-eff'] = get_feature_row_text("Roof", 0) + + # 6-7. Windows description and rating + results['windows-description'], results['windows-energy-eff'] = get_feature_row_text("Window") + + # 8-9. Main heating description and rating + results['mainheat-description'], results['mainheat-energy-eff'] = get_feature_row_text("Main heating") + + # 10-11. 
Main heating control description and rating + results['mainheatcont-description'], results['mainheatc-energy-eff'] = get_feature_row_text( + "Main heating control" + ) + + # 12-13. Hot water description and rating + results['hotwater-description'], results['hot-water-energy-ef'] = get_feature_row_text("Hot water") + + # 14-15. Lighting description and rating + results['lighting-description'], results['lighting-energy-eff'] = get_feature_row_text("Lighting") + + # 16. Floor description + results['floor-description'], _ = get_feature_row_text("Floor") + + # 17. Secondary heating description + results['secondheat-description'], _ = get_feature_row_text("Secondary heating") + + # 18. Primary energy use + p_energy = soup.find(string=lambda t: "primary energy use for this property per year" in t.lower()) + # We should always have this + match = re.search(r"(\d+)\s+kilowatt", p_energy) + results['energy-consumption-current'] = int(match.group(1)) if match else None + + # 19. Current CO2 emissions + co2_now = soup.find("dd", id="eir-property-produces") + # We should always have this + match = re.search(r"([\d.]+)", co2_now.text) + results['co2-emissions-current'] = float(match.group(1)) if match else None + # Need co2-emiss-curr-per-floor-area + + # 20. 
Potential CO2 emissions + co2_pot = soup.find("dd", id="eir-potential-production") + match = re.search(r"([\d.]+)", co2_pot.text) + results['co2-emissions-potential'] = float(match.group(1)) if match else None + + return results + def retrieve_newest_find_my_epc_data(self, sap_2012_date=None): """ For a post code and address, we pull out all the required data from the find my epc website @@ -111,6 +195,9 @@ class RetrieveFindMyEpc: potential_rating = ratings.split(".")[1] current_sap = int(current_rating.split(' ')[-1]) + # Floor area + address_res.find() + # Retrieve the energy consumption bills = address_res.find('div', {'id': 'bills-affected'}) bills_list = bills.find_all('li') @@ -228,6 +315,9 @@ class RetrieveFindMyEpc: # 4) Low and zero carbon energy sources low_carbon_energy_sources = self.extract_low_carbon_sources(address_res) + # 5) Pull out the EPC data + epc_data = self.extract_epc_data(address_res) + resulting_data = { 'epc_certificate': epc_certificate, 'current_epc_rating': current_rating.split(' ')[-6], @@ -237,8 +327,9 @@ class RetrieveFindMyEpc: "heating_text": heating_text, "hot_water_text": hot_water_text, "recommendations": recommendations, + "epc_data": epc_data, **assessment_data, - **low_carbon_energy_sources + **low_carbon_energy_sources, } return resulting_data @@ -332,6 +423,16 @@ class RetrieveFindMyEpc: "Replacement warm air unit": [], "Secondary glazing": ["secondary_glazing"], "Condensing heating unit": ["boiler_upgrade"], + '???': [], + 'Solar photovoltaic panels, 2.5kWp': ["solar_pv"], + 'Heating controls (programmer, room thermostat and thermostatic radiator valves)': [ + "roomstat_programmer_trvs", "time_temperature_zone_control" + ], + 'Translation missing: en.improvement_code.41.title': [], + "Condensing boiler (separate from the range cooker)": ["boiler_upgrade"], + "Heating controls (programmer and thermostatic radiator valves)": [ + "roomstat_programmer_trvs", "time_temperature_zone_control" + ] } survey = True @@ -356,3 
+457,24 @@ class RetrieveFindMyEpc: formatted_recommendations.append(to_append) return formatted_recommendations + + @classmethod + def get_from_epc(cls, epc): + # Attempt both methods: + try: + searcher = cls(address=epc["address"], postcode=epc["postcode"]) + find_epc_data = searcher.retrieve_newest_find_my_epc_data() + except Exception as e: + logger.error(f"Error retrieving find my epc data: {e}") + # We attempt with the backup add + searcher = cls(address=epc["address1"], postcode=epc["postcode"]) + find_epc_data = searcher.retrieve_newest_find_my_epc_data() + + non_invasive_recommendations = { + "uprn": epc["uprn"], + "address": epc["address"], + "postcode": epc["postcode"], + "recommendations": find_epc_data["recommendations"], + } + + return non_invasive_recommendations diff --git a/input_property_list.csv b/input_property_list.csv deleted file mode 100644 index dc677c88..00000000 --- a/input_property_list.csv +++ /dev/null @@ -1,12 +0,0 @@ -address,postcode,Notes,,,, -28 Distillery Wharf,W6 9bf,,,,, -Flat 14 Godley V C House,E2 0LP,,,,, -49 Elderfield Road,E5 0LF,,,,, -26 Stanhope Road,N6 5NG,,,,, -Flat 3 Frederick Building,N1 4BD,,,,, -Flat 4 Frederick Building,N1 4BD,,,,, -"Flat 28, 22 Adelina Grove",E1 3BX,,,,, -"Flat 39, 239 Long Lane",SE1 4PT,,,,, -"1, Westview, Somerby",LE14 2QH,This property has an unfilled cavity,,,, -"59, Ashdale",CM23 4EB,This property has a partially filled cavity,,,, -88 Cleveland Avenue,DL3 7BE,This property has a filled cavity,,,, \ No newline at end of file diff --git a/keyzy_pilot.csv b/keyzy_pilot.csv deleted file mode 100644 index b972bcf9..00000000 --- a/keyzy_pilot.csv +++ /dev/null @@ -1,3 +0,0 @@ -address,postcode,Notes,,,, -2 South Terrace,NN1 5JY,,,,, -25 Albert Street,PO12 4TY,,,,, \ No newline at end of file diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 2312dff2..2d486191 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -37,22 +37,25 @@ MCS_SOLAR_PV_COST_DATA = { 
"average_cost_per_kwh-Northern Ireland": 1347, } +# Installers are now working with 435 watt panels +PANEL_SIZE = 0.435 + INSTALLER_SOLAR_COSTS = [ - {'n_panels': 4, 'array_kwp': 1.6, 'cost': 3040.00, 'installer': 'CEG'}, - {'n_panels': 5, 'array_kwp': 2.1, 'cost': 3201.00, 'installer': 'CEG'}, - {'n_panels': 6, 'array_kwp': 2.5, 'cost': 3363.00, 'installer': 'CEG'}, - {'n_panels': 7, 'array_kwp': 2.9, 'cost': 3524.00, 'installer': 'CEG'}, - {'n_panels': 8, 'array_kwp': 3.3, 'cost': 3686.00, 'installer': 'CEG'}, - {'n_panels': 9, 'array_kwp': 3.7, 'cost': 3847.00, 'installer': 'CEG'}, - {'n_panels': 10, 'array_kwp': 4.1, 'cost': 4009.00, 'installer': 'CEG'}, - {'n_panels': 11, 'array_kwp': 4.5, 'cost': 4170.00, 'installer': 'CEG'}, - {'n_panels': 12, 'array_kwp': 4.9, 'cost': 4332.00, 'installer': 'CEG'}, - {'n_panels': 13, 'array_kwp': 5.3, 'cost': 4835.00, 'installer': 'CEG'}, - {'n_panels': 14, 'array_kwp': 5.7, 'cost': 5015.00, 'installer': 'CEG'}, - {'n_panels': 15, 'array_kwp': 6.2, 'cost': 5176.00, 'installer': 'CEG'}, - {'n_panels': 16, 'array_kwp': 6.6, 'cost': 5338.00, 'installer': 'CEG'}, - {'n_panels': 17, 'array_kwp': 7.0, 'cost': 5500.00, 'installer': 'CEG'}, - {'n_panels': 18, 'array_kwp': 7.4, 'cost': 6021.00, 'installer': 'CEG'} + {'n_panels': 4, 'array_kwp': 4 * PANEL_SIZE, 'cost': 4089.25, 'installer': 'CEG'}, + {'n_panels': 5, 'array_kwp': 5 * PANEL_SIZE, 'cost': 4242.48, 'installer': 'CEG'}, + {'n_panels': 6, 'array_kwp': 6 * PANEL_SIZE, 'cost': 4395.71, 'installer': 'CEG'}, + {'n_panels': 7, 'array_kwp': 7 * PANEL_SIZE, 'cost': 4548.94, 'installer': 'CEG'}, + {'n_panels': 8, 'array_kwp': 8 * PANEL_SIZE, 'cost': 4702.17, 'installer': 'CEG'}, + {'n_panels': 9, 'array_kwp': 9 * PANEL_SIZE, 'cost': 4855.41, 'installer': 'CEG'}, + {'n_panels': 10, 'array_kwp': 10 * PANEL_SIZE, 'cost': 5010.95, 'installer': 'CEG'}, + {'n_panels': 11, 'array_kwp': 11 * PANEL_SIZE, 'cost': 5166.49, 'installer': 'CEG'}, + {'n_panels': 12, 'array_kwp': 12 * PANEL_SIZE, 
'cost': 5322.04, 'installer': 'CEG'}, + {'n_panels': 13, 'array_kwp': 13 * PANEL_SIZE, 'cost': 5657.6, 'installer': 'CEG'}, + {'n_panels': 14, 'array_kwp': 14 * PANEL_SIZE, 'cost': 5993.16, 'installer': 'CEG'}, + {'n_panels': 15, 'array_kwp': 15 * PANEL_SIZE, 'cost': 6328.71, 'installer': 'CEG'}, + {'n_panels': 16, 'array_kwp': 16 * PANEL_SIZE, 'cost': 6483.33, 'installer': 'CEG'}, + {'n_panels': 17, 'array_kwp': 17 * PANEL_SIZE, 'cost': 6637.95, 'installer': 'CEG'}, + {'n_panels': 18, 'array_kwp': 18 * PANEL_SIZE, 'cost': 6792.57, 'installer': 'CEG'} ] # This is the maximum number of panels that we have a cost from the installers for INSTALLER_MAX_PANELS = 18 @@ -62,11 +65,11 @@ INSTALLER_MAX_PANELS = 18 INSTALLER_SOLAR_PV_INVERTER_COST = 7500 INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST = 500 # Just a rough guess to labour costs -INSTALLER_SCAFFOLDING_COSTS = [ - {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'}, - {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'}, - {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'} -] +# INSTALLER_SCAFFOLDING_COSTS = [ +# {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'}, +# {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'}, +# {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'} +# ] # This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average, # to be conservative @@ -101,10 +104,10 @@ INSTALLER_ASHP_COSTS = [ BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500 INSTALLER_SOLAR_BATTERY_COSTS = [ - {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 2700.00, 'installer': 'CEG'}, - {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'}, - {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'}, - 
{'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'} + {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 3769.89, 'installer': 'JJC'}, + # {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'}, + # {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'}, + # {'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'} ] # This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/ @@ -149,7 +152,7 @@ CONDENSING_BOILER_COSTS = { ELECTRIC_BOILER_COSTS = 1800 # Assumes 1 hours to remove each heater (including re-decorating) -ROOM_HEATER_REMOVAL_COST = 50 +ROOM_HEATER_REMOVAL_COST = 25 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3 # This is a cost quoted by Jim for a system flush - existig system will run more efficiently @@ -190,6 +193,8 @@ class Costs: # fittings and trimming doors, as well as scope for damage to the existing wall during preparation. 
IWI_CONTINGENCY = 0.2 + # For air source heat pumps, we inflate the assume cost by quite a bit to account for design and installation + ASHP_CONTINGENCY = 0.35 # Where there is more uncertainty, a higher contingency rate is used HIGH_RISK_CONTINGENCY = 0.2 # When there is less uncertainty, a lower contingency rate is used @@ -234,6 +239,13 @@ class Costs: if self.region is None: # Try and grab using the local-authority-label self.region = county_to_region_map.get(self.property.data["local-authority-label"], None) + + if self.region is None: + # Try and get the region after converting the keys to lower + self.region = { + k.lower(): v for k, v in county_to_region_map.items() + }.get(self.property.data["local-authority-label"].lower(), None) + if self.region is None: raise ValueError("Region not found in county map") @@ -765,18 +777,14 @@ class Costs: battery_cost = [c for c in INSTALLER_SOLAR_BATTERY_COSTS if c["capacity_kwh"] == battery_kwh][0]["cost"] subtotal += battery_cost - scaffolding_cost = [c for c in INSTALLER_SCAFFOLDING_COSTS if c["stories"] == n_floors][0]["cost"] - subtotal += scaffolding_cost - if needs_inverter: subtotal += INSTALLER_SOLAR_PV_INVERTER_COST # We also add an additional labour cost subtotal += INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST - # We add an additional cost for scaffolding - # The costs from installers exclude VAT - vat = subtotal * cls.VAT_RATE - total_cost = subtotal + vat + # Solar doesn't have VAT but we add a high risk contingency + # to account for design variation that we see in practice + total_cost = subtotal * (1 + cls.HIGH_RISK_CONTINGENCY) # Labour hours are based on estimates from online research but an average team seems to consist of 3 people # and most jobs take around 2 days. 
Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of @@ -784,7 +792,7 @@ class Costs: return { "total": total_cost, "subtotal": subtotal, - "vat": vat, + "vat": 0, "labour_hours": 48, "labour_days": 2, } @@ -1154,7 +1162,6 @@ class Costs: pump. This cost will include the boiler upgrade scheme grant """ - # This is the average cost of a project, we'll add some additional contingency if ashp_size is None: @@ -1163,7 +1170,7 @@ class Costs: cost = [x for x in INSTALLER_ASHP_COSTS if x][0]["cost"] # We add some contingency since there are additional costs such as resizing radiators, that could be required - subtotal = cost * (1 + self.CONTINGENCY) + subtotal = cost * (1 + self.ASHP_CONTINGENCY) # The costs from installers exclude VAT vat = subtotal * self.VAT_RATE total_cost = subtotal + vat @@ -1173,7 +1180,7 @@ class Costs: labour_hours = labour_days * 8 return { - "total": subtotal, + "total": total_cost, "subtotal": subtotal, "vat": vat, "labour_hours": labour_hours, diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py index ed00bbe9..85e1a8dc 100644 --- a/recommendations/FloorRecommendations.py +++ b/recommendations/FloorRecommendations.py @@ -145,7 +145,9 @@ class FloorRecommendations(Definitions): ) return - raise NotImplementedError("Implement me!") + # In this case, we have no recommendation to make. 
E.g., if we have a solid floor property + # but solid floor insulation has been excluded as a measure, we get here + return @staticmethod def _make_floor_description(material): diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index c613aa42..bd015a79 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -12,7 +12,7 @@ class HeatingControlRecommender: self.recommendation = [] - def recommend(self, heating_description, description_prefix="", description_suffix=""): + def recommend(self, heating_description, phase, description_prefix="", description_suffix=""): # TODO: Many of these functions are quite similar. We can possibly create a single wrapper function that # takes in the heating description and the description prefix/suffix, and then creates the appropriate @@ -23,32 +23,32 @@ class HeatingControlRecommender: # This first iteration of the recommender will provide very basic recommendation # We recommend heating controls based on the main heating system if heating_description in ["Room heaters, electric"]: - self.recommend_room_heaters_electric_controls() + self.recommend_room_heaters_electric_controls(phase=phase) return if heating_description in ["Electric storage heaters", "Electric storage heaters, radiators"]: - self.recommend_high_heat_retention_controls(description_prefix=description_prefix) + self.recommend_high_heat_retention_controls(description_prefix=description_prefix, phase=phase) return if heating_description in ["Boiler and radiators, mains gas"]: # We can recommend roomstat programmer trvs - self.recommend_roomstat_programmer_trvs(description_suffix=description_suffix) + self.recommend_roomstat_programmer_trvs(description_suffix=description_suffix, phase=phase) # We can also recommend time and temperature zone controls - self.recommend_time_temperature_zone_controls(description_suffix=description_suffix) + 
self.recommend_time_temperature_zone_controls(description_suffix=description_suffix, phase=phase) return if heating_description in ["Boiler and radiators, electric"]: - self.recommend_roomstat_programmer_trvs() + self.recommend_roomstat_programmer_trvs(phase=phase) return if heating_description in ["Air source heat pump, radiators, electric"]: # For an ASHP, we can recommend time and temperature zone controls, as well as programmer, trvs and a bypass # which are common configurations for ASHPs - self.recommend_time_temperature_zone_controls() + self.recommend_time_temperature_zone_controls(phase=phase) # self.recommend_programmer_trvs_bypass() - def recommend_room_heaters_electric_controls(self): + def recommend_room_heaters_electric_controls(self, phase): """ If the home has Room heaters, electric, we start by identifying potential heating controls that could be upgraded, that would provide a practical impact. This will be the least invasive improvement. @@ -88,6 +88,9 @@ class HeatingControlRecommender: self.recommendation.append( { + "phase": phase, + "type": "heating", + "measure_type": "programmer_appliance_thermostat", "description": "upgrade heating controls to Programmer and Appliance or Smart Thermostats", **self.costs.programmer_and_appliance_thermostat(has_programmer=has_programmer), "simulation_config": simulation_config @@ -97,7 +100,7 @@ class HeatingControlRecommender: # We don't implement any other recommendations right now return - def recommend_high_heat_retention_controls(self, description_prefix=""): + def recommend_high_heat_retention_controls(self, phase, description_prefix=""): """ When applicable, we recommend upgrading the heating controls to high heat retention controls. This is a specific type of control system that is designed to work with electric storage heaters. 
It is a more @@ -133,6 +136,9 @@ class HeatingControlRecommender: self.recommendation.append( { + "phase": phase, + "type": "heating", + "measure_type": "celect_type_controls", "description": "Upgrade heating controls to High Heat Retention Storage Heater Controls", **self.costs.celect_type_controls(), "simulation_config": simulation_config, @@ -143,7 +149,7 @@ class HeatingControlRecommender: # We don't implement any other recommendations right now return - def recommend_roomstat_programmer_trvs(self, description_suffix=""): + def recommend_roomstat_programmer_trvs(self, phase, description_suffix=""): """ If the home has a boiler and radiators, mains gas, we start by identifying potential heating controls that could be upgraded, that would provide a practical impact. @@ -208,15 +214,16 @@ class HeatingControlRecommender: description = "Upgrade heating controls to Room thermostat, programmer and TRVs" - already_installed = "heating_control" in self.property.already_installed + already_installed = "roomstat_programmer_trvs" in self.property.already_installed if already_installed: cost_result = override_costs(cost_result) description = "Heating controls have already been upgraded, no further action needed." self.recommendation.append( { - "type": "heating_control", + "type": "heating", "measure_type": "roomstat_programmer_trvs", + "phase": phase, "parts": [], "description": description, **cost_result, @@ -231,7 +238,7 @@ class HeatingControlRecommender: return - def recommend_time_temperature_zone_controls(self, description_suffix=""): + def recommend_time_temperature_zone_controls(self, phase, description_suffix=""): """ If the home has a boiler, we can recommend time and temperature zone controls. This is a more advanced and more efficient control system than the standard controls that come with a boiler. 
However, it may come @@ -282,14 +289,15 @@ class HeatingControlRecommender: "temperature zone control)" ) - already_installed = "heating_control" in self.property.already_installed + already_installed = "time_temperature_zone_control" in self.property.already_installed if already_installed: cost_result = override_costs(cost_result) description = "Heating controls have already been upgraded, no further action needed." self.recommendation.append( { - "type": "heating_control", + "type": "heating", + "phase": phase, "measure_type": "time_temperature_zone_control", "parts": [], "description": description, @@ -335,14 +343,15 @@ class HeatingControlRecommender: description = "Install a Bypass valve, TRVs and a Programmer" - already_installed = "heating_control" in self.property.already_installed + already_installed = "programmer_trvs_bypass" in self.property.already_installed if already_installed: cost_result = override_costs(cost_result) description = "Heating controls have already been upgraded, no further action needed." 
self.recommendation.append( { - "type": "heating_control", + "type": "heating", + "measure_type": "programmer_trvs_bypass", "parts": [], "description": description, **cost_result, diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index e4dd3a78..20f5e7ad 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -65,7 +65,6 @@ class HeatingRecommender: self.costs = Costs(self.property) self.heating_recommendations = [] - self.heating_control_recommendations = [] self.has_electric_heating_description = ( self.property.main_heating["has_electric"] or self.property.main_heating["has_electricaire"] @@ -259,7 +258,6 @@ class HeatingRecommender: "ashp_only_heating_recommendation", False ) self.heating_recommendations = [] - self.heating_control_recommendations = [] # This first iteration of the recommender will provide very basic recommendation # We recommend heating controls based on the main heating system @@ -302,7 +300,6 @@ class HeatingRecommender: self.recommend_air_source_heat_pump( phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations, - ) return @@ -360,7 +357,7 @@ class HeatingRecommender: } controls_recommender = HeatingControlRecommender(self.property) - controls_recommender.recommend(heating_description="Boiler and radiators, electric") + controls_recommender.recommend(heating_description="Boiler and radiators, electric", phase=phase) self.heating_recommendations.extend([boiler_recommendation] + controls_recommender.recommendation) return @@ -453,7 +450,7 @@ class HeatingRecommender: ), {}) controls_recommender = HeatingControlRecommender(self.property) - controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric") + controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric", phase=phase) ashp_size = self.size_heat_pump() ashp_costs = self.costs.air_source_heat_pump(ashp_size) @@ 
-805,7 +802,9 @@ class HeatingRecommender: description_prefix = "" controls_recommender.recommend( - heating_description="Electric storage heaters", description_prefix=description_prefix + heating_description="Electric storage heaters", + description_prefix=description_prefix, + phase=phase ) has_hhr = self.is_hhr_already_installed() @@ -1120,10 +1119,10 @@ class HeatingRecommender: description_suffix = "" controls_recommender.recommend( heating_description="Boiler and radiators, mains gas", - description_suffix=description_suffix + description_suffix=description_suffix, + phase=recommendation_phase ) # We may have 2 recommendations from the heating controls - if not controls_recommender.recommendation and not boiler_recommendation: return @@ -1161,10 +1160,6 @@ class HeatingRecommender: # 3) Heating controls only # But they are options that are not mutually exclusive # So, we actually set heating controls as a heating recommendation - for recommendation in controls_recommender.recommendation: - recommendation["phase"] = recommendation_phase - # recommendation["type"] = "heating" - - self.heating_control_recommendations.extend(controls_recommender.recommendation) + self.heating_recommendations.extend(controls_recommender.recommendation) return diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py index f9a1d63a..3447394d 100644 --- a/recommendations/LightingRecommendations.py +++ b/recommendations/LightingRecommendations.py @@ -4,6 +4,7 @@ from backend.Property import Property from typing import List from recommendations.Costs import Costs from recommendations.recommendation_utils import override_costs +from backend.ml_models.AnnualBillSavings import AnnualBillSavings class LightingRecommendations: @@ -161,6 +162,7 @@ class LightingRecommendations: # the proportion of lights that will be set to low energy "sap_points": sap_points, "kwh_savings": heat_demand_change, + "energy_cost_savings": heat_demand_change * 
AnnualBillSavings.ELECTRICITY_PRICE_CAP, "co2_equivalent_savings": carbon_change, "description_simulation": { "lighting-energy-eff": "Very Good", diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 715332a5..0e73cffe 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -149,9 +149,10 @@ class Recommendations: (self.wall_recomender.recommendations or self.roof_recommender.recommendations) and ("ventilation" in measures) ): - self.ventilation_recomender.recommend() + self.ventilation_recomender.recommend(phase=phase) if self.ventilation_recomender.recommendation: property_recommendations.append(self.ventilation_recomender.recommendation) + phase += 1 if "trickle_vents" in measures: # This is a recommendatin that typically comes from an energy assessment @@ -208,27 +209,25 @@ class Recommendations: measures=measures, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations, ) - if ( - self.heating_recommender.heating_recommendations or - self.heating_recommender.heating_control_recommendations - ): + if self.heating_recommender.heating_recommendations: # We split into first and second phase recommendations first_phase_recommendations = [ r for r in ( - self.heating_recommender.heating_recommendations + - self.heating_recommender.heating_control_recommendations + self.heating_recommender.heating_recommendations ) if r["phase"] == phase ] second_phase_recommendations = [ r for r in ( - self.heating_recommender.heating_recommendations + - self.heating_recommender.heating_control_recommendations + self.heating_recommender.heating_recommendations ) if r["phase"] == phase + 1 ] + if first_phase_recommendations and second_phase_recommendations: + raise Exception("Imeplement me") + if first_phase_recommendations: property_recommendations.append(first_phase_recommendations) @@ -240,8 +239,7 @@ class Recommendations: # otherwise we incremenet by 1 max_used_phase = max( [rec["phase"] for 
rec in - self.heating_recommender.heating_recommendations + - self.heating_recommender.heating_control_recommendations] + self.heating_recommender.heating_recommendations] ) amount_to_increment = max_used_phase - phase + 1 phase += amount_to_increment @@ -306,7 +304,7 @@ class Recommendations: # want to include the cavity wall insulation recommendation in the defaults if recommendations_by_type[0].get("type") in [ - "mechanical_ventilation", "trickle_vents", "draught_proofing" + "trickle_vents", "draught_proofing" ]: continue @@ -463,6 +461,7 @@ class Recommendations: :param property_instance: Instance of the Property class, for the home associated to property_id :param all_predictions: dictionary of predictions from the model apis :param recommendations: dictionary of recommendations for the property + :param representative_recommendations: dictionary of representative recommendations for the property :return: """ @@ -480,12 +479,14 @@ class Recommendations: increasing_variables = ["sap"] decreasing_variables = ["carbon", "heat_demand"] + # If the recommendation is mechanical ventilation, we don't apply the rule that the new value should be higher + mv_increasing_variables = ["carbon", "heat_demand"] + mv_decreasing_variables = ["sap"] + impact_summary = [] for recommendations_by_type in property_recommendations: for rec in recommendations_by_type: - if rec["type"] in [ - "mechanical_ventilation", "trickle_vents", "draught_proofing", "extension_cavity_wall_insulation" - ]: + if rec["type"] in ["trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"]: # We don't have a percieved sap impact of mechanical ventilation or trickle vents, and we don't # have the capacity to score draught proofing if rec["type"] == "extension_cavity_wall_insulation": @@ -571,13 +572,23 @@ class Recommendations: # For decreasing variables, the new value should be lower than the previous, otherwise we set it to # the previous # In either case, we adjudge the recommendation 
to have had no/negligible impact - for v in increasing_variables: + # However, if the recommendation is mechanical ventilation, this can have a negative SAP impact so + # we don't apply this rule + + if rec["type"] == "mechanical_ventilation": + phase_increasing_variables = mv_increasing_variables + phase_decreasing_variables = mv_decreasing_variables + else: + phase_increasing_variables = increasing_variables + phase_decreasing_variables = decreasing_variables + + for v in phase_increasing_variables: current_phase_values[v] = ( current_phase_values[v] if current_phase_values[v] > previous_phase_values[v] else previous_phase_values[v] ) for v in previous_phase_values: - if v in decreasing_variables: + if v in phase_decreasing_variables: current_phase_values[v] = ( current_phase_values[v] if current_phase_values[v] < previous_phase_values[v] else previous_phase_values[v] @@ -592,13 +603,19 @@ class Recommendations: "heat_demand": previous_phase_values["heat_demand"] - current_phase_values["heat_demand"], } - # Prevent from being negative + # Prevent from being negative - apart from ventilation for metric in ["sap", "carbon", "heat_demand"]: - property_phase_impact[metric] = ( - 0 if property_phase_impact[metric] < 0 else property_phase_impact[metric] - ) - if metric == "sap": - property_phase_impact[metric] = round(property_phase_impact[metric], 2) + if rec["type"] != "mechanical_ventilation": + property_phase_impact[metric] = ( + 0 if property_phase_impact[metric] < 0 else property_phase_impact[metric] + ) + if metric == "sap": + property_phase_impact[metric] = round(property_phase_impact[metric], 2) + else: + # We prevent these from being positive + property_phase_impact[metric] = ( + 0 if property_phase_impact[metric] > 0 else property_phase_impact[metric] + ) # For the moment, we cap the number of SAP points that can be achieved by LEDs at 2 if rec["type"] == "low_energy_lighting": @@ -618,7 +635,7 @@ class Recommendations: # By limiting here, we don't change 
the value in current_phase_values. This means that the # future recommendations won't have an impact that is too large li_sap_limit = RoofRecommendations.get_loft_insulation_sap_limit( - property_instance.data["roof-energy-eff"], property_instance.data["extension-count"] + property_instance.data["roof-energy-eff"], property_instance.roof["insulation_thickness"] ) if li_sap_limit is not None: property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit) @@ -776,13 +793,26 @@ class Recommendations: ] ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True) + # We need the recommendaion type + rec_id_to_type = { + rec["recommendation_id"]: rec["type"] for recs in property_recommendations for rec in recs + } + rec_id_to_type[STARTING_DUMMY_ID_VALUE] = "starting_dummy" + for i in range(0, len(kwh_impact_table)): - current_phase = kwh_impact_table.loc[i, 'phase'] + current = kwh_impact_table.loc[i] + current_phase = current['phase'] previous_phase_id = (current_phase - 1) if (current_phase > 0) else -9999 previous_phase = kwh_impact_table[kwh_impact_table['phase'] == previous_phase_id] if not previous_phase.empty: for col in ["predictions_heating", "predictions_hotwater"]: + # Check if the recommendation type is ventilation + if rec_id_to_type[current["recommendation_id"]] == "mechanical_ventilation": + # We expect the kwh to increase + if kwh_impact_table.loc[i, col] > previous_phase[col].max(): + continue + if kwh_impact_table.loc[i, col] > previous_phase[col].max(): kwh_impact_table.loc[i, col] = previous_phase[col].max() @@ -842,7 +872,7 @@ class Recommendations: for recs in property_recommendations: for rec in recs: if rec["type"] in [ - "mechanical_ventilation", "trickle_vents", "draught_proofing", "extension_cavity_wall_insulation" + "trickle_vents", "draught_proofing", "extension_cavity_wall_insulation" ]: # We cannot score the impact on draught proofing continue @@ -867,13 +897,18 @@ class Recommendations: 
heating_kwh_savings = ( previous_phase_impact["predictions_heating"].mean() - rec_impact["predictions_heating"].values[0] ) - heating_cost_savings = ( - previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0] - ) - hotwater_kwh_savings = ( previous_phase_impact["predictions_hotwater"].mean() - rec_impact["predictions_hotwater"].values[0] ) + + # Shouldn't be positive + if rec["type"] == "mechanical_ventilation": + heating_kwh_savings = 0 if heating_kwh_savings > 0 else heating_kwh_savings + hotwater_kwh_savings = 0 if hotwater_kwh_savings > 0 else hotwater_kwh_savings + + heating_cost_savings = ( + previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0] + ) hotwater_host = ( previous_phase_impact["hotwater_cost"].mean() - rec_impact["hotwater_cost"].values[0] ) @@ -881,9 +916,8 @@ class Recommendations: total_kwh_savings = heating_kwh_savings + hotwater_kwh_savings energy_cost_savings = heating_cost_savings + hotwater_host - if rec["type"] == "lighting": - # In this case, we should probably just SKIP but check when we have one! 
- raise Exception("Implement me 3") + if rec["type"] == "low_energy_lighting": + continue rec["kwh_savings"] = total_kwh_savings rec["energy_cost_savings"] = energy_cost_savings diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index b7e34406..cd7f82c4 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -52,6 +52,10 @@ class RoofRecommendations: part for part in materials if part["type"] == "flat_roof_insulation" ] + self.room_roof_insulation_materials = [ + part for part in materials if part["type"] == "room_roof_insulation" + ] + # Extract the insulation thickness from the roof, which is used throughout this method self.insulation_thickness = convert_thickness_to_numeric( self.property.roof["insulation_thickness"], @@ -60,16 +64,16 @@ class RoofRecommendations: ) @classmethod - def get_loft_insulation_sap_limit(cls, roof_energy_eff, extension_count): + def get_loft_insulation_sap_limit(cls, roof_energy_eff, existing_thickness): """ Get the SAP limit for loft insulation :param roof_energy_eff: :return: """ - if extension_count == 0: - # No limit - return None + if str(existing_thickness).isdigit(): + if float(existing_thickness) >= 250: + return 0 if roof_energy_eff in ["Good", "Very Good"]: return 1 @@ -496,29 +500,22 @@ class RoofRecommendations: :return: """ - # TODO: We temporarilty use costs from SCIS for RIR insulation. 
The costing was £180/m2 floor - roof_roof_insulation_materials = [ - { - "type": "room_roof_insulation", - "measure_type": "room_roof_insulation", - "description": "Insulating the ceiling of the roof roof and re-decorate", - "depths": [100], - "depth_unit": "mm", - "r_value_per_mm": 0.038, - "thermal_conductivity": 0.022, - "cost": [180], - } - ] + # We have a list of materials that can be used for room roof insulation + # We will iterate over these materials and recommend them based on the current u-value of the roof + # and the cost of the materials rir_non_invasive_recommendation = next( (x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {} ) + insulation_materials = pd.DataFrame(self.room_roof_insulation_materials) + # lowest_selected_u_value = None recommendations = [] - for material in roof_roof_insulation_materials: - for depth, cost_per_unit in zip(material["depths"], material["cost"]): - part_u_value = r_value_per_mm_to_u_value(depth, material["r_value_per_mm"]) + for _, material_group in insulation_materials.groupby("description"): + for material in material_group.itertuples(): + + part_u_value = r_value_per_mm_to_u_value(material.depth, material.r_value_per_mm) _, new_u_value = calculate_u_value_uplift(u_value, part_u_value) new_u_value = math.ceil(new_u_value * 100.0) / 100.0 @@ -526,7 +523,7 @@ class RoofRecommendations: # We allow a small tolerance for error so we don't discount the recommendation entirely estimated_cost = ( - cost_per_unit * self.property.insulation_floor_area if + material.total_cost * self.property.insulation_floor_area if rir_non_invasive_recommendation.get("cost") is None else rir_non_invasive_recommendation.get("cost") ) diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py index a9d5de04..e63951d9 100644 --- a/recommendations/SecondaryHeating.py +++ b/recommendations/SecondaryHeating.py @@ -9,12 +9,6 @@ class SecondaryHeating: system. 
""" - # The list of existing heating systems that are accepted - ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas", "Electric storage heaters"] - ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric", 'Portable electric heaters (assumed)'] - # These are the heaters where works are required to remove them - FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"] - def __init__(self, property_instance: Property): self.property = property_instance self.costs = Costs(self.property) @@ -25,18 +19,10 @@ class SecondaryHeating: # Reset self.recommendation = [] - if self.property.main_heating["clean_description"] not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS: - return - - # TODO: We need to clean secondary data - if self.property.data['secondheat-description'] not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS: - return - - if self.property.data['secondheat-description'] in self.FIXED_HEATER_DESCRIPTIONS: - # We have an associated cost otherwise, there is no cost + if self.property.data['number-habitable-rooms'] > self.property.data['number-heated-rooms']: n_rooms = self.property.data['number-habitable-rooms'] - self.property.data['number-heated-rooms'] else: - n_rooms = 0 + n_rooms = self.property.data["number-heated-rooms"] costs = self.costs.heater_removal(n_rooms=n_rooms) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index a97dbcb3..ee07ff28 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -1,19 +1,12 @@ import numpy as np import pandas as pd +import backend.app.assumptions as assumptions from recommendations.Costs import Costs from recommendations.recommendation_utils import override_costs, estimate_pitched_roof_area class SolarPvRecommendations: - # Solar panel specs based on Eurener 400s solar panels - # https://midsummerwholesale.co.uk/buy/eurener/eurener-400w-mepv-zebra-ab-half-cut-mono - # Approximate area of the solar panels - 
SOLAR_PANEL_AREA = 1.79 - # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w - # This was previously set to 250w, but has been upped to 400 based on the systems used by Cotswolrd Energy Group - SOLAR_PANEL_WATTAGE = 400 - # For domestic properties, we don't recommend a solar PV system with wattage outside of these # bounds MAX_SYSTEM_WATTAGE = 6000 @@ -24,6 +17,23 @@ class SolarPvRecommendations: SAP_POINTS_PER_5_PERCENT_ROOF_COVERAGE = 1 + BACKUP_PANEL_PERFORMANCE = pd.DataFrame( + [ + { + "n_panels": 4, + "array_wattage": 1600, + "initial_ac_kwh_per_year": assumptions.MEDIAN_WATTAGE_TO_AC * 1600, + "panneled_roof_area": 4 * assumptions.RDSAP_AREA_PER_PANEL + }, + { + "n_panels": 8, + "array_warrage": 3200, + "initial_ac_kwh_per_year": assumptions.MEDIAN_WATTAGE_TO_AC * 3200, + "panneled_roof_area": 8 * assumptions.RDSAP_AREA_PER_PANEL + }, + ] + ) + def __init__(self, property_instance): """ :param property_instance: Instance of the Property class, for the home associated to property_id @@ -47,46 +57,6 @@ class SolarPvRecommendations: return trimmed_list - def mds_recommend(self, phase=None, solar_pv_percentage=0.5): - # For specific usage within the mds report - - solar_pv_roof_area = self.property.get_solar_pv_roof_area(solar_pv_percentage) - - number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA) - solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE - - solar_panel_wattage = np.clip( - a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE - ) - - # We now have a property which is potentially suitable for solar PV - roof_coverage_percent = round(solar_pv_percentage * 100) - # Given the wattage, we estimate the cost of the solar PV system. 
This is based on the MCS database - # of solar PV installations - cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=False) - kw = np.floor(solar_panel_wattage / 100) / 10 - - description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p" - f"anel system on {round(roof_coverage_percent)}% the roof.") - - return [ - { - "phase": phase, - "parts": [], - "type": "solar_pv", - "description": description, - "starting_u_value": None, - "new_u_value": None, - "sap_points": None, - "already_installed": False, - **cost_result, - # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale - # back up here - "photo_supply": roof_coverage_percent, - "has_battery": False - } - ] - def recommend_building_analysis(self, phase): """ This recommendation approach handles the case of producing solar PV recommendations at the building level, @@ -240,11 +210,14 @@ class SolarPvRecommendations: ) kw = np.floor(recommendation_config["array_wattage"] / 100) / 10 if has_battery: - description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on " - f"{round(roof_coverage_percent)}% the roof, with a battery storage system.") + description = ( + f"Install a {kw} kilowatt-peak (kWp) solar panel system, with a battery." + ) else: - description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p" - f"anel system on {round(roof_coverage_percent)}% the roof.") + description = f"Install a {kw} kilowatt-peak (kWp) solar panel system." + + if self.property.in_conservation_area: + description += " Property is in a consevation area - please check with local planning authority." 
already_installed = "solar_pv" in self.property.already_installed if already_installed: diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index 9738b898..a82e4df5 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -29,7 +29,7 @@ class VentilationRecommendations(Definitions): def identify_ventilation(self): self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS - def recommend(self): + def recommend(self, phase): """ If there is no ventilation, we recommend installing ventilation @@ -63,7 +63,7 @@ class VentilationRecommendations(Definitions): # We recommend installing two mechanical ventilation systems self.recommendation = [ { - "phase": None, + "phase": phase, "parts": part, "type": part[0]["type"], "measure_type": "mechanical_ventilation", @@ -79,7 +79,13 @@ class VentilationRecommendations(Definitions): "total": estimated_cost, # We use a very simple and rough estimate of 4 hours per unit "labour_hours": labour_hours, - "labour_days": labour_days # Assume 8 hour day + "labour_days": labour_days, # Assume 8 hour day + "simulation_config": { + "mechanical_ventilation_ending": "mechanical, extract only", + }, + "description_simulation": { + "mechanical-ventilation": "mechanical, extract only" + } } ] diff --git a/recommendations/county_to_region.py b/recommendations/county_to_region.py index e84b5698..13c1cdaa 100644 --- a/recommendations/county_to_region.py +++ b/recommendations/county_to_region.py @@ -135,7 +135,10 @@ county_to_region_map = { 'Merthyr Tydfil': 'Wales', 'Monmouthshire': 'Wales', 'Mountain Ash': 'Wales', 'Neath Port Talbot': 'Wales', 'Newport': 'Wales', 'Pembrokeshire': 'Wales', 'Penarth': 'Wales', 'Pentre': 'Wales', 'Pontyclun': 'Wales', 'Pontypridd': 'Wales', 'Porth': 'Wales', 'Porthcawl': 'Wales', 'Powys': 'Wales', 'Rhondda Cynon Taff': 'Wales', - 'Rhoose': 'Wales', 'Sully': 
'Wales', 'Swansea': 'Wales', 'The Vale of Glamorgan': 'Wales', 'Tonypandy': 'Wales', + 'Rhoose': 'Wales', 'Sully': 'Wales', 'Swansea': 'Wales', + 'The Vale of Glamorgan': 'Wales', + 'Vale of Glamorgan': 'Wales', + 'Tonypandy': 'Wales', 'Torfaen': 'Wales', 'Treharris': 'Wales', 'Treorchy': 'Wales', 'Wrexham': 'Wales', 'Birmingham': 'West Midlands', 'Bromsgrove': 'West Midlands', 'Cannock Chase': 'West Midlands', 'Coventry': 'West Midlands', 'Dudley': 'West Midlands', 'East Staffordshire': 'West Midlands', 'Herefordshire': 'West Midlands', diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 8c15673d..05b9ec42 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -1,10 +1,14 @@ -def prepare_input_measures(property_recommendations, goal): +import backend.app.assumptions as assumptions + + +def prepare_input_measures(property_recommendations, goal, needs_ventilation): """ Basic function to convert recommendations_to_upload to a format that is suitable for the optimiser - large :param property_recommendations: object containing the recommendations, created in the plan trigger api :param goal: goal to be optimised for, should be one of the keys in gain_map. E.g. 
if the gain is SAP points, the goal should reflect that desired gain + :param needs_ventilation: boolean to indicate if the property needs ventilation :return: Nested list of input measures """ @@ -16,9 +20,20 @@ def prepare_input_measures(property_recommendations, goal): if not goal_key: raise NotImplementedError("Not implemented this gain type - investigate me") + # We only ever have one ventilation measure for now + ventilation_recommendation = next( + (measure[0] for measure in property_recommendations if measure[0]["type"] == "mechanical_ventilation"), + {} + ) + input_measures = [] for recs in property_recommendations: + if needs_ventilation and recs[0]["type"] == "mechanical_ventilation": + # If the house needs ventilation, ventilation will be packaged with the fabric measure so + # we don't need to optimise it independently + continue + if recs[0]["type"] == "solar_pv": # if the recommendation is a solar recommendation with a battery, we exclude it from the optimisation. recs = [r for r in recs if ~r["has_battery"]] @@ -27,16 +42,36 @@ def prepare_input_measures(property_recommendations, goal): if not recs_to_append: continue - input_measures.append( - [ + to_append = [] + for rec in recs: + # We bundle the impact of ventilation with the measure + total = ( + rec["total"] + ventilation_recommendation["total"] + if rec["type"] in assumptions.measures_needing_ventilation + else rec["total"] + ) + gain = ( + rec[goal_key] + ventilation_recommendation[goal_key] + if rec["type"] in assumptions.measures_needing_ventilation + else rec[goal_key] + ) + + rec_type = ( + "+".join( + [rec["type"], ventilation_recommendation["type"]] + ) if rec["type"] in assumptions.measures_needing_ventilation + else rec["type"] + ) + + to_append.append( { "id": rec["recommendation_id"], - "cost": rec["total"], - "gain": rec[goal_key], - "type": rec["type"] + "cost": total, + "gain": gain, + "type": rec_type } - for rec in recs if rec["energy_cost_savings"] >= 0 - ] - ) + ) + +
input_measures.append(to_append) return input_measures