diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/app.py b/asset_list/app.py index 13a6a025..4c15b71d 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -69,24 +69,24 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/West Kent" - data_filename = "West Kent Asset List.xlsx" + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals" + data_filename = "For Modelling.xlsx" sheet_name = "Sheet1" - postcode_column = "POSTCODE" - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "ADDRESS" + postcode_column = "Postcode" + address1_column = "address1" + address1_method = None + fulladdress_column = "full_address" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "PROPERTY TYPE" + landlord_os_uprn = "UPRN" + landlord_property_type = None landlord_built_form = None - landlord_wall_construction = "wall combined" - landlord_roof_construction = "HEATING SYSTEM" + landlord_wall_construction = None + landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "UPRN" + landlord_property_id = "Reference" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -116,7 +116,7 @@ def app(): address_cols_to_concat = None missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = None + landlord_os_uprn = "UPRN" landlord_property_type = None landlord_built_form = None landlord_wall_construction = None diff --git a/backend/Property.py b/backend/Property.py index c0ac4fe8..5e9e5e84 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1509,3 +1509,11 @@ class Property: """ lodgement_date = self.data["lodgement-date"] return (datetime.now() - pd.to_datetime(lodgement_date)) > timedelta(days=3650) + + @property + def epc_is_estimated(self) -> bool: + """ + This property indicates that the EPC is estimated, based on the presence of the "estimated" flag in the data + :return: boolean indicating whether the EPC is estimated + """ + return self.data.get("estimated", False) diff --git a/backend/addresses/Address.py b/backend/addresses/Address.py index 9b95f5e0..f348b141 100644 --- a/backend/addresses/Address.py +++ b/backend/addresses/Address.py @@ -1,36 +1,52 @@ from dataclasses import dataclass -from typing import Optional +import datatypes.epc as epc_datatypes +from typing import Optional, Union @dataclass(slots=True) class Address: + # address: Optional[str] + # full_address: Optional[str] + # property_type: Optional[str] + # built_form: Optional[str] + # estimated: bool + + # New fields uprn: Optional[int] landlord_property_id: Optional[str] - address: Optional[str] - full_address: Optional[str] + address_1: str + address_2: Optional[str] + address_3: Optional[str] + full_address: str postcode: str - property_type: Optional[str] - built_form: Optional[str] - estimated: bool + landlord_total_floor_area_m2: Union[float, None] + # Property components + landlord_property_type: Optional[epc_datatypes.property_type_built_form.PropertyType] + landlord_built_form: Optional[epc_datatypes.property_type_built_form.BuiltForm] + landlord_wall_construction: Optional[epc_datatypes.walls.EpcWallDescriptions] + landlord_roof_construction: Optional[epc_datatypes.roof.EpcRoofDescriptions] + landlord_floor_construction: Optional[epc_datatypes.floor.EpcFloorDescriptions] + landlord_windows_type: Optional[epc_datatypes.windows.EpcWindowDescriptions] + landlord_heating_system: Optional[epc_datatypes.main_heating.EpcHeatingSystems] + landlord_fuel_type: Optional[epc_datatypes.fuel.EpcFuel] + landlord_heating_controls: Optional[epc_datatypes.heating_controls.EpcHeatingControls] + landlord_hot_water_system: Optional[epc_datatypes.hotwater.EpcHotWaterSystems] + # Efficiency + landlord_wall_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency] + landlord_roof_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency] + landlord_windows_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency] + landlord_heating_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency] + landlord_heating_controls_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency] + landlord_hot_water_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency] + + # Additionals + landlord_has_sloping_ceiling: Optional[bool] + landlord_multi_glaze_proportion: Optional[float] + landlord_construction_age_band: Optional[epc_datatypes.construction_age_band.EpcConstructionAgeBand] # Additional address data, associated to a standardised asset list - domna_full_address: Optional[str] - domna_address_1: Optional[str] - landlord_heating_system: Optional[str] = None - solar_reason: Optional[str] = None - cavity_reason: Optional[str] = None - - @property - def address1(self): - - if self.domna_address_1 is not None: - address1 = self.domna_address_1 - else: - address1 = self.address - - # Format - address1 = str(int(address1)) if isinstance(address1, float) else str(address1) - return address1 + # domna_full_address: Optional[str] + # domna_address_1: Optional[str] @property def request_data(self) -> dict[str, Optional[str]]: @@ -41,27 +57,9 @@ class Address: "uprn": self.uprn, "landlord_property_id": self.landlord_property_id, "postcode": self.postcode, - "address1": self.address1, + "address1": self.address_1, "full_address": self.full_address, } # Drop nulls return {k: v for k, v in data.items() if v is not None} - - @property - def heating_system(self): - """ - Helper function to extract a heating system, which can be used to estimate EPC. This is a very limited, - placeholder function to cover some initial immediate cases. - :return: - """ - - ll_heating = self.landlord_property_id - if not ll_heating: - return None - - if ll_heating == "electric storage heaters": - # Return with the same format at the EPC - return "Electric storage heaters" - - return None diff --git a/backend/addresses/Addresses.py b/backend/addresses/Addresses.py index e81fef50..41f47d28 100644 --- a/backend/addresses/Addresses.py +++ b/backend/addresses/Addresses.py @@ -1,5 +1,7 @@ +import warnings from typing import Iterator from backend.addresses.Address import Address +from datatypes.epc.property_type_built_form import PropertyType class Addresses: @@ -19,8 +21,19 @@ class Addresses: @classmethod def from_plan_input(cls, plan_input: list[dict], body) -> "Addresses": addresses = [] + if body.file_format == "ara_property_list": + row_parser = cls.parse_ara_row + else: + warnings.warn( + "_parse_row_deprecated is deprecated and will be removed in a future version. " + "Use the parse_ara_row method instead", + DeprecationWarning, + stacklevel=2 + ) + row_parser = cls._parse_row_deprecated + for row in plan_input: - addresses.append(cls._parse_row(row, body)) + addresses.append(row_parser(row, body)) return cls(addresses) def get_uprns(self): @@ -35,13 +48,64 @@ class Addresses: def get_postcodes_for_flats(self): # Method to extract all of the postcodes associated to a flat, which is used for remote assessments # on flats - return [x.postcode for x in self._addresses if x.property_type in ["Flat", "flat"]] + return [x.postcode for x in self._addresses if x.landlord_property_type in [PropertyType.flat.value]] def get_property_requests(self): return [x.request_data for x in self._addresses] @staticmethod - def _parse_row(row: dict, body) -> Address: + def parse_ara_row(row: dict, body) -> Address: + """ + Method to parse a row from the ARA property list format, which is a more standardised format that we are + moving towards. + :param row: A dictionary representing a row from the ARA property list, which should have keys corresponding + to the Address dataclass fields. The method will attempt to parse these fields and create an Address object. + :param body: The PlanTriggerRequest body, which may contain additional information about the file format and + other details that could be relevant for parsing. + :return: An Address object created from the parsed row data. + """ + return Address( + uprn=int(row["uprn"]), + landlord_property_id=str(row["landlord_property_id"]) if row.get("landlord_property_id") else None, + address_1=row["address_1"], + address_2=row.get("address_2"), + address_3=row.get("address_3"), + full_address=row["full_address"], + postcode=str(row["postcode"]), + landlord_total_floor_area_m2=float(row["landlord_total_floor_area_m2"]) if row.get( + "landlord_total_floor_area_m2") else None, + landlord_property_type=row.get("landlord_property_type"), + landlord_built_form=row.get("landlord_built_form"), + landlord_wall_construction=row.get("landlord_wall_construction"), + landlord_roof_construction=row.get("landlord_roof_construction"), + landlord_floor_construction=row.get("landlord_floor_construction"), + landlord_windows_type=row.get("landlord_windows_type"), + landlord_heating_system=row.get("landlord_heating_system"), + landlord_fuel_type=row.get("landlord_fuel_type"), + landlord_heating_controls=row.get("landlord_heating_controls"), + landlord_hot_water_system=row.get("landlord_hot_water_system"), + landlord_wall_efficiency=row.get("landlord_wall_efficiency"), + landlord_roof_efficiency=row.get("landlord_roof_efficiency"), + landlord_windows_efficiency=row.get("landlord_windows_efficiency"), + landlord_heating_efficiency=row.get("landlord_heating_efficiency"), + landlord_heating_controls_efficiency=row.get("landlord_heating_controls_efficiency"), + landlord_hot_water_efficiency=row.get("landlord_hot_water_efficiency"), + landlord_has_sloping_ceiling=bool(row.get("landlord_has_sloping_ceiling")) if row.get( + "landlord_has_sloping_ceiling") is not None else None, + landlord_multi_glaze_proportion=float(row["landlord_multi_glaze_proportion"]) if row.get( + "landlord_multi_glaze_proportion") else None, + landlord_construction_age_band=row.get("landlord_construction_age_band"), + ) + + @staticmethod + def _parse_row_deprecated(row: dict, body) -> Address: + """ + Is a method to be deprecated in favour of using the new array property list format + :param row: + :param body: + :return: + """ + def clean_uprn(v): try: return int(float(v)) @@ -68,14 +132,32 @@ class Addresses: uprn=uprn, landlord_property_id=str(row["landlord_property_id"]) if row.get("landlord_property_id") else None, - address=str(address).strip() if address else None, + address_1=str(address).strip() if address else None, full_address=str(full_address).strip() if full_address else None, postcode=postcode, - property_type=row.get("property_type"), - built_form=row.get("built_form"), - estimated=bool(row.get("estimated", False)), - domna_full_address=row.get("domna_full_address"), - domna_address_1=row.get("domna_address_1"), + landlord_property_type=row.get("property_type"), + landlord_built_form=row.get("built_form"), + # estimated=bool(row.get("estimated", False)), + address_2=None, + address_3=None, + landlord_total_floor_area_m2=None, + landlord_wall_construction=None, + landlord_roof_construction=None, + landlord_floor_construction=None, + landlord_windows_type=None, + landlord_heating_system=None, + landlord_fuel_type=None, + landlord_heating_controls=None, + landlord_hot_water_system=None, + landlord_wall_efficiency=None, + landlord_roof_efficiency=None, + landlord_windows_efficiency=None, + landlord_heating_efficiency=None, + landlord_heating_controls_efficiency=None, + landlord_hot_water_efficiency=None, + landlord_has_sloping_ceiling=None, + landlord_multi_glaze_proportion=None, + landlord_construction_age_band=None, ) # def _build_identity_index(self) -> dict: diff --git a/backend/app/db/functions/epc_functions.py b/backend/app/db/functions/epc_functions.py index defc24c9..1dcb92fe 100644 --- a/backend/app/db/functions/epc_functions.py +++ b/backend/app/db/functions/epc_functions.py @@ -11,7 +11,7 @@ class EpcStoreService: Service layer for EPC data lookup and persistence. """ - FRESHNESS_DAYS = 30 + FRESHNESS_DAYS = 180 # Upgraded to 180 days # status labels FRESH = "fresh" diff --git a/backend/app/db/functions/property_functions.py b/backend/app/db/functions/property_functions.py index 99cc8ed7..0710ad09 100644 --- a/backend/app/db/functions/property_functions.py +++ b/backend/app/db/functions/property_functions.py @@ -15,8 +15,9 @@ from backend.app.db.models.portfolio import ( ) -def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str, - energy_assessment: dict, landlord_property_id: str | None = None) -> (int, bool): +def create_property( + session: Session, portfolio_id: int, address: str, postcode: str, uprn: str, + energy_assessment: dict, landlord_property_id: str | None = None) -> (int, bool): """ This function will create a record for the property in the database if it does not exist. If it does exist, it will just update the updated_at field. @@ -252,7 +253,7 @@ def bulk_create_properties( rows.append( { - "address": addr.address1, + "address": addr.address_1, "postcode": addr.postcode, "portfolio_id": body.portfolio_id, "uprn": addr.uprn, diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 7c352eba..afea49e7 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -126,7 +126,7 @@ class PlanTriggerRequest(BaseModel): # Add in optional fields which describe the format of the asset list being used file_type: Optional[Literal["csv", "xlsx"]] = None - file_format: Optional[Literal["domna_asset_list"]] = None + file_format: Optional[Literal["domna_asset_list", "ara_property_list"]] = None sheet_name: Optional[str] = None sheet_count: Optional[int] = None # If one of index_start or index_end is set, the other must be set too diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f86310cf..d808e2a5 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -543,6 +543,10 @@ def keep_max_sap_per_measure_type(items): async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) + if body.subtask_id: + SubTaskInterface().update_subtask_status( + subtask_id=UUID(body.subtask_id), status="in progress", cloud_logs_url=None + ) created_at = datetime.now().isoformat() start_ms = int(time.time() * 1000) @@ -647,6 +651,15 @@ async def model_engine(body: PlanTriggerRequest): if body.index_start is not None and body.index_end is not None: plan_input = plan_input[body.index_start:body.index_end] + # TODO: New onboarding process + if body.file_format == "ara_property_list": + plan_input = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/2025_11_11 - Peabody - Data Extracts for Domna_transformed (" + "2).xlsx", + sheet_name="Input Sample" + ) + plan_input = plan_input.to_dict('records') + # Confirm no duplicate UPRNS check_duplicate_uprns(plan_input) @@ -747,24 +760,25 @@ async def model_engine(body: PlanTriggerRequest): property_already_installed = list(already_installed_by_uprn[addr.uprn]) epc_searcher = SearchEpc( - address1=addr.address1, + address1=addr.address_1, postcode=addr.postcode, uprn=addr.uprn, auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key="", full_address=addr.full_address, - heating_system=addr.heating_system, + heating_system=addr.landlord_heating_system, associated_uprns=associated_uprns ) - epc_searcher.ordnance_survey_client.built_form = addr.built_form - epc_searcher.ordnance_survey_client.property_type = addr.property_type + epc_searcher.ordnance_survey_client.built_form = addr.landlord_built_form + epc_searcher.ordnance_survey_client.property_type = addr.landlord_property_type # For the moment, our OS API access is unavailable, so we skip and interpolate epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) epc_searcher.set_uprn_source(file_format=body.file_format) lookup_key = ( - ("uprn", addr.uprn) if addr.uprn is not None else ("landlord_property_id", addr.landlord_property_id) + ("uprn", addr.uprn) if addr.uprn is not None + else ("landlord_property_id", addr.landlord_property_id) ) property_id = property_lookup[lookup_key] @@ -804,7 +818,7 @@ async def model_engine(body: PlanTriggerRequest): epc_page=epc_page, rrn=rrn, cleaned_address=epc_searcher.address_clean, - config_address=addr.address, + config_address=addr.address_1, address_postal_town=epc_searcher.address_postal_town ) ) @@ -817,14 +831,6 @@ async def model_engine(body: PlanTriggerRequest): # factor this into EPCRecord as part of the cleaning however we need some more testing prepared_epc = averages_cleaning(prepared_epc, cleaning_data) - # If we have an ECO project, we parse the cavity/solar reasons - eco_packages[property_id] = parse_eco_packages(addr, prepared_epc) - - # Final step - extract inspections data, if we have it - we inject into property for usage - property_inspections = db_funcs.inspections_functions.extract_inspection_data(config) - if property_inspections: - inspections_map[property_id] = property_inspections - input_properties.append( Property( id=property_id, @@ -833,7 +839,7 @@ async def model_engine(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - already_installed=property_already_installed + eco_packages.get(property_id)[3], + already_installed=property_already_installed, find_my_epc_components=find_my_epc_components, property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, @@ -885,13 +891,125 @@ async def model_engine(body: PlanTriggerRequest): model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES ) - # The materials data could be cached or local so we don't need to make - # consistent requests to the backend for the same data logger.info("Reading in materials and cleaned datasets") with db_read_session() as session: materials = db_funcs.materials_functions.get_materials(session) cleaned = get_cleaned() + # Rebaselining + # TODO: MUST happen before setting features + rebaselining_scoring_data = [] + for p in tqdm(input_properties): + # 1) EPC expired + # 2) Missing EPC + # 3) Materially different information from landlord vs EPC + # make the landlord remapping dictionary + addr = [a for a in addresses if a.uprn == p.uprn][0] + landlord_remapping = { + "total-floor-area": addr.landlord_total_floor_area_m2, # 1m tolerance on floor area to perform remap + "property-type": addr.landlord_property_type, + "built-form": addr.landlord_built_form, + # Components + "walls-description": addr.landlord_wall_construction, + "roof-description": addr.landlord_roof_construction, + "floor-description": addr.landlord_floor_construction, + "windows-description": addr.landlord_windows_type, + "main-fuel": addr.landlord_fuel_type, + "mainheat-description": addr.landlord_heating_system, + "mainheatcont-description": addr.landlord_heating_controls, + "hotwater-description": addr.landlord_hot_water_system, + # Efficiency + "walls-energy-eff": addr.landlord_wall_efficiency, + "roof-energy-eff": addr.landlord_roof_efficiency, + "windows-energy-eff": addr.landlord_windows_efficiency, + "mainheat-energy-eff": addr.landlord_heating_efficiency, + "mainheatc-energy-eff": addr.landlord_heating_controls_efficiency, + "hot-water-energy-eff": addr.landlord_hot_water_efficiency, + "multi-glaze-proportion": addr.landlord_multi_glaze_proportion * 100, # TODO: Fix this! + "construction-age-band": addr.landlord_construction_age_band, + } + # Find differences between EPC and landlord data + differences = {} + for k, v in landlord_remapping.items(): + if k == "total-floor-area": + if abs(p.data[k] - v) > 1: # 1m tolerance + differences[k] = v + else: + if v != p.data[k] and (not pd.isnull(v)) and (not pd.isnull(p.data[k])): + differences[k] = v + + needs_rebaselining = p.epc_is_expired | p.epc_is_estimated | len(differences) > 0 + + # Need to adjust p.data and p.epc_record.df? + if needs_rebaselining: + if len(differences): + p.data.update(differences) + differences_underscored = {k.replace("-", "_"): v for k, v in differences.items()} + # Insert + for k, v in differences_underscored.items(): + if not hasattr(p.epc_record, k) and k not in ["property_type", "built_form"]: + # Sanity check - while we're implementing + raise ValueError("Property does not have an EPC record to update with differences") + # Hack but these aren't in the data class + if k not in ["property_type", "built_form"]: + setattr(p.epc_record, k, v) + p.epc_record.prepared_epc[k] = v + + p.create_base_difference_epc_record(cleaned_lookup=cleaned) + scoring_data = p.base_difference_record.df.copy() + rebaselining_scoring_data.append(scoring_data) + + rebaselining_scoring_data = pd.concat(rebaselining_scoring_data) + + # Trigger re-scoring + rebaselining_scoring_data["is_post_sap10_starting"] = True + # Score model - SAP re-baselining model + model_api.MODEL_URLS["retrofit-sap-baseline-predictions"] = "sapbaselinemodel" + model_api.prediction_buckets["retrofit-sap-baseline-predictions"] = "retrofit-sap-baseline-predictions-dev" + rebaselining_response = model_api.predict_all( + df=rebaselining_scoring_data, + bucket=get_settings().DATA_BUCKET, + model_prefixes=["retrofit-sap-baseline-predictions"], + extract_ids=False, + extract_uprn=True + ) + + for idx, rebaselined_prediction in rebaselining_response["retrofit-sap-baseline-predictions"].iterrows(): + property_instance = next(p for p in input_properties if p.uprn == int(rebaselined_prediction["uprn"])) + new_rating = rebaselined_prediction["predictions"] + new_epc_rating = sap_to_epc(new_rating) + # Insert + + # property_instance.data["current-energy-efficiency"] = sap_to_epc(new_rating) + + addr = [a for a in addresses if a.uprn == property_instance.uprn][0] + landlord_remapping = { + "total-floor-area": addr.landlord_total_floor_area_m2, # 1m tolerance on floor area to perform remap + "property-type": addr.landlord_property_type, + "built-form": addr.landlord_built_form, + # Components + "walls-description": addr.landlord_wall_construction, + "roof-description": addr.landlord_roof_construction, + "floor-description": addr.landlord_floor_construction, + "windows-description": addr.landlord_windows_type, + "main-fuel": addr.landlord_fuel_type, + "mainheatcont-description": addr.landlord_heating_controls, + "hotwater-description": addr.landlord_hot_water_system, + # Efficiency + "walls-energy-eff": addr.landlord_wall_efficiency, + "roof-energy-eff": addr.landlord_roof_efficiency, + "windows-energy-eff": addr.landlord_windows_efficiency, + "mainheat-energy-eff": addr.landlord_heating_efficiency, + "mainheatc-energy-eff": addr.landlord_heating_controls_efficiency, + "hot-water-energy-eff": addr.landlord_hot_water_efficiency, + "multi-glaze-proportion": addr.landlord_multi_glaze_proportion * 100, # TODO: Fix this! + "construction-age-band": addr.landlord_construction_age_band, + } + + # Insert the re-baselined scores into the property data + for p in input_properties: + property_rebaselined_sap = rebaselining_response["retrofit-sap-baseline-predictions"] + kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True) epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned) diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index 440367b2..d3a83e01 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -1,8 +1,7 @@ -import json -import random import aiohttp import asyncio import pandas as pd +from typing import List from tqdm import tqdm import requests from requests.exceptions import RequestException @@ -147,7 +146,13 @@ class ModelApi: else: return None - def predict_all(self, df, bucket, model_prefixes=None, extract_ids=True) -> dict: + def predict_all( + self, df: pd.DataFrame, + bucket: str, + model_prefixes: List[str] | None = None, + extract_ids: bool = True, + extract_uprn: bool = False + ) -> dict: """ For each model prefix, this method will upload the scoring data to s3 and then make a request to the @@ -159,6 +164,8 @@ class ModelApi: :param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be used :param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the + scoring data + :param extract_uprn: Boolean to determine if the uprn should be extracted from the scoring data id column :return: """ @@ -196,6 +203,9 @@ class ModelApi: # Convert back to int predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase) + if extract_uprn and "uprn" in df.columns: + predictions_df["uprn"] = df["uprn"].values + predictions[model_prefix] = predictions_df return predictions diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index 03cb2370..04ac9203 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -13,7 +13,7 @@ class OnboarderBase: landlord_roof_construction: str = "landlord_roof_construction" landlord_floor_construction: str = "landlord_floor_construction" landlord_windows_type: str = "landlord_windows_type" - landlord_heating_construction: str = "landlord_heating_construction" + landlord_heating_system: str = "landlord_heating_system" landlord_fuel_type: str = "landlord_fuel_type" landlord_heating_controls: str = "landlord_heating_controls" landlord_hot_water_system: str = "landlord_hot_water_system" @@ -53,7 +53,7 @@ class OnboarderBase: ) else: self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name) - + def write(self): if self.data is None: raise ValueError("No data to write. Please run transform() before writing.") diff --git a/backend/onboarders/mappings/parity/age_band.py b/backend/onboarders/mappings/parity/age_band.py index 406d39c1..02dfec00 100644 --- a/backend/onboarders/mappings/parity/age_band.py +++ b/backend/onboarders/mappings/parity/age_band.py @@ -12,8 +12,8 @@ parity_map = { "1996-2002": EpcConstructionAgeBand.from_1996_to_2002, "2003-2006": EpcConstructionAgeBand.from_2003_to_2006, "2007-2011": EpcConstructionAgeBand.from_2007_to_2011, - "2012 onwards": EpcConstructionAgeBand.from_2012_onwards, # Newer age bands, under SAP10 + "2012 onwards": EpcConstructionAgeBand.from_2012_to_2022, "2012-2022": EpcConstructionAgeBand.from_2012_to_2022, "2023 onwards": EpcConstructionAgeBand.from_2023_onwards, } diff --git a/backend/onboarders/mappings/parity/glazing.py b/backend/onboarders/mappings/parity/glazing.py index 46c006bd..fffb8de5 100644 --- a/backend/onboarders/mappings/parity/glazing.py +++ b/backend/onboarders/mappings/parity/glazing.py @@ -1,20 +1,23 @@ from datatypes.epc.efficiency import EpcEfficiency +from datatypes.epc.windows import EpcWindowDescriptions glazing_map = { # (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area # For SAP 10 assessments, The glazed type and glazed area are not populated in the EPC API data any more - "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None), - "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), - "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), - "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None), + "Double 2002 or later": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.AVERAGE, 1, None, None), + "Double before 2002": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.POOR, 1, None, None), + "Double but age unknown": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.POOR, 1, None, None), + "Single": (EpcWindowDescriptions.single_glazed, EpcEfficiency.VERY_POOR, 0, None, None), # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022 # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to # how we make updates to the windows data. # Triple known data is high performance glazing with Good efficiency (at least) - "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None), + "Triple": (EpcWindowDescriptions.fully_triple_glazed, EpcEfficiency.AVERAGE, 1, None, None), # This is also classed as high performance glazing - "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), + "DoubleKnownData": ( + EpcWindowDescriptions.fully_double_glazed.high_performance_glazing, EpcEfficiency.GOOD, 1, None, None + ), # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good) - "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None), - "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), + "Secondary": (EpcWindowDescriptions.full_secondary_glazing, EpcEfficiency.POOR, 1, None, None), + "TripleKnownData": (EpcWindowDescriptions.high_performance_glazing, EpcEfficiency.GOOD, 1, None, None), } diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 6c79d027..5c180ad3 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -262,7 +262,7 @@ class ParityOnboarder(OnboarderBase): # controls. E.g. it may be programmer and room thermostat self.data[ [ - self.landlord_heating_construction, + self.landlord_heating_system, self.landlord_heating_efficiency, self.landlord_fuel_type, self.landlord_heating_controls, @@ -309,7 +309,7 @@ class ParityOnboarder(OnboarderBase): self.landlord_multi_glaze_proportion, self.landlord_glazed_type, self.landlord_glazed_area, - self.landlord_heating_construction, + self.landlord_heating_system, self.landlord_heating_efficiency, self.landlord_fuel_type, self.landlord_heating_controls, @@ -332,7 +332,7 @@ class ParityOnboarder(OnboarderBase): self.landlord_construction_age_band, self.landlord_property_type, self.landlord_built_form, self.landlord_wall_construction, self.landlord_wall_efficiency, self.landlord_roof_construction, self.landlord_roof_efficiency, self.landlord_floor_construction, self.landlord_windows_type, - self.landlord_windows_efficiency, self.landlord_heating_construction, self.landlord_heating_efficiency, + self.landlord_windows_efficiency, self.landlord_heating_system, self.landlord_heating_efficiency, self.landlord_fuel_type, self.landlord_heating_controls, self.landlord_heating_controls_efficiency, self.landlord_hot_water_system, self.landlord_hot_water_efficiency ]: diff --git a/datatypes/epc/__init__.py b/datatypes/epc/__init__.py index e69de29b..d997816a 100644 --- a/datatypes/epc/__init__.py +++ b/datatypes/epc/__init__.py @@ -0,0 +1,26 @@ +from .construction_age_band import EpcConstructionAgeBand +from .efficiency import EpcEfficiency +from .floor import EpcFloorDescriptions +from .fuel import EpcFuel +from .heating_controls import EpcHeatingControls +from .hotwater import EpcHotWaterSystems +from .main_heating import EpcHeatingSystems +from .property_type_built_form import PropertyType, BuiltForm +from .roof import EpcRoofDescriptions +from .walls import EpcWallDescriptions +from .windows import EpcWindowDescriptions + +__all__ = [ + "EpcConstructionAgeBand", + "EpcEfficiency", + "EpcFloorDescriptions", + "EpcFuel", + "EpcHeatingControls", + "EpcHotWaterSystems", + "EpcHeatingSystems", + "PropertyType", + "BuiltForm", + "EpcRoofDescriptions", + "EpcWallDescriptions", + "EpcWindowDescriptions", +] diff --git a/datatypes/epc/construction_age_band.py b/datatypes/epc/construction_age_band.py index c5e7a03b..12d98988 100644 --- a/datatypes/epc/construction_age_band.py +++ b/datatypes/epc/construction_age_band.py @@ -15,7 +15,7 @@ class EpcConstructionAgeBand(Enum): from_1996_to_2002: str = 'England and Wales: 1996-2002' from_2003_to_2006: str = 'England and Wales: 2003-2006' from_2007_to_2011: str = 'England and Wales: 2007-2011' - from_2012_onwards: str = 'England and Wales: 2012-onwards' + from_2012_onwards: str = 'England and Wales: 2012 onwards' from_2012_to_2022: str = 'England and Wales: 2012-2022' from_2023_onwards: str = 'England and Wales: 2023 onwards' diff --git a/datatypes/epc/windows.py b/datatypes/epc/windows.py new file mode 100644 index 00000000..3a8cde52 --- /dev/null +++ b/datatypes/epc/windows.py @@ -0,0 +1,9 @@ +from enum import Enum + + +class EpcWindowDescriptions(Enum): + fully_double_glazed: str = "Fully double glazed" + single_glazed: str = "Single glazed" + fully_triple_glazed: str = "Fully triple glazed" + high_performance_glazing: str = "High performance glazing" + full_secondary_glazing: str = "Full secondary glazing" diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 7c27de51..5d1fcaa0 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -212,11 +212,11 @@ class TrainingDataset(BaseDataset): common_cols = [[col + "_starting", col + "_ending"] for col in common_cols] self.df = self.df.loc[ - :, - no_suffix_cols - + only_ending_cols - + [col for cols in common_cols for col in cols], - ] + :, + no_suffix_cols + + only_ending_cols + + [col for cols in common_cols for col in cols], + ] def _remove_abnormal_change_in_floor_area(self): """ diff --git a/recommendations/rdsap_tables.py b/recommendations/rdsap_tables.py index 46e7d083..558b0da4 100644 --- a/recommendations/rdsap_tables.py +++ b/recommendations/rdsap_tables.py @@ -105,6 +105,13 @@ age_band_data = [ "Northern_Ireland": "2023 onwards", "Park_home_UK": None, }, + { + "age_band": "L", + "England_Wales": "2012-2022", + "Scotland": "2012 - 2023", + "Northern_Ireland": "2014 -2022", + "Park_home_UK": None, + } ] england_wales_age_band_lookup = { @@ -779,13 +786,13 @@ epc_wall_description_map = { "Sandstone or limestone, as built, no insulation": "Stone: sandstone or limestone as built", "Sandstone or limestone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone or limestone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " - "insulation", + "insulation", "Sandstone, as built, no insulation": "Stone: sandstone or limestone as built", "Sandstone or limestone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone or limestone, with external insulation": "Stone/solid brick with 100 mm external or internal " - "insulation", + "insulation", "Sandstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", ############################ @@ -794,7 +801,8 @@ epc_wall_description_map = { "Granite or whinstone, as built, no insulation": "Stone: granite or whinstone as built", "Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", "Granite or whin, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", - "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", + "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " + "insulation", "Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Granite or whin, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation",