diff --git a/.idea/Model.iml b/.idea/Model.iml
index c6561970..09f2e496 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..fb10c6b0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/asset_list/app.py b/asset_list/app.py
index 13a6a025..4c15b71d 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -69,24 +69,24 @@ def app():
Property UPRN
"""
- data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/West Kent"
- data_filename = "West Kent Asset List.xlsx"
+ data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals"
+ data_filename = "For Modelling.xlsx"
sheet_name = "Sheet1"
- postcode_column = "POSTCODE"
- address1_column = None
- address1_method = "house_number_extraction"
- fulladdress_column = "ADDRESS"
+ postcode_column = "Postcode"
+ address1_column = "address1"
+ address1_method = None
+ fulladdress_column = "full_address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
- landlord_os_uprn = None
- landlord_property_type = "PROPERTY TYPE"
+ landlord_os_uprn = "UPRN"
+ landlord_property_type = None
landlord_built_form = None
- landlord_wall_construction = "wall combined"
- landlord_roof_construction = "HEATING SYSTEM"
+ landlord_wall_construction = None
+ landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
- landlord_property_id = "UPRN"
+ landlord_property_id = "Reference"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@@ -116,7 +116,7 @@ def app():
address_cols_to_concat = None
missing_postcodes_method = None
landlord_year_built = None
- landlord_os_uprn = None
+ landlord_os_uprn = "UPRN"
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
diff --git a/backend/Property.py b/backend/Property.py
index c0ac4fe8..5e9e5e84 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1509,3 +1509,11 @@ class Property:
"""
lodgement_date = self.data["lodgement-date"]
return (datetime.now() - pd.to_datetime(lodgement_date)) > timedelta(days=3650)
+
+    @property
+    def epc_is_estimated(self) -> bool:
+        """
+        Whether this EPC is estimated, read from the optional "estimated" flag in the EPC data.
+        :return: a real bool (the raw flag is coerced); a missing or None flag counts as not estimated
+        """
+        return bool(self.data.get("estimated", False))
diff --git a/backend/addresses/Address.py b/backend/addresses/Address.py
index 9b95f5e0..f348b141 100644
--- a/backend/addresses/Address.py
+++ b/backend/addresses/Address.py
@@ -1,36 +1,52 @@
from dataclasses import dataclass
-from typing import Optional
+import datatypes.epc as epc_datatypes
+from typing import Optional, Union
@dataclass(slots=True)
class Address:
+ # address: Optional[str]
+ # full_address: Optional[str]
+ # property_type: Optional[str]
+ # built_form: Optional[str]
+ # estimated: bool
+
+ # New fields
uprn: Optional[int]
landlord_property_id: Optional[str]
- address: Optional[str]
- full_address: Optional[str]
+ address_1: str
+ address_2: Optional[str]
+ address_3: Optional[str]
+ full_address: str
postcode: str
- property_type: Optional[str]
- built_form: Optional[str]
- estimated: bool
+ landlord_total_floor_area_m2: Union[float, None]
+ # Property components
+ landlord_property_type: Optional[epc_datatypes.property_type_built_form.PropertyType]
+ landlord_built_form: Optional[epc_datatypes.property_type_built_form.BuiltForm]
+ landlord_wall_construction: Optional[epc_datatypes.walls.EpcWallDescriptions]
+ landlord_roof_construction: Optional[epc_datatypes.roof.EpcRoofDescriptions]
+ landlord_floor_construction: Optional[epc_datatypes.floor.EpcFloorDescriptions]
+ landlord_windows_type: Optional[epc_datatypes.windows.EpcWindowDescriptions]
+ landlord_heating_system: Optional[epc_datatypes.main_heating.EpcHeatingSystems]
+ landlord_fuel_type: Optional[epc_datatypes.fuel.EpcFuel]
+ landlord_heating_controls: Optional[epc_datatypes.heating_controls.EpcHeatingControls]
+ landlord_hot_water_system: Optional[epc_datatypes.hotwater.EpcHotWaterSystems]
+ # Efficiency
+ landlord_wall_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
+ landlord_roof_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
+ landlord_windows_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
+ landlord_heating_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
+ landlord_heating_controls_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
+ landlord_hot_water_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
+
+ # Additionals
+ landlord_has_sloping_ceiling: Optional[bool]
+ landlord_multi_glaze_proportion: Optional[float]
+ landlord_construction_age_band: Optional[epc_datatypes.construction_age_band.EpcConstructionAgeBand]
# Additional address data, associated to a standardised asset list
- domna_full_address: Optional[str]
- domna_address_1: Optional[str]
- landlord_heating_system: Optional[str] = None
- solar_reason: Optional[str] = None
- cavity_reason: Optional[str] = None
-
- @property
- def address1(self):
-
- if self.domna_address_1 is not None:
- address1 = self.domna_address_1
- else:
- address1 = self.address
-
- # Format
- address1 = str(int(address1)) if isinstance(address1, float) else str(address1)
- return address1
+ # domna_full_address: Optional[str]
+ # domna_address_1: Optional[str]
@property
def request_data(self) -> dict[str, Optional[str]]:
@@ -41,27 +57,9 @@ class Address:
"uprn": self.uprn,
"landlord_property_id": self.landlord_property_id,
"postcode": self.postcode,
- "address1": self.address1,
+ "address1": self.address_1,
"full_address": self.full_address,
}
# Drop nulls
return {k: v for k, v in data.items() if v is not None}
-
- @property
- def heating_system(self):
- """
- Helper function to extract a heating system, which can be used to estimate EPC. This is a very limited,
- placeholder function to cover some initial immediate cases.
- :return:
- """
-
- ll_heating = self.landlord_property_id
- if not ll_heating:
- return None
-
- if ll_heating == "electric storage heaters":
- # Return with the same format at the EPC
- return "Electric storage heaters"
-
- return None
diff --git a/backend/addresses/Addresses.py b/backend/addresses/Addresses.py
index e81fef50..41f47d28 100644
--- a/backend/addresses/Addresses.py
+++ b/backend/addresses/Addresses.py
@@ -1,5 +1,7 @@
+import warnings
from typing import Iterator
from backend.addresses.Address import Address
+from datatypes.epc.property_type_built_form import PropertyType
class Addresses:
@@ -19,8 +21,19 @@ class Addresses:
@classmethod
def from_plan_input(cls, plan_input: list[dict], body) -> "Addresses":
addresses = []
+ if body.file_format == "ara_property_list":
+ row_parser = cls.parse_ara_row
+ else:
+ warnings.warn(
+ "_parse_row_deprecated is deprecated and will be removed in a future version. "
+ "Use the parse_ara_row method instead",
+ DeprecationWarning,
+ stacklevel=2
+ )
+ row_parser = cls._parse_row_deprecated
+
for row in plan_input:
- addresses.append(cls._parse_row(row, body))
+ addresses.append(row_parser(row, body))
return cls(addresses)
def get_uprns(self):
@@ -35,13 +48,64 @@ class Addresses:
def get_postcodes_for_flats(self):
# Method to extract all of the postcodes associated to a flat, which is used for remote assessments
# on flats
- return [x.postcode for x in self._addresses if x.property_type in ["Flat", "flat"]]
+ return [x.postcode for x in self._addresses if x.landlord_property_type in [PropertyType.flat.value]]
def get_property_requests(self):
return [x.request_data for x in self._addresses]
@staticmethod
- def _parse_row(row: dict, body) -> Address:
+    def parse_ara_row(row: dict, body) -> Address:
+        """
+        Parse one row of the standardised ARA property list into an Address.
+
+        Blank cells count as missing: None, "" and float NaN (what pandas yields for an empty spreadsheet
+        cell, and which is truthy) all become None; a genuine 0/False is kept, except for a zero floor area.
+        :param row: A dictionary keyed by the Address dataclass field names. "uprn", "address_1",
+        "full_address" and "postcode" are read unconditionally; every other key is optional.
+        :param body: The PlanTriggerRequest body. Unused here; accepted so both row parsers share a signature.
+        :return: An Address object created from the parsed row data.
+        :raises KeyError: if one of the unconditionally read keys is absent from the row.
+        """
+        def optional(key, cast=None):
+            value = row.get(key)
+            # "value != value" is only true for NaN
+            if value is None or value != value or value == "":
+                return None
+            return cast(value) if cast is not None else value
+
+        # Fields copied across unchanged once blank cells have been normalised to None
+        passthrough = (
+            "address_2", "address_3",
+            "landlord_property_type", "landlord_built_form", "landlord_wall_construction",
+            "landlord_roof_construction", "landlord_floor_construction", "landlord_windows_type",
+            "landlord_heating_system", "landlord_fuel_type", "landlord_heating_controls",
+            "landlord_hot_water_system", "landlord_wall_efficiency", "landlord_roof_efficiency",
+            "landlord_windows_efficiency", "landlord_heating_efficiency",
+            "landlord_heating_controls_efficiency", "landlord_hot_water_efficiency",
+            "landlord_construction_age_band",
+        )
+        return Address(
+            uprn=int(row["uprn"]),
+            landlord_property_id=optional("landlord_property_id", str),
+            address_1=row["address_1"],
+            full_address=row["full_address"],
+            postcode=str(row["postcode"]),
+            # A floor area of zero is not a measurement, so it stays "not supplied" as before
+            landlord_total_floor_area_m2=optional("landlord_total_floor_area_m2", float) or None,
+            landlord_has_sloping_ceiling=optional("landlord_has_sloping_ceiling", bool),
+            landlord_multi_glaze_proportion=optional("landlord_multi_glaze_proportion", float),
+            **{key: optional(key) for key in passthrough},
+        )
+
+ @staticmethod
+ def _parse_row_deprecated(row: dict, body) -> Address:
+ """
+ Legacy row parser, deprecated in favour of parse_ara_row and the new ARA property list format
+ :param row:
+ :param body:
+ :return:
+ """
+
def clean_uprn(v):
try:
return int(float(v))
@@ -68,14 +132,32 @@ class Addresses:
uprn=uprn,
landlord_property_id=str(row["landlord_property_id"])
if row.get("landlord_property_id") else None,
- address=str(address).strip() if address else None,
+ address_1=str(address).strip() if address else None,
full_address=str(full_address).strip() if full_address else None,
postcode=postcode,
- property_type=row.get("property_type"),
- built_form=row.get("built_form"),
- estimated=bool(row.get("estimated", False)),
- domna_full_address=row.get("domna_full_address"),
- domna_address_1=row.get("domna_address_1"),
+ landlord_property_type=row.get("property_type"),
+ landlord_built_form=row.get("built_form"),
+ # estimated=bool(row.get("estimated", False)),
+ address_2=None,
+ address_3=None,
+ landlord_total_floor_area_m2=None,
+ landlord_wall_construction=None,
+ landlord_roof_construction=None,
+ landlord_floor_construction=None,
+ landlord_windows_type=None,
+ landlord_heating_system=None,
+ landlord_fuel_type=None,
+ landlord_heating_controls=None,
+ landlord_hot_water_system=None,
+ landlord_wall_efficiency=None,
+ landlord_roof_efficiency=None,
+ landlord_windows_efficiency=None,
+ landlord_heating_efficiency=None,
+ landlord_heating_controls_efficiency=None,
+ landlord_hot_water_efficiency=None,
+ landlord_has_sloping_ceiling=None,
+ landlord_multi_glaze_proportion=None,
+ landlord_construction_age_band=None,
)
# def _build_identity_index(self) -> dict:
diff --git a/backend/app/db/functions/epc_functions.py b/backend/app/db/functions/epc_functions.py
index defc24c9..1dcb92fe 100644
--- a/backend/app/db/functions/epc_functions.py
+++ b/backend/app/db/functions/epc_functions.py
@@ -11,7 +11,7 @@ class EpcStoreService:
Service layer for EPC data lookup and persistence.
"""
- FRESHNESS_DAYS = 30
+ FRESHNESS_DAYS = 180 # Upgraded to 180 days
# status labels
FRESH = "fresh"
diff --git a/backend/app/db/functions/property_functions.py b/backend/app/db/functions/property_functions.py
index 99cc8ed7..0710ad09 100644
--- a/backend/app/db/functions/property_functions.py
+++ b/backend/app/db/functions/property_functions.py
@@ -15,8 +15,9 @@ from backend.app.db.models.portfolio import (
)
-def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str,
- energy_assessment: dict, landlord_property_id: str | None = None) -> (int, bool):
+def create_property(
+ session: Session, portfolio_id: int, address: str, postcode: str, uprn: str,
+ energy_assessment: dict, landlord_property_id: str | None = None) -> (int, bool):
"""
This function will create a record for the property in the database if it does not exist.
If it does exist, it will just update the updated_at field.
@@ -252,7 +253,7 @@ def bulk_create_properties(
rows.append(
{
- "address": addr.address1,
+ "address": addr.address_1,
"postcode": addr.postcode,
"portfolio_id": body.portfolio_id,
"uprn": addr.uprn,
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 7c352eba..afea49e7 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -126,7 +126,7 @@ class PlanTriggerRequest(BaseModel):
# Add in optional fields which describe the format of the asset list being used
file_type: Optional[Literal["csv", "xlsx"]] = None
- file_format: Optional[Literal["domna_asset_list"]] = None
+ file_format: Optional[Literal["domna_asset_list", "ara_property_list"]] = None
sheet_name: Optional[str] = None
sheet_count: Optional[int] = None
# If one of index_start or index_end is set, the other must be set too
diff --git a/backend/engine/engine.py b/backend/engine/engine.py
index f86310cf..d808e2a5 100644
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@@ -543,6 +543,10 @@ def keep_max_sap_per_measure_type(items):
async def model_engine(body: PlanTriggerRequest):
logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json()))
+ if body.subtask_id:
+ SubTaskInterface().update_subtask_status(
+ subtask_id=UUID(body.subtask_id), status="in progress", cloud_logs_url=None
+ )
created_at = datetime.now().isoformat()
start_ms = int(time.time() * 1000)
@@ -647,6 +651,15 @@ async def model_engine(body: PlanTriggerRequest):
if body.index_start is not None and body.index_end is not None:
plan_input = plan_input[body.index_start:body.index_end]
+ # TODO: New onboarding process
+ if body.file_format == "ara_property_list":
+ plan_input = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Downloads/2025_11_11 - Peabody - Data Extracts for Domna_transformed ("
+ "2).xlsx",
+ sheet_name="Input Sample"
+ )
+ plan_input = plan_input.to_dict('records')
+
# Confirm no duplicate UPRNS
check_duplicate_uprns(plan_input)
@@ -747,24 +760,25 @@ async def model_engine(body: PlanTriggerRequest):
property_already_installed = list(already_installed_by_uprn[addr.uprn])
epc_searcher = SearchEpc(
- address1=addr.address1,
+ address1=addr.address_1,
postcode=addr.postcode,
uprn=addr.uprn,
auth_token=get_settings().EPC_AUTH_TOKEN,
os_api_key="",
full_address=addr.full_address,
- heating_system=addr.heating_system,
+ heating_system=addr.landlord_heating_system,
associated_uprns=associated_uprns
)
- epc_searcher.ordnance_survey_client.built_form = addr.built_form
- epc_searcher.ordnance_survey_client.property_type = addr.property_type
+ epc_searcher.ordnance_survey_client.built_form = addr.landlord_built_form
+ epc_searcher.ordnance_survey_client.property_type = addr.landlord_property_type
# For the moment, our OS API access is unavailable, so we skip and interpolate
epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True)
epc_searcher.set_uprn_source(file_format=body.file_format)
lookup_key = (
- ("uprn", addr.uprn) if addr.uprn is not None else ("landlord_property_id", addr.landlord_property_id)
+ ("uprn", addr.uprn) if addr.uprn is not None
+ else ("landlord_property_id", addr.landlord_property_id)
)
property_id = property_lookup[lookup_key]
@@ -804,7 +818,7 @@ async def model_engine(body: PlanTriggerRequest):
epc_page=epc_page,
rrn=rrn,
cleaned_address=epc_searcher.address_clean,
- config_address=addr.address,
+ config_address=addr.address_1,
address_postal_town=epc_searcher.address_postal_town
)
)
@@ -817,14 +831,6 @@ async def model_engine(body: PlanTriggerRequest):
# factor this into EPCRecord as part of the cleaning however we need some more testing
prepared_epc = averages_cleaning(prepared_epc, cleaning_data)
- # If we have an ECO project, we parse the cavity/solar reasons
- eco_packages[property_id] = parse_eco_packages(addr, prepared_epc)
-
- # Final step - extract inspections data, if we have it - we inject into property for usage
- property_inspections = db_funcs.inspections_functions.extract_inspection_data(config)
- if property_inspections:
- inspections_map[property_id] = property_inspections
-
input_properties.append(
Property(
id=property_id,
@@ -833,7 +839,7 @@ async def model_engine(body: PlanTriggerRequest):
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
- already_installed=property_already_installed + eco_packages.get(property_id)[3],
+ already_installed=property_already_installed,
find_my_epc_components=find_my_epc_components,
property_valuation=req_data.valuation,
non_invasive_recommendations=property_non_invasive_recommendations,
@@ -885,13 +891,125 @@ async def model_engine(body: PlanTriggerRequest):
model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES
)
- # The materials data could be cached or local so we don't need to make
- # consistent requests to the backend for the same data
logger.info("Reading in materials and cleaned datasets")
with db_read_session() as session:
materials = db_funcs.materials_functions.get_materials(session)
cleaned = get_cleaned()
+ # Rebaselining
+ # TODO: MUST happen before setting features
+ rebaselining_scoring_data = []
+ for p in tqdm(input_properties):
+ # 1) EPC expired
+ # 2) Missing EPC
+ # 3) Materially different information from landlord vs EPC
+ # make the landlord remapping dictionary
+        addr = [a for a in addresses if a.uprn == p.uprn][0]
+        glazing = addr.landlord_multi_glaze_proportion  # None/NaN when the landlord did not supply it
+        landlord_remapping = {
+            "total-floor-area": addr.landlord_total_floor_area_m2,  # 1m tolerance on floor area to perform remap
+            "property-type": addr.landlord_property_type,
+            "built-form": addr.landlord_built_form,
+            # Components
+            "walls-description": addr.landlord_wall_construction,
+            "roof-description": addr.landlord_roof_construction,
+            "floor-description": addr.landlord_floor_construction,
+            "windows-description": addr.landlord_windows_type,
+            "main-fuel": addr.landlord_fuel_type,
+            "mainheat-description": addr.landlord_heating_system,
+            "mainheatcont-description": addr.landlord_heating_controls,
+            "hotwater-description": addr.landlord_hot_water_system,
+            # Efficiency
+            "walls-energy-eff": addr.landlord_wall_efficiency,
+            "roof-energy-eff": addr.landlord_roof_efficiency,
+            "windows-energy-eff": addr.landlord_windows_efficiency,
+            "mainheat-energy-eff": addr.landlord_heating_efficiency,
+            "mainheatc-energy-eff": addr.landlord_heating_controls_efficiency,
+            "hot-water-energy-eff": addr.landlord_hot_water_efficiency,
+            "multi-glaze-proportion": None if pd.isnull(glazing) else glazing * 100,  # TODO: Fix this!
+            "construction-age-band": addr.landlord_construction_age_band,
+        }
+        # Find differences between EPC and landlord data; a value missing on either side is never a difference
+        differences = {}
+        for k, v in landlord_remapping.items():
+            if pd.isnull(v) or pd.isnull(p.data[k]):
+                continue
+            is_different = abs(p.data[k] - v) > 1 if k == "total-floor-area" else v != p.data[k]  # 1m tolerance
+            if is_different:
+                differences[k] = v
+        # Logical "or" on purpose: bitwise "|" binds tighter than ">" and raises on a None flag
+        needs_rebaselining = p.epc_is_expired or p.epc_is_estimated or len(differences) > 0
+
+ # Need to adjust p.data and p.epc_record.df?
+ if needs_rebaselining:
+ if len(differences):
+ p.data.update(differences)
+ differences_underscored = {k.replace("-", "_"): v for k, v in differences.items()}
+ # Insert
+ for k, v in differences_underscored.items():
+ if not hasattr(p.epc_record, k) and k not in ["property_type", "built_form"]:
+ # Sanity check - while we're implementing
+ raise ValueError("Property does not have an EPC record to update with differences")
+ # Hack but these aren't in the data class
+ if k not in ["property_type", "built_form"]:
+ setattr(p.epc_record, k, v)
+ p.epc_record.prepared_epc[k] = v
+
+ p.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ scoring_data = p.base_difference_record.df.copy()
+ rebaselining_scoring_data.append(scoring_data)
+
+ rebaselining_scoring_data = pd.concat(rebaselining_scoring_data)
+
+ # Trigger re-scoring
+ rebaselining_scoring_data["is_post_sap10_starting"] = True
+ # Score model - SAP re-baselining model
+ model_api.MODEL_URLS["retrofit-sap-baseline-predictions"] = "sapbaselinemodel"
+ model_api.prediction_buckets["retrofit-sap-baseline-predictions"] = "retrofit-sap-baseline-predictions-dev"
+ rebaselining_response = model_api.predict_all(
+ df=rebaselining_scoring_data,
+ bucket=get_settings().DATA_BUCKET,
+ model_prefixes=["retrofit-sap-baseline-predictions"],
+ extract_ids=False,
+ extract_uprn=True
+ )
+
+ for idx, rebaselined_prediction in rebaselining_response["retrofit-sap-baseline-predictions"].iterrows():
+ property_instance = next(p for p in input_properties if p.uprn == int(rebaselined_prediction["uprn"]))
+ new_rating = rebaselined_prediction["predictions"]
+ new_epc_rating = sap_to_epc(new_rating)
+        # Insert
+        # property_instance.data["current-energy-efficiency"] = sap_to_epc(new_rating)
+
+        addr = [a for a in addresses if a.uprn == property_instance.uprn][0]
+        glazing = addr.landlord_multi_glaze_proportion  # None/NaN when the landlord did not supply it
+        landlord_remapping = {
+            "total-floor-area": addr.landlord_total_floor_area_m2,  # 1m tolerance on floor area to perform remap
+            "property-type": addr.landlord_property_type,
+            "built-form": addr.landlord_built_form,
+            # Components
+            "walls-description": addr.landlord_wall_construction,
+            "roof-description": addr.landlord_roof_construction,
+            "floor-description": addr.landlord_floor_construction,
+            "windows-description": addr.landlord_windows_type,
+            "main-fuel": addr.landlord_fuel_type,
+            "mainheatcont-description": addr.landlord_heating_controls,
+            "hotwater-description": addr.landlord_hot_water_system,
+            # Efficiency
+            "walls-energy-eff": addr.landlord_wall_efficiency,
+            "roof-energy-eff": addr.landlord_roof_efficiency,
+            "windows-energy-eff": addr.landlord_windows_efficiency,
+            "mainheat-energy-eff": addr.landlord_heating_efficiency,
+            "mainheatc-energy-eff": addr.landlord_heating_controls_efficiency,
+            "hot-water-energy-eff": addr.landlord_hot_water_efficiency,
+            "multi-glaze-proportion": None if pd.isnull(glazing) else glazing * 100,  # TODO: Fix this!
+            "construction-age-band": addr.landlord_construction_age_band,
+        }
+
+ # Insert the re-baselined scores into the property data
+ for p in input_properties:
+ property_rebaselined_sap = rebaselining_response["retrofit-sap-baseline-predictions"]
+
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index 440367b2..d3a83e01 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -1,8 +1,7 @@
-import json
-import random
import aiohttp
import asyncio
import pandas as pd
+from typing import List
from tqdm import tqdm
import requests
from requests.exceptions import RequestException
@@ -147,7 +146,13 @@ class ModelApi:
else:
return None
- def predict_all(self, df, bucket, model_prefixes=None, extract_ids=True) -> dict:
+ def predict_all(
+ self, df: pd.DataFrame,
+ bucket: str,
+ model_prefixes: List[str] | None = None,
+ extract_ids: bool = True,
+ extract_uprn: bool = False
+ ) -> dict:
"""
For each model prefix, this method will upload the scoring data to s3 and then make a request to the
@@ -159,6 +164,8 @@ class ModelApi:
:param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be
used
:param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the
id column
+ :param extract_uprn: Boolean to determine if the uprn column should be copied from the scoring data onto
+ the predictions
:return:
"""
@@ -196,6 +203,9 @@ class ModelApi:
# Convert back to int
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
+ if extract_uprn and "uprn" in df.columns:
+ predictions_df["uprn"] = df["uprn"].values
+
predictions[model_prefix] = predictions_df
return predictions
diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py
index 03cb2370..04ac9203 100644
--- a/backend/onboarders/base.py
+++ b/backend/onboarders/base.py
@@ -13,7 +13,7 @@ class OnboarderBase:
landlord_roof_construction: str = "landlord_roof_construction"
landlord_floor_construction: str = "landlord_floor_construction"
landlord_windows_type: str = "landlord_windows_type"
- landlord_heating_construction: str = "landlord_heating_construction"
+ landlord_heating_system: str = "landlord_heating_system"
landlord_fuel_type: str = "landlord_fuel_type"
landlord_heating_controls: str = "landlord_heating_controls"
landlord_hot_water_system: str = "landlord_hot_water_system"
@@ -53,7 +53,7 @@ class OnboarderBase:
)
else:
self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name)
-
+
def write(self):
if self.data is None:
raise ValueError("No data to write. Please run transform() before writing.")
diff --git a/backend/onboarders/mappings/parity/age_band.py b/backend/onboarders/mappings/parity/age_band.py
index 406d39c1..02dfec00 100644
--- a/backend/onboarders/mappings/parity/age_band.py
+++ b/backend/onboarders/mappings/parity/age_band.py
@@ -12,8 +12,8 @@ parity_map = {
"1996-2002": EpcConstructionAgeBand.from_1996_to_2002,
"2003-2006": EpcConstructionAgeBand.from_2003_to_2006,
"2007-2011": EpcConstructionAgeBand.from_2007_to_2011,
- "2012 onwards": EpcConstructionAgeBand.from_2012_onwards,
# Newer age bands, under SAP10
+ "2012 onwards": EpcConstructionAgeBand.from_2012_to_2022,
"2012-2022": EpcConstructionAgeBand.from_2012_to_2022,
"2023 onwards": EpcConstructionAgeBand.from_2023_onwards,
}
diff --git a/backend/onboarders/mappings/parity/glazing.py b/backend/onboarders/mappings/parity/glazing.py
index 46c006bd..fffb8de5 100644
--- a/backend/onboarders/mappings/parity/glazing.py
+++ b/backend/onboarders/mappings/parity/glazing.py
@@ -1,20 +1,23 @@
from datatypes.epc.efficiency import EpcEfficiency
+from datatypes.epc.windows import EpcWindowDescriptions
glazing_map = {
# (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area
# For SAP 10 assessments, The glazed type and glazed area are not populated in the EPC API data any more
- "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None),
- "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
- "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
- "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None),
+ "Double 2002 or later": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.AVERAGE, 1, None, None),
+ "Double before 2002": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.POOR, 1, None, None),
+ "Double but age unknown": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.POOR, 1, None, None),
+ "Single": (EpcWindowDescriptions.single_glazed, EpcEfficiency.VERY_POOR, 0, None, None),
# For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022
# installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to
# how we make updates to the windows data.
# Triple known data is high performance glazing with Good efficiency (at least)
- "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None),
+ "Triple": (EpcWindowDescriptions.fully_triple_glazed, EpcEfficiency.AVERAGE, 1, None, None),
# This is also classed as high performance glazing
- "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
+ "DoubleKnownData": (
+ EpcWindowDescriptions.fully_double_glazed.high_performance_glazing, EpcEfficiency.GOOD, 1, None, None
+ ),
# Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good)
- "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None),
- "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
+ "Secondary": (EpcWindowDescriptions.full_secondary_glazing, EpcEfficiency.POOR, 1, None, None),
+ "TripleKnownData": (EpcWindowDescriptions.high_performance_glazing, EpcEfficiency.GOOD, 1, None, None),
}
diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py
index 6c79d027..5c180ad3 100644
--- a/backend/onboarders/parity.py
+++ b/backend/onboarders/parity.py
@@ -262,7 +262,7 @@ class ParityOnboarder(OnboarderBase):
# controls. E.g. it may be programmer and room thermostat
self.data[
[
- self.landlord_heating_construction,
+ self.landlord_heating_system,
self.landlord_heating_efficiency,
self.landlord_fuel_type,
self.landlord_heating_controls,
@@ -309,7 +309,7 @@ class ParityOnboarder(OnboarderBase):
self.landlord_multi_glaze_proportion,
self.landlord_glazed_type,
self.landlord_glazed_area,
- self.landlord_heating_construction,
+ self.landlord_heating_system,
self.landlord_heating_efficiency,
self.landlord_fuel_type,
self.landlord_heating_controls,
@@ -332,7 +332,7 @@ class ParityOnboarder(OnboarderBase):
self.landlord_construction_age_band, self.landlord_property_type, self.landlord_built_form,
self.landlord_wall_construction, self.landlord_wall_efficiency, self.landlord_roof_construction,
self.landlord_roof_efficiency, self.landlord_floor_construction, self.landlord_windows_type,
- self.landlord_windows_efficiency, self.landlord_heating_construction, self.landlord_heating_efficiency,
+ self.landlord_windows_efficiency, self.landlord_heating_system, self.landlord_heating_efficiency,
self.landlord_fuel_type, self.landlord_heating_controls, self.landlord_heating_controls_efficiency,
self.landlord_hot_water_system, self.landlord_hot_water_efficiency
]:
diff --git a/datatypes/epc/__init__.py b/datatypes/epc/__init__.py
index e69de29b..d997816a 100644
--- a/datatypes/epc/__init__.py
+++ b/datatypes/epc/__init__.py
@@ -0,0 +1,26 @@
+from .construction_age_band import EpcConstructionAgeBand
+from .efficiency import EpcEfficiency
+from .floor import EpcFloorDescriptions
+from .fuel import EpcFuel
+from .heating_controls import EpcHeatingControls
+from .hotwater import EpcHotWaterSystems
+from .main_heating import EpcHeatingSystems
+from .property_type_built_form import PropertyType, BuiltForm
+from .roof import EpcRoofDescriptions
+from .walls import EpcWallDescriptions
+from .windows import EpcWindowDescriptions
+
+__all__ = [
+ "EpcConstructionAgeBand",
+ "EpcEfficiency",
+ "EpcFloorDescriptions",
+ "EpcFuel",
+ "EpcHeatingControls",
+ "EpcHotWaterSystems",
+ "EpcHeatingSystems",
+ "PropertyType",
+ "BuiltForm",
+ "EpcRoofDescriptions",
+ "EpcWallDescriptions",
+ "EpcWindowDescriptions",
+]
diff --git a/datatypes/epc/construction_age_band.py b/datatypes/epc/construction_age_band.py
index c5e7a03b..12d98988 100644
--- a/datatypes/epc/construction_age_band.py
+++ b/datatypes/epc/construction_age_band.py
@@ -15,7 +15,7 @@ class EpcConstructionAgeBand(Enum):
from_1996_to_2002: str = 'England and Wales: 1996-2002'
from_2003_to_2006: str = 'England and Wales: 2003-2006'
from_2007_to_2011: str = 'England and Wales: 2007-2011'
- from_2012_onwards: str = 'England and Wales: 2012-onwards'
+ from_2012_onwards: str = 'England and Wales: 2012 onwards'
from_2012_to_2022: str = 'England and Wales: 2012-2022'
from_2023_onwards: str = 'England and Wales: 2023 onwards'
diff --git a/datatypes/epc/windows.py b/datatypes/epc/windows.py
new file mode 100644
index 00000000..3a8cde52
--- /dev/null
+++ b/datatypes/epc/windows.py
@@ -0,0 +1,9 @@
+from enum import Enum
+
+
+class EpcWindowDescriptions(Enum):
+ fully_double_glazed: str = "Fully double glazed"
+ single_glazed: str = "Single glazed"
+ fully_triple_glazed: str = "Fully triple glazed"
+ high_performance_glazing: str = "High performance glazing"
+ full_secondary_glazing: str = "Full secondary glazing"
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index 7c27de51..5d1fcaa0 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -212,11 +212,11 @@ class TrainingDataset(BaseDataset):
common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
self.df = self.df.loc[
- :,
- no_suffix_cols
- + only_ending_cols
- + [col for cols in common_cols for col in cols],
- ]
+ :,
+ no_suffix_cols
+ + only_ending_cols
+ + [col for cols in common_cols for col in cols],
+ ]
def _remove_abnormal_change_in_floor_area(self):
"""
diff --git a/recommendations/rdsap_tables.py b/recommendations/rdsap_tables.py
index 46e7d083..558b0da4 100644
--- a/recommendations/rdsap_tables.py
+++ b/recommendations/rdsap_tables.py
@@ -105,6 +105,13 @@ age_band_data = [
"Northern_Ireland": "2023 onwards",
"Park_home_UK": None,
},
+ {
+ "age_band": "L",
+ "England_Wales": "2012-2022",
+ "Scotland": "2012 - 2023",
+ "Northern_Ireland": "2014 -2022",
+ "Park_home_UK": None,
+ }
]
england_wales_age_band_lookup = {
@@ -779,13 +786,13 @@ epc_wall_description_map = {
"Sandstone or limestone, as built, no insulation": "Stone: sandstone or limestone as built",
"Sandstone or limestone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation",
"Sandstone or limestone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal "
- "insulation",
+ "insulation",
"Sandstone, as built, no insulation": "Stone: sandstone or limestone as built",
"Sandstone or limestone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation",
"Sandstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation",
"Sandstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation",
"Sandstone or limestone, with external insulation": "Stone/solid brick with 100 mm external or internal "
- "insulation",
+ "insulation",
"Sandstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation",
"Sandstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation",
############################
@@ -794,7 +801,8 @@ epc_wall_description_map = {
"Granite or whinstone, as built, no insulation": "Stone: granite or whinstone as built",
"Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation",
"Granite or whin, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation",
- "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation",
+ "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal "
+ "insulation",
"Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation",
"Granite or whin, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation",
"Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation",