mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
testing out rebaselining
This commit is contained in:
parent
5e0db68101
commit
043f57e04a
20 changed files with 376 additions and 113 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -69,24 +69,24 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/West Kent"
|
||||
data_filename = "West Kent Asset List.xlsx"
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals"
|
||||
data_filename = "For Modelling.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = "POSTCODE"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "ADDRESS"
|
||||
postcode_column = "Postcode"
|
||||
address1_column = "address1"
|
||||
address1_method = None
|
||||
fulladdress_column = "full_address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "PROPERTY TYPE"
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = "wall combined"
|
||||
landlord_roof_construction = "HEATING SYSTEM"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_property_id = "Reference"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -116,7 +116,7 @@ def app():
|
|||
address_cols_to_concat = None
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
|
|
|
|||
|
|
@ -1509,3 +1509,11 @@ class Property:
|
|||
"""
|
||||
lodgement_date = self.data["lodgement-date"]
|
||||
return (datetime.now() - pd.to_datetime(lodgement_date)) > timedelta(days=3650)
|
||||
|
||||
@property
|
||||
def epc_is_estimated(self) -> bool:
    """
    Indicate whether the EPC is estimated, based on the "estimated" flag in the property data.

    The flag is read from ``self.data`` and a missing flag counts as "not estimated". The raw value is coerced
    to a plain ``bool`` so that the declared return type holds whatever type the flag was stored as.

    :return: True if the "estimated" flag is present and truthy, otherwise False
    """
    return bool(self.data.get("estimated", False))
|
||||
|
|
|
|||
|
|
@ -1,36 +1,52 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
import datatypes.epc as epc_datatypes
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Address:
|
||||
# address: Optional[str]
|
||||
# full_address: Optional[str]
|
||||
# property_type: Optional[str]
|
||||
# built_form: Optional[str]
|
||||
# estimated: bool
|
||||
|
||||
# New fields
|
||||
uprn: Optional[int]
|
||||
landlord_property_id: Optional[str]
|
||||
address: Optional[str]
|
||||
full_address: Optional[str]
|
||||
address_1: str
|
||||
address_2: Optional[str]
|
||||
address_3: Optional[str]
|
||||
full_address: str
|
||||
postcode: str
|
||||
property_type: Optional[str]
|
||||
built_form: Optional[str]
|
||||
estimated: bool
|
||||
landlord_total_floor_area_m2: Union[float, None]
|
||||
# Property components
|
||||
landlord_property_type: Optional[epc_datatypes.property_type_built_form.PropertyType]
|
||||
landlord_built_form: Optional[epc_datatypes.property_type_built_form.BuiltForm]
|
||||
landlord_wall_construction: Optional[epc_datatypes.walls.EpcWallDescriptions]
|
||||
landlord_roof_construction: Optional[epc_datatypes.roof.EpcRoofDescriptions]
|
||||
landlord_floor_construction: Optional[epc_datatypes.floor.EpcFloorDescriptions]
|
||||
landlord_windows_type: Optional[epc_datatypes.windows.EpcWindowDescriptions]
|
||||
landlord_heating_system: Optional[epc_datatypes.main_heating.EpcHeatingSystems]
|
||||
landlord_fuel_type: Optional[epc_datatypes.fuel.EpcFuel]
|
||||
landlord_heating_controls: Optional[epc_datatypes.heating_controls.EpcHeatingControls]
|
||||
landlord_hot_water_system: Optional[epc_datatypes.hotwater.EpcHotWaterSystems]
|
||||
# Efficiency
|
||||
landlord_wall_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
|
||||
landlord_roof_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
|
||||
landlord_windows_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
|
||||
landlord_heating_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
|
||||
landlord_heating_controls_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
|
||||
landlord_hot_water_efficiency: Optional[epc_datatypes.efficiency.EpcEfficiency]
|
||||
|
||||
# Additionals
|
||||
landlord_has_sloping_ceiling: Optional[bool]
|
||||
landlord_multi_glaze_proportion: Optional[float]
|
||||
landlord_construction_age_band: Optional[epc_datatypes.construction_age_band.EpcConstructionAgeBand]
|
||||
|
||||
# Additional address data, associated to a standardised asset list
|
||||
domna_full_address: Optional[str]
|
||||
domna_address_1: Optional[str]
|
||||
landlord_heating_system: Optional[str] = None
|
||||
solar_reason: Optional[str] = None
|
||||
cavity_reason: Optional[str] = None
|
||||
|
||||
@property
|
||||
def address1(self):
|
||||
|
||||
if self.domna_address_1 is not None:
|
||||
address1 = self.domna_address_1
|
||||
else:
|
||||
address1 = self.address
|
||||
|
||||
# Format
|
||||
address1 = str(int(address1)) if isinstance(address1, float) else str(address1)
|
||||
return address1
|
||||
# domna_full_address: Optional[str]
|
||||
# domna_address_1: Optional[str]
|
||||
|
||||
@property
|
||||
def request_data(self) -> dict[str, Optional[str]]:
|
||||
|
|
@ -41,27 +57,9 @@ class Address:
|
|||
"uprn": self.uprn,
|
||||
"landlord_property_id": self.landlord_property_id,
|
||||
"postcode": self.postcode,
|
||||
"address1": self.address1,
|
||||
"address1": self.address_1,
|
||||
"full_address": self.full_address,
|
||||
}
|
||||
|
||||
# Drop nulls
|
||||
return {k: v for k, v in data.items() if v is not None}
|
||||
|
||||
@property
|
||||
def heating_system(self):
|
||||
"""
|
||||
Helper function to extract a heating system, which can be used to estimate EPC. This is a very limited,
|
||||
placeholder function to cover some initial immediate cases.
|
||||
:return:
|
||||
"""
|
||||
|
||||
ll_heating = self.landlord_property_id
|
||||
if not ll_heating:
|
||||
return None
|
||||
|
||||
if ll_heating == "electric storage heaters":
|
||||
# Return with the same format at the EPC
|
||||
return "Electric storage heaters"
|
||||
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
import warnings
|
||||
from typing import Iterator
|
||||
from backend.addresses.Address import Address
|
||||
from datatypes.epc.property_type_built_form import PropertyType
|
||||
|
||||
|
||||
class Addresses:
|
||||
|
|
@ -19,8 +21,19 @@ class Addresses:
|
|||
@classmethod
|
||||
def from_plan_input(cls, plan_input: list[dict], body) -> "Addresses":
    """
    Build an Addresses collection from the raw plan input rows.

    Rows are parsed with ``parse_ara_row`` when ``body.file_format`` is "ara_property_list". Any other file
    format falls back to ``_parse_row_deprecated`` and emits a DeprecationWarning.

    :param plan_input: list of row dictionaries, one per property
    :param body: the request body; ``file_format`` selects the row parser and the body is then handed to it
    :return: a new instance of ``cls`` wrapping the parsed addresses, in input order
    """
    if body.file_format == "ara_property_list":
        row_parser = cls.parse_ara_row
    else:
        warnings.warn(
            "_parse_row_deprecated is deprecated and will be removed in a future version. "
            "Use the parse_ara_row method instead",
            DeprecationWarning,
            stacklevel=2
        )
        row_parser = cls._parse_row_deprecated

    # Each row goes through the selected parser exactly once
    return cls([row_parser(row, body) for row in plan_input])
|
||||
|
||||
def get_uprns(self):
|
||||
|
|
@ -35,13 +48,64 @@ class Addresses:
|
|||
def get_postcodes_for_flats(self):
    """
    Extract the postcodes of all addresses whose landlord property type is a flat.

    This is used for remote assessments on flats.

    :return: list of postcodes, one per matching address, in the order the addresses are stored
    """
    # landlord_property_type is annotated as a PropertyType member, but rows may carry the raw value instead,
    # so both representations are accepted.
    flat_markers = (PropertyType.flat, PropertyType.flat.value)
    return [x.postcode for x in self._addresses if x.landlord_property_type in flat_markers]
|
||||
|
||||
def get_property_requests(self):
|
||||
return [x.request_data for x in self._addresses]
|
||||
|
||||
@staticmethod
|
||||
def _parse_row(row: dict, body) -> Address:
|
||||
def parse_ara_row(row: dict, body) -> Address:
    """
    Parse a row from the ARA property list format, the more standardised format that we are moving towards.

    The keys ``uprn``, ``address_1``, ``full_address`` and ``postcode`` are required and are read with
    ``row[...]``, so a missing key raises KeyError. Every other key is optional and becomes None when absent.

    Numeric fields treat only None and the empty string as "missing", so a genuine value of 0 (for example a
    multi glaze proportion of 0) is preserved. NaN cells are passed through unchanged for numeric fields, but
    are treated as missing for the landlord property id and the sloping ceiling flag.

    :param row: A dictionary representing a row from the ARA property list, with keys corresponding to the
        Address dataclass fields.
    :param body: The PlanTriggerRequest body. It is not read by this parser; it is accepted so that all row
        parsers can be called with the same arguments.
    :return: An Address object created from the parsed row data.
    :raises KeyError: if one of the required keys is missing from the row
    """

    def is_nan(value) -> bool:
        # NaN is the only float that does not compare equal to itself
        return isinstance(value, float) and value != value

    def is_blank(value) -> bool:
        return value is None or (isinstance(value, str) and value == "")

    def optional_float(key: str):
        # Only None / "" count as missing - a literal 0 is a real measurement and must be kept
        value = row.get(key)
        return None if is_blank(value) else float(value)

    property_id = row.get("landlord_property_id")
    # A NaN cell would otherwise be stringified to the id "nan"
    landlord_property_id = str(property_id) if property_id and not is_nan(property_id) else None

    sloping_ceiling = row.get("landlord_has_sloping_ceiling")
    # bool(NaN) is True, so an empty cell must be mapped to None explicitly
    if sloping_ceiling is None or is_nan(sloping_ceiling):
        landlord_has_sloping_ceiling = None
    else:
        landlord_has_sloping_ceiling = bool(sloping_ceiling)

    return Address(
        uprn=int(row["uprn"]),
        landlord_property_id=landlord_property_id,
        address_1=row["address_1"],
        address_2=row.get("address_2"),
        address_3=row.get("address_3"),
        full_address=row["full_address"],
        postcode=str(row["postcode"]),
        landlord_total_floor_area_m2=optional_float("landlord_total_floor_area_m2"),
        # Property components
        landlord_property_type=row.get("landlord_property_type"),
        landlord_built_form=row.get("landlord_built_form"),
        landlord_wall_construction=row.get("landlord_wall_construction"),
        landlord_roof_construction=row.get("landlord_roof_construction"),
        landlord_floor_construction=row.get("landlord_floor_construction"),
        landlord_windows_type=row.get("landlord_windows_type"),
        landlord_heating_system=row.get("landlord_heating_system"),
        landlord_fuel_type=row.get("landlord_fuel_type"),
        landlord_heating_controls=row.get("landlord_heating_controls"),
        landlord_hot_water_system=row.get("landlord_hot_water_system"),
        # Efficiency
        landlord_wall_efficiency=row.get("landlord_wall_efficiency"),
        landlord_roof_efficiency=row.get("landlord_roof_efficiency"),
        landlord_windows_efficiency=row.get("landlord_windows_efficiency"),
        landlord_heating_efficiency=row.get("landlord_heating_efficiency"),
        landlord_heating_controls_efficiency=row.get("landlord_heating_controls_efficiency"),
        landlord_hot_water_efficiency=row.get("landlord_hot_water_efficiency"),
        # Additionals
        landlord_has_sloping_ceiling=landlord_has_sloping_ceiling,
        landlord_multi_glaze_proportion=optional_float("landlord_multi_glaze_proportion"),
        landlord_construction_age_band=row.get("landlord_construction_age_band"),
    )
|
||||
|
||||
@staticmethod
|
||||
def _parse_row_deprecated(row: dict, body) -> Address:
|
||||
"""
|
||||
Legacy row parser, to be deprecated in favour of the new ARA property list format (see parse_ara_row)
|
||||
:param row:
|
||||
:param body:
|
||||
:return:
|
||||
"""
|
||||
|
||||
def clean_uprn(v):
|
||||
try:
|
||||
return int(float(v))
|
||||
|
|
@ -68,14 +132,32 @@ class Addresses:
|
|||
uprn=uprn,
|
||||
landlord_property_id=str(row["landlord_property_id"])
|
||||
if row.get("landlord_property_id") else None,
|
||||
address=str(address).strip() if address else None,
|
||||
address_1=str(address).strip() if address else None,
|
||||
full_address=str(full_address).strip() if full_address else None,
|
||||
postcode=postcode,
|
||||
property_type=row.get("property_type"),
|
||||
built_form=row.get("built_form"),
|
||||
estimated=bool(row.get("estimated", False)),
|
||||
domna_full_address=row.get("domna_full_address"),
|
||||
domna_address_1=row.get("domna_address_1"),
|
||||
landlord_property_type=row.get("property_type"),
|
||||
landlord_built_form=row.get("built_form"),
|
||||
# estimated=bool(row.get("estimated", False)),
|
||||
address_2=None,
|
||||
address_3=None,
|
||||
landlord_total_floor_area_m2=None,
|
||||
landlord_wall_construction=None,
|
||||
landlord_roof_construction=None,
|
||||
landlord_floor_construction=None,
|
||||
landlord_windows_type=None,
|
||||
landlord_heating_system=None,
|
||||
landlord_fuel_type=None,
|
||||
landlord_heating_controls=None,
|
||||
landlord_hot_water_system=None,
|
||||
landlord_wall_efficiency=None,
|
||||
landlord_roof_efficiency=None,
|
||||
landlord_windows_efficiency=None,
|
||||
landlord_heating_efficiency=None,
|
||||
landlord_heating_controls_efficiency=None,
|
||||
landlord_hot_water_efficiency=None,
|
||||
landlord_has_sloping_ceiling=None,
|
||||
landlord_multi_glaze_proportion=None,
|
||||
landlord_construction_age_band=None,
|
||||
)
|
||||
|
||||
# def _build_identity_index(self) -> dict:
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ class EpcStoreService:
|
|||
Service layer for EPC data lookup and persistence.
|
||||
"""
|
||||
|
||||
FRESHNESS_DAYS = 30
|
||||
FRESHNESS_DAYS = 180 # Upgraded to 180 days
|
||||
|
||||
# status labels
|
||||
FRESH = "fresh"
|
||||
|
|
|
|||
|
|
@ -15,7 +15,8 @@ from backend.app.db.models.portfolio import (
|
|||
)
|
||||
|
||||
|
||||
def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str,
|
||||
def create_property(
|
||||
session: Session, portfolio_id: int, address: str, postcode: str, uprn: str,
|
||||
energy_assessment: dict, landlord_property_id: str | None = None) -> (int, bool):
|
||||
"""
|
||||
This function will create a record for the property in the database if it does not exist.
|
||||
|
|
@ -252,7 +253,7 @@ def bulk_create_properties(
|
|||
|
||||
rows.append(
|
||||
{
|
||||
"address": addr.address1,
|
||||
"address": addr.address_1,
|
||||
"postcode": addr.postcode,
|
||||
"portfolio_id": body.portfolio_id,
|
||||
"uprn": addr.uprn,
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ class PlanTriggerRequest(BaseModel):
|
|||
# Add in optional fields which describe the format of the asset list being used
|
||||
|
||||
file_type: Optional[Literal["csv", "xlsx"]] = None
|
||||
file_format: Optional[Literal["domna_asset_list"]] = None
|
||||
file_format: Optional[Literal["domna_asset_list", "ara_property_list"]] = None
|
||||
sheet_name: Optional[str] = None
|
||||
sheet_count: Optional[int] = None
|
||||
# If one of index_start or index_end is set, the other must be set too
|
||||
|
|
|
|||
|
|
@ -543,6 +543,10 @@ def keep_max_sap_per_measure_type(items):
|
|||
|
||||
async def model_engine(body: PlanTriggerRequest):
|
||||
logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json()))
|
||||
if body.subtask_id:
|
||||
SubTaskInterface().update_subtask_status(
|
||||
subtask_id=UUID(body.subtask_id), status="in progress", cloud_logs_url=None
|
||||
)
|
||||
|
||||
created_at = datetime.now().isoformat()
|
||||
start_ms = int(time.time() * 1000)
|
||||
|
|
@ -647,6 +651,15 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
if body.index_start is not None and body.index_end is not None:
|
||||
plan_input = plan_input[body.index_start:body.index_end]
|
||||
|
||||
# TODO: New onboarding process
|
||||
if body.file_format == "ara_property_list":
|
||||
plan_input = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Downloads/2025_11_11 - Peabody - Data Extracts for Domna_transformed ("
|
||||
"2).xlsx",
|
||||
sheet_name="Input Sample"
|
||||
)
|
||||
plan_input = plan_input.to_dict('records')
|
||||
|
||||
# Confirm no duplicate UPRNS
|
||||
check_duplicate_uprns(plan_input)
|
||||
|
||||
|
|
@ -747,24 +760,25 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
property_already_installed = list(already_installed_by_uprn[addr.uprn])
|
||||
|
||||
epc_searcher = SearchEpc(
|
||||
address1=addr.address1,
|
||||
address1=addr.address_1,
|
||||
postcode=addr.postcode,
|
||||
uprn=addr.uprn,
|
||||
auth_token=get_settings().EPC_AUTH_TOKEN,
|
||||
os_api_key="",
|
||||
full_address=addr.full_address,
|
||||
heating_system=addr.heating_system,
|
||||
heating_system=addr.landlord_heating_system,
|
||||
associated_uprns=associated_uprns
|
||||
)
|
||||
epc_searcher.ordnance_survey_client.built_form = addr.built_form
|
||||
epc_searcher.ordnance_survey_client.property_type = addr.property_type
|
||||
epc_searcher.ordnance_survey_client.built_form = addr.landlord_built_form
|
||||
epc_searcher.ordnance_survey_client.property_type = addr.landlord_property_type
|
||||
# For the moment, our OS API access is unavailable, so we skip and interpolate
|
||||
|
||||
epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True)
|
||||
epc_searcher.set_uprn_source(file_format=body.file_format)
|
||||
|
||||
lookup_key = (
|
||||
("uprn", addr.uprn) if addr.uprn is not None else ("landlord_property_id", addr.landlord_property_id)
|
||||
("uprn", addr.uprn) if addr.uprn is not None
|
||||
else ("landlord_property_id", addr.landlord_property_id)
|
||||
)
|
||||
property_id = property_lookup[lookup_key]
|
||||
|
||||
|
|
@ -804,7 +818,7 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
epc_page=epc_page,
|
||||
rrn=rrn,
|
||||
cleaned_address=epc_searcher.address_clean,
|
||||
config_address=addr.address,
|
||||
config_address=addr.address_1,
|
||||
address_postal_town=epc_searcher.address_postal_town
|
||||
)
|
||||
)
|
||||
|
|
@ -817,14 +831,6 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
# factor this into EPCRecord as part of the cleaning however we need some more testing
|
||||
prepared_epc = averages_cleaning(prepared_epc, cleaning_data)
|
||||
|
||||
# If we have an ECO project, we parse the cavity/solar reasons
|
||||
eco_packages[property_id] = parse_eco_packages(addr, prepared_epc)
|
||||
|
||||
# Final step - extract inspections data, if we have it - we inject into property for usage
|
||||
property_inspections = db_funcs.inspections_functions.extract_inspection_data(config)
|
||||
if property_inspections:
|
||||
inspections_map[property_id] = property_inspections
|
||||
|
||||
input_properties.append(
|
||||
Property(
|
||||
id=property_id,
|
||||
|
|
@ -833,7 +839,7 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
address=epc_searcher.address_clean,
|
||||
postcode=epc_searcher.postcode_clean,
|
||||
epc_record=prepared_epc,
|
||||
already_installed=property_already_installed + eco_packages.get(property_id)[3],
|
||||
already_installed=property_already_installed,
|
||||
find_my_epc_components=find_my_epc_components,
|
||||
property_valuation=req_data.valuation,
|
||||
non_invasive_recommendations=property_non_invasive_recommendations,
|
||||
|
|
@ -885,13 +891,125 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES
|
||||
)
|
||||
|
||||
# The materials data could be cached or local so we don't need to make
|
||||
# consistent requests to the backend for the same data
|
||||
logger.info("Reading in materials and cleaned datasets")
|
||||
with db_read_session() as session:
|
||||
materials = db_funcs.materials_functions.get_materials(session)
|
||||
cleaned = get_cleaned()
|
||||
|
||||
# Rebaselining
|
||||
# TODO: MUST happen before setting features
|
||||
rebaselining_scoring_data = []
|
||||
for p in tqdm(input_properties):
|
||||
# 1) EPC expired
|
||||
# 2) Missing EPC
|
||||
# 3) Materially different information from landlord vs EPC
|
||||
# make the landlord remapping dictionary
|
||||
addr = [a for a in addresses if a.uprn == p.uprn][0]
|
||||
landlord_remapping = {
|
||||
"total-floor-area": addr.landlord_total_floor_area_m2, # 1m tolerance on floor area to perform remap
|
||||
"property-type": addr.landlord_property_type,
|
||||
"built-form": addr.landlord_built_form,
|
||||
# Components
|
||||
"walls-description": addr.landlord_wall_construction,
|
||||
"roof-description": addr.landlord_roof_construction,
|
||||
"floor-description": addr.landlord_floor_construction,
|
||||
"windows-description": addr.landlord_windows_type,
|
||||
"main-fuel": addr.landlord_fuel_type,
|
||||
"mainheat-description": addr.landlord_heating_system,
|
||||
"mainheatcont-description": addr.landlord_heating_controls,
|
||||
"hotwater-description": addr.landlord_hot_water_system,
|
||||
# Efficiency
|
||||
"walls-energy-eff": addr.landlord_wall_efficiency,
|
||||
"roof-energy-eff": addr.landlord_roof_efficiency,
|
||||
"windows-energy-eff": addr.landlord_windows_efficiency,
|
||||
"mainheat-energy-eff": addr.landlord_heating_efficiency,
|
||||
"mainheatc-energy-eff": addr.landlord_heating_controls_efficiency,
|
||||
"hot-water-energy-eff": addr.landlord_hot_water_efficiency,
|
||||
"multi-glaze-proportion": addr.landlord_multi_glaze_proportion * 100, # TODO: Fix this!
|
||||
"construction-age-band": addr.landlord_construction_age_band,
|
||||
}
|
||||
# Find differences between EPC and landlord data
|
||||
differences = {}
|
||||
for k, v in landlord_remapping.items():
|
||||
if k == "total-floor-area":
|
||||
if abs(p.data[k] - v) > 1: # 1m tolerance
|
||||
differences[k] = v
|
||||
else:
|
||||
if v != p.data[k] and (not pd.isnull(v)) and (not pd.isnull(p.data[k])):
|
||||
differences[k] = v
|
||||
|
||||
needs_rebaselining = p.epc_is_expired | p.epc_is_estimated | len(differences) > 0
|
||||
|
||||
# Need to adjust p.data and p.epc_record.df?
|
||||
if needs_rebaselining:
|
||||
if len(differences):
|
||||
p.data.update(differences)
|
||||
differences_underscored = {k.replace("-", "_"): v for k, v in differences.items()}
|
||||
# Insert
|
||||
for k, v in differences_underscored.items():
|
||||
if not hasattr(p.epc_record, k) and k not in ["property_type", "built_form"]:
|
||||
# Sanity check - while we're implementing
|
||||
raise ValueError("Property does not have an EPC record to update with differences")
|
||||
# Hack but these aren't in the data class
|
||||
if k not in ["property_type", "built_form"]:
|
||||
setattr(p.epc_record, k, v)
|
||||
p.epc_record.prepared_epc[k] = v
|
||||
|
||||
p.create_base_difference_epc_record(cleaned_lookup=cleaned)
|
||||
scoring_data = p.base_difference_record.df.copy()
|
||||
rebaselining_scoring_data.append(scoring_data)
|
||||
|
||||
rebaselining_scoring_data = pd.concat(rebaselining_scoring_data)
|
||||
|
||||
# Trigger re-scoring
|
||||
rebaselining_scoring_data["is_post_sap10_starting"] = True
|
||||
# Score model - SAP re-baselining model
|
||||
model_api.MODEL_URLS["retrofit-sap-baseline-predictions"] = "sapbaselinemodel"
|
||||
model_api.prediction_buckets["retrofit-sap-baseline-predictions"] = "retrofit-sap-baseline-predictions-dev"
|
||||
rebaselining_response = model_api.predict_all(
|
||||
df=rebaselining_scoring_data,
|
||||
bucket=get_settings().DATA_BUCKET,
|
||||
model_prefixes=["retrofit-sap-baseline-predictions"],
|
||||
extract_ids=False,
|
||||
extract_uprn=True
|
||||
)
|
||||
|
||||
for idx, rebaselined_prediction in rebaselining_response["retrofit-sap-baseline-predictions"].iterrows():
|
||||
property_instance = next(p for p in input_properties if p.uprn == int(rebaselined_prediction["uprn"]))
|
||||
new_rating = rebaselined_prediction["predictions"]
|
||||
new_epc_rating = sap_to_epc(new_rating)
|
||||
# Insert
|
||||
|
||||
# property_instance.data["current-energy-efficiency"] = sap_to_epc(new_rating)
|
||||
|
||||
addr = [a for a in addresses if a.uprn == property_instance.uprn][0]
|
||||
landlord_remapping = {
|
||||
"total-floor-area": addr.landlord_total_floor_area_m2, # 1m tolerance on floor area to perform remap
|
||||
"property-type": addr.landlord_property_type,
|
||||
"built-form": addr.landlord_built_form,
|
||||
# Components
|
||||
"walls-description": addr.landlord_wall_construction,
|
||||
"roof-description": addr.landlord_roof_construction,
|
||||
"floor-description": addr.landlord_floor_construction,
|
||||
"windows-description": addr.landlord_windows_type,
|
||||
"main-fuel": addr.landlord_fuel_type,
|
||||
"mainheatcont-description": addr.landlord_heating_controls,
|
||||
"hotwater-description": addr.landlord_hot_water_system,
|
||||
# Efficiency
|
||||
"walls-energy-eff": addr.landlord_wall_efficiency,
|
||||
"roof-energy-eff": addr.landlord_roof_efficiency,
|
||||
"windows-energy-eff": addr.landlord_windows_efficiency,
|
||||
"mainheat-energy-eff": addr.landlord_heating_efficiency,
|
||||
"mainheatc-energy-eff": addr.landlord_heating_controls_efficiency,
|
||||
"hot-water-energy-eff": addr.landlord_hot_water_efficiency,
|
||||
"multi-glaze-proportion": addr.landlord_multi_glaze_proportion * 100, # TODO: Fix this!
|
||||
"construction-age-band": addr.landlord_construction_age_band,
|
||||
}
|
||||
|
||||
# Insert the re-baselined scores into the property data
|
||||
for p in input_properties:
|
||||
property_rebaselined_sap = rebaselining_response["retrofit-sap-baseline-predictions"]
|
||||
|
||||
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
|
||||
|
||||
epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
import json
|
||||
import random
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import pandas as pd
|
||||
from typing import List
|
||||
from tqdm import tqdm
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
|
|
@ -147,7 +146,13 @@ class ModelApi:
|
|||
else:
|
||||
return None
|
||||
|
||||
def predict_all(self, df, bucket, model_prefixes=None, extract_ids=True) -> dict:
|
||||
def predict_all(
|
||||
self, df: pd.DataFrame,
|
||||
bucket: str,
|
||||
model_prefixes: List[str] | None = None,
|
||||
extract_ids: bool = True,
|
||||
extract_uprn: bool = False
|
||||
) -> dict:
|
||||
|
||||
"""
|
||||
For each model prefix, this method will upload the scoring data to s3 and then make a request to the
|
||||
|
|
@ -159,6 +164,8 @@ class ModelApi:
|
|||
:param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be
|
||||
used
|
||||
:param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the
|
||||
scoring data
|
||||
:param extract_uprn: Boolean to determine if the uprn should be extracted from the scoring data
|
||||
id column
|
||||
:return:
|
||||
"""
|
||||
|
|
@ -196,6 +203,9 @@ class ModelApi:
|
|||
# Convert back to int
|
||||
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
|
||||
|
||||
if extract_uprn and "uprn" in df.columns:
|
||||
predictions_df["uprn"] = df["uprn"].values
|
||||
|
||||
predictions[model_prefix] = predictions_df
|
||||
|
||||
return predictions
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ class OnboarderBase:
|
|||
landlord_roof_construction: str = "landlord_roof_construction"
|
||||
landlord_floor_construction: str = "landlord_floor_construction"
|
||||
landlord_windows_type: str = "landlord_windows_type"
|
||||
landlord_heating_construction: str = "landlord_heating_construction"
|
||||
landlord_heating_system: str = "landlord_heating_system"
|
||||
landlord_fuel_type: str = "landlord_fuel_type"
|
||||
landlord_heating_controls: str = "landlord_heating_controls"
|
||||
landlord_hot_water_system: str = "landlord_hot_water_system"
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ parity_map = {
|
|||
"1996-2002": EpcConstructionAgeBand.from_1996_to_2002,
|
||||
"2003-2006": EpcConstructionAgeBand.from_2003_to_2006,
|
||||
"2007-2011": EpcConstructionAgeBand.from_2007_to_2011,
|
||||
"2012 onwards": EpcConstructionAgeBand.from_2012_onwards,
|
||||
# Newer age bands, under SAP10
|
||||
"2012 onwards": EpcConstructionAgeBand.from_2012_to_2022,
|
||||
"2012-2022": EpcConstructionAgeBand.from_2012_to_2022,
|
||||
"2023 onwards": EpcConstructionAgeBand.from_2023_onwards,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,20 +1,23 @@
|
|||
from datatypes.epc.efficiency import EpcEfficiency
|
||||
from datatypes.epc.windows import EpcWindowDescriptions
|
||||
|
||||
glazing_map = {
    # Each value is a tuple of:
    #   (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area)
    # For SAP 10 assessments, the glazed type and glazed area are not populated in the EPC API data any more,
    # so both are None for every entry.
    "Double 2002 or later": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.AVERAGE, 1, None, None),
    "Double before 2002": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.POOR, 1, None, None),
    "Double but age unknown": (EpcWindowDescriptions.fully_double_glazed, EpcEfficiency.POOR, 1, None, None),
    "Single": (EpcWindowDescriptions.single_glazed, EpcEfficiency.VERY_POOR, 0, None, None),
    # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022
    # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to
    # how we make updates to the windows data.
    # Triple known data is high performance glazing with Good efficiency (at least)
    "Triple": (EpcWindowDescriptions.fully_triple_glazed, EpcEfficiency.AVERAGE, 1, None, None),
    # This is also classed as high performance glazing
    "DoubleKnownData": (EpcWindowDescriptions.high_performance_glazing, EpcEfficiency.GOOD, 1, None, None),
    # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good)
    "Secondary": (EpcWindowDescriptions.full_secondary_glazing, EpcEfficiency.POOR, 1, None, None),
    "TripleKnownData": (EpcWindowDescriptions.high_performance_glazing, EpcEfficiency.GOOD, 1, None, None),
}
|
||||
|
|
|
|||
|
|
@ -262,7 +262,7 @@ class ParityOnboarder(OnboarderBase):
|
|||
# controls. E.g. it may be programmer and room thermostat
|
||||
self.data[
|
||||
[
|
||||
self.landlord_heating_construction,
|
||||
self.landlord_heating_system,
|
||||
self.landlord_heating_efficiency,
|
||||
self.landlord_fuel_type,
|
||||
self.landlord_heating_controls,
|
||||
|
|
@ -309,7 +309,7 @@ class ParityOnboarder(OnboarderBase):
|
|||
self.landlord_multi_glaze_proportion,
|
||||
self.landlord_glazed_type,
|
||||
self.landlord_glazed_area,
|
||||
self.landlord_heating_construction,
|
||||
self.landlord_heating_system,
|
||||
self.landlord_heating_efficiency,
|
||||
self.landlord_fuel_type,
|
||||
self.landlord_heating_controls,
|
||||
|
|
@ -332,7 +332,7 @@ class ParityOnboarder(OnboarderBase):
|
|||
self.landlord_construction_age_band, self.landlord_property_type, self.landlord_built_form,
|
||||
self.landlord_wall_construction, self.landlord_wall_efficiency, self.landlord_roof_construction,
|
||||
self.landlord_roof_efficiency, self.landlord_floor_construction, self.landlord_windows_type,
|
||||
self.landlord_windows_efficiency, self.landlord_heating_construction, self.landlord_heating_efficiency,
|
||||
self.landlord_windows_efficiency, self.landlord_heating_system, self.landlord_heating_efficiency,
|
||||
self.landlord_fuel_type, self.landlord_heating_controls, self.landlord_heating_controls_efficiency,
|
||||
self.landlord_hot_water_system, self.landlord_hot_water_efficiency
|
||||
]:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,26 @@
|
|||
from .construction_age_band import EpcConstructionAgeBand
|
||||
from .efficiency import EpcEfficiency
|
||||
from .floor import EpcFloorDescriptions
|
||||
from .fuel import EpcFuel
|
||||
from .heating_controls import EpcHeatingControls
|
||||
from .hotwater import EpcHotWaterSystems
|
||||
from .main_heating import EpcHeatingSystems
|
||||
from .property_type_built_form import PropertyType, BuiltForm
|
||||
from .roof import EpcRoofDescriptions
|
||||
from .walls import EpcWallDescriptions
|
||||
from .windows import EpcWindowDescriptions
|
||||
|
||||
# Public names re-exported by this package, in the same order as the imports
# above.
__all__ = [
    "EpcConstructionAgeBand", "EpcEfficiency", "EpcFloorDescriptions",
    "EpcFuel", "EpcHeatingControls", "EpcHotWaterSystems",
    "EpcHeatingSystems", "PropertyType", "BuiltForm",
    "EpcRoofDescriptions", "EpcWallDescriptions", "EpcWindowDescriptions",
]
|
||||
|
|
@ -15,7 +15,7 @@ class EpcConstructionAgeBand(Enum):
|
|||
from_1996_to_2002: str = 'England and Wales: 1996-2002'
|
||||
from_2003_to_2006: str = 'England and Wales: 2003-2006'
|
||||
from_2007_to_2011: str = 'England and Wales: 2007-2011'
|
||||
from_2012_onwards: str = 'England and Wales: 2012-onwards'
|
||||
from_2012_onwards: str = 'England and Wales: 2012 onwards'
|
||||
from_2012_to_2022: str = 'England and Wales: 2012-2022'
|
||||
from_2023_onwards: str = 'England and Wales: 2023 onwards'
|
||||
|
||||
|
|
|
|||
9
datatypes/epc/windows.py
Normal file
9
datatypes/epc/windows.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from enum import Enum
|
||||
|
||||
|
||||
class EpcWindowDescriptions(Enum):
    """Closed set of window (glazing) description strings.

    Each member's ``value`` is the description text itself, so code that
    previously used the raw string literal (e.g. ``"Fully triple glazed"``)
    can use the member instead and read the text back via ``.value``.
    """

    # Members are deliberately left unannotated: an explicit ``: str``
    # annotation on an enum member makes type checkers treat the attribute as
    # a plain ``str`` (or report an error) rather than as a member of this
    # enum. The runtime members and their values are unchanged.
    fully_double_glazed = "Fully double glazed"
    single_glazed = "Single glazed"
    fully_triple_glazed = "Fully triple glazed"
    high_performance_glazing = "High performance glazing"
    full_secondary_glazing = "Full secondary glazing"
|
||||
|
|
@ -105,6 +105,13 @@ age_band_data = [
|
|||
"Northern_Ireland": "2023 onwards",
|
||||
"Park_home_UK": None,
|
||||
},
|
||||
{
|
||||
"age_band": "L",
|
||||
"England_Wales": "2012-2022",
|
||||
"Scotland": "2012 - 2023",
|
||||
"Northern_Ireland": "2014 -2022",
|
||||
"Park_home_UK": None,
|
||||
}
|
||||
]
|
||||
|
||||
england_wales_age_band_lookup = {
|
||||
|
|
@ -794,7 +801,8 @@ epc_wall_description_map = {
|
|||
"Granite or whinstone, as built, no insulation": "Stone: granite or whinstone as built",
|
||||
"Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation",
|
||||
"Granite or whin, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation",
|
||||
"Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation",
|
||||
"Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal "
|
||||
"insulation",
|
||||
"Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation",
|
||||
"Granite or whin, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation",
|
||||
"Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue