mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
refactoring construction of the attributes
This commit is contained in:
parent
ed333e1714
commit
8bf6aa5af2
1 changed files with 65 additions and 0 deletions
|
|
@ -21,6 +21,8 @@ from recommendations.recommendation_utils import (
|
|||
estimate_number_of_floors
|
||||
)
|
||||
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# OpenAI API Key (set this in your environment variables for security)
|
||||
|
|
@ -279,9 +281,19 @@ class AssetList:
|
|||
"Any further surveyor notes", 'Surveyors Name'
|
||||
]
|
||||
|
||||
# This SAP threshold is a key search criterion for properties that may be eligible for extraction
|
||||
SAP_RATING_THRESHOLD = 75
|
||||
# Any EPC conducted prior to this year is deemed to be unreliable
|
||||
EPC_YEAR_THRESHOLD = pd.Timestamp.now().year - 5
|
||||
|
||||
# Attributes - these are columns that we produce, calculated based on other pieces of data
|
||||
ATTRIBUTE_HAS_SOLAR = "attribute_has_solar"
|
||||
ATTRIBUTE_NUMBER_OF_FLOORS = "attribute_est_number_floors"
|
||||
ATTRIBUTE_ESTIMATED_PERIMETER = "attribute_est_perimter"
|
||||
ATTRIBUTE_HEAT_LOSS_AREA = "attribute_heat_loss_area"
|
||||
ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS = "attribute_epc_roof_insulation_thickness"
|
||||
ATTRIBUTE_SAP_THRESHOLD_AND_BELOW = f"sap_rating_{SAP_RATING_THRESHOLD}_and_below"
|
||||
ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD = f"EPC is pre {EPC_YEAR_THRESHOLD}"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -672,3 +684,56 @@ class AssetList:
|
|||
),
|
||||
axis=1
|
||||
)
|
||||
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] = (
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]].astype(float)
|
||||
)
|
||||
# Replace "" value with None
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] = (
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]].replace("", None)
|
||||
)
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] = (
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]].astype(float)
|
||||
)
|
||||
|
||||
# Estimate the perimeter
|
||||
self.standardised_asset_list[self.ATTRIBUTE_ESTIMATED_PERIMETER] = self.standardised_asset_list.apply(
|
||||
lambda x: estimate_perimeter(
|
||||
floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]] / x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
|
||||
num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] / x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
|
||||
), axis=1
|
||||
)
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_HEAT_LOSS_AREA] = self.standardised_asset_list.apply(
|
||||
lambda x: estimate_external_wall_area(
|
||||
num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
|
||||
floor_height=(
|
||||
float(x[self.EPC_API_DATA_NAMES["floor-height"]]) if
|
||||
x[self.EPC_API_DATA_NAMES["floor-height"]] else 2.5
|
||||
),
|
||||
perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER],
|
||||
built_form=x[self.EPC_API_DATA_NAMES["built-form"]]
|
||||
),
|
||||
axis=1
|
||||
)
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply(
|
||||
lambda x: RoofAttributes(description=x[self.EPC_API_DATA_NAMES["roof-description"]]).process()[
|
||||
"insulation_thickness"] if not pd.isnull(
|
||||
x[self.EPC_API_DATA_NAMES["roof-description"]]) else None,
|
||||
axis=1
|
||||
)
|
||||
|
||||
# We produce some additional fields
|
||||
# 1) Is the SAP rating at or below the threshold (SAP_RATING_THRESHOLD, i.e. C75 or lower)
|
||||
self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] = (
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <=
|
||||
self.SAP_RATING_THRESHOLD
|
||||
)
|
||||
# 2) Flag anything where the EPC lodgement year is before EPC_YEAR_THRESHOLD (i.e. more than 5 calendar years old)
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] = (
|
||||
pd.to_datetime(
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["lodgement-date"]]
|
||||
).dt.year < self.EPC_YEAR_THRESHOLD
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue