Merge pull request #571 from Hestia-Homes/eco-eligiblity-bug

Debugging for Peabody: handle edge-case properties and add placeholder averages cleaning for missing variables
This commit is contained in:
KhalimCK 2025-11-29 02:16:54 +08:00 committed by GitHub
commit 512fa1e469
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 102 additions and 12 deletions

View file

@ -1192,6 +1192,14 @@ class Property:
'oil range cooker': 'Oil'
}
fuel_map = {
None: "Natural Gas (Community Scheme)",
"mains gas": "Natural Gas (Community Scheme)",
"biomass": "Smokeless Fuel",
"electricity": "Electricity",
"biogas": "Smokeless Fuel",
}
self.heating_energy_source = list({
fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
})
@ -1212,19 +1220,39 @@ class Property:
else:
self.heating_energy_source = ['Wood Logs']
if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
if len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source:
# We might have something like heating energy source equal to ['Natural Gas', 'Varied (Community Scheme)']
# so we treat this as community heating
raise Exception("Investigate me")
if len(self.heating_energy_source) == 0:
heating_flags = {
v for k, v in self.main_heating.items() if k not in ["original_description", "clean_description"]
}
hotwater_flags = {
v for k, v in self.hotwater.items() if k not in ["original_description", "clean_description"]
}
# If all flags are zero, we have a no data example
if (heating_flags == {False} or hotwater_flags == {None}) and (
hotwater_flags == {False} or hotwater_flags == {None}):
# We have nodata so we try and rely on main fuel
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
mapped_fuel = fuel_map[self.main_fuel["fuel_type"]]
self.heating_energy_source = mapped_fuel
self.hot_water_energy_source = mapped_fuel
return
else:
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")
if len(self.heating_energy_source) > 1:
# We treat this as a community scheme
self.heating_energy_source = ["Varied (Community Scheme)"]
self.heating_energy_source = self.heating_energy_source[0]
if self.heating_energy_source == "Varied (Community Scheme)":
fuel_map = {
None: "Natural Gas (Community Scheme)",
"mains gas": "Natural Gas (Community Scheme)",
"biomass": "Smokeless Fuel",
"electricity": "Electricity",
"biogas": "Smokeless Fuel",
}
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]]
else:

View file

@ -405,6 +405,65 @@ def check_duplicate_uprns(plan_input):
return True
def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame):
    """
    Placeholder cleaning function to handle edge cases where we have missing data for
    number of habitable rooms, number of heated rooms and floor height.

    Missing values are filled with the median of comparable rows in ``cleaning_data``.
    Comparable rows are selected by progressively narrowing the data:

    1. rows with the same ``property_type`` as the record;
    2. rows in the same ``local_authority``, if any such rows exist;
    3. rows whose ``total_floor_area`` is within +/-10% of the record's, if the
       record has a floor area and any such rows exist.

    Values already present on the record are never overwritten. If no median can be
    computed for a field (no comparable rows, or the column is entirely null), that
    field is left missing rather than raising.

    This need was born out of the Peabody project.

    :param prepared_epc: record exposing a ``prepared_epc`` mapping; filled values are
        written both to that mapping and to the same-named attribute on the record
    :param cleaning_data: dataframe with ``property_type``, ``local_authority``,
        ``total_floor_area``, ``number_habitable_rooms``, ``number_heated_rooms`` and
        ``floor_height`` columns
    :return: the same ``prepared_epc`` object, mutated in place
    """
    record = prepared_epc.prepared_epc
    target_fields = ("number_habitable_rooms", "number_heated_rooms", "floor_height")

    if not any(pd.isnull(record[field]) for field in target_fields):
        # Nothing to do
        return prepared_epc

    # NOTE(review): the original filter repeated the property_type condition twice;
    # a second, different criterion may have been intended - confirm with the author.
    clean_with = cleaning_data[cleaning_data["property_type"] == record["property_type"]]

    # Narrow to the local authority only when that leaves us with some data
    if record["local_authority"] in clean_with["local_authority"].values:
        clean_with = clean_with[clean_with["local_authority"] == record["local_authority"]]

    # Narrow to similar floor areas only when the record has one and rows match
    floor_area = record["total_floor_area"]
    if not pd.isnull(floor_area):
        floor_area_clean_with = clean_with[
            (clean_with["total_floor_area"] <= floor_area * 1.1)
            & (clean_with["total_floor_area"] >= floor_area * 0.9)
        ]
        if not floor_area_clean_with.empty:
            clean_with = floor_area_clean_with

    habitable_median = clean_with["number_habitable_rooms"].median()
    heated_median = clean_with["number_heated_rooms"].median()
    clean_floor_height = clean_with["floor_height"].median()

    # The median is NaN when there is nothing to average; int(round(NaN)) would raise,
    # so only convert medians that actually exist.
    clean_n_habitable_rooms = (
        None if pd.isnull(habitable_median) else int(round(habitable_median))
    )
    clean_n_heated_rooms = (
        None if pd.isnull(heated_median) else int(round(heated_median))
    )

    # A property cannot have more heated rooms than habitable rooms
    if (
        clean_n_habitable_rooms is not None
        and clean_n_heated_rooms is not None
        and clean_n_heated_rooms > clean_n_habitable_rooms
    ):
        clean_n_heated_rooms = clean_n_habitable_rooms

    # We now fill, keeping the mapping and the attributes on the record in sync
    if clean_n_habitable_rooms is not None and pd.isnull(record["number_habitable_rooms"]):
        record["number_habitable_rooms"] = clean_n_habitable_rooms
        prepared_epc.number_habitable_rooms = clean_n_habitable_rooms

    if clean_n_heated_rooms is not None and pd.isnull(record["number_heated_rooms"]):
        record["number_heated_rooms"] = clean_n_heated_rooms
        prepared_epc.number_heated_rooms = clean_n_heated_rooms

    if not pd.isnull(clean_floor_height) and pd.isnull(record["floor_height"]):
        record["floor_height"] = clean_floor_height
        prepared_epc.floor_height = clean_floor_height

    return prepared_epc
async def model_engine(body: PlanTriggerRequest):
logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json()))
@ -669,6 +728,10 @@ async def model_engine(body: PlanTriggerRequest):
cleaning_data=cleaning_data,
)
# TODO: This is a temp function to handle a specific edge case with Peabody. We should
# factor this into EPCRecord as part of the cleaning however we need some more testing
prepared_epc = averages_cleaning(prepared_epc, cleaning_data)
# If we have an ECO project, we parse the cavity/solar reasons
eco_packages[property_id] = parse_eco_packages(config, prepared_epc)
@ -756,10 +819,6 @@ async def model_engine(body: PlanTriggerRequest):
input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
[p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties]
# TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour
# TODO: If we can't get high image quality, should we use the solar API? Maybe just for semi-detached units with
# extensions, since it doesn't seem to do a great job
logger.info("Performing solar analysis")
ofgem_consumption_averages = read_dataframe_from_s3_parquet(

View file

@ -596,6 +596,9 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
if self.prepared_epc["total-floor-area"] is None:
return
self.prepared_epc["total-floor-area"] = float(
self.prepared_epc["total-floor-area"]
)