Merge pull request #699 from Hestia-Homes/debugging-ara-runs

Debugging ara runs
This commit is contained in:
KhalimCK 2026-02-10 14:40:26 +00:00 committed by GitHub
commit da64ececcd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 89 additions and 37 deletions

View file

@ -69,24 +69,24 @@ def app():
Property UPRN
"""
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
data_filename = "Domna SHF Wave 3 (3).xlsx"
sheet_name = "Domna Wave 3"
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire"
data_filename = "ASPIRE ASSET LIST.xlsx"
sheet_name = "Asset List"
postcode_column = "Postcode"
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["Address 1"]
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Construction Years"
landlord_os_uprn = "UPRN"
landlord_property_type = "Type"
landlord_built_form = "Attachment"
landlord_wall_construction = "Wall type"
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Row ID"
landlord_property_id = "LLUPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -275,7 +275,7 @@ def app():
if skip is not None and not force_retrieve_data:
if i <= skip:
continue
chunk = asset_list.standardised_asset_list[i : i + chunk_size]
chunk = asset_list.standardised_asset_list[i: i + chunk_size]
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
df=chunk,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
@ -418,7 +418,7 @@ def app():
# Retrieve just the data we need
epc_df = epc_df[
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
].rename(columns=asset_list.EPC_API_DATA_NAMES)
].rename(columns=asset_list.EPC_API_DATA_NAMES)
# Look for columns not in the find my EPC data, which will have happened if we didn't
# retrieve it in the first place
@ -435,7 +435,7 @@ def app():
find_my_epc_data[
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
+ list(asset_list.FIND_EPC_DATA_NAMES.keys())
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
how="left",
on=asset_list.DOMNA_PROPERTY_ID,
)

View file

@ -439,6 +439,11 @@ PROPERTY_MAPPING = {
'House: Semi Detached: Top Floor': 'house',
'House: End Terrace: Ground Floor': 'house',
'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette',
'Flat: Enclosed Mid Terrace: Basement': 'flat'
'Flat: Enclosed Mid Terrace: Basement': 'flat',
'Warden Bungalow': 'bungalow',
'Warden Flat': 'flat',
'Upper Floor Flat': 'flat',
'Extracare Scheme': 'other'
}

View file

@ -1256,7 +1256,8 @@ class Property:
"biodiesel": "Smokeless Fuel",
"b30d": "B30K Biofuel",
"coal": "Coal",
"oil": "Oil"
"oil": "Oil",
"unknown": None # Handle - anything post 2020 is electricity else gas
}
self.heating_energy_source = list({
@ -1326,7 +1327,16 @@ class Property:
if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]]
mapped_to = fuel_map[self.main_fuel["fuel_type"]]
if mapped_to is None and self.main_fuel["fuel_type"] == "unknown":
# Handle logic based on age band
if self.year_built >= 2020:
self.heating_energy_source = "Electricity"
else:
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
self.heating_energy_source = mapped_to
else:
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")

View file

@ -1,8 +1,22 @@
import os
from functools import lru_cache
from pydantic_settings import BaseSettings
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional
def resolve_env_file() -> Optional[str]:
    """Pick the dotenv file that matches the current ``ENVIRONMENT`` variable.

    ``ENVIRONMENT`` is read from the process environment and defaults to
    ``"local"`` when it is not set.

    Returns:
        ``"backend/.env"`` for ``local``, ``"backend/.env.test"`` for ``test``,
        and ``None`` for every other value (e.g. prod, which uses no env file).
    """
    env_files_by_environment = {
        "local": "backend/.env",
        "test": "backend/.env.test",
    }
    current_environment = os.getenv("ENVIRONMENT", "local")
    # Any environment without an entry (prod included) gets no env file.
    return env_files_by_environment.get(current_environment)
class Settings(BaseSettings):
API_KEY: str
API_KEY_NAME: str = "X-API-KEY"
@ -41,8 +55,10 @@ class Settings(BaseSettings):
AWS_SECRET_KEY_ID: Optional[str] = None
AWS_DEFAULT_REGION: Optional[str] = None
class Config:
env_file = "backend/.env.local"
model_config = SettingsConfigDict(
env_file=resolve_env_file(),
env_file_encoding="utf-8",
)
@lru_cache()

View file

@ -24,7 +24,7 @@ def get_cleaned():
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-{environment}".format(environment=get_settings().ENVIRONMENT)
bucket_name=get_settings().DATA_BUCKET
)
cleaned = msgpack.unpackb(cleaned, raw=False)

View file

@ -15,7 +15,7 @@ ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
COPY backend/.env.local backend/.env.local
COPY backend/.env.test backend/.env
# -----------------------------
# Copy requirements FIRST (for Docker layer caching)

View file

@ -978,13 +978,15 @@ async def model_engine(body: PlanTriggerRequest):
recommendations_scoring_data.extend(p.recommendations_scoring_data)
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop(
columns=[
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"
]
)
# Temp putting this here
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
if not recommendations_scoring_data.empty:
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=[
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"
]
)
# TODO: Temp putting this here
recommendations_scoring_data["is_post_sap10_ending"] = True
all_predictions = await model_api.async_paginated_predictions(

View file

@ -313,4 +313,15 @@ class ModelApi:
logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}")
await asyncio.sleep(2 ** attempts) # exponential backoff
await self.close_aiohttp_session()
# Ensure a stable output structure for each DataFrame so that it can be used by other functions downstream
for k in all_predictions.keys():
if all_predictions[k].empty:
col_template = ['id', 'predictions', 'property_id', 'recommendation_id', 'phase'] if (
extract_ids) else ['id', 'predictions']
all_predictions[k] = pd.DataFrame(
columns=col_template
)
return all_predictions

View file

@ -53,7 +53,7 @@ class OnboarderBase:
)
else:
self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name)
def write(self):
if self.data is None:
raise ValueError("No data to write. Please run transform() before writing.")

View file

@ -196,6 +196,10 @@ class KwhData:
if save and self.bucket is None:
raise Exception("bucket not set, cannot save data")
if data.empty:
# No data to process: return the empty frame unchanged
return data
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
# in anticipation of the new model

View file

@ -1090,6 +1090,7 @@ class Recommendations:
ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY
#
kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id)
].merge(

View file

@ -331,18 +331,18 @@ class RoofRecommendations:
"""
# Can a non-primary part satisfy loft insulation?
primary_needs_loft = component_needs[1]["needs_loft_insulation"]
primary_needs_loft = component_needs[0]["needs_loft_insulation"]
secondary_needs_loft = any(
p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 1
p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 0
)
if primary_needs_loft and not secondary_needs_loft:
# Only option is loft
return "loft"
primary_needs_sloping = component_needs[1]["needs_sloping_ceiling"]
primary_needs_sloping = component_needs[0]["needs_sloping_ceiling"]
secondary_needs_sloping = any(
p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 1
p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 0
)
if primary_needs_sloping and not secondary_needs_sloping:
@ -418,11 +418,13 @@ class RoofRecommendations:
return needs_sloping, not needs_loft # Indicates that the property needs sloping ceiling as we only run
# this in that case
roof_components = [x for x in find_my_epc_components if x["component_name"] == "Roof"]
extracted_roof_descriptions = {
idx: {
"description": component["description"],
**RoofAttributes(component["description"]).process()
} for idx, component in enumerate(find_my_epc_components) if component["component_name"] == "Roof"
} for idx, component in enumerate(roof_components)
}
component_needs = {}

View file

@ -264,6 +264,7 @@ def save_excel_to_s3(df, bucket_name, file_key):
def read_csv_from_s3(bucket_name, filepath):
logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'")
s3 = boto3.client('s3')
# Get the object from s3