setting up integration test - handled case of floor area being 0

This commit is contained in:
Khalim Conn-Kowlessar 2025-08-28 22:09:39 +01:00
parent e391b6c7c6
commit 95226a73ff
6 changed files with 281 additions and 50 deletions

View file

@ -1376,6 +1376,12 @@ class AssetList:
# 3) We don't remove anything that has access issues yet
if self.non_intrusives_present:
if self.new_format_non_insturives_present_v2:
non_intrusives_wall_filter = (
(self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") &
self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL", "EMPTY CAVITY"])
)
else:
non_intrusives_wall_filter = (
(self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") &
self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"])

View file

@ -59,25 +59,25 @@ def app():
Property UPRN
"""
# Colchester
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections"
data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx"
sheet_name = "Extra 202 Colchester Addresses"
postcode_column = 'domna_postcode'
address1_column = "domna_address_1"
# Lambeth
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth"
data_filename = "LAMBETH Asset List ( Incomplete).xlsx"
sheet_name = "Green properties"
postcode_column = 'SX3 Postcode'
address1_column = "SX3 Short Address"
address1_method = None
fulladdress_column = "domna_full_address"
address_cols_to_concat = []
fulladdress_column = None
address_cols_to_concat = ["SX3 Short Address"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "landlord_property_type"
landlord_built_form = "landlord_built_form"
landlord_property_type = "Property Type"
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "landlord_property_id"
landlord_property_id = "row_id"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -91,7 +91,41 @@ def app():
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = "landlord_block_reference"
landlord_block_reference = None
# # Colchester
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections"
# data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx"
# sheet_name = "Extra 202 Colchester Addresses"
# postcode_column = 'domna_postcode'
# address1_column = "domna_address_1"
# address1_method = None
# fulladdress_column = "domna_full_address"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = "landlord_property_type"
# landlord_built_form = "landlord_built_form"
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "landlord_property_id"
# landlord_sap = None
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_id_colnames = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = "landlord_block_reference"
# # Abri
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Post Inspections"
@ -128,38 +162,38 @@ def app():
# landlord_block_reference = None
# Freebridge
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge"
# data_filename = "Domna - FCH property data May 25 copy.xlsx"
# sheet_name = "EPC Data"
# postcode_column = 'Post Code'
# address1_column = "Address 1"
# address1_method = None
# fulladdress_column = None
# address_cols_to_concat = ["Address 1", "Address 4"]
# missing_postcodes_method = None
# landlord_year_built = "Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Property Type"
# landlord_built_form = None
# landlord_wall_construction = "Walls Description"
# landlord_heating_system = "Heating Type"
# landlord_existing_pv = None
# landlord_property_id = "Place Ref"
# landlord_roof_construction = "Roof Description"
# landlord_sap = "Current SAP"
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_address = []
# outcomes_id = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = []
# phase = True # Inspections not complete, produce a partial view
# ecosurv_landlords = None
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge/Aug2025 programme"
data_filename = "Domna - FCH property data May 25 copy.xlsx"
sheet_name = "EPC Data"
postcode_column = 'Post Code'
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["Address 1", "Address 4"]
missing_postcodes_method = None
landlord_year_built = "Build Date"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = None
landlord_wall_construction = "Walls Description"
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "Place Ref"
landlord_roof_construction = "Roof Description"
landlord_sap = "Current SAP"
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_address = []
outcomes_id = []
master_filepaths = []
master_to_asset_list_filepath = None
asset_list_header = 0
landlord_block_reference = None
master_id_colnames = []
phase = False # Inspections not complete, produce a partial view
ecosurv_landlords = None
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Broadlands"
# data_filename = "Broadlands Asset List.xlsx"

View file

@ -341,5 +341,7 @@ PROPERTY_MAPPING = {
'house': 'house',
'block of flats': 'block of flats',
'bungalow': 'bungalow',
'flat': 'flat'
'flat': 'flat',
'FLA': 'flat',
'HOU': 'house'
}

View file

@ -0,0 +1,178 @@
import ast
import json
from copy import deepcopy
from datetime import datetime
import random
from tqdm import tqdm
import pandas as pd
import numpy as np
from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc
from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.orm import sessionmaker
from starlette.responses import Response
from backend.app.config import get_settings, get_prediction_buckets
from backend.app.db.connection import db_engine
from backend.app.db.functions.materials_functions import get_materials
from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
from backend.app.db.functions.property_functions import (
create_property, create_property_details_epc, create_property_targets, update_property_data,
update_or_create_property_spatial_details
)
from backend.app.db.functions.recommendations_functions import (
create_plan, upload_recommendations, create_scenario
)
from backend.app.db.functions.funding_functions import upload_funding
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
from backend.app.db.models.portfolio import rating_lookup
from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES
from backend.app.plan.utils import get_cleaned
from backend.app.utils import sap_to_epc
import backend.app.assumptions as assumptions
from backend.ml_models.api import ModelApi
from backend.Property import Property
from backend.apis.GoogleSolarApi import GoogleSolarApi
from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
import recommendations.optimiser.optimiser_functions as optimiser_functions
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.KwhData import KwhData
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.Funding import Funding
from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths
from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value
# Input data (temp)
import pickle
import pandas as pd
# ---------------------------------------------------------------------------
# Scratch integration run: load locally pickled fixtures, build Property
# objects from a random sample of EPC certificates, attach a default solar
# configuration and mocked kWh predictions, then run the recommendations for
# every property.
# ---------------------------------------------------------------------------

# The TEMP step further down assigns DATA_ANOMALY_MATCHES onto each property, but
# the name was never brought into scope, so the script died with a NameError there.
# NOTE(review): assumes DATA_ANOMALY_MATCHES is available as a module-level name in
# etl.epc.Record (EPCRecord's cleaning code appears to reference it as a global) -
# confirm, or import it from the module that defines it.
from etl.epc.Record import DATA_ANOMALY_MATCHES

# Machine-specific inputs, collected in one place so they are easy to spot and swap.
LOCAL_DATA_PICKLE = "local_data_for_deletion.pkl"
KWH_CLIENT_PICKLE = "kwh_client_for_deletion.pkl"
EPC_CERTIFICATES_CSV = (
    "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/"
    "domestic-E06000002-Middlesbrough/certificates.csv"
)

# Upper bound on the number of unique-UPRN certificates to run through the pipeline.
SAMPLE_SIZE = 1000

# Values substituted when the certificate has no value for the field, so that
# EPCRecord does not have to perform its own data load to fill them in.
EPC_NULL_DEFAULTS = {
    "number-habitable-rooms": 3,
    "floor-height": 2.4,
    "number-heated-rooms": 3,
}

# SECURITY: pickle.load executes arbitrary code while unpickling. Only ever point
# these at locally produced, trusted files.
with open(LOCAL_DATA_PICKLE, "rb") as f:
    local_data = pickle.load(f)

cleaning_data = local_data["cleaning_data"]
materials = local_data["materials"]
cleaned = local_data["cleaned"]
project_scores_matrix = local_data["project_scores_matrix"]
partial_project_scores_matrix = local_data["partial_project_scores_matrix"]
whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"]

with open(KWH_CLIENT_PICKLE, "rb") as f:
    kwh_client = pickle.load(f)

epc_data = pd.read_csv(EPC_CERTIFICATES_CSV, low_memory=False)

# One certificate per UPRN. The sample size is capped at the number of rows that
# are available because DataFrame.sample raises when asked for more rows than exist.
unique_epc_data = epc_data.drop_duplicates("UPRN")
sample_epc_data = unique_epc_data.sample(
    min(SAMPLE_SIZE, len(unique_epc_data))
).reset_index(drop=True)

# Load the input properties
input_properties = []
for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)):
    # Certificate columns are UPPER_SNAKE_CASE; the EPC dict uses lower-kebab-case
    # keys, with missing values normalised to None.
    epc = {
        k.lower().replace("_", "-"): None if pd.isnull(v) else v
        for k, v in config.items()
    }

    # Avoid the data load inside of EPCRecord - something we should pull out
    for field, default in EPC_NULL_DEFAULTS.items():
        if pd.isnull(epc[field]):
            epc[field] = default

    epc_records = {"original_epc": epc, "full_sap_epc": {}, "old_data": []}
    prepared_epc = EPCRecord(
        epc_records=epc_records,
        run_mode="newdata",
        cleaning_data=cleaning_data,
    )

    input_properties.append(
        Property(
            id=row_id,
            is_new=True,
            address=epc["address"],
            postcode=epc["postcode"],
            epc_record=prepared_epc,
            already_installed={},
            property_valuation={},
            non_invasive_recommendations=[],
            energy_assessment=None,
            **Property.extract_kwargs(config),  # TODO: Deprecate this
        )
    )

# For each property, insert the default solar configuration.
# The materials filter does not depend on the property, so it is computed once.
solar_materials = [m for m in materials if m["type"] == "solar_pv"]
for p in tqdm(input_properties):
    # No API key is passed: only the default panel performance is requested here.
    solar_api = GoogleSolarApi(api_key=None, solar_materials=solar_materials, max_retries=5)
    panel_performance = solar_api.default_panel_performance(property_instance=p)
    p.set_solar_panel_configuration(
        solar_panel_configuration={
            "insights_data": None,
            "panel_performance": panel_performance,
            "unit_share_of_energy": 1,
        },
    )

# We mock kwh preds: one random integer in [100, 3000) per property for heating and
# for hot water, keyed by the property's UPRN.
heating_rows = []
hotwater_rows = []
for p in tqdm(input_properties):
    heating_rows.append({"id": p.uprn, "predictions": random.randrange(100, 3000)})
    hotwater_rows.append({"id": p.uprn, "predictions": random.randrange(100, 3000)})

mocked_kwh_predictions = {
    "heating_kwh_predictions": pd.DataFrame(heating_rows),
    "hotwater_kwh_predictions": pd.DataFrame(hotwater_rows),
}

# NOTE(review): set_features is run twice per property (here, and again in the TEMP
# loop below once DATA_ANOMALY_MATCHES has been attached). The first pass looks like
# a leftover - confirm and drop it if set_features does not need to run twice.
for p in input_properties:
    p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions)

for p in input_properties:
    # TEMP
    p.DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES
    p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions)

# Run the recommendations
recommendations = {}
recommendations_scoring_data = []
representative_recommendations = {}
for p in tqdm(input_properties):
    recommender = Recommendations(
        property_instance=p,
        materials=materials,
        exclusions=[],
        inclusions=[],
        default_u_values=True,
    )
    property_recommendations, property_representative_recommendations = recommender.recommend()

    # Properties with no recommendations contribute nothing to the outputs.
    if not property_recommendations:
        continue

    recommendations[p.id] = property_recommendations
    representative_recommendations[p.id] = property_representative_recommendations

    p.create_base_difference_epc_record(cleaned_lookup=cleaned)
    p.adjust_difference_record_with_recommendations(
        property_recommendations, property_representative_recommendations
    )
    recommendations_scoring_data.extend(p.recommendations_scoring_data)

View file

@ -593,6 +593,15 @@ class EPCRecord:
self.prepared_epc["total-floor-area"]
)
# We handle the edge case of floor area being 0. We set it to None and it is then cleaned by
# _clean_with_data_processor
if self.prepared_epc['total-floor-area'] == 0:
print(
"Edge case of floor area being zero - will set to none and will be cleaned in "
"_clean_with_data_processor"
)
self.prepared_epc['total-floor-area'] = None
def _clean_mains_gas(self):
"""
This method will clean the mains gas, if empty or invalid
@ -668,7 +677,7 @@ class EPCRecord:
for attribute in fields:
value = self.prepared_epc[attribute]
if value in DATA_ANOMALY_MATCHES:
if value in DATA_ANOMALY_MATCHES or pd.isnull(value):
if attribute in null_attributes:
value = None
else:

View file

@ -48,6 +48,8 @@ DATA_ANOMALY_MATCHES = {
None,
# An older value which rarely shows up but has been seen in the data.
"UNKNOWN",
# Title-case variant of "UNKNOWN" above.
"Unknown"
}
DATA_ANOMALY_SUBSTRINGS = {