diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 3c5627fc..9569afe8 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -1376,10 +1376,16 @@ class AssetList: # 3) We don't remove anything that haas access issues yet if self.non_intrusives_present: - non_intrusives_wall_filter = ( - (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & - self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) - ) + if self.new_format_non_insturives_present_v2: + non_intrusives_wall_filter = ( + (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & + self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL", "EMPTY CAVITY"]) + ) + else: + non_intrusives_wall_filter = ( + (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & + self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) + ) elif self.old_format_non_intrusives_present: non_intrusives_wall_filter = ( self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( diff --git a/asset_list/app.py b/asset_list/app.py index cf64a02d..01c31f0f 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,25 +59,25 @@ def app(): Property UPRN """ - # Colchester - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections" - data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx" - sheet_name = "Extra 202 Colchester Addresses" - postcode_column = 'domna_postcode' - address1_column = "domna_address_1" + # Lambeth + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth" + data_filename = "LAMBETH Asset List ( Incomplete).xlsx" + sheet_name = "Green properties" + postcode_column = 'SX3 Postcode' + address1_column = "SX3 Short Address" address1_method = None - fulladdress_column = "domna_full_address" - 
address_cols_to_concat = [] + fulladdress_column = None + address_cols_to_concat = ["SX3 Short Address"] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None - landlord_property_type = "landlord_property_type" - landlord_built_form = "landlord_built_form" + landlord_property_type = "Property Type" + landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "landlord_property_id" + landlord_property_id = "row_id" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -91,7 +91,41 @@ def app(): phase = False ecosurv_landlords = None asset_list_header = 0 - landlord_block_reference = "landlord_block_reference" + landlord_block_reference = None + + # # Colchester + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections" + # data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx" + # sheet_name = "Extra 202 Colchester Addresses" + # postcode_column = 'domna_postcode' + # address1_column = "domna_address_1" + # address1_method = None + # fulladdress_column = "domna_full_address" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = "landlord_property_type" + # landlord_built_form = "landlord_built_form" + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "landlord_property_id" + # landlord_sap = None + # outcomes_filename = None + # outcomes_sheetname = None + # outcomes_postcode = None + # outcomes_houseno = None + # outcomes_id = None + # outcomes_address = None + # master_filepaths = [] + # master_id_colnames = [] + # master_to_asset_list_filepath = None + # phase = False + # 
ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = "landlord_block_reference" # # Abri # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Post Inspections" @@ -128,38 +162,38 @@ def app(): # landlord_block_reference = None # Freebridge - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge" - # data_filename = "Domna - FCH property data May 25 copy.xlsx" - # sheet_name = "EPC Data" - # postcode_column = 'Post Code' - # address1_column = "Address 1" - # address1_method = None - # fulladdress_column = None - # address_cols_to_concat = ["Address 1", "Address 4"] - # missing_postcodes_method = None - # landlord_year_built = "Build Date" - # landlord_os_uprn = None - # landlord_property_type = "Property Type" - # landlord_built_form = None - # landlord_wall_construction = "Walls Description" - # landlord_heating_system = "Heating Type" - # landlord_existing_pv = None - # landlord_property_id = "Place Ref" - # landlord_roof_construction = "Roof Description" - # landlord_sap = "Current SAP" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - # phase = True # Inspections not complete, produce a partial view - # ecosurv_landlords = None + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge/Aug2025 programme" + data_filename = "Domna - FCH property data May 25 copy.xlsx" + sheet_name = "EPC Data" + postcode_column = 'Post Code' + address1_column = "Address 1" + address1_method = None + fulladdress_column = None + address_cols_to_concat = ["Address 1", "Address 4"] + missing_postcodes_method = None + landlord_year_built = "Build Date" + landlord_os_uprn = None + landlord_property_type = "Property Type" + 
landlord_built_form = None + landlord_wall_construction = "Walls Description" + landlord_heating_system = "Heating Type" + landlord_existing_pv = None + landlord_property_id = "Place Ref" + landlord_roof_construction = "Roof Description" + landlord_sap = "Current SAP" + outcomes_filename = [] + outcomes_sheetname = [] + outcomes_postcode = [] + outcomes_houseno = [] + outcomes_address = [] + outcomes_id = [] + master_filepaths = [] + master_to_asset_list_filepath = None + asset_list_header = 0 + landlord_block_reference = None + master_id_colnames = [] + phase = False # Inspections not complete, produce a partial view + ecosurv_landlords = None # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Broadlands" # data_filename = "Broadlands Asset List.xlsx" diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 5c3a2b29..290e172a 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -341,5 +341,7 @@ PROPERTY_MAPPING = { 'house': 'house', 'block of flats': 'block of flats', 'bungalow': 'bungalow', - 'flat': 'flat' + 'flat': 'flat', + 'FLA': 'flat', + 'HOU': 'house' } diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py new file mode 100644 index 00000000..cd41d5c0 --- /dev/null +++ b/backend/tests/test_integration.py @@ -0,0 +1,178 @@ +import ast +import json +from copy import deepcopy +from datetime import datetime + +import random +from tqdm import tqdm +import pandas as pd +import numpy as np +from etl.epc.Record import EPCRecord +from backend.SearchEpc import SearchEpc +from sqlalchemy.exc import IntegrityError, OperationalError +from sqlalchemy.orm import sessionmaker +from starlette.responses import Response + +from backend.app.config import get_settings, get_prediction_buckets +from backend.app.db.connection import db_engine +from backend.app.db.functions.materials_functions import get_materials +from 
backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations +from backend.app.db.functions.property_functions import ( + create_property, create_property_details_epc, create_property_targets, update_property_data, + update_or_create_property_spatial_details +) +from backend.app.db.functions.recommendations_functions import ( + create_plan, upload_recommendations, create_scenario +) +from backend.app.db.functions.funding_functions import upload_funding +from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn +from backend.app.db.models.portfolio import rating_lookup +from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES +from backend.app.plan.utils import get_cleaned +from backend.app.utils import sap_to_epc +import backend.app.assumptions as assumptions + +from backend.ml_models.api import ModelApi +from backend.Property import Property +from backend.apis.GoogleSolarApi import GoogleSolarApi + +from recommendations.optimiser.CostOptimiser import CostOptimiser +from recommendations.optimiser.GainOptimiser import GainOptimiser +import recommendations.optimiser.optimiser_functions as optimiser_functions +from recommendations.Recommendations import Recommendations +from utils.logger import setup_logger +from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 +from backend.ml_models.Valuation import PropertyValuation + +from etl.bill_savings.KwhData import KwhData +from etl.spatial.OpenUprnClient import OpenUprnClient +from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc + +from backend.Funding import Funding +from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths +from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value + +# Input data (temp) +import pickle + +import pandas as pd + +with open("local_data_for_deletion.pkl", 'rb') as f: + 
local_data = pickle.load(f) + +cleaning_data = local_data["cleaning_data"] +materials = local_data["materials"] +cleaned = local_data["cleaned"] +project_scores_matrix = local_data["project_scores_matrix"] +partial_project_scores_matrix = local_data["partial_project_scores_matrix"] +whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"] + +with open("kwh_client_for_deletion.pkl", "rb") as f: + kwh_client = pickle.load(f) + +epc_data = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates.csv", + low_memory=False +) + +sample_epc_data = epc_data.drop_duplicates("UPRN").sample(1000).reset_index(drop=True) + +# Load the input properties +input_properties = [] +for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)): + epc = { + k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items() + } + # Avoid the data load inside of EPCRecord - something we should pull out + for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]: + if pd.isnull(epc[x]): + if x == "floor-height": + epc[x] = 2.4 + if x == "number-habitable-rooms": + epc[x] = 3 + if x == "number-heated-rooms": + epc[x] = 3 + + epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []} + + prepared_epc = EPCRecord( + epc_records=epc_records, + run_mode="newdata", + cleaning_data=cleaning_data, + ) + + input_properties.append( + Property( + id=row_id, + is_new=True, + address=epc["address"], + postcode=epc["postcode"], + epc_record=prepared_epc, + already_installed={}, + property_valuation={}, + non_invasive_recommendations=[], + energy_assessment=None, + **Property.extract_kwargs(config), # TODO: Deprecate this + ) + ) + +# For each property, insert the default solar configuration +for p in tqdm(input_properties): + solar_api = GoogleSolarApi( + api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5 + ) + 
panel_performance = solar_api.default_panel_performance(property_instance=p) + p.set_solar_panel_configuration( + solar_panel_configuration={ + "insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1 + }, + ) + +# We mock kwh preds +mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []} +for p in tqdm(input_properties): + mocked_kwh_predictions["heating_kwh_predictions"].append({ + "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] + }) + mocked_kwh_predictions["hotwater_kwh_predictions"].append({ + "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] + }) +mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"]) +mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) + +[ + p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in + input_properties +] + +for p in input_properties: + # TEMP + p.DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES + p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) + +# Run the recommendations +recommendations = {} +recommendations_scoring_data = [] +representative_recommendations = {} +for p in tqdm(input_properties): + recommender = Recommendations( + property_instance=p, + materials=materials, + exclusions=[], + inclusions=[], + default_u_values=True + ) + property_recommendations, property_representative_recommendations = recommender.recommend() + + if not property_recommendations: + continue + + recommendations[p.id] = property_recommendations + representative_recommendations[p.id] = property_representative_recommendations + + p.create_base_difference_epc_record(cleaned_lookup=cleaned) + p.adjust_difference_record_with_recommendations( + property_recommendations, property_representative_recommendations + ) + + 
recommendations_scoring_data.extend(p.recommendations_scoring_data) diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 8e6be5d0..8c060e13 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -593,6 +593,15 @@ class EPCRecord: self.prepared_epc["total-floor-area"] ) + # We handle the edge case of floor area being 0. We set it to None and it is cleaned by + # _clean_with_data_processor + if self.prepared_epc['total-floor-area'] == 0: + print( + "Edge case of floor area being zero - will set to none and will be cleaned in " + "_clean_with_data_processor" + ) + self.prepared_epc['total-floor-area'] = None + def _clean_mains_gas(self): """ This method will clean the mains gas, if empty or invalid @@ -668,7 +677,7 @@ class EPCRecord: for attribute in fields: value = self.prepared_epc[attribute] - if value in DATA_ANOMALY_MATCHES: + if value in DATA_ANOMALY_MATCHES or pd.isnull(value): if attribute in null_attributes: value = None else: diff --git a/etl/epc/settings.py b/etl/epc/settings.py index 2a9b1746..a641575c 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -48,6 +48,8 @@ DATA_ANOMALY_MATCHES = { None, # An older value which rarely shows up but has been seen in the data. "UNKNOWN", + # Title-case variant of the value above. + "Unknown" } DATA_ANOMALY_SUBSTRINGS = {