mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
setting up integration test - handled case of floor area being 0
This commit is contained in:
parent
e391b6c7c6
commit
95226a73ff
6 changed files with 281 additions and 50 deletions
|
|
@ -1376,10 +1376,16 @@ class AssetList:
|
|||
# 3) We don't remove anything that has access issues yet
|
||||
|
||||
if self.non_intrusives_present:
|
||||
non_intrusives_wall_filter = (
|
||||
(self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") &
|
||||
self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"])
|
||||
)
|
||||
if self.new_format_non_insturives_present_v2:
|
||||
non_intrusives_wall_filter = (
|
||||
(self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") &
|
||||
self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL", "EMPTY CAVITY"])
|
||||
)
|
||||
else:
|
||||
non_intrusives_wall_filter = (
|
||||
(self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") &
|
||||
self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"])
|
||||
)
|
||||
elif self.old_format_non_intrusives_present:
|
||||
non_intrusives_wall_filter = (
|
||||
self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin(
|
||||
|
|
|
|||
|
|
@ -59,25 +59,25 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
# Colchester
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections"
|
||||
data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx"
|
||||
sheet_name = "Extra 202 Colchester Addresses"
|
||||
postcode_column = 'domna_postcode'
|
||||
address1_column = "domna_address_1"
|
||||
# Lambeth
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth"
|
||||
data_filename = "LAMBETH Asset List ( Incomplete).xlsx"
|
||||
sheet_name = "Green properties"
|
||||
postcode_column = 'SX3 Postcode'
|
||||
address1_column = "SX3 Short Address"
|
||||
address1_method = None
|
||||
fulladdress_column = "domna_full_address"
|
||||
address_cols_to_concat = []
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["SX3 Short Address"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "landlord_property_type"
|
||||
landlord_built_form = "landlord_built_form"
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "landlord_property_id"
|
||||
landlord_property_id = "row_id"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -91,7 +91,41 @@ def app():
|
|||
phase = False
|
||||
ecosurv_landlords = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = "landlord_block_reference"
|
||||
landlord_block_reference = None
|
||||
|
||||
# # Colchester
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections"
|
||||
# data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx"
|
||||
# sheet_name = "Extra 202 Colchester Addresses"
|
||||
# postcode_column = 'domna_postcode'
|
||||
# address1_column = "domna_address_1"
|
||||
# address1_method = None
|
||||
# fulladdress_column = "domna_full_address"
|
||||
# address_cols_to_concat = []
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = None
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "landlord_property_type"
|
||||
# landlord_built_form = "landlord_built_form"
|
||||
# landlord_wall_construction = None
|
||||
# landlord_roof_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "landlord_property_id"
|
||||
# landlord_sap = None
|
||||
# outcomes_filename = None
|
||||
# outcomes_sheetname = None
|
||||
# outcomes_postcode = None
|
||||
# outcomes_houseno = None
|
||||
# outcomes_id = None
|
||||
# outcomes_address = None
|
||||
# master_filepaths = []
|
||||
# master_id_colnames = []
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
# asset_list_header = 0
|
||||
# landlord_block_reference = "landlord_block_reference"
|
||||
|
||||
# # Abri
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Post Inspections"
|
||||
|
|
@ -128,38 +162,38 @@ def app():
|
|||
# landlord_block_reference = None
|
||||
|
||||
# Freebridge
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge"
|
||||
# data_filename = "Domna - FCH property data May 25 copy.xlsx"
|
||||
# sheet_name = "EPC Data"
|
||||
# postcode_column = 'Post Code'
|
||||
# address1_column = "Address 1"
|
||||
# address1_method = None
|
||||
# fulladdress_column = None
|
||||
# address_cols_to_concat = ["Address 1", "Address 4"]
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = "Build Date"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property Type"
|
||||
# landlord_built_form = None
|
||||
# landlord_wall_construction = "Walls Description"
|
||||
# landlord_heating_system = "Heating Type"
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "Place Ref"
|
||||
# landlord_roof_construction = "Roof Description"
|
||||
# landlord_sap = "Current SAP"
|
||||
# outcomes_filename = []
|
||||
# outcomes_sheetname = []
|
||||
# outcomes_postcode = []
|
||||
# outcomes_houseno = []
|
||||
# outcomes_address = []
|
||||
# outcomes_id = []
|
||||
# master_filepaths = []
|
||||
# master_to_asset_list_filepath = None
|
||||
# asset_list_header = 0
|
||||
# landlord_block_reference = None
|
||||
# master_id_colnames = []
|
||||
# phase = True # Inspections not complete, produce a partial view
|
||||
# ecosurv_landlords = None
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge/Aug2025 programme"
|
||||
data_filename = "Domna - FCH property data May 25 copy.xlsx"
|
||||
sheet_name = "EPC Data"
|
||||
postcode_column = 'Post Code'
|
||||
address1_column = "Address 1"
|
||||
address1_method = None
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["Address 1", "Address 4"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Build Date"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = "Walls Description"
|
||||
landlord_heating_system = "Heating Type"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Place Ref"
|
||||
landlord_roof_construction = "Roof Description"
|
||||
landlord_sap = "Current SAP"
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_address = []
|
||||
outcomes_id = []
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
master_id_colnames = []
|
||||
phase = False # Inspections not complete, produce a partial view
|
||||
ecosurv_landlords = None
|
||||
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Broadlands"
|
||||
# data_filename = "Broadlands Asset List.xlsx"
|
||||
|
|
|
|||
|
|
@ -341,5 +341,7 @@ PROPERTY_MAPPING = {
|
|||
'house': 'house',
|
||||
'block of flats': 'block of flats',
|
||||
'bungalow': 'bungalow',
|
||||
'flat': 'flat'
|
||||
'flat': 'flat',
|
||||
'FLA': 'flat',
|
||||
'HOU': 'house'
|
||||
}
|
||||
|
|
|
|||
178
backend/tests/test_integration.py
Normal file
178
backend/tests/test_integration.py
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
import ast
|
||||
import json
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
|
||||
import random
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from etl.epc.Record import EPCRecord
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from sqlalchemy.exc import IntegrityError, OperationalError
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from starlette.responses import Response
|
||||
|
||||
from backend.app.config import get_settings, get_prediction_buckets
|
||||
from backend.app.db.connection import db_engine
|
||||
from backend.app.db.functions.materials_functions import get_materials
|
||||
from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
|
||||
from backend.app.db.functions.property_functions import (
|
||||
create_property, create_property_details_epc, create_property_targets, update_property_data,
|
||||
update_or_create_property_spatial_details
|
||||
)
|
||||
from backend.app.db.functions.recommendations_functions import (
|
||||
create_plan, upload_recommendations, create_scenario
|
||||
)
|
||||
from backend.app.db.functions.funding_functions import upload_funding
|
||||
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
|
||||
from backend.app.db.models.portfolio import rating_lookup
|
||||
from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES
|
||||
from backend.app.plan.utils import get_cleaned
|
||||
from backend.app.utils import sap_to_epc
|
||||
import backend.app.assumptions as assumptions
|
||||
|
||||
from backend.ml_models.api import ModelApi
|
||||
from backend.Property import Property
|
||||
from backend.apis.GoogleSolarApi import GoogleSolarApi
|
||||
|
||||
from recommendations.optimiser.CostOptimiser import CostOptimiser
|
||||
from recommendations.optimiser.GainOptimiser import GainOptimiser
|
||||
import recommendations.optimiser.optimiser_functions as optimiser_functions
|
||||
from recommendations.Recommendations import Recommendations
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
|
||||
from backend.ml_models.Valuation import PropertyValuation
|
||||
|
||||
from etl.bill_savings.KwhData import KwhData
|
||||
from etl.spatial.OpenUprnClient import OpenUprnClient
|
||||
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
|
||||
|
||||
from backend.Funding import Funding
|
||||
from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths
|
||||
from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value
|
||||
|
||||
# Input data (temp)
|
||||
import pickle
|
||||
|
||||
import pandas as pd
|
||||
|
||||
with open("local_data_for_deletion.pkl", 'rb') as f:
|
||||
local_data = pickle.load(f)
|
||||
|
||||
cleaning_data = local_data["cleaning_data"]
|
||||
materials = local_data["materials"]
|
||||
cleaned = local_data["cleaned"]
|
||||
project_scores_matrix = local_data["project_scores_matrix"]
|
||||
partial_project_scores_matrix = local_data["partial_project_scores_matrix"]
|
||||
whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"]
|
||||
|
||||
with open("kwh_client_for_deletion.pkl", "rb") as f:
|
||||
kwh_client = pickle.load(f)
|
||||
|
||||
epc_data = pd.read_csv(
|
||||
"/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates.csv",
|
||||
low_memory=False
|
||||
)
|
||||
|
||||
sample_epc_data = epc_data.drop_duplicates("UPRN").sample(1000).reset_index(drop=True)
|
||||
|
||||
# Load the input properties
|
||||
input_properties = []
|
||||
for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)):
|
||||
epc = {
|
||||
k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items()
|
||||
}
|
||||
# Avoid the data load inside of EPCRecord - something we should pull out
|
||||
for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]:
|
||||
if pd.isnull(epc[x]):
|
||||
if x == "floor-height":
|
||||
epc[x] = 2.4
|
||||
if x == "number-habitable-rooms":
|
||||
epc[x] = 3
|
||||
if x == "number-heated-rooms":
|
||||
epc[x] = 3
|
||||
|
||||
epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []}
|
||||
|
||||
prepared_epc = EPCRecord(
|
||||
epc_records=epc_records,
|
||||
run_mode="newdata",
|
||||
cleaning_data=cleaning_data,
|
||||
)
|
||||
|
||||
input_properties.append(
|
||||
Property(
|
||||
id=row_id,
|
||||
is_new=True,
|
||||
address=epc["address"],
|
||||
postcode=epc["postcode"],
|
||||
epc_record=prepared_epc,
|
||||
already_installed={},
|
||||
property_valuation={},
|
||||
non_invasive_recommendations=[],
|
||||
energy_assessment=None,
|
||||
**Property.extract_kwargs(config), # TODO: Depraecate this
|
||||
)
|
||||
)
|
||||
|
||||
# For each property, insert the default solar configuration
|
||||
for p in tqdm(input_properties):
|
||||
solar_api = GoogleSolarApi(
|
||||
api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5
|
||||
)
|
||||
panel_performance = solar_api.default_panel_performance(property_instance=p)
|
||||
p.set_solar_panel_configuration(
|
||||
solar_panel_configuration={
|
||||
"insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1
|
||||
},
|
||||
)
|
||||
|
||||
# We mock kwh preds
|
||||
mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []}
|
||||
for p in tqdm(input_properties):
|
||||
mocked_kwh_predictions["heating_kwh_predictions"].append({
|
||||
"id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0]
|
||||
})
|
||||
mocked_kwh_predictions["hotwater_kwh_predictions"].append({
|
||||
"id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0]
|
||||
})
|
||||
mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"])
|
||||
mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"])
|
||||
|
||||
[
|
||||
p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in
|
||||
input_properties
|
||||
]
|
||||
|
||||
for p in input_properties:
|
||||
# TEMP
|
||||
p.DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES
|
||||
p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions)
|
||||
|
||||
# Run the recommendations
|
||||
recommendations = {}
|
||||
recommendations_scoring_data = []
|
||||
representative_recommendations = {}
|
||||
for p in tqdm(input_properties):
|
||||
recommender = Recommendations(
|
||||
property_instance=p,
|
||||
materials=materials,
|
||||
exclusions=[],
|
||||
inclusions=[],
|
||||
default_u_values=True
|
||||
)
|
||||
property_recommendations, property_representative_recommendations = recommender.recommend()
|
||||
|
||||
if not property_recommendations:
|
||||
continue
|
||||
|
||||
recommendations[p.id] = property_recommendations
|
||||
representative_recommendations[p.id] = property_representative_recommendations
|
||||
|
||||
p.create_base_difference_epc_record(cleaned_lookup=cleaned)
|
||||
p.adjust_difference_record_with_recommendations(
|
||||
property_recommendations, property_representative_recommendations
|
||||
)
|
||||
|
||||
recommendations_scoring_data.extend(p.recommendations_scoring_data)
|
||||
|
|
@ -593,6 +593,15 @@ class EPCRecord:
|
|||
self.prepared_epc["total-floor-area"]
|
||||
)
|
||||
|
||||
# We handle the edge case of floor area being 0. We set it to None and it is cleaned by
|
||||
# _clean_with_data_processor
|
||||
if self.prepared_epc['total-floor-area'] == 0:
|
||||
print(
|
||||
"Edge case of floor area being zero - will set to none and will be cleaned in "
|
||||
"_clean_with_data_processor"
|
||||
)
|
||||
self.prepared_epc['total-floor-area'] = None
|
||||
|
||||
def _clean_mains_gas(self):
|
||||
"""
|
||||
This method will clean the mains gas, if empty or invalid
|
||||
|
|
@ -668,7 +677,7 @@ class EPCRecord:
|
|||
|
||||
for attribute in fields:
|
||||
value = self.prepared_epc[attribute]
|
||||
if value in DATA_ANOMALY_MATCHES:
|
||||
if value in DATA_ANOMALY_MATCHES or pd.isnull(value):
|
||||
if attribute in null_attributes:
|
||||
value = None
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -48,6 +48,8 @@ DATA_ANOMALY_MATCHES = {
|
|||
None,
|
||||
# An older value which rarely shows up but has been seen in the data.
|
||||
"UNKNOWN",
|
||||
# Title-case variant of the value above.
|
||||
"Unknown"
|
||||
}
|
||||
|
||||
DATA_ANOMALY_SUBSTRINGS = {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue