Merge pull request #359 from Hestia-Homes/backend-deployment

Backend deployment - Renewed docker image and requirements file, fixed tests
This commit is contained in:
KhalimCK 2024-10-22 12:15:04 +01:00 committed by GitHub
commit b3d0b45648
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 415 additions and 916 deletions

View file

@ -1,6 +1,9 @@
# Ignore all test directories
model_data/local_data/*
backend/tests/*
backend/node_modules/*
backend/.idea/*
backend/.env
recommendations/tests/*
model_data/tests/*
infrastructure/*
@ -12,3 +15,8 @@ land_registry/*
pytest.ini
*/README.md
utils/tests/*
etl/epc/tests/*
etl/epc_clean/tests/*
etl/spatial/tests/*

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Engine" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Engine" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -589,14 +589,61 @@ class GoogleSolarApi:
self.double_property = True
@staticmethod
def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
"""
Calculate the percentage decrease in consumption between two energy efficiency ratings.
:param start_efficiency: The starting energy efficiency rating.
:param end_efficiency: The ending energy efficiency rating.
:param consumption_averages: The DataFrame containing the consumption averages.
:return:
"""
start_consumption = consumption_averages.loc[
consumption_averages["current-energy-efficiency"].astype(str) == str(start_efficiency), "total_consumption"
].values[0]
end_consumption = consumption_averages.loc[
consumption_averages["current-energy-efficiency"].astype(str) == str(end_efficiency), "total_consumption"
].values[0]
percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
# percentage_decrease cannot be nehative
if percentage_decrease < 0:
percentage_decrease = 0
return percentage_decrease
@classmethod
def estimate_new_consumption(
cls, current_energy_efficiency, target_efficiency, current_consumption, ofgem_consumption_averages
):
"""
Given then consumption_averages dataset, which is produced as a result of the training_data.py script,
for the energy kwh models, this function will estimate the new consumption based on the current consumption,
based on the expected reduction in consumption from the current rating to the target rating.
:param current_energy_efficiency: The current energy efficiency rating
:param target_efficiency: The target energy efficiency rating
:param current_consumption: The current consumption of the property
:param ofgem_consumption_averages: DataFrame of the Ofgem consumption averages
:return:
"""
percentage_decrease = cls.calculate_percentage_decrease(
start_efficiency=current_energy_efficiency,
end_efficiency=target_efficiency,
consumption_averages=ofgem_consumption_averages
)
new_consumption = current_consumption * (1 - percentage_decrease / 100)
return new_consumption
@classmethod
def prepare_input_data(
cls,
input_properties: List[Property],
energy_consumption_client: EnergyConsumptionModel,
ofgem_consumption_averages: pd.DataFrame,
body: PlanTriggerRequest
):
"""
:param input_properties: List of properties
:param energy_consumption_client: EnergyConsumptionModel instance
:param ofgem_consumption_averages: DataFrame of the Ofgem consumption averages
:param body: PlanTriggerRequest instance
This sets up the data required to make the solar api request
:return:
@ -610,12 +657,13 @@ class GoogleSolarApi:
# Energy consumption is adjusted for the property's expected post retrofit state
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
# property to achieve post retrofit of just the fabric
"energy_consumption": energy_consumption_client.estimate_new_consumption(
"energy_consumption": cls.estimate_new_consumption(
current_energy_efficiency=p.data["current-energy-efficiency"],
target_efficiency="69",
current_consumption=p.estimate_electrical_consumption(
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
)
),
ofgem_consumption_averages=ofgem_consumption_averages
),
"property_id": p.id,
"uprn": p.uprn
@ -628,12 +676,13 @@ class GoogleSolarApi:
# Energy consumption is adjusted for the property's expected post retrofit state
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
# property to achieve post retrofit of just the fabric
"energy_consumption": energy_consumption_client.estimate_new_consumption(
"energy_consumption": cls.estimate_new_consumption(
current_energy_efficiency=p.data["current-energy-efficiency"],
target_efficiency="69",
current_consumption=p.estimate_electrical_consumption(
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
),
ofgem_consumption_averages=ofgem_consumption_averages
),
"property_id": p.id,
"uprn": p.uprn

View file

@ -1,5 +1,5 @@
from functools import lru_cache
from pydantic import BaseSettings
from pydantic_settings import BaseSettings
class Settings(BaseSettings):

View file

@ -110,19 +110,19 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"type": rec["type"],
"measure_type": rec["measure_type"],
"description": rec["description"],
"estimated_cost": rec["total"],
"estimated_cost": float(rec["total"]),
"default": rec["default"],
"starting_u_value": rec.get("starting_u_value"),
"new_u_value": rec.get("new_u_value"),
"sap_points": rec["sap_points"],
"energy_savings": rec["heat_demand"],
"kwh_savings": rec["kwh_savings"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"total_work_hours": rec["labour_hours"],
"energy_cost_savings": rec["energy_cost_savings"],
"labour_days": rec["labour_days"],
"starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None,
"new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None,
"sap_points": float(rec["sap_points"]),
"energy_savings": float(rec["heat_demand"]),
"kwh_savings": float(rec["kwh_savings"]),
"co2_equivalent_savings": float(rec["co2_equivalent_savings"]),
"total_work_hours": float(rec["labour_hours"]),
"energy_cost_savings": float(rec["energy_cost_savings"]),
"labour_days": float(rec["labour_days"]),
"already_installed": rec["already_installed"],
"heat_demand": rec["heat_demand"]
"heat_demand": float(rec["heat_demand"])
}
for rec in recommendations_to_upload
]

View file

@ -24,7 +24,7 @@ from backend.app.db.functions.recommendations_functions import (
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.plan.utils import get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
@ -36,15 +36,12 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures
from recommendations.Recommendations import Recommendations
from recommendations.Mds import Mds
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
from etl.bill_savings.KwhData import KwhData
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
logger = setup_logger()
@ -488,6 +485,16 @@ async def trigger_plan(body: PlanTriggerRequest):
if not input_properties:
return Response(status_code=204)
# Set up model api and warm up the lambdas
model_api = ModelApi(
portfolio_id=body.portfolio_id,
timestamp=created_at,
prediction_buckets=get_prediction_buckets()
)
await model_api.async_warm_up_lambdas(
model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES
)
# The materials data could be cached or local so we don't need to make
# consistent requests to the backend for
# the same data
@ -495,32 +502,14 @@ async def trigger_plan(body: PlanTriggerRequest):
materials = get_materials(session)
cleaned = get_cleaned()
dataset_version = "2024-07-08"
energy_consumption_client = EnergyConsumptionModel(
model_paths={
"heating_kwh": f"model_directory/energy_consumption_model/heating_kwh_{dataset_version}.pkl",
"hot_water_kwh": f"model_directory/energy_consumption_model/hot_water_kwh_{dataset_version}.pkl"
},
dummy_schema_path=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl",
consumption_average_path=f"energy_consumption/{dataset_version}/consumption_averages.parquet",
cleaned=cleaned,
environment=get_settings().ENVIRONMENT
)
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
model_api = ModelApi(
portfolio_id=body.portfolio_id,
timestamp=created_at,
prediction_buckets=get_prediction_buckets()
)
epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
kwh_preds = model_api.paginated_predictions(
kwh_preds = await model_api.async_paginated_predictions(
data=epcs_for_scoring,
bucket=get_settings().DATA_BUCKET,
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
model_prefixes=model_api.KWH_MODEL_PREFIXES,
extract_ids=False,
batch_size=SCORING_BATCH_SIZE
)
@ -536,9 +525,15 @@ async def trigger_plan(body: PlanTriggerRequest):
# extensions, since it doesn't seem to do a great job
logger.info("Performing solar analysis")
ofgem_consumption_averages = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET,
file_key=f"energy_consumption/2024-07-08/consumption_averages.parquet"
)
building_solar_config, unit_solar_config = GoogleSolarApi.prepare_input_data(
input_properties=input_properties,
energy_consumption_client=energy_consumption_client,
ofgem_consumption_averages=ofgem_consumption_averages,
body=body
)
@ -593,7 +588,7 @@ async def trigger_plan(body: PlanTriggerRequest):
"carbon_ending"]
)
all_predictions = model_api.paginated_predictions(
all_predictions = await model_api.async_paginated_predictions(
data=recommendations_scoring_data,
bucket=get_settings().DATA_BUCKET,
batch_size=SCORING_BATCH_SIZE
@ -622,10 +617,10 @@ async def trigger_plan(body: PlanTriggerRequest):
scoring_epcs = pd.DataFrame(scoring_epcs)
scoring_epcs = kwh_client.transform(data=scoring_epcs, cleaned=cleaned)
kwh_simulation_predictions = model_api.paginated_predictions(
kwh_simulation_predictions = await model_api.async_paginated_predictions(
data=scoring_epcs,
bucket=get_settings().DATA_BUCKET,
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
model_prefixes=model_api.KWH_MODEL_PREFIXES,
batch_size=SCORING_BATCH_SIZE
)
@ -776,6 +771,7 @@ async def trigger_plan(body: PlanTriggerRequest):
update_or_create_property_spatial_details(session, p.uprn, p.spatial)
property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
update_property_data(
session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
)
@ -868,587 +864,3 @@ async def trigger_plan(body: PlanTriggerRequest):
session.close()
return Response(status_code=200)
@router.post("/mds")
async def build_mds(body: MdsRequest):
# TODO: This is a placeholder location for the MDS endpoint, which this is being assembled
logger.info("Connecting to db")
session = sessionmaker(bind=db_engine)()
created_at = datetime.now().isoformat()
try:
session.begin()
logger.info("Getting the inputs")
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
measure_set = body.measures
optimise_measures = measure_set is not None
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
input_properties = []
for property_id, config in tqdm(enumerate(plan_input), total=len(plan_input)):
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
uprn = config.get("uprn", None)
uprn = None if uprn == "" else uprn
if uprn:
uprn = int(float(uprn))
epc_searcher = SearchEpc(
address1=config["address"],
postcode=config["postcode"],
uprn=uprn,
auth_token=get_settings().EPC_AUTH_TOKEN,
os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
)
epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
# For the moment, our OS API access is unavailable, so we skip and interpolate
epc_searcher.find_property(skip_os=True)
if config["address"] == "35b High Street":
print("Performing temporary patch on 35b High Street")
epc_searcher.newest_epc["uprn"] = 10002911892
epc_searcher.full_sap_epc["uprn"] = 10002911892
if config["address"] == "Cobnut Barn":
print("Performing temporary patch on Cobnut Barn")
epc_searcher.newest_epc["uprn"] = 10013924689
# Create a record in db
# TODO: If we productionise the creation of this mds report, we will need to store this in the db
# property_id, is_new = create_property(
# session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
# )
# if not is_new:
# continue
#
# create_property_targets(
# session,
# property_id=property_id,
# portfolio_id=body.portfolio_id,
# epc_target=body.goal_value,
# heat_demand_target=None
# )
epc_records = {
'original_epc': epc_searcher.newest_epc.copy(),
'full_sap_epc': epc_searcher.full_sap_epc.copy(),
'old_data': epc_searcher.older_epcs.copy(),
}
# patch = next((
# x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
# ), {})
# epc_records = patch_epc(patch, epc_records)
prepared_epc = EPCRecord(
epc_records=epc_records,
run_mode="newdata",
cleaning_data=cleaning_data
)
# property_already_installed = next((
# x for x in already_installed if
# (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
# ), {})
#
# property_non_invasive_recommendations = next((
# x for x in non_invasive_recommendations if
# (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
# ), {})
if measure_set is None:
measures = config["measures"] if "measures" in config else None
else:
measures = measure_set
input_properties.append(
Property(
id=property_id,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
# already_installed=property_already_installed,
# non_invasive_recommendations=property_non_invasive_recommendations,
measures=measures,
is_new=is_new,
**Property.extract_kwargs(config)
)
)
logger.info("Reading in materials and cleaned datasets")
materials = get_materials(session)
cleaned = get_cleaned()
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
logger.info("Getting spatial data")
for p in tqdm(input_properties):
p.get_spatial_data(uprn_filenames)
logger.info("Getting components and epc recommendations")
recommendations_scoring_data = []
representative_recommendations = {}
recommendations = {}
for p in tqdm(input_properties):
p.set_features(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
mds_recommendations, property_representative_recommendations, errors = mds.build()
if isinstance(errors, list):
if errors:
raise Exception("Errors occurred during MDS build")
else:
if any([len(x) for x in errors.values()]):
raise Exception("Errors occurred during MDS build")
recommendations[p.id] = mds_recommendations
representative_recommendations[p.id] = property_representative_recommendations
# Build the scoring data
p.create_base_difference_epc_record(cleaned_lookup=cleaned)
if optimise_measures:
for _id, mds_recs in mds_recommendations.items():
representative_ids = [r["recommendation_id"] for r in property_representative_recommendations[_id]]
simulation_mds_recs = []
for recs in mds_recs:
simulation_mds_recs.append(
[r for r in recs if r["recommendation_id"] in representative_ids]
)
p.adjust_difference_record_with_recommendations(
simulation_mds_recs, property_representative_recommendations[_id]
)
data = p.recommendations_scoring_data.copy()
for d in data:
d["id"] = d["id"] + "*" + _id
recommendations_scoring_data.extend(data)
else:
recommendations_scoring_data.append(
p.simulate_all_representative_recommendations(property_representative_recommendations)
)
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
)
model_api = ModelApi(
portfolio_id=body.portfolio_id, timestamp=created_at, prediction_buckets=get_prediction_buckets()
)
all_predictions = {
"sap_change_predictions": pd.DataFrame(),
"heat_demand_predictions": pd.DataFrame(),
"carbon_change_predictions": pd.DataFrame()
}
to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = model_api.predict_all(
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
)
# Append the predictions to the predictions dictionary
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
# TODO: 1) walls_insulation_thickness_ending is not being set in the recommendations_scoring_data,
# insulation_thickness_ending is being set instead
# 2)
# TODO: TEMP
for p in plan_input:
if p["uprn"]:
p["uprn"] = str(int(float(p["uprn"])))
import re
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
if optimise_measures:
results = []
for p in input_properties:
sap_before = int(p.data["current-energy-efficiency"])
epc_before = p.data["current-energy-rating"]
heat_demand_before = p.data["energy-consumption-current"]
carbon_before = p.data["co2-emissions-current"]
current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=heat_demand_before * p.floor_area,
current_epc_rating=epc_before,
)
current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
package_comparison = []
for _id in recommendations[p.id].keys():
sap_prediction = all_predictions["sap_change_predictions"][
(all_predictions["sap_change_predictions"]["property_id"] == str(p.id)) &
(all_predictions["sap_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
].copy().reset_index(drop=True)
sap_prediction["row_id"] = sap_prediction.index
heat_demand_prediction = all_predictions["heat_demand_predictions"][
(all_predictions["heat_demand_predictions"]["property_id"] == str(p.id)) &
(all_predictions["heat_demand_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
].copy().reset_index(drop=True)
heat_demand_prediction["row_id"] = heat_demand_prediction.index
carbon_prediction = all_predictions["carbon_change_predictions"][
(all_predictions["carbon_change_predictions"]["property_id"] == str(p.id)) &
(all_predictions["carbon_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
].copy().reset_index(drop=True)
carbon_prediction["row_id"] = carbon_prediction.index
epc_target = body.goal_value
if epc_before == epc_target:
continue
sap_target = epc_to_sap_lower_bound(epc_target)
# Define the measures
sap_threshold_barrier = sap_prediction[sap_prediction["predictions"] >= sap_target]
meets_threshold = True
if sap_threshold_barrier.empty:
sap_threshold_barrier = sap_prediction.tail(1)
meets_threshold = False
sap_threshold_barrier = sap_threshold_barrier.head(1)
sap_prediction = sap_prediction[
sap_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
]
heat_demand_prediction = heat_demand_prediction[
heat_demand_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
]
carbon_prediction = carbon_prediction[
carbon_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
]
reverse_map = {v: k for k, v in Mds.format_map.items()}
selected_measures = [
reverse_map[x.split("-")[0]] for x in sap_prediction["recommendation_id"].values
]
selected_measure_ids = [x.split("*")[0] for x in sap_prediction["recommendation_id"].values]
costs = [
r["total"] for r in representative_recommendations[p.id][_id] if
r["recommendation_id"] in selected_measure_ids
]
costs = sum(costs)
sap_after = sap_prediction["predictions"].values[-1]
epc_after = sap_to_epc(sap_after)
heat_demand_after = heat_demand_prediction["predictions"].values[-1]
carbon_after = carbon_prediction["predictions"].values[-1]
expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=heat_demand_after * p.floor_area,
current_epc_rating=epc_before,
)
expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
bill_savings = current_energy_bill - expected_energy_bill
energy_savings = current_adjusted_energy - expected_adjusted_energy
package_comparison.append(
{
"id": _id,
"cost": costs,
"measures": selected_measures,
"sap_before": sap_before,
"sap_after": sap_after,
"epc_before": epc_before,
"epc_after": epc_after,
"heat_demand_before": heat_demand_before,
"heat_demand_after": heat_demand_after,
"carbon_before": carbon_before,
"carbon_after": carbon_after,
"bill_savings": bill_savings,
"energy_savings": energy_savings,
"current_energy_bill": current_energy_bill,
"meets_threshold": meets_threshold
}
)
package_comparison = pd.DataFrame(package_comparison)
# Find the smallest cost package
if not package_comparison.empty:
# We check if any of the packages meet the threshold
# If none of them do, take the one that gets closest to the target
if package_comparison["meets_threshold"].any():
package_comparison = package_comparison[package_comparison["meets_threshold"]]
package_comparison = package_comparison.sort_values("cost")
else:
package_comparison = package_comparison.sort_values("sap_after", ascending=False)
package_comparison = package_comparison.head(1).to_dict("records")[0]
else:
package_comparison = {
"measures": [],
"sap_before": sap_before,
"sap_after": sap_before,
"epc_before": epc_before,
"epc_after": epc_before,
"heat_demand_before": heat_demand_before,
"heat_demand_after": heat_demand_before,
"carbon_before": carbon_before,
"carbon_after": carbon_before,
"bill_savings": 0,
"energy_savings": 0,
"current_energy_bill": current_energy_bill,
"meets_threshold": False
}
config = [c for c in plan_input if c["uprn"] == str(p.uprn)]
if not config:
config = {"address": None, "postcode": None}
else:
config = config[0]
results.append({
"config_address": config["address"],
"config_postcode": config["postcode"],
"uprn": p.uprn,
"address": p.address,
"postcode": p.postcode,
"measures": package_comparison["measures"],
"year_of_epc": p.data['lodgement-date'],
"sap_before": package_comparison["sap_before"],
"sap_after": package_comparison["sap_after"],
"epc_before": package_comparison["epc_before"],
"epc_after": package_comparison["epc_after"],
"heat_demand_before": package_comparison["heat_demand_before"],
"heat_demand_after": package_comparison["heat_demand_after"],
"carbon_before": package_comparison["carbon_before"],
"carbon_after": package_comparison["carbon_after"],
"bill_savings": round(package_comparison["bill_savings"], 2),
"energy_savings": round(package_comparison["energy_savings"], 2),
"current_energy_bill": round(package_comparison["current_energy_bill"], 2),
"EWI": "EWI" if "external_wall_insulation" in package_comparison["measures"] else None,
"CWI": "CWI" if "cavity_wall_insulation" in package_comparison["measures"] else None,
"LI": "LI" if "loft_insulation" in package_comparison["measures"] else None,
"ASHP Htg": "ASHP Htg" if "air_source_heat_pump" in package_comparison["measures"] else None,
"Elec Storage": (
"Elec Storage Htrs (Out of scope -Prov sum only)" if "high_heat_retention_storage_heaters" in
package_comparison["measures"] else None
),
"Solar PV": "Solar PV" if "solar_pv" in package_comparison["measures"] else None,
})
results = pd.DataFrame(results)
# For the different measures, we check the impact with a few debugging functions
walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures)
results.to_excel("optimised mds_results 5th June.xlsx")
results = []
for p in input_properties:
measures = p.measures
property_recommendations = [r['type'] for r in representative_recommendations[p.id]]
# TODO: Check high heat retention storage heaters - looks like it's excluded controls!
sap_prediction = all_predictions["sap_change_predictions"][
all_predictions["sap_change_predictions"]["property_id"] == str(p.id)
]
heat_demand_prediction = all_predictions["heat_demand_predictions"][
all_predictions["heat_demand_predictions"]["property_id"] == str(p.id)
]
carbon_prediction = all_predictions["carbon_change_predictions"][
all_predictions["carbon_change_predictions"]["property_id"] == str(p.id)
]
# Get a before and after for SAP, heat demand, CO2 and also calculate energy bill and energy savings
sap_before = int(p.data["current-energy-efficiency"])
sap_after = sap_prediction["predictions"].values[0] if measures else sap_before
epc_before = p.data["current-energy-rating"]
epc_after = sap_to_epc(sap_after) if measures else epc_before
heat_demand_before = p.data["energy-consumption-current"]
heat_demand_after = heat_demand_prediction["predictions"].values[0] if measures else heat_demand_before
carbon_before = p.data["co2-emissions-current"]
carbon_after = carbon_prediction["predictions"].values[0] if measures else carbon_before
# Estimate bill savings
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=heat_demand_before * p.floor_area,
current_epc_rating=epc_before,
)
# TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
# actually implemented
expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=heat_demand_after * p.floor_area,
current_epc_rating=epc_before,
)
# TODO: We should determine if the home is gas & electricity or just electricity
# Determine if the heating and hotwater was previously electric only or both
current_energy_bill = AnnualBillSavings.calculate_annual_bill(
kwh=current_adjusted_energy,
)
expected_energy_bill = AnnualBillSavings.calculate_annual_bill(
kwh=expected_adjusted_energy,
)
bill_savings = current_energy_bill - expected_energy_bill
energy_savings = current_adjusted_energy - expected_adjusted_energy
config = [c for c in plan_input if c["uprn"] == str(p.uprn)]
if not config:
config = {"address": None, "postcode": None}
else:
config = config[0]
to_append = {
"config_address": config["address"],
"config_postcode": config["postcode"],
"uprn": p.uprn,
"address": p.address,
"postcode": p.postcode,
"measures": measures,
"property_recommendations": property_recommendations,
"year_of_epc": p.data['lodgement-date'],
"sap_before": sap_before,
"sap_after": sap_after,
"epc_before": epc_before,
"epc_after": epc_after,
"heat_demand_before": heat_demand_before,
"heat_demand_after": heat_demand_after,
"carbon_before": carbon_before,
"carbon_after": carbon_after,
"bill_savings": round(bill_savings, 2),
"energy_savings": round(energy_savings, 2),
"current_energy_bill": round(current_energy_bill, 2),
"fuel_type": p.main_fuel["fuel_type"],
}
results.append(to_append)
results = pd.DataFrame(results)
results["sap_uplift"] = results["sap_after"] - results["sap_before"]
# results.to_excel("mds_results 5th June.xlsx")
walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures)
except IntegrityError:
logger.error("Database integrity error occurred", exc_info=True)
session.rollback()
return Response(status_code=500, content="Database integrity error.")
except OperationalError:
logger.error("Database operational error occurred", exc_info=True)
session.rollback()
return Response(status_code=500, content="Database operational error.")
except ValueError:
logger.error("Value error - possibly due to malformed data", exc_info=True)
session.rollback()
return Response(status_code=400, content="Bad request: malformed data.")
except Exception as e: # General exception handling
logger.error(f"An error occurred: {e}")
session.rollback()
return Response(status_code=500, content="An unexpected error occurred.")
finally:
session.close()
def check_mds(results, input_properties, recommendations, optimise_measures):
import ast
walls_check = []
hhr_check = []
for p in input_properties:
res = results[results["uprn"] == p.uprn]
wall = p.walls
heating = p.main_heating
heating_controls = p.main_heating_controls
if optimise_measures:
measures = res["measures"].values[0]
else:
measures = [list(z.keys())[0] for z in res["measures"].values[0]]
wall_recommendation = [
x for x in measures if
x in ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"]
]
hhr_recommendation = [
x for x in measures if
x in ["high_heat_retention_storage_heaters"]
]
if optimise_measures:
possible_measures = [ast.literal_eval(x) for x in list(recommendations[p.id].keys())]
# Unlist them
possible_measures = [x for sublist in possible_measures for x in sublist]
possible_measures = list(set(possible_measures))
else:
possible_measures = p.measures
if wall_recommendation:
if len(wall_recommendation) > 1:
raise Exception("something went wrong")
wall_recommendation = wall_recommendation[0]
else:
wall_recommendation = None
hhr_recommendation = hhr_recommendation[0] if hhr_recommendation else None
walls_check.append(
{
"uprn": p.uprn,
"address": p.address,
"postcode": p.postcode,
"property_type": p.data['property-type'],
"conservation_status": p.spatial["conservation_status"],
"is_listed_building": p.spatial["is_listed_building"],
"is_heritage_building": p.spatial["is_heritage_building"],
"wall": wall["clean_description"],
"recommendation": wall_recommendation,
"possible_measures": possible_measures,
"selected_measures": res["measures"].values[0],
}
)
hhr_check.append(
{
"uprn": p.uprn,
"address": p.address,
"postcode": p.postcode,
"heating": heating["clean_description"],
"heating_controls": heating_controls["clean_description"],
"recommendation": hhr_recommendation,
"possible_measures": possible_measures,
"selected_measures": res["measures"].values[0],
}
)
walls_check = pd.DataFrame(walls_check)
hhr_check = pd.DataFrame(hhr_check)
return walls_check, hhr_check

View file

@ -1,60 +1,25 @@
from pydantic import BaseModel, conlist, validator
from typing import Optional
from pydantic import BaseModel, Field, BeforeValidator
from typing import Annotated, List, Optional
# Example constants for validation
TYPICAL_MEASURE_TYPES = [
"wall_insulation",
"roof_insulation",
"ventilation",
"floor_insulation",
"windows",
"fireplace",
"heating",
"hot_water",
"low_energy_lighting",
"secondary_heating",
"solar_pv"
"wall_insulation", "roof_insulation", "ventilation", "floor_insulation",
"windows", "fireplace", "heating", "hot_water", "low_energy_lighting",
"secondary_heating", "solar_pv"
]
SPECIFIC_MEASURES = [
# Specific measures
# Walls
"internal_wall_insulation",
"external_wall_insulation",
"cavity_wall_insulation",
# Roof
"loft_insulation",
"flat_roof_insulation",
"room_roof_insulation",
# Floor
"suspended_floor_insulation",
"solid_floor_insulation",
# Heating
"boiler_upgrade",
"high_heat_retention_storage_heater",
"air_source_heat_pump",
"secondary_heating",
# Solar
"solar_pv",
# Windows Glazing
"double_glazing",
"secondary_glazing",
# Mechanical ventilation
"ventilation",
# Other
"low_energy_lighting",
"fireplace",
"hot_water",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "flat_roof_insulation", "room_roof_insulation",
"suspended_floor_insulation", "solid_floor_insulation",
"boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump",
"secondary_heating", "solar_pv", "double_glazing", "secondary_glazing",
"ventilation", "low_energy_lighting", "fireplace", "hot_water"
]
NON_INVASIVE_SPECIFIC_MEASURES = [
# Specific measures that will typically come from an energy assessment
"trickle_vents",
"draught_proofing",
"mixed_glazing", # This covers partial double glazing and secondary glazing
"cavity_extract_and_refill",
# Indicates that there is one (need to handle the case where there are multiple)
# extension that requires cavity wall insulation
"extension_cavity_wall_insulation",
"trickle_vents", "draught_proofing", "mixed_glazing", "cavity_extract_and_refill",
"extension_cavity_wall_insulation"
]
# This allows us to extend high level categories for measures such as "wall_insulation" to the specific measures
@ -70,11 +35,37 @@ MEASURE_MAP = {
"heating_controls": ["roomstat_programmer_trvs", "time_temperature_zone_control"]
}
VALID_GOALS = ["Increasing EPC"]
VALID_HOUSING_TYPES = ["Social", "Private"]
# Define the validation function for inclusions/exclusions
def check_inclusion_or_exclusion(value: str) -> str:
if value not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
raise ValueError(f"{value} is not an allowed inclusion")
return value
def check_goals(value: str) -> str:
assert value in VALID_GOALS, f"{value} is not a valid goal"
return value
def check_housing_type(value: str) -> str:
assert value in VALID_HOUSING_TYPES, f"{value} is not a valid housing type"
return value
# Use Annotated with BeforeValidator for each list item validation
InclusionOrExclusionItem = Annotated[str, BeforeValidator(check_inclusion_or_exclusion)]
Goal = Annotated[str, BeforeValidator(check_goals)]
HousingType = Annotated[str, BeforeValidator(check_housing_type)]
class PlanTriggerRequest(BaseModel):
budget: Optional[float] = None
goal: str
housing_type: str
goal: Goal
housing_type: HousingType
goal_value: str
portfolio_id: int
trigger_file_path: str
@ -82,53 +73,10 @@ class PlanTriggerRequest(BaseModel):
patches_file_path: Optional[str] = None
non_invasive_recommendations_file_path: Optional[str] = None
valuation_file_path: Optional[str] = None
exclusions: Optional[conlist(str, min_items=1)] = None
inclusions: Optional[conlist(str, min_items=1)] = None
exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
scenario_name: Optional[str] = ""
# If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property
# exists in the portfolio, it will be ignored
multi_plan: Optional[bool] = False
# if False, allows optimisation to be switched off
optimise: Optional[bool] = True
# If True, uses default u-values for models
default_u_values: Optional[bool] = True
_allowed_goals = {"Increasing EPC"}
_allowed_housing_types = {"Social", "Private"}
# Validator to ensure exclusions are within the pre-defined possibilities
@validator('exclusions', each_item=True)
def check_exclusions(cls, v):
if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
raise ValueError(f"{v} is not an allowed exclusion")
return v
@validator('inclusions', each_item=True)
def check_inclusions(cls, v):
if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
raise ValueError(f"{v} is not an allowed inclusion")
return v
# Validator to ensure that the goal is within the pre-defined possibilities
@validator('goal')
def check_goal(cls, v):
if v not in cls._allowed_goals:
raise ValueError(f"{v} is not a valid goal")
return v
# Validator to ensure that the housing type is within the pre-defined possibilities
@validator('housing_type')
def check_housing_type(cls, v):
if v not in cls._allowed_housing_types:
raise ValueError(f"{v} is not a valid housing type")
return v
class MdsRequest(PlanTriggerRequest):
# When creating the mds report, we allow an optional list of measures to select from. If this is passed, it will
# cause the service to select the optimal package from the list of measures
measures: Optional[conlist(str, min_items=1)] = None

View file

@ -1,5 +1,5 @@
# Pull base image
FROM python:3.10.12-slim-buster as build-image
FROM python:3.11.10-slim-bullseye as build-image
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE 1
@ -12,10 +12,10 @@ WORKDIR var/task/Model
RUN #apt-get update && apt-get install -y netcat-openbsd
# Install python dependencies
COPY ./backend/requirements/base.txt ./backend/requirements/base.txt
COPY ./backend/requirements/requirements.txt ./backend/requirements/requirements.txt
RUN pip install --upgrade pip
# Install and clean up temp caches
RUN pip install -r backend/requirements/base.txt && rm -rf /root/.cache
RUN pip install -r backend/requirements/requirements.txt && rm -rf /root/.cache
# Since we are not using a base AWS image, there is some additional setup required. We need to set up the runtime
# interface client
@ -35,16 +35,12 @@ COPY --from=build-image /usr/local/lib/python3.10/site-packages/ /usr/local/lib/
# Copy project files
COPY ./backend/ ./backend
COPY ./recommendations/ ./recommendations
COPY ./model_data/BaseUtility.py ./model_data/BaseUtility.py
COPY ./model_data/config.py ./model_data/config.py
COPY ./model_data/optimiser/ ./model_data/optimiser/
COPY ./model_data/__init__.py ./model_data/__init__.py
COPY ./model_data/EpcClean.py ./model_data/EpcClean.py
COPT ./model_data/simulation_system/core/ ./model_data/simulation_system/core/
COPY ./model_data/utils.py ./model_data/utils.py
COPY ./model_data/epc_attributes/ ./model_data/epc_attributes/
COPY ./datatypes/ ./datatypes/
COPY ./utils/ ./utils/
COPY ./etl/epc/ ./etl/epc/
COPY ./etl/epc_clean/ ./etl/epc_clean/
COPY ./etl/bill_savings/ ./etl/bill_savings/
COPY ./etl/spatial/ ./etl/spatial/
COPY ./datatypes/ ./datatypes/
# Set the ENTRYPOINT to the AWS Lambda RIC and CMD to your function handler
ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ]

View file

@ -1,5 +1,5 @@
# Pull base image
FROM python:3.10.12-slim-buster as build-image
FROM python:3.11.10-slim-bullseye as build-image
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE 1
@ -12,10 +12,10 @@ WORKDIR var/task/Model
#RUN apt-get update && apt-get install -y netcat-openbsd
# Install python dependencies
COPY ./backend/requirements/base.txt ./backend/requirements/base.txt
COPY ./backend/requirements/requirements.txt ./backend/requirements/requirements.txt
# Install and clean up temp caches
RUN pip install --upgrade pip \
&& pip install -r backend/requirements/base.txt && rm -rf /root/.cache
&& pip install -r backend/requirements/requirements.txt && rm -rf /root/.cache
# Since we are not using a base AWS image, there is some additional setup required. We need to set up the runtime
# interface client
@ -24,28 +24,28 @@ RUN pip install --upgrade pip \
RUN pip install awslambdaric
# Second stage: "runtime-image"
FROM python:3.10.12-slim-buster
FROM python:3.11.10-slim-bullseye
# Create the extensions directory to avoid warnings with RIE
RUN mkdir -p /opt/extensions
# Set work directory to the root of your project
WORKDIR /var/task/Model
# Copy the python dependencies from the build-image
COPY --from=build-image /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/
COPY --from=build-image /usr/local/lib/python3.11/site-packages/ /usr/local/lib/python3.11/site-packages/
# Copy project files
COPY ./backend/ ./backend
COPY ./recommendations/ ./recommendations
COPY ./model_data/BaseUtility.py ./model_data/BaseUtility.py
COPY ./model_data/config.py ./model_data/config.py
COPY ./model_data/optimiser/ ./model_data/optimiser/
COPY ./model_data/__init__.py ./model_data/__init__.py
COPY ./model_data/EpcClean.py ./model_data/EpcClean.py
COPY ./model_data/utils.py ./model_data/utils.py
COPY ./model_data/epc_attributes/ ./model_data/epc_attributes/
COPY ./model_data/simulation_system/core/DataProcessor.py ./model_data/simulation_system/core/DataProcessor.py
COPY ./model_data/simulation_system/core/Settings.py ./model_data/simulation_system/core/Settings.py
COPY ./datatypes/ ./datatypes/
COPY ./utils/ ./utils/
COPY ./etl/epc/ ./etl/epc/
COPY ./etl/epc_clean/ ./etl/epc_clean/
COPY ./etl/bill_savings/ ./etl/bill_savings/
COPY ./etl/spatial/ ./etl/spatial/
COPY ./BaseUtility.py ./BaseUtility.py
COPY ./datatypes/ ./datatypes/
# Set the ENTRYPOINT to the AWS Lambda RIC and CMD to your function handler
ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ]

View file

@ -244,8 +244,8 @@ class PropertyValuation:
return {
"current_value": current_value,
"lower_bound_increased_value": current_value * (1 + min_increase),
"upper_bound_increased_value": current_value * (1 + max_increase),
"average_increased_value": current_value * (1 + avg_increase),
"average_increase": current_value * (1 + avg_increase) - current_value
"lower_bound_increased_value": float(current_value * (1 + min_increase)),
"upper_bound_increased_value": float(current_value * (1 + max_increase)),
"average_increased_value": float(current_value * (1 + avg_increase)),
"average_increase": float(current_value * (1 + avg_increase) - current_value)
}

View file

@ -1,3 +1,5 @@
import aiohttp
import asyncio
import pandas as pd
from tqdm import tqdm
import requests
@ -18,6 +20,8 @@ class ModelApi:
# "hot_water_cost_predictions",
]
KWH_MODEL_PREFIXES = ["heating_kwh_predictions", "hotwater_kwh_predictions"]
MODEL_URLS = {
"sap_change_predictions": "sapmodel",
"heat_demand_predictions": "heatmodel",
@ -120,6 +124,28 @@ class ModelApi:
# depending on how you want to handle errors in your application
return None
async def predict_async(self, file_location, model_prefix: str):
"""Makes an asynchronous POST request to the Model API with the provided parameters."""
logger.info(f"Making request to {model_prefix} change api")
url = f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict"
payload = {
"file_location": file_location,
"property_id": "", # This should get removed
"portfolio_id": self.portfolio_id,
"created_at": self.timestamp
}
async with aiohttp.ClientSession() as session:
try:
async with session.post(
url, json=payload, headers={"Content-Type": "application/json"}, timeout=120
) as response:
response.raise_for_status()
return await response.json()
except aiohttp.ClientError as e:
logger.error(f"An error occurred: {e}")
return None
@staticmethod
def extract_phase(recommendation_id):
if 'phase=' in recommendation_id:
@ -180,6 +206,43 @@ class ModelApi:
return predictions
async def predict_all_async(self, df, bucket, model_prefixes=None, extract_ids=True) -> dict:
"""Uploads data and makes asynchronous requests to the model APIs for predictions."""
model_prefixes = self.MODEL_PREFIXES if model_prefixes is None else model_prefixes
predictions = {}
tasks = []
async with aiohttp.ClientSession() as session:
for model_prefix in model_prefixes:
logger.info(f"Scoring for model prefix: {model_prefix}")
file_location = self.upload_scoring_data(df, bucket, model_prefix)
# Schedule the prediction request as a coroutine
tasks.append(
self.predict_async(f"s3://{bucket}/" + file_location, model_prefix)
)
# Gather all asynchronous tasks (execute them concurrently)
responses = await asyncio.gather(*tasks, return_exceptions=True)
for model_prefix, response in zip(model_prefixes, responses):
if response:
predictions_bucket = self.prediction_buckets[model_prefix]
predictions_df = pd.DataFrame(
read_dataframe_from_s3_parquet(
bucket_name=predictions_bucket,
file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
)
)
predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
if extract_ids:
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+',
expand=True)
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
predictions[model_prefix] = predictions_df
return predictions
def paginated_predictions(self, data, bucket, batch_size, model_prefixes=None, extract_ids=True):
all_predictions = self.predictions_template()
to_loop_over = range(0, data.shape[0], batch_size)
@ -196,3 +259,59 @@ class ModelApi:
all_predictions[key] = pd.concat([all_predictions[key], scored])
return all_predictions
async def async_warm_up_lambdas(self, model_prefies=None):
"""Send asynchronous pre-flight requests to each model endpoint to wake up the cold Lambdas without waiting
for responses."""
logger.info("Asynchronously warming up Lambda functions...")
model_prefixes = self.MODEL_PREFIXES if model_prefies is None else model_prefies
tasks = []
async with aiohttp.ClientSession() as session:
for model_prefix in model_prefixes:
url = f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict"
# Create a coroutine for each warm-up request and add it to the tasks list
tasks.append(self._send_warm_up_request(session, url, model_prefix))
# Run all tasks concurrently but don't wait for the responses to finish
await asyncio.gather(*tasks, return_exceptions=True)
@staticmethod
async def _send_warm_up_request(session, url, model_prefix):
"""Helper method to send a pre-flight request to a given model URL."""
try:
async with session.post(url, json={}, timeout=2) as response:
# Log success for monitoring but do not block on the response
logger.info(f"Warmed up {model_prefix} with status code: {response.status}")
except aiohttp.ClientError as e:
logger.warning(f"Failed to warm up {model_prefix}: {e}")
logger.info("Lambda functions are warmed up and ready to go!")
async def async_paginated_predictions(self, data, bucket, batch_size, model_prefixes=None, extract_ids=True):
all_predictions = self.predictions_template()
to_loop_over = range(0, data.shape[0], batch_size)
async def run_batches():
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = await self.predict_all_async(
df=data.iloc[chunk:chunk + batch_size],
bucket=bucket,
model_prefixes=model_prefixes,
extract_ids=extract_ids
)
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
# Check if there is an existing event loop
try:
# If there is an existing event loop, await the coroutine directly
loop = asyncio.get_running_loop()
await run_batches()
except RuntimeError: # No running event loop
# If no event loop is running, use asyncio.run()
asyncio.run(run_batches())
return all_predictions

View file

@ -1,42 +0,0 @@
msgpack==1.0.5
anyio==3.7.1
cffi==1.15.1
click==8.1.3
cryptography==37.0.4
ecdsa==0.18.0
epc-api-python==1.0.2
exceptiongroup==1.1.2
fastapi==0.99.1
h11==0.14.0
httptools==0.5.0
idna==3.4
mangum==0.17.0
pyasn1==0.5.0
pycparser==2.21
pydantic==1.10.11
PyJWT==2.7.0
python-dotenv==1.0.0
python-jose==3.3.0
PyYAML==6.0
rsa==4.9
six==1.16.0
sniffio==1.3.0
starlette==0.27.0
typing_extensions==4.7.1
uvicorn==0.22.0
uvloop==0.17.0
urllib3<2
watchfiles==0.19.0
websockets==11.0.3
sqlalchemy==2.0.19
psycopg2-binary
pytz==2023.3
mip==1.15.0
boto3==1.28.3
pandas==1.5.3
pyarrow==12.0.1
textblob
usaddress==0.5.10
# Requirements we may not need
xgboost==1.7.6

View file

@ -1,28 +0,0 @@
anyio==3.7.1
cffi==1.15.1
click==8.1.3
cryptography==37.0.4
ecdsa==0.18.0
exceptiongroup==1.1.2
fastapi==0.99.1
h11==0.14.0
httptools==0.5.0
idna==3.4
mangum==0.17.0
pyasn1==0.5.0
pycparser==2.21
pydantic==1.10.11
PyJWT==2.7.0
python-dotenv==1.0.0
python-jose==3.3.0
PyYAML==6.0
rsa==4.9
six==1.16.0
sniffio==1.3.0
starlette==0.27.0
typing_extensions==4.7.1
uvicorn==0.22.0
uvloop==0.17.0
watchfiles==0.19.0
websockets==11.0.3
boto3

View file

@ -0,0 +1,31 @@
# Pandas and numpy
numpy==2.1.2
pandas==2.2.3
pytz==2024.2
six==1.16.0
# tqdm
tqdm==4.66.5
# fastapi
fastapi==0.115.2
sqlalchemy==2.0.36
pydantic-settings==2.6.0
psycopg2-binary==2.9.10
python-jose==3.3.0
cryptography==43.0.3
mangum==0.19.0
# AWS
boto3==1.35.44
# ML, Data Science
usaddress==0.5.11
epc-api-python==1.0.2
fuzzywuzzy==0.18.0
python-Levenshtein==0.26.0
textblob==0.18.0.post0
msgpack==1.1.0
scikit-learn==1.5.2
cffi==1.15.1
mip==1.15.0
# Data
pyarrow==17.0.0
fastparquet==2024.5.0
aiohttp==3.10.10

View file

@ -1,6 +1,6 @@
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
# from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.feature_selection import RFECV
@ -79,13 +79,13 @@ class EnergyConsumptionModel:
if x not in self.CATEGORICAL_COLUMNS
})
if model_paths:
for target, path in model_paths.items():
# Read model
self.models[target] = read_pickle_from_s3(
bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path
)
# Read dummy schema
# if model_paths:
# for target, path in model_paths.items():
# # Read model
# self.models[target] = read_pickle_from_s3(
# bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path
# )
# Read dummy schema
if dummy_schema_path:
self.dummy_schema = read_pickle_from_s3(
@ -278,33 +278,33 @@ class EnergyConsumptionModel:
logger.info(f"Feature selection completed for target {target}")
def init_model(self, feature_selection=False):
if feature_selection:
# Set up a smaller model to work it
return XGBRegressor(
objective='reg:squarederror',
n_estimators=50,
learning_rate=0.05,
max_depth=6,
subsample=0.8,
colsample_bytree=0.8,
reg_alpha=0.1,
reg_lambda=0.1
)
return XGBRegressor(
objective='reg:squarederror',
n_estimators=1000,
learning_rate=0.05,
max_depth=6,
min_child_weight=3,
subsample=0.8,
colsample_bytree=0.8,
reg_alpha=0.1,
reg_lambda=0.1
# n_jobs=self.n_jobs
)
# def init_model(self, feature_selection=False):
#
# if feature_selection:
# # Set up a smaller model to work it
# return XGBRegressor(
# objective='reg:squarederror',
# n_estimators=50,
# learning_rate=0.05,
# max_depth=6,
# subsample=0.8,
# colsample_bytree=0.8,
# reg_alpha=0.1,
# reg_lambda=0.1
# )
#
# return XGBRegressor(
# objective='reg:squarederror',
# n_estimators=1000,
# learning_rate=0.05,
# max_depth=6,
# min_child_weight=3,
# subsample=0.8,
# colsample_bytree=0.8,
# reg_alpha=0.1,
# reg_lambda=0.1
# # n_jobs=self.n_jobs
# )
def fit_model(self, target):
"""Fits the model to the training data and removes zero-importance features."""

View file

@ -263,7 +263,7 @@ class EPCDataProcessor:
# Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
data = self.data.replace(data_anomaly_map)
data = data.replace(np.NAN, None)
data = data.replace(np.nan, None)
self.data = data
@ -384,7 +384,7 @@ class EPCDataProcessor:
has_missings = pd.isnull(self.data[col]).sum()
while has_missings:
self.data = apply_clean(
data=self.data, matching_columns=matching_columns[0 : to_index + 1]
data=self.data, matching_columns=matching_columns[0: to_index + 1]
)
has_missings = pd.isnull(self.data[col]).sum()
@ -487,7 +487,7 @@ class EPCDataProcessor:
filled_data = (
self.data.groupby("UPRN", group_keys=True)[columns_to_fill]
.apply(lambda group: group.fillna(method="bfill").fillna(method="ffill"))
.apply(lambda group: group.bfill().ffill().infer_objects(copy=False))
.reset_index()
.set_index("level_1")
.sort_index()
@ -791,7 +791,7 @@ class EPCDataProcessor:
We fill photo supply with zeros where it's missing
"""
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].astype("Int64").fillna(0)
@staticmethod
def apply_averages_cleaning(
@ -858,12 +858,12 @@ class EPCDataProcessor:
# Fill NaN values with averages
for col in cols_to_clean:
data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"])
data_to_clean = data_to_clean.drop(columns=[f"{col}_AVERAGE"])
# If we still have missings
data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[col].mean())
# Final step if we still have missings - use global mean
data_to_clean[col].fillna(global_averages[col], inplace=True)
data_to_clean[col] = data_to_clean[col].fillna(global_averages[col])
return data_to_clean

View file

@ -182,7 +182,6 @@ EFFICIENCY_FEATURES = [
ROOM_FEATURES = ["number_habitable_rooms", "number_heated_rooms"]
COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
"TRANSACTION_TYPE",
"ENERGY_TARIFF", # Not sure if this is relevant
@ -241,7 +240,11 @@ BUILT_FORM_REMAP = {
DATA_PROCESSOR_SETTINGS = {
"low_memory": False,
"epc_minimum_count": 1,
"column_mappings": {"UPRN": [int, str]},
"column_mappings": {
"UPRN": [int, str],
"NUMBER_HEATED_ROOMS": [float],
"NUMBER_HABITABLE_ROOMS": [float],
},
}
# This has a manual mapping of the column types required

View file

@ -1,7 +1,6 @@
import os
from tqdm import tqdm
import pandas as pd
import geopandas as gpd
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from backend.Property import Property
@ -86,17 +85,6 @@ class OpenUprnClient:
return filename
return None
@staticmethod
def convert_bng_data_to_gpd(df):
gpd_data = gpd.GeoDataFrame(
df,
geometry=gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE),
crs="EPSG:27700" # British National Grid
)
return gpd_data
def save_filenames_to_s3(self, bucket_name):
"""
Save the filenames to s3

View file

@ -6,6 +6,7 @@ our database for querying from other services
import os
from tqdm import tqdm
import pandas as pd
import geopandas as gpd
from etl.spatial.ConservationAreaClient import ConservationAreaClient
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.spatial.SpecialBuildingsClient import SpecialBuildingsClient
@ -25,6 +26,16 @@ HISTORIC_ENGLAND_HERITAGE_BUILDINGS_PATHNAME = \
logger = setup_logger()
def convert_bng_data_to_gpd(df):
gpd_data = gpd.GeoDataFrame(
df,
geometry=gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE),
crs="EPSG:27700" # British National Grid
)
return gpd_data
def app():
"""
This application uses the conservation area datasets to determine if a UPRN is
@ -85,7 +96,7 @@ def app():
to_loop_over = open_uprn_client.data.groupby("filename")
for filename, uprn_df in tqdm(open_uprn_client.data.groupby("filename"), total=len(to_loop_over)):
uprn_gdf = OpenUprnClient.convert_bng_data_to_gpd(uprn_df)
uprn_gdf = convert_bng_data_to_gpd(uprn_df)
uprn_gdf = conservation_area_client.is_in_conservation_area_vectorised(uprn_gdf=uprn_gdf)
uprn_gdf = special_buildings_client.is_listed_building_vectorised(uprn_gdf=uprn_gdf)

View file

@ -1,8 +1,9 @@
pydantic==1.10.11
pydantic==2.9.2
pydantic-settings==2.6.0
epc-api-python==1.0.2
pandas==2.0.3
numpy==1.25.1
pytz==2023.3
numpy==2.1.2
pandas==2.2.3
pytz==2024.2
tzdata==2023.3
tqdm
mypy
@ -20,4 +21,6 @@ pyspellchecker
textblob
boto3
pyarrow
msgpack==1.0.5
msgpack==1.1.0

View file

@ -730,8 +730,8 @@ class Recommendations:
"id": STARTING_DUMMY_ID_VALUE,
"phase": STARTING_DUMMY_ID_VALUE,
"recommendation_id": STARTING_DUMMY_ID_VALUE,
"predictions_heating": property_kwh["heating"],
"predictions_hotwater": property_kwh["hot_water"],
"predictions_heating": float(property_kwh["heating"]),
"predictions_hotwater": float(property_kwh["hot_water"]),
}
]
),
@ -854,12 +854,12 @@ class Recommendations:
# We return a dictionary that contains the individual costs, that can be stored to the database
current_energy_bill = {
"heating_cost_current": starting_figures["heating_cost"],
"hot_water_cost_current": starting_figures["hotwater_cost"],
"lighting_cost_current": property_instance.energy_cost_estimates["unadjusted"]["lighting"],
"appliances_cost_current": property_instance.energy_cost_estimates["unadjusted"]["appliances"],
"gas_standing_charge": gas_standing_charge,
"electricity_standing_charge": electricity_standing_charge,
"heating_cost_current": float(starting_figures["heating_cost"]),
"hot_water_cost_current": float(starting_figures["hotwater_cost"]),
"lighting_cost_current": float(property_instance.energy_cost_estimates["unadjusted"]["lighting"]),
"appliances_cost_current": float(property_instance.energy_cost_estimates["unadjusted"]["appliances"]),
"gas_standing_charge": float(gas_standing_charge),
"electricity_standing_charge": float(electricity_standing_charge),
}
return current_energy_bill

View file

@ -946,9 +946,10 @@ testing_examples = [
},
"heating_measure_types": [
'high_heat_retention_storage_heater',
'air_source_heat_pump',
],
"heating_controls_measure_types": [],
"notes": "This is an end-terrace house, without mains gas connection, so all we recommend is HHR"
"notes": "This is an end-terrace house, without mains gas connection, so we recommend is HHR & ASHP"
},
{
"epc": {

View file

@ -57,10 +57,10 @@ class TestRoofRecommendations:
assert len(roof_recommender2.recommendations) == 1
assert roof_recommender2.recommendations[0]["total"] == 1610.0000000000002
assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14
assert roof_recommender2.recommendations[0]["total"] == 1653
assert roof_recommender2.recommendations[0]["new_u_value"] == 0.13
assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68
assert roof_recommender2.recommendations[0]["parts"][0]["depth"] == 270
assert roof_recommender2.recommendations[0]["parts"][0]["depth"] == 300
epc_record = EPCRecord()
epc_record.prepared_epc = {"county": "Greater London Authority", "roof-energy-eff": "Very Poor"}
@ -85,7 +85,7 @@ class TestRoofRecommendations:
assert roof_recommender3.recommendations
assert len(roof_recommender3.recommendations) == 1
assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270
assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 300.0
def test_loft_insulation_recommendation_150mm_insulation(self):
epc_record = EPCRecord()
@ -107,14 +107,14 @@ class TestRoofRecommendations:
assert not roof_recommender4.recommendations
roof_recommender4.recommend(phase=0)
roof_recommender4.recommend(phase=0, default_u_values=True)
assert len(roof_recommender4.recommendations) == 1
assert roof_recommender4.recommendations[0]["total"] == 1552.5
assert roof_recommender4.recommendations[0]["new_u_value"] == 0.13
assert roof_recommender4.recommendations[0]["total"] == 1653.0
assert roof_recommender4.recommendations[0]["new_u_value"] == 0.14
assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3
assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 200
assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 300
epc_record = EPCRecord()
epc_record.prepared_epc = {"county": "Somerset", "roof-energy-eff": "Good"}
@ -139,7 +139,7 @@ class TestRoofRecommendations:
assert roof_recommender5.recommendations
assert len(roof_recommender5.recommendations) == 1
assert roof_recommender5.recommendations[0]["parts"][0]["depth"] == 200
assert roof_recommender5.recommendations[0]["parts"][0]["depth"] == 300
def test_loft_insulation_recommendation_270mm_insulation(self):
# We shouldn't recommend anything in this case

View file

@ -48,13 +48,13 @@ class TestWindowRecommendations:
'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'already_installed': False,
'total': 7980.0, 'labour_hours': 0.0, 'labour_days': 0.0, 'is_secondary_glazing': False,
'description_simulation': {
'multi-glaze-proportion': 100, 'windows-energy-eff': 'Average',
'multi-glaze-proportion': 100, 'windows-energy-eff': 'Good',
'windows-description': 'Fully double glazed',
'glazed-type': 'double glazing installed during or after 2002'
},
'simulation_config': {
'has_glazing_ending': True, 'glazing_type_ending': 'double',
'multi_glaze_proportion_ending': 100, 'windows_energy_eff_ending': 'Average',
'multi_glaze_proportion_ending': 100, 'windows_energy_eff_ending': 'Good',
'glazed_type_ending': 'double glazing installed during or after 2002'
}
}
@ -401,14 +401,14 @@ class TestWindowRecommendations:
'sap_points': None, 'already_installed': False, 'total': 7980.0, 'labour_hours': 0.0,
'labour_days': 0.0, 'is_secondary_glazing': False,
'description_simulation': {
'multi-glaze-proportion': 100, 'windows-energy-eff': 'Average',
'multi-glaze-proportion': 100, 'windows-energy-eff': 'Good',
'windows-description': 'Fully double glazed',
'glazed-type': 'double glazing installed during or after 2002'
},
'simulation_config': {
'has_glazing_ending': True, 'glazing_coverage_ending': 'full',
'glazing_type_ending': 'double', 'multi_glaze_proportion_ending': 100,
'windows_energy_eff_ending': 'Average',
'windows_energy_eff_ending': 'Good',
'glazed_type_ending': 'double glazing installed during or after 2002'
}
}
@ -624,7 +624,7 @@ class TestWindowRecommendations:
'total_floor_area_ending': 61.0, 'floor_height_starting': 2.37, 'floor_height_ending': 2.37,
'hot_water_energy_eff_starting': 'Good', 'hot_water_energy_eff_ending': 'Good',
'floor_energy_eff_starting': 'NO_RATING', 'floor_energy_eff_ending': 'NO_RATING',
'windows_energy_eff_starting': 'Very Poor', 'windows_energy_eff_ending': 'Average',
'windows_energy_eff_starting': 'Very Poor', 'windows_energy_eff_ending': 'Good',
'walls_energy_eff_starting': 'Very Poor', 'walls_energy_eff_ending': 'Very Poor',
'sheating_energy_eff_starting': 'NO_RATING', 'sheating_energy_eff_ending': 'NO_RATING',
'roof_energy_eff_starting': 'Very Poor', 'roof_energy_eff_ending': 'Very Poor',
@ -666,7 +666,7 @@ class TestWindowRecommendations:
{'variable': 'glazed_type_ending', 'starting': 'not defined',
'simulated': 'double glazing installed during or after 2002'},
{'variable': 'multi_glaze_proportion_ending', 'starting': 0.0, 'simulated': 100},
{'variable': 'windows_energy_eff_ending', 'starting': 'Very Poor', 'simulated': 'Average'},
{'variable': 'windows_energy_eff_ending', 'starting': 'Very Poor', 'simulated': 'Good'},
]
assert different == expected_different

View file

@ -192,7 +192,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
try:
data = pickle.loads(serialized_data)
except Exception as e:
logger.errpr(f'Failed to deserialize data: {str(e)}')
logger.error(f'Failed to deserialize data: {str(e)}')
return None
return data