removed old mds app and updating dockerfile

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-21 12:36:01 +01:00
parent 27737d82fd
commit 888357d797
2 changed files with 6 additions and 597 deletions

View file

@ -24,7 +24,7 @@ from backend.app.db.functions.recommendations_functions import (
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.plan.utils import get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
@ -36,7 +36,6 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures
from recommendations.Recommendations import Recommendations
from recommendations.Mds import Mds
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from backend.ml_models.Valuation import PropertyValuation
@ -44,7 +43,6 @@ from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
from etl.bill_savings.KwhData import KwhData
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
logger = setup_logger()
@ -868,587 +866,3 @@ async def trigger_plan(body: PlanTriggerRequest):
session.close()
return Response(status_code=200)
@router.post("/mds")
async def build_mds(body: MdsRequest):
# TODO: This is a placeholder location for the MDS endpoint, which this is being assembled
logger.info("Connecting to db")
session = sessionmaker(bind=db_engine)()
created_at = datetime.now().isoformat()
try:
session.begin()
logger.info("Getting the inputs")
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
measure_set = body.measures
optimise_measures = measure_set is not None
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
input_properties = []
for property_id, config in tqdm(enumerate(plan_input), total=len(plan_input)):
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
uprn = config.get("uprn", None)
uprn = None if uprn == "" else uprn
if uprn:
uprn = int(float(uprn))
epc_searcher = SearchEpc(
address1=config["address"],
postcode=config["postcode"],
uprn=uprn,
auth_token=get_settings().EPC_AUTH_TOKEN,
os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
)
epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
# For the moment, our OS API access is unavailable, so we skip and interpolate
epc_searcher.find_property(skip_os=True)
if config["address"] == "35b High Street":
print("Performing temporary patch on 35b High Street")
epc_searcher.newest_epc["uprn"] = 10002911892
epc_searcher.full_sap_epc["uprn"] = 10002911892
if config["address"] == "Cobnut Barn":
print("Performing temporary patch on Cobnut Barn")
epc_searcher.newest_epc["uprn"] = 10013924689
# Create a record in db
# TODO: If we productionise the creation of this mds report, we will need to store this in the db
# property_id, is_new = create_property(
# session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
# )
# if not is_new:
# continue
#
# create_property_targets(
# session,
# property_id=property_id,
# portfolio_id=body.portfolio_id,
# epc_target=body.goal_value,
# heat_demand_target=None
# )
epc_records = {
'original_epc': epc_searcher.newest_epc.copy(),
'full_sap_epc': epc_searcher.full_sap_epc.copy(),
'old_data': epc_searcher.older_epcs.copy(),
}
# patch = next((
# x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
# ), {})
# epc_records = patch_epc(patch, epc_records)
prepared_epc = EPCRecord(
epc_records=epc_records,
run_mode="newdata",
cleaning_data=cleaning_data
)
# property_already_installed = next((
# x for x in already_installed if
# (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
# ), {})
#
# property_non_invasive_recommendations = next((
# x for x in non_invasive_recommendations if
# (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
# ), {})
if measure_set is None:
measures = config["measures"] if "measures" in config else None
else:
measures = measure_set
input_properties.append(
Property(
id=property_id,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
# already_installed=property_already_installed,
# non_invasive_recommendations=property_non_invasive_recommendations,
measures=measures,
is_new=is_new,
**Property.extract_kwargs(config)
)
)
logger.info("Reading in materials and cleaned datasets")
materials = get_materials(session)
cleaned = get_cleaned()
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
logger.info("Getting spatial data")
for p in tqdm(input_properties):
p.get_spatial_data(uprn_filenames)
logger.info("Getting components and epc recommendations")
recommendations_scoring_data = []
representative_recommendations = {}
recommendations = {}
for p in tqdm(input_properties):
p.set_features(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
mds_recommendations, property_representative_recommendations, errors = mds.build()
if isinstance(errors, list):
if errors:
raise Exception("Errors occurred during MDS build")
else:
if any([len(x) for x in errors.values()]):
raise Exception("Errors occurred during MDS build")
recommendations[p.id] = mds_recommendations
representative_recommendations[p.id] = property_representative_recommendations
# Build the scoring data
p.create_base_difference_epc_record(cleaned_lookup=cleaned)
if optimise_measures:
for _id, mds_recs in mds_recommendations.items():
representative_ids = [r["recommendation_id"] for r in property_representative_recommendations[_id]]
simulation_mds_recs = []
for recs in mds_recs:
simulation_mds_recs.append(
[r for r in recs if r["recommendation_id"] in representative_ids]
)
p.adjust_difference_record_with_recommendations(
simulation_mds_recs, property_representative_recommendations[_id]
)
data = p.recommendations_scoring_data.copy()
for d in data:
d["id"] = d["id"] + "*" + _id
recommendations_scoring_data.extend(data)
else:
recommendations_scoring_data.append(
p.simulate_all_representative_recommendations(property_representative_recommendations)
)
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
)
model_api = ModelApi(
portfolio_id=body.portfolio_id, timestamp=created_at, prediction_buckets=get_prediction_buckets()
)
all_predictions = {
"sap_change_predictions": pd.DataFrame(),
"heat_demand_predictions": pd.DataFrame(),
"carbon_change_predictions": pd.DataFrame()
}
to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = model_api.predict_all(
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
)
# Append the predictions to the predictions dictionary
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
# TODO: 1) walls_insulation_thickness_ending is not being set in the recommendations_scoring_data,
# insulation_thickness_ending is being set instead
# 2)
# TODO: TEMP
for p in plan_input:
if p["uprn"]:
p["uprn"] = str(int(float(p["uprn"])))
import re
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
if optimise_measures:
results = []
for p in input_properties:
sap_before = int(p.data["current-energy-efficiency"])
epc_before = p.data["current-energy-rating"]
heat_demand_before = p.data["energy-consumption-current"]
carbon_before = p.data["co2-emissions-current"]
current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=heat_demand_before * p.floor_area,
current_epc_rating=epc_before,
)
current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
package_comparison = []
for _id in recommendations[p.id].keys():
sap_prediction = all_predictions["sap_change_predictions"][
(all_predictions["sap_change_predictions"]["property_id"] == str(p.id)) &
(all_predictions["sap_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
].copy().reset_index(drop=True)
sap_prediction["row_id"] = sap_prediction.index
heat_demand_prediction = all_predictions["heat_demand_predictions"][
(all_predictions["heat_demand_predictions"]["property_id"] == str(p.id)) &
(all_predictions["heat_demand_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
].copy().reset_index(drop=True)
heat_demand_prediction["row_id"] = heat_demand_prediction.index
carbon_prediction = all_predictions["carbon_change_predictions"][
(all_predictions["carbon_change_predictions"]["property_id"] == str(p.id)) &
(all_predictions["carbon_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
].copy().reset_index(drop=True)
carbon_prediction["row_id"] = carbon_prediction.index
epc_target = body.goal_value
if epc_before == epc_target:
continue
sap_target = epc_to_sap_lower_bound(epc_target)
# Define the measures
sap_threshold_barrier = sap_prediction[sap_prediction["predictions"] >= sap_target]
meets_threshold = True
if sap_threshold_barrier.empty:
sap_threshold_barrier = sap_prediction.tail(1)
meets_threshold = False
sap_threshold_barrier = sap_threshold_barrier.head(1)
sap_prediction = sap_prediction[
sap_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
]
heat_demand_prediction = heat_demand_prediction[
heat_demand_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
]
carbon_prediction = carbon_prediction[
carbon_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
]
reverse_map = {v: k for k, v in Mds.format_map.items()}
selected_measures = [
reverse_map[x.split("-")[0]] for x in sap_prediction["recommendation_id"].values
]
selected_measure_ids = [x.split("*")[0] for x in sap_prediction["recommendation_id"].values]
costs = [
r["total"] for r in representative_recommendations[p.id][_id] if
r["recommendation_id"] in selected_measure_ids
]
costs = sum(costs)
sap_after = sap_prediction["predictions"].values[-1]
epc_after = sap_to_epc(sap_after)
heat_demand_after = heat_demand_prediction["predictions"].values[-1]
carbon_after = carbon_prediction["predictions"].values[-1]
expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=heat_demand_after * p.floor_area,
current_epc_rating=epc_before,
)
expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
bill_savings = current_energy_bill - expected_energy_bill
energy_savings = current_adjusted_energy - expected_adjusted_energy
package_comparison.append(
{
"id": _id,
"cost": costs,
"measures": selected_measures,
"sap_before": sap_before,
"sap_after": sap_after,
"epc_before": epc_before,
"epc_after": epc_after,
"heat_demand_before": heat_demand_before,
"heat_demand_after": heat_demand_after,
"carbon_before": carbon_before,
"carbon_after": carbon_after,
"bill_savings": bill_savings,
"energy_savings": energy_savings,
"current_energy_bill": current_energy_bill,
"meets_threshold": meets_threshold
}
)
package_comparison = pd.DataFrame(package_comparison)
# Find the smallest cost package
if not package_comparison.empty:
# We check if any of the packages meet the threshold
# If none of them do, take the one that gets closest to the target
if package_comparison["meets_threshold"].any():
package_comparison = package_comparison[package_comparison["meets_threshold"]]
package_comparison = package_comparison.sort_values("cost")
else:
package_comparison = package_comparison.sort_values("sap_after", ascending=False)
package_comparison = package_comparison.head(1).to_dict("records")[0]
else:
package_comparison = {
"measures": [],
"sap_before": sap_before,
"sap_after": sap_before,
"epc_before": epc_before,
"epc_after": epc_before,
"heat_demand_before": heat_demand_before,
"heat_demand_after": heat_demand_before,
"carbon_before": carbon_before,
"carbon_after": carbon_before,
"bill_savings": 0,
"energy_savings": 0,
"current_energy_bill": current_energy_bill,
"meets_threshold": False
}
config = [c for c in plan_input if c["uprn"] == str(p.uprn)]
if not config:
config = {"address": None, "postcode": None}
else:
config = config[0]
results.append({
"config_address": config["address"],
"config_postcode": config["postcode"],
"uprn": p.uprn,
"address": p.address,
"postcode": p.postcode,
"measures": package_comparison["measures"],
"year_of_epc": p.data['lodgement-date'],
"sap_before": package_comparison["sap_before"],
"sap_after": package_comparison["sap_after"],
"epc_before": package_comparison["epc_before"],
"epc_after": package_comparison["epc_after"],
"heat_demand_before": package_comparison["heat_demand_before"],
"heat_demand_after": package_comparison["heat_demand_after"],
"carbon_before": package_comparison["carbon_before"],
"carbon_after": package_comparison["carbon_after"],
"bill_savings": round(package_comparison["bill_savings"], 2),
"energy_savings": round(package_comparison["energy_savings"], 2),
"current_energy_bill": round(package_comparison["current_energy_bill"], 2),
"EWI": "EWI" if "external_wall_insulation" in package_comparison["measures"] else None,
"CWI": "CWI" if "cavity_wall_insulation" in package_comparison["measures"] else None,
"LI": "LI" if "loft_insulation" in package_comparison["measures"] else None,
"ASHP Htg": "ASHP Htg" if "air_source_heat_pump" in package_comparison["measures"] else None,
"Elec Storage": (
"Elec Storage Htrs (Out of scope -Prov sum only)" if "high_heat_retention_storage_heaters" in
package_comparison["measures"] else None
),
"Solar PV": "Solar PV" if "solar_pv" in package_comparison["measures"] else None,
})
results = pd.DataFrame(results)
# For the different measures, we check the impact with a few debugging functions
walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures)
results.to_excel("optimised mds_results 5th June.xlsx")
results = []
for p in input_properties:
measures = p.measures
property_recommendations = [r['type'] for r in representative_recommendations[p.id]]
# TODO: Check high heat retention storage heaters - looks like it's excluded controls!
sap_prediction = all_predictions["sap_change_predictions"][
all_predictions["sap_change_predictions"]["property_id"] == str(p.id)
]
heat_demand_prediction = all_predictions["heat_demand_predictions"][
all_predictions["heat_demand_predictions"]["property_id"] == str(p.id)
]
carbon_prediction = all_predictions["carbon_change_predictions"][
all_predictions["carbon_change_predictions"]["property_id"] == str(p.id)
]
# Get a before and after for SAP, heat demand, CO2 and also calculate energy bill and energy savings
sap_before = int(p.data["current-energy-efficiency"])
sap_after = sap_prediction["predictions"].values[0] if measures else sap_before
epc_before = p.data["current-energy-rating"]
epc_after = sap_to_epc(sap_after) if measures else epc_before
heat_demand_before = p.data["energy-consumption-current"]
heat_demand_after = heat_demand_prediction["predictions"].values[0] if measures else heat_demand_before
carbon_before = p.data["co2-emissions-current"]
carbon_after = carbon_prediction["predictions"].values[0] if measures else carbon_before
# Estimate bill savings
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=heat_demand_before * p.floor_area,
current_epc_rating=epc_before,
)
# TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
# actually implemented
expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=heat_demand_after * p.floor_area,
current_epc_rating=epc_before,
)
# TODO: We should determine if the home is gas & electricity or just electricity
# Determine if the heating and hotwater was previously electric only or both
current_energy_bill = AnnualBillSavings.calculate_annual_bill(
kwh=current_adjusted_energy,
)
expected_energy_bill = AnnualBillSavings.calculate_annual_bill(
kwh=expected_adjusted_energy,
)
bill_savings = current_energy_bill - expected_energy_bill
energy_savings = current_adjusted_energy - expected_adjusted_energy
config = [c for c in plan_input if c["uprn"] == str(p.uprn)]
if not config:
config = {"address": None, "postcode": None}
else:
config = config[0]
to_append = {
"config_address": config["address"],
"config_postcode": config["postcode"],
"uprn": p.uprn,
"address": p.address,
"postcode": p.postcode,
"measures": measures,
"property_recommendations": property_recommendations,
"year_of_epc": p.data['lodgement-date'],
"sap_before": sap_before,
"sap_after": sap_after,
"epc_before": epc_before,
"epc_after": epc_after,
"heat_demand_before": heat_demand_before,
"heat_demand_after": heat_demand_after,
"carbon_before": carbon_before,
"carbon_after": carbon_after,
"bill_savings": round(bill_savings, 2),
"energy_savings": round(energy_savings, 2),
"current_energy_bill": round(current_energy_bill, 2),
"fuel_type": p.main_fuel["fuel_type"],
}
results.append(to_append)
results = pd.DataFrame(results)
results["sap_uplift"] = results["sap_after"] - results["sap_before"]
# results.to_excel("mds_results 5th June.xlsx")
walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures)
except IntegrityError:
logger.error("Database integrity error occurred", exc_info=True)
session.rollback()
return Response(status_code=500, content="Database integrity error.")
except OperationalError:
logger.error("Database operational error occurred", exc_info=True)
session.rollback()
return Response(status_code=500, content="Database operational error.")
except ValueError:
logger.error("Value error - possibly due to malformed data", exc_info=True)
session.rollback()
return Response(status_code=400, content="Bad request: malformed data.")
except Exception as e: # General exception handling
logger.error(f"An error occurred: {e}")
session.rollback()
return Response(status_code=500, content="An unexpected error occurred.")
finally:
session.close()
def check_mds(results, input_properties, recommendations, optimise_measures):
import ast
walls_check = []
hhr_check = []
for p in input_properties:
res = results[results["uprn"] == p.uprn]
wall = p.walls
heating = p.main_heating
heating_controls = p.main_heating_controls
if optimise_measures:
measures = res["measures"].values[0]
else:
measures = [list(z.keys())[0] for z in res["measures"].values[0]]
wall_recommendation = [
x for x in measures if
x in ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"]
]
hhr_recommendation = [
x for x in measures if
x in ["high_heat_retention_storage_heaters"]
]
if optimise_measures:
possible_measures = [ast.literal_eval(x) for x in list(recommendations[p.id].keys())]
# Unlist them
possible_measures = [x for sublist in possible_measures for x in sublist]
possible_measures = list(set(possible_measures))
else:
possible_measures = p.measures
if wall_recommendation:
if len(wall_recommendation) > 1:
raise Exception("something went wrong")
wall_recommendation = wall_recommendation[0]
else:
wall_recommendation = None
hhr_recommendation = hhr_recommendation[0] if hhr_recommendation else None
walls_check.append(
{
"uprn": p.uprn,
"address": p.address,
"postcode": p.postcode,
"property_type": p.data['property-type'],
"conservation_status": p.spatial["conservation_status"],
"is_listed_building": p.spatial["is_listed_building"],
"is_heritage_building": p.spatial["is_heritage_building"],
"wall": wall["clean_description"],
"recommendation": wall_recommendation,
"possible_measures": possible_measures,
"selected_measures": res["measures"].values[0],
}
)
hhr_check.append(
{
"uprn": p.uprn,
"address": p.address,
"postcode": p.postcode,
"heating": heating["clean_description"],
"heating_controls": heating_controls["clean_description"],
"recommendation": hhr_recommendation,
"possible_measures": possible_measures,
"selected_measures": res["measures"].values[0],
}
)
walls_check = pd.DataFrame(walls_check)
hhr_check = pd.DataFrame(hhr_check)
return walls_check, hhr_check

View file

@ -35,17 +35,12 @@ COPY --from=build-image /usr/local/lib/python3.10/site-packages/ /usr/local/lib/
# Copy project files
COPY ./backend/ ./backend
COPY ./recommendations/ ./recommendations
COPY ./model_data/BaseUtility.py ./model_data/BaseUtility.py
COPY ./model_data/config.py ./model_data/config.py
COPY ./model_data/optimiser/ ./model_data/optimiser/
COPY ./model_data/__init__.py ./model_data/__init__.py
COPY ./model_data/EpcClean.py ./model_data/EpcClean.py
COPY ./model_data/utils.py ./model_data/utils.py
COPY ./model_data/epc_attributes/ ./model_data/epc_attributes/
COPY ./model_data/simulation_system/core/DataProcessor.py ./model_data/simulation_system/core/DataProcessor.py
COPY ./model_data/simulation_system/core/Settings.py ./model_data/simulation_system/core/Settings.py
COPY ./datatypes/ ./datatypes/
COPY ./utils/ ./utils/
COPY ./etl/epc/ ./etl/epc/
COPY ./etl/epc_clean/ ./etl/epc_clean/
COPY ./etl/bill_savings/ ./etl/bill_savings/
COPY ./etl/spatial/ ./etl/spatial/
# Set the ENTRYPOINT to the AWS Lambda RIC and CMD to your function handler
ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ]