Model/etl/epc/generate_scenarios_data.py

289 lines
9.1 KiB
Python

from datetime import datetime
import itertools
import pandas as pd
from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc
from sqlalchemy.orm import sessionmaker
from backend.app.config import get_settings
from backend.app.db.connection import db_engine
from backend.app.db.functions.materials_functions import get_materials
from backend.app.plan.utils import get_cleaned
from backend.Property import Property
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
from datetime import datetime
# --- Run bookkeeping and shared inputs ---------------------------------------
# Everything below runs at import time: this file is a script, not a library.

# Human-readable timestamp; used in the S3 key of the file saved at the end.
now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")

logger = setup_logger()

logger.info("Connecting to db")
session_factory = sessionmaker(bind=db_engine)
session = session_factory()
# ISO timestamp passed to ModelApi later on. It is read from the clock
# separately from `now`, so the two values are close but not identical.
created_at = datetime.now().isoformat()
session.begin()

logger.info("Getting the inputs")
# Parquet inputs are read from the bucket named by the DATA_BUCKET setting.
cleaning_data = read_dataframe_from_s3_parquet(
    bucket_name=get_settings().DATA_BUCKET,
    file_key="sap_change_model/cleaning_dataset.parquet",
)
materials = get_materials(session)
cleaned = get_cleaned()
uprn_filenames = read_dataframe_from_s3_parquet(
    bucket_name=get_settings().DATA_BUCKET,
    file_key="spatial/filename_meta.parquet",
)
# SolarPhotoSupply.load returns a pair, unpacked into the two lookups that
# Property.get_components is given for every scenario property.
solar_lookups = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
photo_supply_lookup, floor_area_decile_thresholds = solar_lookups
# Each scenario property is a dict with "address", "postcode", "lmk-key" and
# "measures". Every entry of "measures" is a 4-item list:
#   [0] list of measure names to combine,
#   [1] the impact as a string (cast to int once the DataFrame is built),
#   [2] dict of column overrides written onto every generated record,
#   [3] list of ints that the generation loop in this script does not read.


def _wall_only_measure(wall_measure, impact):
    """Return a fresh single-wall-measure entry with the given impact string."""
    return [
        [wall_measure],
        impact,
        {"walls_insulation_thickness_ending": "average"},
        [0],
    ]


def _cavity_and_loft_measures():
    """Return a fresh "measures" list for the cavity-wall + loft scenario.

    A new list is built on every call so scenario entries never share nested
    objects with one another.
    """
    return [
        [
            ["cavity_wall_insulation", "loft_insulation"],
            "15",
            {"walls_insulation_thickness_ending": "average"},
            [0, 1],
        ],
    ]


scenario_properties = [
    {
        "address": "2 South Terrace",
        "postcode": "NN1 5JY",
        "lmk-key": "1459796789102016070507274146560098",
        "measures": [
            _wall_only_measure("internal_wall_insulation", "11"),
            _wall_only_measure("external_wall_insulation", "10"),
            [["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]],
        ],
    },
]

# The remaining properties all use the same cavity-wall + loft scenario.
scenario_properties.extend(
    {
        "address": address,
        "postcode": postcode,
        "lmk-key": lmk_key,
        "measures": _cavity_and_loft_measures(),
    }
    for address, postcode, lmk_key in (
        (
            "8 Lindlings",
            "HP1 2HA",
            "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
        ),
        (
            "44 Lindlings",
            "HP1 2HE",
            "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
        ),
        (
            "46 Chaulden Terrace",
            "HP1 2AN",
            "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
        ),
        (
            "73 Long Chaulden",
            "HP1 2HX",
            "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
        ),
    )
)
# Build one scoring record per (property, measure spec, recommendation
# combination). The records are collected as a flat list and turned into a
# DataFrame after the loop.
recommendations_scoring_data = []
for scenario_property in scenario_properties:
    # Look the property up by address and postcode; the EPC used for the
    # scenario is then picked out of the search results by its LMK key.
    epc_searcher = SearchEpc(
        address1=scenario_property["address"],
        postcode=scenario_property["postcode"],
        auth_token=get_settings().EPC_AUTH_TOKEN,
        os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
    )
    epc_searcher.find_property()

    # Find the EPC with the same LMK key among every EPC the search returned.
    all_epcs = epc_searcher.older_epcs.copy()
    all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc])
    target_lmk_key = scenario_property.get("lmk-key")
    matching_epcs = [
        epc for epc in all_epcs if epc.get("lmk-key", None) == target_lmk_key
    ]
    if not matching_epcs:
        # Same exception type as the bare `[0]` lookup used to raise, but the
        # message now says which scenario property could not be resolved.
        raise IndexError(
            f"No EPC with lmk-key {target_lmk_key!r} found for "
            f"{scenario_property['address']!r}, {scenario_property['postcode']!r}"
        )
    original_epc = matching_epcs[0]

    epc_records = {
        "original_epc": original_epc,
        "full_sap_epc": {},
        "old_data": [],
    }
    prepared_epc = EPCRecord(
        epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data
    )
    p = Property(
        id=prepared_epc.uprn,
        address=epc_searcher.address_clean,
        postcode=epc_searcher.postcode_clean,
        epc_record=prepared_epc,
    )
    p.get_spatial_data(uprn_filenames)
    p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

    recommender = Recommendations(property_instance=p, materials=materials)
    # The return value is not read below; the per-component recommenders are
    # read directly instead. NOTE(review): presumably recommend() is what
    # populates them, so keep this call ahead of the attribute reads.
    property_recommendations = recommender.recommend("0")
    wall_recommendations = recommender.wall_recomender.recommendations
    loft_recommendations = recommender.roof_recommender.recommendations
    solar_recommendations = recommender.solar_recommender.recommendation
    windows_recommendations = recommender.windows_recommender.recommendation
    p.create_base_difference_epc_record(cleaned_lookup=cleaned)

    # Scenario measure name -> (group, recommendation "type" to keep, source).
    # Insertion order is significant: wall types are collected in this order,
    # and groups are combined in the order wall, loft, solar, windows.
    measure_sources = {
        "internal_wall_insulation": (
            "wall",
            "internal_wall_insulation",
            wall_recommendations,
        ),
        "external_wall_insulation": (
            "wall",
            "external_wall_insulation",
            wall_recommendations,
        ),
        "cavity_wall_insulation": (
            "wall",
            "cavity_wall_insulation",
            wall_recommendations,
        ),
        "loft_insulation": ("loft", "loft_insulation", loft_recommendations),
        "solar": ("solar", "solar_pv", solar_recommendations),
        "windows": ("windows", "windows_glazing", windows_recommendations),
    }

    scoring_list = []
    # Create the records for each measure spec. Any items after the first
    # three (measure names, impact, overrides) are not used here.
    for measure, impact, override, *_ in scenario_property["measures"]:
        scenario_label = "&".join(measure)

        unknown_measures = [name for name in measure if name not in measure_sources]
        if unknown_measures:
            logger.warning(
                f"Scenario {scenario_label} for {scenario_property['address']} "
                f"names unsupported measures {unknown_measures}; they are ignored"
            )

        selected = {"wall": [], "loft": [], "solar": [], "windows": []}
        for measure_name, (group, rec_type, candidates) in measure_sources.items():
            if measure_name not in measure:
                # A source is only iterated when its measure was requested.
                continue
            matches = [rec for rec in candidates if rec["type"] == rec_type]
            if not matches:
                logger.warning(
                    f"No {rec_type} recommendation for "
                    f"{scenario_property['address']}; scenario {scenario_label} "
                    "is generated without it"
                )
            selected[group].extend(matches)

        # Only non-empty groups take part in the cartesian product. If every
        # group is empty, itertools.product() still yields one empty tuple, so
        # a single record with no recommendations is produced (unchanged
        # behaviour; the warnings above make that case visible).
        combi_list = [recs for recs in selected.values() if recs]
        for i, combi in enumerate(itertools.product(*combi_list)):
            # Start every combination from a fresh copy of the base record.
            recommendation_record = p.base_difference_record.df.to_dict("records")[
                0
            ].copy()
            recommendation_record = p.create_recommendation_scoring_data(
                property_id=i,
                primary_recommendation_id=i,
                recommendation_record=recommendation_record,
                recommendations=combi,
            )
            if override is not None:
                for key, value in override.items():
                    recommendation_record[key] = value
            # NOTE(review): the id is built from the measure names and the
            # combination index only, so properties that share a measure spec
            # produce the same id.
            recommendation_record["id"] = f"{scenario_label}+{i}"
            recommendation_record["impact"] = impact
            scoring_list.append(recommendation_record)
    recommendations_scoring_data.extend(scoring_list)
# --- Shape the collected records into the frame that is scored ---------------
# Columns removed before scoring.
# NOTE(review): these look like the quantities the models predict - confirm.
columns_to_drop = [
    "rdsap_change",
    "heat_demand_change",
    "carbon_change",
    "sap_ending",
    "heat_demand_ending",
    "carbon_ending",
]

recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)

# "impact" is written as a string in the scenario definitions; store it as int.
impact_as_int = recommendations_scoring_data["impact"].astype(int)
recommendations_scoring_data["impact"] = impact_as_int

recommendations_scoring_data = recommendations_scoring_data.drop(
    columns=columns_to_drop
)

# Move "impact" and then "id" to position 0, so the final column order is
# id, impact, followed by every other column in its existing order.
for leading_column in ("impact", "id"):
    recommendations_scoring_data.insert(
        0, leading_column, recommendations_scoring_data.pop(leading_column)
    )
# --- Score the scenarios and persist the scoring input -----------------------
# Deferred import: ModelApi is only used by this final step of the script.
from backend.ml_models.api import ModelApi

model_api = ModelApi(portfolio_id="generate-scenarios-data", timestamp=created_at)
# The returned predictions are kept in `all_predictions` but are not read again
# in this script.
all_predictions = model_api.predict_all(
    df=recommendations_scoring_data,
    bucket=get_settings().DATA_BUCKET,
    prediction_buckets=dict(
        sap_change_predictions=get_settings().SAP_PREDICTIONS_BUCKET,
        heat_demand_predictions=get_settings().HEAT_PREDICTIONS_BUCKET,
        carbon_change_predictions=get_settings().CARBON_PREDICTIONS_BUCKET,
    ),
)

# The frame written out is the scoring input, not `all_predictions`.
# NOTE(review): the destination bucket is a literal here, whereas the inputs
# are read from the DATA_BUCKET setting - confirm this is intentional.
output_bucket = "retrofit-data-dev"
output_key = f"scenario_data/{now}/recommendations_scoring_data.parquet"
save_dataframe_to_s3_parquet(recommendations_scoring_data, output_bucket, output_key)