mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
211 lines
8.7 KiB
Python
211 lines
8.7 KiB
Python
import ast
|
|
import json
|
|
from copy import deepcopy
|
|
from datetime import datetime
|
|
|
|
import random
|
|
from tqdm import tqdm
|
|
import pandas as pd
|
|
import numpy as np
|
|
from etl.epc.Record import EPCRecord
|
|
from backend.SearchEpc import SearchEpc
|
|
from sqlalchemy.exc import IntegrityError, OperationalError
|
|
from sqlalchemy.orm import sessionmaker
|
|
from starlette.responses import Response
|
|
|
|
from backend.app.config import get_settings, get_prediction_buckets
|
|
from backend.app.db.connection import db_engine
|
|
from backend.app.db.functions.materials_functions import get_materials
|
|
from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
|
|
from backend.app.db.functions.property_functions import (
|
|
create_property, create_property_details_epc, create_property_targets, update_property_data,
|
|
update_or_create_property_spatial_details
|
|
)
|
|
from backend.app.db.functions.recommendations_functions import (
|
|
create_plan, upload_recommendations, create_scenario
|
|
)
|
|
from backend.app.db.functions.funding_functions import upload_funding
|
|
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
|
|
from backend.app.db.models.portfolio import rating_lookup
|
|
from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES
|
|
from backend.app.plan.utils import get_cleaned
|
|
from backend.app.utils import sap_to_epc
|
|
import backend.app.assumptions as assumptions
|
|
|
|
from backend.ml_models.api import ModelApi
|
|
from backend.Property import Property
|
|
from backend.apis.GoogleSolarApi import GoogleSolarApi
|
|
|
|
from recommendations.optimiser.CostOptimiser import CostOptimiser
|
|
from recommendations.optimiser.GainOptimiser import GainOptimiser
|
|
import recommendations.optimiser.optimiser_functions as optimiser_functions
|
|
from recommendations.Recommendations import Recommendations
|
|
from utils.logger import setup_logger
|
|
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
|
|
from backend.ml_models.Valuation import PropertyValuation
|
|
|
|
from etl.bill_savings.KwhData import KwhData
|
|
from etl.spatial.OpenUprnClient import OpenUprnClient
|
|
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
|
|
|
|
from backend.Funding import Funding
|
|
from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths
|
|
from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value
|
|
|
|
# Input data (temp)
|
|
import pickle
|
|
|
|
import pandas as pd
|
|
|
|
# Temporary local fixtures standing in for the real data sources.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file - only load pickles from a trusted source.
with open("local_data_for_deletion.pkl", "rb") as f:
    local_data = pickle.load(f)

# Pull the individual fixtures out of the bundle, in the order they are stored below.
(
    cleaning_data,
    materials,
    cleaned,
    project_scores_matrix,
    partial_project_scores_matrix,
    whlg_eligible_postcodes,
) = (
    local_data[key]
    for key in (
        "cleaning_data",
        "materials",
        "cleaned",
        "project_scores_matrix",
        "partial_project_scores_matrix",
        "whlg_eligible_postcodes",
    )
)

with open("kwh_client_for_deletion.pkl", "rb") as f:
    kwh_client = pickle.load(f)
|
|
|
|
# Local EPC certificate export used as the raw input for this script.
EPC_CERTIFICATES_CSV = (
    "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates.csv"
)

# low_memory=False stops pandas from parsing the file in chunks, which avoids mixed-type column inference.
epc_data = pd.read_csv(EPC_CERTIFICATES_CSV, low_memory=False)
|
|
|
|
# TODO: Store this for cleaning
# Mean lighting / heating / hot-water cost per unit of floor area for each current-energy-efficiency value,
# built only from certificates lodged on or after 2024-01-01.
source_columns = [
    "TOTAL_FLOOR_AREA",
    "CURRENT_ENERGY_EFFICIENCY",
    "LIGHTING_COST_CURRENT",
    "HEATING_COST_CURRENT",
    "HOT_WATER_COST_CURRENT",
]
cost_columns = ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]
scaled_columns = ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"]

recent_certificates = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01"]
costs_by_floor_area = recent_certificates[source_columns].copy()

# Switch to the lower-case, hyphenated column names used by the rest of the script.
costs_by_floor_area.columns = [name.lower().replace("_", "-") for name in source_columns]

# Express each cost relative to the floor area of the certificate it came from.
for cost_column, scaled_column in zip(cost_columns, scaled_columns):
    costs_by_floor_area[scaled_column] = costs_by_floor_area[cost_column] / costs_by_floor_area["total-floor-area"]

costs_by_floor_area = (
    costs_by_floor_area
    .groupby("current-energy-efficiency")[scaled_columns]
    .mean()
    .reset_index()
)
|
|
|
|
# Random sample of 1000 certificates lodged on or after 2015-01-01, with at most one row per UPRN
# (drop_duplicates keeps the first row it encounters for each UPRN).
# NOTE(review): the sample is unseeded, and sample(1000) raises if fewer than 1000 rows remain.
lodged_since_2015 = pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"
one_row_per_uprn = epc_data[lodged_since_2015].drop_duplicates("UPRN")
sample_epc_data = one_row_per_uprn.sample(1000).reset_index(drop=True)
|
|
|
|
# TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type
|
|
# TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used
|
|
# in the google solar api but is it really needed? I don't think it's super accurate. It might be better to
|
|
# just use an average energy consumption by floor area for UK households?
|
|
# Load the input properties
# Fallback values for fields that are missing on the EPC row.
# Avoid the data load inside of EPCRecord - something we should pull out
missing_field_defaults = {
    "number-habitable-rooms": 3,
    "floor-height": 2.4,
    "number-heated-rooms": 3,
}

input_properties = []
for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)):
    # Lower-case, hyphenated keys; pandas nulls become None.
    epc = {
        key.lower().replace("_", "-"): (None if pd.isnull(value) else value)
        for key, value in config.items()
    }
    for field, fallback in missing_field_defaults.items():
        if pd.isnull(epc[field]):
            epc[field] = fallback

    prepared_epc = EPCRecord(
        epc_records={"original_epc": epc, "full_sap_epc": {}, "old_data": []},
        run_mode="newdata",
        cleaning_data=cleaning_data,
    )

    property_instance = Property(
        id=row_id,
        is_new=True,
        address=epc["address"],
        postcode=epc["postcode"],
        epc_record=prepared_epc,
        already_installed={},
        property_valuation={},
        non_invasive_recommendations=[],
        energy_assessment=None,
        **Property.extract_kwargs(config),  # TODO: Deprecate this
    )
    input_properties.append(property_instance)
|
|
|
|
# For each property, insert the default solar configuration
for p in tqdm(input_properties):
    # Only the solar PV materials are handed to the solar API client; no API key is supplied.
    solar_pv_materials = [material for material in materials if material["type"] == "solar_pv"]
    solar_api = GoogleSolarApi(api_key=None, solar_materials=solar_pv_materials, max_retries=5)

    p.set_solar_panel_configuration(
        solar_panel_configuration={
            "insights_data": None,
            "panel_performance": solar_api.default_panel_performance(property_instance=p),
            "unit_share_of_energy": 1,
        },
    )
|
|
|
|
# We mock kwh preds
# Each property gets one random integer in [100, 3000) for heating and another for hot water, keyed by its UPRN.
heating_rows = []
hotwater_rows = []
for p in tqdm(input_properties):
    heating_rows.append({"id": p.uprn, "predictions": random.randrange(100, 3000)})
    hotwater_rows.append({"id": p.uprn, "predictions": random.randrange(100, 3000)})

mocked_kwh_predictions = {
    "heating_kwh_predictions": pd.DataFrame(heating_rows),
    "hotwater_kwh_predictions": pd.DataFrame(hotwater_rows),
}
|
|
|
|
# TODO: We might want to implement this generally, via an ETL process
# Back-fill missing current-cost fields: take the averaged per-floor-area cost of the closest
# current-energy-efficiency value and multiply it by the property's own floor area.
for p in input_properties:
    for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
        if not pd.isnull(p.data[col]):
            continue

        efficiency_gap = abs(costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"])
        # When several rows are equally close, only the first one is used.
        closest_row = costs_by_floor_area[efficiency_gap == efficiency_gap.min()].head(1)
        p.data[col] = (closest_row[col + "_scaled"] * p.data["total-floor-area"]).values[0]
|
|
|
|
# Call set_features on every property, passing the cleaned lookup, the kWh client and the mocked kWh predictions.
for p in input_properties:
    p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions)
|
|
|
|
# Run the recommendations
# Results are keyed by property id; scoring rows from all properties are pooled into one flat list.
recommendations = {}
representative_recommendations = {}
recommendations_scoring_data = []
for p in tqdm(input_properties):
    # A house with no recorded built form is treated as semi-detached.
    if p.data["property-type"] == "House":
        if pd.isnull(p.data["built-form"]):
            p.data["built-form"] = "Semi-Detached"

    recommender = Recommendations(
        property_instance=p, materials=materials, exclusions=[], inclusions=[], default_u_values=True
    )
    property_recommendations, property_representative_recommendations = recommender.recommend()

    # Properties with no recommendations are not recorded or scored.
    if not property_recommendations:
        continue

    recommendations[p.id] = property_recommendations
    representative_recommendations[p.id] = property_representative_recommendations

    # Build the base difference record, then adjust it with this property's recommendations.
    p.create_base_difference_epc_record(cleaned_lookup=cleaned)
    p.adjust_difference_record_with_recommendations(
        property_recommendations, property_representative_recommendations
    )

    recommendations_scoring_data.extend(p.recommendations_scoring_data)
|