Added KwhData client to router

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-09 12:03:58 +01:00
parent fffb179219
commit c9720cd78c
3 changed files with 70 additions and 67 deletions

View file

@ -247,8 +247,8 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
# We insert the location fields (county, constituency, local authority) into the epc, since right now these
# aren't something that we pull out from the energy assessment
epc["county"] = epc_searcher.newest_epc["county"]
epc["constituency"] = epc_searcher.newest_epc["constituency"]
for col in ["county", "constituency", "constituency-label", "local-authority", "local-authority-label"]:
epc[col] = epc_searcher.newest_epc[col]
# We check if the energy assessment is newer than the newest EPC
if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]):
@ -433,10 +433,7 @@ async def trigger_plan(body: PlanTriggerRequest):
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
epcs_for_scoring = KwhData.transform(
data=pd.DataFrame([p.epc_record.original_epc for p in input_properties]),
cleaned=cleaned,
)
epcs_for_scoring = KwhData().transform(data=KwhData().prepare_epc(input_properties), cleaned=cleaned)
kwh_predictions = model_api.predict_all(
df=epcs_for_scoring,

View file

@ -507,66 +507,6 @@ class EnergyConsumptionModel:
return prediction
@staticmethod
def _prepare_new_data(p: Property):
    """
    Given an instance of the property class, this method will ensure that the EPC is ready for scoring with the
    kwh models. In the backend, we perform some cleaning and transformation on an EPC so we just ensure that the
    data is in the format required by the model.

    The work is done on a copy of ``p.data``:
      * numeric fields that are not None are cast to float
      * the mains gas / top storey flags are remapped to "Y" / "N" (any null value becomes "N")
      * null floor fields are filled with their no-data markers

    :param p: property whose ``data`` mapping holds the EPC fields
    :return: the prepared copy of ``p.data``
    :raises KeyError: if an expected field is missing, or a flag holds a non-null value other than
        True / False / "Y" / "N"
    :raises ValueError: if a numeric field holds text that cannot be converted to float
    """
    epc = p.data.copy()

    numeric_cols = [
        'current-energy-efficiency',
        'potential-energy-efficiency', 'environment-impact-current',
        'environment-impact-potential', 'energy-consumption-current',
        'energy-consumption-potential', 'co2-emissions-current',
        'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
        'lighting-cost-current', 'lighting-cost-potential',
        'heating-cost-current', 'heating-cost-potential',
        'hot-water-cost-current', 'hot-water-cost-potential',
        'total-floor-area', 'multi-glaze-proportion',
        'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
        'low-energy-lighting', 'number-open-fireplaces',
        'wind-turbine-count', 'unheated-corridor-length',
        'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
        'low-energy-fixed-light-count',
    ]
    for col in numeric_cols:
        # float(None) would raise, so missing values are left as they are
        if epc[col] is not None:
            epc[col] = float(epc[col])

    bool_map = {
        True: "Y",
        False: "N",
        None: "N",
        "Y": "Y",
        "N": "N",
    }
    for col in ('mains-gas-flag', 'flat-top-storey'):
        flag = epc[col]
        # NaN never matches a dict key, so every null representation is sent to "N" explicitly; this agrees
        # with the None entry of the map. Unknown non-null values still raise KeyError.
        epc[col] = "N" if pd.isnull(flag) else bool_map[flag]

    # The two markers are deliberately spelled differently per field
    no_data = {
        "floor-level": "NODATA!",
        "floor-energy-eff": "NO DATA!",
    }
    for col, fill_val in no_data.items():
        if pd.isnull(epc[col]):
            epc[col] = fill_val

    return epc
def prepare_new_data(self, input_properties: list[Property]):
    """
    Build the dataframe that is scored by the kwh models from a list of properties.

    Every property is passed through ``_prepare_new_data`` and the prepared records become the rows of the
    dataframe. Three columns are then added: ``lodgement-year`` and ``lodgement-month`` (both derived from
    ``lodgement-date``) and ``id`` (a copy of ``uprn``).

    :param input_properties: properties to prepare; each prepared record must carry ``lodgement-date`` and ``uprn``
    :return: dataframe with one row per property
    """
    scoring_data = pd.DataFrame([self._prepare_new_data(p) for p in input_properties])

    # Parse the lodgement dates once and derive both calendar columns from the same series
    lodgement_dates = pd.to_datetime(scoring_data["lodgement-date"])
    scoring_data["lodgement-year"] = lodgement_dates.dt.year
    scoring_data["lodgement-month"] = lodgement_dates.dt.month

    scoring_data["id"] = scoring_data["uprn"].copy()
    return scoring_data
@staticmethod
def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):

View file

@ -5,6 +5,7 @@ from datetime import datetime
from tqdm import tqdm
from utils.logger import setup_logger
from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
from backend.Property import Property
logger = setup_logger()
@ -29,7 +30,7 @@ class KwhData:
'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
]
def __init__(self, bucket):
def __init__(self, bucket=None):
self.run_date = datetime.now().strftime("%Y-%m-%d")
self.bucket = bucket
self.data = None
@ -144,6 +145,8 @@ class KwhData:
This method can be used to transform the training data, or new epcs within the backend engine
:return:
"""
if save and self.bucket is None:
raise Exception("bucket not set, cannot save data")
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
# in anticipation of the new model
@ -216,3 +219,66 @@ class KwhData:
file_key=self.model_training_data_filepath,
df=data
)
return
return data
@staticmethod
def _prepare_epc(p: Property):
    """
    Given an instance of the property class, this method will ensure that the EPC is ready for scoring with the
    kwh models. In the backend, we perform some cleaning and transformation on an EPC so we just ensure that the
    data is in the format required by the model.

    All changes are applied to a copy of ``p.data``:
      * numeric fields that are not None are converted to float
      * the ``mains-gas-flag`` and ``flat-top-storey`` flags become "Y" or "N", with any null value mapped to "N"
      * null ``floor-level`` / ``floor-energy-eff`` values are replaced by their no-data markers

    :param p: property whose ``data`` mapping holds the EPC fields
    :return: the prepared copy of ``p.data``
    :raises KeyError: if an expected field is missing, or a flag holds a non-null value other than
        True / False / "Y" / "N"
    :raises ValueError: if a numeric field holds text that cannot be converted to float
    """
    epc = p.data.copy()

    numeric_cols = [
        'current-energy-efficiency',
        'potential-energy-efficiency', 'environment-impact-current',
        'environment-impact-potential', 'energy-consumption-current',
        'energy-consumption-potential', 'co2-emissions-current',
        'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
        'lighting-cost-current', 'lighting-cost-potential',
        'heating-cost-current', 'heating-cost-potential',
        'hot-water-cost-current', 'hot-water-cost-potential',
        'total-floor-area', 'multi-glaze-proportion',
        'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
        'low-energy-lighting', 'number-open-fireplaces',
        'wind-turbine-count', 'unheated-corridor-length',
        'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
        'low-energy-fixed-light-count',
    ]
    for name in numeric_cols:
        raw = epc[name]
        # None cannot be passed to float(), so it is kept as the "missing" marker
        if raw is not None:
            epc[name] = float(raw)

    bool_map = {
        True: "Y",
        False: "N",
        None: "N",
        "Y": "Y",
        "N": "N",
    }
    bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
    for name in bools_to_remap:
        raw = epc[name]
        if pd.isnull(raw):
            # A NaN flag would miss every key of bool_map and raise KeyError; treat it like None
            epc[name] = "N"
        else:
            # Unknown non-null values still raise KeyError rather than being guessed
            epc[name] = bool_map[raw]

    # The two markers are deliberately spelled differently per field
    no_data = {
        "floor-level": "NODATA!",
        "floor-energy-eff": "NO DATA!",
    }
    for name, fill_val in no_data.items():
        if pd.isnull(epc[name]):
            epc[name] = fill_val

    return epc
def prepare_epc(self, input_properties: list[Property]):
    """
    Build the dataframe that is passed on for kwh scoring from a list of properties.

    Each property is run through ``_prepare_epc`` and the prepared records form the rows of the dataframe.
    The columns ``lodgement-year`` and ``lodgement-month`` are derived from ``lodgement-date``, and ``id`` is
    added as a copy of ``uprn``.

    :param input_properties: properties to prepare; each prepared record must carry ``lodgement-date`` and ``uprn``
    :return: dataframe with one row per property
    """
    prepared_records = [self._prepare_epc(p) for p in input_properties]
    scoring_data = pd.DataFrame(prepared_records)

    # Convert the lodgement dates a single time; year and month are both read from this series
    lodged_on = pd.to_datetime(scoring_data["lodgement-date"])
    scoring_data["lodgement-year"] = lodged_on.dt.year
    scoring_data["lodgement-month"] = lodged_on.dt.month

    scoring_data["id"] = scoring_data["uprn"].copy()
    return scoring_data