Mirror of https://github.com/Hestia-Homes/Model.git (synced 2026-06-08 11:17:27 +00:00)
Added KwhData client to router
This commit is contained in:
Parent commit: fffb179219
Commit: c9720cd78c
3 changed files with 70 additions and 67 deletions
|
|
@ -247,8 +247,8 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
|
|||
|
||||
# We insert county into the epc, since right now this isn't something that we pull out from the energy
|
||||
# assessment
|
||||
epc["county"] = epc_searcher.newest_epc["county"]
|
||||
epc["constituency"] = epc_searcher.newest_epc["constituency"]
|
||||
for col in ["county", "constituency", "constituency-label", "local-authority", "local-authority-label"]:
|
||||
epc[col] = epc_searcher.newest_epc[col]
|
||||
|
||||
# We check if the energy assessment is newer than the newest EPC
|
||||
if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]):
|
||||
|
|
@ -433,10 +433,7 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
|
||||
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
|
||||
|
||||
epcs_for_scoring = KwhData.transform(
|
||||
data=pd.DataFrame([p.epc_record.original_epc for p in input_properties]),
|
||||
cleaned=cleaned,
|
||||
)
|
||||
epcs_for_scoring = KwhData().transform(data=KwhData().prepare_epc(input_properties), cleaned=cleaned)
|
||||
|
||||
kwh_predictions = model_api.predict_all(
|
||||
df=epcs_for_scoring,
|
||||
|
|
|
|||
|
|
@ -507,66 +507,6 @@ class EnergyConsumptionModel:
|
|||
|
||||
return prediction
|
||||
|
||||
@staticmethod
def _prepare_new_data(p: Property):
    """
    Return a copy of a property's EPC record coerced into the format the kwh models are scored with.

    The record is read from ``p.data`` and copied before any change is made, so the record held by the
    property is not reassigned by this method.

    :param p: property whose ``data`` attribute holds the EPC record (anything supporting ``.copy()`` and
        item access by EPC column name)
    :return: the copied record with the numeric fields cast to float, the boolean flags remapped to
        "Y"/"N" and missing floor descriptors replaced by their no-data markers
    :raises KeyError: if a flag holds a value other than True/False/"Y"/"N"/missing (or, for a plain dict
        record, if an expected field is absent)
    """
    epc = p.data.copy()

    numeric_cols = [
        'current-energy-efficiency',
        'potential-energy-efficiency', 'environment-impact-current',
        'environment-impact-potential', 'energy-consumption-current',
        'energy-consumption-potential', 'co2-emissions-current',
        'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
        'lighting-cost-current', 'lighting-cost-potential',
        'heating-cost-current', 'heating-cost-potential',
        'hot-water-cost-current', 'hot-water-cost-potential',
        'total-floor-area', 'multi-glaze-proportion',
        'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
        'low-energy-lighting', 'number-open-fireplaces',
        'wind-turbine-count', 'unheated-corridor-length',
        'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
        'low-energy-fixed-light-count',
    ]
    # A None value is left untouched; everything else must be castable to float
    for col in numeric_cols:
        if epc[col] is not None:
            epc[col] = float(epc[col])

    bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
    bool_map = {
        True: "Y",
        False: "N",
        None: "N",
        "Y": "Y",
        "N": "N",
    }
    for col in bools_to_remap:
        flag = epc[col]
        # NaN is not a key of bool_map, so a missing flag that arrives as NaN rather than None would raise
        # a KeyError. Any null value is treated the same way None already is
        epc[col] = "N" if pd.isnull(flag) else bool_map[flag]

    # The two markers are spelled differently on purpose of the original code ("NODATA!" vs "NO DATA!")
    # and are reproduced exactly
    no_data = {
        "floor-level": "NODATA!",
        "floor-energy-eff": "NO DATA!",
    }
    for col, fill_val in no_data.items():
        if pd.isnull(epc[col]):
            epc[col] = fill_val

    return epc
|
||||
|
||||
def prepare_new_data(self, input_properties: list[Property]):
    """
    Build the scoring dataframe for a list of properties.

    Each property is passed through ``self._prepare_new_data`` and the resulting records become the rows
    of the dataframe. The lodgement year and month are then derived from the "lodgement-date" column and
    the "uprn" column is duplicated into "id".

    :param input_properties: the properties to prepare for scoring
    :return: dataframe with one row per property plus the "lodgement-year", "lodgement-month" and "id"
        columns
    """
    scoring_data = pd.DataFrame([self._prepare_new_data(p) for p in input_properties])

    # Parse the lodgement dates once and derive both calendar features from the same parsed series
    lodgement_dates = pd.to_datetime(scoring_data["lodgement-date"])
    scoring_data["lodgement-year"] = lodgement_dates.dt.year
    scoring_data["lodgement-month"] = lodgement_dates.dt.month

    scoring_data["id"] = scoring_data["uprn"].copy()

    return scoring_data
|
||||
|
||||
@staticmethod
|
||||
def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from datetime import datetime
|
|||
from tqdm import tqdm
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
|
||||
from backend.Property import Property
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -29,7 +30,7 @@ class KwhData:
|
|||
'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
|
||||
]
|
||||
|
||||
def __init__(self, bucket):
|
||||
def __init__(self, bucket=None):
|
||||
self.run_date = datetime.now().strftime("%Y-%m-%d")
|
||||
self.bucket = bucket
|
||||
self.data = None
|
||||
|
|
@ -144,6 +145,8 @@ class KwhData:
|
|||
This method can be used to transform the training data, or new epcs within the backend engine
|
||||
:return:
|
||||
"""
|
||||
if save and self.bucket is None:
|
||||
raise Exception("bucket not set, cannot save data")
|
||||
|
||||
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
|
||||
# in anticipation of the new model
|
||||
|
|
@ -216,3 +219,66 @@ class KwhData:
|
|||
file_key=self.model_training_data_filepath,
|
||||
df=data
|
||||
)
|
||||
return
|
||||
|
||||
return data
|
||||
|
||||
@staticmethod
def _prepare_epc(p: Property):
    """
    Return a copy of a property's EPC record coerced into the format the kwh models are scored with.

    The record is read from ``p.data`` and copied before any change is made, so the record held by the
    property is not reassigned by this method.

    :param p: property whose ``data`` attribute holds the EPC record (anything supporting ``.copy()`` and
        item access by EPC column name)
    :return: the copied record with the numeric fields cast to float, the boolean flags remapped to
        "Y"/"N" and missing floor descriptors replaced by their no-data markers
    :raises KeyError: if a flag holds a value other than True/False/"Y"/"N"/missing (or, for a plain dict
        record, if an expected field is absent)
    """
    epc = p.data.copy()

    numeric_cols = [
        'current-energy-efficiency',
        'potential-energy-efficiency', 'environment-impact-current',
        'environment-impact-potential', 'energy-consumption-current',
        'energy-consumption-potential', 'co2-emissions-current',
        'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
        'lighting-cost-current', 'lighting-cost-potential',
        'heating-cost-current', 'heating-cost-potential',
        'hot-water-cost-current', 'hot-water-cost-potential',
        'total-floor-area', 'multi-glaze-proportion',
        'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
        'low-energy-lighting', 'number-open-fireplaces',
        'wind-turbine-count', 'unheated-corridor-length',
        'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
        'low-energy-fixed-light-count',
    ]
    # A None value is left untouched; everything else must be castable to float
    for col in numeric_cols:
        if epc[col] is not None:
            epc[col] = float(epc[col])

    bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
    bool_map = {
        True: "Y",
        False: "N",
        None: "N",
        "Y": "Y",
        "N": "N",
    }
    for col in bools_to_remap:
        flag = epc[col]
        # NaN is not a key of bool_map, so a missing flag that arrives as NaN rather than None would raise
        # a KeyError. Any null value is treated the same way None already is
        epc[col] = "N" if pd.isnull(flag) else bool_map[flag]

    # The two markers are spelled differently in the original code ("NODATA!" vs "NO DATA!") and are
    # reproduced exactly
    no_data = {
        "floor-level": "NODATA!",
        "floor-energy-eff": "NO DATA!",
    }
    for col, fill_val in no_data.items():
        if pd.isnull(epc[col]):
            epc[col] = fill_val

    return epc
|
||||
|
||||
def prepare_epc(self, input_properties: list[Property]):
    """
    Build the scoring dataframe for a list of properties.

    Each property is passed through ``self._prepare_epc`` and the resulting records become the rows of the
    dataframe. The lodgement year and month are then derived from the "lodgement-date" column and the
    "uprn" column is duplicated into "id".

    :param input_properties: the properties to prepare for scoring
    :return: dataframe with one row per property plus the "lodgement-year", "lodgement-month" and "id"
        columns
    """
    scoring_data = pd.DataFrame([self._prepare_epc(p) for p in input_properties])

    # Parse the lodgement dates once and derive both calendar features from the same parsed series
    lodgement_dates = pd.to_datetime(scoring_data["lodgement-date"])
    scoring_data["lodgement-year"] = lodgement_dates.dt.year
    scoring_data["lodgement-month"] = lodgement_dates.dt.month

    scoring_data["id"] = scoring_data["uprn"].copy()

    return scoring_data
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue