mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Integrating new SAP model process into backend
This commit is contained in:
parent
52578287a7
commit
d58a87af01
4 changed files with 37 additions and 10 deletions
|
|
@ -4,6 +4,7 @@ import os
|
|||
import pandas as pd
|
||||
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import POTENTIAL_COLUMNS, EFFICIENCY_FEATURES
|
||||
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
|
|
@ -603,7 +604,7 @@ class Property(Definitions):
|
|||
@staticmethod
|
||||
def _extract_component(component_data, component_rename_cols, component_drop_cols, rename_prefix=None):
|
||||
for k in component_rename_cols:
|
||||
component_data[f"{rename_prefix}_{k}"] = component_data[k]
|
||||
component_data[f"{rename_prefix}_{k}"] = component_data.get(k)
|
||||
|
||||
component_data = {
|
||||
k: v for k, v in component_data.items() if k not in component_drop_cols + component_rename_cols
|
||||
|
|
@ -640,7 +641,7 @@ class Property(Definitions):
|
|||
# We'll need to clean second heating
|
||||
second_heating = self.data["secondheat-description"]
|
||||
|
||||
epc_raw_columns = [
|
||||
epc_raw_columns = POTENTIAL_COLUMNS + EFFICIENCY_FEATURES + [
|
||||
'TRANSACTION_TYPE',
|
||||
'ENERGY_TARIFF',
|
||||
'PROPERTY_TYPE',
|
||||
|
|
|
|||
|
|
@ -125,14 +125,14 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
# with open("input_properties.pickle", "rb") as f:
|
||||
# input_properties = pickle.load(f)
|
||||
#
|
||||
# with open("cleaned.pickle", "rb") as f:
|
||||
# cleaned = pickle.load(f)
|
||||
# import pickle
|
||||
# with open("new_sap_dataset.pickle", "rb") as f:
|
||||
# new_sap_dataset = pickle.load(f)
|
||||
|
||||
recommendations = {}
|
||||
recommendations_scoring_data = []
|
||||
|
||||
for p in input_properties:
|
||||
|
||||
property_recommendations = []
|
||||
|
||||
# Property recommendations
|
||||
|
|
@ -234,6 +234,25 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
]
|
||||
)
|
||||
|
||||
for c in new_sap_dataset.columns:
|
||||
if c in ["UPRN", "RDSAP_CHANGE", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "SAP_STARTING"]:
|
||||
continue
|
||||
|
||||
if (new_sap_dataset[c].dtype.name in ["int64", "float64"]) & (
|
||||
recommendations_scoring_data[c].dtype.name in ["int64", "float64"]
|
||||
):
|
||||
continue
|
||||
|
||||
if c == "CONSTITUENCY":
|
||||
if c not in recommendations_scoring_data:
|
||||
raise Exception("wtf")
|
||||
continue
|
||||
|
||||
unique_vals = new_sap_dataset[c].unique()
|
||||
scoring_unique_vals = recommendations_scoring_data[c].unique()
|
||||
if not all(x in unique_vals for x in scoring_unique_vals):
|
||||
raise Exception("")
|
||||
|
||||
sap_change_model_api = SAPChangeModelAPI(portfolio_id=body.portfolio_id, timestamp=created_at)
|
||||
file_location = sap_change_model_api.upload_scoring_data(
|
||||
df=recommendations_scoring_data, bucket=get_settings().DATA_BUCKET
|
||||
|
|
|
|||
|
|
@ -16,7 +16,9 @@ from etl.epc.settings import (
|
|||
fill_na_map,
|
||||
STARTING_SUFFIX_COMPONENT_COLS,
|
||||
NO_SUFFIX_COMPONENT_COLS,
|
||||
ENDING_SUFFIX_COMPONENT_COLS
|
||||
ENDING_SUFFIX_COMPONENT_COLS,
|
||||
POTENTIAL_COLUMNS,
|
||||
EFFICIENCY_FEATURES,
|
||||
)
|
||||
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
|
||||
|
||||
|
|
@ -203,6 +205,8 @@ class DataProcessor:
|
|||
|
||||
# Final re-casting after the data has been transformed and prepared
|
||||
coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.newdata else COLUMNTYPES
|
||||
for k, v in coltypes.items():
|
||||
self.data[k] = self.data[k].astype(v)
|
||||
self.data = self.data.astype(coltypes)
|
||||
|
||||
self.na_remapping()
|
||||
|
|
@ -504,12 +508,14 @@ class DataProcessor:
|
|||
raise Exception("Suffix should be one of _STARTING or _ENDING")
|
||||
|
||||
if suffix == "_STARTING":
|
||||
starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS].copy().add_suffix(suffix)
|
||||
fixed_cols = self.data[NO_SUFFIX_COMPONENT_COLS].copy()
|
||||
starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES].copy().add_suffix(suffix)
|
||||
fixed_cols = self.data[NO_SUFFIX_COMPONENT_COLS + POTENTIAL_COLUMNS].copy()
|
||||
|
||||
return pd.concat([starting_cols, fixed_cols], axis=1)
|
||||
|
||||
return self.data[ENDING_SUFFIX_COMPONENT_COLS].copy().add_suffix(suffix)
|
||||
return self.data[
|
||||
ENDING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES
|
||||
].copy().add_suffix(suffix)
|
||||
|
||||
def get_fixed_features(self) -> pd.DataFrame:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -127,7 +127,6 @@ COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
|
|||
]
|
||||
|
||||
POTENTIAL_COLUMNS = [
|
||||
'POTENTIAL_ENERGY_RATING',
|
||||
'POTENTIAL_ENERGY_EFFICIENCY',
|
||||
'ENVIRONMENT_IMPACT_POTENTIAL',
|
||||
'ENERGY_CONSUMPTION_POTENTIAL',
|
||||
|
|
@ -195,6 +194,8 @@ COLUMNTYPES = {
|
|||
'MAINHEATCONT_DESCRIPTION': 'object',
|
||||
'EXTENSION_COUNT': 'float64',
|
||||
'LODGEMENT_DATE': 'object',
|
||||
**dict(zip(EFFICIENCY_FEATURES, ['object', ] * len(EFFICIENCY_FEATURES))),
|
||||
**dict(zip(POTENTIAL_COLUMNS, ['float64', ] * len(POTENTIAL_COLUMNS)))
|
||||
}
|
||||
|
||||
# For modelling, we don't allow records with more than 100 SAP points
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue