mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Add new kWh ETL process to backend
This commit is contained in:
parent
73be979c29
commit
fffb179219
1 changed files with 5 additions and 117 deletions
|
|
@ -43,6 +43,7 @@ from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
|
|||
from backend.ml_models.Valuation import PropertyValuation
|
||||
|
||||
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
|
||||
from etl.bill_savings.KwhData import KwhData
|
||||
from etl.spatial.OpenUprnClient import OpenUprnClient
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
|
||||
|
|
@ -432,123 +433,10 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
|
||||
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
|
||||
|
||||
epcs_for_scoring = energy_consumption_client.prepare_new_data(input_properties)
|
||||
|
||||
# prepare the data
|
||||
|
||||
# TODO - this needs to be moved to the etl process
|
||||
import numpy as np
|
||||
def add_features_from_code(df):
    """Perform feature engineering on the EPC dataset ahead of kWh scoring.

    Steps, in order:

    1. Parse ``lodgement-date`` and derive ``lodgement-year`` and
       ``lodgement-month`` from it.
    2. For ``walls-description``, ``roof-description`` and
       ``floor-description``: where the ``cleaned`` lookup (read from the
       enclosing scope) supplies a thermal transmittance for a description,
       replace that description with a coarse band label, to avoid having
       one category per distinct transmittance value.
    3. Cast the numerical model features with ``pd.to_numeric`` and the
       categorical columns to ``str``.

    Args:
        df: EPC records. Must contain ``lodgement-date`` and every column
            listed in ``CATEGORICAL_COLUMNS`` / ``NUMERICAL_COLUMNS`` other
            than the two derived lodgement columns. The lodgement columns
            are written onto the frame that is passed in.

    Returns:
        The engineered frame. It is the product of left merges, so it
        carries a fresh integer index rather than the index of ``df``.

    Raises:
        KeyError: if a required column is missing from ``df`` or from
            ``cleaned[feature]``.
        ValueError: if a numerical feature holds a value that
            ``pd.to_numeric`` cannot parse.
    """
    FEATURES = {
        "heating_kwh": [
            "lodgement-year", "lodgement-month", "current-energy-efficiency",
            "energy-consumption-current", "heating-cost-current",
            "heating-cost-potential", "total-floor-area",
            "number-heated-rooms", "mainheat-description",
            "mainheat-energy-eff", "main-fuel", "secondheat-description",
            "property-type", "built-form", "mainheatcont-description",
            "hotwater-description", "hot-water-energy-eff",
            "walls-energy-eff", "roof-energy-eff", "windows-description",
            "windows-energy-eff", "floor-description", "flat-top-storey",
            "flat-storey-count", "unheated-corridor-length",
            "solar-water-heating-flag", "mechanical-ventilation",
            "low-energy-lighting", "environment-impact-current",
            "energy-tariff", "county", "construction-age-band",
            "co2-emissions-current",
        ],
        "hot_water_kwh": [
            "lodgement-year", "lodgement-month", "current-energy-efficiency",
            "energy-consumption-current", "hot-water-cost-current",
            "total-floor-area", "number-heated-rooms", "hotwater-description",
            "hot-water-energy-eff", "main-fuel", "property-type",
            "built-form", "co2-emissions-current",
        ],
    }
    CATEGORICAL_COLUMNS = [
        "lodgement-year", "lodgement-month", "main-fuel",
        "mainheat-description", "number-heated-rooms",
        "number-habitable-rooms", "mainheat-energy-eff",
        "mainheatcont-description", "property-type", "built-form",
        "construction-age-band", "secondheat-description",
        "hotwater-description", "hot-water-energy-eff", "walls-description",
        "walls-energy-eff", "roof-description", "roof-energy-eff",
        "floor-description", "county", "windows-description",
        "windows-energy-eff", "flat-top-storey", "flat-storey-count",
        "unheated-corridor-length", "solar-water-heating-flag",
        "mechanical-ventilation", "low-energy-lighting",
        "environment-impact-current", "energy-tariff",
        "current-energy-rating",
    ]
    # Every model feature that is not categorical is numerical. Sorted so the
    # column order is deterministic from run to run.
    NUMERICAL_COLUMNS = sorted(
        {
            column
            for column in FEATURES["heating_kwh"] + FEATURES["hot_water_kwh"]
            if column not in CATEGORICAL_COLUMNS
        }
    )

    # Thermal transmittance bands. Each band excludes its lower edge and
    # includes its upper edge, so 0.1 itself is labelled "lessthan 0.1".
    # Values outside (0, 2.5] get no band and keep their description.
    BAND_EDGES = [0, 0.1, 0.3, 0.5, 2.5]
    BAND_LABELS = ["lessthan 0.1", "0.1 - 0.3", "0.3 - 0.5", "morethan 0.5"]

    df["lodgement-date"] = pd.to_datetime(df["lodgement-date"])
    df["lodgement-year"] = df["lodgement-date"].dt.year
    df["lodgement-month"] = df["lodgement-date"].dt.month

    for feature in ["walls-description", "roof-description", "floor-description"]:
        source = pd.DataFrame(cleaned[feature])[
            ["original_description", "thermal_transmittance"]
        ]
        # One row per description; a repeated description would otherwise
        # multiply the matching rows of df in the left merge below.
        source = source.drop_duplicates(subset="original_description")

        # Band the numeric value directly instead of joining on its string
        # form, which depends on how the float happens to be rendered (an
        # integer-typed 1 renders as "1" and would never match "1.0").
        transmittance = pd.to_numeric(
            source["thermal_transmittance"], errors="coerce"
        ).round(2)
        lookup = pd.DataFrame(
            {
                "original_description": source["original_description"],
                "to": pd.cut(
                    transmittance, bins=BAND_EDGES, labels=BAND_LABELS
                ).astype(object),
            }
        )

        df = df.merge(
            lookup,
            how="left",
            left_on=feature,
            right_on="original_description",
        )
        # Where a band was found it replaces the description; otherwise the
        # original description is kept.
        df[feature] = df["to"].where(df["to"].notna(), df[feature])
        df = df.drop(columns=["original_description", "to"])

    # Convert data types
    df[NUMERICAL_COLUMNS] = df[NUMERICAL_COLUMNS].apply(pd.to_numeric)
    df[CATEGORICAL_COLUMNS] = df[CATEGORICAL_COLUMNS].astype(str)

    return df
|
||||
|
||||
def add_estimate_annual_kwh(df):
    """Add an ``estimate_annual_kwh`` column to ``df`` and return it.

    The estimate is ``energy-consumption-current`` multiplied by
    ``total-floor-area``, row by row.
    NOTE(review): presumably the EPC consumption figure is per square metre
    per year, which would make the product an annual kWh total; confirm
    against the EPC schema.

    Both inputs are passed through ``pd.to_numeric`` first so the function
    also works on string-typed EPC columns; numeric columns are unaffected.

    Args:
        df: Frame with ``energy-consumption-current`` and
            ``total-floor-area`` columns. It is modified in place.

    Returns:
        The same ``df`` object, with ``estimate_annual_kwh`` added.

    Raises:
        KeyError: if either input column is missing.
        ValueError: if a value cannot be parsed as a number.
    """
    consumption = pd.to_numeric(df['energy-consumption-current'])
    floor_area = pd.to_numeric(df['total-floor-area'])
    df['estimate_annual_kwh'] = consumption * floor_area
    return df
|
||||
|
||||
epcs_for_scoring = add_features_from_code(epcs_for_scoring)
|
||||
epcs_for_scoring = add_estimate_annual_kwh(epcs_for_scoring)
|
||||
epcs_for_scoring = KwhData.transform(
|
||||
data=pd.DataFrame([p.epc_record.original_epc for p in input_properties]),
|
||||
cleaned=cleaned,
|
||||
)
|
||||
|
||||
kwh_predictions = model_api.predict_all(
|
||||
df=epcs_for_scoring,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue