mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Fixed silly error in all_cleaners: removed the stray colon from the 'lighting-description:' key
This commit is contained in:
parent
95226a73ff
commit
2acf5c3534
2 changed files with 31 additions and 2 deletions
|
|
@ -75,6 +75,20 @@ epc_data = pd.read_csv(
|
|||
low_memory=False
|
||||
)
|
||||
|
||||
# TODO: Store this lookup of mean cost per unit floor area (by energy efficiency) for use in cleaning
|
||||
costs_by_floor_area = epc_data[
|
||||
pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01"
|
||||
][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT",
|
||||
"HOT_WATER_COST_CURRENT"]].copy()
|
||||
|
||||
costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns]
|
||||
for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
|
||||
costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"]
|
||||
|
||||
costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[
|
||||
["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"]
|
||||
].mean().reset_index()
|
||||
|
||||
sample_epc_data = epc_data.drop_duplicates("UPRN").sample(1000).reset_index(drop=True)
|
||||
|
||||
# Load the input properties
|
||||
|
|
@ -140,12 +154,27 @@ for p in tqdm(input_properties):
|
|||
mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"])
|
||||
mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"])
|
||||
|
||||
# TODO: We might want to implement this filling-in of missing cost values generally, via an ETL process
|
||||
for p in input_properties:
|
||||
for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
|
||||
if pd.isnull(p.data[col]):
|
||||
min_diff = abs(
|
||||
(costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"])
|
||||
).min()
|
||||
df = costs_by_floor_area[
|
||||
abs((costs_by_floor_area["current-energy-efficiency"] - p.data[
|
||||
"current-energy-efficiency"])) == min_diff
|
||||
]
|
||||
if df.shape[0] > 1:
|
||||
df = df.head(1)
|
||||
p.data[col] = (df[col + "_scaled"] * p.data["total-floor-area"]).values[0]
|
||||
|
||||
[
|
||||
p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in
|
||||
input_properties
|
||||
]
|
||||
|
||||
for p in input_properties:
|
||||
for p in tqdm(input_properties):
|
||||
# TEMP: attach DATA_ANOMALY_MATCHES to each property before calling set_features
|
||||
p.DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES
|
||||
p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions)
|
||||
|
|
|
|||
|
|
@ -17,5 +17,5 @@ all_cleaner_map = {
|
|||
'roof-description': RoofAttributes,
|
||||
'walls-description': WallAttributes,
|
||||
'windows-description': WindowAttributes,
|
||||
'lighting-description:': LightingAttributes,
|
||||
'lighting-description': LightingAttributes,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue