mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
adding a dummy testing script
This commit is contained in:
parent
db67e0e23f
commit
54964bcf17
2 changed files with 68 additions and 6 deletions
|
|
@ -3,7 +3,7 @@ import pandas as pd
|
|||
from tqdm import tqdm
|
||||
from model_data.BaseUtility import BaseUtility
|
||||
from pathlib import Path
|
||||
from simulation_system.Settings import (
|
||||
from model_data.simulation_system.Settings import (
|
||||
MANDATORY_FIXED_FEATURES,
|
||||
AVERAGE_FIXED_FEATURES,
|
||||
LATEST_FIELD,
|
||||
|
|
@ -28,7 +28,6 @@ def app():
|
|||
|
||||
dataset = []
|
||||
|
||||
|
||||
for directory in tqdm(directories):
|
||||
|
||||
filepath = directory / "certificates.csv"
|
||||
|
|
@ -43,16 +42,16 @@ def app():
|
|||
# Fixed features - these are property attributes that shouldn't change over time
|
||||
fixed_data = {}
|
||||
|
||||
# If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
|
||||
if max(modified_property_data[MANDATORY_FIXED_FEATURES].nunique()) > 1:
|
||||
continue
|
||||
|
||||
# Map all anomaly values to None
|
||||
data_anomaly_map = dict(zip(BaseUtility.DATA_ANOMALY_MATCHES, [None]*len(BaseUtility.DATA_ANOMALY_MATCHES)))
|
||||
|
||||
# Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
|
||||
modified_property_data = property_data.replace(data_anomaly_map)
|
||||
modified_property_data = modified_property_data.replace(np.NAN, None)
|
||||
|
||||
# If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
|
||||
if max(modified_property_data[MANDATORY_FIXED_FEATURES].nunique()) > 1:
|
||||
continue
|
||||
|
||||
# Remap certain columns
|
||||
modified_property_data['FLOOR_LEVEL'] = modified_property_data['FLOOR_LEVEL'].replace(FLOOR_LEVEL_MAP)
|
||||
|
|
|
|||
63
model_data/simulation_system/energy_predictor.py
Normal file
63
model_data/simulation_system/energy_predictor.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
from pathlib import Path
|
||||
from Settings import (
|
||||
RDSAP_RESPONSE,
|
||||
FLOOR_LEVEL_MAP,
|
||||
BUILT_FORM_REMAP,
|
||||
EARLIEST_EPC_DATE,
|
||||
FULLY_GLAZED_DESCRIPTIONS
|
||||
)
|
||||
from model_data.BaseUtility import BaseUtility
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# Root folder that main() scans for per-area sub-directories, each expected to
# hold a "certificates.csv". Resolved relative to this file's own directory,
# not the current working directory.
DATA_DIRECTORY = Path(__file__).parent / 'data' / 'all-domestic-certificates'
|
||||
|
||||
def _clean_certificates(df, data_anomaly_map):
    """
    Filter, recast, sort and remap one certificates DataFrame.

    Args:
        df: DataFrame loaded from a single ``certificates.csv``.
        data_anomaly_map: mapping of anomaly value -> ``None`` used to blank
            out invalid values.

    Returns:
        A new, cleaned DataFrame; the input frame is not modified.
    """
    # Remove any bad UPRNs and ignore old/bad data
    df = df[df["UPRN"].notna()]
    df = df[df["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
    df = df[df["TRANSACTION_TYPE"] != "new dwelling"]
    # Explicit copy so the in-place assignments below act on an independent
    # frame rather than on a slice of the one read from disk.
    df = df[~df["FLOOR_LEVEL"].isin(["top floor", "mid floor"])].copy()

    # Change multi glaze proportion: a missing proportion is set to 100 when
    # the windows description is one of the fully glazed descriptions.
    missing_but_fully_glazed = (
        df["MULTI_GLAZE_PROPORTION"].isna()
        & df["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS)
    )
    df.loc[missing_but_fully_glazed, "MULTI_GLAZE_PROPORTION"] = 100

    # Recast
    df["UPRN"] = df["UPRN"].astype(int).astype(str)
    df["MAIN_HEATING_CONTROLS"] = df["MAIN_HEATING_CONTROLS"].astype(float)

    # Sort data
    df = df.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)

    # Use replace to map data (if it exists as a key) to the corresponding
    # value, i.e. remove invalid values, then turn remaining NaNs into None.
    df = df.replace(data_anomaly_map)
    # np.nan rather than np.NAN: the upper-case alias was removed in NumPy 2.0.
    df = df.replace(np.nan, None)

    # Remap certain columns in place. The built-form remap previously landed
    # in a misspelled "BUILT_FROM" column, leaving "BUILT_FORM" unmapped.
    df["FLOOR_LEVEL"] = df["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP)
    df["BUILT_FORM"] = df["BUILT_FORM"].replace(BUILT_FORM_REMAP)

    return df


def main():
    """
    Extract all information to do a simple predictor for RDSAP.

    Reads ``certificates.csv`` from at most the first ten sub-directories of
    ``DATA_DIRECTORY``, cleans each file, concatenates the results and writes
    them to ``./energy_predictor_data.parquet`` (relative to the current
    working directory).

    Raises:
        ValueError: if ``DATA_DIRECTORY`` contains no sub-directories.
    """
    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
    # Only a small sample of directories is processed.
    directories = directories[0:10]
    if not directories:
        # pd.concat([]) would raise a ValueError anyway; fail early with a
        # message that points at the actual cause.
        raise ValueError(f"No certificate directories found under {DATA_DIRECTORY}")

    # Map all anomaly values to None. Built once: it does not depend on the
    # file being processed.
    data_anomaly_map = dict.fromkeys(BaseUtility.DATA_ANOMALY_MATCHES)

    dfs = []
    for directory in tqdm(directories):
        filepath = directory / "certificates.csv"
        df = pd.read_csv(filepath)
        dfs.append(_clean_certificates(df, data_anomaly_map))

    data = pd.concat(dfs)
    data.to_parquet('./energy_predictor_data.parquet')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Reference in a new issue