mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Debugging SAP model data prep
This commit is contained in:
parent
de9810af43
commit
6ddc9fddca
4 changed files with 68 additions and 26 deletions
|
|
@ -632,7 +632,6 @@ class Property(Definitions):
|
|||
'PHOTO_SUPPLY',
|
||||
'LOW_ENERGY_LIGHTING',
|
||||
'SOLAR_WATER_HEATING_FLAG',
|
||||
'BUILT_FORM',
|
||||
'GLAZED_TYPE',
|
||||
'CONSTITUENCY',
|
||||
'NUMBER_HEATED_ROOMS',
|
||||
|
|
@ -642,6 +641,21 @@ class Property(Definitions):
|
|||
k: self.data[k.lower().replace("_", "-")] for k in epc_raw_columns
|
||||
}
|
||||
|
||||
built_form_cleaning_map = {
|
||||
"Flat": "Mid-Terrace",
|
||||
"House": "Semi-Detached",
|
||||
"Bungalow": "Detached",
|
||||
"Maisonette": "Mid-Terrace"
|
||||
}
|
||||
|
||||
built_form = self.data["built-form"]
|
||||
if built_form in self.DATA_ANOMALY_MATCHES:
|
||||
# TODO: If built form isn't captured, we use the most common value for that property type - we shall
|
||||
# improve this methodology
|
||||
built_form = built_form_cleaning_map.get(self.data["property-type"])
|
||||
if not built_form:
|
||||
raise NotImplementedError("Not handled this property type when cleaning built form")
|
||||
|
||||
property_data = {
|
||||
**walls,
|
||||
**roof,
|
||||
|
|
@ -653,15 +667,16 @@ class Property(Definitions):
|
|||
**windows,
|
||||
"SECONDHEAT_DESCRIPTION": second_heating,
|
||||
"DAYS_TO": DataProcessor.calculate_days_to(self.data["lodgement-date"]),
|
||||
"SAP": self.data["current-energy-efficiency"],
|
||||
"CARBON": self.data["co2-emissions-current"],
|
||||
"HEAT_DEMAND": self.data["energy-consumption-current"],
|
||||
"SAP": float(self.data["current-energy-efficiency"]),
|
||||
"CARBON": float(self.data["co2-emissions-current"]),
|
||||
"HEAT_DEMAND": float(self.data["energy-consumption-current"]),
|
||||
"estimated_perimeter": self.perimeter,
|
||||
"CONSTRUCTION_AGE_BAND": self.construction_age_band,
|
||||
"FLOOR_HEIGHT": self.floor_height,
|
||||
"NUMBER_HABITABLE_ROOMS": self.number_of_rooms,
|
||||
"TOTAL_FLOOR_AREA": self.floor_area,
|
||||
**epc_raw_data
|
||||
**epc_raw_data,
|
||||
"BUILT_FORM": built_form,
|
||||
}
|
||||
|
||||
return property_data
|
||||
|
|
|
|||
|
|
@ -157,13 +157,19 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
data_processor = DataProcessor(None, newdata=True)
|
||||
data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
|
||||
data_processor.pre_process()
|
||||
data_processor.data = data_processor.clean_missings_after_description_process(
|
||||
data_processor.data, [
|
||||
c for c in data_processor.data.columns if
|
||||
("thermal_transmittance" in c) or ("insulation_thickness" in c)
|
||||
]
|
||||
)
|
||||
|
||||
starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
|
||||
ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
|
||||
fixed_data = data_processor.get_fixed_features()
|
||||
|
||||
# We update the ending record with the recommended updates and we set lodgement date to today
|
||||
ending_epc_data["LODGEMENT_DATE_ENDING"] = data_processor.calculate_days_to(created_at)
|
||||
ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)
|
||||
|
||||
for recommendations_by_type in property_recommendations:
|
||||
for rec in recommendations_by_type:
|
||||
|
|
@ -175,21 +181,38 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
fixed_data=fixed_data,
|
||||
)
|
||||
|
||||
fer
|
||||
|
||||
none_cols = []
|
||||
for col in scoring_dict.keys():
|
||||
if col in [
|
||||
"UPRN", "id", "LOCAL_AUTHORITY",
|
||||
]:
|
||||
continue
|
||||
|
||||
if col in ["SAP_STARTING", "HEAT_DEMAND_STARTING", "CARBON_STARTING", "FLOOR_HEIGHT_STARTING"]:
|
||||
if scoring_dict[col]:
|
||||
if col in [
|
||||
"SAP_STARTING", "HEAT_DEMAND_STARTING", "CARBON_STARTING", "FLOOR_HEIGHT_STARTING",
|
||||
"TOTAL_FLOOR_AREA_STARTING", "DAYS_TO_STARTING", "estimated_perimeter_STARTING",
|
||||
"SAP_ENDING", "HEAT_DEMAND_ENDING",
|
||||
"CARBON_ENDING", "FLOOR_HEIGHT_ENDING",
|
||||
"TOTAL_FLOOR_AREA_ENDING", "DAYS_TO_ENDING", "estimated_perimeter_ENDING"
|
||||
]:
|
||||
try:
|
||||
if scoring_dict[col] is None:
|
||||
blah1
|
||||
float(scoring_dict[col])
|
||||
continue
|
||||
except:
|
||||
raise Exception("wtf")
|
||||
|
||||
unique_vals = sap_change_dataset[col].unique()
|
||||
if scoring_dict[col] not in unique_vals:
|
||||
if scoring_dict[col] is None:
|
||||
none_cols.append(col)
|
||||
continue
|
||||
blah
|
||||
|
||||
if none_cols:
|
||||
blahblah
|
||||
|
||||
recommendations_scoring_data.append(scoring_dict)
|
||||
|
||||
# cleanup
|
||||
|
|
|
|||
|
|
@ -550,3 +550,22 @@ class DataProcessor:
|
|||
return (
|
||||
pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE)
|
||||
).dt.days
|
||||
|
||||
@staticmethod
|
||||
def clean_missings_after_description_process(df, ignore_cols=None):
    """Fill the remaining null values in ``df`` with a per-column default.

    Every column that still contains at least one null is filled according
    to the values already present in that column:

    * a column containing ``True`` or ``False`` has its nulls set to ``False``;
    * otherwise, a column containing the string ``"none"`` has its nulls set
      to ``"none"``;
    * any other column has its nulls set to ``"Unknown"``.

    Args:
        df: DataFrame to clean. Its columns are reassigned in place.
        ignore_cols: Optional collection of column names (list, tuple, set,
            pandas Index, ...) whose nulls must be left untouched. ``None``
            or an empty collection means no column is skipped.

    Returns:
        The same ``df`` object, with the nulls filled.
    """
    null_counts = pd.isnull(df).sum()
    null_counts = null_counts[null_counts > 0]

    # Compare against None rather than relying on truthiness: a pandas Index
    # or numpy array of column names raises "truth value is ambiguous" in a
    # boolean context. An empty collection simply filters nothing out.
    if ignore_cols is not None:
        null_counts = null_counts[~null_counts.index.isin(list(ignore_cols))]

    for col in null_counts.index:
        unique_values = df[col].unique()
        # The membership tests use ``==`` semantics, so numeric 0/1 values
        # also count as booleans here; this mirrors the existing behaviour.
        if True in unique_values or False in unique_values:
            df[col] = df[col].fillna(False)
        elif "none" in unique_values:
            df[col] = df[col].fillna("none")
        else:
            df[col] = df[col].fillna("Unknown")

    return df
|
||||
|
|
|
|||
|
|
@ -363,21 +363,6 @@ def make_uvalues(df):
|
|||
return df
|
||||
|
||||
|
||||
def clean_missings_after_description_process(df):
    """Replace the nulls left in ``df`` with a default chosen per column.

    A column that holds ``True`` or ``False`` gets ``False``; a column that
    holds the string ``"none"`` gets ``"none"``; every other column gets
    ``"Unknown"``. Columns are reassigned on ``df`` itself, which is also
    returned.
    """
    null_counts = df.isna().sum()
    columns_with_nulls = null_counts[null_counts > 0].index

    for column in columns_with_nulls:
        seen = df[column].unique()

        looks_boolean = (True in seen) or (False in seen)
        if looks_boolean:
            df[column] = df[column].fillna(False)

        # Always runs, as in the original; it is a no-op once the boolean
        # fill above has already removed every null from the column.
        filler = "none" if "none" in seen else "Unknown"
        df[column] = df[column].fillna(filler)

    return df
|
||||
|
||||
|
||||
def app():
|
||||
# Get all the files in the directory
|
||||
|
||||
|
|
@ -544,7 +529,7 @@ def app():
|
|||
# Those nulls should be False. clean_missings_after_description_process handles this but shouldn't
|
||||
# need to
|
||||
|
||||
data_by_urpn_df = clean_missings_after_description_process(data_by_urpn_df)
|
||||
data_by_urpn_df = DataProcessor.clean_missings_after_description_process(data_by_urpn_df)
|
||||
|
||||
if pd.isnull(data_by_urpn_df).sum().sum():
|
||||
raise ValueError("Null values found in dataset after process_and_prune_desriptions")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue