mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fixed but which is dropping roof description
This commit is contained in:
parent
b71e76449f
commit
10fc349114
3 changed files with 20 additions and 20 deletions
|
|
@ -414,6 +414,7 @@ class DataProcessor:
|
|||
# We observed 7 final records with missing windows and 2 records with missing hot water, so we remove them; records with a missing roof description are removed as well
|
||||
self.data = self.data[~pd.isnull(self.data["WINDOWS_DESCRIPTION"])]
|
||||
self.data = self.data[~pd.isnull(self.data["HOTWATER_DESCRIPTION"])]
|
||||
self.data = self.data[~pd.isnull(self.data["ROOF_DESCRIPTION"])]
|
||||
|
||||
def clean_multi_glaze_proportion(self) -> None:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -211,4 +211,5 @@ fill_na_map = {
|
|||
"LOW_ENERGY_LIGHTING": 0,
|
||||
"MAINHEATCONT_DESCRIPTION": "Unknown",
|
||||
"EXTENSION_COUNT": 0,
|
||||
"NUMBER_OPEN_FIREPLACES": 0
|
||||
}
|
||||
|
|
|
|||
|
|
@ -81,11 +81,11 @@ def process_and_prune_desriptions(df, cleaned_lookup):
|
|||
'no_data_ENDING',
|
||||
],
|
||||
"roof": [
|
||||
'original_description', 'clean_description', 'thermal_transmittance',
|
||||
'original_description', 'thermal_transmittance',
|
||||
'thermal_transmittance_unit', 'is_pitched', 'is_roof_room', 'is_loft',
|
||||
'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed',
|
||||
'has_dwelling_above', 'is_valid', 'insulation_thickness',
|
||||
'original_description_ENDING', 'clean_description_ENDING',
|
||||
'original_description_ENDING',
|
||||
'thermal_transmittance_ENDING', 'thermal_transmittance_unit_ENDING',
|
||||
'is_pitched_ENDING', 'is_roof_room_ENDING', 'is_loft_ENDING',
|
||||
'is_flat_ENDING', 'is_thatched_ENDING', 'is_at_rafters_ENDING',
|
||||
|
|
@ -180,13 +180,25 @@ def app():
|
|||
df = data_processor.pre_process()
|
||||
cleaning_averages = data_processor.make_cleaning_averages()
|
||||
|
||||
# We have some odd cases with missing constituency, so we fill them with the most common (modal) constituency
|
||||
df = df.fillna({"CONSTITUENCY": df["CONSTITUENCY"].mode().values[0]})
|
||||
|
||||
df = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=df,
|
||||
cleaning_data=cleaning_averages,
|
||||
cols_to_merge_on=COLUMNS_TO_MERGE_ON
|
||||
)
|
||||
|
||||
data_by_urpn = []
|
||||
for uprn, property_data in df.groupby("UPRN", observed=True):
|
||||
|
||||
# Fixed features - these are property attributes that shouldn't change over time
|
||||
fixed_data = {}
|
||||
|
||||
# If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
|
||||
if any(property_data[MANDATORY_FIXED_FEATURES].nunique() > 1):
|
||||
if any(property_data[MANDATORY_FIXED_FEATURES].nunique() > 1) or (
|
||||
pd.isnull(property_data[MANDATORY_FIXED_FEATURES]).sum().sum() > 0
|
||||
):
|
||||
continue
|
||||
|
||||
# Take the latest row for both the LATEST_FIELDS and MANDATORY FIELDS
|
||||
|
|
@ -195,36 +207,22 @@ def app():
|
|||
property_data[MANDATORY_FIXED_FEATURES].iloc[-1].to_dict()
|
||||
)
|
||||
|
||||
# Extract the columns that are not all None
|
||||
modified_property_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=property_data,
|
||||
cleaning_data=cleaning_averages,
|
||||
cols_to_merge_on=COLUMNS_TO_MERGE_ON
|
||||
)
|
||||
|
||||
# Combine all fields together
|
||||
fixed_data.update(mandatory_field_data)
|
||||
fixed_data.update(latest_field_data)
|
||||
|
||||
# Apply cleaning to fixed_data
|
||||
fixed_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=pd.DataFrame([fixed_data]),
|
||||
cleaning_data=cleaning_averages,
|
||||
cols_to_merge_on=COLUMNS_TO_MERGE_ON
|
||||
).to_dict("records")[0]
|
||||
|
||||
# We include the lodgement date here as we probably need to factor time into the
|
||||
# model, since EPC standards and rigour have changed over time
|
||||
variable_data = modified_property_data[
|
||||
variable_data = property_data[
|
||||
COMPONENT_FEATURES + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
|
||||
]
|
||||
|
||||
# Note: we look at changes between subsequent EPCS, however we could look at other permutations
|
||||
# e.g. first vs second, second vs third and also first vs third
|
||||
property_model_data = []
|
||||
for idx in range(0, modified_property_data.shape[0] - 1):
|
||||
for idx in range(0, property_data.shape[0] - 1):
|
||||
|
||||
if idx >= modified_property_data.shape[0] - 1:
|
||||
if idx >= property_data.shape[0] - 1:
|
||||
break
|
||||
|
||||
earliest_record = variable_data.iloc[idx]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue