Merge branch 'main' of https://github.com/Hestia-Homes/Model

2026-07-27 23:35:01 +00:00 · 2023-08-15 17:33:52 +01:00 · 2023-08-15 17:33:52 +01:00 · eccf4814b9
commit eccf4814b9
parent e8d31d56a6 18673e3147
2 changed files with 12 additions and 14 deletions
--- a/model_data/simulation_system/DataProcessor.py
+++ b/model_data/simulation_system/DataProcessor.py
@ -27,6 +27,8 @@ class DataProcessor:
        """
        self.load_data(low_memory=DATA_PROCESSOR_SETTINGS['low_memory'])
        self.confine_data()
+
+        # TODO: Clean number of heated rooms and habitable rooms
        self.recast_df_columns(column_mappings=DATA_PROCESSOR_SETTINGS['column_mappings'])
        self.clean_multi_glaze_proportion()
        self.retain_multiple_epc_properties(epc_minimum_count=DATA_PROCESSOR_SETTINGS['epc_minimum_count'])
@ -39,10 +41,11 @@ class DataProcessor:
        # Define a custom function to calculate the median, excluding missing values
        def median_without_missing(group):
            return group[AVERAGE_FIXED_FEATURES].median(skipna=True)
-
+    
        cleaning_averages = self.data.groupby(
            ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
-            observed=True
+            observed=True,
+            dropna=False
        ).apply(median_without_missing).reset_index()

        general_averages = self.data.groupby(["PROPERTY_TYPE", "BUILT_FORM"], observed=True).apply(
--- a/model_data/simulation_system/app.py
+++ b/model_data/simulation_system/app.py
@ -42,8 +42,8 @@ def app():
            # Fixed features - these are property attributes that shouldn't change over time
            fixed_data = {}

-            # If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
-            if max(modified_property_data[MANDATORY_FIXED_FEATURES].nunique()) > 1:
+             # If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
+            if max(property_data[MANDATORY_FIXED_FEATURES].nunique()) > 1:
                continue

            # Map all anomaly values to None
@ -70,7 +70,7 @@ def app():
            columns_to_merge_on = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "NUMBER_HABITABLE_ROOMS",
                         "NUMBER_HEATED_ROOMS"]
            
-            if any(modified_property_data[columns_to_merge_on].isna()):
+            if modified_property_data[columns_to_merge_on].isna().values.any():
                 # If there are any NA values, back fill first (i.e. most recent), then forward fill if needed
                modified_property_data[columns_to_merge_on] = modified_property_data[columns_to_merge_on].fillna(method='bfill').fillna(method='ffill')
            
@ -80,12 +80,14 @@ def app():

            #  Get the corresponding groupby and merge, and fill in NA values
            cleaning_averages_to_merge = cleaning_averages.groupby(columns_to_merge_on)[['TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT']].mean()
+            
            modified_property_data = pd.merge(modified_property_data, cleaning_averages_to_merge, on=columns_to_merge_on, suffixes=['', '_AVERAGE'])
            modified_property_data['TOTAL_FLOOR_AREA'] = modified_property_data['TOTAL_FLOOR_AREA'].fillna(modified_property_data['TOTAL_FLOOR_AREA_AVERAGE'])
            modified_property_data['FLOOR_HEIGHT'] = modified_property_data['FLOOR_HEIGHT'].fillna(modified_property_data['FLOOR_HEIGHT_AVERAGE'])
            modified_property_data = modified_property_data.drop(columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])

            for field in AVERAGE_FIXED_FEATURES:
+
                vals =  list(modified_property_data[field].dropna().unique())
                if len(vals) > 1:
                     # Check whether the values are too far apart
@ -93,11 +95,9 @@ def app():
                    if abs(vals[0] - vals[1]) / vals[0] > 0.1:
                        # Take the more recent value since it's likely to be more accurate
                        vals = [vals[-1]]
+         

-                if vals:
-                    field_value = np.mean(vals)
-                
-                fixed_data[field] = field_value
+                fixed_data[field] = np.mean(vals)

            #Combine all fields together
            fixed_data.update(mandatory_field_data)
@ -122,11 +122,6 @@ def app():
                rdsap_change = ending_record[RDSAP_RESPONSE] - starting_record[RDSAP_RESPONSE]
                heat_demand_change = ending_record[HEAT_DEMAND_RESPONSE] - starting_record[HEAT_DEMAND_RESPONSE]

-                # TODO: Should this be <= 0?
-                if rdsap_change == 0:
-                    # Assumption: We aren't interested in records that exhibit no change
-                    continue
-
                # TODO: We need to pre-process the data. For instance, rather than using static for roofs, walls and
                #       floors, we may want to use the U-value. We may also want to handle the (assumed) tags
                #       within descriptions