From a2c61395de78fb175c8207ebc698fcca2f4eef6b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 21 Oct 2024 18:34:18 +0100
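Subject: [PATCH] Get the code working in the new environment

- Cast numeric recommendation, valuation and energy-bill values to the
  built-in float type.
- Replace np.NAN with np.nan, and replace fillna(method=...) and in-place
  fillna calls in EPCDataProcessor with bfill()/ffill() and assignment.
- Cast PHOTO_SUPPLY to Int64 before filling missing values with 0.
- Add float column mappings for NUMBER_HEATED_ROOMS and
  NUMBER_HABITABLE_ROOMS to DATA_PROCESSOR_SETTINGS.
- Add pyarrow and fastparquet to the backend requirements.
- Fix the logger.errpr typo in read_pickle_from_s3.
- NOTE: the loop that reads models from S3 in EnergyConsumptionModel is
  commented out by this patch.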
---
 .../db/functions/recommendations_functions.py | 22 +++++++++----------
 backend/app/plan/router.py                    |  1 +
 backend/ml_models/Valuation.py                |  8 +++----
 backend/requirements/requirements.txt         |  3 +++
 etl/bill_savings/EnergyConsumptionModel.py    | 14 ++++++------
 etl/epc/DataProcessor.py                      | 16 +++++++-------
 etl/epc/settings.py                           |  7 ++++--
 recommendations/Recommendations.py            | 16 +++++++-------
 utils/s3.py                                   |  2 +-
 9 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index feeced10..d6e41c61 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -110,19 +110,19 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
                 "type": rec["type"],
                 "measure_type": rec["measure_type"],
                 "description": rec["description"],
-                "estimated_cost": rec["total"],
+                "estimated_cost": float(rec["total"]),
                 "default": rec["default"],
-                "starting_u_value": rec.get("starting_u_value"),
-                "new_u_value": rec.get("new_u_value"),
-                "sap_points": rec["sap_points"],
-                "energy_savings": rec["heat_demand"],
-                "kwh_savings": rec["kwh_savings"],
-                "co2_equivalent_savings": rec["co2_equivalent_savings"],
-                "total_work_hours": rec["labour_hours"],
-                "energy_cost_savings": rec["energy_cost_savings"],
-                "labour_days": rec["labour_days"],
+                "starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None,
+                "new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None,
+                "sap_points": float(rec["sap_points"]),
+                "energy_savings": float(rec["heat_demand"]),
+                "kwh_savings": float(rec["kwh_savings"]),
+                "co2_equivalent_savings": float(rec["co2_equivalent_savings"]),
+                "total_work_hours": float(rec["labour_hours"]),
+                "energy_cost_savings": float(rec["energy_cost_savings"]),
+                "labour_days": float(rec["labour_days"]),
                 "already_installed": rec["already_installed"],
-                "heat_demand": rec["heat_demand"]
+                "heat_demand": float(rec["heat_demand"])
             }
             for rec in recommendations_to_upload
         ]
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 51b03d01..074b5b75 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -774,6 +774,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                         update_or_create_property_spatial_details(session, p.uprn, p.spatial)
 
                         property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
+                        
                         update_property_data(
                             session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
                         )
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index c6c1582b..92c55641 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -244,8 +244,8 @@ class PropertyValuation:
 
         return {
             "current_value": current_value,
-            "lower_bound_increased_value": current_value * (1 + min_increase),
-            "upper_bound_increased_value": current_value * (1 + max_increase),
-            "average_increased_value": current_value * (1 + avg_increase),
-            "average_increase": current_value * (1 + avg_increase) - current_value
+            "lower_bound_increased_value": float(current_value * (1 + min_increase)),
+            "upper_bound_increased_value": float(current_value * (1 + max_increase)),
+            "average_increased_value": float(current_value * (1 + avg_increase)),
+            "average_increase": float(current_value * (1 + avg_increase) - current_value)
         }
diff --git a/backend/requirements/requirements.txt b/backend/requirements/requirements.txt
index 82e44fcf..11f29183 100644
--- a/backend/requirements/requirements.txt
+++ b/backend/requirements/requirements.txt
@@ -24,3 +24,6 @@ msgpack==1.1.0
 scikit-learn==1.5.2
 cffi==1.15.1
 mip==1.15.0
+pyarrow==17.0.0
+fastparquet==2024.5.0
+
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 153f4ee2..1ccfee60 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -79,13 +79,13 @@ class EnergyConsumptionModel:
             if x not in self.CATEGORICAL_COLUMNS
         })
 
-        if model_paths:
-            for target, path in model_paths.items():
-                # Read model
-                self.models[target] = read_pickle_from_s3(
-                    bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path
-                )
-                # Read dummy schema
+        # if model_paths:
+        #     for target, path in model_paths.items():
+        #         # Read model
+        #         self.models[target] = read_pickle_from_s3(
+        #             bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path
+        #         )
+        # Read dummy schema
 
         if dummy_schema_path:
             self.dummy_schema = read_pickle_from_s3(
diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py
index 4ad854c1..9655cf77 100644
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@@ -263,7 +263,7 @@ class EPCDataProcessor:
 
         # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
         data = self.data.replace(data_anomaly_map)
-        data = data.replace(np.NAN, None)
+        data = data.replace(np.nan, None)
 
         self.data = data
 
@@ -384,7 +384,7 @@ class EPCDataProcessor:
             has_missings = pd.isnull(self.data[col]).sum()
             while has_missings:
                 self.data = apply_clean(
-                    data=self.data, matching_columns=matching_columns[0 : to_index + 1]
+                    data=self.data, matching_columns=matching_columns[0: to_index + 1]
                 )
                 has_missings = pd.isnull(self.data[col]).sum()
 
@@ -487,7 +487,7 @@ class EPCDataProcessor:
 
         filled_data = (
             self.data.groupby("UPRN", group_keys=True)[columns_to_fill]
-            .apply(lambda group: group.fillna(method="bfill").fillna(method="ffill"))
+            .apply(lambda group: group.bfill().ffill().infer_objects(copy=False))
             .reset_index()
             .set_index("level_1")
             .sort_index()
@@ -791,7 +791,7 @@ class EPCDataProcessor:
         We fill photo supply with zeros where it's missing
         """
 
-        self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
+        self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].astype("Int64").fillna(0)
 
     @staticmethod
     def apply_averages_cleaning(
@@ -858,12 +858,12 @@ class EPCDataProcessor:
 
         # Fill NaN values with averages
         for col in cols_to_clean:
-            data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
-            data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
+            data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"])
+            data_to_clean = data_to_clean.drop(columns=[f"{col}_AVERAGE"])
             # If we still have missings
-            data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
+            data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[col].mean())
             # Final step if we still have missings - use global mean
-            data_to_clean[col].fillna(global_averages[col], inplace=True)
+            data_to_clean[col] = data_to_clean[col].fillna(global_averages[col])
 
         return data_to_clean
 
diff --git a/etl/epc/settings.py b/etl/epc/settings.py
index a814750f..2a9b1746 100644
--- a/etl/epc/settings.py
+++ b/etl/epc/settings.py
@@ -182,7 +182,6 @@ EFFICIENCY_FEATURES = [
 
 ROOM_FEATURES = ["number_habitable_rooms", "number_heated_rooms"]
 
-
 COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
     "TRANSACTION_TYPE",
     "ENERGY_TARIFF",  # Not sure if this is relevant
@@ -241,7 +240,11 @@ BUILT_FORM_REMAP = {
 DATA_PROCESSOR_SETTINGS = {
     "low_memory": False,
     "epc_minimum_count": 1,
-    "column_mappings": {"UPRN": [int, str]},
+    "column_mappings": {
+        "UPRN": [int, str],
+        "NUMBER_HEATED_ROOMS": [float],
+        "NUMBER_HABITABLE_ROOMS": [float],
+    },
 }
 
 # This has a manual mapping of the column types required
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index aa7e041e..dd51b47d 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -730,8 +730,8 @@ class Recommendations:
                             "id": STARTING_DUMMY_ID_VALUE,
                             "phase": STARTING_DUMMY_ID_VALUE,
                             "recommendation_id": STARTING_DUMMY_ID_VALUE,
-                            "predictions_heating": property_kwh["heating"],
-                            "predictions_hotwater": property_kwh["hot_water"],
+                            "predictions_heating": float(property_kwh["heating"]),
+                            "predictions_hotwater": float(property_kwh["hot_water"]),
                         }
                     ]
                 ),
@@ -854,12 +854,12 @@ class Recommendations:
 
         # We return a dictionary that contains the individual costs, that can be stored to the database
         current_energy_bill = {
-            "heating_cost_current": starting_figures["heating_cost"],
-            "hot_water_cost_current": starting_figures["hotwater_cost"],
-            "lighting_cost_current": property_instance.energy_cost_estimates["unadjusted"]["lighting"],
-            "appliances_cost_current": property_instance.energy_cost_estimates["unadjusted"]["appliances"],
-            "gas_standing_charge": gas_standing_charge,
-            "electricity_standing_charge": electricity_standing_charge,
+            "heating_cost_current": float(starting_figures["heating_cost"]),
+            "hot_water_cost_current": float(starting_figures["hotwater_cost"]),
+            "lighting_cost_current": float(property_instance.energy_cost_estimates["unadjusted"]["lighting"]),
+            "appliances_cost_current": float(property_instance.energy_cost_estimates["unadjusted"]["appliances"]),
+            "gas_standing_charge": float(gas_standing_charge),
+            "electricity_standing_charge": float(electricity_standing_charge),
         }
 
         return current_energy_bill
diff --git a/utils/s3.py b/utils/s3.py
index ca0cbfac..1a686b55 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -192,7 +192,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
     try:
         data = pickle.loads(serialized_data)
     except Exception as e:
-        logger.errpr(f'Failed to deserialize data: {str(e)}')
+        logger.error(f'Failed to deserialize data: {str(e)}')
         return None
 
     return data