From a2c61395de78fb175c8207ebc698fcca2f4eef6b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 21 Oct 2024 18:34:18 +0100 Subject: [PATCH] new environment working --- .../db/functions/recommendations_functions.py | 22 +++++++++---------- backend/app/plan/router.py | 1 + backend/ml_models/Valuation.py | 8 +++---- backend/requirements/requirements.txt | 3 +++ etl/bill_savings/EnergyConsumptionModel.py | 14 ++++++------ etl/epc/DataProcessor.py | 16 +++++++------- etl/epc/settings.py | 7 ++++-- recommendations/Recommendations.py | 16 +++++++------- utils/s3.py | 2 +- 9 files changed, 48 insertions(+), 41 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index feeced10..d6e41c61 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -110,19 +110,19 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "type": rec["type"], "measure_type": rec["measure_type"], "description": rec["description"], - "estimated_cost": rec["total"], + "estimated_cost": float(rec["total"]), "default": rec["default"], - "starting_u_value": rec.get("starting_u_value"), - "new_u_value": rec.get("new_u_value"), - "sap_points": rec["sap_points"], - "energy_savings": rec["heat_demand"], - "kwh_savings": rec["kwh_savings"], - "co2_equivalent_savings": rec["co2_equivalent_savings"], - "total_work_hours": rec["labour_hours"], - "energy_cost_savings": rec["energy_cost_savings"], - "labour_days": rec["labour_days"], + "starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None, + "new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None, + "sap_points": float(rec["sap_points"]), + "energy_savings": float(rec["heat_demand"]), + "kwh_savings": float(rec["kwh_savings"]), + "co2_equivalent_savings": float(rec["co2_equivalent_savings"]), + "total_work_hours": float(rec["labour_hours"]), + "energy_cost_savings": float(rec["energy_cost_savings"]), + "labour_days": float(rec["labour_days"]), "already_installed": rec["already_installed"], - "heat_demand": rec["heat_demand"] + "heat_demand": float(rec["heat_demand"]) } for rec in recommendations_to_upload ] diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 51b03d01..074b5b75 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -774,6 +774,7 @@ async def trigger_plan(body: PlanTriggerRequest): update_or_create_property_spatial_details(session, p.uprn, p.spatial) property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) + update_property_data( session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data ) diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index c6c1582b..92c55641 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -244,8 +244,8 @@ class PropertyValuation: return { "current_value": current_value, - "lower_bound_increased_value": current_value * (1 + min_increase), - "upper_bound_increased_value": current_value * (1 + max_increase), - "average_increased_value": current_value * (1 + avg_increase), - "average_increase": current_value * (1 + avg_increase) - current_value + "lower_bound_increased_value": float(current_value * (1 + min_increase)), + "upper_bound_increased_value": float(current_value * (1 + max_increase)), + "average_increased_value": float(current_value * (1 + avg_increase)), + "average_increase": float(current_value * (1 + avg_increase) - current_value) } diff --git a/backend/requirements/requirements.txt b/backend/requirements/requirements.txt index 82e44fcf..11f29183 100644 --- a/backend/requirements/requirements.txt +++ b/backend/requirements/requirements.txt @@ -24,3 +24,6 @@ msgpack==1.1.0 scikit-learn==1.5.2 cffi==1.15.1 mip==1.15.0 +pyarrow==17.0.0 +fastparquet==2024.5.0 + diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index 153f4ee2..1ccfee60 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -79,13 +79,13 @@ class EnergyConsumptionModel: if x not in self.CATEGORICAL_COLUMNS }) - if model_paths: - for target, path in model_paths.items(): - # Read model - self.models[target] = read_pickle_from_s3( - bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path - ) - # Read dummy schema + # if model_paths: + # for target, path in model_paths.items(): + # # Read model + # self.models[target] = read_pickle_from_s3( + # bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path + # ) + # Read dummy schema if dummy_schema_path: self.dummy_schema = read_pickle_from_s3( diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index 4ad854c1..9655cf77 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -263,7 +263,7 @@ class EPCDataProcessor: # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values data = self.data.replace(data_anomaly_map) - data = data.replace(np.NAN, None) + data = data.replace(np.nan, None) self.data = data @@ -384,7 +384,7 @@ class EPCDataProcessor: has_missings = pd.isnull(self.data[col]).sum() while has_missings: self.data = apply_clean( - data=self.data, matching_columns=matching_columns[0 : to_index + 1] + data=self.data, matching_columns=matching_columns[0: to_index + 1] ) has_missings = pd.isnull(self.data[col]).sum() @@ -487,7 +487,7 @@ class EPCDataProcessor: filled_data = ( self.data.groupby("UPRN", group_keys=True)[columns_to_fill] - .apply(lambda group: group.fillna(method="bfill").fillna(method="ffill")) + .apply(lambda group: group.bfill().ffill().infer_objects(copy=False)) .reset_index() .set_index("level_1") .sort_index() @@ -791,7 +791,7 @@ class EPCDataProcessor: We fill photo supply with zeros where it's missing """ - self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0) + self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].astype("Int64").fillna(0) @staticmethod def apply_averages_cleaning( @@ -858,12 +858,12 @@ class EPCDataProcessor: # Fill NaN values with averages for col in cols_to_clean: - data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True) - data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True) + data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"]) + data_to_clean = data_to_clean.drop(columns=[f"{col}_AVERAGE"]) # If we still have missings - data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True) + data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[col].mean()) # Final step if we still have missings - use global mean - data_to_clean[col].fillna(global_averages[col], inplace=True) + data_to_clean[col] = data_to_clean[col].fillna(global_averages[col]) return data_to_clean diff --git a/etl/epc/settings.py b/etl/epc/settings.py index a814750f..2a9b1746 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -182,7 +182,6 @@ EFFICIENCY_FEATURES = [ ROOM_FEATURES = ["number_habitable_rooms", "number_heated_rooms"] - COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [ "TRANSACTION_TYPE", "ENERGY_TARIFF", # Not sure if this is relevant @@ -241,7 +240,11 @@ BUILT_FORM_REMAP = { DATA_PROCESSOR_SETTINGS = { "low_memory": False, "epc_minimum_count": 1, - "column_mappings": {"UPRN": [int, str]}, + "column_mappings": { + "UPRN": [int, str], + "NUMBER_HEATED_ROOMS": [float], + "NUMBER_HABITABLE_ROOMS": [float], + }, } # This has a manual mapping of the column types required diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index aa7e041e..dd51b47d 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -730,8 +730,8 @@ class Recommendations: "id": STARTING_DUMMY_ID_VALUE, "phase": STARTING_DUMMY_ID_VALUE, "recommendation_id": STARTING_DUMMY_ID_VALUE, - "predictions_heating": property_kwh["heating"], - "predictions_hotwater": property_kwh["hot_water"], + "predictions_heating": float(property_kwh["heating"]), + "predictions_hotwater": float(property_kwh["hot_water"]), } ] ), @@ -854,12 +854,12 @@ class Recommendations: # We return a dictionary that contains the individual costs, that can be stored to the database current_energy_bill = { - "heating_cost_current": starting_figures["heating_cost"], - "hot_water_cost_current": starting_figures["hotwater_cost"], - "lighting_cost_current": property_instance.energy_cost_estimates["unadjusted"]["lighting"], - "appliances_cost_current": property_instance.energy_cost_estimates["unadjusted"]["appliances"], - "gas_standing_charge": gas_standing_charge, - "electricity_standing_charge": electricity_standing_charge, + "heating_cost_current": float(starting_figures["heating_cost"]), + "hot_water_cost_current": float(starting_figures["hotwater_cost"]), + "lighting_cost_current": float(property_instance.energy_cost_estimates["unadjusted"]["lighting"]), + "appliances_cost_current": float(property_instance.energy_cost_estimates["unadjusted"]["appliances"]), + "gas_standing_charge": float(gas_standing_charge), + "electricity_standing_charge": float(electricity_standing_charge), } return current_energy_bill diff --git a/utils/s3.py b/utils/s3.py index ca0cbfac..1a686b55 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -192,7 +192,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name): try: data = pickle.loads(serialized_data) except Exception as e: - logger.errpr(f'Failed to deserialize data: {str(e)}') + logger.error(f'Failed to deserialize data: {str(e)}') return None return data