From fde3a3f24c14bae32e593fb8eab71385d2035752 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 10 Oct 2023 13:00:04 +0800 Subject: [PATCH] debugging cleaning --- backend/Property.py | 15 ++++----- backend/app/plan/router.py | 62 +++++++++++++++++++------------------- backend/app/plan/utils.py | 3 +- etl/epc/DataProcessor.py | 2 +- 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 8d70ba8c..2a201b97 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -510,10 +510,10 @@ class Property(Definitions): result = property_dimensions[(property_dimensions["PROPERTY_TYPE"] == self.data["property-type"])] - if self.age_band: - result = result[(result["CONSTRUCTION_AGE_BAND"] == self.age_band)] + if self.construction_age_band is not None and self.construction_age_band not in self.DATA_ANOMALY_MATCHES: + result = result[(result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)] - if self.data["built-form"] not in self.DATA_ANOMALY_MATCHES: + if self.data["built-form"] not in self.DATA_ANOMALY_MATCHES and self.data["built-form"] in result["BUILT_FORM"]: result = result[(result["BUILT_FORM"] == self.data["built-form"])] return result[["NUMBER_HABITABLE_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]].mean() @@ -536,10 +536,11 @@ class Property(Definitions): if not self.data["number-habitable-rooms"] or ( self.data["floor-height"] == "" or self.data["floor-height"] in self.DATA_ANOMALY_MATCHES ): - property_dimensions = read_dataframe_from_s3_parquet( - bucket_name=DATA_BUCKET, file_key=f"property_dimensions/{self.data['local-authority']}.parquet" - ) - self.property_dimensions = self._filter_property_dimensions(property_dimensions) + if self.property_dimensions is None: + property_dimensions = read_dataframe_from_s3_parquet( + bucket_name=DATA_BUCKET, file_key=f"property_dimensions/{self.data['local-authority']}.parquet" + ) + self.property_dimensions = self._filter_property_dimensions(property_dimensions) if not self.data["number-habitable-rooms"]: self.number_of_rooms = float(self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round()) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 4f6a7f10..111d407c 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -51,7 +51,7 @@ router = APIRouter( async def trigger_plan(body: PlanTriggerRequest): logger.info("Connecting to db") session = sessionmaker(bind=db_engine)() - created_at = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + created_at = datetime.now().isoformat() try: session.begin() @@ -118,7 +118,7 @@ async def trigger_plan(body: PlanTriggerRequest): recommendations = {} recommendations_scoring_data = [] - for p in input_properties: + for p in tqdm(input_properties): property_recommendations = [] # Property recommendations @@ -172,7 +172,7 @@ async def trigger_plan(body: PlanTriggerRequest): ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at) for recommendations_by_type in property_recommendations: - for rec in recommendations_by_type: + for i, rec in enumerate(recommendations_by_type): scoring_dict = create_recommendation_scoring_data( property=p, recommendation=rec, @@ -180,38 +180,38 @@ async def trigger_plan(body: PlanTriggerRequest): ending_epc_data=ending_epc_data, fixed_data=fixed_data, ) + if i == 0: + none_cols = [] + for col in scoring_dict.keys(): + if col in [ + "UPRN", "id", "LOCAL_AUTHORITY", + ]: + continue - none_cols = [] - for col in scoring_dict.keys(): - if col in [ - "UPRN", "id", "LOCAL_AUTHORITY", - ]: - continue + if col in [ + "SAP_STARTING", "HEAT_DEMAND_STARTING", "CARBON_STARTING", "FLOOR_HEIGHT_STARTING", + "TOTAL_FLOOR_AREA_STARTING", "DAYS_TO_STARTING", "estimated_perimeter_STARTING", + "SAP_ENDING", "HEAT_DEMAND_ENDING", + "CARBON_ENDING", "FLOOR_HEIGHT_ENDING", + "TOTAL_FLOOR_AREA_ENDING", "DAYS_TO_ENDING", "estimated_perimeter_ENDING" + ]: + try: + if scoring_dict[col] is None: + blah1 + float(scoring_dict[col]) + continue + except: + raise Exception("wtf") - if col in [ - "SAP_STARTING", "HEAT_DEMAND_STARTING", "CARBON_STARTING", "FLOOR_HEIGHT_STARTING", - "TOTAL_FLOOR_AREA_STARTING", "DAYS_TO_STARTING", "estimated_perimeter_STARTING", - "SAP_ENDING", "HEAT_DEMAND_ENDING", - "CARBON_ENDING", "FLOOR_HEIGHT_ENDING", - "TOTAL_FLOOR_AREA_ENDING", "DAYS_TO_ENDING", "estimated_perimeter_ENDING" - ]: - try: + unique_vals = sap_change_dataset[col].unique() + if scoring_dict[col] not in unique_vals: if scoring_dict[col] is None: - blah1 - float(scoring_dict[col]) - continue - except: - raise Exception("wtf") + none_cols.append(col) + continue + blah - unique_vals = sap_change_dataset[col].unique() - if scoring_dict[col] not in unique_vals: - if scoring_dict[col] is None: - none_cols.append(col) - continue - blah - - if none_cols: - blahblah + if none_cols: + blahblah recommendations_scoring_data.append(scoring_dict) diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 4f1a1ad2..05e28768 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -166,7 +166,8 @@ def create_recommendation_scoring_data( insulation_thickness=recommendation["parts"][0]["depths"][0], age_band=property.age_band, ) - scoring_dict["floor_insulation_thickness_ENDING"] = "above average" + # We don't really see above average for this in the training data + scoring_dict["floor_insulation_thickness_ENDING"] = "average" else: if not scoring_dict["floor_thermal_transmittance_ENDING"]: scoring_dict["floor_thermal_transmittance_ENDING"] = get_floor_u_value( diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index cbbc68a7..e9c84c3c 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -544,7 +544,7 @@ class DataProcessor: if isinstance(lodgement_date, str): return ( - pd.to_datetime(lodgement_date).tz_localize(None) - pd.to_datetime(EARLIEST_EPC_DATE) + pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE) ).days return (