diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index bc8f9cc6..492b9042 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -86,6 +86,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Ground source heat pump, underfloor, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, "Electric ceiling heating": {"fuel": "Electricity", "cop": 1}, + "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85}, + "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 30acf370..4aa7fb43 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -27,6 +27,7 @@ class SubTaskInterface: # -------------------------------------------------------- def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None): + now = datetime.now(timezone.utc) with get_db_session() as session: task = session.get(Task, task_id) if not task: @@ -39,7 +40,7 @@ class SubTaskInterface: task_id=task_id, inputs=json.dumps(inputs) if inputs else None, status=status, - job_started=None, + job_started=now, job_completed=None, ) @@ -271,12 +272,13 @@ class TasksInterface: :param task_only: If True, only create the Task record, without a SubTask :return: """ + now = datetime.now(timezone.utc) with get_db_session() as session: task = Task( task_source=task_source, service=service, status="waiting", - job_started=None, + job_started=now, job_completed=None, ) diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 62825cc5..3fa84136 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -212,11 +212,11 @@ class TrainingDataset(BaseDataset): common_cols = [[col + "_starting", col + "_ending"] for col in common_cols] self.df = self.df.loc[ - :, - no_suffix_cols - + only_ending_cols - + [col for cols in common_cols for col in cols], - ] + :, + no_suffix_cols + + only_ending_cols + + [col for cols in common_cols for col in cols], + ] def _remove_abnormal_change_in_floor_area(self): """ @@ -394,12 +394,13 @@ class TrainingDataset(BaseDataset): axis=1, ) - roof_starting_uvalue = self.df["roof_thermal_transmittance"].fillna( - roof_starting_uvalue - ) - roof_ending_uvalue = self.df["roof_thermal_transmittance_ending"].fillna( - roof_ending_uvalue - ) + roof_starting_uvalue = pd.to_numeric( + self.df["roof_thermal_transmittance"], errors="coerce" + ).fillna(roof_starting_uvalue) + + roof_ending_uvalue = pd.to_numeric( + self.df["roof_thermal_transmittance_ending"], errors="coerce" + ).fillna(roof_ending_uvalue) # ~~~~~~~~~~~~~~~~~~ # Floor @@ -459,20 +460,20 @@ class TrainingDataset(BaseDataset): axis=1, ) - floor_starting_uvalue = self.df["floor_thermal_transmittance"].fillna( - floor_starting_uvalue - ) - floor_ending_uvalue = self.df["floor_thermal_transmittance_ending"].fillna( - floor_ending_uvalue - ) + floor_starting_uvalue = pd.to_numeric( + self.df["floor_thermal_transmittance"], errors="coerce" + ).fillna(floor_starting_uvalue) + floor_ending_uvalue = pd.to_numeric( + self.df["floor_thermal_transmittance_ending"], errors="coerce" + ).fillna(floor_ending_uvalue) for component in ["walls", "roof", "floor"]: - self.df[f"{component}_thermal_transmittance"] = self.df[ - f"{component}_thermal_transmittance" - ].fillna(eval(f"{component}_starting_uvalue")) - self.df[f"{component}_thermal_transmittance_ending"] = self.df[ - f"{component}_thermal_transmittance_ending" - ].fillna(eval(f"{component}_ending_uvalue")) + self.df[f"{component}_thermal_transmittance"] = pd.to_numeric( + self.df[f"{component}_thermal_transmittance"], errors="coerce" + ).fillna(eval(f"{component}_starting_uvalue")) + self.df[f"{component}_thermal_transmittance_ending"] = pd.to_numeric( + self.df[f"{component}_thermal_transmittance_ending"], errors="coerce" + ).fillna(eval(f"{component}_ending_uvalue")) self.df = self.df.drop( columns=[ @@ -521,7 +522,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"] ) - ] + ] elif component == "floor": expanded_df = expanded_df[ (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) @@ -538,7 +539,7 @@ class TrainingDataset(BaseDataset): expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"] ) - ] + ] elif component == "roof": expanded_df = expanded_df[ (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) @@ -551,7 +552,7 @@ class TrainingDataset(BaseDataset): expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"] ) - ] + ] return expanded_df diff --git a/etl/epc/Record.py b/etl/epc/Record.py index ec4dad96..c1c3ff67 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -38,6 +38,8 @@ DATA_BUCKET = os.environ.get( "DATA_BUCKET", "retrofit-data-dev" if ENVIRONMENT == "dev" else None ) +pd.set_option("future.no_silent_downcasting", True) + @dataclass class EPCRecord: @@ -392,7 +394,7 @@ class EPCRecord: floor_height_data = self.cleaning_data[ (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) & (self.cleaning_data["built_form"] == self.prepared_epc["built-form"]) - ] + ] average = floor_height_data["floor_height"].mean() sd = floor_height_data["floor_height"].std() # If we're in the top 0.5 percentile of floor heights, we'll set it to the average diff --git a/etl/epc/settings.py b/etl/epc/settings.py index d453080e..f4d0e174 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -51,8 +51,17 @@ DATA_ANOMALY_MATCHES = { "UNKNOWN", # "Unknown", - # Observed error case + # Observed error cases "(error), (error)", + "error , error", + "Description", + "description", + "Undefined Welsh description for crtrl code 2113", + "undefined welsh description for crtrl code 2113", + "Hot water system", + "hot water system", + "Heating system", + "heating system", } # Add the post_sap10 date to indicate if the epc is post sap10 diff --git a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py index 0dcf97c5..3e2df543 100644 --- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py @@ -119,7 +119,13 @@ class MainheatControlAttributes(Definitions): 'rheoli r tal a llaw': 'manual charge control', 'tal un gyfradd, thermostat ystafell yn unig': 'flat rate charging, room thermostat only', "rheoli'r t l llaw": "manual charge control", - "2205 rhaglennydd ac o leiaf ddau thermostat ystafell": "programmer and at least two room thermostats" + "2205 rhaglennydd ac o leiaf ddau thermostat ystafell": "programmer and at least two room thermostats", + "2603 rhaglennydd a thermostatau ar y cyfarpar": "programmer, room thermostat", + "2404 rheolyddion i wresogyddion storio sygçön cadw llawer o wres": "controls for high heat retention storage " + "heaters", + 'system dalu wedigçöi chysylltu +ó defnyddio gwres cymunedol, rhaglennydd ac o leiaf ddau thermostat ' + 'ystafell': 'charging system linked to use of community heating, programmer and at least two room thermostats' + } NO_DATA_DESCRIPTIONS = [ diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 20f0c760..5d97a554 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -744,7 +744,7 @@ class Recommendations: # fairly regularly. A task has been added to planner to refactor this # We have observed an edge case where the fuel is described as not being community # but the hot water is. We handle as such - logger.warning("Hot water description not mapped: %s", heating_description) + logger.warning("Hot water description not mapped: %s", hotwater_description) mapped_hotwater = {"fuel": 'Unmapped', "cop": 0.9} return {