From 2ca02812100628f667dd85abbae76ec6b71411c0 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 17 Mar 2025 08:11:14 +0000 Subject: [PATCH] save current work load --- etl/development.py | 2 +- etl/pdfReader/sitenotes.py | 54 +++++++++++++++++++++++++++++--------- etl/transform/types.py | 14 ++++++++-- 3 files changed, 54 insertions(+), 16 deletions(-) diff --git a/etl/development.py b/etl/development.py index 0c211f8..d6a1b02 100644 --- a/etl/development.py +++ b/etl/development.py @@ -34,6 +34,6 @@ if __name__ == "__main__": # - [x] Finish off scraping for section that I need to finish # - [in progress] Pydantic format for deemed report -# - [] Generate deemed report +# - [] Generate deemed report -> scopre of automations thats possible # - [] Docker compose to an sql database in docker compose (2 hours, then work on sql) # - [] Deploy via terraform to aws (1 day) \ No newline at end of file diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index 05707ce..78ec346 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -3,7 +3,8 @@ from transform.types import ( CompanyInfo, SurverySummaryInfo, AssessorInfo, PropertyDescription, PropertyDetail, Dimension, Walls, Roofs, Floors, Door, VentilationAndCooling, - Lighting, WaterHeating, HotWaterCylinder, SolarWaterHeating + Lighting, WaterHeating, HotWaterCylinder, SolarWaterHeating, + ShowerAndBaths, FlueGasHeatRecoverySystem, ) from datetime import datetime @@ -48,9 +49,6 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): # These one are quick fixes can be done on train or one at a time - # self.get_section_16() - # self.get_section_17() - # self.get_section_18() # self.get_section_19() # self.get_section_20() # self.get_section_21() @@ -221,6 +219,18 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): # Section 15.1 Hot Water Cylinder hotWaterCylinder = self.get_hot_water_cylinder() + # Section 16 Solar Water Heating + solarWaterHeating = self.get_solar_water_heating() + 
+ # Section 17.0 + # Ignored because this section is empty in the copy I'm using. Future todo: clarify + + # Section 18.0 + showerAndBaths = self.get_shower_and_baths() + + # Section 19.0 + fghrs = self.get_fghrs() + self.property_description = PropertyDescription( built_form = get_value("Built Form"), detachment_or_position = get_value("Detachment/Position"), @@ -269,6 +279,9 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): lighting=lighting, waterHeating=waterHeating, hotWaterCylinder=hotWaterCylinder, + solarWaterHeating=solarWaterHeating, + showerAndBaths=showerAndBaths, + flueGasHeatRecoverySystem=fghrs, ) @@ -407,8 +420,6 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): return walls - - def get_roof(self): data = self.raw_data[self.raw_data.index('8.0 Roofs'): self.raw_data.index('9.0 Floors')] sub_titles = [ @@ -746,13 +757,16 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): dict_ = self.two_columns_processor(data, sub_titles, avoid) return SolarWaterHeating( - solar_water_heating_details_known=True if dict_("solar_water_heating_details_known", "NO").upper() == "YES" else False + solar_water_heating_details_known=True if dict_.get("solar_water_heating_details_known", "NO").upper() == "YES" else False ) def get_section_17(self): - pass + """ + Future todo + """ + raise NotImplemented("Please contact Jun-te Kim to implement this") - def get_section_18(self): + def get_shower_and_baths(self): data = self.get_data_between("18.0 Showers And Baths", "19.0 Flue Gas Heat Recovery System") sub_titles = [ "Number of Rooms with Bath and/or Shower", @@ -762,7 +776,8 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "19.0 Flue Gas Heat Recovery System", ] - self.two_columns_processor(data, sub_titles, avoid, 18.0) + dict_1 = self.two_columns_processor(data, sub_titles, avoid) + avoid = [ "18.0 Showers And Baths", "19.0 Flue Gas Heat Recovery System", @@ -771,9 +786,17 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "Number of Rooms with Mixer Shower and
no", # Number of Rooms with Mixer Shower and no Bath "Number of Rooms with Mixer Shower and", # Number of Rooms with Mixer Shower and Bath ] - self.two_columns_processor(data, sub_titles, avoid, 18.0, 2) + dict_2 = self.two_columns_processor(data, sub_titles, avoid, 2) + + return ShowerAndBaths( + no_of_rooms_with_baths_and_or_shower=int(dict_1.get("number_of_rooms_with_bath_and/or_shower", -1)), + no_of_rooms_with_mixer_shower_and_no_baths=int(dict_2.get("number_of_rooms_with_mixer_shower_and_no", -1)), + no_of_rooms_with_mixer_shower_and_baths=int(dict_2.get("number_of_rooms_with_mixer_shower_and", -1)), + ) + + - def get_section_19(self): + def get_fghrs(self): data = self.get_data_between("19.0 Flue Gas Heat Recovery System","20.0 Photovoltaic Panel") sub_titles = [ "FGHRS Present", @@ -783,7 +806,12 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "20.0 Photovoltaic Panel", ] - self.two_columns_processor(data, sub_titles, avoid, 19) + dict_ = self.two_columns_processor(data, sub_titles, avoid) + + return FlueGasHeatRecoverySystem( + fghrs_present=True if dict_.get("fghrs_present", "NO").upper() == "YES" else False + ) + def get_section_20(self): data = self.get_data_between("20.0 Photovoltaic Panel","21.0 Wind Turbine") diff --git a/etl/transform/types.py b/etl/transform/types.py index b3a9391..c48db71 100644 --- a/etl/transform/types.py +++ b/etl/transform/types.py @@ -122,7 +122,14 @@ class HotWaterCylinder(BaseModel): class SolarWaterHeating(BaseModel): solar_water_heating_details_known: bool - + +class ShowerAndBaths(BaseModel): + no_of_rooms_with_baths_and_or_shower: int + no_of_rooms_with_mixer_shower_and_no_baths: int + no_of_rooms_with_mixer_shower_and_baths: int + +class FlueGasHeatRecoverySystem(BaseModel): + fghrs_present: bool class PropertyDetail(BaseModel): @@ -156,4 +163,7 @@ class PropertyDescription(BaseModel): ventilationAndCooling: Optional[VentilationAndCooling] lighting: Optional[Lighting] waterHeating: Optional[WaterHeating] - 
hotWaterCylinder: Optional[HotWaterCylinder] \ No newline at end of file + hotWaterCylinder: Optional[HotWaterCylinder] + solarWaterHeating: Optional[SolarWaterHeating] + showerAndBaths: Optional[ShowerAndBaths] + flueGasHeatRecoverySystem: Optional[FlueGasHeatRecoverySystem]