diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index a4d315f..d53764f 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -5,7 +5,8 @@ from transform.types import ( Walls, Roofs, Floors, Door, VentilationAndCooling, Lighting, WaterHeating, HotWaterCylinder, SolarWaterHeating, ShowerAndBaths, FlueGasHeatRecoverySystem, PhotovoltaicPanel, - WindTurbine, OtherDetails + WindTurbine, OtherDetails, Windows, Heating, HeatingSystemControls, + HeatingType ) from datetime import datetime @@ -40,20 +41,12 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): self.transform_summary_information() self.transform_sections() - # Saves windows till the end as that requires thought - # self.get_section_11() - # Heaters require thought so will complete later # self.get_section_14() # self.get_section_14_1() # self.get_section_14_2() - # These one are quick fixes can be done on train or one at a time - # self.get_section_20() - # self.get_section_21() - # self.get_section_22() - def transform_summary_information(self): # Summary Information avoid = [ @@ -207,12 +200,24 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): # Section 10 door = self.get_door() + windows = self.get_windows() + print(windows["main_property"]) + # Section 12 ventilationAndCooling = self.get_ventilation_and_cooling() # Section 13 lighting = self.get_lighting() + # Section 14.0 + main_heating = self.get_main_heating() + + # Section 14.1 + secondary_heating = self.get_secondary_heating() + + # Section 14.2 + secondary_heating_type = self.get_secondary_heating_type() + # Section 15.0 Water Heating waterHeating = self.get_water_heating() @@ -260,6 +265,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): wall=walls[0], roof=roofs[0], floor=floors[0], + windows=windows.get("main_property", []), )if no_of_main_property > 0 else None, ex1_property=PropertyDetail( age_band= age_bands[1], @@ -267,6 +273,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): wall=walls[1], roof=roofs[1], floor=floors[1], + windows=windows.get("extension_1", []), )if no_of_extension_1 > 0 else None, ex2_property=PropertyDetail( age_band= age_bands[2], @@ -274,6 +281,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): wall=walls[2], roof=roofs[2], floor=floors[2], + windows=windows.get("extension_2", []), )if no_of_extension_2 > 0 else None, ex3_property=PropertyDetail( age_band= age_bands[3], @@ -281,6 +289,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): wall=walls[3], roof=roofs[3], floor=floors[3], + windows=windows.get("extension_3", []), )if no_of_extension_4 > 0 else None, conservatory=conservatory, door=door, @@ -294,6 +303,9 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): photovoltaicPanel=photovoltaicPanel, windTurbine=windTurbine, otherDetails=otherDetails, + mainHeating=main_heating, + secondaryHeatingType=secondary_heating_type, + mainHeating2=secondary_heating, ) @@ -563,7 +575,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): dict_.update({f"{items.lower().replace('-', '_').replace(' ','_')}":get_value(items)}) return dict_ - def get_section_11(self): + def get_windows(self): data = self.get_data_between("Window Location", "12.0 Ventilation & Cooling") headers = data[:8] data_entries = data[8:] @@ -605,25 +617,26 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): title = None until = 0 + dict_to_return = {} for i, items in enumerate(data_entries): if data_entries[i] in subtitles: title = data_entries[i].lower().replace(" ", "_").replace("-", "_") - setattr(self, f"section_11_{title}_window", []) + dict_to_return.update({f"{title}":[]}) if title and until == i: entry = data_entries[i:] index = find_compose_index(entry,orientation) new_entry = entry[index-3:index+3] - dict_ = { - "glazing type": new_entry[0], - "Area (m2)": new_entry[1], - "Roof Window": new_entry[2], - "Orientation": new_entry[3], - "U-value (W/m²K)": new_entry[4], - "g-value": new_entry[5], - } - lst = getattr(self, f"section_11_{title}_window") - lst.append(dict_) + window = Windows( + glazing_type= new_entry[0], + area_m2=float(new_entry[1]), + roof_window=True if new_entry[2].upper() == "YES" else False, + orientation=new_entry[3].upper(), + u_value_w_m2_k=int(new_entry[4]), + g_value=int(new_entry[5]), + ) + dict_to_return[f"{title}"].append(window) until = index + 3 + i + return dict_to_return def get_ventilation_and_cooling(self): data = self.raw_data[self.raw_data.index('12.0 Ventilation & Cooling'): self.raw_data.index('13.0 Lighting')] @@ -655,13 +668,12 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "Total number of L.E.L. fittings", ] dict_ = self.two_columns_processor(data, sub_titles, avoid = avoid) - print(dict_) return Lighting( total_no_of_light_fittings=int(dict_["total_number_of_light_fittings"]), total_no_of_lel_fittings=int(dict_["total_number_of_l.e.l._fittings"]), ) - def get_section_14(self): + def get_main_heating(self): data = self.raw_data[self.raw_data.index('14.0 Main Heating1'): self.raw_data.index('14.1 Main Heating2')] main_titles = [ "Main Heating Type", @@ -683,9 +695,27 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "Mains Gas Available", ] - self.two_column_with_extension_processor(data, sub_titles, main_titles, 14.0) + lst_ = self.two_column_with_extension_processor(data, sub_titles, main_titles) + dict_ = lst_[0] + return Heating( + type="main", + heating_source=dict_.get("heating_source", ""), + efficiency_source=dict_.get("efficiency_source", ""), + heating_fuel=dict_.get("heating_fuel", ""), + brand_name=dict_.get("brand_name", ""), + model_name=dict_.get("model_name", ""), + model_qualifer=dict_.get("model_qualifier", ""), + controls=HeatingSystemControls( + control_type=dict_.get("control_type", ""), + flue_type=dict_.get("flue_type",""), + fan_assisted_flue=True if dict_.get("fan_assisted_flue", "NO").upper() == "YES" else False, + heat_emitter_type=dict_.get("heat_emitter_type", ""), + electricity_meter_type=dict_.get("electricity_meter_type", ""), + mains_gas_available=True if dict_.get("mains_gas_available", "NO").upper() == "YES" else False, + ) + ) - def get_section_14_1(self): + def get_secondary_heating(self): data = self.raw_data[self.raw_data.index("14.1 Main Heating2"):self.raw_data.index("14.2 Secondary Heating Type")] main_titles = [ "Second Main Heating Type", @@ -704,9 +734,29 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "Fan Assisted Flue", "Heat Emitter Type", ] - self.two_column_with_extension_processor(data, sub_titles, main_titles, 14.1) + list = self.two_column_with_extension_processor(data, sub_titles, main_titles) + dict_ = list[0] + + return Heating( + type="secondary", + percentage_of_heated_floor_area_served=dict_.get("percentage_of_heated_floor_area_served_(%)", ""), + heating_source=dict_.get("heating_source", "") if dict_["heating_source"] is not None else "", + efficiency_source=dict_.get("efficiency_source", "") if dict_["efficiency_source"] is not None else "", + heating_fuel=dict_.get("heating_fuel", "") if dict_.get("heating_fuel") is not None else "", + brand_name=dict_.get("brand_name", ""), + model_name=dict_.get("model_name", ""), + model_qualifer=dict_.get("model_qualifier", ""), + controls=HeatingSystemControls( + control_type=dict_.get("control_type", ""), + flue_type=dict_.get("flue_type",""), + fan_assisted_flue=True if dict_.get("fan_assisted_flue", "NO").upper() == "YES" else False, + heat_emitter_type=dict_.get("heat_emitter_type", ""), + electricity_meter_type=dict_.get("electricity_meter_type", ""), + mains_gas_available=True if dict_.get("mains_gas_available", "NO").upper() == "YES" else False, + ) + ) - def get_section_14_2(self): + def get_secondary_heating_type(self): data = self.raw_data[self.raw_data.index("14.2 Secondary Heating Type"):self.raw_data.index("15.0 Water Heating")] avoid = [ "14.2 Secondary Heating Type", @@ -716,7 +766,11 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "Heating Type", "Fuel Type", ] - self.two_columns_processor(data, sub_titles, avoid, 14.2) + dict_ = self.two_columns_processor(data, sub_titles, avoid) + return HeatingType( + heating_type=dict_.get("heating_type", ""), + fuel_type=dict_.get("fuel_type", ""), + ) def get_water_heating(self): data = self.raw_data[self.raw_data.index("15.0 Water Heating"):self.raw_data.index("15.1 Hot Water Cylinder")] diff --git a/etl/transform/types.py b/etl/transform/types.py index e6d2a91..cefb894 100644 --- a/etl/transform/types.py +++ b/etl/transform/types.py @@ -76,14 +76,6 @@ class AssessorInfo(BaseModel): phone_number: Optional[str] = None email_address: Optional[EmailStr] = None -class Windows(BaseModel): - glazing_type: str - area_m2: float - roof_window: bool - orientation: str - u_value_w_m2_k: int - g_value: int - class VentilationAndCooling(BaseModel): no_of_open_fireplaces: int ventilation_type: str @@ -93,22 +85,29 @@ class Lighting(BaseModel): total_no_of_light_fittings: int total_no_of_lel_fittings: int -class MainHeatingSystemControls(BaseModel): +class HeatingSystemControls(BaseModel): control_type: str flue_type: str - fan_assisted_flue: str + fan_assisted_flue: bool heat_emitter_type: str - electricity_meter_type: Optional[str] - mains_gas_available: Optional[bool] + electricity_meter_type: Optional[str] = "" + mains_gas_available: Optional[bool] = False -class MainHeating(BaseModel): +class Heating(BaseModel): + type: str heating_source: str efficiency_source: str heating_fuel: str brand_name: str model_name: str model_qualifer: str - controls: MainHeatingSystemControls + sap_2009_table: Optional[str] = "" + percentage_of_heated_floor_area_served: Optional[str] = "" + controls: HeatingSystemControls + +class HeatingType(BaseModel): + heating_type: str + fuel_type: str class WaterHeating(BaseModel): heating_type: str @@ -142,12 +141,22 @@ class OtherDetails(BaseModel): electricity_meter_type: str main_gas_avalible: bool +class Windows(BaseModel): + glazing_type: str + area_m2: float + roof_window: bool + orientation: str + u_value_w_m2_k: int + g_value: int + class PropertyDetail(BaseModel): age_band: str dimensions: List[Dimension] = [] wall: Optional[Walls] = None roof: Optional[Roofs] = None floor: Optional[Floors] = None + windows: Optional[List[Windows]] = [] + class PropertyDescription(BaseModel): built_form: str @@ -180,3 +189,6 @@ class PropertyDescription(BaseModel): photovoltaicPanel: Optional[PhotovoltaicPanel] windTurbine: Optional[WindTurbine] otherDetails: Optional[OtherDetails] + mainHeating: Optional[Heating] + mainHeating2: Optional[Heating] + secondaryHeatingType: Optional[HeatingType]