diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index 0c5a3dd..f74eabc 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -1,5 +1,5 @@ from etl.pdfReader.reportType import ReportType -from transform.types import CompanyInfo, SurverySummaryInfo, AssessorInfo +from transform.types import CompanyInfo, SurverySummaryInfo, AssessorInfo, PropertyDescription, PropertyDetail from datetime import datetime class SiteNotesExtractor(): @@ -23,6 +23,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): self.type = ReportType.QUIDOS_SITE_NOTE self.company_information = None self.survey_information = None + self.property_description = None self.setup() def setup(self): @@ -30,31 +31,29 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): A function to read QUIDOS SITE REPORT and get all data """ self.get_summary_information() - self.get_section_1() - self.get_section_2() - self.get_section_3() - self.get_section_4() - self.get_section_5() - self.get_section_6() - self.get_section_7() - self.get_section_8() - self.get_section_9() - self.get_section_10() - self.get_section_11() - self.get_section_12() - self.get_section_13() - self.get_section_14() - self.get_section_14_1() - self.get_section_14_2() - self.get_section_15_0() - self.get_section_15_1() - self.get_section_16() - self.get_section_17() - self.get_section_18() - self.get_section_19() - self.get_section_20() - self.get_section_21() - self.get_section_22() + self.get_section_1_and_2_and_3() + # self.get_section_4() + # self.get_section_5() + # self.get_section_6() + # self.get_section_7() + # self.get_section_8() + # self.get_section_9() + # self.get_section_10() + # self.get_section_11() + # self.get_section_12() + # self.get_section_13() + # self.get_section_14() + # self.get_section_14_1() + # self.get_section_14_2() + # self.get_section_15_0() + # self.get_section_15_1() + # self.get_section_16() + # self.get_section_17() + # self.get_section_18() + # 
self.get_section_19() + # self.get_section_20() + # self.get_section_21() + # self.get_section_22() def get_summary_information(self): # Summary Information @@ -88,6 +87,12 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): ] get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1] + index = self.get_x_occurance(self.raw_data, "Current annual emissions") + if index: + including_9_92_emission_factor = self.raw_data[index + 1] + else: + including_9_92_emission_factor = None + self.survey_information = SurverySummaryInfo( reference_number = get_value('Reference Number'), epc_language = get_value('EPC Language'), @@ -104,6 +109,9 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): potential_sap = get_value('Potential SAP rating'), current_ei = get_value('Current EI rating'), potential_ei = get_value('Potential EI rating'), + current_annual_emissions = get_value('Current annual emissions'), + current_annual_energy_costs = get_value('Current annual energy costs'), + current_annual_emission_including_0925_multiplayer=including_9_92_emission_factor, ) @@ -134,20 +142,9 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): email_address = get_value("E-mail address"), address = assessor_address, ) - - - self.current_annual_emissions = get_value('Current annual emissions') - self.current_annual_energy_costs = get_value('Current annual energy costs') - - index = self.get_x_occurance(self.raw_data, "Current annual emissions") - if index: - self.current_annual_emission_including_9_92_emission_factor = self.raw_data[index + 1] - else: - self.current_annual_emission_inlcluding_9_92_emission_actor = None - - def get_section_1(self): - data = self.raw_data[self.raw_data.index("1.0 Property Type"):self.raw_data.index("2.0 Number Of")] + def get_section_1_and_2_and_3(self): + data = self.get_data_between("1.0 Property Type","2.0 Number Of") avoid = [ "1.0 Property Type", @@ -155,12 +152,11 @@ class 
QuidosSiteNotesExtractor(SiteNotesExtractor): "Detachment/Position", "2.0 Number Of" ] + get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1] + - self.property_type_built_form = get_value("Built Form") - self.property_type_detatchment_position = get_value("Detachment/Position") - - def get_section_2(self): + # Section 2 data = self.raw_data[self.raw_data.index("2.0 Number Of"):self.raw_data.index("3.0 Date Built")] avoid = [ @@ -178,22 +174,41 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): 'Percentage of Draught Proofed(%)', "3.0 Date Built", ] + + # Section 3 + age_bands = self.get_age_band() - get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1] - self.main_property = get_value("Main Property") - self.extension_1 = get_value('Extension 1') - self.extension_2 = get_value('Extension 2') - self.extension_3 = get_value('Extension 3') - self.extension_4 = get_value('Extension 4') - self.no_of_habitable_rooms = get_value('Number of Habitable Rooms') - self.no_of_heated_habitable_rooms = get_value('Number of Heated Habitable Rooms') - self.no_of_heated_basement = get_value('Heated Basement') - self.conservatory_type = get_value('Conservatory Type') - self.terrain_type = get_value('Terrain Type') - self.percentage_of_draught_proofed = get_value('Percentage of Draught Proofed(%)') + self.property_description = PropertyDescription( + built_form = get_value("Built Form"), + detachment_or_position = get_value("Detachment/Position"), + no_of_main_property = int(get_value("Main Property")), + no_of_extension_1 = int(get_value('Extension 1') or 0), + no_of_extension_2 = int(get_value('Extension 2') or 0), + no_of_extension_3 = int(get_value('Extension 3') or 0), + no_of_extension_4 = int(get_value('Extension 4') or 0), + no_of_habitable_rooms = int(get_value('Number of Habitable Rooms')), + no_of_heated_rooms = 
int(get_value('Number of Heated Habitable Rooms')), + heated_basement = False if get_value('Heated Basement') == "NO" else True, + conservatory_type = get_value('Conservatory Type'), + terrain_type = get_value('Terrain Type'), + percentage_of_draught_proofed= int(get_value('Percentage of Draught Proofed(%)')), + main_property=PropertyDetail( + age_band= age_bands[0] + )if age_bands[0] else None, + ex1_property=PropertyDetail( + age_band= age_bands[1] + )if age_bands[1] else None, + ex2_property=PropertyDetail( + age_band= age_bands[2] + )if age_bands[2] else None, + ex3_property=PropertyDetail( + age_band= age_bands[3] + )if age_bands[3] else None, + ) - def get_section_3(self): + + def get_age_band(self): data = self.raw_data[self.raw_data.index('3.0 Date Built'):self.raw_data.index('4.0 Dimensions')] avoid = [ '3.0 Date Built', @@ -205,22 +220,27 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): 'Extension 4', '4.0 Dimensions', ] - + property_age = [] get_value = lambda x: None if data[data.index(x) + 1] in avoid else data[data.index(x) + 1] - if self.main_property: - self.main_property_age_band = get_value("Main Property") + + age = (get_value("Main Property")) + if age: + property_age.append(age) else: - self.main_property_age_band = None + property_age.append(None) for i in range(1,4): - if getattr(self, f"extension_{i}") and f"Extension {i}" in data: - setattr(self, f"extension_{i}_age_band", get_value(f"Extension {i}")) + if f"Extension {i}" in data: + property_age.append(get_value(f"Extension {i}")) else: - setattr(self, f"extension_{i}_age_band", None) - + property_age.append(None) - def get_section_4(self): - data = self.raw_data[self.raw_data.index('4.0 Dimensions'): self.raw_data.index('5.0 Conservatory')] + + return property_age + + + def get_section_4(self, no_of_main_property): + data = self.get_data_between('4.0 Dimensions','5.0 Conservatory') avoid = [ '4.0 Dimensions', '5.0 Conservatory', @@ -252,9 +272,9 @@ class 
QuidosSiteNotesExtractor(SiteNotesExtractor): } allNumbers.append(details) return allNumbers - - if self.main_property and "Main Property" in data: - self.main_property_dimensions = create_dimensions_array("Main Property", int(self.main_property)) + + if "Main Property" in data: + self.main_property_dimensions = create_dimensions_array("Main Property", no_of_main_property) else: self.main_property_dimensions = None diff --git a/etl/transform/types.py b/etl/transform/types.py index 75d14bb..4eb2e20 100644 --- a/etl/transform/types.py +++ b/etl/transform/types.py @@ -31,6 +31,10 @@ class SurverySummaryInfo(BaseModel): potential_sap: str current_ei: str potential_ei: str + current_annual_emissions: str + current_annual_emission_including_0925_multiplayer: Optional[str] = None  # extractor passes None when the second "Current annual emissions" entry is not found + current_annual_energy_costs: str + class AssessorInfo(BaseModel): accreditation_number: str @@ -38,3 +42,25 @@ class AssessorInfo(BaseModel): phone_number: Optional[str] = None email_address: Optional[EmailStr] = None +class PropertyDetail(BaseModel): + age_band: str + +class PropertyDescription(BaseModel): + built_form: str + detachment_or_position: str + no_of_main_property: int + no_of_extension_1: Optional[int] = 0 + no_of_extension_2: Optional[int] = 0 + no_of_extension_3: Optional[int] = 0 + no_of_extension_4: Optional[int] = 0 + no_of_habitable_rooms: int + no_of_heated_rooms: int + heated_basement: bool + conservatory_type: str + percentage_of_draught_proofed: int + terrain_type: str + main_property: Optional[PropertyDetail] = None  # extractor passes None when no main-property age band is read + ex1_property: Optional[PropertyDetail] = None + ex2_property: Optional[PropertyDetail] = None + ex3_property: Optional[PropertyDetail] = None + ex4_property: Optional[PropertyDetail] = None \ No newline at end of file