# Reviewer overview of this patch (commentary only; the patch text below is unchanged).
#
# Files touched:
#   * etl/condition_report_etl.py
#       Re-emits the `sdp = surveyedDataProcessor(...)` line (the old file ended without a
#       trailing newline) and appends two blank lines plus a TODO comment about recording a
#       document type and a SharePoint/S3 link.
#   * etl/pdfReader/sitenotes.py
#       - Extends the conditionReportTypes import list with Occupant, EnergyUse, Heating,
#         ShowerAndBath, FridgeAndFreezers, Cooker and TumbleDryer.
#       - Inserts a call to self.get_section_5() after get_section_4() and before
#         get_section_0() in ConditionReport.
#       - Adds get_section_5() and the getters it calls: get_occupants, get_energy_use,
#         get_heating, get_shower_and_bath, get_appliances, get_fridge_and_freezers,
#         get_cooker and get_tumble_dryer.
#   * etl/transform/conditionReportTypes.py
#       - TumbleDryer gains `space_for_outdoor_drying: bool`.
#       - ShowerAndBath gains `do_you_know_the_number_of_baths_per_day_or_per_week: str`
#         (added in place of a blank line).
#       - Heating's three fields change from `str` to `Optional[str] = None`.
#
# How the new getters are built: each one takes a slice of the report with
# self.get_data_between(<start heading>, <end heading>), reads individual answers with
# self.get_next_value(<slice>, <label>) and passes them to the matching model constructor.
# get_occupants reads two of its fields (occupant name, fuel-bill evidence) from a second
# slice, "Tumble dryer" .. "Media summary". get_heating additionally passes an `avoid` list
# to get_next_value.
#
# NOTE(review): get_section_5 binds every getter's result to `_` and pprint()s only the last
#   one (get_tumble_dryer's); the earlier results are discarded. get_appliances only prints
#   a "Skipped appliances ..." message and returns None.
# NOTE(review): Yes/No answers are converted with `.lower() == "yes"` and counts with
#   int(...) applied directly to get_next_value()'s result. get_next_value is not defined in
#   this patch; if it can return None or non-numeric text these calls raise - TODO confirm.
#   The unchanged context line for the meter-accessibility field uses plain truthiness
#   (`True if value else False`) instead of the "yes" comparison - confirm which is intended.
# NOTE(review): in get_shower_and_bath the "showers per day or per week" question is turned
#   into a bool, while the matching "baths" question is passed through as a string (and is
#   declared `str` on the model) - confirm this asymmetry is deliberate.
# NOTE(review): Heating now uses Optional[...]; the import of Optional in
#   conditionReportTypes.py is not visible in this patch - TODO confirm it exists.
# NOTE(review): this copy of the patch appears to have lost its original line breaks and
#   indentation; the hunk headers (e.g. "@@ -562,8 +563,87 @@") describe the original layout.
diff --git a/etl/condition_report_etl.py b/etl/condition_report_etl.py index 1f035ad..5284172 100644 --- a/etl/condition_report_etl.py +++ b/etl/condition_report_etl.py @@ -1,3 +1,6 @@ from etl.surveyedData.surveryedData import surveyedDataProcessor condition_report_file_path = "/workspaces/survey-extractor/etl/files/osmosis_condition_report.pdf" -sdp = surveyedDataProcessor("123 Fake Street", [condition_report_file_path]) \ No newline at end of file +sdp = surveyedDataProcessor("123 Fake Street", [condition_report_file_path]) + + +# TODO: add the ability to add document type, and sharepoint or s3 link so we can process access it again \ No newline at end of file diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index 3be34f5..e70dff9 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -14,7 +14,7 @@ from etl.transform.conditionReportTypes import ( ExternalElevationGableOne, ExternalElevationGableTwo, ExternalElevationRear, ConservatoryOrOutbuilding, AccessAndElevations, Hallway, RoomInfo, WindowsInfo, VentilationInfo, LivingRoom, DiningRoom, Kitchen, Rooms, Utility, WC, Landing, Bedroom, Bathroom, LoftSpace, RoomInRoof, HeatingSystem, GeneralConditionHeatingSystem, - MainHeatingOne, MainHeatingTwo, SecondaryHeating, HeatingByRoom, Renewables + MainHeatingOne, MainHeatingTwo, SecondaryHeating, HeatingByRoom, Renewables, Occupant, EnergyUse, Heating, ShowerAndBath, FridgeAndFreezers, Cooker, TumbleDryer ) from datetime import datetime from pprint import pprint @@ -99,6 +99,7 @@ class ConditionReport(SiteNotesExtractor): self.get_section_2() self.get_section_3() self.get_section_4() + self.get_section_5() self.get_section_0() def get_section_0(self): @@ -562,8 +563,87 @@ class ConditionReport(SiteNotesExtractor): are_the_gas_and_electricity_meters_accessible=True if self.get_next_value(data, "Are the Gas and Electricity Meters accessible?") else False, dual_or_single_electric_meter=self.get_next_value(data, "Dual or single 
electric meter?"), ) + + def get_section_5(self): + _ = self.get_occupants() + _ = self.get_energy_use() + _ = self.get_heating() + _ = self.get_shower_and_bath() + _ = self.get_appliances() + _ = self.get_fridge_and_freezers() + _ = self.get_cooker() + _ = self.get_tumble_dryer() + pprint(_) + + def get_occupants(self): + data = self.get_data_between("Occupants", "Energy use") + second_data = self.get_data_between("Tumble dryer", "Media summary") + return Occupant( + name=self.get_next_value(second_data, "Name of the occupant:"), + have_evidence_of_12_months_of_fuel_bill_data= True if self.get_next_value(second_data, "Have you evidenced 12 months of fuel bill data?").lower() == "yes" else False, + total_number_of_occupants=int(self.get_next_value(data, "Total number of occupants:")), + no_of_adult_occupants=int(self.get_next_value(data, "No. of Adult Occupants (18+)")), + no_of_child_occupants=int(self.get_next_value(data, "No. of Child Occupants (Under 18)")), + no_of_occupant_of_a_pensionable_age=int(self.get_next_value(data, "No. 
of occupant of a pensionable age")), + are_there_any_vulnerable_people=True if self.get_next_value(data, "Are there any vulnerable people?").lower() == "yes" else False, + is_there_anyone_with_a_disability=True if self.get_next_value(data, "Is there anyone with a disability?").lower() == "yes" else False, + status_of_occupant=self.get_next_value(data, "Status of the occupant:"), + landlord_has_written_confirmation_that_the_tenent_agrees_to_the_assessment_been_supplied=True if self.get_next_value(data, "the assessment been supplied").lower() == "yes" else False, + ) + + def get_energy_use(self): + data = self.get_data_between("Energy use", "Heating") + return EnergyUse( + property_tenure=self.get_next_value(data, "Property tenure:"), + who_is_the_electricity_payer=self.get_next_value(data, "Who is the electricity bill payer?") + ) + + def get_heating(self): + data = self.get_data_between("Heating", "Shower & bath") + return Heating( + room_stat_in_temperature_in_celsius=self.get_next_value(data, "Room Stat Temperature (in °C)", avoid=["Room Stat Location", '\xa0']), + room_stat_location=self.get_next_value(data, "Room Stat Location", avoid = ["Is the heating pattern known?", '\xa0']), + is_the_heating_pattern_known=self.get_next_value(data, "Is the heating pattern known?", avoid=["Shower & bath", '\xa0']), + ) + + def get_shower_and_bath(self): + data = self.get_data_between("Shower & bath", "Appliances") + return ShowerAndBath( + shower_type=self.get_next_value(data, "Shower Type:"), + do_you_know_the_no_of_showers_per_day_per_week=True if self.get_next_value(data, "Do you know the number of showers per day or per week?").lower() == "yes" else False, + please_input_no_of_showers_and_specify_a_day_or_a_week=self.get_next_value(data, '"per week"'), + do_you_know_the_number_of_baths_per_day_or_per_week=self.get_next_value(data, "Do you know the number of baths per day or per week?"), + ) + + def get_appliances(self): + print("Skipped appliances due to not having this 
example yet") + + def get_fridge_and_freezers(self): + data = self.get_data_between("Fridge & freezers", "Cooker") + return FridgeAndFreezers( + no_of_stand_alone_seperate_fridges=int(self.get_next_value(data,"No. of Standalone (separate) Fridges:")), + no_of_stand_alone_seperate_freezers=int(self.get_next_value(data, "No. of Standalone (separate) Freezers:")), + no_of_stand_alone_or_integrated_fridge_freezers=int(self.get_next_value(data, "No. of Standalone or Integrated Fridge Freezers:")) + ) + + def get_cooker(self): + data = self.get_data_between("Cooker", "Tumble dryer") + return Cooker( + cooker_type=self.get_next_value(data, "Cooker Type:"), + normal_large_range=self.get_next_value(data, "Normal - Large - Range"), + range_fuel=self.get_next_value(data, "Range Fuel:") + ) + + def get_tumble_dryer(self): + data = self.get_data_between("Tumble dryer", "Have you evidenced 12 months of fuel bill data?") + return TumbleDryer( + percentage_of_annual_use=int(self.get_next_value(data, "Percentage of annual use:")), + space_for_outdoor_drying=True if self.get_next_value(data,"Space for outdoor drying?").lower() == "yes" else False, + ) + + class QuidosSiteNotesExtractor(SiteNotesExtractor): def __init__(self, data_list): super().__init__(data_list) diff --git a/etl/transform/conditionReportTypes.py b/etl/transform/conditionReportTypes.py index 82a9f39..f51e575 100644 --- a/etl/transform/conditionReportTypes.py +++ b/etl/transform/conditionReportTypes.py @@ -230,6 +230,7 @@ class Occupant(BaseModel): class TumbleDryer(BaseModel): percentage_of_annual_use: int + space_for_outdoor_drying: bool class Cooker(BaseModel): range_fuel: str @@ -248,12 +249,12 @@ class ShowerAndBath(BaseModel): shower_type: str do_you_know_the_no_of_showers_per_day_per_week: bool please_input_no_of_showers_and_specify_a_day_or_a_week: str - + do_you_know_the_number_of_baths_per_day_or_per_week: str class Heating(BaseModel): # TODO find one with an example of this one - 
room_stat_in_temperature_in_celsius: str - room_stat_location: str - is_the_heating_pattern_known: str + room_stat_in_temperature_in_celsius: Optional[str] = None + room_stat_location: Optional[str] = None + is_the_heating_pattern_known: Optional[str] = None class EnergyUse(BaseModel): property_tenure: str