Extraction finished; load step still to do

This commit is contained in:
Jun-te Kim 2025-06-16 15:33:46 +00:00
parent 1a03afdd0c
commit 5f12cea953
3 changed files with 90 additions and 6 deletions

View file

@ -1,3 +1,6 @@
from etl.surveyedData.surveryedData import surveyedDataProcessor
# Sample condition report PDF fed to the surveyed-data processor.
condition_report_file_path = "/workspaces/survey-extractor/etl/files/osmosis_condition_report.pdf"

# Build the processor once. The original constructed it twice with identical
# arguments, which only repeated the same work and rebound ``sdp``.
sdp = surveyedDataProcessor("123 Fake Street", [condition_report_file_path])

# TODO: add the ability to add document type, and sharepoint or s3 link so we can access it again

View file

@ -14,7 +14,7 @@ from etl.transform.conditionReportTypes import (
ExternalElevationGableOne, ExternalElevationGableTwo, ExternalElevationRear, ConservatoryOrOutbuilding,
AccessAndElevations, Hallway, RoomInfo, WindowsInfo, VentilationInfo, LivingRoom, DiningRoom, Kitchen, Rooms,
Utility, WC, Landing, Bedroom, Bathroom, LoftSpace, RoomInRoof, HeatingSystem, GeneralConditionHeatingSystem,
MainHeatingOne, MainHeatingTwo, SecondaryHeating, HeatingByRoom, Renewables
MainHeatingOne, MainHeatingTwo, SecondaryHeating, HeatingByRoom, Renewables, Occupant, EnergyUse, Heating, ShowerAndBath, FridgeAndFreezers, Cooker, TumbleDryer
)
from datetime import datetime
from pprint import pprint
@ -99,6 +99,7 @@ class ConditionReport(SiteNotesExtractor):
self.get_section_2()
self.get_section_3()
self.get_section_4()
self.get_section_5()
self.get_section_0()
def get_section_0(self):
@ -562,8 +563,87 @@ class ConditionReport(SiteNotesExtractor):
are_the_gas_and_electricity_meters_accessible=True if self.get_next_value(data, "Are the Gas and Electricity Meters accessible?") else False,
dual_or_single_electric_meter=self.get_next_value(data, "Dual or single electric meter?"),
)
def get_section_5(self):
    """Extract every sub-section of section 5 and return them keyed by name.

    The sub-section extractors are called in the same order as before.
    Previously each result was bound to ``_`` and immediately overwritten,
    so everything except the tumble-dryer record was thrown away and the
    method returned ``None``. The results are now kept and returned;
    callers that ignore the return value are unaffected.

    Returns:
        dict: maps a sub-section name to whatever the matching ``get_*``
        method returned (``get_appliances`` currently returns ``None``).
    """
    sections = {
        "occupants": self.get_occupants(),
        "energy_use": self.get_energy_use(),
        "heating": self.get_heating(),
        "shower_and_bath": self.get_shower_and_bath(),
        "appliances": self.get_appliances(),
        "fridge_and_freezers": self.get_fridge_and_freezers(),
        "cooker": self.get_cooker(),
        "tumble_dryer": self.get_tumble_dryer(),
    }
    # Debug output kept as it was: only the last extracted record is printed.
    pprint(sections["tumble_dryer"])
    return sections
def get_occupants(self):
    """Build the ``Occupant`` record for section 5.

    Occupant counts, vulnerability flags and status are read from the data
    between the "Occupants" and "Energy use" markers. The occupant name and
    the fuel-bill evidence answer are read from the data between
    "Tumble dryer" and "Media summary".

    Yes/no answers become ``True`` only when the answer is the string "yes"
    in any letter case. A missing or non-string answer now becomes ``False``
    instead of raising ``AttributeError`` on ``.lower()``, matching how
    other yes/no fields in this extractor treat an absent answer.
    NOTE(review): assumes ``get_next_value`` returns ``None`` when a label
    has no answer; confirm against its implementation.

    Count fields are still passed through ``int()`` unchanged, so a missing
    or non-numeric count raises exactly as before.

    Returns:
        Occupant: the populated model.
    """
    data = self.get_data_between("Occupants", "Energy use")
    second_data = self.get_data_between("Tumble dryer", "Media summary")

    def answered_yes(section, label):
        # Tolerate a missing answer rather than calling .lower() on it.
        answer = self.get_next_value(section, label)
        return isinstance(answer, str) and answer.lower() == "yes"

    def count(label):
        return int(self.get_next_value(data, label))

    return Occupant(
        name=self.get_next_value(second_data, "Name of the occupant:"),
        have_evidence_of_12_months_of_fuel_bill_data=answered_yes(
            second_data, "Have you evidenced 12 months of fuel bill data?"
        ),
        total_number_of_occupants=count("Total number of occupants:"),
        no_of_adult_occupants=count("No. of Adult Occupants (18+)"),
        no_of_child_occupants=count("No. of Child Occupants (Under 18)"),
        no_of_occupant_of_a_pensionable_age=count("No. of occupant of a pensionable age"),
        are_there_any_vulnerable_people=answered_yes(data, "Are there any vulnerable people?"),
        is_there_anyone_with_a_disability=answered_yes(data, "Is there anyone with a disability?"),
        status_of_occupant=self.get_next_value(data, "Status of the occupant:"),
        landlord_has_written_confirmation_that_the_tenent_agrees_to_the_assessment_been_supplied=answered_yes(
            data, "the assessment been supplied"
        ),
    )
def get_energy_use(self):
    """Build the ``EnergyUse`` record for section 5.

    Reads the property tenure and the electricity bill payer from the data
    between the "Energy use" and "Heating" markers.

    Returns:
        EnergyUse: the populated model.
    """
    section = self.get_data_between("Energy use", "Heating")
    tenure = self.get_next_value(section, "Property tenure:")
    bill_payer = self.get_next_value(section, "Who is the electricity bill payer?")
    return EnergyUse(
        property_tenure=tenure,
        who_is_the_electricity_payer=bill_payer,
    )
def get_heating(self):
    """Build the ``Heating`` record for section 5.

    Reads three answers from the data between the "Heating" and
    "Shower & bath" markers. Each lookup passes an ``avoid`` list holding
    the label that follows it in the report plus a non-breaking space.
    NOTE(review): presumably ``avoid`` stops a following label or a blank
    cell from being returned as the answer; confirm in ``get_next_value``.

    Returns:
        Heating: the populated model.
    """
    section = self.get_data_between("Heating", "Shower & bath")
    nbsp = '\xa0'

    # field name -> (label to look up, label that follows it in the report)
    lookups = {
        "room_stat_in_temperature_in_celsius": (
            "Room Stat Temperature (in °C)",
            "Room Stat Location",
        ),
        "room_stat_location": (
            "Room Stat Location",
            "Is the heating pattern known?",
        ),
        "is_the_heating_pattern_known": (
            "Is the heating pattern known?",
            "Shower & bath",
        ),
    }
    answers = {
        field: self.get_next_value(section, label, avoid=[following, nbsp])
        for field, (label, following) in lookups.items()
    }
    return Heating(**answers)
def get_shower_and_bath(self):
    """Build the ``ShowerAndBath`` record for section 5.

    Reads from the data between the "Shower & bath" and "Appliances"
    markers. The "showers per day or per week" answer is converted to a
    bool (``True`` only for "yes", any letter case); the other three
    answers are passed through as returned by ``get_next_value``.

    A missing or non-string showers answer now becomes ``False`` instead of
    raising ``AttributeError`` on ``.lower()``.
    NOTE(review): assumes ``get_next_value`` returns ``None`` when a label
    has no answer; confirm against its implementation.

    Returns:
        ShowerAndBath: the populated model.
    """
    data = self.get_data_between("Shower & bath", "Appliances")

    shower_type = self.get_next_value(data, "Shower Type:")
    knows_showers = self.get_next_value(
        data, "Do you know the number of showers per day or per week?"
    )
    # The lookup label is the quoted tail of the question as it appears in the report.
    showers_input = self.get_next_value(data, '"per week"')
    knows_baths = self.get_next_value(
        data, "Do you know the number of baths per day or per week?"
    )

    return ShowerAndBath(
        shower_type=shower_type,
        do_you_know_the_no_of_showers_per_day_per_week=(
            isinstance(knows_showers, str) and knows_showers.lower() == "yes"
        ),
        please_input_no_of_showers_and_specify_a_day_or_a_week=showers_input,
        do_you_know_the_number_of_baths_per_day_or_per_week=knows_baths,
    )
def get_appliances(self):
    """Placeholder for the appliances sub-section.

    No extraction is done yet: a notice is printed and ``None`` is returned
    implicitly.
    """
    notice = "Skipped appliances due to not having this example yet"
    print(notice)
def get_fridge_and_freezers(self):
    """Build the ``FridgeAndFreezers`` record for section 5.

    Reads three appliance counts from the data between the
    "Fridge & freezers" and "Cooker" markers and converts each with
    ``int()``, so a missing or non-numeric answer raises.

    Returns:
        FridgeAndFreezers: the populated model.
    """
    section = self.get_data_between("Fridge & freezers", "Cooker")

    # (model field, report label), in the order the report lists them.
    labelled_fields = (
        ("no_of_stand_alone_seperate_fridges", "No. of Standalone (separate) Fridges:"),
        ("no_of_stand_alone_seperate_freezers", "No. of Standalone (separate) Freezers:"),
        (
            "no_of_stand_alone_or_integrated_fridge_freezers",
            "No. of Standalone or Integrated Fridge Freezers:",
        ),
    )
    counts = {
        field: int(self.get_next_value(section, label))
        for field, label in labelled_fields
    }
    return FridgeAndFreezers(**counts)
def get_cooker(self):
    """Build the ``Cooker`` record for section 5.

    Reads the cooker type, size class and range fuel from the data between
    the "Cooker" and "Tumble dryer" markers, passing each answer through as
    returned by ``get_next_value``.

    Returns:
        Cooker: the populated model.
    """
    section = self.get_data_between("Cooker", "Tumble dryer")

    # (model field, report label), in the order the report lists them.
    labelled_fields = (
        ("cooker_type", "Cooker Type:"),
        ("normal_large_range", "Normal - Large - Range"),
        ("range_fuel", "Range Fuel:"),
    )
    answers = {
        field: self.get_next_value(section, label)
        for field, label in labelled_fields
    }
    return Cooker(**answers)
def get_tumble_dryer(self):
    """Build the ``TumbleDryer`` record for section 5.

    Reads from the data between the "Tumble dryer" marker and the
    "Have you evidenced 12 months of fuel bill data?" question.

    The percentage is converted with ``int()`` as before, so a missing or
    non-numeric answer still raises. The outdoor-drying answer is ``True``
    only for "yes" (any letter case); a missing or non-string answer now
    becomes ``False`` instead of raising ``AttributeError`` on ``.lower()``.
    NOTE(review): assumes ``get_next_value`` returns ``None`` when a label
    has no answer; confirm against its implementation.

    Returns:
        TumbleDryer: the populated model.
    """
    section = self.get_data_between(
        "Tumble dryer", "Have you evidenced 12 months of fuel bill data?"
    )
    annual_use = int(self.get_next_value(section, "Percentage of annual use:"))
    outdoor_answer = self.get_next_value(section, "Space for outdoor drying?")
    return TumbleDryer(
        percentage_of_annual_use=annual_use,
        space_for_outdoor_drying=(
            isinstance(outdoor_answer, str) and outdoor_answer.lower() == "yes"
        ),
    )
class QuidosSiteNotesExtractor(SiteNotesExtractor):
def __init__(self, data_list):
super().__init__(data_list)

View file

@ -230,6 +230,7 @@ class Occupant(BaseModel):
class TumbleDryer(BaseModel):
    # Tumble-dryer answers from section 5 of the condition report.

    # Share of annual use, stored as a whole number.
    percentage_of_annual_use: int
    # Whether the "Space for outdoor drying?" question was answered yes.
    space_for_outdoor_drying: bool
class Cooker(BaseModel):
range_fuel: str
@ -248,12 +249,12 @@ class ShowerAndBath(BaseModel):
shower_type: str
do_you_know_the_no_of_showers_per_day_per_week: bool
please_input_no_of_showers_and_specify_a_day_or_a_week: str
do_you_know_the_number_of_baths_per_day_or_per_week: str
class Heating(BaseModel):
    # Heating answers from section 5 of the condition report.
    #
    # Each field was declared twice: first as a required ``str``, then as
    # ``Optional[str] = None``. The later declaration wins in a class body,
    # so the required ones were dead and have been removed.

    # TODO find one with an example of this one
    room_stat_in_temperature_in_celsius: Optional[str] = None
    room_stat_location: Optional[str] = None
    is_the_heating_pattern_known: Optional[str] = None
class EnergyUse(BaseModel):
property_tenure: str