From af77c0ffbce655369e3fd3499db86ebbc4a66ed9 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 26 Aug 2025 10:29:11 +0000 Subject: [PATCH] occupant information --- .github/workflows/lambda_main.yml | 2 +- .../lambda/extractor_and_loader/docker/app.py | 16 +++++++-- etl/fileReader/sitenotes.py | 33 ++++++++++++------- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/.github/workflows/lambda_main.yml b/.github/workflows/lambda_main.yml index 94e053a..6330833 100644 --- a/.github/workflows/lambda_main.yml +++ b/.github/workflows/lambda_main.yml @@ -2,7 +2,7 @@ name: Lambda Main Workflow on: push: - branches: [main, feature/document_upload] + branches: [main, feautre/additional_features_in_condition_report_extraction] env: AWS_REGION: eu-west-2 diff --git a/deployment/lambda/extractor_and_loader/docker/app.py b/deployment/lambda/extractor_and_loader/docker/app.py index 929c16a..2da0228 100644 --- a/deployment/lambda/extractor_and_loader/docker/app.py +++ b/deployment/lambda/extractor_and_loader/docker/app.py @@ -57,15 +57,17 @@ def serialize_model(model: Any): else: return model -def make_final_json(rooms_obj, heating_system_obj): +def make_final_json(rooms_obj, heating_system_obj, occupant): # Convert to dict recursively rooms_data = serialize_model(rooms_obj) heating_data = serialize_model(heating_system_obj) + occupant_data = serialize_model(occupant) # Combine into one big JSON-ready dict final_data = { "rooms": rooms_data, - "heating_system": heating_data + "heating_system": heating_data, + "occupant_info": occupant_data, } # Convert to pretty JSON string @@ -198,10 +200,18 @@ def handler(event, context): print("Downloading file locally for extraction...") local_path = download_private_s3_file(file_uri) + local_path = os.path.join(os.path.join(os.getcwd(), "../..", "home/Downloads/67-Aylestone-Road-1.pdf")) + + + print("Extracting file...") reader = pdfReaderToText(local_path) obj = WarmHomesConditionReport(reader.text_list) - json_ = make_final_json(obj.master_obj[0], obj.master_obj[1]) + json_ = make_final_json( + obj.master_obj[0], + obj.master_obj[1], + obj.master_obj[2] + ) print("Extracted completed, made json") print("uploading json to s3 bucket...") diff --git a/etl/fileReader/sitenotes.py b/etl/fileReader/sitenotes.py index 073a30a..0901ed1 100644 --- a/etl/fileReader/sitenotes.py +++ b/etl/fileReader/sitenotes.py @@ -105,18 +105,19 @@ class WarmHomesConditionReport(SiteNotesExtractor): def __init__(self, data_list): super().__init__(data_list) self.type = ReportType.WARM_HOMES_CONDITION_REPORT - room, heating_system = self.setup_condition_report() - self.master_obj = room, heating_system + room, heating_system, occupant = self.setup_condition_report() + + self.master_obj = room, heating_system, occupant def setup_condition_report(self): # general_information = self.get_section_1() # access_and_elevations = self.get_section_2() rooms = self.get_section_3() heating_system = self.get_section_4() - # occupant_assessment = self.get_section_5() + occupant_assessment = self.get_section_5() # site_name, reference_code, address, postcode = self.get_section_0() - return rooms, heating_system + return rooms, heating_system, occupant_assessment def get_section_0(self): data = self.get_data_between("Project Site Name", "1. General Information") @@ -606,13 +607,14 @@ class WarmHomesConditionReport(SiteNotesExtractor): def get_section_5(self): occupants = self.get_occupants() - energy_use = self.get_energy_use() - heating = self.get_heating() - shower_and_bath = self.get_shower_and_bath() - appliances = self.get_appliances() - fridge_and_freezers = self.get_fridge_and_freezers() - cooker = self.get_cooker() - tumble_dryer = self.get_tumble_dryer() + # energy_use = self.get_energy_use() + # heating = self.get_heating() + # shower_and_bath = self.get_shower_and_bath() + # appliances = self.get_appliances() + # fridge_and_freezers = self.get_fridge_and_freezers() + # cooker = self.get_cooker() + # tumble_dryer = self.get_tumble_dryer() + return occupants return OccupantAssessment( occupant=occupants, energy_use=energy_use, @@ -627,12 +629,19 @@ class WarmHomesConditionReport(SiteNotesExtractor): def get_occupants(self): data = self.get_data_between("Occupants", "Energy use") second_data = self.get_data_between("Tumble dryer", "Media summary") + no_of_child_occupants = self.get_next_value(data, "No. of Child Occupants (Under 18)") + if no_of_child_occupants == "\xa0": + no_of_child_occupants = 0 + else: + no_of_child_occupants = int(no_of_child_occupants) + + print(self.get_next_value(data, "No. of Child Occupants (Under 18)")) return Occupant( name=self.get_next_value(second_data, "Name of the occupant:"), have_evidence_of_12_months_of_fuel_bill_data= True if self.get_next_value(second_data, "Have you evidenced 12 months of fuel bill data?").lower() == "yes" else False, total_number_of_occupants=int(self.get_next_value(data, "Total number of occupants:")), no_of_adult_occupants=int(self.get_next_value(data, "No. of Adult Occupants (18+)")), - no_of_child_occupants=int(self.get_next_value(data, "No. of Child Occupants (Under 18)")), + no_of_child_occupants=no_of_child_occupants, no_of_occupant_of_a_pensionable_age=int(self.get_next_value(data, "No. of occupant of a pensionable age")), are_there_any_vulnerable_people=True if self.get_next_value(data, "Are there any vulnerable people?").lower() == "yes" else False, is_there_anyone_with_a_disability=True if self.get_next_value(data, "Is there anyone with a disability?").lower() == "yes" else False,