From 413538ac3cd9090894535329ca04f58046cd7d49 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 17 Jun 2025 07:55:11 +0000 Subject: [PATCH] save dimitra working copy --- etl/dimitra_hubspot_notes_gather.py | 3 ++ etl/pdfReader/sitenotes.py | 65 +++++++++++++++++++---------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/etl/dimitra_hubspot_notes_gather.py b/etl/dimitra_hubspot_notes_gather.py index 3abacfd..39b970d 100644 --- a/etl/dimitra_hubspot_notes_gather.py +++ b/etl/dimitra_hubspot_notes_gather.py @@ -63,6 +63,7 @@ for pipeline in pipelines.results: for stage in pipeline.stages: if stage.label.upper().strip() not in [s.upper() for s in exclude_stage.get(pipeline_name, [])]: for deals in hubspot.get_all_deals_from_stage_id(stage.id): + print(f"Scraping deal {deals['deal_id']}") time.sleep(1) deal_notes_by_week = {"Week 1": [], "Week 2": [], "Week 3": []} notes = hubspot.get_notes_from_deals_id(deals["deal_id"]) @@ -72,6 +73,7 @@ for pipeline in pipelines.results: if not week_label: continue html_body = note['note'] + print(f"Debugging purposes html_body looks like {html_body}") soup = BeautifulSoup(html_body, "html.parser") plain_text = soup.get_text(separator="\n") deal_notes_by_week[week_label].append(plain_text) @@ -85,6 +87,7 @@ for pipeline in pipelines.results: except: owner_name = "Couldn't find owner information" + # Unique identifier to Domna Homes' hubspot portal_id = 145275138 notes_data[pipeline_name].append({ diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index e70dff9..1c9f532 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -95,15 +95,34 @@ class ConditionReport(SiteNotesExtractor): self.setup() def setup(self): - self.get_section_1() - self.get_section_2() - self.get_section_3() - self.get_section_4() - self.get_section_5() - self.get_section_0() + assesor_details, inspection_and_project, the_property, main_elevation, elevations = self.get_section_1() + access_and_elevations = self.get_section_2() + rooms = self.get_section_3() + general_condition_of_heating_system, main_heating_one, main_heating_two, secondary_heating, heating_by_room, renewables = self.get_section_4() + occupants, energy_use, heating, shower_and_bath, appliances, fridge_and_freezers, cooker, tumble_dryer = self.get_section_5() + site_name, reference_code, address, postcode = self.get_section_0() + + # TODO: Complete this function + self.make_condition_report_object( + assesor_details, + inspection_and_project, + the_property, + main_elevation, + elevations, + access_and_elevations, + rooms, + + ) def get_section_0(self): - print("Please complete") + data = self.get_data_between("Project Site Name", "1. General Information") + site_name = self.get_next_value(data, "Project Site Name") + reference_code = self.get_next_value(data, "Property Reference Code") + address = self.get_next_value(data, "Property Address") + postcode = self.get_data_between("Postcode", "Main Image")[1:] + postcode = " ".join(postcode) + + return site_name, reference_code, address, postcode def get_section_1(self): assessor_details = self.get_assessor_details() @@ -479,13 +498,13 @@ class ConditionReport(SiteNotesExtractor): ) def get_section_4(self): - _ = self.get_general_condition_of_heating_system() - _ = self.get_main_heating_one() - _ = self.get_main_heating_two() - _ = self.get_secondary_heating() - _ = self.get_heating_by_room() - _ = self.get_renewables() - pprint(_) + general_condition_of_heating_system = self.get_general_condition_of_heating_system() + main_heating_one = self.get_main_heating_one() + main_heating_two = self.get_main_heating_two() + secondary_heating = self.get_secondary_heating() + heating_by_room = self.get_heating_by_room() + renewables = self.get_renewables() + return general_condition_of_heating_system, main_heating_one, main_heating_two, secondary_heating, heating_by_room, renewables def get_main_heating_one(self): data = self.get_data_between("Main Heating 1", "Main Heating 2") @@ -565,15 +584,15 @@ class ConditionReport(SiteNotesExtractor): ) def get_section_5(self): - _ = self.get_occupants() - _ = self.get_energy_use() - _ = self.get_heating() - _ = self.get_shower_and_bath() - _ = self.get_appliances() - _ = self.get_fridge_and_freezers() - _ = self.get_cooker() - _ = self.get_tumble_dryer() - pprint(_) + occupants = self.get_occupants() + energy_use = self.get_energy_use() + heating = self.get_heating() + shower_and_bath = self.get_shower_and_bath() + appliances = self.get_appliances() + fridge_and_freezers = self.get_fridge_and_freezers() + cooker = self.get_cooker() + tumble_dryer = self.get_tumble_dryer() + return occupants, energy_use, heating, shower_and_bath, appliances, fridge_and_freezers, cooker, tumble_dryer def get_occupants(self): data = self.get_data_between("Occupants", "Energy use")