From 42fe38a1aa5f7bbf27f52c7d1c3a01f0388f9202 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 13 Mar 2025 18:05:31 +0000 Subject: [PATCH] section 5 complete for conservatory --- etl/development.py | 3 +-- etl/pdfReader/sitenotes.py | 11 ++++++----- etl/transform/types.py | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/etl/development.py b/etl/development.py index bd28e13..06a9bb1 100644 --- a/etl/development.py +++ b/etl/development.py @@ -10,9 +10,8 @@ logger = Logger(name="main.py", level=logging.DEBUG).get_logger() DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf" DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf" +# Extract and transform pdfReader = pdfReaderToText(DATA_LOC_1) - -# Extract doc2 = pdfReader.get_reader() pdfReader2 = pdfReaderToText(DATA_LOC_2) doc1 = pdfReader2.get_reader() diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index e4bb335..9ece91f 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -32,9 +32,6 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): """ self.transform_summary_information() self.transform_sections() - # self.get_section_4() - # self.get_section_5() - # self.get_section_6() # self.get_section_7() # self.get_section_8() # self.get_section_9() @@ -193,6 +190,9 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): no_of_extension_4, ) + # Section 5 + conservatory = self.is_there_a_conservatory() + self.property_description = PropertyDescription( built_form = get_value("Built Form"), @@ -224,6 +224,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): age_band= age_bands[3], dimensions=dimensions["ex3"] if "ex3" in dimensions else [], )if no_of_extension_4 > 0 else None, + conservatory=conservatory, ) @@ -305,7 +306,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): return_dict.update({f"ex{i}" : 
create_dimensions_array(f"Extension {i} Property", ext[i-1])}) return return_dict - def get_section_5(self): + def is_there_a_conservatory(self): data = self.raw_data[self.raw_data.index('5.0 Conservatory'):self.raw_data.index('7.0 Walls')] avoid = [ 'Is there a conservatory?', @@ -313,7 +314,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): ] get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1] - self.conservatory = True if get_value("Is there a conservatory?") == "YES" else False + return True if get_value("Is there a conservatory?").upper() == "YES" else False def get_section_6(self): diff --git a/etl/transform/types.py b/etl/transform/types.py index fd61bf4..1058637 100644 --- a/etl/transform/types.py +++ b/etl/transform/types.py @@ -70,4 +70,5 @@ class PropertyDescription(BaseModel): ex1_property: Optional[PropertyDetail] = None ex2_property: Optional[PropertyDetail] = None ex3_property: Optional[PropertyDetail] = None - ex4_property: Optional[PropertyDetail] = None \ No newline at end of file + ex4_property: Optional[PropertyDetail] = None + conservatory: bool \ No newline at end of file