save settings

2026-06-08 11:17:29 +00:00 · 2025-03-12 15:43:57 +00:00 · 2025-03-12 15:43:57 +00:00 · e0694efb86
commit e0694efb86
parent a4e916d68c
4 changed files with 45 additions and 4 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -1,4 +1,11 @@
 {
    "jupyter.interactiveWindow.textEditor.executeSelection": true,
    "python.REPL.sendToNativeREPL": true
+
+    // Hot reload setting that needs to be in user settings
+    // "jupyter.runStartupCommands": [
+    //     "%load_ext autoreload", "%autoreload 2"
+    // ]
+
+
 }
--- a/etl/main.py
+++ b/etl/main.py
@ -5,6 +5,7 @@ from pprint import pprint, pformat
 import logging
 from etl.utils.logger import Logger
 from etl.validator.validator import DomnaSharePointValidator
+
 logger = Logger(name="main.py", level=logging.DEBUG).get_logger()
 DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
 DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
@ -13,8 +14,7 @@ pdfReader = pdfReaderToText(DATA_LOC_1)
 doc2 = pdfReader.get_reader()
 pdfReader2 = pdfReaderToText(DATA_LOC_2)
 doc1 = pdfReader2.get_reader()
-vars(doc1)
-
+# vars(doc1)

 def main():
    pass
@ -34,8 +34,8 @@ def main():
    # logger.info(pformat(list_of_house_ass_names))

    # POC of downloading each file
-    # south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
-    # south_coast_scraper.download_file_for_each_address()
+    south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
+    south_coast_scraper.download_file_for_each_address()

    # POC of pdf reader

--- a/etl/pdfReader/pdfReaderToText.py
+++ b/etl/pdfReader/pdfReaderToText.py
@ -13,6 +13,7 @@ class pdfReaderToText():
        self.text_list = []
        self.get_text_from_pdf_file()
        self.type = None
+        print("everything from scracth")

    def get_text_from_pdf_file(self):
        self.logger.debug(f"Extrating text from {self.source_path}")
--- a/etl/pdfReader/sitenotes.py
+++ b/etl/pdfReader/sitenotes.py
@ -10,6 +10,9 @@ class SiteNotesExtractor():
        except IndexError:
            return None  # Return None if the value does not occur twice
        
+    def get_data_between(self, a, b):
+        """Return the slice of ``self.raw_data`` between two markers.
+
+        The slice starts at the first occurrence of ``a`` (included) and stops
+        at the first occurrence of ``b`` (excluded). If ``b`` first occurs
+        before ``a``, the result is empty.
+
+        NOTE(review): presumably ``raw_data`` is a list of extracted text
+        lines (or a str); in either case ``index`` raises ``ValueError`` when
+        a marker is missing, and that is not handled here. Confirm.
+        """
+        return self.raw_data[self.raw_data.index(a):self.raw_data.index(b)]
+        


 class QuidosSiteNotes(SiteNotesExtractor):
@ -42,6 +45,8 @@ class QuidosSiteNotes(SiteNotesExtractor):
        self.get_section_15_0()
        self.get_section_15_1()
        self.get_section_16()
+        self.get_section_17()
+        self.get_section_18()
    
    def get_summary_information(self):
        # Summary Information
@ -488,6 +493,34 @@ class QuidosSiteNotes(SiteNotesExtractor):
        ]

        self.two_columns_processor(data, sub_titles, avoid, 16.0)
+    
+    def get_section_17(self):
+        # Placeholder: no extraction is implemented for section 17; this is a no-op.
+        pass
+
+    def get_section_18(self):
+        """Extract section 18.0 ("Showers And Baths") with the two-column processor."""
+        # Take everything from the section-18 heading up to, but not including,
+        # the section-19 heading.
+        data = self.get_data_between("18.0 Showers And Baths", "19.0 Flue Gas Heat Recovery System")
+        # NOTE(review): presumably the field labels to pick out of the section;
+        # confirm against two_columns_processor.
+        sub_titles = [
+            "Number of Rooms with Bath and/or Shower",
+            "Number of Rooms with Mixer Shower and no Bath",
+            "Number of Rooms with Mixer Shower and Bath",
+        ]
+        # NOTE(review): presumably heading lines to skip while processing;
+        # confirm against two_columns_processor.
+        avoid = [
+            "18.0 Showers And Baths",
+            "19.0 Flue Gas Heat Recovery System",
+        ]
+
+        self.two_columns_processor(data, sub_titles, avoid, 18.0)
+    
+    def get_section_19(self):
+        # NOTE(review): unfinished. The locals below are built but never used:
+        # sub_titles and avoid are empty and nothing is handed to
+        # two_columns_processor, so this currently only slices raw_data and
+        # returns None.
+        data = self.get_data_between("19.0 Flue Gas Heat Recovery System","20.0 Photovoltaic Panel")
+        sub_titles = [
+
+        ]
+        avoid = [
+
+        ]
+
 # Extract
 # Transform (with pydantic validation)
 # Load