save settings

This commit is contained in:
Jun-te Kim 2025-03-12 15:43:57 +00:00
parent a4e916d68c
commit e0694efb86
4 changed files with 45 additions and 4 deletions

View file

@ -1,4 +1,11 @@
{
"jupyter.interactiveWindow.textEditor.executeSelection": true,
"python.REPL.sendToNativeREPL": true
// Hot reload setting that needs to be in user settings
// "jupyter.runStartupCommands": [
// "%load_ext autoreload", "%autoreload 2"
// ]
}

View file

@ -5,6 +5,7 @@ from pprint import pprint, pformat
import logging
from etl.utils.logger import Logger
from etl.validator.validator import DomnaSharePointValidator
# Logger for this script, created with the name "main.py" at DEBUG level.
logger = Logger(name="main.py", level=logging.DEBUG).get_logger()
# Hard-coded paths to two "PRE SITE NOTES" PDFs under /tmp/sharepoint.
# NOTE(review): presumably sample inputs for local experiments — confirm they should stay in the script.
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
@ -13,8 +14,7 @@ pdfReader = pdfReaderToText(DATA_LOC_1)
# NOTE(review): the reader built from DATA_LOC_1 is stored as doc2, while the
# reader built from DATA_LOC_2 is stored as doc1 — confirm the crossed numbering is intended.
doc2 = pdfReader.get_reader()
pdfReader2 = pdfReaderToText(DATA_LOC_2)
doc1 = pdfReader2.get_reader()
vars(doc1)
# vars(doc1)
def main():
pass
@ -34,8 +34,8 @@ def main():
# logger.info(pformat(list_of_house_ass_names))
# POC of downloading each file
# south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
# south_coast_scraper.download_file_for_each_address()
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
south_coast_scraper.download_file_for_each_address()
# POC of pdf reader

View file

@ -13,6 +13,7 @@ class pdfReaderToText():
self.text_list = []
self.get_text_from_pdf_file()
self.type = None
print("everything from scracth")
def get_text_from_pdf_file(self):
self.logger.debug(f"Extrating text from {self.source_path}")

View file

@ -10,6 +10,9 @@ class SiteNotesExtractor():
except IndexError:
return None # Return None if the value does not occur twice
def get_data_between(self, a, b):
    """Return the part of ``self.raw_data`` lying between two markers.

    The result starts at the first occurrence of ``a`` (inclusive) and
    stops just before the first occurrence of ``b`` (exclusive). The
    ``index`` lookups raise ``ValueError`` when a marker is not present.
    """
    source = self.raw_data
    first = source.index(a)
    last = source.index(b)
    return source[first:last]
class QuidosSiteNotes(SiteNotesExtractor):
@ -42,6 +45,8 @@ class QuidosSiteNotes(SiteNotesExtractor):
self.get_section_15_0()
self.get_section_15_1()
self.get_section_16()
self.get_section_17()
self.get_section_18()
def get_summary_information(self):
# Summary Information
@ -488,6 +493,34 @@ class QuidosSiteNotes(SiteNotesExtractor):
]
self.two_columns_processor(data, sub_titles, avoid, 16.0)
def get_section_17(self):
    """Placeholder for section 17: no extraction is implemented yet."""
    return None
def get_section_18(self):
    """Process section 18.0 ("Showers And Baths") of the site notes.

    Takes the raw data between the section 18.0 heading and the section
    19.0 heading (via ``get_data_between``) and hands it to
    ``two_columns_processor`` together with the sub-title labels, the
    ``avoid`` strings and the section number ``18.0``.
    """
    start_heading = "18.0 Showers And Baths"
    end_heading = "19.0 Flue Gas Heat Recovery System"

    data = self.get_data_between(start_heading, end_heading)
    sub_titles = [
        "Number of Rooms with Bath and/or Shower",
        "Number of Rooms with Mixer Shower and no Bath",
        "Number of Rooms with Mixer Shower and Bath",
    ]
    # Both section headings are passed as the `avoid` list, in the same
    # order as before.
    avoid = [start_heading, end_heading]
    self.two_columns_processor(data, sub_titles, avoid, 18.0)
# Unfinished handler for section 19.0 (Flue Gas Heat Recovery System): it takes
# the raw data between the 19.0 and 20.0 headings and sets up empty
# `sub_titles` / `avoid` lists.
# NOTE(review): unlike get_section_18, nothing visible here passes these values
# to two_columns_processor — confirm the method is still work in progress.
def get_section_19(self):
data = self.get_data_between("19.0 Flue Gas Heat Recovery System","20.0 Photovoltaic Panel")
sub_titles = [
]
avoid = [
]
# Extract
# Transform (with validation via pydantic)
# Load