mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
save settings
This commit is contained in:
parent
a4e916d68c
commit
e0694efb86
4 changed files with 45 additions and 4 deletions
7
.vscode/settings.json
vendored
7
.vscode/settings.json
vendored
|
|
@ -1,4 +1,11 @@
|
|||
{
|
||||
"jupyter.interactiveWindow.textEditor.executeSelection": true,
|
||||
"python.REPL.sendToNativeREPL": true
|
||||
|
||||
// Hot reload setting that needs to be in user settings
|
||||
// "jupyter.runStartupCommands": [
|
||||
// "%load_ext autoreload", "%autoreload 2"
|
||||
// ]
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ from pprint import pprint, pformat
|
|||
import logging
|
||||
from etl.utils.logger import Logger
|
||||
from etl.validator.validator import DomnaSharePointValidator
|
||||
|
||||
logger = Logger(name="main.py", level=logging.DEBUG).get_logger()
|
||||
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
|
||||
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
|
||||
|
|
@ -13,8 +14,7 @@ pdfReader = pdfReaderToText(DATA_LOC_1)
|
|||
doc2 = pdfReader.get_reader()
|
||||
pdfReader2 = pdfReaderToText(DATA_LOC_2)
|
||||
doc1 = pdfReader2.get_reader()
|
||||
vars(doc1)
|
||||
|
||||
# vars(doc1)
|
||||
|
||||
def main():
|
||||
pass
|
||||
|
|
@ -34,8 +34,8 @@ def main():
|
|||
# logger.info(pformat(list_of_house_ass_names))
|
||||
|
||||
# POC of downloading each file
|
||||
# south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
|
||||
# south_coast_scraper.download_file_for_each_address()
|
||||
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
|
||||
south_coast_scraper.download_file_for_each_address()
|
||||
|
||||
# POC of pdf reader
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ class pdfReaderToText():
|
|||
self.text_list = []
|
||||
self.get_text_from_pdf_file()
|
||||
self.type = None
|
||||
print("everything from scracth")
|
||||
|
||||
def get_text_from_pdf_file(self):
|
||||
self.logger.debug(f"Extrating text from {self.source_path}")
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ class SiteNotesExtractor():
|
|||
except IndexError:
|
||||
return None # Return None if the value does not occur twice
|
||||
|
||||
def get_data_between(self, a, b):
    """Return the part of ``self.raw_data`` from marker ``a`` up to marker ``b``.

    The slice starts at the first occurrence of ``a`` (inclusive) and ends
    just before the first occurrence of ``b`` located at or after that
    position (exclusive).

    Args:
        a: Start marker to look up in ``self.raw_data``.
        b: End marker to look up in ``self.raw_data``.

    Returns:
        The slice of ``self.raw_data`` between the two markers.

    Raises:
        ValueError: If ``a`` is not present, or ``b`` does not occur at or
            after the position of ``a``.
    """
    start = self.raw_data.index(a)
    # Search for the end marker only from the start marker onwards; an
    # earlier occurrence of ``b`` would otherwise produce an empty slice.
    end = self.raw_data.index(b, start)
    return self.raw_data[start:end]
|
||||
|
||||
|
||||
|
||||
class QuidosSiteNotes(SiteNotesExtractor):
|
||||
|
|
@ -42,6 +45,8 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
self.get_section_15_0()
|
||||
self.get_section_15_1()
|
||||
self.get_section_16()
|
||||
self.get_section_17()
|
||||
self.get_section_18()
|
||||
|
||||
def get_summary_information(self):
|
||||
# Summary Information
|
||||
|
|
@ -488,6 +493,34 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
]
|
||||
|
||||
self.two_columns_processor(data, sub_titles, avoid, 16.0)
|
||||
|
||||
def get_section_17(self):
    """Placeholder for section 17; currently performs no extraction."""
    return None
|
||||
|
||||
def get_section_18(self):
    """Process section 18.0 (Showers And Baths) of the raw data.

    Takes the data between the "18.0 Showers And Baths" and
    "19.0 Flue Gas Heat Recovery System" headings via ``get_data_between``
    and passes it to ``two_columns_processor`` together with the section's
    sub-titles, the ``avoid`` list (the two headings) and the section
    number 18.0.
    """
    data = self.get_data_between(
        "18.0 Showers And Baths",
        "19.0 Flue Gas Heat Recovery System",
    )
    sub_titles = [
        "Number of Rooms with Bath and/or Shower",
        "Number of Rooms with Mixer Shower and no Bath",
        "Number of Rooms with Mixer Shower and Bath",
    ]
    avoid = [
        "18.0 Showers And Baths",
        "19.0 Flue Gas Heat Recovery System",
    ]

    # No stdout output here: the previous debug print was a leftover.
    self.two_columns_processor(data, sub_titles, avoid, 18.0)
|
||||
|
||||
def get_section_19(self):
    """Section 19.0 (Flue Gas Heat Recovery System) - work in progress.

    Currently only fetches the data between the "19.0 Flue Gas Heat
    Recovery System" and "20.0 Photovoltaic Panel" headings; the sub-title
    and avoid lists are still empty and nothing is processed or returned.
    """
    start_heading = "19.0 Flue Gas Heat Recovery System"
    end_heading = "20.0 Photovoltaic Panel"
    data = self.get_data_between(start_heading, end_heading)

    # Not filled in yet.
    sub_titles = []
    avoid = []
|
||||
|
||||
# Extract
|
||||
# Transform (with pydantic validation)
|
||||
# Load
|
||||
Loading…
Add table
Reference in a new issue