mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
save work
This commit is contained in:
parent
e0694efb86
commit
6427b030f8
3 changed files with 49 additions and 5 deletions
|
|
@ -14,7 +14,7 @@ pdfReader = pdfReaderToText(DATA_LOC_1)
|
|||
doc2 = pdfReader.get_reader()
|
||||
pdfReader2 = pdfReaderToText(DATA_LOC_2)
|
||||
doc1 = pdfReader2.get_reader()
|
||||
# vars(doc1)
|
||||
vars(doc1)
|
||||
|
||||
def main():
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ class pdfReaderToText():
|
|||
self.text_list = []
|
||||
self.get_text_from_pdf_file()
|
||||
self.type = None
|
||||
print("everything from scracth")
|
||||
|
||||
def get_text_from_pdf_file(self):
|
||||
self.logger.debug(f"Extrating text from {self.source_path}")
|
||||
|
|
|
|||
|
|
@ -47,6 +47,10 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
self.get_section_16()
|
||||
self.get_section_17()
|
||||
self.get_section_18()
|
||||
self.get_section_19()
|
||||
self.get_section_20()
|
||||
self.get_section_21()
|
||||
self.get_section_22()
|
||||
|
||||
def get_summary_information(self):
|
||||
# Summary Information
|
||||
|
|
@ -510,17 +514,58 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
]
|
||||
|
||||
self.two_columns_processor(data, sub_titles, avoid, 18.0)
|
||||
print("hello seems to khklkjbe")
|
||||
|
||||
def get_section_19(self):
    """Process section 19.0 (Flue Gas Heat Recovery System) of the site notes.

    The section's own heading and the heading of the following section are
    passed to ``get_data_between``; whatever it returns is forwarded to
    ``two_columns_processor`` together with the expected field labels, the
    heading strings to skip, and the section number (19).

    NOTE(review): the exact semantics of ``get_data_between`` and
    ``two_columns_processor`` are not visible here -- confirm against their
    definitions.
    """
    start_heading = "19.0 Flue Gas Heat Recovery System"
    end_heading = "20.0 Photovoltaic Panel"

    section_data = self.get_data_between(start_heading, end_heading)

    # Labels expected in this section.
    field_labels = ["FGHRS Present"]
    # Heading strings handed over as the "avoid" list.
    headings_to_skip = [start_heading, end_heading]

    self.two_columns_processor(section_data, field_labels, headings_to_skip, 19)
|
||||
|
||||
def get_section_20(self):
    """Process section 20.0 (Photovoltaic Panel) of the site notes.

    The section's own heading and the heading of the following section are
    passed to ``get_data_between``; whatever it returns is forwarded to
    ``two_columns_processor`` together with the expected field labels, the
    heading strings to skip, and the section number (20).

    NOTE(review): the exact semantics of ``get_data_between`` and
    ``two_columns_processor`` are not visible here -- confirm against their
    definitions.
    """
    data = self.get_data_between("20.0 Photovoltaic Panel", "21.0 Wind Turbine")
    # Each label must be its own list element: adjacent string literals
    # without a comma are silently concatenated into a single string.
    sub_titles = [
        "PVs are connected to dwelling electricity",
        "Percentage of External Roof Area with PVs",
    ]
    avoid = [
        "20.0 Photovoltaic Panel",
        "21.0 Wind Turbine",
    ]
    self.two_columns_processor(data, sub_titles, avoid, 20)
|
||||
|
||||
def get_section_21(self):
    """Process section 21.0 (Wind Turbine) of the site notes.

    The section's own heading and the heading of the following section are
    passed to ``get_data_between``; whatever it returns is forwarded to
    ``two_columns_processor`` together with the expected field labels, the
    heading strings to skip, and the section number (21).

    NOTE(review): the exact semantics of ``get_data_between`` and
    ``two_columns_processor`` are not visible here -- confirm against their
    definitions.
    """
    start_heading = "21.0 Wind Turbine"
    end_heading = "22.0 Other Details"

    section_data = self.get_data_between(start_heading, end_heading)

    # Labels expected in this section.
    field_labels = ["Wind Turbine"]
    # Heading strings handed over as the "avoid" list.
    headings_to_skip = [start_heading, end_heading]

    self.two_columns_processor(section_data, field_labels, headings_to_skip, 21)
|
||||
|
||||
def get_section_22(self):
    """Process section 22.0 (Other Details) of the site notes.

    The section's own heading and the "Recommendations" heading that follows
    it are passed to ``get_data_between``; whatever it returns is forwarded
    to ``two_columns_processor`` together with the expected field labels, the
    heading strings to skip, and the section number (22).

    NOTE(review): the exact semantics of ``get_data_between`` and
    ``two_columns_processor`` are not visible here -- confirm against their
    definitions.
    """
    start_heading = "22.0 Other Details"
    end_heading = "Recommendations (Carbon Saving Figures Are For Guidance Only)"

    section_data = self.get_data_between(start_heading, end_heading)

    # Labels expected in this section.
    field_labels = ["Electricity Meter Type", "Mains Gas Available"]
    # Heading strings handed over as the "avoid" list.
    headings_to_skip = [start_heading, end_heading]

    self.two_columns_processor(section_data, field_labels, headings_to_skip, 22)
|
||||
|
||||
|
||||
# Section 20 and 11, check results for 18 to 22
|
||||
# Extract
|
||||
# Transform (with pydantic validation)
|
||||
# Load
|
||||
Loading…
Add table
Reference in a new issue