save work

This commit is contained in:
Jun-te Kim 2025-03-12 16:00:47 +00:00
parent e0694efb86
commit 6427b030f8
3 changed files with 49 additions and 5 deletions

View file

@ -14,7 +14,7 @@ pdfReader = pdfReaderToText(DATA_LOC_1)
doc2 = pdfReader.get_reader()
pdfReader2 = pdfReaderToText(DATA_LOC_2)
doc1 = pdfReader2.get_reader()
# vars(doc1)
vars(doc1)
def main():
pass

View file

@ -13,7 +13,6 @@ class pdfReaderToText():
self.text_list = []
self.get_text_from_pdf_file()
self.type = None
print("everything from scracth")
def get_text_from_pdf_file(self):
self.logger.debug(f"Extrating text from {self.source_path}")

View file

@ -47,6 +47,10 @@ class QuidosSiteNotes(SiteNotesExtractor):
self.get_section_16()
self.get_section_17()
self.get_section_18()
self.get_section_19()
self.get_section_20()
self.get_section_21()
self.get_section_22()
def get_summary_information(self):
# Summary Information
@ -510,17 +514,58 @@ class QuidosSiteNotes(SiteNotesExtractor):
]
self.two_columns_processor(data, sub_titles, avoid, 18.0)
print("hello seems to khklkjbe")
def get_section_19(self):
    """Process section 19.0 (Flue Gas Heat Recovery System).

    Fetches the text found between the 19.0 heading and the 20.0 heading
    with ``get_data_between`` and hands it to ``two_columns_processor``
    together with the section's sub-titles and the two headings passed
    as the ``avoid`` list.
    """
    start_heading = "19.0 Flue Gas Heat Recovery System"
    end_heading = "20.0 Photovoltaic Panel"
    section_text = self.get_data_between(start_heading, end_heading)
    field_labels = ["FGHRS Present"]
    # NOTE(review): get_section_18 passes its number as 18.0 (float) while
    # this call passes an int -- confirm which form two_columns_processor
    # expects.
    self.two_columns_processor(
        section_text,
        field_labels,
        [start_heading, end_heading],
        19,
    )
def get_section_20(self):
    """Process section 20.0 (Photovoltaic Panel).

    Fetches the text found between the 20.0 heading and the 21.0 heading
    with ``get_data_between`` and hands it to ``two_columns_processor``
    together with the section's two sub-titles and the two headings passed
    as the ``avoid`` list.
    """
    data = self.get_data_between("20.0 Photovoltaic Panel", "21.0 Wind Turbine")
    # Each sub-title must be its own list element. Without the separating
    # comma Python silently joins adjacent string literals into one string,
    # which left this section with a single, never-matching sub-title.
    sub_titles = [
        "PVs are connected to dwelling electricity",
        "Percentage of External Roof Area with PVs",
    ]
    avoid = [
        "20.0 Photovoltaic Panel",
        "21.0 Wind Turbine",
    ]
    # NOTE(review): get_section_18 passes its number as 18.0 (float) while
    # this call passes an int -- confirm which form two_columns_processor
    # expects.
    self.two_columns_processor(data, sub_titles, avoid, 20)
def get_section_21(self):
    """Process section 21.0 (Wind Turbine).

    Fetches the text found between the 21.0 heading and the 22.0 heading
    with ``get_data_between`` and hands it to ``two_columns_processor``
    together with the section's sub-titles and the two headings passed
    as the ``avoid`` list.
    """
    start_heading = "21.0 Wind Turbine"
    end_heading = "22.0 Other Details"
    section_text = self.get_data_between(start_heading, end_heading)
    field_labels = ["Wind Turbine"]
    # NOTE(review): get_section_18 passes its number as 18.0 (float) while
    # this call passes an int -- confirm which form two_columns_processor
    # expects.
    self.two_columns_processor(
        section_text,
        field_labels,
        [start_heading, end_heading],
        21,
    )
def get_section_22(self):
    """Process section 22.0 (Other Details).

    Fetches the text found between the 22.0 heading and the
    "Recommendations ..." heading with ``get_data_between`` and hands it to
    ``two_columns_processor`` together with the section's sub-titles and
    the two headings passed as the ``avoid`` list.
    """
    start_heading = "22.0 Other Details"
    end_heading = "Recommendations (Carbon Saving Figures Are For Guidance Only)"
    section_text = self.get_data_between(start_heading, end_heading)
    field_labels = ["Electricity Meter Type", "Mains Gas Available"]
    # NOTE(review): get_section_18 passes its number as 18.0 (float) while
    # this call passes an int -- confirm which form two_columns_processor
    # expects.
    self.two_columns_processor(
        section_text,
        field_labels,
        [start_heading, end_heading],
        22,
    )
# Section 20 and 11, check results for 18 to 22
# Extract
# Transform (with pydantic validation)
# Load