section 7 completed

This commit is contained in:
Jun-te Kim 2025-03-12 12:01:32 +00:00
parent 0e80eba347
commit 3f51c8f529
2 changed files with 79 additions and 12 deletions

View file

@ -6,9 +6,17 @@ import logging
from etl.utils.logger import Logger
from etl.validator.validator import DomnaSharePointValidator
logger = Logger(name="main.py", level=logging.DEBUG).get_logger()
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
pdfReader = pdfReaderToText(DATA_LOC_1)
doc1 = pdfReader.get_reader()
pdfReader = pdfReaderToText(DATA_LOC_2)
doc2 = pdfReader.get_reader()
def main():
pass
# POC PDF Reader
# list_ = pdfReaderToText(INTERESTING_FILE_LOC).get_list_of_test()
# pprint(list_)
@ -29,13 +37,7 @@ def main():
# south_coast_scraper.download_file_for_each_address()
# POC of pdf reader
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
pdfReader = pdfReaderToText(DATA_LOC_1)
doc1 = pdfReader.get_reader()
pdfReader = pdfReaderToText(DATA_LOC_2)
doc2 = pdfReader.get_reader()
if __name__ == "__main__":
@ -50,7 +52,6 @@ if __name__ == "__main__":
# Work out productivity metric (number of addresses in the submission folder with at least one file included)
# Khalim would like these metrics from the pdf
# address, UPRN, assessor's name validation, current SAP rating, current annual emissions. Dimensions

View file

@ -9,6 +9,7 @@ class SiteNotes():
return [i for i, v in enumerate(lst) if v == value][x]
except IndexError:
return None # Return None if the value does not occur twice
class QuidosSiteNotes(SiteNotes):
@ -26,6 +27,9 @@ class QuidosSiteNotes(SiteNotes):
self.get_section_2()
self.get_section_3()
self.get_section_4()
self.get_section_5()
self.get_section_6()
self.get_section_7()
def get_summary_information(self):
# Summary Information
@ -57,7 +61,7 @@ class QuidosSiteNotes(SiteNotes):
'Current annual energy costs',
'Emission figures including 9.92 emission factor of 0.925',
]
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
self.reference_number = get_value('Reference Number')
self.epc_language = get_value('EPC Language')
@ -107,7 +111,8 @@ class QuidosSiteNotes(SiteNotes):
"Detachment/Position",
"2.0 Number Of"
]
get_value = lambda x: None if data[data.index(x) + 1] in avoid else data[data.index(x) + 1]
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
self.property_type_built_form = get_value("Built Form")
self.property_type_detatchment_position = get_value("Detachment/Position")
@ -130,7 +135,8 @@ class QuidosSiteNotes(SiteNotes):
"3.0 Date Built",
]
get_value = lambda x: None if data[data.index(x) + 1] in avoid else data[data.index(x) + 1]
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
self.main_property = get_value("Main Property")
self.extension_1 = get_value('Extension 1')
self.extension_2 = get_value('Extension 2')
@ -213,9 +219,69 @@ class QuidosSiteNotes(SiteNotes):
setattr(self, f"extension_{i}_dimensions)", create_dimensions_array(f"Extension {i} Property", int(getattr(self, f"extension_{i}"))))
else:
setattr(self, f"extensions_{i}_dimensions",None)
def get_section_5(self):
    """Set ``self.conservatory`` from section 5 ("5.0 Conservatory").

    The section is the slice of ``self.raw_data`` from ``'5.0 Conservatory'``
    up to (not including) ``'7.0 Walls'``; it therefore also spans whatever
    section 6 contributes.  ``self.conservatory`` becomes ``True`` only when
    the item directly after the ``"Is there a conservatory?"`` label is
    exactly ``"YES"``; any other answer, or a missing answer, gives ``False``.

    Raises:
        ValueError: if ``'5.0 Conservatory'``, ``'7.0 Walls'`` or the
            ``"Is there a conservatory?"`` label cannot be found.
    """
    section_start = self.raw_data.index('5.0 Conservatory')
    section_end = self.raw_data.index('7.0 Walls')
    data = self.raw_data[section_start:section_end]

    # Search inside the section slice rather than the whole document so an
    # identical label elsewhere in raw_data cannot be picked up by mistake.
    answer_pos = data.index("Is there a conservatory?") + 1

    # The label may be the last item of the slice (no answer recorded).
    answer = data[answer_pos] if answer_pos < len(data) else None
    self.conservatory = answer == "YES"
def get_section_6(self):
    """Placeholder for section 6 parsing; nothing is extracted yet."""
    return None
def get_section_7(self):
    """Populate wall attributes from section 7 ("7.0 Walls") of the site notes.

    The section is the slice of ``self.raw_data`` from ``'7.0 Walls'`` up to
    (not including) ``'8.0 Roofs'``.  It is split into a "Main Property"
    block and one block per ``"Extension 1"`` .. ``"Extension 4"`` header.
    For every known label found in a block, the item that follows the
    label's first occurrence in that block is stored as an attribute:

    * ``main_property_<label>`` for the main property
    * ``extensions_<n>_<label>`` for extension ``n``

    ``<label>`` is the label lower-cased with spaces and hyphens replaced by
    underscores.  Other punctuation is kept, so some names (for example
    ``main_property_dry_lining?``) are only reachable through ``getattr``.
    The stored value is ``None`` when the label is followed by another
    label/section header or is the last item of its block.

    Raises:
        ValueError: if ``'7.0 Walls'``, ``'8.0 Roofs'``, ``"Main Property"``
            or any of the ``"Extension 1"`` .. ``"Extension 4"`` headers is
            missing.
    """
    section_start = self.raw_data.index('7.0 Walls')
    section_end = self.raw_data.index('8.0 Roofs')
    data = self.raw_data[section_start:section_end]

    # Labels and headers: these mark fields and must never be stored as values.
    avoid = (
        "7.0 Walls",
        "8.0 Roofs",
        "Construction",
        "Insulation",
        "Insulation Thickness(mm)",
        "Wall Thickness Measured?",
        "Wall Thickness Measured",
        "Wall Thickness(mm)",
        "U-value Known?",
        "U-value Known",
        "U-value (W/m²K)",
        "Dry-lining?",
        "Alternative Wall Present?",
        "Alternative Wall Present",
    )

    def store_block(prefix, block):
        # Values are looked up inside ``block`` only, so a label that also
        # occurs elsewhere in the document (or in a later extension) cannot
        # leak into this property's attributes.
        for label in block:
            if label not in avoid:
                continue
            # First occurrence wins when a label repeats within the block.
            value_pos = block.index(label) + 1
            value = block[value_pos] if value_pos < len(block) else None
            if value in avoid:
                value = None
            attr = label.lower().replace(' ', '_').replace('-', '_')
            setattr(self, f"{prefix}_{attr}", value)

    # Main property
    store_block(
        "main_property",
        data[data.index("Main Property"):data.index("Extension 1")],
    )

    # Extensions: each block ends where the next extension header starts, or
    # at the end of the section for the last one.
    for j in range(1, 5):
        ext_start = data.index(f"Extension {j}")
        try:
            ext_end = data.index(f"Extension {j + 1}", ext_start + 1)
        except ValueError:
            ext_end = len(data)
        store_block(f"extensions_{j}", data[ext_start:ext_end])
def get_section_8(self):
    """Placeholder for section 8 parsing; nothing is extracted yet."""
    return None
# validation
# function of object of type this