mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
section 7 completed
This commit is contained in:
parent
0e80eba347
commit
3f51c8f529
2 changed files with 79 additions and 12 deletions
17
etl/main.py
17
etl/main.py
|
|
@ -6,9 +6,17 @@ import logging
|
|||
from etl.utils.logger import Logger
|
||||
from etl.validator.validator import DomnaSharePointValidator
|
||||
logger = Logger(name="main.py", level=logging.DEBUG).get_logger()
|
||||
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
|
||||
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
|
||||
|
||||
pdfReader = pdfReaderToText(DATA_LOC_1)
|
||||
doc1 = pdfReader.get_reader()
|
||||
pdfReader = pdfReaderToText(DATA_LOC_2)
|
||||
doc2 = pdfReader.get_reader()
|
||||
|
||||
|
||||
def main():
|
||||
pass
|
||||
# POC PDF Reader
|
||||
# list_ = pdfReaderToText(INTERESTING_FILE_LOC).get_list_of_test()
|
||||
# pprint(list_)
|
||||
|
|
@ -29,13 +37,7 @@ def main():
|
|||
# south_coast_scraper.download_file_for_each_address()
|
||||
|
||||
# POC of pdf reader
|
||||
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
|
||||
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
|
||||
|
||||
pdfReader = pdfReaderToText(DATA_LOC_1)
|
||||
doc1 = pdfReader.get_reader()
|
||||
pdfReader = pdfReaderToText(DATA_LOC_2)
|
||||
doc2 = pdfReader.get_reader()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -50,7 +52,6 @@ if __name__ == "__main__":
|
|||
|
||||
# Work out productivity metric (number of addresses in the submission folder with at least one file included)
|
||||
|
||||
|
||||
# Khalim would like these metrics from the pdf
|
||||
# address, UPRN, assessor's name validation, current SAP rating, current annual emissions, dimensions
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ class SiteNotes():
|
|||
return [i for i, v in enumerate(lst) if v == value][x]
|
||||
except IndexError:
|
||||
return None # Return None if the value does not occur twice
|
||||
|
||||
|
||||
|
||||
class QuidosSiteNotes(SiteNotes):
|
||||
|
|
@ -26,6 +27,9 @@ class QuidosSiteNotes(SiteNotes):
|
|||
self.get_section_2()
|
||||
self.get_section_3()
|
||||
self.get_section_4()
|
||||
self.get_section_5()
|
||||
self.get_section_6()
|
||||
self.get_section_7()
|
||||
|
||||
def get_summary_information(self):
|
||||
# Summary Information
|
||||
|
|
@ -57,7 +61,7 @@ class QuidosSiteNotes(SiteNotes):
|
|||
'Current annual energy costs',
|
||||
'Emission figures including 9.92 emission factor of 0.925',
|
||||
]
|
||||
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
|
||||
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
|
||||
|
||||
self.reference_number = get_value('Reference Number')
|
||||
self.epc_language = get_value('EPC Language')
|
||||
|
|
@ -107,7 +111,8 @@ class QuidosSiteNotes(SiteNotes):
|
|||
"Detachment/Position",
|
||||
"2.0 Number Of"
|
||||
]
|
||||
get_value = lambda x: None if data[data.index(x) + 1] in avoid else data[data.index(x) + 1]
|
||||
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
|
||||
|
||||
self.property_type_built_form = get_value("Built Form")
|
||||
self.property_type_detatchment_position = get_value("Detachment/Position")
|
||||
|
||||
|
|
@ -130,7 +135,8 @@ class QuidosSiteNotes(SiteNotes):
|
|||
"3.0 Date Built",
|
||||
]
|
||||
|
||||
get_value = lambda x: None if data[data.index(x) + 1] in avoid else data[data.index(x) + 1]
|
||||
get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
|
||||
|
||||
self.main_property = get_value("Main Property")
|
||||
self.extension_1 = get_value('Extension 1')
|
||||
self.extension_2 = get_value('Extension 2')
|
||||
|
|
@ -213,9 +219,69 @@ class QuidosSiteNotes(SiteNotes):
|
|||
setattr(self, f"extension_{i}_dimensions)", create_dimensions_array(f"Extension {i} Property", int(getattr(self, f"extension_{i}"))))
|
||||
else:
|
||||
setattr(self, f"extensions_{i}_dimensions",None)
|
||||
|
||||
def get_section_5(self):
    """Record whether the survey reports a conservatory.

    Looks only inside the slice of ``self.raw_data`` that starts at the
    "5.0 Conservatory" header and stops before "7.0 Walls", reads the token
    that follows the "Is there a conservatory?" label, and stores the
    outcome on ``self.conservatory`` (``True`` only when that token is
    exactly "YES").

    Raises:
        ValueError: if a section header or the question label cannot be
            found (propagated from ``index``).
    """
    start = self.raw_data.index('5.0 Conservatory')
    # Search for the closing header after the opening one so an earlier
    # stray match cannot produce an empty section.
    end = self.raw_data.index('7.0 Walls', start)
    section = self.raw_data[start:end]

    # Look the label up inside the section rather than the whole document,
    # otherwise an identical label earlier in raw_data would win.
    answer_at = section.index('Is there a conservatory?') + 1

    # A label sitting at the very end of the section has no answer token.
    answer = section[answer_at] if answer_at < len(section) else None

    self.conservatory = answer == "YES"
|
||||
|
||||
|
||||
def get_section_6(self):
|
||||
pass
|
||||
|
||||
def get_section_7(self):
    """Extract the "7.0 Walls" fields for the main property and extensions.

    The section is the slice of ``self.raw_data`` from "7.0 Walls" up to
    (not including) "8.0 Roofs". It is split into one segment for
    "Main Property" and one per "Extension 1" .. "Extension 4" header that
    is present; each segment ends where the next one starts.

    Within a segment, every known label is paired with the token that
    follows it. The value is ``None`` when that token is itself a label or
    section header, or when the label is the last token of the segment.
    Each pair is stored as an attribute named
    ``main_property_<label>`` or ``extensions_<j>_<label>``, where
    ``<label>`` is the lower-cased label with spaces and hyphens replaced
    by underscores. Other punctuation is kept, so some names are only
    reachable through ``getattr``.

    Extensions whose header is absent are skipped and get no attributes.

    Raises:
        ValueError: if "7.0 Walls", "8.0 Roofs" or "Main Property" cannot
            be found (propagated from ``index``).
    """
    labels = frozenset((
        "7.0 Walls",
        "8.0 Roofs",
        "Construction",
        "Insulation",
        "Insulation Thickness(mm)",
        "Wall Thickness Measured?",
        "Wall Thickness Measured",
        "Wall Thickness(mm)",
        "U-value Known?",
        "U-value Known",
        "U-value (W/m²K)",
        "Dry-lining?",
        "Alternative Wall Present?",
        "Alternative Wall Present",
    ))

    def labelled_values(segment):
        # Map each label in the segment to the token after it, keeping the
        # first occurrence when a label repeats.
        found = {}
        for position, token in enumerate(segment):
            if token not in labels or token in found:
                continue
            nxt = position + 1
            following = segment[nxt] if nxt < len(segment) else None
            # A label directly followed by another label has no value.
            found[token] = None if following in labels else following
        return found

    start = self.raw_data.index('7.0 Walls')
    end = self.raw_data.index('8.0 Roofs', start)
    section = self.raw_data[start:end]

    # (start offset, attribute prefix) for every segment that exists.
    segments = [(section.index("Main Property"), "main_property")]
    for j in range(1, 5):
        header = f"Extension {j}"
        if header in section:
            segments.append((section.index(header), f"extensions_{j}"))
    segments.sort()

    # Each segment stops where the next begins, so a label missing from
    # one extension cannot pick up the value belonging to a later one.
    stops = [offset for offset, _ in segments[1:]] + [len(section)]

    for (offset, prefix), stop in zip(segments, stops):
        for label, value in labelled_values(section[offset:stop]).items():
            suffix = label.lower().replace(' ', '_').replace('-', '_')
            setattr(self, f"{prefix}_{suffix}", value)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def get_section_8(self):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
# validation
|
||||
# function of object of type this
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue