mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
windows scraping is working
This commit is contained in:
parent
c5b8143eab
commit
bbaddbefa2
2 changed files with 80 additions and 11 deletions
|
|
@ -17,8 +17,8 @@ doc1 = pdfReader2.get_reader()
|
|||
vars(doc1)
|
||||
|
||||
def main():
|
||||
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE, development = True)
|
||||
south_coast_scraper.download_file_for_each_address()
|
||||
# south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE, development = True)
|
||||
# south_coast_scraper.download_file_for_each_address()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
self.get_section_8()
|
||||
self.get_section_9()
|
||||
self.get_section_10()
|
||||
# self.get_section_11()
|
||||
self.get_section_11()
|
||||
self.get_section_12()
|
||||
self.get_section_13()
|
||||
self.get_section_14()
|
||||
|
|
@ -359,11 +359,11 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
]
|
||||
self.two_columns_processor(data, sub_titles, avoid, 10)
|
||||
|
||||
def two_columns_processor(self, data, sub_titles_to_gather, avoid, section):
|
||||
def two_columns_processor(self, data, sub_titles_to_gather, avoid, section, indexAdd = 1):
|
||||
def get_value(key):
|
||||
try:
|
||||
index = data.index(key)
|
||||
value = data[index + 1]
|
||||
value = data[index + indexAdd]
|
||||
return None if value in avoid else value
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
|
|
@ -375,7 +375,66 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
setattr(self, f"section_{section}_{items.lower().replace('-', '_').replace(' ','_')}", get_value(items))
|
||||
|
||||
def get_section_11(self):
|
||||
raise RuntimeError("Please complete me")
|
||||
data = self.get_data_between("Window Location", "12.0 Ventilation & Cooling")
|
||||
headers = data[:8]
|
||||
data_entries = data[8:]
|
||||
|
||||
num_attributes = 5
|
||||
subtitles=[
|
||||
"Main Property",
|
||||
"Extension 1",
|
||||
"Extension 2",
|
||||
"Extension 3",
|
||||
"Extension 4",
|
||||
]
|
||||
|
||||
orientation = [
|
||||
"north",
|
||||
"east",
|
||||
"west",
|
||||
"south",
|
||||
"n",
|
||||
"w",
|
||||
"s",
|
||||
"e",
|
||||
"nw",
|
||||
"ne",
|
||||
"sw",
|
||||
"se",
|
||||
"south west",
|
||||
"south east",
|
||||
"north west",
|
||||
"north east",
|
||||
|
||||
]
|
||||
|
||||
def find_compose_index(lst, compose):
|
||||
for i, item in enumerate(lst):
|
||||
if item.lower() in compose:
|
||||
return i
|
||||
return None
|
||||
|
||||
title = None
|
||||
until = 0
|
||||
for i, items in enumerate(data_entries):
|
||||
if data_entries[i] in subtitles:
|
||||
title = data_entries[i].lower().replace(" ", "_").replace("-", "_")
|
||||
setattr(self, f"section_11_{title}_window", [])
|
||||
if title and until == i:
|
||||
entry = data_entries[i:]
|
||||
index = find_compose_index(entry,orientation)
|
||||
new_entry = entry[index-3:index+3]
|
||||
dict_ = {
|
||||
"glazing type": new_entry[0],
|
||||
"Area (m2)": new_entry[1],
|
||||
"Roof Window": new_entry[2],
|
||||
"Orientation": new_entry[3],
|
||||
"U-value (W/m²K)": new_entry[4],
|
||||
"g-value": new_entry[5],
|
||||
}
|
||||
lst = getattr(self, f"section_11_{title}_window")
|
||||
lst.append(dict_)
|
||||
until = index + 3 + i
|
||||
|
||||
def get_section_12(self):
|
||||
data = self.raw_data[self.raw_data.index('12.0 Ventilation & Cooling'): self.raw_data.index('13.0 Lighting')]
|
||||
|
|
@ -505,8 +564,6 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
data = self.get_data_between("18.0 Showers And Baths", "19.0 Flue Gas Heat Recovery System")
|
||||
sub_titles = [
|
||||
"Number of Rooms with Bath and/or Shower",
|
||||
"Number of Rooms with Mixer Shower and no Bath",
|
||||
"Number of Rooms with Mixer Shower and Bath",
|
||||
]
|
||||
avoid = [
|
||||
"18.0 Showers And Baths",
|
||||
|
|
@ -514,6 +571,15 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
]
|
||||
|
||||
self.two_columns_processor(data, sub_titles, avoid, 18.0)
|
||||
avoid = [
|
||||
"18.0 Showers And Baths",
|
||||
"19.0 Flue Gas Heat Recovery System",
|
||||
]
|
||||
sub_titles = [
|
||||
"Number of Rooms with Mixer Shower and no", # Number of Rooms with Mixer Shower and no Bath
|
||||
"Number of Rooms with Mixer Shower and", # Number of Rooms with Mixer Shower and Bath
|
||||
]
|
||||
self.two_columns_processor(data, sub_titles, avoid, 18.0, 2)
|
||||
|
||||
def get_section_19(self):
|
||||
data = self.get_data_between("19.0 Flue Gas Heat Recovery System","20.0 Photovoltaic Panel")
|
||||
|
|
@ -529,9 +595,7 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
|
||||
def get_section_20(self):
|
||||
data = self.get_data_between("20.0 Photovoltaic Panel","21.0 Wind Turbine")
|
||||
print(data)
|
||||
sub_titles = [
|
||||
"PVs are connected to dwelling electricity"
|
||||
"Percentage of External Roof Area with PVs"
|
||||
]
|
||||
avoid = [
|
||||
|
|
@ -539,6 +603,11 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
"21.0 Wind Turbine",
|
||||
]
|
||||
self.two_columns_processor(data, sub_titles, avoid, 20)
|
||||
|
||||
sub_titles = [
|
||||
"PVs are connected to dwelling electricity" # PVs are connected to dwelling electricity meter
|
||||
]
|
||||
self.two_columns_processor(data, sub_titles, avoid, 20, 2)
|
||||
|
||||
def get_section_21(self):
|
||||
data = self.get_data_between("21.0 Wind Turbine","22.0 Other Details")
|
||||
|
|
@ -565,7 +634,7 @@ class QuidosSiteNotes(SiteNotesExtractor):
|
|||
self.two_columns_processor(data, sub_titles, avoid, 22)
|
||||
|
||||
|
||||
# Section 20 and 11, check results for 18 to 22
|
||||
# Section 11
|
||||
# Extract
|
||||
# Transform (with pydantic validation)
|
||||
# Load
|
||||
Loading…
Add table
Reference in a new issue