windows scraping is working

This commit is contained in:
Jun-te Kim 2025-03-13 10:01:59 +00:00
parent c5b8143eab
commit bbaddbefa2
2 changed files with 80 additions and 11 deletions

View file

@ -17,8 +17,8 @@ doc1 = pdfReader2.get_reader()
vars(doc1)
def main():
# Script entry point: builds a SharePointScraper for
# SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE with development=True and
# calls download_file_for_each_address() on it.
# NOTE(review): the same two calls also appear commented out directly below.
# If the commented-out form is the intended current state, main() is left
# without any executable statement and needs at least a `pass` (or a real
# body) to remain valid Python — confirm which variant is meant to be live.
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE, development = True)
south_coast_scraper.download_file_for_each_address()
# south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE, development = True)
# south_coast_scraper.download_file_for_each_address()
if __name__ == "__main__":
main()

View file

@ -36,7 +36,7 @@ class QuidosSiteNotes(SiteNotesExtractor):
self.get_section_8()
self.get_section_9()
self.get_section_10()
# self.get_section_11()
self.get_section_11()
self.get_section_12()
self.get_section_13()
self.get_section_14()
@ -359,11 +359,11 @@ class QuidosSiteNotes(SiteNotesExtractor):
]
self.two_columns_processor(data, sub_titles, avoid, 10)
def two_columns_processor(self, data, sub_titles_to_gather, avoid, section):
def two_columns_processor(self, data, sub_titles_to_gather, avoid, section, indexAdd = 1):
def get_value(key):
try:
index = data.index(key)
value = data[index + 1]
value = data[index + indexAdd]
return None if value in avoid else value
except (ValueError, IndexError):
return None
@ -375,7 +375,66 @@ class QuidosSiteNotes(SiteNotesExtractor):
setattr(self, f"section_{section}_{items.lower().replace('-', '_').replace(' ','_')}", get_value(items))
def get_section_11(self):
    """Extract the window table that sits between the "Window Location"
    marker and the "12.0 Ventilation & Cooling" heading.

    The flat token list returned by ``self.get_data_between`` is scanned
    left to right.  Every time one of the known sub-titles ("Main Property",
    "Extension 1" ... "Extension 4") is met, an empty list is stored on the
    instance as ``section_11_<sub_title>_window``.  Each window row is then
    located by its orientation token (e.g. "North", "SW"): the three tokens
    before it, the orientation itself and the two tokens after it form one
    record, which is appended to the list of the current sub-title.

    Malformed input no longer aborts the extraction:

    * when no orientation token is left, scanning for rows stops (later
      sub-titles still get their empty list);
    * a row with fewer than three tokens before the orientation, or fewer
      than six tokens in total, is skipped instead of raising.

    Returns:
        None.  Results are stored as attributes on ``self``.
    """
    data = self.get_data_between("Window Location", "12.0 Ventilation & Cooling")

    # The first eight tokens are dropped before parsing (the earlier code
    # named this slice ``headers`` — presumably the table's column headers;
    # verify against a sample document).
    header_count = 8
    data_entries = data[header_count:]

    subtitles = (
        "Main Property",
        "Extension 1",
        "Extension 2",
        "Extension 3",
        "Extension 4",
    )
    # Compared against the lower-cased token, so every spelling is lower case.
    orientations = frozenset({
        "north", "east", "west", "south",
        "n", "w", "s", "e",
        "nw", "ne", "sw", "se",
        "south west", "south east", "north west", "north east",
    })

    # Layout of one row relative to its orientation token.
    fields_before = 3
    fields_after = 3  # the orientation itself plus the two tokens after it
    row_length = fields_before + fields_after

    title = None
    next_start = 0      # index in data_entries where the next row search begins
    exhausted = False   # True once no orientation token remains

    for i, item in enumerate(data_entries):
        if item in subtitles:
            title = item.lower().replace(" ", "_").replace("-", "_")
            setattr(self, f"section_11_{title}_window", [])
            # If nothing has been parsed up to here (e.g. the first
            # sub-title is not the very first token), start at the sub-title.
            if i > next_start:
                next_start = i

        if exhausted or not title or i != next_start:
            continue

        remaining = data_entries[i:]
        offset = next(
            (k for k, token in enumerate(remaining) if token.lower() in orientations),
            None,
        )
        if offset is None:
            # No further window rows anywhere in the remaining data.
            exhausted = True
            continue

        # Resume right after this row, whether or not it turns out usable.
        next_start = i + offset + fields_after

        if offset < fields_before:
            # A negative slice start would silently pick the wrong tokens.
            continue
        row = remaining[offset - fields_before:offset + fields_after]
        if len(row) < row_length:
            # Truncated row at the end of the section.
            continue

        getattr(self, f"section_11_{title}_window").append({
            "glazing type": row[0],
            "Area (m2)": row[1],
            "Roof Window": row[2],
            "Orientation": row[3],
            "U-value (W/m²K)": row[4],
            "g-value": row[5],
        })
def get_section_12(self):
data = self.raw_data[self.raw_data.index('12.0 Ventilation & Cooling'): self.raw_data.index('13.0 Lighting')]
@ -505,8 +564,6 @@ class QuidosSiteNotes(SiteNotesExtractor):
data = self.get_data_between("18.0 Showers And Baths", "19.0 Flue Gas Heat Recovery System")
sub_titles = [
"Number of Rooms with Bath and/or Shower",
"Number of Rooms with Mixer Shower and no Bath",
"Number of Rooms with Mixer Shower and Bath",
]
avoid = [
"18.0 Showers And Baths",
@ -514,6 +571,15 @@ class QuidosSiteNotes(SiteNotesExtractor):
]
self.two_columns_processor(data, sub_titles, avoid, 18.0)
avoid = [
"18.0 Showers And Baths",
"19.0 Flue Gas Heat Recovery System",
]
sub_titles = [
"Number of Rooms with Mixer Shower and no", # Number of Rooms with Mixer Shower and no Bath
"Number of Rooms with Mixer Shower and", # Number of Rooms with Mixer Shower and Bath
]
self.two_columns_processor(data, sub_titles, avoid, 18.0, 2)
def get_section_19(self):
data = self.get_data_between("19.0 Flue Gas Heat Recovery System","20.0 Photovoltaic Panel")
@ -529,9 +595,7 @@ class QuidosSiteNotes(SiteNotesExtractor):
def get_section_20(self):
data = self.get_data_between("20.0 Photovoltaic Panel","21.0 Wind Turbine")
print(data)
sub_titles = [
"PVs are connected to dwelling electricity"
"Percentage of External Roof Area with PVs"
]
avoid = [
@ -539,6 +603,11 @@ class QuidosSiteNotes(SiteNotesExtractor):
"21.0 Wind Turbine",
]
self.two_columns_processor(data, sub_titles, avoid, 20)
sub_titles = [
"PVs are connected to dwelling electricity" # PVs are connected to dwelling electricity meter
]
self.two_columns_processor(data, sub_titles, avoid, 20, 2)
def get_section_21(self):
data = self.get_data_between("21.0 Wind Turbine","22.0 Other Details")
@ -565,7 +634,7 @@ class QuidosSiteNotes(SiteNotesExtractor):
self.two_columns_processor(data, sub_titles, avoid, 22)
# Section 20 and 11, check results for 18 to 22
# Section and 11
# Extract
# Transform (with validation via pydantic)
# Load