handling different format of surveyed windows

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-23 11:45:57 +01:00
parent 7cf7a8aa61
commit 362e657ab5
2 changed files with 92 additions and 2 deletions

View file

@ -0,0 +1,60 @@
import os
from io import BytesIO
from etl.xml_survey_extraction.XmlParser import XmlParser
# Root folder for the survey extraction: main() scans its immediate subfolders for XML files.
# NOTE(review): this is an absolute path inside one user's home directory, so it
# has to be changed before the script can run on any other machine.
SURVEY_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/RESIDENT SURVEYS"
def main(survey_folder_path=None):
    """
    Extract survey data from the XML files found in the survey subfolders.

    Every immediate subfolder of the survey folder is searched for ``.xml``
    files (the extension is matched case-insensitively). Each file is read
    into memory, handed to ``XmlParser`` and, once ``run()`` has finished, the
    parser's ``epc`` and ``additional_data`` attributes are collected.

    :param survey_folder_path: Folder whose immediate subfolders hold the
        survey XML files. Defaults to the module-level ``SURVEY_FOLDER_PATH``.
    :return: A list with one dict per parsed XML file, with the keys
        ``"epc"``, ``"additional_data"`` and ``"subfolder"`` (the path of the
        subfolder the file was found in).
    :raises FileNotFoundError: If any subfolder contains no XML files.
    """
    if survey_folder_path is None:
        survey_folder_path = SURVEY_FOLDER_PATH

    # Step 1: List all subfolders inside the survey folder. Directory listings
    # come back in arbitrary order, so sort them to keep runs reproducible.
    subfolders = sorted(
        entry.path for entry in os.scandir(survey_folder_path) if entry.is_dir()
    )

    # Step 2: Loop through each subfolder and find the XML files.
    extracted_surveys = []
    for subfolder in subfolders:
        print(f"Searching in subfolder: {subfolder}")

        # Match the extension case-insensitively so that files saved as
        # ".XML" are not silently skipped.
        xml_files = sorted(
            name for name in os.listdir(subfolder) if name.lower().endswith(".xml")
        )
        if not xml_files:
            raise FileNotFoundError(f"No XML files found in subfolder: {subfolder}")

        # Perform the data extraction for every XML file. The subfolder path
        # is stored with each result so it can be traced back to its survey.
        for xml_file in xml_files:
            xml_path = os.path.join(subfolder, xml_file)
            print(f"Processing XML file: {xml_path}")

            # Read the whole file into memory so the parser does not depend on
            # the file handle staying open.
            with open(xml_path, 'rb') as file:
                xml_data_io = BytesIO(file.read())

            xml_parser = XmlParser(
                file=xml_data_io,
                filekey=xml_path,
                surveyor_company="",
                uprn=None,  # No UPRN is available for these files.
            )
            # Run the parser to extract the data.
            xml_parser.run()

            # Store the extracted data for further processing.
            extracted_surveys.append({
                "epc": xml_parser.epc,
                "additional_data": xml_parser.additional_data,
                "subfolder": subfolder,
            })

    print(f"Extracted {len(extracted_surveys)} surveys.")
    # Hand the results back so callers can save them to a database or a file.
    return extracted_surveys
# Only start the extraction when this file is executed directly as a script;
# importing the module must not trigger a scan of the survey folder.
if __name__ == "__main__":
    main()

View file

@ -769,8 +769,6 @@ class XmlParser:
:return:
"""
sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
glazing_type_lookup = {
"3": "double glazing, unknown install date",
"5": "Single glazing",
@ -787,6 +785,38 @@ class XmlParser:
"8": "North West"
}
sap_windows = self.xml.getElementsByTagName("SAP-Windows")
if not sap_windows:
# We look for Multi-Glazed-Proportion
multiple_glazing_type = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"Multiple-Glazing-Type"
)[0].firstChild.nodeValue
pvc_frame = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"PVC-Window-Frames"
)[0].firstChild.nodeValue
multple_glazed_proportion = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"Multiple-Glazed-Proportion"
)[0].firstChild.nodeValue
self.windows = [
{
"window_location": None,
"window_area": None,
"window_type": None,
"glazing_type": glazing_type_lookup[multiple_glazing_type],
"pvc_frame": pvc_frame,
"glazing_gap": None,
"orientation": None,
"multple_glazed_proportion": multple_glazed_proportion
}
]
return
sap_windows = sap_windows[0].getElementsByTagName("SAP-Window")
self.windows = [
self._parse_windows_content(
window=window,