Debugged get_property_type in the XML extractor

This commit is contained in:
Khalim Conn-Kowlessar 2024-09-04 11:33:48 +01:00
parent 64b423ad2e
commit 32cdd70b71
2 changed files with 91 additions and 18 deletions

View file

@ -103,14 +103,60 @@ async def upload(body: EnergyAssessmentUploadPayload):
uprn = int(assessment.rstrip("/").split("/")[-1])
assessments_map[uprn] = {
"xmls": uploaded_xmls,
"eprs": eprs,
"condition_reports": condition_reports,
"evidence_reports": evidence_reports,
"summary_reports": summary_reports,
"floor_plans": floor_plans
"EPR": eprs,
"Condition Report": condition_reports,
"Evidence Report": evidence_reports,
"Summary Information": summary_reports,
"Floor PLan": floor_plans
}
logger.info(f"Exatracted XMLS for the energy assessments")
logger.info("Extracted energy assessment data and storing file locations to database")
xml_data_to_store = []
energy_assessment_documents = []
for uprn, files in assessments_map.items():
# Create the rows of data to insert into the energy assessment documents
property_ea_docs = []
for doc_type, doc_files in files.items():
if doc_type == "xmls":
continue
property_ea_docs.append(
{
"uprn": uprn,
"document_type": doc_type,
"document_location": doc_files
}
)
energy_assessment_documents.extend(property_ea_docs)
xmls = files["xmls"]
extracted_data = {}
for xml in xmls:
xml_data = read_from_s3(bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET, s3_file_name=xml)
xml_data_io = BytesIO(xml_data)
xml_parser = XmlParser(
file=xml_data_io,
filekey=os.path.join(f"s3://{get_settings().ENERGY_ASSESSMENTS_BUCKET}", xml),
uprn=uprn,
surveyor_company=body.surveyor,
)
xml_parser.run()
if xml_parser.is_lig:
logger.info(f"Extracted data from {xml}")
extracted_epc = xml_parser.epc
extracted_additional_data = xml_parser.additional_data
data_to_update = {
**extracted_epc, **extracted_additional_data
}
# We need to update the keys to match the database schema - i.e. we should replace all hyphens with
# underscores
data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
extracted_data.update(data_to_update)
xml_data_to_store.append(extracted_data)
except IntegrityError:
logger.error("Database integrity error occurred", exc_info=True)

View file

@ -9,6 +9,7 @@ from etl.xml_survey_extraction.pcdb import heating_data
# Maps the raw text of a property type element in the XML to the property type name that is returned.
# "House" maps to itself so that a value which already holds the name is accepted alongside the coded values.
PROPERTY_TYPE_LOOKUP = {
"0": "House",
"House": "House",
"2": "Flat"
}
@ -471,6 +472,13 @@ class XmlParser:
if not property_type:
property_type = self.xml.getElementsByTagName('PropertyType1')
if len(property_type) > 1:
property_types = {PROPERTY_TYPE_LOOKUP[p.firstChild.nodeValue] for p in property_type}
if len(property_types) > 1:
raise ValueError("Multiple property types found")
return property_types.pop()
return PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]
def get_sap(self):
@ -683,6 +691,30 @@ class XmlParser:
self.perimeter = self.heat_loss_perimeter + self.party_wall_length
@staticmethod
def _parse_windows_content(window, glazing_type_lookup, orientation_lookup):
    """
    Builds the dictionary describing a single window from its XML element.

    :param window: DOM element for one window; its child tags are read with getElementsByTagName.
    :param glazing_type_lookup: Mapping from the raw Glazing-Type value to its description.
    :param orientation_lookup: Mapping from the raw Orientation value to its description.
    :return: dict with the keys window_location, window_area, window_type, glazing_type, pvc_frame,
        glazing_gap and orientation. pvc_frame and glazing_gap are None when the tag is absent or empty.
        Values are returned as the raw strings found in the XML, apart from the two looked-up fields.
    :raises IndexError: if a required tag is missing.
    :raises AttributeError: if a required tag is present but has no content.
    :raises KeyError: if the Glazing-Type or Orientation value is not in the corresponding lookup.
    """

    def required_text(tag):
        # Required tags are deliberately left to raise if they are missing or empty
        return window.getElementsByTagName(tag)[0].firstChild.nodeValue

    def optional_text(tag):
        # The tag may be missing altogether, or present but empty (e.g. <PVC-Frame/>). An empty element has no
        # child node, so firstChild is None and reading nodeValue from it would raise
        elements = window.getElementsByTagName(tag)
        if not elements or elements[0].firstChild is None:
            return None
        return elements[0].firstChild.nodeValue

    # There may not be a pvc frame
    pvc_frame = optional_text("PVC-Frame")
    # There may not be a glazing gap for single glazed windows
    glazing_gap = optional_text("Glazing-Gap")

    return {
        "window_location": required_text("Window-Location"),
        "window_area": required_text("Window-Area"),
        "window_type": required_text("Window-Type"),
        "glazing_type": glazing_type_lookup[required_text("Glazing-Type")],
        "pvc_frame": pvc_frame,
        "glazing_gap": glazing_gap,
        "orientation": orientation_lookup[required_text("Orientation")],
    }
def get_windows(self):
"""
Extracts data about the windows in the property, including the number of windows and the window type.
@ -692,7 +724,8 @@ class XmlParser:
sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
glazing_type_lookup = {
"3": "double glazing, unknown install date"
"3": "double glazing, unknown install date",
"5": "Single glazing",
}
orientation_lookup = {
@ -707,15 +740,9 @@ class XmlParser:
}
self.windows = [
{
"window_location": window.getElementsByTagName("Window-Location")[0].firstChild.nodeValue,
"window_area": window.getElementsByTagName("Window-Area")[0].firstChild.nodeValue,
"window_type": window.getElementsByTagName("Window-Type")[0].firstChild.nodeValue,
"glazing_type": glazing_type_lookup[
window.getElementsByTagName("Glazing-Type")[0].firstChild.nodeValue
],
"pvc_frame": window.getElementsByTagName("PVC-Frame")[0].firstChild.nodeValue,
"glazing_gap": window.getElementsByTagName("Glazing-Gap")[0].firstChild.nodeValue,
"orientation": orientation_lookup[window.getElementsByTagName("Orientation")[0].firstChild.nodeValue]
} for window in sap_windows
self._parse_windows_content(
window=window,
glazing_type_lookup=glazing_type_lookup,
orientation_lookup=orientation_lookup
) for window in sap_windows
]