# Model/etl/customers/aiha/xml_extraction.py
# Khalim Conn-Kowlessar 2eaf19c2bb minor
# 2024-11-15 14:56:00 +00:00
#
# 988 lines
# 38 KiB
# Python

import os
from io import BytesIO
import pandas as pd
from etl.xml_survey_extraction.XmlParser import XmlParser
# Root folder scanned by main(): each immediate subfolder is treated as one survey and searched for XML files.
# NOTE: this is an absolute path on the author's machine and must be changed to run anywhere else.
SURVEY_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/RESIDENT SURVEYS"
# Contingency rate for costings (0.26 presumably means 26% - TODO confirm).
# Currently only referenced from commented-out code inside main().
CONTINGENCY_RATE = 0.26
def sap_to_epc(sap_points: int | float):
"""
Simple utility function to convert SAP points to EPC rating.
:param sap_points: numerical value of SAP points, typically between 0 and 100
:return:
"""
if sap_points <= 0:
raise ValueError("SAP points should be above 0.")
if sap_points >= 92:
return "A"
elif sap_points >= 81:
return "B"
elif sap_points >= 69:
return "C"
elif sap_points >= 55:
return "D"
elif sap_points >= 39:
return "E"
elif sap_points >= 21:
return "F"
else:
return "G"
def main(survey_folder_path: str | None = None, output_dir: str | None = None) -> None:
    """
    This script handles the extraction of data from the XML files in the survey folders, prices the
    hard-coded retrofit packages for each survey and writes the results out as CSV files.

    Files written to ``output_dir``:
      - ``extracted_property_data.csv``: the property fields pulled out of the survey XML files
      - ``Measures packages.csv``: one row per extracted property with its recommended measures, their
        costs, total SAP uplift and the resulting SAP score / EPC band
      - ``Pricing data.csv``: the unit price table used for the costings

    :param survey_folder_path: folder whose immediate subfolders each hold the XML file(s) of one survey.
        Defaults to the module-level ``SURVEY_FOLDER_PATH``.
    :param output_dir: folder the CSV files are written to. Defaults to the parent folder of
        ``survey_folder_path``.
    :return: None
    :raises ValueError: if no survey could be extracted, or if a recommended measure cannot be priced
        (unknown pricing item, unknown unit type, or scaffolding needed for a property with no extracted
        data / an unsupported number of floors)
    """
    survey_folder_path = survey_folder_path or SURVEY_FOLDER_PATH
    if output_dir is None:
        output_dir = os.path.dirname(os.path.normpath(survey_folder_path))

    # Steps 1 & 2: walk the survey subfolders and parse every XML file found in them.
    extracted_surveys = _extract_surveys(survey_folder_path)
    print(f"Extracted {len(extracted_surveys)} surveys.")
    if not extracted_surveys:
        # Without any rows the column selection below would fail with an unhelpful KeyError.
        raise ValueError(f"No survey XML data could be extracted from: {survey_folder_path}")
    extracted_surveys = pd.DataFrame(extracted_surveys)

    # This is the data we need for the AIHA project
    measures_data = extracted_surveys[
        ["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating",
         "number_of_floors", "walls-description", "property-type", "built-form"]
    ]
    measures_data = measures_data.sort_values("survey_key", ascending=True)
    measures_data.to_csv(os.path.join(output_dir, "extracted_property_data.csv"))

    # Price every recommended measure (adds "Total Cost" to each measure and "total_cost" to each survey).
    recommended_measures = _recommended_measures()
    pricing_data = _pricing_data()
    _price_recommendations(recommended_measures, measures_data, pricing_data, _scaffolding_data())

    # Build the one-row-per-property report and store it together with the price table.
    final_measures = _build_measures_report(recommended_measures, measures_data)
    final_measures.to_csv(os.path.join(output_dir, "Measures packages.csv"))
    # Store costs
    pd.DataFrame(pricing_data).to_csv(os.path.join(output_dir, "Pricing data.csv"))


def _extract_surveys(survey_folder_path: str) -> list[dict]:
    """Parse every ``.xml`` file in each subfolder of ``survey_folder_path`` and return the extracted rows."""
    # Step 1: List all subfolders inside the survey folder (sorted so that runs are reproducible).
    with os.scandir(survey_folder_path) as entries:
        subfolders = sorted(entry.path for entry in entries if entry.is_dir())

    # Step 2: Loop through each subfolder and find the XML files.
    extracted_surveys = []
    for subfolder in subfolders:
        print(f"Searching in subfolder: {subfolder}")
        xml_files = sorted(f for f in os.listdir(subfolder) if f.endswith(".xml"))
        if not xml_files:
            print(f"No XML files found in subfolder: {subfolder}")
            continue

        # We use the subfolder name as the survey key. basename() rather than split("/") so that this
        # also works with non-POSIX path separators.
        survey_key = os.path.basename(subfolder)
        for xml_file in xml_files:
            xml_path = os.path.join(subfolder, xml_file)
            print(f"Processing XML file: {xml_path}")
            # Read in the XML and parse it using the XmlParser class.
            with open(xml_path, "rb") as file:
                xml_data_io = BytesIO(file.read())
            xml_parser = XmlParser(
                file=xml_data_io,
                filekey=xml_path,
                surveyor_company="",
                uprn=None,  # Set the UPRN if available.
            )
            # Run the parser to extract the data
            xml_parser.run()
            if not xml_parser.epc:
                # The parser produced no EPC data for this file (presumably it is not the XML type we
                # need - TODO confirm), so there is nothing to store.
                continue
            extracted_surveys.append({
                "survey_key": survey_key,
                **xml_parser.epc,
                **xml_parser.additional_data,
            })
    return extracted_surveys


def _recommended_measures() -> list[dict]:
    """
    Return a fresh copy of the hard-coded measure package for every survey.

    Each measure's ``description`` must match an ``item`` in ``_pricing_data()``. ``floor_area``, ``hlp`` and
    ``n_windows`` are the quantities used for pricing. ``config``, ``ending_sap``, ``windows_area`` and
    ``notes`` are informational only and are not read by the pricing / reporting code.
    NOTE(review): the PV ``config`` entries spell the key "elavation" and mix "W" / "kW" / "kWp" in ``size``;
    they are kept exactly as they were because nothing reads them - tidy up if they ever get consumed.
    """
    # Note:
    # The properties will still have "Very poor" ratings for their hot water
    # TODO
    # - AIH001-03 has a loft that is inaccessible - ask Chenai about why this property didn't have access to
    #   the loft [Can't remember, not clear - Chenai will check]
    # - AIH001-08 and AIH001-09, check if it's freehold - could solar work as both of these units are part of
    #   the same building [Question for Lewis & Kevin]
    # - AIH001-09 - Is it not possible to install a loft hatch? [IT IS NOT, NO ACCESS - would need to be
    #   accessed from the other unit]
    # - AIH001-09 - Is there definitely an immersion water heater? Is this definitely the case for the other
    #   units? [Question for Lewis & Kevin] - [YES - ASHP!!!!]
    # TODO: Check which properties are in a conservation area
    # TODO: AIH001-16 - Is the loft insulation suitable (already has 100mm in the RIR)
    # TODO: Adjust Archetype 14 homes to exclude double glazing? Or should we exclude entirely
    return [
        {
            "survey_key": "AIH001-01",
            "starting_sap": 69,
            "recommended_measures": [],
            "notes": "Is EPC C",
        },
        {
            "survey_key": "AIH001-02",
            "starting_sap": 65,
            "recommended_measures": [
                {
                    "measure": "Solar PV",
                    "description": "2.4kWp Solar PV system",
                    "config": [
                        {"size": "2.4W", "orientation": "Horizontal", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 7,
                    "ending_sap": 72,
                    "notes": "The array can be mounted on the flat roof, so that panels are south facing",
                },
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 4, "ending_sap": 76},
            ],
        },
        {
            "survey_key": "AIH001-03",
            "starting_sap": 43,
            "recommended_measures": [
                {"measure": "Cylinder Insulation", "description": "80mm cylinder insulation",
                 "sap_points": 1, "ending_sap": 44},
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {"size": "4kWp", "orientation": "East", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 10,
                    "ending_sap": 54,
                },
                {
                    "measure": "Air Source Heat Pump",
                    "description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)",
                    "sap_points": 20,
                    "ending_sap": 74,
                },
                {"measure": "Tariff Review", "description": "Switch to 24-hour tariff",
                 "sap_points": 15, "ending_sap": 89},
            ],
            "notes": "Unclear if the loft is accessible",
        },
        {
            "survey_key": "AIH001-04",
            "starting_sap": 48,
            "recommended_measures": [
                {
                    "measure": "Flat Roof Insulation",
                    "description": "100mm flat roof insulation",
                    "floor_area": 39.1482,  # based on area of top floor
                    "sap_points": 4,
                    "ending_sap": 52,
                },
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 3, "ending_sap": 55},
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {"size": "4kWp", "orientation": "South", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 15,
                    "ending_sap": 70,
                },
            ],
            "notes": "Roof is flat, PV array should be installed south facing with elevation",
        },
        {
            "survey_key": "AIH001-05",
            "starting_sap": 54,
            "recommended_measures": [
                {
                    "measure": "Flat Roof Insulation",
                    "description": "100mm flat roof insulation",
                    "floor_area": 49.48,  # based on area of top floor
                    "sap_points": 5,
                    "ending_sap": 59,
                },
                {"measure": "Cylinder Insulation", "description": "80mm cylinder insulation",
                 "sap_points": 2, "ending_sap": 61},
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {"size": "4kW", "orientation": "Horizontal", "elavation": 30, "overshading": "Modest"},
                    ],
                    "sap_points": 9,
                    "ending_sap": 70,
                },
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 3, "ending_sap": 73},
            ],
            "notes": "",
        },
        {
            "survey_key": "AIH001-06",
            "starting_sap": 62,
            "recommended_measures": [
                {"measure": "Cylinder Insulation", "description": "80mm cylinder insulation",
                 "sap_points": 2, "ending_sap": 64},
                {
                    "measure": "Solar PV",
                    "description": "2kWp Solar PV system",
                    "config": [
                        {"size": "2kW", "orientation": "South", "elavation": 30, "overshading": "Modest"},
                    ],
                    "sap_points": 6,
                    "ending_sap": 70,
                },
            ],
        },
        {
            "survey_key": "AIH001-07",
            "starting_sap": 74,
            "recommended_measures": [],
            "notes": "Is EPC C",
        },
        {
            "survey_key": "AIH001-08",
            "starting_sap": 56,
            "recommended_measures": [
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 54.2864,  # Based on area of top floor
                    "sap_points": 2,
                    "ending_sap": 58,
                },
                {"measure": "Cylinder Insulation", "description": "80mm cylinder insulation",
                 "sap_points": 4, "ending_sap": 62},
                {
                    "measure": "Internal Wall Insulation",
                    "description": "100mm internal wall insulation",
                    "hlp": 24.13 * 2.63,
                    "sap_points": 7,
                    "ending_sap": 69,
                },
                {"measure": "Ventilation", "description": "2x DMEV fans", "sap_points": 0, "ending_sap": 69},
            ],
        },
        {
            "survey_key": "AIH001-09",
            "starting_sap": 44,
            "recommended_measures": [
                {
                    "measure": "Internal Wall Insulation",
                    "description": "100mm internal wall insulation",
                    "hlp": (22.35 * 3.24) + (22.13 * 2.53),
                    "sap_points": 8,
                    "ending_sap": 52,
                },
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39),  # 1st & 2nd extension
                    "sap_points": 1,
                    "ending_sap": 53,
                },
                {"measure": "Ventilation", "description": "2x DMEV fans", "sap_points": 0, "ending_sap": 53},
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 3, "ending_sap": 56},
                {
                    "measure": "Solar PV",
                    "description": "1.6kWp Solar PV system",
                    "config": [
                        {"size": "1.6W", "orientation": "South-East", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 6,
                    "ending_sap": 62,
                },
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 63.59 + 12.31,  # Based on area of main building and 1st extension
                    "sap_points": 8,
                    "ending_sap": 70,
                    "notes": "Loft is inaccessible from this unit - would need to be accessed from the other unit, "
                             "which is also owned by AIHA",
                },
            ],
            # Adjacent literals are concatenated as-is, so each fragment carries its own trailing space.
            "notes": "This property is a house split into 2 flats. We can install a PV array for both units (one array "
                     "per unit). Area on south-east part of roof is ~22m2 with no overshadowing. Flat roof area is 8m2 "
                     "with modest overshadowing. We suggest a 3.2kWp system, across two units",
        },
        {
            "survey_key": "AIH001-11",
            "starting_sap": 59,
            "recommended_measures": [
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 4, "ending_sap": 63},
                {
                    "measure": "Internal Wall Insulation",
                    "description": "100mm internal wall insulation",
                    "hlp": (18.50 * 3.12) + (19.00 * 2.75),
                    "sap_points": 5,
                    "ending_sap": 68,
                },
                {"measure": "Cylinder Insulation", "description": "80mm cylinder insulation",
                 "sap_points": 1, "ending_sap": 69},
            ],
        },
        {
            "survey_key": "AIH001-12",
            "starting_sap": 46,
            # NOTE(review): the "ending_sap" values of the last two measures (65, 80) do not add up from
            # 46 + 3 + 15 + 15 = 79. The report sums "sap_points", so it shows 79 - confirm which is right.
            "recommended_measures": [
                {
                    "measure": "Double Glazing",
                    "description": "Installation of double glazing",
                    "n_windows": 20,  # Counted the bay windows each as 3
                    "windows_area": 10.66,
                    "sap_points": 3,
                    "ending_sap": 49,
                },
                # Currently excluded from this package:
                # {
                #     "measure": "Solar PV",
                #     "description": "3.2kWp Solar PV system",
                #     "config": [
                #         {"size": "3.2W", "orientation": "East", "elavation": 30,
                #          "overshading": "Little or none"},
                #     ],
                #     "sap_points": 9,
                #     "ending_sap": 58
                # },
                {
                    "measure": "Air Source Heat Pump",
                    "description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)",
                    "sap_points": 15,
                    "ending_sap": 65,
                },
                {"measure": "Tariff Review", "description": "Switch to 24-hour tariff",
                 "sap_points": 15, "ending_sap": 80},
            ],
        },
        {
            "survey_key": "AIH001-13",
            "starting_sap": 53,
            "recommended_measures": [
                {
                    "measure": "Roof Insulation",
                    "description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
                    "floor_area": 39.75,  # based on the floor area of the RIR
                    "sap_points": 6,
                    "ending_sap": 59,
                },
                {
                    "measure": "Flat Roof Insulation",
                    "description": "100mm flat roof insulation",
                    "floor_area": 33.06,  # Based on area of the extension
                    "sap_points": 2,
                    "ending_sap": 61,
                },
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (35.40 * 2.65) + (26.70 * 2.73) + (16.30 * 2.71),  # 1st & 2nd extension
                    "sap_points": 6,
                    "ending_sap": 67,
                },
                {"measure": "Ventilation", "description": "2x DMEV fans", "sap_points": 0, "ending_sap": 67},
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 2, "ending_sap": 69},
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {"size": "4kW", "orientation": "Horizontal", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 9,
                    "ending_sap": 78,
                },
            ],
        },
        {
            "survey_key": "AIH001-14",
            "starting_sap": 63,
            "recommended_measures": [
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (11.00 * 2.6) + (11.00 * 2.65) + (4.60 * 2.7),
                    "sap_points": 5,
                    "ending_sap": 68,
                },
                {"measure": "Ventilation", "description": "2x DMEV fans", "sap_points": 0, "ending_sap": 68},
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 59.20,  # Based on area of main building
                    "sap_points": 1,
                    "ending_sap": 69,
                },
                {"measure": "Solar PV", "description": "3.2kWp Solar PV system",
                 "sap_points": 10, "ending_sap": 79},
            ],
        },
        {
            "survey_key": "AIH001-15",
            "starting_sap": 60,
            "recommended_measures": [
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 73.81,  # Based on area of main building
                    "sap_points": 1,
                    "ending_sap": 61,
                },
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 3, "ending_sap": 64},
                {
                    "measure": "Solar PV",
                    "description": "3.2kWp Solar PV system",
                    "config": [
                        {"size": "3.2W", "orientation": "North-West", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 7,
                    "ending_sap": 71,
                    # Adjacent literals are concatenated as-is, so each fragment carries its own trailing space.
                    "notes": "The array is North-west facing and therefore will be slightly less efficient than south "
                             "facing, however the impact is not so severe as to make the installation not worthwhile. "
                             "Ground mounted",
                },
            ],
        },
        {
            "survey_key": "AIH001-16",
            "starting_sap": 60,
            "recommended_measures": [
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (21.56 * 2.60) + (26.79 * 2.8) + (6.74 * 2.60),
                    "sap_points": 4,
                    "ending_sap": 64,
                },
                {"measure": "Ventilation", "description": "2x DMEV fans", "sap_points": 0, "ending_sap": 64},
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 20.92,  # Based on floor area of RIR
                    "sap_points": 1,
                    "ending_sap": 65,
                },
                {
                    "measure": "Solar PV",
                    "description": "2.4kWp Solar PV system",
                    "config": [
                        {"size": "2.4W", "orientation": "South-East", "elavation": 30, "overshading": "Modest"},
                    ],
                    "sap_points": 5,
                    "ending_sap": 70,
                },
            ],
        },
        {
            "survey_key": "AIH001-17",
            "starting_sap": 62,
            "recommended_measures": [
                {"measure": "Cylinder Insulation", "description": "80mm cylinder insulation",
                 "sap_points": 1, "ending_sap": 63},
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 3, "ending_sap": 66},
                {
                    "measure": "Solar PV",
                    "description": "4kWp Solar PV system",
                    "config": [
                        {"size": "3.2kW", "orientation": "East", "elavation": 30,
                         "overshading": "None or little"},
                        {"size": "0.8kW", "orientation": "West", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 12,
                    "ending_sap": 78,
                },
            ],
        },
        {
            "survey_key": "AIH001-18",
            "starting_sap": 58,
            "recommended_measures": [
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 37.52,  # Based on area of main building and 1st extension
                    "sap_points": 7,
                    "ending_sap": 65,
                },
                {"measure": "Cylinder Insulation", "description": "80mm cylinder insulation",
                 "sap_points": 1, "ending_sap": 66},
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 2, "ending_sap": 68},
                {
                    "measure": "Solar PV",
                    "description": "3.2kWp Solar PV system",
                    "config": [
                        {"size": "3.2W", "orientation": "North-East", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 7,
                    "ending_sap": 75,
                },
            ],
        },
        {
            "survey_key": "AIH001-19",
            "starting_sap": 76,
            "recommended_measures": [],
        },
        {
            "survey_key": "AIH001-20",
            "starting_sap": 82,
            "recommended_measures": [],
        },
        {
            "survey_key": "AIH001-21",
            "starting_sap": 53,
            "recommended_measures": [
                {"measure": "Cylinder Insulation", "description": "80mm cylinder insulation",
                 "sap_points": 2, "ending_sap": 55},
                {
                    "measure": "Roof Insulation",
                    "description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
                    "floor_area": 22.80,  # Based on floor area of RIR
                    "sap_points": 7,
                    "ending_sap": 62,
                },
                {
                    "measure": "Solar PV",
                    "description": "2.4kWp Solar PV system",
                    "config": [
                        {"size": "1.6kWp", "orientation": "Horizontal", "elavation": 30,
                         "overshading": "None or little"},
                        {"size": "0.8kWp", "orientation": "South-East", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 9,
                    "ending_sap": 71,
                },
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 3, "ending_sap": 74},
            ],
        },
        {
            "survey_key": "AIH001-SIMULATED-01",
            "elmhurst_reference": "000020",
            # No starting SAP is recorded for this property, so its ending SAP / EPC band stay empty.
            "starting_sap": None,
            "recommended_measures": [
                {
                    "measure": "Internal Wall Insulation",
                    "description": "100mm internal wall insulation",
                    "hlp": (22.35 * 3.24) + (22.13 * 2.53),
                    "sap_points": 8,
                    "ending_sap": 52,
                },
                {
                    "measure": "Cavity Wall Insulation",
                    "description": "CWI to rdSAP default standard",
                    "hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39),  # 1st & 2nd extension
                    "sap_points": 1,
                    "ending_sap": 53,
                },
                {"measure": "Ventilation", "description": "2x DMEV fans", "sap_points": 0, "ending_sap": 53},
                {"measure": "TTZC", "description": "Smart Thermostat", "sap_points": 3, "ending_sap": 56},
                {
                    "measure": "Solar PV",
                    "description": "1.6kWp Solar PV system",
                    "config": [
                        {"size": "1.6W", "orientation": "South-East", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 6,
                    "ending_sap": 62,
                },
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 63.59 + 12.31,  # Based on area of main building and 1st extension
                    "sap_points": 8,
                    "ending_sap": 70,
                    "notes": "Loft is inaccessible from this unit - would need to be accessed from the other unit, "
                             "which is also owned by AIHA",
                },
            ],
            "notes": "This was cloned from 80A. There is no existing data for 80B",
        },
        {
            "survey_key": "AIH001-SIMULATED-05",
            "starting_sap": 68,
            "recommended_measures": [
                {
                    "measure": "Loft Insulation",
                    "description": "300mm loft insulation",
                    "floor_area": 42.5,
                    "sap_points": 1,
                    "ending_sap": 69,
                },
                {
                    "measure": "Solar PV",
                    "description": "3.2kWp Solar PV system",
                    "config": [
                        {"size": "3.2W", "orientation": "North-East", "elavation": 30,
                         "overshading": "None or little"},
                    ],
                    "sap_points": 8,
                    "ending_sap": 77,
                },
            ],
        },
    ]


def _scaffolding_data() -> list[dict]:
    """Return the scaffolding price for each supported number of floors."""
    return [
        {"number_of_floors": 2, "price": 841},
        {"number_of_floors": 3, "price": 1077},
    ]


def _pricing_data() -> list[dict]:
    """
    Return the unit price table, keyed by ``item`` (which must equal a measure's ``description``).

    ``unit`` selects how a measure is costed: ``"unit"`` / ``None`` is a flat price,
    ``"unit_needs_scaffolding"`` is a flat price plus scaffolding, ``"floor_m2"`` multiplies by the measure's
    ``floor_area``, ``"hlp_m2"`` by its ``hlp`` and ``"window"`` by its ``n_windows``.
    """
    # TODO - Need an update cost for cylinder insulation
    return [
        {'item': '80mm cylinder insulation', 'unit_price': 50, 'unit': 'unit'},
        {'item': '100mm internal wall insulation', 'unit_price': 244.8, 'unit': 'hlp_m2'},
        {'item': 'CWI to rdSAP default standard', 'unit_price': 14.21, 'unit': 'hlp_m2'},
        {'item': 'Window draught proofing improvements', 'unit_price': 63, 'unit': 'window'},
        {'item': '100mm flat roof insulation', 'unit_price': 195, 'unit': 'floor_m2'},
        {'item': 'Switch to 24-hour tariff', 'unit_price': 0, 'unit': None},
        {'item': 'Installation of double glazing', 'unit_price': 1074, 'unit': 'window'},
        {'item': 'Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)', 'unit_price': 21189 + 1200,
         'unit': 'unit'},
        {'item': '100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)', 'unit_price': 244.80,
         'unit': 'floor_m2'},
        {'item': '300mm loft insulation', 'unit_price': 16.07, 'unit': 'floor_m2'},
        {'item': 'Smart Thermostat', 'unit_price': 1200, 'unit': 'unit'},
        {'item': '2x DMEV fans', 'unit_price': 1070, 'unit': 'unit'},
        {'item': '1.6kWp Solar PV system', 'unit_price': 3040, 'unit': 'unit_needs_scaffolding'},
        {'item': '2kWp Solar PV system', 'unit_price': 3201, 'unit': 'unit_needs_scaffolding'},
        {'item': '2.4kWp Solar PV system', 'unit_price': 3363, 'unit': 'unit_needs_scaffolding'},
        {'item': '3.2kWp Solar PV system', 'unit_price': 3686, 'unit': 'unit_needs_scaffolding'},
        {'item': '4kWp Solar PV system', 'unit_price': 4009, 'unit': 'unit_needs_scaffolding'},
        {'item': '5.6kWp Solar PV system', 'unit_price': 5015, 'unit': 'unit_needs_scaffolding'},
    ]


def _scaffolding_cost(survey_key: str, measures_data: pd.DataFrame, scaffolding_data: list[dict]) -> float:
    """Look up the scaffolding price for a property from its extracted ``number_of_floors``."""
    floors = measures_data.loc[measures_data["survey_key"] == survey_key, "number_of_floors"]
    if floors.empty:
        raise ValueError(
            f"Cannot price scaffolding for {survey_key}: no extracted survey data for this survey key."
        )
    # A survey folder may hold several XML files, giving several rows with the same key; use the first.
    n_floors = floors.iloc[0]
    price = next((band["price"] for band in scaffolding_data if band["number_of_floors"] == n_floors), None)
    if price is None:
        raise ValueError(
            f"Cannot price scaffolding for {survey_key}: no scaffolding price for {n_floors!r} floors."
        )
    return price


def _price_recommendations(
        recommended_measures: list[dict],
        measures_data: pd.DataFrame,
        pricing_data: list[dict],
        scaffolding_data: list[dict],
) -> None:
    """Cost every measure in place: adds "Total Cost" to each measure and "total_cost" to each survey."""
    pricing_by_item = {}
    for row in pricing_data:
        # setdefault keeps the first row should an item ever be listed twice.
        pricing_by_item.setdefault(row["item"], row)

    for recommendation in recommended_measures:
        survey_key = recommendation["survey_key"]
        total_cost = 0
        for measure in recommendation["recommended_measures"]:
            pricing = pricing_by_item.get(measure["description"])
            if pricing is None:
                raise ValueError(
                    f"No pricing found for measure {measure['description']!r} (survey {survey_key})."
                )
            unit_price = float(pricing["unit_price"])
            unit = pricing["unit"]
            if unit in ("unit", None):
                measure_cost = unit_price
            elif unit == "unit_needs_scaffolding":
                measure_cost = unit_price + _scaffolding_cost(survey_key, measures_data, scaffolding_data)
            elif unit == "floor_m2":
                measure_cost = unit_price * measure["floor_area"]
            elif unit == "hlp_m2":
                measure_cost = unit_price * measure["hlp"]
            elif unit == "window":
                measure_cost = unit_price * measure["n_windows"]
            else:
                raise ValueError(f"Unknown unit type {unit!r} for measure {measure['description']!r}")
            measure["Total Cost"] = measure_cost
            total_cost += measure_cost
        recommendation["total_cost"] = total_cost


def _build_measures_report(recommended_measures: list[dict], measures_data: pd.DataFrame) -> pd.DataFrame:
    """Flatten the priced packages into one row per survey and join them onto the extracted property data."""
    # Step 1: Normalize the recommended_measures data into a DataFrame (one row per survey + measure).
    normalized_measures = [
        {
            "survey_key": survey["survey_key"],
            # hlp and floor_area are included for each measure if available
            "hlp": measure.get("hlp"),
            "floor_area": measure.get("floor_area"),
            "starting_sap": survey["starting_sap"],
            "measure": measure["measure"],
            "description": measure.get("description", ""),
            "sap_points": measure.get("sap_points", 0),
            "measure_cost": measure.get("Total Cost", 0),
        }
        for survey in recommended_measures
        for measure in survey.get("recommended_measures", [])
    ]
    measures_df = pd.DataFrame(normalized_measures)

    # Step 2: Pivot the measures_df to have a column for each measure type, using the description as values.
    pivoted_measures = measures_df.pivot_table(
        index="survey_key",
        columns="measure",
        values="description",
        aggfunc=lambda x: ' '.join(x),  # Concatenate descriptions if there are multiple entries.
        fill_value=None
    ).reset_index()

    # A "Cost of <measure>" column for each measure, filled with the costs computed during pricing
    # (these columns used to be created empty and never populated).
    pivoted_cost = measures_df.pivot_table(
        index="survey_key",
        columns="measure",
        values="measure_cost",
        aggfunc="sum"  # Mirrors the description join above if a measure appears more than once.
    ).add_prefix("Cost of ").reset_index()
    pivoted_floor_area = measures_df.pivot_table(
        index="survey_key",
        columns="measure",
        values="floor_area",
        aggfunc="first"  # Use 'first' since each measure should only appear once per survey_key
    ).add_prefix("floor_area - ").reset_index()
    pivoted_hlp = measures_df.pivot_table(
        index="survey_key",
        columns="measure",
        values="hlp",
        aggfunc="first"
    ).add_prefix("hlp - ").reset_index()

    # Merge cost, hlp and floor_area data
    pivoted_measures = pivoted_measures.merge(pivoted_cost, on="survey_key", how="left")
    pivoted_measures = pivoted_measures.merge(pivoted_hlp, on="survey_key", how="left")
    pivoted_measures = pivoted_measures.merge(pivoted_floor_area, on="survey_key", how="left")

    # Step 3: Calculate the total sap points and total cost for each survey.
    totals = measures_df.groupby("survey_key").agg(
        total_sap_points=("sap_points", "sum"),
        total_cost_of_measures=("measure_cost", "sum"),
    ).reset_index()
    pivoted_measures = pd.merge(pivoted_measures, totals, on="survey_key", how="left")
    # TODO: confirm whether the contingency should be applied before enabling these:
    # pivoted_measures["Cost Contingency"] = pivoted_measures["total_cost_of_measures"] * CONTINGENCY_RATE
    # pivoted_measures["Total Cost"] = pivoted_measures["total_cost_of_measures"] + pivoted_measures["Cost Contingency"]

    # Step 4: Extract starting SAP for each survey key and merge it back onto the pivoted measures.
    starting_sap_df = measures_df.drop_duplicates(subset=["survey_key"])[["survey_key", "starting_sap"]]
    result_df = pd.merge(pivoted_measures, starting_sap_df, on="survey_key", how="left")

    # Step 5: Calculate the ending SAP. A survey without a starting SAP yields NaN here, which must not be
    # converted to a band (every comparison against NaN is False, which would report it as "G").
    result_df["Ending SAP"] = result_df["starting_sap"] + result_df["total_sap_points"]
    result_df["Ending EPC Rating"] = result_df["Ending SAP"].apply(
        lambda sap: sap_to_epc(sap) if pd.notna(sap) else None
    )

    # Step 6: Merge the result with the measures_data to get the final DataFrame.
    return measures_data.merge(result_df, how="left", on="survey_key")
# if __name__ == "__main__":
# main()