working on proposed sample for stonewater

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-30 20:30:05 +00:00
parent bccf3c621b
commit 7e26fb4b86

View file

@ -486,7 +486,7 @@ def extract_epr(pdf_path):
data["Postcode"] = data["Address"].split(",")[-1].strip()
# Extract Current and Potential SAP ratings
sap_match = re.search(r"GG \(1-20\)(\d{1,2})(\d{1,2})", text)
sap_match = re.search(r"GG \(1-20\)\s*(\d{1,2})\s*(\d{1,2})", text)
current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
data["Current SAP Rating"] = current_sap
@ -896,7 +896,6 @@ def main():
# Find Osmosis IDs that are in the packages board but not in the matching lookup
missing_ids = set(retrofit_packages_board["Address ID"]) - set(matching_lookup["Address ID"])
missing_ids = list(missing_ids)
print(len(missing_ids))
if missing_ids:
# We check that the missing ids have no data yet
if len(missing_ids) != 8:
@ -937,6 +936,7 @@ def main():
"Actual SAP Rating",
"Modelled SAP Band",
"Modelled SAP Rating",
"Package Ref",
] + measure_columns
],
on=["Address ID", "Name"],
@ -995,7 +995,206 @@ def main():
if stonewater_data["Address ID"].duplicated().sum():
raise Exception("Duplicate Address IDs")
# Save this data to excel
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages.xlsx", index=False)
cost_sheet = [
{
"measure": "EWI 0.30 w.m2.K", "cost": 298.35, "unit": "m2"
},
{
"measure": "CWI RdSAP Default", "cost": 14.21, "unit": "m2"
},
{
"measure": "Poss Extract CWI & Refill (issues identified)", "cost": 14.21 + 25, "unit": "m2"
},
{
"measure": "IWI 0.30 w.m2.K", "cost": 244.80, "unit": "m2"
},
{
"measure": "EWI/IWI 0.3", "cost": (298.35 + 244.8) / 2, "unit": "m2"
},
{
"measure": "Loft Insulation 0.11 w.m2.K", "cost": 16.07, "unit": "m2"
},
{
"measure": "Flat Roof 0.11 w.m2.K", "cost": 195, "unit": "m2"
},
{
"measure": "DG Window 1.30 w.m2.K", "cost": 1140, "unit": "each"
},
{
"measure": "Secondary 2.40", "cost": 974, "unit": "each"
},
{
"measure": "Ins. Door 1.30 w.m2.K", "cost": None, "unit": "each"
},
{
"measure": "Ins. Door 1.40 w.m2.K", "cost": None, "unit": "each"
},
{
"measure": "DMEV", "cost": 900, "unit": "each"
},
{
"measure": "ASHP Vaillant 102607 5kw", "cost": None, "unit": "each"
},
{
"measure": "HHRSH Quantum 150", "cost": None, "unit": "each"
},
{
"measure": "Dual Stat Tank 210lt 50mm Foam", "cost": None, "unit": "each"
},
{
"measure": "Dual Stat Tank 160lt 50mm Foam", "cost": None, "unit": "each"
},
{
"measure": "Dual Stat Tank 110lt 50mm Foam", "cost": None, "unit": "each"
},
{
"measure": "Smart Thermostat", "cost": 1200, "unit": "each"
},
{
"measure": "TRV's", "cost": 350, "unit": "each"
},
{
"measure": "Solar PV - 3.0kwp", "cost": 4365.0, "unit": "each"
},
{
"measure": "Solar PV - 1.5kwp", "cost": 3881, "unit": "each"
},
{
"measure": "LEL", "cost": 35, "unit": "per bulb"
},
{
"measure": "Roof 0.16 - Walls 0.30", "cost": 180, "unit": "floor area m2"
},
{
"measure": "Roof 0.16 - Walls 0.16", "cost": 180, "unit": "floor area m2"
},
]
cost_sheet = pd.DataFrame(cost_sheet)
# Save cost sheet - ideally this will be used as a secondary sheet for Stonewater
cost_sheet.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - cost sheet.xlsx", index=False)
stonewater_data["Room in Roof"].value_counts()
# stonewater_data[~pd.isnull(stonewater_data["Room in Roof"])]["survey_folder"].values
create_proposed_wave_3_bid(
costed_packages_filepath=os.path.join(
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP).xlsx"
),
archetypes_sheet_filepath=os.path.join(
CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
)
)
def _classify_matches_to_surveyed(proposed_sample, surveyed):
    """Label every proposed home as an "Exact" or "Approximate" match.

    A home is "Exact" when at least one row of ``surveyed`` shares its
    Archetype ID, Property Type, Wall Type, Roof Type and Heating; otherwise
    it is "Approximate". Returns a list of labels in the row order of
    ``proposed_sample``.
    """
    match_columns = ["Archetype ID", "Property Type", "Wall Type", "Roof Type", "Heating"]
    # A missing value never compares equal to anything, so surveyed rows with
    # a gap in any of the match columns can never produce an exact match.
    surveyed_profiles = set(
        surveyed[match_columns].dropna().itertuples(index=False, name=None)
    )
    return [
        "Exact" if profile in surveyed_profiles else "Approximate"
        for profile in proposed_sample[match_columns].itertuples(index=False, name=None)
    ]


def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath):
    """Build the proposed Wave 3 bid sample and save it as an Excel workbook.

    Costed properties with a current EPC band of D to G are treated as the
    surveyed set. Every home in the archetypes sheet whose Archetype ID appears
    in that set is added to the proposed sample, flagged as an "Exact" or
    "Approximate" match to a surveyed home, and given the mean
    "Total Cost of Measures inc Contingency" of its archetype. The cost is left
    blank when no property of the archetype has a modelled SAP band.

    Args:
        costed_packages_filepath: Path to the costed retrofit packages workbook.
        archetypes_sheet_filepath: Path to the archetyped triage workbook; its
            column headers are read from the fifth row (``header=4``).

    Returns:
        None. The result is written to
        "Stonewater - Proposed Wave 3 Bid (WIP).xlsx" inside
        ``CUSTOMER_FOLDER_PATH``.
    """
    cost_column = "Total Cost of Measures inc Contingency"
    property_columns = ["Property Type", "Wall Type", "Roof Type", "Heating"]

    # We read in the costed packages
    costed_packages = pd.read_excel(costed_packages_filepath)
    archetypes_to_cost = costed_packages[
        [
            "Name", "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Modelled SAP Band",
            "Modelled SAP Rating", "Total Cost of Measures", "Contingency Cost",
            cost_column,
        ]
    ]
    # We take properties that are EPC D and below (61% of units).
    # The explicit copy lets us add a column to the filtered frame without
    # pandas warning about writing to a slice.
    archetypes_to_cost = archetypes_to_cost[
        archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])
    ].copy()
    archetypes_to_cost["Has been modelled"] = archetypes_to_cost["Modelled SAP Band"].notna()

    # Average cost across modelled properties, printed for a quick sanity check
    average_cost = archetypes_to_cost.loc[archetypes_to_cost["Has been modelled"], cost_column].mean()
    print(average_cost)

    # These are the Archetypes that will likely be suitable for Wave 3
    archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4)
    # Drop rows without an Address ID and rows that repeat the header text
    archetypes_sheet = archetypes_sheet[
        archetypes_sheet["Address ID"].notna() & (archetypes_sheet["Address ID"] != "Address ID")
    ].copy()
    archetypes_sheet["Address ID"] = archetypes_sheet["Address ID"].astype(int)

    # We merge the property details onto the costed archetypes
    archetypes_to_cost = archetypes_to_cost.merge(
        archetypes_sheet[["Address ID"] + property_columns],
        on="Address ID",
        how="left",
    )

    proposed_sample = archetypes_sheet.loc[
        archetypes_sheet["Archetype ID"].isin(archetypes_to_cost["Archetype ID"]),
        ["Name", "Postcode", "UPRN", "UDPRN", "Address ID", "Osm. ID", "Archetype ID"] + property_columns,
    ].copy()

    # We classify into high and low confidence. The labels are assigned row by
    # row rather than merged back on Address ID, so an empty sample or a
    # repeated Address ID cannot break or multiply the rows.
    proposed_sample["Match to Surveyed"] = _classify_matches_to_surveyed(proposed_sample, archetypes_to_cost)

    # Mean cost per archetype, plus a flag for whether any property of that
    # archetype has been modelled
    archetype_summary = (
        archetypes_to_cost.groupby("Archetype ID")
        .agg({cost_column: "mean", "Has been modelled": "any"})
        .reset_index()
    )
    proposed_sample = proposed_sample.merge(archetype_summary, on="Archetype ID", how="left")

    # An archetype missing from the summary counts as not modelled
    modelled = proposed_sample["Has been modelled"].eq(True)
    proposed_sample["Has been modelled"] = modelled
    # Blank out the cost where nothing in the archetype has been modelled
    proposed_sample[cost_column] = proposed_sample[cost_column].where(modelled)

    # Save excel
    proposed_sample.to_excel(
        os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Proposed Wave 3 Bid (WIP).xlsx"),
        index=False,
    )
# if __name__ == "__main__":
# main()