mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
192 lines
7 KiB
Python
192 lines
7 KiB
Python
"""
12th April 2025
This script attempts to clean up the various pieces of data we have for Bromford, with the intention of producing a
standardised asset list
"""

import pandas as pd
|
|
|
|
# Step 1
# The inspections data is spread across three different files. This step attempts to produce one finalised asset
# list, with comprehensive inspections.

# Primary asset list: the "Asset List" sheet of the Bromford asset workbook.
asset_list = pd.read_excel(
    io=(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/"
        "Bromford Asset List.xlsx"
    ),
    sheet_name="Asset List",
)

# First inspections file: the "Data list" sheet of the MDS workbook.
inspections_1 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
    "MDS.xlsx",
    sheet_name="Data list"
)
# Fold the fuel into the heating type as "<type> <fuel>". Missing parts are treated as empty strings first, because
# NaN + str is NaN and would otherwise wipe a populated heating type whenever the fuel is missing. The strip removes
# the separator left behind by an empty part, and rows with neither part populated are kept as missing.
inspections_1["Heating Type"] = (
    inspections_1["Heating Type"].fillna("") + " " + inspections_1["Heating fuel"].fillna("")
).str.strip().replace("", pd.NA)

# Second inspections file: the "Report" sheet of the Merlin Lane workbook.
inspections_2 = pd.read_excel(
    io=(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/"
        "BROMFORD MERLIN LANE.xlsx"
    ),
    sheet_name="Report",
)
# PropertyType is split on single spaces: the last token becomes AssetTypeDesc and the tokens before it, re-joined
# with spaces, become PropTypeDesc.
property_type_tokens = inspections_2["PropertyType"].str.split(" ")
inspections_2["AssetTypeDesc"] = property_type_tokens.str[-1]
inspections_2["PropTypeDesc"] = property_type_tokens.str[:-1].str.join(" ")

# Third inspections file (Severn Vale - Klarke). Besides the main "Asset report" sheet, the workbook has one sheet
# per heating system; the UPRN column of each of those sheets is tagged below with the sheet's name.
severn_vale_path = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
    "SEVERN VALE - KLARKE.xlsx"
)
heating_sheet_names = [
    "Storage Heaters", "No Heating", "Underfloor Heating", "Rointe Electric Heating", "Air Source Heating",
    "Gas Central Heating", "Electric Boiler", "Oil Fired Central Heating",
    "Communal Boilers", "Panel Heaters"
]

# Read every sheet we need in a single call instead of re-opening the workbook once per sheet. Passing a list as
# sheet_name returns a dict of DataFrames keyed by sheet name.
severn_vale_sheets = pd.read_excel(severn_vale_path, sheet_name=["Asset report", *heating_sheet_names])

inspections_3 = severn_vale_sheets["Asset report"]
# NOTE(review): if T1_Address2 (or T1_Address1) is missing, the concatenation yields a missing FullAddress for that
# row - confirm whether that is acceptable.
inspections_3["FullAddress"] = inspections_3["T1_Address1"] + ", " + inspections_3["T1_Address2"]

# On inspections 3, we have multiple sheets which describe the heating. Each sheet is reduced to its UPRN column and
# labelled with the sheet name; .assign builds a new frame, so no column is set on a slice of the sheet.
heating_systems = pd.concat(
    [
        severn_vale_sheets[name][["UPRN"]].assign(**{"Heating Type": name})
        for name in heating_sheet_names
    ]
)
# We have no clue which one is correct, we have some dupes. The first occurrence wins, i.e. the sheet listed
# earliest in heating_sheet_names.
heating_systems = heating_systems.drop_duplicates("UPRN")
heating_systems = heating_systems.rename(columns={"UPRN": "Asset"})
heating_systems["Asset"] = heating_systems["Asset"].astype(int)

# Left merge so that every row of the asset report is kept; assets without a heating sheet entry get a missing
# Heating Type.
inspections_3 = inspections_3.merge(heating_systems, how="left", on="Asset")

# Create a consolidated inspections sheet. Each source contributes only the columns selected for it here; columns a
# source does not contribute are left missing for its rows by the concat.
inspections_1_findings = inspections_1[
    ["Asset", "Construction type", "Heating Type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]
]
inspections_2_findings = inspections_2[
    ["Asset", "Construction type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]
]
inspections_3_findings = inspections_3[
    ["Asset", "Heating Type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]
]
inspections = pd.concat([inspections_1_findings, inspections_2_findings, inspections_3_findings])

# Address-level details for the same three sources, with the column names of sources 2 and 3 renamed to match
# those used by source 1.
inspections_1_addresses = inspections_1[
    ["Asset", "FullAddress", "PostCode", "ConYear", "Beds", "AssetTypeDesc", "PropTypeDesc", "ManAreaDesc"]
]
inspections_2_addresses = inspections_2[
    ["Asset", "FullAddress", "AccomType", "AssetTypeDesc", "PropTypeDesc", "ConYear", "Postcode"]
].rename(columns={"Postcode": "PostCode"})
inspections_3_addresses = inspections_3[
    ["Asset", "FullAddress", "T1_Postcode", "T1_Build Year", "T1_AssetType"]
].rename(
    columns={"T1_Postcode": "PostCode", "T1_Build Year": "ConYear", "T1_AssetType": "AssetTypeDesc"}
)
inspections_address_data = pd.concat(
    [inspections_1_addresses, inspections_2_addresses, inspections_3_addresses]
)

# Remove some error values: free-text entries that appear in the Asset (ID) column.
error_asset_values = [
    "They're all green partial fill they're all green this",
    "South Staffordshire District Council",
    'Blk Milton Crt F9-10, Perton, Wolverhampton'
]
# Take an explicit copy of the filtered rows so that the column assignments below operate on an independent frame
# and do not trigger pandas' chained-assignment (SettingWithCopy) warning.
inspections = inspections[~inspections["Asset"].isin(error_asset_values)].copy()

# Compare asset IDs as strings across all three frames.
inspections["Asset"] = inspections["Asset"].astype(str)
asset_list["Asset"] = asset_list["Asset"].astype(str)
inspections_address_data["Asset"] = inspections_address_data["Asset"].astype(str)
# Empty or whitespace-only findings count as missing.
inspections['WFT Findings'] = inspections['WFT Findings'].replace(r'^\s*$', pd.NA, regex=True)

# We have some cases where the inspections data has dupes on Asset (the ID column). We take the instance that is
# populated: rows with missing findings are sorted last and the first row per asset is kept. The sort is stable so
# that rows with equal findings keep their concat order, which makes the surviving duplicate deterministic.
inspections = inspections.sort_values(by='WFT Findings', na_position='last', kind='stable')
inspections = inspections.drop_duplicates(subset='Asset', keep='first')

# We have dupes in the asset list
asset_list = asset_list.drop_duplicates("Asset")

# Merge on. First find the inspected assets whose ID does not appear in the primary asset list.
is_known_asset = inspections["Asset"].isin(asset_list["Asset"].values)
missed_asset_ids = inspections[~is_known_asset]["Asset"].values

# Pull the address details of those assets from the inspections files, one row per asset.
is_missed_asset = inspections_address_data["Asset"].isin(missed_asset_ids)
missed_assets = inspections_address_data[is_missed_asset]
missed_assets = missed_assets.drop_duplicates("Asset")

# We produce a larger asset list by appending the missed assets, then attach the inspection findings to every asset.
asset_list = pd.concat([asset_list, missed_assets]).merge(inspections, how="left", on="Asset")
# Assets with no findings after the merge get an explicit placeholder.
asset_list["WFT Findings"] = asset_list["WFT Findings"].fillna("No Inspections Note")

# Store (currently disabled)
# asset_list.to_excel(
#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
#     "data/asset_list.xlsx"
# )

# We now prepare outcomes into a single file. Each source is tagged with a measure_type before being combined.
pv_outcomes = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford PV "
    "Outcomes.csv",
    encoding='cp1252'
)
pv_outcomes["measure_type"] = "solar"

# header=1: the column names are taken from the second row of the "ECO4 & GBIS" sheet.
other_outcomes = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/(Bromford) "
    "15.04.2024.xlsx",
    sheet_name="ECO4 & GBIS",
    header=1
)
other_outcomes["measure_type"] = "cwi"

# Bring both sources onto the PV file's column names and stack them. measure_type is carried through so that the
# rows of the two sources remain distinguishable in the combined frame; previously the tag was set on both frames
# but dropped by the column selection.
combined_outcomes = pd.concat(
    [
        other_outcomes[
            ["NO", "ADDRESS", "POSTCODE", "WEEK COMMENCING", "OUTCOMES", "NOTES", "measure_type"]
        ].rename(
            columns={
                "NO": "No", "ADDRESS": "Address", "POSTCODE": "Postcode", "WEEK COMMENCING": "Week Commencing",
                "OUTCOMES": "Outcome", "NOTES": "Notes"
            }
        ),
        pv_outcomes[['No', 'Address', 'Postcode', "Week Commencing", "Outcome", "Notes", "measure_type"]]
    ]
)

# Store (currently disabled)
# combined_outcomes.to_excel(
#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
#     "data/outcomes.xlsx"
# )

# Submissions sheet - ECO 3 is loaded from a cp1252-encoded CSV.
eco3_submissions = pd.read_csv(
    filepath_or_buffer=(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/"
        "ECO 3 Submissions.csv"
    ),
    encoding='cp1252',
)
# Get rid of the unnamed columns, i.e. every column whose name contains "Unnamed: ".
unnamed_columns = [name for name in eco3_submissions.columns if "Unnamed: " in name]
eco3_submissions = eco3_submissions.drop(unnamed_columns, axis="columns")
# Store the cleaned frame without the index column.
# NOTE(review): this path differs from the input path only by the case of the "s" in "submissions", so on a
# case-insensitive filesystem it presumably overwrites the input. No encoding is passed here, whereas the input is
# read as cp1252 - confirm that a re-run still reads the file correctly.
eco3_submissions.to_csv(
    path_or_buf=(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/"
        "ECO 3 submissions.csv"
    ),
    index=False,
)

# ECO 4 submissions are read with read_csv's default encoding.
eco4_submissions = pd.read_csv(
    filepath_or_buffer=(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/"
        "ECO 4 submissions.csv"
    ),
)

# Columns present in both submissions files, in ECO 4 column order.
eco3_column_names = eco3_submissions.columns
same_cols = [name for name in eco4_submissions.columns if name in eco3_column_names]
