mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
708 lines
25 KiB
Python
708 lines
25 KiB
Python
"""
|
|
This script is used to reconcile the data from the Community Housing project, to understand the differences in
|
|
the various asset lists, and the work that has been conducted
|
|
"""
|
|
import os
|
|
import pandas as pd
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
from asset_list.AssetList import AssetList
|
|
from backend.SearchEpc import SearchEpc
|
|
|
|
# Data preparation
# The outcomes workbook has two sheets ("Sheet1" and "ECO4 + PV"). Both are parsed from a single read of the
# workbook, stacked into one table, given a fixed set of column names and written back out as
# combined_outcomes.xlsx.
_reconciliation_dir = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme Reconciliation"
)
# Passing a list of sheet names returns a dict of DataFrames keyed by sheet name
_outcome_sheets = pd.read_excel(
    os.path.join(_reconciliation_dir, "Outcomes Community Housing.xlsx"),
    sheet_name=["Sheet1", "ECO4 + PV"],
)
outcomes_1 = _outcome_sheets["Sheet1"]
outcomes_2 = _outcome_sheets["ECO4 + PV"]
# Every row of the "ECO4 + PV" sheet is labelled with the same funding type
outcomes_2["Type of Funding"] = "ECO4 Solar"

combined_outcomes = pd.concat([outcomes_1, outcomes_2], ignore_index=True)
# Column names are assigned by position, so the combined table must have exactly these 11 columns in this order
combined_outcomes.columns = [
    'Surveyor', 'Housing Association', 'No.', 'Address', 'Postcode', 'Outcome', 'Type of Funding', "Notes",
    'Previous letter sent Date:', 'Date Letter sent', 'Installer'
]
# Store
combined_outcomes.to_excel(
    os.path.join(_reconciliation_dir, "combined_outcomes.xlsx"),
)

################################################################################################
# Config for asset list standardisation
################################################################################################

# --- Source workbook for the asset list ---
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme Reconciliation"
data_filename = "Community Housing - Original Asset List Copy for Reconciliation.xlsx"
sheet_name = "Assets"

# --- Address options handed to AssetList (None / empty where not set) ---
postcode_column = "Postcode"
fulladdress_column = "Full Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None

# --- Landlord attribute options handed to AssetList (None where not set) ---
landlord_year_built = "Build_Date"
landlord_os_uprn = None
landlord_property_type = "Asset_Type1"
landlord_built_form = "Asset_Classification"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = "Heat Source Static"
landlord_existing_pv = None
landlord_property_id = "Asset_Reference"
landlord_sap = None

# --- Outcomes file(s) handed to AssetList.flag_outcomes; the lists are parallel, one entry per file ---
outcomes_filename = [os.path.join(data_folder, "combined_outcomes.xlsx")]
outcomes_sheetname = ["Sheet1"]
outcomes_postcode = ["Postcode"]
outcomes_houseno = ["No."]
outcomes_id = [None]
outcomes_address = ["Address"]

# --- Survey master file(s) handed to AssetList.flag_survey_master ---
master_filepaths = [os.path.join(data_folder, "Submissions - for analysis.csv")]
master_to_asset_list_filepath = None

phase = False
# Pipe-separated landlord names handed to AssetList.flag_ecosurv
ecosurv_landlords = "community community|community housing|mr community|david lindwood"

manual_uprn_map = {}

# Build the asset list from the configured workbook, then run the two standardisation steps
_asset_list_options = {
    "local_filepath": os.path.join(data_folder, data_filename),
    "header": 0,
    "sheet_name": sheet_name,
    "address1_colname": address1_column,
    "postcode_colname": postcode_column,
    "landlord_property_id": landlord_property_id,
    "full_address_colname": fulladdress_column,
    "full_address_cols_to_concat": address_cols_to_concat,
    "missing_postcodes_method": missing_postcodes_method,
    "address1_extraction_method": address1_method,
    "landlord_year_built": landlord_year_built,
    "landlord_uprn": landlord_os_uprn,
    "landlord_property_type": landlord_property_type,
    "landlord_built_form": landlord_built_form,
    "landlord_wall_construction": landlord_wall_construction,
    "landlord_roof_construction": landlord_roof_construction,
    "landlord_heating_system": landlord_heating_system,
    "landlord_existing_pv": landlord_existing_pv,
    "landlord_sap": landlord_sap,
    "phase": phase,
}
asset_list = AssetList(**_asset_list_options)
asset_list.init_standardise()

# NOTE: the method name is spelled "apply_standardiation" on AssetList as called by this script
asset_list.apply_standardiation()

# We now flag properties that have been treated under existing programmes
_flag_outcomes_options = {
    "outcomes_filepaths": outcomes_filename,
    "outcomes_sheetname": outcomes_sheetname,
    "outcomes_address": outcomes_address,
    "outcomes_postcode": outcomes_postcode,
    "outcomes_houseno": outcomes_houseno,
    "outcomes_id": outcomes_id,
}
asset_list.flag_outcomes(**_flag_outcomes_options)

# Exactly one outcome is expected to be left without a domna_property_id; anything else aborts the run
_unmatched_outcome_count = asset_list.outcomes["domna_property_id"].isnull().sum()
if _unmatched_outcome_count != 1:
    raise Exception("Something went wrong")

# We fix this one manually
asset_list.outcomes["domna_property_id"] = asset_list.outcomes["domna_property_id"].fillna(
    "29walternashroadeastbirchencoppicekidderminsterdy117ea-caa3a8d92ea9"
)

# Flag the properties that appear in the survey master file(s)
asset_list.flag_survey_master(
    master_filepaths=master_filepaths,
    master_to_asset_list_filepath=master_to_asset_list_filepath
)

master_surveyed = asset_list.master_surveyed
# Maps the funding scheme labels found in the submissions onto short scheme names. Labels that are not listed
# here map to NaN.
scheme_map = {
    "ECO4 A/W": "ECO4",
    'ECO4 GBIS': "GBIS",
    'ECO4 - REMEDIAL CWI ONLY': "ECO4 Remedial",
    "ECO4 GBIS REMEDIAL": "GBIS Remedial",
    'ECO4 - Remedial CWI Only': "ECO4 Remedial",
    'ECO4 GBIS Remedial': "GBIS Remedial"
}
master_surveyed["funding_scheme"] = master_surveyed["funding_scheme"].map(scheme_map)
master_surveyed["survey_reference"] = master_surveyed["funding_scheme"] + ": " + master_surveyed["measure_mix"]
master_surveyed = master_surveyed.merge(
    asset_list.standardised_asset_list[["domna_property_id", "landlord_property_id"]],
    how="left",
    on="landlord_property_id",
)
if master_surveyed["domna_property_id"].isnull().any():
    raise ValueError("Some of the master surveyed properties do not have a domna_property_id")

# Flag anything in outcomes that has been listed as surveyed, that is NOT in the master_surveyed sheet
surveyed_outcomes = asset_list.outcomes[
    asset_list.outcomes["Outcome"].isin(["surveyed"])
]
# .copy() gives an independent frame, so the column assignment below is not a chained assignment on a
# filtered slice of asset_list.outcomes
outcomes_not_in_master = surveyed_outcomes[
    ~surveyed_outcomes["domna_property_id"].isin(master_surveyed["domna_property_id"])
].copy()
outcomes_not_in_master["Type of Funding"] = outcomes_not_in_master["Type of Funding"].fillna("Work Type Not Filled In")

# Flag Ecosurv records for the configured landlord names, skipping the similarly named landlords listed here
_ecosurv_landlords_to_ignore = [
    "Watford Community housing",
    "Eastlight Community housing",
    "Mr Tower Hamlets Community Housing",
]
asset_list.flag_ecosurv(ecosurv_landlords=ecosurv_landlords, landlords_to_ignore=_ecosurv_landlords_to_ignore)

# These are properties NOT on the Community Housing asset list that were sold under the wrong HA
# (export currently disabled)
# asset_list.ecosurv_no_match.to_csv(
#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme "
#     "Reconciliation/Ecosurv - properties sold to Community Housing, not belonging to them.csv",
#     index=False
# )

# We read in the works, split by sold to SGEC and on-hold
_deck_of_works_dir = os.path.join(data_folder, "Community Housing Deck of works")

billed_to_installer = pd.read_csv(os.path.join(_deck_of_works_dir, "SGEC BILLED -Table 1.csv"))
billed_to_installer["billed"] = True

not_billed_to_installer = pd.read_csv(os.path.join(_deck_of_works_dir, "ON HOLD -Table 1.csv"))
not_billed_to_installer["billed"] = False

# Stack both tables on a fresh 0..n-1 index and keep that position as a row identifier
sgec_billings = pd.concat([billed_to_installer, not_billed_to_installer], ignore_index=True)
sgec_billings["row_id"] = sgec_billings.index

# We match these two lists back to the domna_property_id. They SHOULD match to submissions
# The header names are resolved against the columns of the billed table, with a fallback name for each
_billed_columns = billed_to_installer.columns

if "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in _billed_columns:
    scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION"
else:
    scheme_col = "AFFORDABLE WARMTH"

if "POSTCODE" in _billed_columns:
    postcode_col = "POSTCODE"
else:
    postcode_col = "Post Code"

if 'NO.' in _billed_columns:
    house_no_col = 'NO.'
else:
    house_no_col = "NO"

# The same header is used whether or not it is present in the billed table
property_type_col = "PROPERTY TYPE As per table emailed"
measure_mix_col = "MEASURE COMBO"

# Keyed by "<house number>+<postcode>"; the value is looked up as a landlord_property_id in the standardised
# asset list
manual_corrections = dict([
    ("30+DY12 1EB", "73440300"),
    ("32+DY12 1EB", "73440320"),
    ("1+DY11 7ES", "20150010"),
    ("12+DY11 7EP", "9460120"),
    ("72+DY11 7PA", "88520720"),
    ("39+DY13 0DR", "44250390"),
    ("43+DY11 7EF", "2460430"),
    ("45+DY11 7EG", "2460450"),
    ("47+DY11 7EG", "2460470"),
    ("49+DY11 7EG", "2460490"),
    ("11+DY13 0HB", "87320110"),
    ("4+DY130HA", "87320040"),
])

# Resolve every row of the SGEC deck of works to a domna_property_id. Three strategies are tried in order:
#   1) a manual correction keyed on "<house number>+<postcode>"
#   2) an exact house number + postcode match against the surveyed master list
#   3) a normalised postcode + extracted house number match against the standardised asset list
# A row that cannot be resolved to exactly one property aborts the run.
_standardised_assets = asset_list.standardised_asset_list
# Loop-invariant: asset list postcodes trimmed, lower-cased and with spaces removed
_asset_postcodes_no_space = (
    _standardised_assets[asset_list.STANDARD_POSTCODE].str.strip().str.lower().str.replace(" ", "")
)

billed_lookup = []
for _, row in tqdm(sgec_billings.iterrows(), total=len(sgec_billings)):
    # Use the resolved header names throughout, so the whole loop reads the same columns
    postcode = row[postcode_col]
    houseno = row[house_no_col]
    # str() so a numeric house number or a missing postcode cannot break the join
    lookup_key = "+".join([str(houseno), str(postcode)])

    # We need to correct some records
    landlord_pid = manual_corrections.get(lookup_key)
    if landlord_pid:
        df = _standardised_assets[_standardised_assets["landlord_property_id"] == landlord_pid]
        if df.shape[0] != 1:
            raise ValueError(
                f"Expected exactly one match for {landlord_pid} in the standardised asset list, "
                f"found {df.shape[0]}"
            )
        billed_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    df = master_surveyed[
        (master_surveyed["original_house_no"] == houseno) &
        (master_surveyed["original_postcode"] == postcode)
    ]
    if df.shape[0] != 1:
        # Try a search on the asset list
        postcode_no_space = str(postcode).strip().replace(" ", "").lower()
        candidates = _standardised_assets[_asset_postcodes_no_space == postcode_no_space].copy()

        # A whole-number float such as 12.0 is compared as "12"; a missing value is left as it is
        house_no = houseno
        if isinstance(house_no, float) and not pd.isna(house_no):
            house_no = str(int(house_no)).lower()
        else:
            house_no = str(house_no).lower()

        # Skipped when no postcode matched: a row-wise apply on an empty frame does not yield a column
        if not candidates.empty:
            candidates["house_no"] = candidates.apply(
                lambda x: SearchEpc.get_house_number(
                    str(x[asset_list.STANDARD_ADDRESS_1]), str(x[asset_list.STANDARD_POSTCODE])
                ),
                axis=1
            )
            candidates = candidates[candidates["house_no"].str.lower() == house_no].copy()

        if candidates.shape[0] == 1:
            billed_lookup.append(
                {
                    "domna_property_id": candidates["domna_property_id"].values[0],
                    "row_id": row["row_id"],
                }
            )
            continue

        raise ValueError(
            f"Expected exactly one match for {lookup_key}: found {df.shape[0]} in the master surveyed list "
            f"and {candidates.shape[0]} in the asset list"
        )

    billed_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )

# Explicit columns keep the merge key present even if no rows were collected
billed_lookup = pd.DataFrame(billed_lookup, columns=["domna_property_id", "row_id"])

sgec_billings = sgec_billings.merge(
    billed_lookup,
    how="left",
    on="row_id"
)

# We get the asset list that Community Housing thinks they sent Warmfront
_warmfront_workbook = os.path.join(data_folder, "Warmfront.xlsx")
master_data_sheet = pd.read_excel(_warmfront_workbook, sheet_name="Asset Stock List (3)")
master_data_sheet["Asset_Reference"] = master_data_sheet["Asset_Reference"].astype(str)

# 1) We check that all of the properties in the asset list we have on file are in the asset list that Community Housing
# believe they sent Warmfront
_warmfront_references = master_data_sheet["Asset_Reference"].astype(str).values
_on_file_not_in_warmfront = ~asset_list.standardised_asset_list["landlord_property_id"].isin(_warmfront_references)
if _on_file_not_in_warmfront.any():
    raise ValueError("Some of the properties in the asset list are not in the Warmfront asset list")

# This column documents whether or not the property is in the asset list that the WFT were sent
# There are 189 properties that were never sent to WFT, but all properties are accounted for in the asset list
_on_file_references = asset_list.standardised_asset_list["landlord_property_id"].astype(str).values
master_data_sheet["Is Property in WFT Asset List"] = (
    master_data_sheet["Asset_Reference"].astype(str).isin(_on_file_references)
)

# We now merge on the Warmfront findings
_eco_eligibility = asset_list.standardised_asset_list[["landlord_property_id", "non-intrusives: ECO Eligibility"]]
master_data_sheet = master_data_sheet.merge(
    _eco_eligibility,
    how="left",
    left_on="Asset_Reference",
    right_on="landlord_property_id"
)
# Rows with no eligibility value after the left merge are labelled explicitly
master_data_sheet["non-intrusives: ECO Eligibility"] = master_data_sheet["non-intrusives: ECO Eligibility"].fillna(
    "Not in original asset list"
)

# SGEC did a number of CIGA checks. We match these onto the master data sheet

# TODO: Need to split the programme into historical 2023 and 2024 (there was a cutoff date in late 2024 which
#  seemed to be the start of the new programme)
# Seems like there were 2 main checks - it also seems like this was a 2 phase programme, where these CIGA checks
# correspond to phase 2


def _load_ciga_check(filename, request_label):
    """
    Read one CIGA check workbook from the "CIGA Checks" folder.

    :param filename: name of the workbook inside the "CIGA Checks" folder
    :param request_label: value written to the "request" column of every returned row
    :return: the "Worksheet" sheet without the rows that have no Postcode, plus the "request" column
    """
    checks = pd.read_excel(
        os.path.join(data_folder, "CIGA Checks", filename),
        sheet_name="Worksheet"
    )
    # .copy() so the label is written to an independent frame, not to a filtered slice
    checks = checks[checks["Postcode"].notnull()].copy()
    checks["request"] = request_label
    return checks


ciga_checks_1 = _load_ciga_check("2 CIGA Check WFT 14102024 x1073.xlsx", "1073 properties")
ciga_checks_2 = _load_ciga_check("2 CIGA Check 01112024 x125.xlsx", "125 flats")

cigas = pd.concat([ciga_checks_1, ciga_checks_2], ignore_index=True)
cigas["row_id"] = cigas.index

# We add some temp columns to allow for easier matching
def _house_number_for_asset(asset_row):
    """Return SearchEpc's house number for one asset list row, from its full address and postcode."""
    return SearchEpc.get_house_number(
        str(asset_row["domna_full_address"]), str(asset_row["domna_postcode"])
    )


asset_list.standardised_asset_list["house_no"] = asset_list.standardised_asset_list.apply(
    _house_number_for_asset, axis=1
)

# Keyed by the CIGA "Matched Address"; the value is looked up as a landlord_property_id in the asset list
manual_fixes = dict([
    ("2 Austcliffe Road Cookley, Kidderminster", "2250020"),
    ('5 Brett Young Close, Kidderminster', "9800050"),
])
# CIGA rows whose "Matched Address" or "<Address1> <Address2>" is listed here are skipped when matching
incorrect_ciga_return = [
    "19 Wood Street, Kidderminster", "nan Charles Street", "53 Harold Evers Way, Kidderminster",
    '63 Harold Evers way',
]

# Match every CIGA check row to a property on the standardised asset list. Order of attempts:
#   1) a manual fix keyed on the "Matched Address"
#   2) skip rows listed in incorrect_ciga_return
#   3) "Postcode" + house number, falling back to "Matched Postcode" + house number
#   4) if several properties remain, narrow down on the address text instead of the postcode
# A row that is not skipped and does not resolve to exactly one property aborts the run.
# NOTE(review): the source indentation of this loop was lost; the nesting of the address-text fallback below
# is reconstructed - confirm against the original file.
_standardised_assets = asset_list.standardised_asset_list
# Loop-invariant: asset list addresses lower-cased with commas removed
_asset_addresses_plain = _standardised_assets["domna_full_address"].str.lower().str.replace(",", "")

ciga_lookup = []
for _, row in tqdm(cigas.iterrows(), total=len(cigas)):
    matched_address = row["Matched Address"]
    house_no = str(row["Address1"])
    # str() so a missing Address2 cannot break the join / lower() calls below
    address2 = str(row["Address2"])

    ll_pid = manual_fixes.get(matched_address)
    if ll_pid:
        df = _standardised_assets[_standardised_assets["landlord_property_id"] == ll_pid]
        if df.empty:
            raise ValueError(f"Manual fix {ll_pid} for '{matched_address}' was not found in the asset list")
        ciga_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    if (matched_address in incorrect_ciga_return) or (
        " ".join([house_no, address2]) in incorrect_ciga_return
    ):
        continue

    df = _standardised_assets[_standardised_assets["domna_postcode"] == row["Postcode"]]
    df = df[df["house_no"].astype(str) == house_no]

    if df.empty:
        df = _standardised_assets[_standardised_assets["domna_postcode"] == row["Matched Postcode"]]
        df = df[df["house_no"].astype(str) == house_no]

    if df.shape[0] > 1:
        # regex=False: the address is matched as literal text, not interpreted as a regular expression
        df = _standardised_assets[
            _asset_addresses_plain.str.contains(
                str(matched_address).lower().replace(",", ""), na=False, regex=False
            )
        ]
        if df.empty:
            df = _standardised_assets[
                _asset_addresses_plain.str.contains(address2.lower().replace(",", ""), na=False, regex=False)
            ]

        df = df[df["house_no"].astype(str) == house_no]

    if df.shape[0] != 1:
        raise Exception(
            f"Expected exactly one match for {row['Address1']} {address2} in the asset list, found {df.shape[0]}"
        )

    ciga_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )

# Explicit columns keep the merge key present even if no rows were collected
ciga_lookup = pd.DataFrame(ciga_lookup, columns=["domna_property_id", "row_id"])

cigas = cigas.merge(
    ciga_lookup,
    how="left",
    on="row_id"
)
# Skipped rows have no domna_property_id after the left merge and are dropped here
cigas = cigas[cigas["domna_property_id"].notnull()]

cigas = cigas.merge(
    _standardised_assets[["domna_property_id", "landlord_property_id"]],
    how="left",
    on="domna_property_id"
)

# Note 4 entries in the CIGA checks did NOT match to the asset list (were for properties not owned by Community Housing)
master_data_sheet = master_data_sheet.merge(
    cigas[["landlord_property_id", "Guarantee", "request"]].rename(
        columns={"request": "CIGA request batch"}
    ),
    how="left",
    on="landlord_property_id"
)

# Fill missing survey_reference with funding_scheme
master_surveyed["survey_reference"] = master_surveyed["survey_reference"].fillna(
    master_surveyed["funding_scheme"]
)

# Surveyed properties that do not appear on the SGEC deck of works.
# .copy() so the status column is added to an independent frame rather than to a filtered slice of
# master_surveyed (chained assignment).
master_surveyed_to_merge = master_surveyed[
    ~master_surveyed["domna_property_id"].isin(sgec_billings["domna_property_id"].values)
].copy()
master_surveyed_to_merge["Survey Status"] = "Surveyed, Submitted, not on SGEC Deck of Works"

# We now merge on what we've surveyed and submitted
master_data_sheet = master_data_sheet.merge(
    master_surveyed_to_merge[
        ["landlord_property_id", "survey_reference", "submission_date", "cancelled", "Survey Status"]
    ].rename(
        columns={
            "survey_reference": "Survey Type", "submission_date": "Survey Date",
            "cancelled": "Was the Install Cancelled?"
        }
    ),
    how="left",
    on="landlord_property_id"
)

# We now deduce the status of the work based on sgec_billings
_property_id_pairs = asset_list.standardised_asset_list[["landlord_property_id", "domna_property_id"]]
sgec_billings = sgec_billings.merge(_property_id_pairs, how="left", on="domna_property_id")

# Second and later occurrences of a property on the deck of works
dupe_ids = sgec_billings.loc[sgec_billings["domna_property_id"].duplicated(), "domna_property_id"]
# We sort by domna_property_id and billed (where true should be first) and take the first instance
sgec_billings = (
    sgec_billings
    .sort_values(["domna_property_id", "billed"], ascending=[True, False])
    .drop_duplicates(subset=["domna_property_id"], keep="first")
)

# "<scheme>: <measure combo>", falling back to the scheme alone when the combined label is missing
_billing_scheme = sgec_billings["SUBMISSION TYPE - ECO4,GBIS,SHDF,EPC or OTHER"].map(scheme_map)
_billing_survey_type = _billing_scheme + ": " + sgec_billings["MEASURE COMBO"]
sgec_billings["Survey Type"] = _billing_survey_type.fillna(_billing_scheme)
sgec_billings["Survey Date"] = sgec_billings['SUBMISSION DATE']
# True where the text of the INSTALLED column contains "cancel" (case-insensitive)
_installed_text = sgec_billings["INSTALLED"].astype(str).str.lower()
sgec_billings["Was the Install Cancelled?"] = _installed_text.str.contains("cancel")

_is_billed = sgec_billings["billed"].eq(True)
sgec_billings['Survey Status'] = np.where(
    _is_billed,
    "Surveyed, Submitted, on SGEC Deck of Works",
    "Surveyed, not submitted to SGEC, on SGEC Deck of Works"
)

# Merge the deck of works onto the master data sheet. The survey columns already exist on the master data
# sheet, so the incoming copies get a "_y" suffix and are only used to fill the gaps.
master_data_sheet = master_data_sheet.merge(
    sgec_billings[
        ["landlord_property_id", "Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"]],
    how="left",
    on="landlord_property_id",
    suffixes=("", "_y")
)

for col in ["Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"]:
    # Existing values win; missing ones are taken from the "_y" column. fillna stays inside pandas, whereas
    # np.where works on plain NumPy arrays and can degrade column dtypes (e.g. datetime64 values become
    # integers when combined with an object column).
    master_data_sheet[col] = master_data_sheet[col].fillna(master_data_sheet[col + "_y"])
    master_data_sheet = master_data_sheet.drop(columns=[col + "_y"])

# Attach the landlord_property_id to the surveyed outcomes that are not in the surveyed master list
outcomes_not_in_master = outcomes_not_in_master.merge(
    asset_list.standardised_asset_list[["landlord_property_id", "domna_property_id"]],
    how="left",
    on="domna_property_id"
)
# We also filter out any that were in the SGEC billings
outcomes_not_in_master = outcomes_not_in_master[
    ~outcomes_not_in_master["domna_property_id"].isin(sgec_billings["domna_property_id"].values)
]

# We now merge on outcomes. There are a small number of surveyed outcomes that were not submitted
master_data_sheet = master_data_sheet.merge(
    outcomes_not_in_master[["landlord_property_id", 'Type of Funding', "Date Letter sent"]],
    how="left",
    on="landlord_property_id",
)

# Rows with no survey type so far but with a funding type from the outcomes. Evaluated once, BEFORE
# "Survey Type" is filled, because the status update depends on the pre-fill state.
_only_on_outcomes = master_data_sheet["Survey Type"].isnull() & master_data_sheet["Type of Funding"].notnull()
master_data_sheet["Survey Status"] = master_data_sheet["Survey Status"].mask(
    _only_on_outcomes, "Surveyed, On Outcomes, not submitted"
)

# Existing values win; gaps are filled from the outcomes columns. fillna stays inside pandas, whereas
# np.where works on plain NumPy arrays and can degrade column dtypes (e.g. datetime64 values become integers
# when combined with an object column).
master_data_sheet["Survey Type"] = master_data_sheet["Survey Type"].fillna(master_data_sheet["Type of Funding"])
master_data_sheet["Survey Date"] = master_data_sheet["Survey Date"].fillna(master_data_sheet["Date Letter sent"])
master_data_sheet = master_data_sheet.drop(columns=["Type of Funding", "Date Letter sent"])

# We now need to compare the submissions that SGEC have sent us, because the deck of works is likely incorrect
# given the number of properties that have been received by SGEC

# We have submissions from the following dates (as given in the workbook names below):
# - 18/11/2024
# - 10/03/2025
# - A sheet that claims to be 25/11/2024 but has 18/11/2024 as the submission date
# - 16/12/2024
# - 02/12/2024
# - 10/02/2025
_received_workbooks = (
    "4x108 18.11.24 - RT MASTERS SGEC INVOICE.xlsx",
    "4x144 COMMUNITY HOUSING TOTAL PROJECT INV 10032025.xlsx",
    "4x19 25.11.2024 - RT Master SGEC.xlsx",
    "4x37 16.12.2024 - SGEC INVOICED.xlsx",
    "4x60 02.12.2024 - RT SGEC INV.xlsx",
    "4x78 10.02.2025 MASTERS - SGEC INVOICED-CORRECT.xlsx",
)

# Each workbook is read, tagged with the workbook it came from, and stacked on a fresh 0..n-1 index
sgec_received_submissions = pd.concat(
    [
        pd.read_excel(
            os.path.join(data_folder, "SGEC Received Submissions", workbook)
        ).assign(filename=workbook)
        for workbook in _received_workbooks
    ],
    ignore_index=True,
)
sgec_received_submissions["row_id"] = sgec_received_submissions.index

# Keyed by "<house number>+<postcode>"; the value is used directly as the landlord_property_id
manual_fix = {
    "5a+DY10 3JR": "6856005A",
    '12+DY10 3JR': "78900120",
    "9+DY10 3JR": "86280090",
    '10+DY10 3JL': "86280100",
    "66+DY10 3JS": "68560660",
    "70+DY10 3JS": "68560700",
    "72+DY10 3JS": "68560720",
    "12+DY10 3JP": "86280120",
    "2A+DY11 5TZ": "6872002A",
    "3A+DY11 5TZ": "6872003A",
    "4A+DY11 5TZ": "6872004A"
}

# Resolve every received submission to a landlord_property_id: manual fix first, then the deck of works,
# then the surveyed master list. Ambiguous or unmatched rows abort the run.
# Loop-invariant: string views of the house number columns (previously rebuilt on every iteration)
_billing_house_nos = sgec_billings['NO.'].astype(str)
_surveyed_house_nos = master_surveyed['original_house_no'].astype(str)

sgec_received_submissions_lookup = []
for _, row in tqdm(sgec_received_submissions.iterrows(), total=len(sgec_received_submissions)):
    house_no = str(row["NO."])
    postcode = row["Post Code"]
    _key = "+".join([house_no, str(postcode)])

    ll_pid = manual_fix.get(_key)
    if ll_pid is not None:
        sgec_received_submissions_lookup.append(
            {
                "row_id": row["row_id"],
                "landlord_property_id": ll_pid,
            }
        )
        continue

    match = sgec_billings[
        (_billing_house_nos == house_no) &
        (sgec_billings['Post Code'] == postcode)
    ]
    if match.shape[0] > 1:
        raise Exception(f"something went wrong {_key} {row['Street / Block Name']}")

    if match.shape[0] == 0:
        # Not on the deck of works: fall back to the surveyed master list
        match = master_surveyed[
            (_surveyed_house_nos == house_no) &
            (master_surveyed['original_postcode'] == postcode)
        ]
        if match.shape[0] > 1:
            raise Exception(f"something went wrong 2 {_key} {row['Street / Block Name']}")
        if match.shape[0] == 0:
            raise Exception(f"No match {_key} {row['Street / Block Name']}")

    sgec_received_submissions_lookup.append(
        {
            "row_id": row["row_id"],
            "landlord_property_id": match["landlord_property_id"].values[0],
        }
    )

# Explicit columns keep the merge key present even if no rows were collected
sgec_received_submissions_lookup = pd.DataFrame(
    sgec_received_submissions_lookup, columns=["row_id", "landlord_property_id"]
)
sgec_received_submissions = sgec_received_submissions.merge(
    sgec_received_submissions_lookup[["row_id", "landlord_property_id"]],
    how="left",
    on="row_id"
)

# Derive the reporting columns for the submissions SGEC confirmed receiving
# "<scheme>: <measure combo>", falling back to the scheme alone when the combined label is missing
_received_scheme = sgec_received_submissions["SUBMISSION TYPE - ECO4,GBIS,SHDF,EPC or OTHER"].map(scheme_map)
_received_survey_type = _received_scheme + ": " + sgec_received_submissions["MEASURE COMBO"]
sgec_received_submissions["Survey Type"] = _received_survey_type.fillna(_received_scheme)

sgec_received_submissions["Survey Date"] = sgec_received_submissions['SUBMISSION DATE']

# True where the text of the INSTALLED column contains "cancel" (case-insensitive)
_received_installed_text = sgec_received_submissions["INSTALLED"].astype(str).str.lower()
sgec_received_submissions["Was the Install Cancelled?"] = _received_installed_text.str.contains("cancel")

sgec_received_submissions['Survey Status'] = "Submission sent to SGEC, Confirmed by SGEC"
sgec_received_submissions["Survey Received by SGEC"] = True

# We now merge on the submissions that SGEC have sent us. The survey columns already exist on the master data
# sheet, so the incoming copies get a "_y" suffix; "Survey Received by SGEC" is new and is merged in as is.
master_data_sheet = master_data_sheet.merge(
    sgec_received_submissions[
        [
            "landlord_property_id", "Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status",
            "Survey Received by SGEC"
        ]
    ],
    how="left",
    on="landlord_property_id",
    suffixes=("", "_y")
)

# Fill in the gaps
for col in ["Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"]:
    # Existing values win; missing ones are taken from the "_y" column. fillna stays inside pandas, whereas
    # np.where works on plain NumPy arrays and can degrade column dtypes (e.g. datetime64 values become
    # integers when combined with an object column).
    master_data_sheet[col] = master_data_sheet[col].fillna(master_data_sheet[col + "_y"])
    master_data_sheet = master_data_sheet.drop(columns=[col + "_y"])

# The left merges above must not have multiplied any asset: every Asset_Reference may appear only once
_has_duplicate_references = master_data_sheet["Asset_Reference"].duplicated().any()
if _has_duplicate_references:
    raise ValueError("There are duplicates in the asset reference column")

# Drop this at the end
master_data_sheet = master_data_sheet.drop("landlord_property_id", axis=1)

_draft_results_path = os.path.join(data_folder, "Draft Results.xlsx")
master_data_sheet.to_excel(_draft_results_path)