Merge pull request #425 from Hestia-Homes/remote-assessment-api

Handling error case for fetching addresses
This commit is contained in:
KhalimCK 2025-05-13 11:54:51 +01:00 committed by GitHub
commit 174d9ea6a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1958 additions and 614 deletions

View file

@ -5,6 +5,7 @@ import tiktoken
from pprint import pprint
from datetime import datetime
from numpy.ma.core import masked_not_equal
from openai import OpenAI
import numpy as np
import pandas as pd
@ -19,6 +20,7 @@ import asset_list.mappings.heating_systems as heating_mappings
import asset_list.mappings.exising_pv as existing_pv_mappings
import asset_list.mappings.built_form as built_form_mappings
import asset_list.mappings.roof as roof_mappings
import asset_list.mappings.outcomes as outcomes_mappings
from recommendations.recommendation_utils import (
estimate_perimeter,
@ -691,6 +693,9 @@ class AssetList:
c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in self.standardised_asset_list.columns
]
if "Warmfront Finding" in self.standardised_asset_list.columns:
non_intrusive_columns.append("Warmfront Finding")
self.keep_variables += non_intrusive_columns
self.rename_map = {
@ -734,6 +739,11 @@ class AssetList:
self.standardised_asset_list[self.landlord_year_built].replace(self.DATETIME_REMAP)
)
no_data_codes = {"No Data": None}
self.standardised_asset_list[self.landlord_year_built] = (
self.standardised_asset_list[self.landlord_year_built].replace(no_data_codes)
)
self.standardised_asset_list[self.landlord_year_built] = pd.to_datetime(
self.standardised_asset_list[self.landlord_year_built]
)
@ -754,7 +764,8 @@ class AssetList:
"This cell has an external reference that can't be shown or edited. Editing this cell will "
"remove the external reference.",
"ND",
'PIMSS EMPTY'
'PIMSS EMPTY',
"UNKNOWN"
]
if pd.isnull(date_str) or date_str in known_errors or (date_str == 0):
@ -929,7 +940,10 @@ class AssetList:
raise ValueError(f"Dataframe must contain the column {self.DOMNA_PROPERTY_ID}")
if df[self.DOMNA_PROPERTY_ID].duplicated().sum():
raise ValueError(f"{self.DOMNA_PROPERTY_ID} contains duplicated IDs")
df = df.drop_duplicates(
subset=[self.DOMNA_PROPERTY_ID],
keep="first"
)
self.standardised_asset_list = self.standardised_asset_list.merge(
df, how="left", on=self.DOMNA_PROPERTY_ID
@ -1139,21 +1153,29 @@ class AssetList:
# We add a SAP category for all work type identification
self.standardised_asset_list["SAP Category"] = np.where(
(
(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68) |
(self.standardised_asset_list[self.STANDARD_SAP] <= 68)
(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 54) |
(self.standardised_asset_list[self.STANDARD_SAP] <= 54)
),
"SAP Rating 68 or less",
"SAP Rating 54 or less",
np.where(
(
(
self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <=
self.EMPTY_CAVITY_SAP_THRESHOLD
) | (self.standardised_asset_list[self.STANDARD_SAP] <= self.EMPTY_CAVITY_SAP_THRESHOLD)
(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68) |
(self.standardised_asset_list[self.STANDARD_SAP] <= 68)
),
"SAP Rating 55-68",
np.where(
(
(
self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <=
self.EMPTY_CAVITY_SAP_THRESHOLD
) | (self.standardised_asset_list[self.STANDARD_SAP] <= self.EMPTY_CAVITY_SAP_THRESHOLD)
),
f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}",
f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more"
),
f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}",
f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more"
)
)
else:
# We add a SAP category for all work type identification
# We break into 4 categories (54 or less, 55-68, 69-74, 75 or more)
@ -1213,11 +1235,11 @@ class AssetList:
elif self.old_format_non_intrusives_present:
non_intrusives_wall_filter = (
self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin(
["empty cavity", "partial fill"]
["empty cavity", "partial fill", "empty", "EMPTY CAVITY 70MM", "partial"]
) | (
(
self.standardised_asset_list['non-intrusives: WFT Findings']
.str.lower().str.strip().str.contains("empty cavity|partial fill") &
.str.lower().str.strip().str.contains("empty cavity|partial fill|empty|partial") &
~self.standardised_asset_list['non-intrusives: WFT Findings']
.astype(str).str.lower().str.strip().str.contains("major access issues")
)
@ -1250,7 +1272,7 @@ class AssetList:
)
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = (
pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]) &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter &
year_built_filter &
@ -1262,23 +1284,35 @@ class AssetList:
# We also add a filter on anything that was generally identified by the non-intrusives
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_year_filter"] = (
pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]) &
pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"]) &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] &
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter
)
self.standardised_asset_list["epc_indicates_empty_cavity"] = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
) & (
self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD
) & (
~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD]
) & (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
if (not self.non_intrusives_eligibility) and (not self.old_format_non_intrusives_present):
# If we have NO inspections data, we capture all of the wall types and don't filter on age of the EPC
self.standardised_asset_list["epc_indicates_empty_cavity"] = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
) & (
self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD
) & (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
)
)
else:
self.standardised_asset_list["epc_indicates_empty_cavity"] = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
) & (
self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD
) & (
~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD]
) & (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
)
)
)
self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = (
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) &
@ -1326,6 +1360,9 @@ class AssetList:
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
extraction_wall_filter & year_built_filter
)
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = (
extraction_wall_filter & ~year_built_filter
)
elif self.old_format_non_intrusives_present:
print("Review these categories!!!!")
@ -1339,10 +1376,11 @@ class AssetList:
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
extraction_wall_filter
)
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = False
else:
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = False
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = False
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = False
######################################################
# Solar
@ -1470,7 +1508,7 @@ class AssetList:
)
# We merge on the u-value for average thermal transmittance
roof_roof_data = pd.DataFrame(cleaned["roof-description"])[
roof_data = pd.DataFrame(cleaned["roof-description"])[
["original_description", "thermal_transmittance", "is_pitched", "is_loft"]
].rename(
columns={
@ -1480,7 +1518,7 @@ class AssetList:
)
self.standardised_asset_list = self.standardised_asset_list.merge(
roof_roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
)
# If the u-value of a roof is less than 0.7 we consider it insulated
@ -1724,8 +1762,8 @@ class AssetList:
~self.standardised_asset_list["epc_indicates_empty_cavity"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
"Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled: " + self.standardised_asset_list[
"SAP Category"],
"Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or Non-cavity: " +
self.standardised_asset_list["SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
@ -1739,6 +1777,16 @@ class AssetList:
self.standardised_asset_list["cavity_reason"]
)
self.standardised_asset_list["cavity_reason"] = np.where(
(
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
f"Non-Intrusive Data Shows Cavity Extraction, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " +
self.standardised_asset_list["SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
######################################################
# Flag solar
######################################################
@ -1761,6 +1809,16 @@ class AssetList:
self.standardised_asset_list["solar_reason"]
)
# Finally, anything flagged for solar should not be flagged for cavity - make them None
self.standardised_asset_list["cavity_reason"] = np.where(
(
~pd.isnull(self.standardised_asset_list["solar_reason"]) &
~pd.isnull(self.standardised_asset_list["cavity_reason"])
),
None,
self.standardised_asset_list["cavity_reason"]
)
# Flag anything that has existing outcomes
if (self.outcomes is not None) and ("surveyed" in self.standardised_asset_list.columns):
@ -2160,7 +2218,7 @@ class AssetList:
self.hubspot_data = programme_data
def flag_ecosurv(self, ecosurv_landlords=None):
def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None):
"""
This method will match ecosurv data to the asset list
@ -2170,12 +2228,9 @@ class AssetList:
return
# TODO: Fetch from Sharepoint
ecosurv_filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/15.04.csv"
ecosurv_filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/07.05.2025.csv"
logger.info("Getting Ecosurv data from %s", ecosurv_filepath)
self.ecosurv = pd.read_csv(
ecosurv_filepath,
encoding="cp437"
)
self.ecosurv = pd.read_csv(ecosurv_filepath, encoding="cp437")
landlords = self.ecosurv["Landlord"].value_counts().reset_index(drop=False)
landlord_references = landlords[
@ -2186,6 +2241,11 @@ class AssetList:
self.ecosurv["Landlord"].isin(landlord_references["Landlord"].values)
]
if landlords_to_ignore is not None:
landlord_ecosurv_data = landlord_ecosurv_data[
~landlord_ecosurv_data["Landlord"].isin(landlords_to_ignore)
]
# Try and match to asset list
matched = []
unmatched = []
@ -2247,6 +2307,11 @@ class AssetList:
# We now match
matched = pd.DataFrame(matched)
# We'll possibly have duplicates here, where properties have been sold twice. We de-dupe
if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum():
# It doesn't matter too much which record we take
matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
self.standardised_asset_list = self.standardised_asset_list.merge(
matched,
how="left",
@ -2260,46 +2325,82 @@ class AssetList:
def flag_outcomes(
self,
outcomes_filepath,
outcomes_filepaths,
outcomes_sheetname,
outcomes_address,
outcomes_postcode,
outcomes_houseno,
outcomes_id
):
if outcomes_filepath is None:
if not outcomes_filepaths:
return
self.outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname)
self.outcomes["row_id"] = self.outcomes.index
if outcomes_houseno is None:
outcomes_houseno = "houseno"
self.outcomes["houseno"] = self.outcomes[outcomes_address].apply(
lambda x: SearchEpc.get_house_number(x, self.outcomes[outcomes_postcode])
)
logger.info("Matching outcomes to asset list")
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
self.outcomes = []
outcomes_no_match = []
lookup = []
nomatch = []
for _, x in tqdm(self.outcomes.iterrows(), total=len(self.outcomes)):
for idx, outcomes_filepath in enumerate(outcomes_filepaths):
outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname[idx])
outcomes["row_id"] = outcomes.index
if pd.isnull(x[outcomes_address]):
continue
if outcomes_houseno[idx] is None:
outcomes_houseno = "houseno"
outcomes["houseno"] = outcomes[outcomes_address[idx]].apply(
lambda x: SearchEpc.get_house_number(x, outcomes[outcomes_postcode])
)
# Check if we have an id
oid = x[outcomes_id] if outcomes_id is not None else None
# We handle an edge case that occurred for LHP
if "Notes / Outcomes" in outcomes.columns and "Outcome" not in outcomes.columns:
# We use the re-mapper to handle this:
outcomes["Notes / Outcomes"] = outcomes["Notes / Outcomes"].str.strip()
values_to_remap = outcomes["Notes / Outcomes"].unique()
# We want to map this to our standardised list of property types we're interested in
remapper = DataRemapper(
standard_values=outcomes_mappings.outcomes_values, standard_map=outcomes_mappings.outcomes_map
)
remap_dictionary = remapper.standardize_list(values_to_remap=values_to_remap.tolist())
# Perform the remap
outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary)
outcomes["Outcome"] = outcomes["Outcome"].str.lower()
logger.info("Matching outcomes to asset list")
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
lookup_i = []
nomatch_i = []
for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
if pd.isnull(x[outcomes_address[idx]]) or not x[outcomes_address[idx]]:
continue
# Check if we have an id
oid = x[outcomes_id[idx]] if outcomes_id[idx] is not None else None
if oid is not None:
matched = self.standardised_asset_list[
(self.standardised_asset_list[
self.STANDARD_LANDLORD_PROPERTY_ID
].str.strip() == oid)
]
if matched.shape[0] == 1:
lookup_i.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
)
continue
address_clean = x[outcomes_address[idx]].lower().replace(",", "").replace(" ", " ")
if oid is not None:
matched = self.standardised_asset_list[
(self.standardised_asset_list[
self.STANDARD_LANDLORD_PROPERTY_ID
].str.strip() == oid)
self.STANDARD_FULL_ADDRESS
].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean)
]
if matched.shape[0] == 1:
lookup.append(
lookup_i.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
@ -2307,65 +2408,65 @@ class AssetList:
)
continue
address_clean = x[outcomes_address].lower().replace(",", "").replace(" ", " ")
self.outcomes["Outcome"] = self.outcomes["Outcome"].str.lower()
matched = self.standardised_asset_list[
(self.standardised_asset_list[
self.STANDARD_FULL_ADDRESS
].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean)
]
if matched.shape[0] == 1:
lookup.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
)
continue
matched = self.standardised_asset_list[
(self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip() == x[outcomes_postcode])
].copy()
if not matched.empty:
matched["houseno"] = matched.apply(
lambda x: SearchEpc.get_house_number(
str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
),
axis=1
)
matched = matched[
matched["houseno"].astype(str) == str(x[outcomes_houseno])
]
if matched.shape[0] == 1:
lookup.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
matched = self.standardised_asset_list[
(self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip() == x[outcomes_postcode[idx]])
].copy()
if not matched.empty:
matched["houseno"] = matched.apply(
lambda x: SearchEpc.get_house_number(
str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
),
axis=1
)
continue
elif not matched.empty:
# Use Levenshtein distance to match
matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
best_match = process.extractOne(x["Address"], matched[self.STANDARD_FULL_ADDRESS].values)[0]
matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match]
lookup.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
)
continue
if pd.isnull(x[outcomes_houseno[idx]]):
house_no_to_match = SearchEpc.get_house_number(
str(x[outcomes_address[idx]]), str(x[outcomes_postcode[idx]])
)
if isinstance(house_no_to_match, str):
house_no_to_match = house_no_to_match.lower()
else:
house_no_to_match = str(x[outcomes_houseno[idx]]).strip()
nomatch.append(x["row_id"])
matched = matched[matched["houseno"].astype(str) == house_no_to_match]
if matched.shape[0] == 1:
lookup_i.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
)
continue
elif not matched.empty:
# Use Levenshtein distance to match
matched["address"] = (
matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
)
self.outcomes_no_match = self.outcomes[self.outcomes["row_id"].isin(nomatch)]
lookup = pd.DataFrame(lookup)
best_match = process.extractOne(
x[outcomes_address[idx]], matched[self.STANDARD_FULL_ADDRESS].values
)[0]
matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match]
lookup_i.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
)
continue
nomatch_i.append(x["row_id"])
outcomes_no_match_i = outcomes[outcomes["row_id"].isin(nomatch_i)]
lookup_i = pd.DataFrame(lookup_i)
outcomes_no_match.append(outcomes_no_match_i)
lookup.append(lookup_i)
self.outcomes.append(outcomes)
lookup = pd.concat(lookup)
self.outcomes_no_match = pd.concat(outcomes_no_match)
self.outcomes = pd.concat(self.outcomes)
if lookup.empty:
return
@ -2376,10 +2477,21 @@ class AssetList:
# that the surveyor had a detailed explanation as to why they couldn't gain access so if this has
# happened multiple times, in this case we judge that the work may not be viable
date_col = "Week Commencing" if "Week Commencing" in self.outcomes else "Survey Date"
if "Week Commencing" in self.outcomes.columns:
date_col = "Week Commencing"
elif "Survey Date" in self.outcomes.columns:
date_col = "Survey Date"
elif "Date letters sent" in self.outcomes.columns:
date_col = "Date letters sent"
elif "Date Letter sent" in self.outcomes.columns:
date_col = "Date Letter sent"
else:
raise NotImplementedError("Invalid date in outcomes - implement me")
notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes"
lookup = lookup.merge(
self.outcomes[["row_id", "Outcome", "Notes", date_col]], how="left", on="row_id"
self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id"
)
visit_counts = (
@ -2390,11 +2502,36 @@ class AssetList:
.sort_values("visit_count", ascending=False)
)
def extract_date(s):
    """
    Recover a date from a raw cell of the outcomes date column.

    :param s: the raw cell value. Either a string containing a date written as DD.MM.YYYY
        (optionally surrounded by other text), or a value that is already a datetime /
        pandas Timestamp
    :return: a pandas Timestamp, or pd.NaT when no valid date can be recovered
    """
    # Cells that have already been parsed into datetimes carry a usable date - pass them through
    # rather than discarding them, otherwise every row sorts as "no date"
    if isinstance(s, datetime):
        return pd.to_datetime(s, errors="coerce")

    if isinstance(s, str):
        # Free-text cells: take the first DD.MM.YYYY pattern found anywhere in the text
        match = re.search(r"(\d{2}\.\d{2}\.\d{4})", s)
        if match:
            # errors="coerce" turns impossible dates (e.g. 31.02.2025) into NaT instead of raising
            return pd.to_datetime(match.group(1), format="%d.%m.%Y", errors="coerce")

    # Missing values, numbers and strings without a recognisable date
    return pd.NaT
lookup['parsed_date'] = lookup[date_col].apply(extract_date)
def get_latest_note(group):
    """
    Pick the single most relevant row from one property's outcome records.

    Rows whose 'Outcome' is 'surveyed' take priority; when there are none, every row in the
    group is a candidate. Among the candidates, the row with the most recent 'parsed_date'
    is returned.

    :param group: DataFrame of outcome rows with 'Outcome' and 'parsed_date' columns
    :return: the chosen row as a Series
    """
    surveyed_rows = group[group['Outcome'] == 'surveyed']
    # Fall back to the whole group only when nothing was surveyed
    candidates = group if surveyed_rows.empty else surveyed_rows
    newest_first = candidates.sort_values('parsed_date', ascending=False)
    return newest_first.iloc[0]
latest_note = (
lookup.groupby('domna_property_id', group_keys=False).
apply(get_latest_note).
reset_index(drop=True)
)
latest_note = latest_note[["domna_property_id", notes_col]]
pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index()
pivot_df = pivot_df.merge(
visit_counts, how="left", on="domna_property_id"
)
# We want the latest note
if pivot_df[self.DOMNA_PROPERTY_ID].duplicated().sum():
raise Exception("We have duplicated property IDs in the outcomes data")
@ -2406,6 +2543,14 @@ class AssetList:
self.standardised_asset_list = self.standardised_asset_list.merge(
pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
)
# Merge the latest note
self.standardised_asset_list = self.standardised_asset_list.merge(
latest_note.rename(columns={notes_col: "Latest Route March Note"}),
how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
)
if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum():
raise ValueError("Duplicates appreared - something went wrong")
self.outcomes = self.outcomes.sort_values("domna_property_id", ascending=False)
@ -2432,36 +2577,43 @@ class AssetList:
# Strip columns
master_data.columns = [c.strip() for c in master_data.columns]
master_data.columns = [re.sub(r'\s+', ' ', c) for c in master_data.columns]
# Drop any unnamed columns
unnamed_columns = [c for c in master_data.columns if "Unnamed:" in c]
master_data = master_data.drop(columns=unnamed_columns)
if not id_map.empty:
master_data = master_data.merge(
id_map, how="left", on=['NO.', 'Street / Block Name', 'Post Code']
)
install_col = (
"INSTALLED OR CANCELLED" if "INSTALLED OR CANCELLED" in master_data.columns
else "INSTALL / CANCELLATION DATE"
)
if "INSTALLED OR CANCELLED" in master_data.columns:
install_col = "INSTALLED OR CANCELLED"
elif "INSTALL / CANCELLATION DATE" in master_data.columns:
install_col = "INSTALL / CANCELLATION DATE"
elif 'INSTALL/ CANCELLATION DATE' in master_data.columns:
install_col = 'INSTALL/ CANCELLATION DATE'
else:
raise ValueError("No install or cancellation date")
submission_col = (
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
)
if "UPRN" in master_data.columns:
# We just need to check if any were cancelled
master_to_append = master_data[
["UPRN", install_col, submission_col]
].rename(
columns={
"UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
install_col: "survey_status",
submission_col: "submission_date"
}
)
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
master_surveyed.append(master_to_append)
continue
# if "UPRN" in master_data.columns:
# # We just need to check if any were cancelled
# master_to_append = master_data[
# ["UPRN", install_col, submission_col]
# ].rename(
# columns={
# "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
# install_col: "survey_status",
# submission_col: "submission_date"
# }
# )
# master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
#
# master_surveyed.append(master_to_append)
# continue
master_data["row_id"] = master_data.index
@ -2472,27 +2624,84 @@ class AssetList:
axis=1
)
scheme_col = (
"AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
"AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH"
)
postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code"
house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO"
property_type_col = (
"PROPERTY TYPE As per table emailed" if
"PROPERTY TYPE As per table emailed" in
master_data.columns else "PROPERTY TYPE As per table emailed"
)
measure_mix_col = "MEASURE COMBO"
# Otherwise, we need to match algorithmically
has_property_id = "UPRN" in master_data.columns
logger.info("Matching master data to asset list")
matched = []
unmatched = []
for _, row in tqdm(master_data.iterrows(), total=len(master_data)):
original_house_no = row[house_no_col]
original_street = row["Street / Block Name"]
original_postcode = row[postcode_col]
if pd.isnull(row[postcode_col]):
continue
# if has_property_id:
# submission_uprn = row["UPRN"]
#
# if not pd.isnull(submission_uprn):
# df = self.standardised_asset_list[
# self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn
# ]
postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
df = self.standardised_asset_list[
(
self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip().str.lower().str.replace(" ",
"")
== postcode_no_space
self.standardised_asset_list[self.STANDARD_POSTCODE]
.str.strip().str.lower().str.replace(" ", "") == postcode_no_space
)
]
house_no = row[house_no_col]
if isinstance(house_no, (float, int)):
house_no = str(int(house_no))
if house_no not in df["house_no"].values:
# Handle postcode errors
postal_region = row[postcode_col].split(" ")[0].lower()
df = self.standardised_asset_list[
(
self.standardised_asset_list[self.STANDARD_POSTCODE]
.str.strip().str.lower().str.startswith(postal_region)
)
]
if house_no not in df["house_no"].values:
unmatched.append(row["row_id"])
continue
df = df[df["house_no"] == house_no]
if df.shape[0] > 1:
df = df[
df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(row["Street / Block Name"].lower())
]
if df.shape[0] == 0:
unmatched.append(row["row_id"])
continue
matched.append(
{
"row_id": row["row_id"],
"original_house_no": original_house_no,
"original_street": original_street,
"original_postcode": original_postcode,
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
}
)
if house_no in df["house_no"].values:
df = df[df["house_no"] == house_no]
@ -2528,14 +2737,12 @@ class AssetList:
]
if any(
df[self.STANDARD_PROPERTY_TYPE].str.contains(
row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower()
)
df[self.STANDARD_PROPERTY_TYPE].str.contains(row[property_type_col].split(" ")[-1].lower())
):
# We ignore "block of flats" entries
df = df[
df[self.STANDARD_PROPERTY_TYPE].str.contains(
row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower()
row[property_type_col].split(" ")[-1].lower()
) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")
]
@ -2545,6 +2752,9 @@ class AssetList:
matched.append(
{
"row_id": row["row_id"],
"original_house_no": original_house_no,
"original_street": original_street,
"original_postcode": original_postcode,
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
}
)
@ -2553,10 +2763,12 @@ class AssetList:
# We match the "UPRN" which is the landlords ID, onto the master sheet
matched = pd.DataFrame(matched)
master_to_append = master_data[["row_id", install_col, submission_col]].merge(
master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge(
matched, how="left", on="row_id"
).rename(
columns={
scheme_col: "funding_scheme",
measure_mix_col: "measure_mix",
install_col: "survey_status",
submission_col: "submission_date"
}
@ -2567,10 +2779,6 @@ class AssetList:
master_data["row_id"].isin(unmatched)
]
scheme_col = (
"AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
"AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH"
)
# The columns are massively different - we take just a few
unmatched_df = unmatched_df[
[

View file

@ -62,352 +62,126 @@ def app():
Property UPRN
"""
# TODO:
# For cavity work:
# - Flag any entries that have a different wall type between non-intrusive data against EPC
# - Worth double checking entries that have a difference in wall construction
# - Look at anything that is flagged as an empty cavity but the EPC data says its a filled cavity
# - Look at the current EPC scores - Anything that is C75 or above, especially if its assumed no insulation
# - By postcode, we can try and deduce if all of the addresses are flats and then estimate if 50% of the flats
# are less than C75
# - Flag anything pre SAP2012
# - Flag anything over 5 years old
# - Look at year built vs age band
#
# For Solar:
# - Discount any that have solar PV - based on non-intrusives and from the inspections team
# - In the heating, discount anything that isn't ASHP, GSHP, HHRS or electric storage - possibly homes with
# electric room heaters but it might need to be an EPC E
# - Fabric - check the floor, wall and roof:
# - Filled or empty cavity is good
# - Insulated solid/timber/system built is good
# - SCIS/CEG needs solid floors
# - JJC don't care
# - Anything with a loft 200 or below
# - Anything C75 and above won't qualify
# - Insulated loft = 200mm
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
# - Or the insulation required is loft/cavity (floors should be solid)
# Torus
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
data_filename = "Torus Property Asset List - Phase 1.xlsx"
sheet_name = "TORUS"
# Thurrock
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
data_filename = "THURROCK COUNCIL.xlsx"
sheet_name = "Assets"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = "Property Age"
landlord_os_uprn = "NatUPRN"
landlord_property_type = "Property Type"
landlord_built_form = "Built Form"
landlord_wall_construction = "Wall Construction"
landlord_roof_construction = "Roof Construction"
landlord_heating_system = "Space Heating Source"
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
landlord_property_id = "UPRN"
landlord_sap = "SAP Score"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
phase = True
# Southern Midlands
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
data_filename = "Southern Housing Midlands Property List - combined.xlsx"
sheet_name = "Sheet 1"
postcode_column = 'Post Code'
fulladdress_column = "Address"
fulladdress_column = "Full Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Age_1"
landlord_year_built = "Construction Date"
landlord_os_uprn = None
landlord_property_type = "Prop_Type"
landlord_built_form = "Prop_Type"
landlord_wall_construction = "Walls_P"
landlord_heating_system = "Heating System"
landlord_property_type = "Property Type"
landlord_built_form = "Property Subtype"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = "Main Heating Type"
landlord_existing_pv = None
landlord_property_id = "AssetID"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
landlord_property_id = "Property Reference"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# PFP London
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London"
data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx"
sheet_name = "PFP SURROUNDING LONDON"
# Medway
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
data_filename = "MEDWAY Asset List.xlsx"
sheet_name = "Asset list"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_column = "House Number"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
address_cols_to_concat = ["House Number", "Street 1"]
missing_postcodes_method = None
landlord_year_built = None
landlord_year_built = "Year Built"
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_property_type = "Property Type - Academy"
landlord_built_form = "Property Type - Academy"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
landlord_property_id = "Row ID"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# PFP North-West
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
sheet_name = "CHECKED"
# MHS
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
fulladdress_column = "FullAddress"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_year_built = "BuiltInYear"
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_property_type = "AssetType"
landlord_built_form = "PropertyType"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP North-East
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East"
data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx"
sheet_name = "CHECKED"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP East
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
sheet_name = "PFP EAST"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# Wates
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
data_filename = "ECO 4 Wates.xlsx"
sheet_name = "Roadmap Homes"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "Address Line 1"
address1_method = None
address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
missing_postcodes_method = None
landlord_year_built = "Build Year"
landlord_os_uprn = None
landlord_property_type = "Archetype"
landlord_built_form = "Archetype"
landlord_wall_construction = "Wall"
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "UPRN"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# Ealing
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
# data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
# sheet_name = "IGNORE - FULL MAIN"
# postcode_column = 'Postcode'
# Southern Midlands
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
# data_filename = "Southern Housing Midlands Property List - combined.xlsx"
# sheet_name = "Sheet 1"
# postcode_column = 'Post Code'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "first_word"
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Year Built"
# landlord_year_built = "Age_1"
# landlord_os_uprn = None
# landlord_property_type = "Property Type Code"
# landlord_wall_construction = None
# landlord_heating_system = None
# landlord_property_type = "Prop_Type"
# landlord_built_form = "Prop_Type"
# landlord_wall_construction = "Walls_P"
# landlord_heating_system = "Heating System"
# landlord_existing_pv = None
# landlord_property_id = "Property ref"
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Full Address.1'
# fulladdress_column = "Full Address"
# address1_column = None
# address1_method = "first_word"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Property Type"
# landlord_wall_construction = "Wallinsul"
# landlord_heating_system = "HeatSorc"
# landlord_existing_pv = None
# landlord_property_id = "Property Reference"
# landlord_property_id = "AssetID"
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_to_asset_list_filepath = None
# For Westward
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
data_filename = "WESTWARD - completed list - 20.03.2025.xlsx"
sheet_name = "Sheet1"
postcode_column = "WFT EDIT Postcode"
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Build date"
landlord_os_uprn = "UPRN"
landlord_property_type = "Location type"
landlord_built_form = None
landlord_wall_construction = "Wall Construction (EPC)"
landlord_heating_system = "Heat Source"
landlord_existing_pv = "PV (Y/N)"
landlord_property_id = "Place ref"
landlord_roof_construction = None
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
master_filepaths = []
master_to_asset_list_filepath = None
outcomes_id = None
outcomes_address = None
phase = False
ecosurv_landlords = None
# For ACIS - programme re-build
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
# data_filename = "ACIS asset list.xlsx"
# sheet_name = "Assets"
# address1_column = "House No"
# postcode_column = "Postcode"
# landlord_property_id = "UPRN"
# fulladdress_column = None
# address_cols_to_concat = ["House No", "Street", "Town"]
# missing_postcodes_method = None
# address1_method = None
# landlord_year_built = "YEAR BUILT"
# landlord_os_uprn = None
# landlord_property_type = "Property type"
# landlord_built_form = None
# landlord_wall_construction = "Wall Constuction"
# landlord_roof_construction = None
# landlord_sap = None
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
# outcomes_sheetname = "Feedback"
# outcomes_postcode = "Postcode"
# outcomes_address = "Address"
# outcomes_houseno = "No"
# outcomes_id = None
# master_filepaths = [
# os.path.join(data_folder, "ECO 3 -Table 1.csv"),
# os.path.join(data_folder, "ECO 4 -Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# For plus dane
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
data_filename = "PLUS DANE Asset List - for analysis.xlsx"
sheet_name = "Asset List"
address1_column = " Address"
postcode_column = " Postcode"
landlord_property_id = "UPRN"
fulladdress_column = " Address"
address_cols_to_concat = []
missing_postcodes_method = None
address1_method = None
landlord_year_built = "Property Age"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_wall_construction = "Landlord Wall Full"
landlord_heating_system = "Landlord Heating"
landlord_existing_pv = None
outcomes_filename = "plus dane outcomes.xlsx"
outcomes_sheetname = "EVERYTHING"
outcomes_postcode = "Post Code"
outcomes_houseno = "Numb."
master_filepaths = [
os.path.join(data_folder, "JJC Rolling Master.csv"),
os.path.join(data_folder, "SCIS Rolling Master.csv"),
]
master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
@ -482,7 +256,7 @@ def app():
# We now flag properties that have been treated under existing programmes
asset_list.flag_outcomes(
outcomes_filepath=os.path.join(data_folder, outcomes_filename) if outcomes_filename else None,
outcomes_filepaths=outcomes_filename,
outcomes_sheetname=outcomes_sheetname,
outcomes_address=outcomes_address,
outcomes_postcode=outcomes_postcode,
@ -504,7 +278,7 @@ def app():
epc_api_only = False
force_retrieve_data = False
skip = None # Used to skip already completed chunks
chunk_size = 1000
chunk_size = 5000
filename = "Chunk {i}.csv"
download_folder = os.path.join(data_folder, "Chunks")
if not os.path.exists(download_folder):
@ -611,6 +385,12 @@ def app():
transformed_data.append(row_data)
transformed_df = pd.DataFrame(transformed_data)
for col in [
"Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
]:
if col not in transformed_df.columns:
transformed_df[col] = False
transformed_df = transformed_df[
[
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",

View file

@ -5,7 +5,7 @@ STANDARD_BUILT_FORMS = {
# Houses
"end-terrace", "semi-detached", "detached", "mid-terrace",
# Flats
"ground floor", "mid-floor", "top-floor", "basement"
"ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise",
}
BUILT_FORM_MAPPINGS = {
@ -212,5 +212,123 @@ BUILT_FORM_MAPPINGS = {
'5 Ext. Wall Flat': 'unknown',
'Unknown': 'unknown',
'Enclosed mid-terrace': 'mid-terrace',
'Enclosed end-terrace': 'end-terrace'
'Enclosed end-terrace': 'end-terrace',
'House GROUND FLOOR': 'ground floor',
'Flat? GROUND FLOOR': 'ground floor',
'House SD SEMI DETACHED': 'semi-detached',
'House SEMI DETACHED': 'semi-detached',
'Flat GROUND FLOOR': 'ground floor',
'': 'unknown',
'Flat SEVENTH FLOOR': 'mid-floor',
'House D': 'detached',
'House ET': 'end-terrace',
'House SD Homeless Unit': 'semi-detached',
'House MT Homeless Unit': 'mid-terrace',
'Bungalow ET': 'end-terrace',
'Bungalow D': 'detached',
'House SD': 'semi-detached',
'Bungalow Sheltered Accomodation': 'unknown',
'House. SD': 'semi-detached',
'Flat FIRST FLOOR MAISONETTE': 'ground floor',
'Bungalow SD': 'semi-detached',
'Flat FIRST FLOOR': 'ground floor',
'Flat Sheltered Accomodation': 'unknown',
'Flat SIXTH FLOOR': 'mid-floor',
'Flat EIGHTH FLOOR': 'mid-floor',
'Flat FOURTH FLOOR': 'mid-floor',
'Flat Homeless Unit': 'unknown',
'Bungalow MT': 'mid-terrace',
'Bungalow Homeless Unit': 'unknown',
'House MT': 'mid-terrace',
'Flat FIFTH FLOOR': 'mid-floor',
'Flat NINTH FLOOR': 'mid-floor',
'House SD FIRST FLOOR': 'semi-detached',
'Bungalow Supported housing': 'unknown',
'Flat THIRD FLOOR': 'mid-floor',
'Flat SECOND FLOOR': 'mid-floor',
'House Homeless Unit': 'unknown',
'Flat ELEVENTH FLOOR': 'mid-floor',
'Flat TENTH FLOOR': 'mid-floor',
'House. MT': 'mid-terrace',
'Ground Floor Bedsit': 'ground floor',
'Mid Terrace With Passage': 'mid-terrace',
'End Of Terrace': 'end-terrace',
'Ground Floor Maisonette': 'ground floor',
'First Floor Bedsit': 'mid-floor',
'GROUND FLOOR BEDSIT': 'ground floor',
'GROUND FLOOR FLAT': 'ground floor',
'BUNGALOW': 'unknown',
'HOUSE 1 LIVING ROOM': 'unknown',
'MAISONETTE OVER SHOP': 'unknown',
'SECOND FLOOR FLAT': 'mid-floor',
'FIRST FLOOR FLAT': 'ground floor',
'GROUND FL MAISONETTE': 'ground floor',
'HOUSE 2 LIVING ROOMS': 'unknown',
'FLAT OVER SHOP': 'unknown',
'4 Bed Detached House': 'detached',
'2 Bed Detached House': 'detached',
'3 Bed Detached Bungalow': 'detached',
'1 Bed Semi-Detached House': 'semi-detached',
'2 Bed Semi-Detached House': 'semi-detached',
'2 Bed Detached Bungalow': 'detached',
'1 Bed Mid Terrace Bungalow': 'mid-terrace',
'4 Bed Semi-Detached Bungalow': 'semi-detached',
'3 Bed Mid Terrace Bungalow': 'mid-terrace',
'3 Bed Semi-Detached Bungalow': 'semi-detached',
'3 Bed Mid Terrace House': 'mid-terrace',
'2 Bed Mid Terrace House': 'mid-terrace',
'3 Bed Detached House': 'detached',
'2 Bed Semi-Detached Bungalow': 'semi-detached',
'5 Bed Mid Terrace House': 'mid-terrace',
'2 Bed Mid Terrace Bungalow': 'mid-terrace',
'3 Bed Semi-Detached House': 'semi-detached',
'1 Bed Semi-Detached Bungalow': 'semi-detached',
'4 Bed Mid Terrace House': 'mid-terrace',
'1 Bed Detached Bungalow': 'detached',
'5 Bed Semi-Detached House': 'semi-detached',
'6 Bed Detached House': 'detached',
'1 Bed Mid Terrace House': 'mid-terrace',
'4 Bed Semi-Detached House': 'semi-detached',
'TBA': 'unknown',
'1 Bed EOT House': 'end-terrace',
'3 Bed Flat': 'unknown',
'5 Bed EOT House': 'end-terrace',
'1 Bed EOT Bungalow': 'end-terrace',
'2 Bed EOT House': 'end-terrace',
'1 Bed Studio Flat': 'unknown',
'3 Bed Maison': 'unknown',
'Commercial Letting': 'unknown',
'4 Bed Maison': 'unknown',
'2 Bed Flat': 'unknown',
'3 Bed EOT House': 'end-terrace',
'2 Bed Maison': 'unknown',
'4 Bed EOT House': 'end-terrace',
'1 Bed Flat': 'unknown',
'3 Bed EOT Bungalow': 'end-terrace',
'1 Bed Maison': 'unknown',
'2 Bed EOT Bungalow': 'end-terrace',
'Bungalow detached': 'detached',
'Bungalow semi detached': 'semi-detached',
'Sheltered bungalow semi detached': 'semi-detached',
'Bedsit bungalow semi detached': 'semi-detached',
'Semi detached house': 'semi-detached',
'Bedsit bungalow terraced': 'mid-terrace', 'Terraced house': 'mid-terrace',
'Sheltered flat': 'unknown',
'APD Bungalow': 'unknown',
'Flat with partition': 'unknown',
'APD flat': 'unknown',
'Sheltered warden flat': 'unknown',
'Sheltered bedsit': 'unknown',
'Sheltered bungalow terraced': 'mid-terrace',
'Block': 'unknown',
'Bungalow terraced': 'mid-terrace',
'Maisonette flat': 'unknown',
'Sheltered bedsit disabled': 'unknown',
'Bedsit Flat': 'unknown',
'Low Rise': 'low rise',
'Upper Floor': 'top-floor',
'High Rise': 'high rise',
}

View file

@ -5,7 +5,7 @@ STANDARD_HEATING_SYSTEMS = {
"gas boiler, radiators",
"electric storage heaters",
"district heating",
"communal heating"
"communal heating",
"gas condensing boiler",
"oil boiler",
"gas condensing combi",
@ -32,7 +32,7 @@ STANDARD_HEATING_SYSTEMS = {
HEATING_MAPPINGS = {
"Combi - GAS": "gas combi boiler",
"E7 Storage Heaters": "high heat retention storage heaters",
"E7 Storage Heaters": "electric storage heaters",
"District heating system": "district heating",
"Condensing Boiler - GAS": "gas condensing boiler",
"Boiler Oil/other": "oil boiler",
@ -50,7 +50,7 @@ HEATING_MAPPINGS = {
"Gas fire": "other",
"Backboiler - Solid fuel": "other",
'combi - gas': 'gas combi boiler',
'e7 storage heaters': 'high heat retention storage heaters',
'e7 storage heaters': 'electric storage heaters',
'district heating system': 'district heating',
'condensing boiler - gas': 'gas condensing boiler',
'boiler oil/other': 'oil boiler',
@ -220,5 +220,76 @@ HEATING_MAPPINGS = {
'Boiler/ underfloor': 'electric underfloor',
'Storage system': "non-electric underfloor",
'BOILER': 'gas combi boiler',
'SPACE_HEATER': 'room heaters'
'SPACE_HEATER': 'room heaters',
'AIR': 'air source heat pump',
'FSOL': 'solid fuel',
'PDEV': 'unknown',
'GASF': 'gas boiler, radiators',
'CONO': 'no heating',
'FELE HRSH': 'high heat retention storage heaters',
'FOIL': 'oil boiler',
'FDEV': 'unknown',
'FNON': 'non-electric underfloor',
'FGAS': 'gas combi boiler',
'FELE': 'electric fuel',
'GRNE': 'ground source heat pump',
'High Heat Storage Heaters': 'high heat retention storage heaters',
'Electric Radiators': 'electric radiators',
'Electric Air Source Heat Pump': 'air source heat pump',
'Gas Combi Condensing Boiler': 'gas condensing combi',
'Electric Boiler Heating': 'electric boiler',
'Solid Fuel Open Back Boiler Heating': 'solid fuel',
'Solid Fuel Closed Back Boiler Heating': 'solid fuel',
'Oil Boiler': 'oil boiler',
'Electric Storage Heaters': 'electric storage heaters',
'Gas Combi Boiler Heating': 'gas combi boiler',
'Electric NIBE Heating System': 'air source heat pump',
'Gas Back Boiler': 'gas boiler, radiators',
'Electric Gel/Oil Filled Radiators': 'electric radiators',
'No Information': 'unknown',
'Oil Combination Boiler Heating': 'oil boiler',
'Electric DSR Heat Retention Radiators': 'high heat retention storage heaters',
'Communal Heating System': 'communal heating',
'Description': 'unknown',
'Oil Combi Condensing Boiler Heating': 'oil boiler',
'Gas Combi Condensing Boiler Heating': 'gas condensing combi',
'Electric Warm Air Heating': 'electric fuel',
'Gas System Boiler Heating': 'gas boiler, radiators',
'Gas Back Boiler Heating': 'gas boiler, radiators',
'Electric Gel/Oil Fllled Radiators': 'electric radiators',
'Gas Condensing Boiler Heating': 'gas condensing combi',
'Gas Combi Condensing Boiler Heatiner': 'gas condensing combi',
'Oil Standard Boiler Heating': 'oil boiler',
'Oil Condensing Boiler Heating': 'oil boiler',
'Electric ASHP': 'air source heat pump',
'Modern Slimline Storage Heaters': 'electric storage heaters',
# These are boiler makes from Unitas
'UNKNOWN': 'unknown',
'IDEAL': 'gas combi boiler',
'VAILLANT': 'gas combi boiler',
'THORN': 'gas combi boiler',
'WORCESTER': 'gas combi boiler',
'GLOW WORM': 'gas combi boiler',
'VOKERA': 'gas combi boiler',
'POTTERTON': 'gas combi boiler',
'BAXI SOLO': 'gas combi boiler',
'BAXI BERMUDA': 'gas combi boiler',
'BAXI': 'gas combi boiler',
'Combi Boiler': 'gas combi boiler',
'Air Source Heat Pump': 'air source heat pump',
'Dual Fuel': 'other',
'Regular Boiler': 'gas condensing boiler',
'No Main Heating': 'no heating',
'None (via Communal System)': 'communal heating',
'No Mains Heating': 'no heating',
'Open Fire with Back Boiler': 'solid fuel',
'No Gas Boiler': 'no heating',
'Back Boiler': 'solid fuel',
"This cell has an external reference that can't be shown or edited. Editing this cell will remove the external "
"reference.": 'unknown',
'Communal Heating': 'communal heating',
'No Data': 'unknown',
'Boiler System': 'gas condensing boiler',
}

View file

@ -0,0 +1,231 @@
"""
This script was produced to handle the non-standard outcomes, observed in the LHP outcomes sheet
"""
import numpy as np
# The standard outcome categories that raw, free-text outcomes are normalised to.
outcomes_values = [
    "Access Issues",
    "No Outcome",
    "Asked for a later date",
    "Customer Refusal",
    "Installer Refusal",
    "No Answer",
    "Not Viable",
    "Surveyed",
    "Rescheduled",
    "Not Knocked",
    "Void",
]
# Maps the raw, free-text outcomes recorded by surveyors onto the standard outcome
# categories listed in `outcomes_values`. Entries that carry no usable outcome
# (blank cells, property descriptions, address queries) map to 'unknown'.
#
# Every value must be spelt exactly as in `outcomes_values` (or be 'unknown');
# a misspelt category would not match any standard outcome downstream.
outcomes_map = {
    'Access issues, shed against rear wall. Sent photos to Matt JJC, declined': 'Access Issues',
    'NO ANSWER /TICKET LEFT': 'No Answer',
    'Looks Void - No Answer': 'No Answer',
    'No Answer - they were in - No response to my drop card': 'No Answer',
    'No Answer': 'No Answer',
    'No Answer - Even they were in - No response to my drop card': 'No Answer',
    'no answer': 'No Answer',
    'NO ANSWER': 'No Answer',
    'No answer': 'No Answer',
    # Blank cells. NOTE(review): a NaN dict key only matches by identity (or via pandas' own
    # NaN handling) - confirm the consumer looks values up in a way that hits this entry.
    np.nan: 'unknown',
    'Access Issues Health reasons try another time': 'Access Issues',
    'LOFT FULL, CUSTOMER WONT REMOVE': 'Access Issues',
    'Failed Appointment - Ivy': 'Access Issues',
    'Failed Appointment - Void soon': 'Void',
    'Hoarding in loft': 'Access Issues',
    'Non Complained - Extension at rear and side': 'Not Viable',
    'Said No letter - then texted me I can only do outside but cant come in': 'Customer Refusal',
    'Hoarding - unwilling to shift from loft': 'Customer Refusal',
    'Overgrown vegatation - Happy for HA to deal with': 'Access Issues',
    'No access to side of property': 'Not Viable',
    'Very rude': 'Customer Refusal',
    'REFUSED ACCESS': 'Customer Refusal',
    'SURVEYED': 'Surveyed',
    'ELECTRIC ROOM HEATERS. Kieran to check re funding and possible PV?': 'Not Viable',
    'SUBMITTED': 'Surveyed',
    '2 single storey extensions': 'Not Viable',
    'Rebook': 'Rescheduled',
    'surveyed': 'Surveyed',
    'not intrested': 'Customer Refusal',
    'Fixed seating area against rear elevation': 'Not Viable',
    "Matt said can't install": 'Installer Refusal',
    'Gave excuses to come this and that time and no reponse': 'No Answer',
    'NOT KNOCKED': 'Not Knocked',
    'VOID PROPERTY': 'Void',
    'Glass lean to. JJC declined': 'Installer Refusal',
    'Left slip Overgrown vegatation': 'No Answer',
    'covid': 'Rescheduled',
    'Lean-to on side elevation': 'Not Viable',
    'Opted out as moving out': 'Customer Refusal',
    'Surveyed': 'Surveyed',
    'refused': 'Customer Refusal',
    'COVID': 'Rescheduled',
    'Said No letter received and didnt answer again': 'No Answer',
    'Survey completed': 'Surveyed',
    'Loft fully boarded': 'Access Issues',
    'Not Available during the day': 'No Answer',
    'Conservatory. JJC declined.': 'Installer Refusal',
    'Booked for 19.10.23': 'Rescheduled',
    'LETTER LEFT': 'No Answer',
    'Knocked/lettered': 'No Answer',
    'Survey Complete': 'Surveyed',
    'Refused by calling office': 'Customer Refusal',
    'Extension on rear elevation': 'Not Viable',
    'Left Slip - Potential access issue with conservatory': 'Access Issues',
    'Overgrown vegatation': 'Access Issues',
    'Left slip Overgrown Ivy and Hedge': 'No Answer',
    'NOT AVAILABLE THIS WEEK': 'No Answer',
    'Unwilling to clear loft': 'Access Issues',
    'survey complete': 'Surveyed',
    'ivy on wall': 'Access Issues',
    'not in': 'No Answer',
    'Covid shrub very close to building': 'Rescheduled',
    'ON HOLIDAY, UNDER 18 IN HOUSE': 'Rescheduled',
    'wont do as extention': 'Not Viable',
    'IN, WONT ANSWER': 'Customer Refusal',
    'Too many plants next to the walls': 'Access Issues',
    'obstructions': 'Access Issues',
    'Left slip -Wall plant': 'Access Issues',
    'On holiday': 'No Answer',
    'Failed appointment': 'No Answer',
    'LOFT FULLY BOARDED': 'Access Issues',
    'ivy and didnt want people inside the house': 'Customer Refusal',
    'Partly IWI': 'Not Viable',
    'Covid': 'Rescheduled',
    'REFUSE TO REMOVE IVY': 'Access Issues',
    'Insulated 2 years ago. Carbon bead in walls, 300mm rock wool in loft': 'Not Viable',
    'INCONVIENIENT TIME': 'No Answer',
    'EXT TO REAR': 'Not Viable',
    'Not In': 'No Answer',
    'Damp issues.Black mould on walls': 'Access Issues',
    'Lean to. JJC declined': 'Installer Refusal',
    'DISABLED CHILD / INCONVIENIENT': 'Customer Refusal',
    'Plants on wall': 'Access Issues',
    'Left Slip': 'No Answer',
    'Never answered': 'No Answer',
    'SOLAR PV CONNECTED TO MAINS': 'Not Viable',
    'Bungalow': 'unknown',
    'call back': 'No Answer',
    'Message from WFT OFFICE; tenant unavailable this week, no telephone number provided': 'Rescheduled',
    'LEAN TO PRESENT': 'Not Viable',
    'She said come Tuesday and never answered': 'Rescheduled',
    'Sold': 'Surveyed',
    'Too much mould and cluttered house': 'Access Issues',
    'Overgrown vegatation will call when clear': 'Access Issues',
    'LOFT DEC 2013': 'Not Viable',
    'Ivy': 'Access Issues',
    'Booked for next week': 'Rescheduled',
    'empty': 'Void',
    'Been told property is empty as tenant has passed away': 'Void',
    'Non Complianced - Single Storey Extension to the front and rear': 'Not Viable',
    'Going back this week': 'Rescheduled',
    'Loft insulated in last few months. Ongoing damp issues in bathroom, black mould up wall': 'Access Issues',
    'rear Extension': 'Not Viable',
    'DECKING AROUND PROPERTY IN BREACH OF DPC BY 300MM': 'Not Viable',
    'Said no letter received': 'Customer Refusal',
    'Unwell, not convenient this week': 'Rescheduled',
    'IVY on Wall': 'Access Issues',
    'REFUSED EXTRACTOR': 'Customer Refusal',
    'ON HOLIDAY': 'Rescheduled',
    'COVID. Not this week.': 'Rescheduled',
    'COVID POSITIVE': 'Rescheduled',
    'VOID. Appears to be under refurbishment': 'Void',
    'Survey Completed': 'Surveyed',
    'INCONVIENIENT': 'Rescheduled',
    'Knocked/lettered. 07598 112360': 'No Answer',
    'Single skin lean to. JJC declined': 'Installer Refusal',
    'DENIES LETER, REFUSED ACCESS': 'Customer Refusal',
    'Loft hoard unable to clear': 'Access Issues',
    'Left Slip - Look Void': 'Void',
    'EXCESSIVE IVY GROWTH, CUSOMER UNABLE TO REMOVE, ELDERLEY': 'Access Issues',
    'Refused': 'Customer Refusal',
    'REFUSED / INCONVENIENT': 'Customer Refusal',
    'AGGRESSIVE DOGS LOOSE IN FRONT GARDEN': 'Access Issues',
    'EXCESSIVE IVY': 'Access Issues',
    "Won't remove plastic roof": 'Access Issues',
    'SURVEY COMPLETED': 'Surveyed',
    'VOID. Under refurbishment. Electric storage heating currently removed for refurbishment': 'Void',
    'Surveyed ECO4': 'Surveyed',
    'after 5.30': 'Rescheduled',
    'CUSTOMER IN, WONT ANSWER DOOR': 'No Answer',
    'IVY': 'Access Issues',
    'Single storey extension on gable': 'Not Viable',
    'No answer.': 'No Answer',
    'Full extension at rear. Not viable.': 'Not Viable',
    'Access issues': 'Access Issues',
    'VOID PROPERTY NOW': 'Void',
    'Not viable': 'Not Viable',
    'Looks like a VOID property': 'Void',
    'NOT VIABLE': 'Not Viable',
    'No Answer.': 'No Answer',
    'Not viable.': 'Not Viable',
    'Looks to be void.': 'Void',
    'Access issues and loft fully boarded/full': 'Access Issues',
    'Extension on property. Not Viable': 'Not Viable',
    'No good. Serious Access issues.': 'Access Issues',
    'Surveyed and Submitted': 'Surveyed',
    'UNSANITARY CONDITIONS, RUBBISH EVERYWHERE': 'Access Issues',
    'Will call when rubbish removed.': 'Access Issues',
    'Covered in Ivy': 'Access Issues',
    'CUSTOMER REFUSED': 'Customer Refusal',
    'Still covered in ivy': 'Access Issues',
    'CUSTOMER SHOUTED OUT OF WINDOW TO COME BACK ANOTHER TIME': 'Customer Refusal',
    "Extension on property, can't be done.": 'Not Viable',
    'Will be looking to do Survey WC 19.02': 'Rescheduled',
    "Tenant was working, couldn't do survey.": 'No Answer',
    'PROPERTY EMPTY, SPOKE TO EX TENNANT WHO LEFT 3 WEEKS AGO?': 'Void',
    'Will call back.': 'Rescheduled',
    "Tenant not interested. Won't empty loft.": 'Customer Refusal',
    "Won't answer door.": 'Customer Refusal',
    "Tenant 'Doesn't want anything to do with LHP'": 'Customer Refusal',
    "Loft full. Tenant won't empty.": 'Access Issues',
    'Covered in foliage': 'Access Issues',
    'Customer not home for appointment.': 'No Answer',
    'Blown in bead': 'Not Viable',
    'Distance to property to far from road.': 'Access Issues',
    'LOFT FULL, CUSTOMER UNABLE TO CLEAR': 'Access Issues',
    'Stuff against rear wall. Will call when removed.': 'Access Issues',
    'Will call when rubbish is removed': 'Access Issues',
    'Mid Terrace': 'unknown',
    'Tile Hung areas.': 'Not Viable',
    'REFUSED / UNABLE TO CLEAR LOFT': 'Customer Refusal',
    'Calling back on Monday (19.02)': 'Rescheduled',
    'Solid Wall': 'Not Viable',
    'FAULTY PHONE NUMBER, 3 X KNOCK, LETTER LEFT ON FIRST ATTEMPT, NO REPLY OR CALL BACK': 'No Answer',
    'Not interested': 'Customer Refusal',
    'ACCESS DENIED': 'Customer Refusal',
    'Covered in Ivy.': 'Access Issues',
    'UNABLE TO GENERATE SAP GAIN WITH EXTENSIONS FRONT AND REAR': 'Not Viable',
    'Extension on the property.': 'Not Viable',
    "Covered in Ivy. Can't remove it.": 'Access Issues',
    'Booked in, but not in when called back': 'No Answer',
    'EXCESSIVE IVY ON WALLS (SEE PICS)': 'Access Issues',
    'Moved out': 'Void',
    'Buying the property. Not interested.': 'Customer Refusal',
    'Not been to yet': 'No Answer',
    'CUSTOMER STATES LOFT WAS INSULATED A FEW MONTHS AGO BY LHP': 'Customer Refusal',
    'Will try again.': 'No Answer',
    'HOUSE MARTINS NESTING IN EAVES OF 3 ADJOINING PROPERTIES': 'Access Issues',
    'Told me to call back': 'Rescheduled',
    'CUSTOMER SAYS PROPERTY ALREADY REFUSED AT PREVIOUS SURVEY, NO REASON GIVEN': 'Customer Refusal',
    "Won't answer the door.": 'Customer Refusal',
    'Tenant not interested.': 'Customer Refusal',
    'Keep trying, keeps putting me off.': 'Customer Refusal',
    'Already insulated.': 'Not Viable',
    'Works all day.': 'No Answer',
    'PROPERTY COVER IN FOILAGE AND SHRUBS': 'Access Issues',
    'ACCESS IVY GROWTH, LEAN TO / CONSERVATORY IN WAY OF REAR': 'Not Viable',
    "Tenant unwell. Doesn't want survey.": 'No Answer',
    'Wont empty loft.': 'Access Issues',
    'LOFT FULLY BOARDED AS PREVIOUSLY DISCUSSED WITH CUSTOMER BY PREVIOUS SURVEYOR': 'Access Issues',
    "Property can't be done.": 'Not Viable',
    'Works everyday. Will call.': 'No Answer',
    'A LOT OF FOLIAGE IN WAY, PROPERTY LOOKS EMPTY FROM OUTSIDE?': 'Void',
    "Very old tenant. Said they didn't want it.": 'Customer Refusal',
    'Covered in ivy. Unable to remove.': 'Access Issues',
    'Climbers on walls': 'Access Issues',
    'Will not remove foliage': 'Access Issues',
    'Not Interested.': 'Customer Refusal',
    'OFF GAS': 'unknown',
    'Tenant not interested': 'Customer Refusal',
    'Will call me. Left my number.': 'Rescheduled',
    'Keep trying but keeps putting me off': 'Customer Refusal',
    'Moving out.': 'Void',
    'Booked in': 'Rescheduled',
    'Refused Survey': 'Customer Refusal',
    'Big dogs running around front garden.': 'Access Issues',
    'CUSTOMER HAS CLADDED WALL AT REAR IN CONSERVATORY, REFUSED INTERNAL DRILL': 'Customer Refusal',
    'Booked in.': 'Rescheduled',
    'WRONG ADDRESS?': 'unknown',
    'Works everyday. Will call me.': 'No Answer',
    'Will not remove foliage.': 'Access Issues',
}

View file

@ -194,5 +194,63 @@ PROPERTY_MAPPING = {
'Maisonette 2 Ext. Wall': 'maisonette',
'5 Ext. Wall Flat': 'flat',
'Bungalow Semi Detached': 'bungalow',
'COMINT': 'unknown'
'COMINT': 'unknown',
'12 SBEDSIT': 'bedsit',
'01 HOUSE': 'house',
'05 BEDSIT': 'bedsit',
'14 SFLAT': 'flat',
'09 PBEDSIT': 'bedsit',
'10 PBUNGALOW': 'bungalow',
'13 SBUNGALOW': 'bungalow',
'11 PFLAT': 'flat',
'02 FLAT': 'flat',
'04 MAISONETTE': 'maisonette',
'01 HOUSE MID': 'house',
'03 BUNGALOW': 'bungalow',
'Flat?': 'flat',
'Bungalow ET': 'bungalow',
'House. SD': 'house',
'Bungalow SD': 'bungalow',
'Bungalow D': 'bungalow',
'House D': 'house',
'House SD': 'house',
'House ET': 'house',
'Bungalow MT': 'bungalow',
'House MT': 'house',
'House. MT': 'house',
'': 'unknown',
'GROUND FLOOR BEDSIT': 'bedsit',
'HOUSE 1 LIVING ROOM': 'house',
'MAISONETTE OVER SHOP': 'maisonette',
'GROUND FLOOR FLAT': 'flat',
'SECOND FLOOR FLAT': 'flat',
'FIRST FLOOR FLAT': 'flat',
'GROUND FL MAISONETTE': 'maisonette',
'HOUSE 2 LIVING ROOMS': 'house',
'FLAT OVER SHOP': 'flat',
'House With Integral Garage': 'house',
'Flat Over Parking/Accessway': 'flat',
'Flat Over Binstore': 'flat',
'Flat Over Garage': 'flat',
'House With Independent Garage': 'house',
'Studio': 'flat',
'Bedsit bungalow terraced': 'bedsit',
'Terraced house': 'house',
'Sheltered flat': 'flat',
'APD Bungalow': 'bungalow',
'Flat with partition': 'flat',
'Bungalow detached': 'bungalow',
'APD flat': 'flat',
'Sheltered warden flat': 'flat',
'Bungalow semi detached': 'bungalow',
'Sheltered bedsit': 'bedsit',
'Sheltered bungalow terraced': 'bungalow',
'Sheltered bungalow semi detached': 'bungalow',
'Bungalow terraced': 'bungalow',
'Maisonette flat': 'maisonette',
'Sheltered bedsit disabled': 'bedsit',
'Bedsit bungalow semi detached': 'bedsit',
'Bedsit Flat': 'bedsit',
'Semi detached house': 'house',
'Unit': 'unknown'
}

View file

@ -6,6 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
"pitched unknown access to loft",
"piched unknown insulation",
"pitched insulated",
# The trailing comma is required: without it Python concatenates this literal with the next one
"pitched less than 100mm insulation",
"another dwelling above",
"flat unknown insulation",
"unknown insulated",
@ -23,5 +24,18 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'2018 onwards': 'unknown',
'Pitched (vaulted ceiling)': 'pitched insulated',
np.nan: "unknown",
None: "unknown"
None: "unknown",
'Unknown': 'unknown',
'270mm': 'pitched insulated',
'300mm+': 'pitched insulated',
'100mm': 'pitched less than 100mm insulation',
'250mm': 'pitched insulated',
'300mm': 'pitched insulated',
'No Loft space': 'pitched no access to loft',
'75mm': 'pitched less than 100mm insulation',
'150mm': 'pitched insulated',
'No Loft Hatch': 'pitched unknown access to loft',
'200mm': 'pitched insulated',
'0-49mm': 'pitched less than 100mm insulation',
'50mm': 'pitched less than 100mm insulation',
}

View file

@ -212,12 +212,17 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Cornish': 'system built',
'Rwate': 'system built',
'Hill Presweld Steel': 'system built',
'Cavity Filled Cavity': 'filled cavity',
'Cavity Unknown': 'cavity unknown insulation',
'Cavity Filled Cavity (internal)': 'filled cavity',
'': 'unknown',
'Cavity Internal Insulation': 'filled cavity',
'Cavity As Built': "uninsulated cavity"
'Cavity As Built': "uninsulated cavity",
'Non Trad Large Panel System': 'system built',
'Non Trad Cornish': 'system built',
'Non Trad Reema': 'system built',
'Traditional Cavity Brickwork': 'cavity unknown insulation',
'System build (undefined)': 'system built',
'Non Trad Wimpey': 'system built',
'Non Trad Wates': 'system built'
}

View file

@ -172,7 +172,7 @@ class SearchEpc:
self.address1 = address1
self.postcode = postcode
self.full_address = full_address
self.full_address = full_address if full_address is not None else self.address1
self.uprn = uprn
self.house_number = self.get_house_number(self.address1)
self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
@ -265,9 +265,7 @@ class SearchEpc:
for retry in range(self.max_retries):
try:
response = self.client.domestic.call(method="get", url=url, params=params)
if response:
self.data = response
return {
@ -368,8 +366,11 @@ class SearchEpc:
unique_property_types = {r["property-type"] for r in rows}
# We allow for variation in property type across flats/maisonettes
if (len(uprns) == 1) and ((len(unique_property_types) == 1) or unique_property_types == {"Flat", "Maisonette"}):
return rows
# If we know that we have a flat/maisonette, we allow for both property types
if property_type in ["Flat", "Maisonette"]:
if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
) or unique_property_types == {"Flat", "Maisonette"}):
return rows
if property_type is not None:
# We can do a filter on the property type
@ -388,11 +389,27 @@ class SearchEpc:
# We check if post town is included in the address
if any([r["posttown"].lower() in address.lower() for r in rows]):
best_match = process.extractOne(
best_match1 = process.extractOne(
address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
)
# Get all of the scores
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
best_match2 = process.extractOne(
address, [", ".join([r["address"]]) for r in rows], score_cutoff=0
)
# Pick the largest score
if best_match1[1] >= best_match2[1]:
# Get all of the scores
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]]
else:
# Get all of the scores
rows_filtered = [r for r in rows if r["address"] == best_match2[0]]
# If we have multiple, we filter on newest lodgment date
if len(rows_filtered) > 1:
rows_filtered = [
r for r in rows_filtered
if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered])
]
else:
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
# Get the UPRN for the best match

View file

@ -0,0 +1,708 @@
"""
This script is used to reconcile the data from the Community Housing project, to understand the differences in
the various asset lists, and the work that has been conducted
"""
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from asset_list.AssetList import AssetList
from backend.SearchEpc import SearchEpc
# Data preparation
# Data preparation: both outcome tabs are read from the same workbook
outcomes_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme Reconciliation/Outcomes "
    "Community Housing.xlsx"
)
outcomes_1 = pd.read_excel(outcomes_workbook, sheet_name="Sheet1")
outcomes_2 = pd.read_excel(outcomes_workbook, sheet_name="ECO4 + PV")
# Every row read from the "ECO4 + PV" tab is labelled as ECO4 Solar funding
outcomes_2["Type of Funding"] = "ECO4 Solar"

combined_outcomes = pd.concat([outcomes_1, outcomes_2], ignore_index=True)
# Positional rename: the combined frame must have exactly these columns, in this order
combined_outcomes.columns = [
    'Surveyor',
    'Housing Association',
    'No.',
    'Address',
    'Postcode',
    'Outcome',
    'Type of Funding',
    "Notes",
    'Previous letter sent Date:',
    'Date Letter sent',
    'Installer',
]

# Store the combined sheet so it can be fed back in as the outcomes file further down
combined_outcomes.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme "
    "Reconciliation/combined_outcomes.xlsx",
)
################################################################################################
# Config for asset list standardisation
################################################################################################
# Folder, workbook and sheet holding the landlord's asset list
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme Reconciliation"
data_filename = "Community Housing - Original Asset List Copy for Reconciliation.xlsx"
sheet_name = "Assets"
# Address-related settings; all of these are forwarded to the AssetList constructor below
postcode_column = 'Postcode'
fulladdress_column = "Full Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
# Landlord attribute columns forwarded to AssetList
# NOTE(review): None presumably means "column not available in this asset list" - confirm against AssetList
landlord_year_built = "Build_Date"
landlord_os_uprn = None
landlord_property_type = "Asset_Type1"
landlord_built_form = "Asset_Classification"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = "Heat Source Static"
landlord_existing_pv = None
landlord_property_id = "Asset_Reference"
landlord_sap = None
# Parallel one-element lists describing the outcomes workbook; passed to asset_list.flag_outcomes below
outcomes_filename = [
os.path.join(data_folder, "combined_outcomes.xlsx"),
]
outcomes_sheetname = ["Sheet1"]
outcomes_postcode = ["Postcode"]
outcomes_houseno = ["No."]
outcomes_id = [None]
outcomes_address = ["Address"]
# Survey master inputs; passed to asset_list.flag_survey_master below
master_filepaths = [
os.path.join(data_folder, "Submissions - for analysis.csv"),
]
master_to_asset_list_filepath = None
phase = False
# Passed to asset_list.flag_ecosurv below
# NOTE(review): looks like a "|"-separated list of landlord name alternatives - confirm how flag_ecosurv uses it
ecosurv_landlords = "community community|community housing|mr community|david lindwood"
# Not referenced again in the visible part of this script
manual_uprn_map = {}
# Build the asset list from the config above, then run the standardisation steps
asset_list = AssetList(
    local_filepath=os.path.join(data_folder, data_filename),
    sheet_name=sheet_name,
    header=0,
    # Address handling
    postcode_colname=postcode_column,
    address1_colname=address1_column,
    address1_extraction_method=address1_method,
    full_address_colname=fulladdress_column,
    full_address_cols_to_concat=address_cols_to_concat,
    missing_postcodes_method=missing_postcodes_method,
    # Landlord attribute columns
    landlord_property_id=landlord_property_id,
    landlord_uprn=landlord_os_uprn,
    landlord_year_built=landlord_year_built,
    landlord_property_type=landlord_property_type,
    landlord_built_form=landlord_built_form,
    landlord_wall_construction=landlord_wall_construction,
    landlord_roof_construction=landlord_roof_construction,
    landlord_heating_system=landlord_heating_system,
    landlord_existing_pv=landlord_existing_pv,
    landlord_sap=landlord_sap,
    phase=phase,
)
asset_list.init_standardise()
asset_list.apply_standardiation()

# We now flag properties that have been treated under existing programmes
asset_list.flag_outcomes(
    outcomes_filepaths=outcomes_filename,
    outcomes_sheetname=outcomes_sheetname,
    outcomes_address=outcomes_address,
    outcomes_postcode=outcomes_postcode,
    outcomes_houseno=outcomes_houseno,
    outcomes_id=outcomes_id,
)

# Exactly one outcome row is expected to be left without a domna_property_id; any other count aborts the run
unmatched_outcome_count = asset_list.outcomes["domna_property_id"].isnull().sum()
if unmatched_outcome_count != 1:
    raise Exception("Something went wrong")
# We fix this one manually
asset_list.outcomes["domna_property_id"] = asset_list.outcomes["domna_property_id"].fillna(
    "29walternashroadeastbirchencoppicekidderminsterdy117ea-caa3a8d92ea9"
)

asset_list.flag_survey_master(
    master_filepaths=master_filepaths,
    master_to_asset_list_filepath=master_to_asset_list_filepath,
)
master_surveyed = asset_list.master_surveyed

# Normalise the funding scheme labels; Series.map leaves NaN for any label that is not a key here
scheme_map = {
    "ECO4 A/W": "ECO4",
    'ECO4 GBIS': "GBIS",
    'ECO4 - REMEDIAL CWI ONLY': "ECO4 Remedial",
    "ECO4 GBIS REMEDIAL": "GBIS Remedial",
    'ECO4 - Remedial CWI Only': "ECO4 Remedial",
    'ECO4 GBIS Remedial': "GBIS Remedial"
}
master_surveyed["funding_scheme"] = master_surveyed["funding_scheme"].map(scheme_map)
# "<scheme>: <measure mix>"; this is NaN whenever either part is missing
master_surveyed["survey_reference"] = master_surveyed["funding_scheme"] + ": " + master_surveyed["measure_mix"]

# Attach the domna_property_id to every surveyed property via the landlord's own id
master_surveyed = master_surveyed.merge(
    asset_list.standardised_asset_list[["domna_property_id", "landlord_property_id"]],
    how="left",
    on="landlord_property_id",
)
if pd.isnull(master_surveyed["domna_property_id"]).sum():
    raise ValueError("Some of the master surveyed properties do not have a domna_property_id")

# Flag anything in outcomes that has been listed as surveyed, that is NOT in the master_surveyed sheet
# NOTE(review): the original list repeated "surveyed" twice; a second spelling may have been intended
surveyed_outcomes = asset_list.outcomes[
    asset_list.outcomes["Outcome"].isin(["surveyed"])
]
# .copy() so the column assignment below writes to an independent frame rather than a slice of
# asset_list.outcomes (avoids pandas' SettingWithCopyWarning and any ambiguity about what is modified)
outcomes_not_in_master = surveyed_outcomes[
    ~surveyed_outcomes["domna_property_id"].isin(master_surveyed["domna_property_id"])
].copy()
outcomes_not_in_master["Type of Funding"] = outcomes_not_in_master["Type of Funding"].fillna("Work Type Not Filled In")
# Flag ecosurv records for this landlord, ignoring similarly named housing associations
similarly_named_landlords = [
    "Watford Community housing", "Eastlight Community housing", "Mr Tower Hamlets Community Housing"
]
asset_list.flag_ecosurv(
    ecosurv_landlords=ecosurv_landlords,
    landlords_to_ignore=similarly_named_landlords,
)

# These are properties NOT on the Community Housing asset list that were sold under the wrong HA
# asset_list.ecosurv_no_match.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme "
# "Reconciliation/Ecosurv - properties sold to Community Housing, not belonging to them.csv",
# index=False
# )

# We read in the works, split by sold to SGEC and on-hold
deck_of_works_folder = os.path.join(data_folder, "Community Housing Deck of works")

billed_to_installer = pd.read_csv(os.path.join(deck_of_works_folder, "SGEC BILLED -Table 1.csv"))
billed_to_installer["billed"] = True

not_billed_to_installer = pd.read_csv(os.path.join(deck_of_works_folder, "ON HOLD -Table 1.csv"))
not_billed_to_installer["billed"] = False

# Stack both sheets and give every row a stable id that later lookups can merge back on
sgec_billings = pd.concat([billed_to_installer, not_billed_to_installer]).reset_index(drop=True)
sgec_billings["row_id"] = sgec_billings.index
# We match these two lists back to the domna_property_id. They SHOULD match to submissions
# The header names are picked from whichever variant is present in the billed sheet
scheme_col = (
    "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
    "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in billed_to_installer.columns else "AFFORDABLE WARMTH"
)
postcode_col = "POSTCODE" if "POSTCODE" in billed_to_installer.columns else "Post Code"
house_no_col = 'NO.' if 'NO.' in billed_to_installer.columns else "NO"
# Both branches of the original conditional named the same header, so it is a plain constant
property_type_col = "PROPERTY TYPE As per table emailed"
measure_mix_col = "MEASURE COMBO"

# "<house no>+<postcode>" -> landlord property id, for rows that cannot be matched automatically
manual_corrections = {
    "30+DY12 1EB": "73440300",
    "32+DY12 1EB": "73440320",
    "1+DY11 7ES": "20150010",
    "12+DY11 7EP": "9460120",
    "72+DY11 7PA": "88520720",
    "39+DY13 0DR": "44250390",
    "43+DY11 7EF": "2460430",
    "45+DY11 7EG": "2460450",
    "47+DY11 7EG": "2460470",
    "49+DY11 7EG": "2460490",
    "11+DY13 0HB": "87320110",
    "4+DY130HA": "87320040"
}

billed_lookup = []
for _, row in tqdm(sgec_billings.iterrows(), total=len(sgec_billings)):
    # Use the detected header names throughout, instead of mixing them with hard-coded ones
    postcode = row[postcode_col]
    houseno = row[house_no_col]
    # str() so a numeric or missing house number cannot make the join raise a TypeError
    correction_key = "+".join([str(houseno), str(postcode)])

    # We need to correct some records
    landlord_pid = manual_corrections.get(correction_key)
    if landlord_pid:
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["landlord_property_id"] == landlord_pid)
        ]
        if df.shape[0] != 1:
            raise ValueError(
                f"Expected exactly one match for {landlord_pid} in the standardised asset list, "
                f"found {df.shape[0]}"
            )
        billed_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    # First attempt: exact house number + postcode match against the surveyed master
    df = master_surveyed[
        (master_surveyed["original_house_no"] == houseno) &
        (master_surveyed["original_postcode"] == postcode)
    ]
    if df.shape[0] != 1:
        # Try a search on the asset list, comparing postcodes with whitespace and case removed
        postcode_no_space = str(postcode).strip().replace(" ", "").lower()
        df = asset_list.standardised_asset_list[
            (
                asset_list.standardised_asset_list[asset_list.STANDARD_POSTCODE]
                .str.strip().str.lower().str.replace(" ", "") == postcode_no_space
            )
        ].copy()

        # Numeric house numbers arrive as floats (e.g. 12.0); a missing value must not reach int()
        house_no = houseno
        if isinstance(house_no, float) and not pd.isnull(house_no):
            house_no = str(int(house_no)).lower()
        else:
            house_no = str(house_no).lower()

        df["house_no"] = df.apply(
            lambda x: SearchEpc.get_house_number(
                str(x[asset_list.STANDARD_ADDRESS_1]), str(x[asset_list.STANDARD_POSTCODE])
            ),
            axis=1
        )
        df = df[df["house_no"].str.lower() == house_no].copy()
        if df.shape[0] != 1:
            raise ValueError(
                f"Expected exactly one match for {correction_key} in the master surveyed list or the "
                f"asset list, found {df.shape[0]}"
            )

    billed_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )

# Attach the resolved domna_property_id to every billing row
billed_lookup = pd.DataFrame(billed_lookup)
sgec_billings = sgec_billings.merge(
    billed_lookup,
    how="left",
    on="row_id"
)
# We get the asset list that Community Housing thinks they sent Warmfront
warmfront_workbook = os.path.join(data_folder, "Warmfront.xlsx")
master_data_sheet = pd.read_excel(warmfront_workbook, sheet_name="Asset Stock List (3)")
# Asset references are compared as strings everywhere below
master_data_sheet["Asset_Reference"] = master_data_sheet["Asset_Reference"].astype(str)

# 1) We check that all of the properties in the asset list we have on file are in the asset list that Community Housing
# believe they sent Warmfront
warmfront_references = master_data_sheet["Asset_Reference"].astype(str).values
not_in_warmfront = asset_list.standardised_asset_list[
    ~asset_list.standardised_asset_list["landlord_property_id"].isin(warmfront_references)
]
if not not_in_warmfront.empty:
    raise ValueError("Some of the properties in the asset list are not in the Warmfront asset list")

# This column documents whether or not the property is in the asset list that the WFT were sent
# There are 189 properties that were never sent to WFT, but all properties are accounted for in the asset list
wft_property_ids = asset_list.standardised_asset_list["landlord_property_id"].astype(str).values
master_data_sheet["Is Property in WFT Asset List"] = (
    master_data_sheet["Asset_Reference"].astype(str).isin(wft_property_ids)
)

# We now merge on the Warmfront findings
warmfront_findings = asset_list.standardised_asset_list[
    ["landlord_property_id", "non-intrusives: ECO Eligibility"]
]
master_data_sheet = master_data_sheet.merge(
    warmfront_findings,
    how="left",
    left_on="Asset_Reference",
    right_on="landlord_property_id",
)
# Rows without a finding after the left merge had no counterpart in the standardised asset list
master_data_sheet["non-intrusives: ECO Eligibility"] = (
    master_data_sheet["non-intrusives: ECO Eligibility"].fillna("Not in original asset list")
)
# SGEC did a number of CIGA checks. We match these onto the master data sheet
# TODO: Need to split the programme into historical 2023 and 2024 (there was a cutoff date in late 2024 which seemed
# to be the start of the new programme)
# Seems like there were 2 main checks - it also seems like this was a 2 phase programme, where these CIGA checks
# correspond to phase 2
ciga_checks_1 = pd.read_excel(
    os.path.join(
        data_folder, "CIGA Checks", "2 CIGA Check WFT 14102024 x1073.xlsx"
    ),
    sheet_name="Worksheet"
)
# Keep only rows with a postcode; .copy() so the column added next is written to an independent
# frame rather than a filtered slice (avoids pandas' SettingWithCopyWarning)
ciga_checks_1 = ciga_checks_1[~pd.isnull(ciga_checks_1["Postcode"])].copy()
ciga_checks_1["request"] = "1073 properties"

ciga_checks_2 = pd.read_excel(
    os.path.join(
        data_folder, "CIGA Checks", "2 CIGA Check 01112024 x125.xlsx"
    ),
    sheet_name="Worksheet"
)
ciga_checks_2 = ciga_checks_2[~pd.isnull(ciga_checks_2["Postcode"])].copy()
ciga_checks_2["request"] = "125 flats"

# Stack both batches and give every row a stable id for the lookup merge further down
cigas = pd.concat([ciga_checks_1, ciga_checks_2], ignore_index=True)
cigas["row_id"] = cigas.index

# We add some temp columns to allow for easier matching
asset_list.standardised_asset_list["house_no"] = asset_list.standardised_asset_list.apply(
    lambda x: SearchEpc.get_house_number(
        str(x["domna_full_address"]), str(x["domna_postcode"])
    ),
    axis=1
)

# "Matched Address" -> landlord property id, for CIGA rows that need a hand-picked match
manual_fixes = {
    "2 Austcliffe Road Cookley, Kidderminster": "2250020",
    '5 Brett Young Close, Kidderminster': "9800050"
}
# CIGA rows to leave unmatched; compared against "Matched Address" and "<Address1> <Address2>"
incorrect_ciga_return = [
    "19 Wood Street, Kidderminster",
    "nan Charles Street",
    "53 Harold Evers Way, Kidderminster",
    '63 Harold Evers way'
]
# Resolve every CIGA row to a domna_property_id.
# NOTE(review): the nesting of the fallback steps below was reconstructed from a copy of this script whose
# indentation had been flattened - confirm it against the original file.
ciga_lookup = []
for _, row in tqdm(cigas.iterrows(), total=len(cigas)):
    # Hand-picked matches take priority
    if manual_fixes.get(row["Matched Address"]):
        ll_pid = manual_fixes[row["Matched Address"]]
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["landlord_property_id"] == ll_pid)
        ]
        ciga_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    # Rows listed as incorrect returns are left unmatched.
    # str() on both parts so a missing Address2 cannot make the join raise a TypeError.
    if (row["Matched Address"] in incorrect_ciga_return) or (
        " ".join([str(row["Address1"]), str(row["Address2"])]) in incorrect_ciga_return
    ):
        continue

    # 1) Requested postcode + house number
    df = asset_list.standardised_asset_list[
        (asset_list.standardised_asset_list["domna_postcode"] == row["Postcode"])
    ]
    df = df[(df["house_no"].astype(str) == str(row["Address1"]))]

    # 2) Nothing found: retry with the postcode CIGA matched against
    if df.empty:
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["domna_postcode"] == row["Matched Postcode"])
        ]
        df = df[(df["house_no"].astype(str) == str(row["Address1"]))]

    # 3) Ambiguous: narrow down on the full matched address.
    # regex=False because addresses are literal text, not patterns (".", "(" etc. must match themselves)
    if df.shape[0] > 1:
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["domna_full_address"].str.lower().str.replace(",", "").str.contains(
                row["Matched Address"].lower().replace(",", ""), na=False, regex=False))
        ]

    # 4) Still nothing: fall back to Address2 plus the house number
    if df.empty:
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["domna_full_address"].str.lower().str.replace(",", "").str.contains(
                row["Address2"].lower().replace(",", ""), na=False, regex=False))
        ]
        df = df[(df["house_no"].astype(str) == str(row["Address1"]))]

    if df.shape[0] != 1:
        # The original message lacked the f-prefix, so the placeholder was printed literally
        raise Exception(
            f"Expected exactly one match for {row['Address1']} in the asset list, found {df.shape[0]}"
        )

    ciga_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )

# Attach the resolved ids; rows skipped above keep a missing domna_property_id
ciga_lookup = pd.DataFrame(ciga_lookup)
cigas = cigas.merge(
    ciga_lookup,
    how="left",
    on="row_id"
)
# Drop CIGA rows that could not be tied to a property, then bring in the landlord's own id
cigas = cigas[~pd.isnull(cigas["domna_property_id"])]
cigas = cigas.merge(
    asset_list.standardised_asset_list[["domna_property_id", "landlord_property_id"]],
    how="left",
    on="domna_property_id"
)

# Note 4 entries in the CIGA checks did NOT match to the asset list (were for properties not owned by Community Housing)
master_data_sheet = master_data_sheet.merge(
    cigas[["landlord_property_id", "Guarantee", "request"]].rename(
        columns={"request": "CIGA request batch"}
    ),
    how="left",
    on="landlord_property_id"
)

# Fill missing survey_reference with funding_scheme
master_surveyed["survey_reference"] = master_surveyed["survey_reference"].fillna(
    master_surveyed["funding_scheme"]
)

# Surveyed properties that do not appear in the SGEC billing sheets.
# .copy() so the status column below is written to an independent frame rather than a filtered
# slice of master_surveyed (avoids pandas' SettingWithCopyWarning)
master_surveyed_to_merge = master_surveyed[
    ~master_surveyed["domna_property_id"].isin(sgec_billings["domna_property_id"].values)
].copy()
master_surveyed_to_merge["Survey Status"] = "Surveyed, Submitted, not on SGEC Deck of Works"

# We now merge on what we've surveyed and submitted
master_data_sheet = master_data_sheet.merge(
    master_surveyed_to_merge[
        ["landlord_property_id", "survey_reference", "submission_date", "cancelled", "Survey Status"]
    ].rename(
        columns={
            "survey_reference": "Survey Type", "submission_date": "Survey Date",
            "cancelled": "Was the Install Cancelled?"
        }
    ),
    how="left",
    on="landlord_property_id"
)
# We now deduce the status of the work based on sgec_billings
property_id_lookup = asset_list.standardised_asset_list[["landlord_property_id", "domna_property_id"]]
sgec_billings = sgec_billings.merge(property_id_lookup, how="left", on="domna_property_id")

dupe_ids = sgec_billings[sgec_billings["domna_property_id"].duplicated()]["domna_property_id"]

# We sort by domna_property_id and billed (where true should be first) and take the first instance
sgec_billings = (
    sgec_billings
    .sort_values(["domna_property_id", "billed"], ascending=[True, False])
    .drop_duplicates(subset=["domna_property_id"], keep="first")
)

# "<scheme>: <measure combo>", falling back to the scheme alone when that combination is missing
billing_scheme = sgec_billings["SUBMISSION TYPE - ECO4,GBIS,SHDF,EPC or OTHER"].map(scheme_map)
sgec_billings["Survey Type"] = billing_scheme + ": " + sgec_billings["MEASURE COMBO"]
sgec_billings["Survey Type"] = sgec_billings["Survey Type"].fillna(billing_scheme)

sgec_billings["Survey Date"] = sgec_billings['SUBMISSION DATE']
# An install counts as cancelled when the INSTALLED text contains "cancel" (case-insensitive)
installed_text = sgec_billings["INSTALLED"].astype(str).str.lower()
sgec_billings["Was the Install Cancelled?"] = installed_text.str.contains("cancel")
sgec_billings['Survey Status'] = np.where(
    sgec_billings["billed"].eq(True),
    "Surveyed, Submitted, on SGEC Deck of Works",
    "Surveyed, not submitted to SGEC, on SGEC Deck of Works"
)

billing_columns = [
    "landlord_property_id", "Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"
]
master_data_sheet = master_data_sheet.merge(
    sgec_billings[billing_columns],
    how="left",
    on="landlord_property_id",
    suffixes=("", "_y"),
)

# Values already on the master sheet win; the "_y" billing values only fill the gaps
for col in ["Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"]:
    billing_col = col + "_y"
    fill_from_billing = master_data_sheet[col].isnull() & master_data_sheet[billing_col].notnull()
    master_data_sheet[col] = np.where(
        fill_from_billing,
        master_data_sheet[billing_col],
        master_data_sheet[col]
    )
    master_data_sheet = master_data_sheet.drop(columns=[billing_col])
# Bring the landlord's own id onto the surveyed outcomes that never reached the master sheet
outcome_id_lookup = asset_list.standardised_asset_list[["landlord_property_id", "domna_property_id"]]
outcomes_not_in_master = outcomes_not_in_master.merge(
    outcome_id_lookup,
    how="left",
    left_on="domna_property_id",
    right_on="domna_property_id",
)

# We also filter out any that were in the SGEC billings
already_in_billings = outcomes_not_in_master["domna_property_id"].isin(sgec_billings["domna_property_id"].values)
outcomes_not_in_master = outcomes_not_in_master[~already_in_billings]

# We now merge on outcomes. There are a small number of surveyed outcomes that were not submitted
master_data_sheet = master_data_sheet.merge(
    outcomes_not_in_master[["landlord_property_id", 'Type of Funding', "Date Letter sent"]],
    how="left",
    on="landlord_property_id",
)

# Rows with no survey type yet but a funding type from the outcomes sheet.
# The mask is taken once, before "Survey Type" is overwritten, and drives both status and type.
only_on_outcomes = (
    master_data_sheet["Survey Type"].isnull() & master_data_sheet["Type of Funding"].notnull()
)
master_data_sheet["Survey Status"] = np.where(
    only_on_outcomes,
    "Surveyed, On Outcomes, not submitted",
    master_data_sheet["Survey Status"]
)
master_data_sheet["Survey Type"] = np.where(
    only_on_outcomes,
    master_data_sheet["Type of Funding"],
    master_data_sheet["Survey Type"]
)

# The letter date stands in for the survey date when no survey date is known
date_from_letter = (
    master_data_sheet["Survey Date"].isnull() & master_data_sheet["Date Letter sent"].notnull()
)
master_data_sheet["Survey Date"] = np.where(
    date_from_letter,
    master_data_sheet["Date Letter sent"],
    master_data_sheet["Survey Date"]
)

master_data_sheet = master_data_sheet.drop(columns=["Type of Funding", "Date Letter sent"])
# We now need to compare the submissions that SGEC have sent us, because the deck of works is likely incorrect
# given the number of properties that have been received by SGEC
# We have submissions from the following dates (taken from the filenames below):
# - 18/11/2024
# - 10/03/2025
# - A sheet that claims to be 25/11/2024 but has 18/11/2024 as the submission date
# - 16/12/2024
# - 02/12/2024
# - 10/02/2025
received_submission_files = [
    "4x108 18.11.24 - RT MASTERS SGEC INVOICE.xlsx",
    "4x144 COMMUNITY HOUSING TOTAL PROJECT INV 10032025.xlsx",
    "4x19 25.11.2024 - RT Master SGEC.xlsx",
    "4x37 16.12.2024 - SGEC INVOICED.xlsx",
    "4x60 02.12.2024 - RT SGEC INV.xlsx",
    "4x78 10.02.2025 MASTERS - SGEC INVOICED-CORRECT.xlsx"
]

sgec_received_submissions = []
for filename in received_submission_files:
    data = pd.read_excel(os.path.join(data_folder, "SGEC Received Submissions", filename))
    # Remember which workbook each row came from
    data["filename"] = filename
    sgec_received_submissions.append(data)

# Stack all workbooks and give every row a stable id for the lookup merge further down
sgec_received_submissions = pd.concat(sgec_received_submissions).reset_index(drop=True)
sgec_received_submissions["row_id"] = sgec_received_submissions.index
# "<house no>+<postcode>" -> landlord property id, for submissions that need a hand-picked match
manual_fix = {
    "5a+DY10 3JR": "6856005A",
    '12+DY10 3JR': "78900120",
    "9+DY10 3JR": "86280090",
    '10+DY10 3JL': "86280100",
    "66+DY10 3JS": "68560660",
    "70+DY10 3JS": "68560700",
    "72+DY10 3JS": "68560720",
    "12+DY10 3JP": "86280120",
    "2A+DY11 5TZ": "6872002A",
    "3A+DY11 5TZ": "6872003A",
    "4A+DY11 5TZ": "6872004A"
}

sgec_received_submissions_lookup = []
for _, row in tqdm(sgec_received_submissions.iterrows(), total=len(sgec_received_submissions)):
    _key = "+".join([str(row["NO."]), str(row["Post Code"])])

    # 1) Hand-picked matches take priority
    ll_pid = manual_fix.get(_key)
    if ll_pid is not None:
        sgec_received_submissions_lookup.append(
            {"row_id": row["row_id"], "landlord_property_id": ll_pid}
        )
        continue

    # 2) House number + postcode against the (de-duplicated) SGEC billings
    same_house_no = sgec_billings['NO.'].astype(str) == str(row['NO.'])
    same_postcode = sgec_billings['Post Code'] == row['Post Code']
    match = sgec_billings[same_house_no & same_postcode]
    if match.shape[0] > 1:
        raise Exception(f"something went wrong {_key} {row['Street / Block Name']}")
    if match.shape[0] == 1:
        sgec_received_submissions_lookup.append(
            {"row_id": row["row_id"], "landlord_property_id": match["landlord_property_id"].values[0]}
        )
        continue

    # 3) Same comparison against the surveyed master; this one must yield exactly one row
    same_house_no = master_surveyed['original_house_no'].astype(str) == str(row['NO.'])
    same_postcode = master_surveyed['original_postcode'] == row['Post Code']
    match = master_surveyed[same_house_no & same_postcode]
    if match.shape[0] > 1:
        raise Exception(f"something went wrong 2 {_key} {row['Street / Block Name']}")
    if match.shape[0] == 0:
        raise Exception(f"No match {_key} {row['Street / Block Name']}")
    sgec_received_submissions_lookup.append(
        {"row_id": row["row_id"], "landlord_property_id": match["landlord_property_id"].values[0]}
    )

# Attach the resolved landlord property id to every received submission
sgec_received_submissions_lookup = pd.DataFrame(sgec_received_submissions_lookup)
sgec_received_submissions = sgec_received_submissions.merge(
    sgec_received_submissions_lookup[["row_id", "landlord_property_id"]],
    how="left",
    on="row_id",
)
# "<scheme>: <measure combo>", falling back to the scheme alone when that combination is missing
received_scheme = sgec_received_submissions["SUBMISSION TYPE - ECO4,GBIS,SHDF,EPC or OTHER"].map(scheme_map)
sgec_received_submissions["Survey Type"] = received_scheme + ": " + sgec_received_submissions["MEASURE COMBO"]
sgec_received_submissions["Survey Type"] = sgec_received_submissions["Survey Type"].fillna(received_scheme)

sgec_received_submissions["Survey Date"] = sgec_received_submissions['SUBMISSION DATE']
# An install counts as cancelled when the INSTALLED text contains "cancel" (case-insensitive)
received_installed_text = sgec_received_submissions["INSTALLED"].astype(str).str.lower()
sgec_received_submissions["Was the Install Cancelled?"] = received_installed_text.str.contains("cancel")
sgec_received_submissions['Survey Status'] = "Submission sent to SGEC, Confirmed by SGEC"
sgec_received_submissions["Survey Received by SGEC"] = True

# We now merge on the submissions that SGEC have sent us
received_columns = [
    "landlord_property_id", "Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status",
    "Survey Received by SGEC"
]
master_data_sheet = master_data_sheet.merge(
    sgec_received_submissions[received_columns],
    how="left",
    on="landlord_property_id",
    suffixes=("", "_y"),
)

# Fill in the gaps: values already on the master sheet win, the "_y" values only fill blanks
for col in ["Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"]:
    received_col = col + "_y"
    fill_from_received = master_data_sheet[col].isnull() & master_data_sheet[received_col].notnull()
    master_data_sheet[col] = np.where(
        fill_from_received,
        master_data_sheet[received_col],
        master_data_sheet[col]
    )
    master_data_sheet = master_data_sheet.drop(columns=[received_col])

# The left merges above must not have multiplied any asset row
duplicate_reference_count = master_data_sheet["Asset_Reference"].duplicated().sum()
if duplicate_reference_count:
    raise ValueError("There are duplicates in the asset reference column")

# Drop this at the end
master_data_sheet = master_data_sheet.drop(columns=["landlord_property_id"])

master_data_sheet.to_excel(os.path.join(data_folder, "Draft Results.xlsx"))

View file

@ -0,0 +1,51 @@
import pandas as pd
# Tabs that exist in both the original route-march workbook and the client's revised workbook
tabs = [
    "Straight Fill", "Solar PV - Straight Fill", "RDF CIGA checks", "Solar PV - RDF CIGA Checks",
    "AT BUILD", "Solar PV - AT BUILD"
]
original_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward/Route March/WESTWARD - Route March Prep.xlsx"
)
revised_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward/Route March/WESTWARD - GBIS List revised for "
    "Domna.xlsx"
)

programme_revisions = []
for tab in tabs:
    original_list = pd.read_excel(original_workbook, sheet_name=tab)
    revised_list = pd.read_excel(revised_workbook, sheet_name=tab)
    # Anything still present on the revised tab stays in the programme
    revised_list["Client Review"] = "Retain in programme"

    df = (
        original_list[["Place ref"]]
        .assign(Tab=tab)
        .merge(revised_list[["Place ref", "Client Review"]], how="left", on="Place ref")
    )
    # Original rows with no counterpart on the revised tab were removed by the client
    df["Client Review"] = df["Client Review"].fillna("Remove from programme")
    programme_revisions.append(df)

programme_revisions = pd.concat(programme_revisions)

# Read in the standardised asset list and create the column to append to that
al = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward/WESTWARD - completed list - "
    "08.05.2025 - Standardised - Client Review.xlsx",
    sheet_name="Standardised Asset List",
)

client_revisions = al[["landlord_property_id"]].merge(
    programme_revisions[["Place ref", "Client Review"]],
    how="left",
    left_on="landlord_property_id",
    right_on="Place ref",
)
# Assets that appear on none of the tabs have no review outcome yet
client_revisions["Client Review"] = client_revisions["Client Review"].fillna("Needs Review")
client_revisions["Client Review Date"] = "08/05/2025"

client_revisions.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward/Route March/client_revisions.csv", index=False
)

View file

@ -20,9 +20,9 @@ def app():
"ventilation": 350,
"Room Roof Insulation": 210,
"Loft insulation": 15,
"Internal wall insulation": 215,
"Internal wall insulation": 131,
"External wall insulation": 298.35,
"Solid wall insulation": 215,
"Solid wall insulation": 131,
"LEDs": 35, # per light
"Flat Roof Insulation": 195,
"Double Glazing": 1140,
@ -71,149 +71,10 @@ def app():
"Ground Floor Flat": 10
}
# If we have a flat, we won't use the 199m2 floor area
floor_areas = [73, 97, 199]
# We remove age bracket, as we ended up with 360 combinations
# age_brackets = ["1945-1970", "1971-2002", "Post 2002"]
wall_type = ["cavity", "non-cavity"]
roof_type = ["pitched", "other"]
planning_constraints = [True, False]
# This is the list of all combinations of the above variables
combinations_untrimmed = product(
*[
dwelling_types, floor_areas, wall_type, roof_type, planning_constraints
]
)
# TODO: Possibly need to add an additional cost for immersion hot water
combinations = []
for comb in combinations_untrimmed:
if "Flat" in comb[0] and comb[1] == 199:
continue
# If we have a flat, not too much difference if it's in a conservation area or not
if "Flat" in comb[0] and comb[4] is True:
continue
combinations.append(comb)
risk_matrix = []
for combination in combinations:
n_floors = num_floors_map[combination[0]]
bf = built_form_map[combination[0]]
pt = "House" if "Flat" not in combination[0] else "Flat"
# Model the home as a box
ground_floor_area = combination[1] / n_floors
perimeter = np.sqrt(ground_floor_area) * 4
# This is the amount of insulation required
external_wall_area = estimate_external_wall_area(
num_floors=n_floors,
floor_height=2.5,
perimeter=perimeter,
built_form=bf
)
n_rooms = np.floor(combination[1] / 15)
n_windows = estimate_windows(
property_type=pt,
built_form=bf,
construction_age_band="",
floor_area=combination[1],
number_habitable_rooms=n_rooms
)
# We determine the exact upgrade pathway for this combination, guided by the generic upgrade pathway
combination_upgrade_pathway = []
for upgrade in upgrade_path:
if upgrade == "wall_insulation":
if combination[2] == "cavity":
combination_upgrade_pathway.append("cavity_wall_insulation")
else:
combination_upgrade_pathway.append("solid_wall_insulation")
continue
if upgrade == "roof_insulation":
if combination[3] == "pitched":
combination_upgrade_pathway.append("loft_insulation")
else:
combination_upgrade_pathway.append("non_pitched_roof_insualtion")
continue
if upgrade == "ventilation":
combination_upgrade_pathway.append("ventilation")
continue
if upgrade == "low_energy_lighting":
combination_upgrade_pathway.append("low_energy_lighting")
continue
if upgrade == "windows":
if not combination[4]:
combination_upgrade_pathway.append("double_glazing")
else:
combination_upgrade_pathway.append("secondary_glazing")
continue
if upgrade == "heating":
if combination[0] in ["Semi Detached House", "Detached House"]:
combination_upgrade_pathway.append("high_heat_retention_storage")
else:
combination_upgrade_pathway.append("air_source_heat_pump")
continue
if upgrade == "solar":
if combination[0] in ["Semi Detached House", "Detached House", "Mid Terrace House"]:
combination_upgrade_pathway.append("solar_pv")
continue
combination_costs = []
for measure in combination_upgrade_pathway:
unit_cost = pricing_matrix[measure]
# Wall insulation
if measure in ["cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"]:
cost = unit_cost * external_wall_area
elif measure in ["loft_insulation"]:
cost = unit_cost * ground_floor_area
elif measure == "ventilation":
if combination[1] == 73:
cost = unit_cost * 2
elif combination[1] == 97:
cost = unit_cost * 3
else:
cost = unit_cost * 4
elif measure == "low_energy_lighting":
n_lights = lighting_count[combination[0]]
if combination[1] == 73:
inflation = 1
elif combination[1] == 97:
inflation = 1.2
else:
inflation = 1.5
cost = unit_cost * n_lights * inflation
elif measure in ["double_glazing", "secondary_glazing"]:
cost = unit_cost * n_windows
elif measure == "high_heat_retention_storage":
cost = unit_cost * n_rooms
elif measure in ["air_source_heat_pump", "solar_pv"]:
cost = unit_cost
else:
raise NotImplementedError("Implement: %s" % measure)
combination_costs.append(
{
"measure": measure,
"cost": cost
}
)
combination_costs = pd.DataFrame(combination_costs)
contingency = 0.26
epr_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Risk Matrix/EPR Data.xlsx", header=1
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Risk Matrix/EPR Data V2.xlsx", header=1
)
epr_data["Measure added"].value_counts()
epr_data["row_id"] = epr_data.index
@ -318,6 +179,6 @@ def app():
)
with pd.ExcelWriter(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Risk Matrix/risk_matrix.xlsx") as writer:
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Risk Matrix/risk_matrix V2.xlsx") as writer:
risk_matrix.to_excel(writer, sheet_name="Risk Matrix", index=False)
pricing_df.to_excel(writer, sheet_name="Pricing Assumptions", index=False)

View file

@ -0,0 +1,134 @@
"""
On the standardised asset list, this script will flag the pilot assets.
"""
import pandas as pd
import os
import numpy as np
from tqdm import tqdm
PILOT_PROJECT_CODE = "MHS-000-PILOT"
MHS_PHASE_1_PROJECT_CODE = "MHS-001"

ASSET_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build - "
    "Standardised.xlsx"
)
PILOT_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS 334 x Pilot reviewed - KB notes end column.xlsx"
)
CIGA_WORKBOOK = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS CIGA Check 03042025_201.xlsx"

# CIGA "Matched Address" -> landlord property id, for rows that the address
# matching below cannot resolve on its own.
manual_fixes = {
    "123 Columbine Close, Rochester": "2213861230"
}

# Save
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/12052025 MHS Standardised Asset List - "
            "programme.xlsx")


def flag_project_codes(asset_list, pilot):
    """Return a copy of ``asset_list`` with a ``project_code`` column.

    Assets whose ``landlord_property_id`` is listed in ``pilot["Place Reference"]``
    get the pilot code. Of the remaining assets, those with a non-null
    ``cavity_reason`` or ``solar_reason`` get the phase 1 code. Everything else
    stays ``None``.
    """
    asset_list = asset_list.copy()
    asset_list["project_code"] = None
    asset_list["project_code"] = np.where(
        asset_list["landlord_property_id"].isin(pilot["Place Reference"]),
        PILOT_PROJECT_CODE,
        asset_list["project_code"],
    )
    # We now flag the next phase of the programme
    has_reason = ~pd.isnull(asset_list["cavity_reason"]) | ~pd.isnull(asset_list["solar_reason"])
    asset_list["project_code"] = np.where(
        has_reason & pd.isnull(asset_list["project_code"]),
        MHS_PHASE_1_PROJECT_CODE,
        asset_list["project_code"],
    )
    return asset_list


def _normalise_address(text):
    """Lower-case an address and drop commas so formatting differences do not matter."""
    return text.lower().replace(",", "")


def _assets_containing(asset_list, needle):
    """Return the assets whose full address contains ``needle`` as plain text.

    ``regex=False`` matters: addresses contain characters such as "(", "." or
    "+", which must be matched literally rather than as a regular expression.
    A blank (non-string) ``needle`` matches nothing.
    """
    if not isinstance(needle, str):
        return asset_list.iloc[0:0]
    haystack = asset_list["domna_full_address"].str.lower().str.replace(",", "", regex=False)
    return asset_list[haystack.str.contains(_normalise_address(needle), na=False, regex=False)]


def match_ciga_row(asset_list, row):
    """Find the asset that a CIGA check row refers to.

    Matching order:
      1. a manual fix keyed on the row's "Matched Address";
      2. "Postcode" plus "Address1" against ``domna_postcode`` / ``domna_address_1``;
      3. the same using "Matched Postcode" when step 2 finds nothing;
      4. when several assets remain, the full "Matched Address", and failing
         that "Address2" combined with "Address1".

    Args:
        asset_list: Standardised asset list.
        row: One CIGA check row (anything supporting ``row["column"]``).

    Returns:
        The ``domna_property_id`` of the single matching asset.

    Raises:
        ValueError: if the row resolves to no asset or to more than one.
    """
    matched_address = row["Matched Address"]
    house = str(row["Address1"])

    fixed_id = manual_fixes.get(matched_address)
    if fixed_id:
        candidates = asset_list[asset_list["landlord_property_id"].astype(str) == fixed_id]
        if candidates.empty:
            raise ValueError(
                f"Manual fix for {matched_address!r} points at landlord property id {fixed_id!r}, "
                "which is not in the asset list"
            )
        return candidates["domna_property_id"].values[0]

    same_house = asset_list["domna_address_1"].astype(str) == house
    candidates = asset_list[(asset_list["domna_postcode"] == row["Postcode"]) & same_house]
    if candidates.empty:
        candidates = asset_list[(asset_list["domna_postcode"] == row["Matched Postcode"]) & same_house]

    if candidates.shape[0] > 1:
        # Postcode and house number are ambiguous, fall back to the address text.
        candidates = _assets_containing(asset_list, matched_address)
        if candidates.empty:
            candidates = _assets_containing(asset_list, row["Address2"])
            candidates = candidates[candidates["domna_address_1"].astype(str) == house]

    if candidates.shape[0] != 1:
        raise ValueError(
            f"Expected exactly one asset for CIGA row {row['Address1']!r} / {matched_address!r}, "
            f"found {candidates.shape[0]}"
        )
    return candidates["domna_property_id"].values[0]


def build_ciga_lookup(asset_list, rows):
    """Map every CIGA check row to an asset.

    Args:
        asset_list: Standardised asset list.
        rows: Iterable of ``(index, row)`` pairs, e.g. ``ciga_checks.iterrows()``;
            each row needs a "row_id" entry.

    Returns:
        DataFrame with the columns ``domna_property_id`` and ``row_id``.
    """
    return pd.DataFrame(
        [
            {
                "domna_property_id": match_ciga_row(asset_list, row),
                "row_id": row["row_id"],
            }
            for _, row in rows
        ]
    )


def attach_ciga_guarantee(asset_list, ciga_checks, ciga_lookup):
    """Left-join the CIGA "Guarantee" outcome onto the asset list as ``ciga_guarantee``."""
    ciga_lookup = ciga_lookup.merge(
        ciga_checks[["row_id", "Guarantee"]].rename(
            columns={"Guarantee": "ciga_guarantee"}
        ), how="left", on="row_id"
    )
    # NOTE(review): this flag is set but not included in the merge below, so it
    # never reaches the saved asset list - confirm whether it should be exported.
    ciga_lookup["ciga_check_complete"] = True
    return asset_list.merge(
        ciga_lookup[["domna_property_id", "ciga_guarantee"]],
        how="left",
        on="domna_property_id"
    )


def main():
    """Flag pilot / phase 1 assets, attach the CIGA results and save the workbook."""
    # Both tabs live in the same workbook, so parse it once.
    sheets = pd.read_excel(ASSET_WORKBOOK, sheet_name=["Standardised Asset List", "Flat Data"])
    asset_list = sheets["Standardised Asset List"]
    flat_data = sheets["Flat Data"]
    pilot = pd.read_excel(PILOT_WORKBOOK)
    ciga_checks = pd.read_excel(CIGA_WORKBOOK)
    ciga_checks["row_id"] = ciga_checks.index

    asset_list = flag_project_codes(asset_list, pilot)

    # We now flag the CIGA checks
    ciga_lookup = build_ciga_lookup(
        asset_list, tqdm(ciga_checks.iterrows(), total=len(ciga_checks))
    )
    asset_list = attach_ciga_guarantee(asset_list, ciga_checks, ciga_lookup)
    # To eyeball the matches, join ciga_lookup to ciga_checks on "row_id" and to
    # asset_list on "domna_property_id", then compare "Matched Address" with
    # "domna_full_address".

    # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
    with pd.ExcelWriter(filename) as writer:
        asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
        flat_data.to_excel(writer, sheet_name="Flat Data", index=False)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,60 @@
"""
The data held on file for MHS is fairly incomplete, where not every single property has an observation
"""
from tqdm import tqdm
import pandas as pd
from docutils.utils.math.tex2mathml_extern import blahtexml
SOURCE_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for analysis.xlsx"
)
OUTPUT_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build.xlsx"
)
NOT_INSPECTED = "Property not inspected"


def extrapolate_findings(asset_list):
    """Fill the gaps in "WFT Findings" from the observations made per postcode.

    When this list was checked, an observation was made per postcode, and so we
    need to extrapolate those findings. Rows are handled per "Postcode", in
    their existing order:

    * if the first row of a postcode has a finding, every blank below it takes
      the most recent finding above it (forward fill);
    * if the first row of a postcode is blank, nothing is carried forward in
      that postcode: findings that exist are kept and every blank becomes
      "Property not inspected";
    * rows without a postcode keep their own finding, or become
      "Property not inspected" when blank. They cannot borrow a finding from a
      postcode, but they must not lose the one they have.

    Args:
        asset_list: DataFrame with at least "Postcode" and "WFT Findings".

    Returns:
        A new DataFrame with a fresh 0..n-1 index, the same rows in the same
        order, and "WFT Findings" moved to the last column with no blanks left.
        The input is not modified.
    """
    asset_list = asset_list.reset_index(drop=True)
    findings = asset_list["WFT Findings"]
    postcode = asset_list["Postcode"]
    has_postcode = postcode.notna()

    # Postcodes whose first row carries no finding are only gap-filled with the
    # "not inspected" label, never forward filled.
    first_of_postcode = has_postcode & ~postcode.duplicated()
    starts_blank = postcode.isin(postcode[first_of_postcode & findings.isna()])
    forward_fill = has_postcode & ~starts_blank

    carried = findings.groupby(postcode).ffill()
    filled = findings.where(~forward_fill, carried).fillna(NOT_INSPECTED)

    # Assigning by row position (instead of merging back on "UPRN") keeps exactly
    # one output row per input row, even if a UPRN is repeated or missing.
    return asset_list.drop(columns=["WFT Findings"]).assign(**{"WFT Findings": filled})


def main():
    """Read the MHS analysis workbook, extrapolate the findings and store the result."""
    asset_list = pd.read_excel(SOURCE_WORKBOOK, sheet_name="Data")
    asset_list = extrapolate_findings(asset_list)
    # Store the data
    # NOTE(review): unlike the sibling scripts this writes the row index as the
    # first column (no index=False) - kept as is, confirm whether that is wanted.
    asset_list.to_excel(OUTPUT_WORKBOOK)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,28 @@
"""
Simple script to tidy up the unitas asset list
"""
import pandas as pd
# Input workbook, category mapping and output workbook for the Unitas tidy-up.
ASSET_LIST_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/UNITAS - Asset List.xlsx"
MAPPING_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/unitas-mapped-categories.csv"
OUTPUT_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/unitas_asset_list_for_analysis.xlsx"

FINDING_COLUMN = "Warmfront Finding"
MAPPING_KEY_COLUMN = "non-intrusives: WFT Findings"

df = pd.read_excel(ASSET_LIST_PATH, sheet_name="Asset List")

# Lower-case and trim the free-text finding; it is the join key for the lookup below.
normalised_finding = df[FINDING_COLUMN].str.lower()
df[FINDING_COLUMN] = normalised_finding.str.strip()

mapping = pd.read_csv(MAPPING_PATH)

# Two-column lookup: finding text -> mapped category, exposed as "WFT Findings".
category_lookup = mapping[[MAPPING_KEY_COLUMN, "mapped_category"]]
category_lookup = category_lookup.rename(columns={"mapped_category": "WFT Findings"})

# Left join keeps every asset; findings without a mapping get a blank category.
al = pd.merge(
    df,
    category_lookup,
    how="left",
    left_on=FINDING_COLUMN,
    right_on=MAPPING_KEY_COLUMN,
)

al.to_excel(OUTPUT_PATH, index=False)