mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #425 from Hestia-Homes/remote-assessment-api
Handling error case for fetching addresses
This commit is contained in:
commit
174d9ea6a9
15 changed files with 1958 additions and 614 deletions
|
|
@ -5,6 +5,7 @@ import tiktoken
|
|||
from pprint import pprint
|
||||
from datetime import datetime
|
||||
|
||||
from numpy.ma.core import masked_not_equal
|
||||
from openai import OpenAI
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
|
@ -19,6 +20,7 @@ import asset_list.mappings.heating_systems as heating_mappings
|
|||
import asset_list.mappings.exising_pv as existing_pv_mappings
|
||||
import asset_list.mappings.built_form as built_form_mappings
|
||||
import asset_list.mappings.roof as roof_mappings
|
||||
import asset_list.mappings.outcomes as outcomes_mappings
|
||||
|
||||
from recommendations.recommendation_utils import (
|
||||
estimate_perimeter,
|
||||
|
|
@ -691,6 +693,9 @@ class AssetList:
|
|||
c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in self.standardised_asset_list.columns
|
||||
]
|
||||
|
||||
if "Warmfront Finding" in self.standardised_asset_list.columns:
|
||||
non_intrusive_columns.append("Warmfront Finding")
|
||||
|
||||
self.keep_variables += non_intrusive_columns
|
||||
|
||||
self.rename_map = {
|
||||
|
|
@ -734,6 +739,11 @@ class AssetList:
|
|||
self.standardised_asset_list[self.landlord_year_built].replace(self.DATETIME_REMAP)
|
||||
)
|
||||
|
||||
no_data_codes = {"No Data": None}
|
||||
self.standardised_asset_list[self.landlord_year_built] = (
|
||||
self.standardised_asset_list[self.landlord_year_built].replace(no_data_codes)
|
||||
)
|
||||
|
||||
self.standardised_asset_list[self.landlord_year_built] = pd.to_datetime(
|
||||
self.standardised_asset_list[self.landlord_year_built]
|
||||
)
|
||||
|
|
@ -754,7 +764,8 @@ class AssetList:
|
|||
"This cell has an external reference that can't be shown or edited. Editing this cell will "
|
||||
"remove the external reference.",
|
||||
"ND",
|
||||
'PIMSS EMPTY'
|
||||
'PIMSS EMPTY',
|
||||
"UNKNOWN"
|
||||
]
|
||||
|
||||
if pd.isnull(date_str) or date_str in known_errors or (date_str == 0):
|
||||
|
|
@ -929,7 +940,10 @@ class AssetList:
|
|||
raise ValueError(f"Dataframe must contain the column {self.DOMNA_PROPERTY_ID}")
|
||||
|
||||
if df[self.DOMNA_PROPERTY_ID].duplicated().sum():
|
||||
raise ValueError(f"{self.DOMNA_PROPERTY_ID} contains duplicated IDs")
|
||||
df = df.drop_duplicates(
|
||||
subset=[self.DOMNA_PROPERTY_ID],
|
||||
keep="first"
|
||||
)
|
||||
|
||||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
df, how="left", on=self.DOMNA_PROPERTY_ID
|
||||
|
|
@ -1139,21 +1153,29 @@ class AssetList:
|
|||
# We add a SAP category for all work type identification
|
||||
self.standardised_asset_list["SAP Category"] = np.where(
|
||||
(
|
||||
(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68) |
|
||||
(self.standardised_asset_list[self.STANDARD_SAP] <= 68)
|
||||
(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 54) |
|
||||
(self.standardised_asset_list[self.STANDARD_SAP] <= 54)
|
||||
),
|
||||
"SAP Rating 68 or less",
|
||||
"SAP Rating 54 or less",
|
||||
np.where(
|
||||
(
|
||||
(
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <=
|
||||
self.EMPTY_CAVITY_SAP_THRESHOLD
|
||||
) | (self.standardised_asset_list[self.STANDARD_SAP] <= self.EMPTY_CAVITY_SAP_THRESHOLD)
|
||||
(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68) |
|
||||
(self.standardised_asset_list[self.STANDARD_SAP] <= 68)
|
||||
),
|
||||
"SAP Rating 55-68",
|
||||
np.where(
|
||||
(
|
||||
(
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <=
|
||||
self.EMPTY_CAVITY_SAP_THRESHOLD
|
||||
) | (self.standardised_asset_list[self.STANDARD_SAP] <= self.EMPTY_CAVITY_SAP_THRESHOLD)
|
||||
),
|
||||
f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}",
|
||||
f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more"
|
||||
),
|
||||
f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}",
|
||||
f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more"
|
||||
)
|
||||
)
|
||||
|
||||
else:
|
||||
# We add a SAP category for all work type identification
|
||||
# We break into 4 categories (54 or less, 55-68, 69-74, 75 or more)
|
||||
|
|
@ -1213,11 +1235,11 @@ class AssetList:
|
|||
elif self.old_format_non_intrusives_present:
|
||||
non_intrusives_wall_filter = (
|
||||
self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin(
|
||||
["empty cavity", "partial fill"]
|
||||
["empty cavity", "partial fill", "empty", "EMPTY CAVITY 70MM", "partial"]
|
||||
) | (
|
||||
(
|
||||
self.standardised_asset_list['non-intrusives: WFT Findings']
|
||||
.str.lower().str.strip().str.contains("empty cavity|partial fill") &
|
||||
.str.lower().str.strip().str.contains("empty cavity|partial fill|empty|partial") &
|
||||
~self.standardised_asset_list['non-intrusives: WFT Findings']
|
||||
.astype(str).str.lower().str.strip().str.contains("major access issues")
|
||||
)
|
||||
|
|
@ -1250,7 +1272,7 @@ class AssetList:
|
|||
)
|
||||
|
||||
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = (
|
||||
pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]) &
|
||||
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
|
||||
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
|
||||
non_intrusives_wall_filter &
|
||||
year_built_filter &
|
||||
|
|
@ -1262,23 +1284,35 @@ class AssetList:
|
|||
|
||||
# We also add a filter on anything that was generally identified by the non-intrusives
|
||||
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_year_filter"] = (
|
||||
pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]) &
|
||||
pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"]) &
|
||||
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
|
||||
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] &
|
||||
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
|
||||
non_intrusives_wall_filter
|
||||
)
|
||||
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"] = (
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
|
||||
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
|
||||
) & (
|
||||
self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD
|
||||
) & (
|
||||
~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD]
|
||||
) & (
|
||||
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
|
||||
if (not self.non_intrusives_eligibility) and (not self.old_format_non_intrusives_present):
|
||||
# If we have NO inspections data, we capture all of the wall types and don't filter on age of the EPC
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"] = (
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
|
||||
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
|
||||
) & (
|
||||
self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD
|
||||
) & (
|
||||
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"] = (
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
|
||||
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
|
||||
) & (
|
||||
self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD
|
||||
) & (
|
||||
~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD]
|
||||
) & (
|
||||
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = (
|
||||
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) &
|
||||
|
|
@ -1326,6 +1360,9 @@ class AssetList:
|
|||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
|
||||
extraction_wall_filter & year_built_filter
|
||||
)
|
||||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = (
|
||||
extraction_wall_filter & ~year_built_filter
|
||||
)
|
||||
|
||||
elif self.old_format_non_intrusives_present:
|
||||
print("Review these categories!!!!")
|
||||
|
|
@ -1339,10 +1376,11 @@ class AssetList:
|
|||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
|
||||
extraction_wall_filter
|
||||
)
|
||||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = False
|
||||
|
||||
else:
|
||||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = False
|
||||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = False
|
||||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = False
|
||||
|
||||
######################################################
|
||||
# Solar
|
||||
|
|
@ -1470,7 +1508,7 @@ class AssetList:
|
|||
)
|
||||
|
||||
# We merge on the u-value for average thermal transmittance
|
||||
roof_roof_data = pd.DataFrame(cleaned["roof-description"])[
|
||||
roof_data = pd.DataFrame(cleaned["roof-description"])[
|
||||
["original_description", "thermal_transmittance", "is_pitched", "is_loft"]
|
||||
].rename(
|
||||
columns={
|
||||
|
|
@ -1480,7 +1518,7 @@ class AssetList:
|
|||
)
|
||||
|
||||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
roof_roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
|
||||
roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
|
||||
)
|
||||
|
||||
# If the u-value of a roof is less than 0.7 we consider it insulated
|
||||
|
|
@ -1724,8 +1762,8 @@ class AssetList:
|
|||
~self.standardised_asset_list["epc_indicates_empty_cavity"] &
|
||||
pd.isnull(self.standardised_asset_list["cavity_reason"])
|
||||
),
|
||||
"Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled: " + self.standardised_asset_list[
|
||||
"SAP Category"],
|
||||
"Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or Non-cavity: " +
|
||||
self.standardised_asset_list["SAP Category"],
|
||||
self.standardised_asset_list["cavity_reason"]
|
||||
)
|
||||
|
||||
|
|
@ -1739,6 +1777,16 @@ class AssetList:
|
|||
self.standardised_asset_list["cavity_reason"]
|
||||
)
|
||||
|
||||
self.standardised_asset_list["cavity_reason"] = np.where(
|
||||
(
|
||||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] &
|
||||
pd.isnull(self.standardised_asset_list["cavity_reason"])
|
||||
),
|
||||
f"Non-Intrusive Data Shows Cavity Extraction, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " +
|
||||
self.standardised_asset_list["SAP Category"],
|
||||
self.standardised_asset_list["cavity_reason"]
|
||||
)
|
||||
|
||||
######################################################
|
||||
# Flag solar
|
||||
######################################################
|
||||
|
|
@ -1761,6 +1809,16 @@ class AssetList:
|
|||
self.standardised_asset_list["solar_reason"]
|
||||
)
|
||||
|
||||
# Finally, anything flagged for solar should not be flagged for cavity - make them None
|
||||
self.standardised_asset_list["cavity_reason"] = np.where(
|
||||
(
|
||||
~pd.isnull(self.standardised_asset_list["solar_reason"]) &
|
||||
~pd.isnull(self.standardised_asset_list["cavity_reason"])
|
||||
),
|
||||
None,
|
||||
self.standardised_asset_list["cavity_reason"]
|
||||
)
|
||||
|
||||
# Flag anything that has existing outcomes
|
||||
if (self.outcomes is not None) and ("surveyed" in self.standardised_asset_list.columns):
|
||||
|
||||
|
|
@ -2160,7 +2218,7 @@ class AssetList:
|
|||
|
||||
self.hubspot_data = programme_data
|
||||
|
||||
def flag_ecosurv(self, ecosurv_landlords=None):
|
||||
def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None):
|
||||
|
||||
"""
|
||||
This method will match Ecosurv data to the asset list
|
||||
|
|
@ -2170,12 +2228,9 @@ class AssetList:
|
|||
return
|
||||
|
||||
# TODO: Fetch from Sharepoint
|
||||
ecosurv_filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/15.04.csv"
|
||||
ecosurv_filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/07.05.2025.csv"
|
||||
logger.info("Getting Ecosurv data from %s", ecosurv_filepath)
|
||||
self.ecosurv = pd.read_csv(
|
||||
ecosurv_filepath,
|
||||
encoding="cp437"
|
||||
)
|
||||
self.ecosurv = pd.read_csv(ecosurv_filepath, encoding="cp437")
|
||||
|
||||
landlords = self.ecosurv["Landlord"].value_counts().reset_index(drop=False)
|
||||
landlord_references = landlords[
|
||||
|
|
@ -2186,6 +2241,11 @@ class AssetList:
|
|||
self.ecosurv["Landlord"].isin(landlord_references["Landlord"].values)
|
||||
]
|
||||
|
||||
if landlords_to_ignore is not None:
|
||||
landlord_ecosurv_data = landlord_ecosurv_data[
|
||||
~landlord_ecosurv_data["Landlord"].isin(landlords_to_ignore)
|
||||
]
|
||||
|
||||
# Try and match to asset list
|
||||
matched = []
|
||||
unmatched = []
|
||||
|
|
@ -2247,6 +2307,11 @@ class AssetList:
|
|||
|
||||
# We now match
|
||||
matched = pd.DataFrame(matched)
|
||||
# We'll possibly have duplicates here, where properties have been sold twice. We de-dupe
|
||||
if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum():
|
||||
# It doesn't matter too much which record we take
|
||||
matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
|
||||
|
||||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
matched,
|
||||
how="left",
|
||||
|
|
@ -2260,46 +2325,82 @@ class AssetList:
|
|||
|
||||
def flag_outcomes(
|
||||
self,
|
||||
outcomes_filepath,
|
||||
outcomes_filepaths,
|
||||
outcomes_sheetname,
|
||||
outcomes_address,
|
||||
outcomes_postcode,
|
||||
outcomes_houseno,
|
||||
outcomes_id
|
||||
):
|
||||
if outcomes_filepath is None:
|
||||
if not outcomes_filepaths:
|
||||
return
|
||||
|
||||
self.outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname)
|
||||
self.outcomes["row_id"] = self.outcomes.index
|
||||
|
||||
if outcomes_houseno is None:
|
||||
outcomes_houseno = "houseno"
|
||||
self.outcomes["houseno"] = self.outcomes[outcomes_address].apply(
|
||||
lambda x: SearchEpc.get_house_number(x, self.outcomes[outcomes_postcode])
|
||||
)
|
||||
|
||||
logger.info("Matching outcomes to asset list")
|
||||
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
|
||||
self.outcomes = []
|
||||
outcomes_no_match = []
|
||||
lookup = []
|
||||
nomatch = []
|
||||
for _, x in tqdm(self.outcomes.iterrows(), total=len(self.outcomes)):
|
||||
for idx, outcomes_filepath in enumerate(outcomes_filepaths):
|
||||
outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname[idx])
|
||||
outcomes["row_id"] = outcomes.index
|
||||
|
||||
if pd.isnull(x[outcomes_address]):
|
||||
continue
|
||||
if outcomes_houseno[idx] is None:
|
||||
outcomes_houseno = "houseno"
|
||||
outcomes["houseno"] = outcomes[outcomes_address[idx]].apply(
|
||||
lambda x: SearchEpc.get_house_number(x, outcomes[outcomes_postcode])
|
||||
)
|
||||
|
||||
# Check if we have an id
|
||||
oid = x[outcomes_id] if outcomes_id is not None else None
|
||||
# We handle an edge case that occurred for LHP
|
||||
if "Notes / Outcomes" in outcomes.columns and "Outcome" not in outcomes.columns:
|
||||
# We use the re-mapper to handle this:
|
||||
outcomes["Notes / Outcomes"] = outcomes["Notes / Outcomes"].str.strip()
|
||||
values_to_remap = outcomes["Notes / Outcomes"].unique()
|
||||
# We want to map this to our standardised list of property types we're interested in
|
||||
remapper = DataRemapper(
|
||||
standard_values=outcomes_mappings.outcomes_values, standard_map=outcomes_mappings.outcomes_map
|
||||
)
|
||||
remap_dictionary = remapper.standardize_list(values_to_remap=values_to_remap.tolist())
|
||||
# Perform the remap
|
||||
outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary)
|
||||
|
||||
outcomes["Outcome"] = outcomes["Outcome"].str.lower()
|
||||
|
||||
logger.info("Matching outcomes to asset list")
|
||||
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
|
||||
lookup_i = []
|
||||
nomatch_i = []
|
||||
for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
|
||||
|
||||
if pd.isnull(x[outcomes_address[idx]]) or not x[outcomes_address[idx]]:
|
||||
continue
|
||||
|
||||
# Check if we have an id
|
||||
oid = x[outcomes_id[idx]] if outcomes_id[idx] is not None else None
|
||||
|
||||
if oid is not None:
|
||||
matched = self.standardised_asset_list[
|
||||
(self.standardised_asset_list[
|
||||
self.STANDARD_LANDLORD_PROPERTY_ID
|
||||
].str.strip() == oid)
|
||||
]
|
||||
|
||||
if matched.shape[0] == 1:
|
||||
lookup_i.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
address_clean = x[outcomes_address[idx]].lower().replace(",", "").replace(" ", " ")
|
||||
|
||||
if oid is not None:
|
||||
matched = self.standardised_asset_list[
|
||||
(self.standardised_asset_list[
|
||||
self.STANDARD_LANDLORD_PROPERTY_ID
|
||||
].str.strip() == oid)
|
||||
self.STANDARD_FULL_ADDRESS
|
||||
].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean)
|
||||
]
|
||||
|
||||
if matched.shape[0] == 1:
|
||||
lookup.append(
|
||||
lookup_i.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
|
|
@ -2307,65 +2408,65 @@ class AssetList:
|
|||
)
|
||||
continue
|
||||
|
||||
address_clean = x[outcomes_address].lower().replace(",", "").replace(" ", " ")
|
||||
|
||||
self.outcomes["Outcome"] = self.outcomes["Outcome"].str.lower()
|
||||
|
||||
matched = self.standardised_asset_list[
|
||||
(self.standardised_asset_list[
|
||||
self.STANDARD_FULL_ADDRESS
|
||||
].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean)
|
||||
]
|
||||
|
||||
if matched.shape[0] == 1:
|
||||
lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
matched = self.standardised_asset_list[
|
||||
(self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip() == x[outcomes_postcode])
|
||||
].copy()
|
||||
if not matched.empty:
|
||||
matched["houseno"] = matched.apply(
|
||||
lambda x: SearchEpc.get_house_number(
|
||||
str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
|
||||
),
|
||||
axis=1
|
||||
)
|
||||
|
||||
matched = matched[
|
||||
matched["houseno"].astype(str) == str(x[outcomes_houseno])
|
||||
]
|
||||
if matched.shape[0] == 1:
|
||||
lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
matched = self.standardised_asset_list[
|
||||
(self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip() == x[outcomes_postcode[idx]])
|
||||
].copy()
|
||||
if not matched.empty:
|
||||
matched["houseno"] = matched.apply(
|
||||
lambda x: SearchEpc.get_house_number(
|
||||
str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
|
||||
),
|
||||
axis=1
|
||||
)
|
||||
continue
|
||||
elif not matched.empty:
|
||||
# Use Levenshtein distance to match
|
||||
matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
|
||||
|
||||
best_match = process.extractOne(x["Address"], matched[self.STANDARD_FULL_ADDRESS].values)[0]
|
||||
matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match]
|
||||
lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
if pd.isnull(x[outcomes_houseno[idx]]):
|
||||
house_no_to_match = SearchEpc.get_house_number(
|
||||
str(x[outcomes_address[idx]]), str(x[outcomes_postcode[idx]])
|
||||
)
|
||||
if isinstance(house_no_to_match, str):
|
||||
house_no_to_match = house_no_to_match.lower()
|
||||
else:
|
||||
house_no_to_match = str(x[outcomes_houseno[idx]]).strip()
|
||||
|
||||
nomatch.append(x["row_id"])
|
||||
matched = matched[matched["houseno"].astype(str) == house_no_to_match]
|
||||
if matched.shape[0] == 1:
|
||||
lookup_i.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
elif not matched.empty:
|
||||
# Use Levenshtein distance to match
|
||||
matched["address"] = (
|
||||
matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
|
||||
)
|
||||
|
||||
self.outcomes_no_match = self.outcomes[self.outcomes["row_id"].isin(nomatch)]
|
||||
lookup = pd.DataFrame(lookup)
|
||||
best_match = process.extractOne(
|
||||
x[outcomes_address[idx]], matched[self.STANDARD_FULL_ADDRESS].values
|
||||
)[0]
|
||||
matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match]
|
||||
lookup_i.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
nomatch_i.append(x["row_id"])
|
||||
|
||||
outcomes_no_match_i = outcomes[outcomes["row_id"].isin(nomatch_i)]
|
||||
lookup_i = pd.DataFrame(lookup_i)
|
||||
|
||||
outcomes_no_match.append(outcomes_no_match_i)
|
||||
lookup.append(lookup_i)
|
||||
self.outcomes.append(outcomes)
|
||||
|
||||
lookup = pd.concat(lookup)
|
||||
self.outcomes_no_match = pd.concat(outcomes_no_match)
|
||||
self.outcomes = pd.concat(self.outcomes)
|
||||
|
||||
if lookup.empty:
|
||||
return
|
||||
|
|
@ -2376,10 +2477,21 @@ class AssetList:
|
|||
# that the surveyor had a detailed explanation as to why they couldn't gain access so if this has
|
||||
# happened multiple times, in this case we judge that the work may not be viable
|
||||
|
||||
date_col = "Week Commencing" if "Week Commencing" in self.outcomes else "Survey Date"
|
||||
if "Week Commencing" in self.outcomes.columns:
|
||||
date_col = "Week Commencing"
|
||||
elif "Survey Date" in self.outcomes.columns:
|
||||
date_col = "Survey Date"
|
||||
elif "Date letters sent" in self.outcomes.columns:
|
||||
date_col = "Date letters sent"
|
||||
elif "Date Letter sent" in self.outcomes.columns:
|
||||
date_col = "Date Letter sent"
|
||||
else:
|
||||
raise NotImplementedError("Invalid date in outcomes - implement me")
|
||||
|
||||
notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes"
|
||||
|
||||
lookup = lookup.merge(
|
||||
self.outcomes[["row_id", "Outcome", "Notes", date_col]], how="left", on="row_id"
|
||||
self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id"
|
||||
)
|
||||
|
||||
visit_counts = (
|
||||
|
|
@ -2390,11 +2502,36 @@ class AssetList:
|
|||
.sort_values("visit_count", ascending=False)
|
||||
)
|
||||
|
||||
def extract_date(s):
    """Turn a raw outcomes date cell into a timestamp.

    Args:
        s: The cell value taken from the outcomes date column. May be free
            text containing a ``dd.mm.yyyy`` token, an already date-like
            object, or anything else (None, NaN, numbers).

    Returns:
        A ``pd.Timestamp`` when a date can be recovered, otherwise ``pd.NaT``.
    """
    if isinstance(s, str):
        # Free-text cells: take the first dd.mm.yyyy token found in the string.
        match = re.search(r"(\d{2}\.\d{2}\.\d{4})", s)
        if match:
            # errors="coerce" maps impossible dates (e.g. 31.02.2025) to NaT.
            return pd.to_datetime(match.group(1), format="%d.%m.%Y", errors="coerce")
        return pd.NaT

    # Values that are already date-like are kept rather than discarded, so the
    # "latest note" ordering on parsed_date stays meaningful for them.
    # NOTE(review): assumes date-formatted spreadsheet cells can arrive as
    # datetime / Timestamp objects rather than strings - confirm against the
    # outcomes files.
    if isinstance(s, (datetime, np.datetime64)):
        return pd.to_datetime(s, errors="coerce")

    return pd.NaT
|
||||
|
||||
lookup['parsed_date'] = lookup[date_col].apply(extract_date)
|
||||
|
||||
def get_latest_note(group):
    """Select one representative row from a group of outcome records.

    Rows whose ``Outcome`` equals ``'surveyed'`` take priority; if the group
    has none, every row is a candidate. Among the candidates, the row with
    the greatest ``parsed_date`` is returned.

    Args:
        group: DataFrame holding at least ``Outcome`` and ``parsed_date``.

    Returns:
        The chosen row as a Series.
    """
    surveyed_rows = group.loc[group['Outcome'] == 'surveyed']
    candidates = group if surveyed_rows.empty else surveyed_rows
    newest_first = candidates.sort_values('parsed_date', ascending=False)
    return newest_first.iloc[0]
|
||||
|
||||
latest_note = (
|
||||
lookup.groupby('domna_property_id', group_keys=False).
|
||||
apply(get_latest_note).
|
||||
reset_index(drop=True)
|
||||
)
|
||||
latest_note = latest_note[["domna_property_id", notes_col]]
|
||||
|
||||
pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index()
|
||||
pivot_df = pivot_df.merge(
|
||||
visit_counts, how="left", on="domna_property_id"
|
||||
)
|
||||
|
||||
# We want the latest note
|
||||
|
||||
if pivot_df[self.DOMNA_PROPERTY_ID].duplicated().sum():
|
||||
raise Exception("We have duplicated property IDs in the outcomes data")
|
||||
|
||||
|
|
@ -2406,6 +2543,14 @@ class AssetList:
|
|||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
|
||||
)
|
||||
# Merge the latest note
|
||||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
latest_note.rename(columns={notes_col: "Latest Route March Note"}),
|
||||
how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
|
||||
)
|
||||
|
||||
if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum():
|
||||
raise ValueError("Duplicates appreared - something went wrong")
|
||||
|
||||
self.outcomes = self.outcomes.sort_values("domna_property_id", ascending=False)
|
||||
|
||||
|
|
@ -2432,36 +2577,43 @@ class AssetList:
|
|||
# Strip columns
|
||||
master_data.columns = [c.strip() for c in master_data.columns]
|
||||
master_data.columns = [re.sub(r'\s+', ' ', c) for c in master_data.columns]
|
||||
# Drop any unnamed columns
|
||||
unnamed_columns = [c for c in master_data.columns if "Unnamed:" in c]
|
||||
master_data = master_data.drop(columns=unnamed_columns)
|
||||
|
||||
if not id_map.empty:
|
||||
master_data = master_data.merge(
|
||||
id_map, how="left", on=['NO.', 'Street / Block Name', 'Post Code']
|
||||
)
|
||||
|
||||
install_col = (
|
||||
"INSTALLED OR CANCELLED" if "INSTALLED OR CANCELLED" in master_data.columns
|
||||
else "INSTALL / CANCELLATION DATE"
|
||||
)
|
||||
if "INSTALLED OR CANCELLED" in master_data.columns:
|
||||
install_col = "INSTALLED OR CANCELLED"
|
||||
elif "INSTALL / CANCELLATION DATE" in master_data.columns:
|
||||
install_col = "INSTALL / CANCELLATION DATE"
|
||||
elif 'INSTALL/ CANCELLATION DATE' in master_data.columns:
|
||||
install_col = 'INSTALL/ CANCELLATION DATE'
|
||||
else:
|
||||
raise ValueError("No install or cancellation date")
|
||||
|
||||
submission_col = (
|
||||
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
|
||||
)
|
||||
|
||||
if "UPRN" in master_data.columns:
|
||||
# We just need to check if any were cancelled
|
||||
master_to_append = master_data[
|
||||
["UPRN", install_col, submission_col]
|
||||
].rename(
|
||||
columns={
|
||||
"UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
|
||||
install_col: "survey_status",
|
||||
submission_col: "submission_date"
|
||||
}
|
||||
)
|
||||
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
|
||||
|
||||
master_surveyed.append(master_to_append)
|
||||
continue
|
||||
# if "UPRN" in master_data.columns:
|
||||
# # We just need to check if any were cancelled
|
||||
# master_to_append = master_data[
|
||||
# ["UPRN", install_col, submission_col]
|
||||
# ].rename(
|
||||
# columns={
|
||||
# "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
|
||||
# install_col: "survey_status",
|
||||
# submission_col: "submission_date"
|
||||
# }
|
||||
# )
|
||||
# master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
|
||||
#
|
||||
# master_surveyed.append(master_to_append)
|
||||
# continue
|
||||
|
||||
master_data["row_id"] = master_data.index
|
||||
|
||||
|
|
@ -2472,27 +2624,84 @@ class AssetList:
|
|||
axis=1
|
||||
)
|
||||
|
||||
scheme_col = (
|
||||
"AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
|
||||
"AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH"
|
||||
)
|
||||
postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code"
|
||||
house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO"
|
||||
property_type_col = (
|
||||
"PROPERTY TYPE As per table emailed" if
|
||||
"PROPERTY TYPE As per table emailed" in
|
||||
master_data.columns else "PROPERTY TYPE As per table emailed"
|
||||
)
|
||||
measure_mix_col = "MEASURE COMBO"
|
||||
|
||||
# Otherwise, we need to match algorithmically
|
||||
has_property_id = "UPRN" in master_data.columns
|
||||
logger.info("Matching master data to asset list")
|
||||
matched = []
|
||||
unmatched = []
|
||||
for _, row in tqdm(master_data.iterrows(), total=len(master_data)):
|
||||
|
||||
original_house_no = row[house_no_col]
|
||||
original_street = row["Street / Block Name"]
|
||||
original_postcode = row[postcode_col]
|
||||
|
||||
if pd.isnull(row[postcode_col]):
|
||||
continue
|
||||
|
||||
# if has_property_id:
|
||||
# submission_uprn = row["UPRN"]
|
||||
#
|
||||
# if not pd.isnull(submission_uprn):
|
||||
# df = self.standardised_asset_list[
|
||||
# self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn
|
||||
# ]
|
||||
|
||||
postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
|
||||
|
||||
df = self.standardised_asset_list[
|
||||
(
|
||||
self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip().str.lower().str.replace(" ",
|
||||
"")
|
||||
== postcode_no_space
|
||||
self.standardised_asset_list[self.STANDARD_POSTCODE]
|
||||
.str.strip().str.lower().str.replace(" ", "") == postcode_no_space
|
||||
)
|
||||
]
|
||||
|
||||
house_no = row[house_no_col]
|
||||
if isinstance(house_no, (float, int)):
|
||||
house_no = str(int(house_no))
|
||||
|
||||
if house_no not in df["house_no"].values:
|
||||
# Handle postcode errors
|
||||
postal_region = row[postcode_col].split(" ")[0].lower()
|
||||
df = self.standardised_asset_list[
|
||||
(
|
||||
self.standardised_asset_list[self.STANDARD_POSTCODE]
|
||||
.str.strip().str.lower().str.startswith(postal_region)
|
||||
)
|
||||
]
|
||||
|
||||
if house_no not in df["house_no"].values:
|
||||
unmatched.append(row["row_id"])
|
||||
continue
|
||||
df = df[df["house_no"] == house_no]
|
||||
if df.shape[0] > 1:
|
||||
df = df[
|
||||
df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(row["Street / Block Name"].lower())
|
||||
]
|
||||
if df.shape[0] == 0:
|
||||
unmatched.append(row["row_id"])
|
||||
continue
|
||||
matched.append(
|
||||
{
|
||||
"row_id": row["row_id"],
|
||||
"original_house_no": original_house_no,
|
||||
"original_street": original_street,
|
||||
"original_postcode": original_postcode,
|
||||
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
|
||||
}
|
||||
)
|
||||
|
||||
if house_no in df["house_no"].values:
|
||||
df = df[df["house_no"] == house_no]
|
||||
|
|
@ -2528,14 +2737,12 @@ class AssetList:
|
|||
]
|
||||
|
||||
if any(
|
||||
df[self.STANDARD_PROPERTY_TYPE].str.contains(
|
||||
row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower()
|
||||
)
|
||||
df[self.STANDARD_PROPERTY_TYPE].str.contains(row[property_type_col].split(" ")[-1].lower())
|
||||
):
|
||||
# We ignore "block of flats" entries
|
||||
df = df[
|
||||
df[self.STANDARD_PROPERTY_TYPE].str.contains(
|
||||
row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower()
|
||||
row[property_type_col].split(" ")[-1].lower()
|
||||
) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")
|
||||
]
|
||||
|
||||
|
|
@ -2545,6 +2752,9 @@ class AssetList:
|
|||
matched.append(
|
||||
{
|
||||
"row_id": row["row_id"],
|
||||
"original_house_no": original_house_no,
|
||||
"original_street": original_street,
|
||||
"original_postcode": original_postcode,
|
||||
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
|
||||
}
|
||||
)
|
||||
|
|
@ -2553,10 +2763,12 @@ class AssetList:
|
|||
|
||||
# We match the "UPRN", which is the landlord's ID, onto the master sheet
|
||||
matched = pd.DataFrame(matched)
|
||||
master_to_append = master_data[["row_id", install_col, submission_col]].merge(
|
||||
master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge(
|
||||
matched, how="left", on="row_id"
|
||||
).rename(
|
||||
columns={
|
||||
scheme_col: "funding_scheme",
|
||||
measure_mix_col: "measure_mix",
|
||||
install_col: "survey_status",
|
||||
submission_col: "submission_date"
|
||||
}
|
||||
|
|
@ -2567,10 +2779,6 @@ class AssetList:
|
|||
master_data["row_id"].isin(unmatched)
|
||||
]
|
||||
|
||||
scheme_col = (
|
||||
"AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
|
||||
"AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH"
|
||||
)
|
||||
# The columns are massively different - we take just a few
|
||||
unmatched_df = unmatched_df[
|
||||
[
|
||||
|
|
|
|||
|
|
@ -62,352 +62,126 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
# TODO:
|
||||
# For cavity work:
|
||||
# - Flag any entries that have a different wall type between non-intrusive data against EPC
|
||||
# - Worth double checking entries that have a difference in wall construction
|
||||
# - Look at anything that is flagged as an empty cavity but the EPC data says it’s a filled cavity
|
||||
# - Look at the current EPC scores - Anything that is C75 or above, especially if it’s assumed no insulation
|
||||
# - By postcode, we can try and deduce if all of the addresses are flats and then estimate if 50% of the flats
|
||||
# are less than C75
|
||||
# - Flag anything pre SAP2012
|
||||
# - Flag anything over 5 years old
|
||||
# - Look at year built vs age band
|
||||
#
|
||||
# For Solar:
|
||||
# - Discount any that have solar PV - based on non-intrusives and from the inspections team
|
||||
# - In the heating, discount anything that isn’t ASHP, GSHP, HHRS or electric storage - possibly homes with
|
||||
# electric room heaters but it might need to be an EPC E
|
||||
# - Fabric - check the floor, wall and roof:
|
||||
# - Filled or empty cavity is good
|
||||
# - Insulated solid/timber/system built is good
|
||||
# - SCIS/CEG needs solid floors
|
||||
# - JJC don’t care
|
||||
# - Anything with a loft 200 or below
|
||||
# - Anything C75 and above won’t qualify
|
||||
# - Insulated loft = 200mm
|
||||
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
|
||||
# - Or the insulation required is loft/cavity (floors should be solid)
|
||||
|
||||
# Torus
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
|
||||
data_filename = "Torus Property Asset List - Phase 1.xlsx"
|
||||
sheet_name = "TORUS"
|
||||
# Thurrock
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
|
||||
data_filename = "THURROCK COUNCIL.xlsx"
|
||||
sheet_name = "Assets"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Property Age"
|
||||
landlord_os_uprn = "NatUPRN"
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Built Form"
|
||||
landlord_wall_construction = "Wall Construction"
|
||||
landlord_roof_construction = "Roof Construction"
|
||||
landlord_heating_system = "Space Heating Source"
|
||||
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_sap = "SAP Score"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = True
|
||||
|
||||
# Southern Midlands
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
|
||||
data_filename = "Southern Housing Midlands Property List - combined.xlsx"
|
||||
sheet_name = "Sheet 1"
|
||||
postcode_column = 'Post Code'
|
||||
fulladdress_column = "Address"
|
||||
fulladdress_column = "Full Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Age_1"
|
||||
landlord_year_built = "Construction Date"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Prop_Type"
|
||||
landlord_built_form = "Prop_Type"
|
||||
landlord_wall_construction = "Walls_P"
|
||||
landlord_heating_system = "Heating System"
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Property Subtype"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = "Main Heating Type"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "AssetID"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
landlord_property_id = "Property Reference"
|
||||
landlord_sap = None
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# PFP London
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London"
|
||||
data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx"
|
||||
sheet_name = "PFP SURROUNDING LONDON"
|
||||
# Medway
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
|
||||
data_filename = "MEDWAY Asset List.xlsx"
|
||||
sheet_name = "Asset list"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_column = "House Number"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
address_cols_to_concat = ["House Number", "Street 1"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_year_built = "Year Built"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype (PFP)"
|
||||
landlord_built_form = "Archetype (PFP)"
|
||||
landlord_property_type = "Property Type - Academy"
|
||||
landlord_built_form = "Property Type - Academy"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Uprn"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
landlord_property_id = "Row ID"
|
||||
landlord_sap = None
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# PFP North-West
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
|
||||
data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
|
||||
sheet_name = "CHECKED"
|
||||
# MHS
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
|
||||
data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
fulladdress_column = "FullAddress"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_year_built = "BuiltInYear"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype (PFP)"
|
||||
landlord_built_form = "Archetype (PFP)"
|
||||
landlord_property_type = "AssetType"
|
||||
landlord_built_form = "PropertyType"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Uprn"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# PFP North-East
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East"
|
||||
data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx"
|
||||
sheet_name = "CHECKED"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype (PFP)"
|
||||
landlord_built_form = "Archetype (PFP)"
|
||||
landlord_wall_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Uprn"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# PFP East
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
|
||||
data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
|
||||
sheet_name = "PFP EAST"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype (PFP)"
|
||||
landlord_built_form = "Archetype (PFP)"
|
||||
landlord_wall_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Uprn"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# Wates
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
|
||||
data_filename = "ECO 4 Wates.xlsx"
|
||||
sheet_name = "Roadmap Homes"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "Address Line 1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Build Year"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype"
|
||||
landlord_built_form = "Archetype"
|
||||
landlord_wall_construction = "Wall"
|
||||
landlord_heating_system = "Heating Type"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "UPRN"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
landlord_sap = None
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# Ealing
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
|
||||
# data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
|
||||
# sheet_name = "IGNORE - FULL MAIN"
|
||||
# postcode_column = 'Postcode'
|
||||
# Southern Midlands
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
|
||||
# data_filename = "Southern Housing Midlands Property List - combined.xlsx"
|
||||
# sheet_name = "Sheet 1"
|
||||
# postcode_column = 'Post Code'
|
||||
# fulladdress_column = "Address"
|
||||
# address1_column = None
|
||||
# address1_method = "first_word"
|
||||
# address1_method = "house_number_extraction"
|
||||
# address_cols_to_concat = []
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = "Year Built"
|
||||
# landlord_year_built = "Age_1"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property Type Code"
|
||||
# landlord_wall_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_property_type = "Prop_Type"
|
||||
# landlord_built_form = "Prop_Type"
|
||||
# landlord_wall_construction = "Walls_P"
|
||||
# landlord_heating_system = "Heating System"
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "Property ref"
|
||||
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
||||
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
||||
# sheet_name = "Sheet1"
|
||||
# postcode_column = 'Full Address.1'
|
||||
# fulladdress_column = "Full Address"
|
||||
# address1_column = None
|
||||
# address1_method = "first_word"
|
||||
# address_cols_to_concat = []
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = "Build Date"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property Type"
|
||||
# landlord_wall_construction = "Wallinsul"
|
||||
# landlord_heating_system = "HeatSorc"
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "Property Reference"
|
||||
# landlord_property_id = "AssetID"
|
||||
# outcomes_filename = None
|
||||
# outcomes_sheetname = None
|
||||
# outcomes_postcode = None
|
||||
# outcomes_houseno = None
|
||||
# outcomes_id = None
|
||||
# outcomes_address = None
|
||||
# master_filepaths = []
|
||||
# master_to_asset_list_filepath = None
|
||||
|
||||
# For Westward
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
|
||||
data_filename = "WESTWARD - completed list - 20.03.2025.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = "WFT EDIT Postcode"
|
||||
fulladdress_column = "Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Build date"
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = "Location type"
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = "Wall Construction (EPC)"
|
||||
landlord_heating_system = "Heat Source"
|
||||
landlord_existing_pv = "PV (Y/N)"
|
||||
landlord_property_id = "Place ref"
|
||||
landlord_roof_construction = None
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# For ACIS - programme re-build
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
|
||||
# data_filename = "ACIS asset list.xlsx"
|
||||
# sheet_name = "Assets"
|
||||
# address1_column = "House No"
|
||||
# postcode_column = "Postcode"
|
||||
# landlord_property_id = "UPRN"
|
||||
# fulladdress_column = None
|
||||
# address_cols_to_concat = ["House No", "Street", "Town"]
|
||||
# missing_postcodes_method = None
|
||||
# address1_method = None
|
||||
# landlord_year_built = "YEAR BUILT"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property type"
|
||||
# landlord_built_form = None
|
||||
# landlord_wall_construction = "Wall Constuction"
|
||||
# landlord_roof_construction = None
|
||||
# landlord_sap = None
|
||||
# landlord_heating_system = "Heating"
|
||||
# landlord_existing_pv = None
|
||||
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
|
||||
# outcomes_sheetname = "Feedback"
|
||||
# outcomes_postcode = "Postcode"
|
||||
# outcomes_address = "Address"
|
||||
# outcomes_houseno = "No"
|
||||
# outcomes_id = None
|
||||
# master_filepaths = [
|
||||
# os.path.join(data_folder, "ECO 3 -Table 1.csv"),
|
||||
# os.path.join(data_folder, "ECO 4 -Table 1.csv"),
|
||||
# ]
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
|
||||
# For plus dane
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
|
||||
data_filename = "PLUS DANE Asset List - for analysis.xlsx"
|
||||
sheet_name = "Asset List"
|
||||
address1_column = " Address"
|
||||
postcode_column = " Postcode"
|
||||
landlord_property_id = "UPRN"
|
||||
fulladdress_column = " Address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
address1_method = None
|
||||
landlord_year_built = "Property Age"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_wall_construction = "Landlord Wall Full"
|
||||
landlord_heating_system = "Landlord Heating"
|
||||
landlord_existing_pv = None
|
||||
outcomes_filename = "plus dane outcomes.xlsx"
|
||||
outcomes_sheetname = "EVERYTHING"
|
||||
outcomes_postcode = "Post Code"
|
||||
outcomes_houseno = "Numb."
|
||||
master_filepaths = [
|
||||
os.path.join(data_folder, "JJC Rolling Master.csv"),
|
||||
os.path.join(data_folder, "SCIS Rolling Master.csv"),
|
||||
]
|
||||
master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")
|
||||
|
||||
# Maps addresses to uprn in problematic cases
|
||||
manual_uprn_map = {}
|
||||
|
||||
|
|
@ -482,7 +256,7 @@ def app():
|
|||
|
||||
# We now flag properties that have been treated under existing programmes
|
||||
asset_list.flag_outcomes(
|
||||
outcomes_filepath=os.path.join(data_folder, outcomes_filename) if outcomes_filename else None,
|
||||
outcomes_filepaths=outcomes_filename,
|
||||
outcomes_sheetname=outcomes_sheetname,
|
||||
outcomes_address=outcomes_address,
|
||||
outcomes_postcode=outcomes_postcode,
|
||||
|
|
@ -504,7 +278,7 @@ def app():
|
|||
epc_api_only = False
|
||||
force_retrieve_data = False
|
||||
skip = None # Used to skip already completed chunks
|
||||
chunk_size = 1000
|
||||
chunk_size = 5000
|
||||
filename = "Chunk {i}.csv"
|
||||
download_folder = os.path.join(data_folder, "Chunks")
|
||||
if not os.path.exists(download_folder):
|
||||
|
|
@ -611,6 +385,12 @@ def app():
|
|||
transformed_data.append(row_data)
|
||||
|
||||
transformed_df = pd.DataFrame(transformed_data)
|
||||
for col in [
|
||||
"Floor insulation (solid floor)",
|
||||
"Floor insulation", "Floor insulation (suspended floor)"
|
||||
]:
|
||||
if col not in transformed_df.columns:
|
||||
transformed_df[col] = False
|
||||
transformed_df = transformed_df[
|
||||
[
|
||||
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ STANDARD_BUILT_FORMS = {
|
|||
# Houses
|
||||
"end-terrace", "semi-detached", "detached", "mid-terrace",
|
||||
# Flats
|
||||
"ground floor", "mid-floor", "top-floor", "basement"
|
||||
"ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise",
|
||||
}
|
||||
|
||||
BUILT_FORM_MAPPINGS = {
|
||||
|
|
@ -212,5 +212,123 @@ BUILT_FORM_MAPPINGS = {
|
|||
'5 Ext. Wall Flat': 'unknown',
|
||||
'Unknown': 'unknown',
|
||||
'Enclosed mid-terrace': 'mid-terrace',
|
||||
'Enclosed end-terrace': 'end-terrace'
|
||||
'Enclosed end-terrace': 'end-terrace',
|
||||
|
||||
'House GROUND FLOOR': 'ground floor',
|
||||
'Flat? GROUND FLOOR': 'ground floor',
|
||||
'House SD SEMI DETACHED': 'semi-detached',
|
||||
'House SEMI DETACHED': 'semi-detached',
|
||||
'Flat GROUND FLOOR': 'ground floor',
|
||||
'': 'unknown',
|
||||
'Flat SEVENTH FLOOR': 'mid-floor',
|
||||
'House D': 'detached',
|
||||
'House ET': 'end-terrace',
|
||||
'House SD Homeless Unit': 'semi-detached',
|
||||
'House MT Homeless Unit': 'mid-terrace',
|
||||
'Bungalow ET': 'end-terrace',
|
||||
'Bungalow D': 'detached',
|
||||
'House SD': 'semi-detached',
|
||||
'Bungalow Sheltered Accomodation': 'unknown',
|
||||
'House. SD': 'semi-detached',
|
||||
'Flat FIRST FLOOR MAISONETTE': 'ground floor',
|
||||
'Bungalow SD': 'semi-detached',
|
||||
'Flat FIRST FLOOR': 'ground floor',
|
||||
'Flat Sheltered Accomodation': 'unknown',
|
||||
'Flat SIXTH FLOOR': 'mid-floor',
|
||||
'Flat EIGHTH FLOOR': 'mid-floor',
|
||||
'Flat FOURTH FLOOR': 'mid-floor',
|
||||
'Flat Homeless Unit': 'unknown',
|
||||
'Bungalow MT': 'mid-terrace',
|
||||
'Bungalow Homeless Unit': 'unknown',
|
||||
'House MT': 'mid-terrace',
|
||||
'Flat FIFTH FLOOR': 'mid-floor',
|
||||
'Flat NINTH FLOOR': 'mid-floor',
|
||||
'House SD FIRST FLOOR': 'semi-detached',
|
||||
'Bungalow Supported housing': 'unknown',
|
||||
'Flat THIRD FLOOR': 'mid-floor',
|
||||
'Flat SECOND FLOOR': 'mid-floor',
|
||||
'House Homeless Unit': 'unknown',
|
||||
'Flat ELEVENTH FLOOR': 'mid-floor',
|
||||
'Flat TENTH FLOOR': 'mid-floor',
|
||||
'House. MT': 'mid-terrace',
|
||||
'Ground Floor Bedsit': 'ground floor',
|
||||
'Mid Terrace With Passage': 'mid-terrace',
|
||||
'End Of Terrace': 'end-terrace',
|
||||
'Ground Floor Maisonette': 'ground floor',
|
||||
'First Floor Bedsit': 'mid-floor',
|
||||
'GROUND FLOOR BEDSIT': 'ground floor',
|
||||
'GROUND FLOOR FLAT': 'ground floor',
|
||||
'BUNGALOW': 'unknown',
|
||||
'HOUSE 1 LIVING ROOM': 'unknown',
|
||||
'MAISONETTE OVER SHOP': 'unknown',
|
||||
'SECOND FLOOR FLAT': 'mid-floor',
|
||||
'FIRST FLOOR FLAT': 'ground floor',
|
||||
'GROUND FL MAISONETTE': 'ground floor',
|
||||
'HOUSE 2 LIVING ROOMS': 'unknown',
|
||||
'FLAT OVER SHOP': 'unknown',
|
||||
|
||||
'4 Bed Detached House': 'detached',
|
||||
'2 Bed Detached House': 'detached',
|
||||
'3 Bed Detached Bungalow': 'detached',
|
||||
'1 Bed Semi-Detached House': 'semi-detached',
|
||||
'2 Bed Semi-Detached House': 'semi-detached',
|
||||
'2 Bed Detached Bungalow': 'detached',
|
||||
'1 Bed Mid Terrace Bungalow': 'mid-terrace',
|
||||
'4 Bed Semi-Detached Bungalow': 'semi-detached',
|
||||
'3 Bed Mid Terrace Bungalow': 'mid-terrace',
|
||||
'3 Bed Semi-Detached Bungalow': 'semi-detached',
|
||||
'3 Bed Mid Terrace House': 'mid-terrace',
|
||||
'2 Bed Mid Terrace House': 'mid-terrace',
|
||||
'3 Bed Detached House': 'detached',
|
||||
'2 Bed Semi-Detached Bungalow': 'semi-detached',
|
||||
'5 Bed Mid Terrace House': 'mid-terrace',
|
||||
'2 Bed Mid Terrace Bungalow': 'mid-terrace',
|
||||
'3 Bed Semi-Detached House': 'semi-detached',
|
||||
'1 Bed Semi-Detached Bungalow': 'semi-detached',
|
||||
'4 Bed Mid Terrace House': 'mid-terrace',
|
||||
'1 Bed Detached Bungalow': 'detached',
|
||||
'5 Bed Semi-Detached House': 'semi-detached',
|
||||
'6 Bed Detached House': 'detached',
|
||||
'1 Bed Mid Terrace House': 'mid-terrace',
|
||||
'4 Bed Semi-Detached House': 'semi-detached',
|
||||
'TBA': 'unknown',
|
||||
'1 Bed EOT House': 'end-terrace',
|
||||
'3 Bed Flat': 'unknown',
|
||||
'5 Bed EOT House': 'end-terrace',
|
||||
'1 Bed EOT Bungalow': 'end-terrace',
|
||||
'2 Bed EOT House': 'end-terrace',
|
||||
'1 Bed Studio Flat': 'unknown',
|
||||
'3 Bed Maison': 'unknown',
|
||||
'Commercial Letting': 'unknown',
|
||||
'4 Bed Maison': 'unknown',
|
||||
'2 Bed Flat': 'unknown',
|
||||
'3 Bed EOT House': 'end-terrace',
|
||||
'2 Bed Maison': 'unknown',
|
||||
'4 Bed EOT House': 'end-terrace',
|
||||
'1 Bed Flat': 'unknown',
|
||||
'3 Bed EOT Bungalow': 'end-terrace',
|
||||
'1 Bed Maison': 'unknown',
|
||||
'2 Bed EOT Bungalow': 'end-terrace',
|
||||
|
||||
'Bungalow detached': 'detached',
|
||||
'Bungalow semi detached': 'semi-detached',
|
||||
'Sheltered bungalow semi detached': 'semi-detached',
|
||||
'Bedsit bungalow semi detached': 'semi-detached',
|
||||
'Semi detached house': 'semi-detached',
|
||||
'Bedsit bungalow terraced': 'mid-terrace', 'Terraced house': 'mid-terrace',
|
||||
'Sheltered flat': 'unknown',
|
||||
'APD Bungalow': 'unknown',
|
||||
'Flat with partition': 'unknown',
|
||||
'APD flat': 'unknown',
|
||||
'Sheltered warden flat': 'unknown',
|
||||
'Sheltered bedsit': 'unknown',
|
||||
'Sheltered bungalow terraced': 'mid-terrace',
|
||||
'Block': 'unknown',
|
||||
'Bungalow terraced': 'mid-terrace',
|
||||
'Maisonette flat': 'unknown',
|
||||
'Sheltered bedsit disabled': 'unknown',
|
||||
'Bedsit Flat': 'unknown',
|
||||
'Low Rise': 'low rise',
|
||||
'Upper Floor': 'top-floor',
|
||||
'High Rise': 'high rise',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ STANDARD_HEATING_SYSTEMS = {
|
|||
"gas boiler, radiators",
|
||||
"electric storage heaters",
|
||||
"district heating",
|
||||
"communal heating"
|
||||
"communal heating",
|
||||
"gas condensing boiler",
|
||||
"oil boiler",
|
||||
"gas condensing combi",
|
||||
|
|
@ -32,7 +32,7 @@ STANDARD_HEATING_SYSTEMS = {
|
|||
|
||||
HEATING_MAPPINGS = {
|
||||
"Combi - GAS": "gas combi boiler",
|
||||
"E7 Storage Heaters": "high heat retention storage heaters",
|
||||
"E7 Storage Heaters": "electric storage heaters",
|
||||
"District heating system": "district heating",
|
||||
"Condensing Boiler - GAS": "gas condensing boiler",
|
||||
"Boiler Oil/other": "oil boiler",
|
||||
|
|
@ -50,7 +50,7 @@ HEATING_MAPPINGS = {
|
|||
"Gas fire": "other",
|
||||
"Backboiler - Solid fuel": "other",
|
||||
'combi - gas': 'gas combi boiler',
|
||||
'e7 storage heaters': 'high heat retention storage heaters',
|
||||
'e7 storage heaters': 'electric storage heaters',
|
||||
'district heating system': 'district heating',
|
||||
'condensing boiler - gas': 'gas condensing boiler',
|
||||
'boiler oil/other': 'oil boiler',
|
||||
|
|
@ -220,5 +220,76 @@ HEATING_MAPPINGS = {
|
|||
'Boiler/ underfloor': 'electric underfloor',
|
||||
'Storage system': "non-electric underfloor",
|
||||
'BOILER': 'gas combi boiler',
|
||||
'SPACE_HEATER': 'room heaters'
|
||||
'SPACE_HEATER': 'room heaters',
|
||||
'AIR': 'air source heat pump',
|
||||
'FSOL': 'solid fuel',
|
||||
'PDEV': 'unknown',
|
||||
'GASF': 'gas boiler, radiators',
|
||||
'CONO': 'no heating',
|
||||
'FELE HRSH': 'high heat retention storage heaters',
|
||||
'FOIL': 'oil boiler',
|
||||
'FDEV': 'unknown',
|
||||
'FNON': 'non-electric underfloor',
|
||||
'FGAS': 'gas combi boiler',
|
||||
'FELE': 'electric fuel',
|
||||
'GRNE': 'ground source heat pump',
|
||||
|
||||
'High Heat Storage Heaters': 'high heat retention storage heaters',
|
||||
'Electric Radiators': 'electric radiators',
|
||||
'Electric Air Source Heat Pump': 'air source heat pump',
|
||||
'Gas Combi Condensing Boiler': 'gas condensing combi',
|
||||
'Electric Boiler Heating': 'electric boiler',
|
||||
'Solid Fuel Open Back Boiler Heating': 'solid fuel',
|
||||
'Solid Fuel Closed Back Boiler Heating': 'solid fuel',
|
||||
'Oil Boiler': 'oil boiler',
|
||||
'Electric Storage Heaters': 'electric storage heaters',
|
||||
'Gas Combi Boiler Heating': 'gas combi boiler',
|
||||
'Electric NIBE Heating System': 'air source heat pump',
|
||||
'Gas Back Boiler': 'gas boiler, radiators',
|
||||
'Electric Gel/Oil Filled Radiators': 'electric radiators',
|
||||
'No Information': 'unknown',
|
||||
'Oil Combination Boiler Heating': 'oil boiler',
|
||||
'Electric DSR Heat Retention Radiators': 'high heat retention storage heaters',
|
||||
'Communal Heating System': 'communal heating',
|
||||
'Description': 'unknown',
|
||||
'Oil Combi Condensing Boiler Heating': 'oil boiler',
|
||||
'Gas Combi Condensing Boiler Heating': 'gas condensing combi',
|
||||
'Electric Warm Air Heating': 'electric fuel',
|
||||
'Gas System Boiler Heating': 'gas boiler, radiators',
|
||||
'Gas Back Boiler Heating': 'gas boiler, radiators',
|
||||
'Electric Gel/Oil Fllled Radiators': 'electric radiators',
|
||||
'Gas Condensing Boiler Heating': 'gas condensing combi',
|
||||
'Gas Combi Condensing Boiler Heatiner': 'gas condensing combi',
|
||||
'Oil Standard Boiler Heating': 'oil boiler',
|
||||
'Oil Condensing Boiler Heating': 'oil boiler',
|
||||
'Electric ASHP': 'air source heat pump',
|
||||
'Modern Slimline Storage Heaters': 'electric storage heaters',
|
||||
# These are boiler makes (from Unitas) rather than heating types; each named make is mapped to 'gas combi boiler'
|
||||
'UNKNOWN': 'unknown',
|
||||
'IDEAL': 'gas combi boiler',
|
||||
'VAILLANT': 'gas combi boiler',
|
||||
'THORN': 'gas combi boiler',
|
||||
'WORCESTER': 'gas combi boiler',
|
||||
'GLOW WORM': 'gas combi boiler',
|
||||
'VOKERA': 'gas combi boiler',
|
||||
'POTTERTON': 'gas combi boiler',
|
||||
'BAXI SOLO': 'gas combi boiler',
|
||||
'BAXI BERMUDA': 'gas combi boiler',
|
||||
'BAXI': 'gas combi boiler',
|
||||
|
||||
'Combi Boiler': 'gas combi boiler',
|
||||
'Air Source Heat Pump': 'air source heat pump',
|
||||
'Dual Fuel': 'other',
|
||||
'Regular Boiler': 'gas condensing boiler',
|
||||
'No Main Heating': 'no heating',
|
||||
'None (via Communal System)': 'communal heating',
|
||||
'No Mains Heating': 'no heating',
|
||||
'Open Fire with Back Boiler': 'solid fuel',
|
||||
'No Gas Boiler': 'no heating',
|
||||
'Back Boiler': 'solid fuel',
|
||||
"This cell has an external reference that can't be shown or edited. Editing this cell will remove the external "
|
||||
"reference.": 'unknown',
|
||||
'Communal Heating': 'communal heating',
|
||||
'No Data': 'unknown',
|
||||
'Boiler System': 'gas condensing boiler',
|
||||
}
|
||||
|
|
|
|||
231
asset_list/mappings/outcomes.py
Normal file
231
asset_list/mappings/outcomes.py
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
"""
|
||||
This module normalises the non-standard, free-text outcomes observed in the LHP outcomes sheet
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
# Canonical outcome categories. Every free-text outcome handled by this module
# is expected to be normalised onto one of these labels.
outcomes_values = [
    "Access Issues",
    "No Outcome",
    "Asked for a later date",
    "Customer Refusal",
    "Installer Refusal",
    "No Answer",
    "Not Viable",
    "Surveyed",
    "Rescheduled",
    "Not Knocked",
    "Void",
]
|
||||
|
||||
# Lookup from the raw, free-text outcome strings to a canonical outcome label.
# Keys are matched verbatim (case, punctuation and original typos included), so
# they must not be "tidied up". Values are one of the labels in
# `outcomes_values`, or the catch-all 'unknown' for missing values and notes
# that do not describe an outcome.
outcomes_map = {
    'Access issues, shed against rear wall. Sent photos to Matt JJC, declined': 'Access Issues',
    'NO ANSWER /TICKET LEFT': 'No Answer',
    'Looks Void - No Answer': 'No Answer',
    'No Answer - they were in - No response to my drop card': 'No Answer',
    'No Answer': 'No Answer',
    'No Answer - Even they were in - No response to my drop card': 'No Answer',
    'no answer': 'No Answer',
    'NO ANSWER': 'No Answer',
    'No answer': 'No Answer',
    # Missing outcome cells
    np.nan: 'unknown',
    'Access Issues Health reasons try another time': 'Access Issues',
    'LOFT FULL, CUSTOMER WONT REMOVE': 'Access Issues',
    'Failed Appointment - Ivy': 'Access Issues',
    'Failed Appointment - Void soon': 'Void',
    'Hoarding in loft': 'Access Issues',
    'Non Complained - Extension at rear and side': 'Not Viable',
    'Said No letter - then texted me I can only do outside but cant come in': 'Customer Refusal',
    'Hoarding - unwilling to shift from loft': 'Customer Refusal',
    'Overgrown vegatation - Happy for HA to deal with': 'Access Issues',
    'No access to side of property': 'Not Viable',
    'Very rude': 'Customer Refusal',
    'REFUSED ACCESS': 'Customer Refusal',
    'SURVEYED': 'Surveyed',
    'ELECTRIC ROOM HEATERS. Kieran to check re funding and possible PV?': 'Not Viable',
    'SUBMITTED': 'Surveyed',
    '2 single storey extensions': 'Not Viable',
    'Rebook': 'Rescheduled',
    'surveyed': 'Surveyed',
    'not intrested': 'Customer Refusal',
    'Fixed seating area against rear elevation': 'Not Viable',
    "Matt said can't install": 'Installer Refusal',
    'Gave excuses to come this and that time and no reponse': 'No Answer',
    'NOT KNOCKED': 'Not Knocked',
    'VOID PROPERTY': 'Void',
    'Glass lean to. JJC declined': 'Installer Refusal',
    'Left slip Overgrown vegatation': 'No Answer',
    'covid': 'Rescheduled',
    'Lean-to on side elevation': 'Not Viable',
    'Opted out as moving out': 'Customer Refusal',
    'Surveyed': 'Surveyed',
    'refused': 'Customer Refusal',
    'COVID': 'Rescheduled',
    'Said No letter received and didn’t answer again': 'No Answer',
    'Survey completed': 'Surveyed',
    'Loft fully boarded': 'Access Issues',
    'Not Available during the day': 'No Answer',
    'Conservatory. JJC declined.': 'Installer Refusal',
    'Booked for 19.10.23': 'Rescheduled',
    'LETTER LEFT': 'No Answer',
    'Knocked/lettered': 'No Answer',
    'Survey Complete': 'Surveyed',
    'Refused by calling office': 'Customer Refusal',
    # Was 'No Viable', which is not a canonical label
    'Extension on rear elevation': 'Not Viable',
    'Left Slip - Potential access issue with conservatory': 'Access Issues',
    'Overgrown vegatation': 'Access Issues',
    'Left slip Overgrown Ivy and Hedge': 'No Answer',
    'NOT AVAILABLE THIS WEEK': 'No Answer',
    'Unwilling to clear loft': 'Access Issues',
    'survey complete': 'Surveyed',
    'ivy on wall': 'Access Issues',
    'not in': 'No Answer',
    'Covid shrub very close to building': 'Rescheduled',
    'ON HOLIDAY, UNDER 18 IN HOUSE': 'Rescheduled',
    'wont do as extention': 'Not Viable',
    'IN, WONT ANSWER': 'Customer Refusal',
    'Too many plants next to the walls': 'Access Issues',
    'obstructions': 'Access Issues',
    'Left slip -Wall plant': 'Access Issues',
    'On holiday': 'No Answer',
    'Failed appointment': 'No Answer',
    'LOFT FULLY BOARDED': 'Access Issues',
    'ivy and didn’t want people inside the house': 'Customer Refusal',
    'Partly IWI': 'Not Viable',
    'Covid': 'Rescheduled',
    'REFUSE TO REMOVE IVY': 'Access Issues',
    'Insulated 2 years ago. Carbon bead in walls, 300mm rock wool in loft': 'Not Viable',
    'INCONVIENIENT TIME': 'No Answer',
    'EXT TO REAR': 'Not Viable',
    'Not In': 'No Answer',
    'Damp issues.Black mould on walls': 'Access Issues',
    'Lean to. JJC declined': 'Installer Refusal',
    'DISABLED CHILD / INCONVIENIENT': 'Customer Refusal',
    'Plants on wall': 'Access Issues',
    'Left Slip': 'No Answer',
    'Never answered': 'No Answer',
    'SOLAR PV CONNECTED TO MAINS': 'Not Viable',
    'Bungalow': 'unknown',
    'call back': 'No Answer',
    'Message from WFT OFFICE; tenant unavailable this week, no telephone number provided': 'Rescheduled',
    'LEAN TO PRESENT': 'Not Viable',
    'She said come Tuesday and never answered': 'Rescheduled',
    'Sold': 'Surveyed',
    'Too much mould and cluttered house': 'Access Issues',
    'Overgrown vegatation will call when clear': 'Access Issues',
    'LOFT DEC 2013': 'Not Viable',
    'Ivy': 'Access Issues',
    'Booked for next week': 'Rescheduled',
    'empty': 'Void',
    'Been told property is empty as tenant has passed away': 'Void',
    'Non Complianced - Single Storey Extension to the front and rear': 'Not Viable',
    'Going back this week': 'Rescheduled',
    'Loft insulated in last few months. Ongoing damp issues in bathroom, black mould up wall': 'Access Issues',
    'rear Extension': 'Not Viable',
    'DECKING AROUND PROPERTY IN BREACH OF DPC BY 300MM': 'Not Viable',
    'Said no letter received': 'Customer Refusal',
    'Unwell, not convenient this week': 'Rescheduled',
    'IVY on Wall': 'Access Issues',
    'REFUSED EXTRACTOR': 'Customer Refusal',
    'ON HOLIDAY': 'Rescheduled',
    'COVID. Not this week.': 'Rescheduled',
    'COVID POSITIVE': 'Rescheduled',
    'VOID. Appears to be under refurbishment': 'Void',
    'Survey Completed': 'Surveyed',
    'INCONVIENIENT': 'Rescheduled',
    'Knocked/lettered. 07598 112360': 'No Answer',
    'Single skin lean to. JJC declined': 'Installer Refusal',
    'DENIES LETER, REFUSED ACCESS': 'Customer Refusal',
    'Loft hoard unable to clear': 'Access Issues',
    'Left Slip - Look Void': 'Void',
    'EXCESSIVE IVY GROWTH, CUSOMER UNABLE TO REMOVE, ELDERLEY': 'Access Issues',
    'Refused': 'Customer Refusal',
    'REFUSED / INCONVENIENT': 'Customer Refusal',
    'AGGRESSIVE DOGS LOOSE IN FRONT GARDEN': 'Access Issues',
    'EXCESSIVE IVY': 'Access Issues',
    "Won't remove plastic roof": 'Access Issues',
    'SURVEY COMPLETED': 'Surveyed',
    'VOID. Under refurbishment. Electric storage heating currently removed for refurbishment': 'Void',
    'Surveyed ECO4': 'Surveyed',
    'after 5.30': 'Rescheduled',
    'CUSTOMER IN, WONT ANSWER DOOR': 'No Answer',
    'IVY': 'Access Issues',
    'Single storey extension on gable': 'Not Viable',
    'No answer.': 'No Answer',
    'Full extension at rear. Not viable.': 'Not Viable',
    'Access issues': 'Access Issues',
    'VOID PROPERTY NOW': 'Void',
    'Not viable': 'Not Viable',
    'Looks like a VOID property': 'Void',
    'NOT VIABLE': 'Not Viable',
    'No Answer.': 'No Answer',
    'Not viable.': 'Not Viable',
    'Looks to be void.': 'Void',
    'Access issues and loft fully boarded/full': 'Access Issues',
    'Extension on property. Not Viable': 'Not Viable',
    'No good. Serious Access issues.': 'Access Issues',
    'Surveyed and Submitted': 'Surveyed',
    'UNSANITARY CONDITIONS, RUBBISH EVERYWHERE': 'Access Issues',
    'Will call when rubbish removed.': 'Access Issues',
    'Covered in Ivy': 'Access Issues',
    'CUSTOMER REFUSED': 'Customer Refusal',
    'Still covered in ivy': 'Access Issues',
    'CUSTOMER SHOUTED OUT OF WINDOW TO COME BACK ANOTHER TIME': 'Customer Refusal',
    "Extension on property, can't be done.": 'Not Viable',
    'Will be looking to do Survey WC 19.02': 'Rescheduled',
    "Tenant was working, couldn't do survey.": 'No Answer',
    'PROPERTY EMPTY, SPOKE TO EX TENNANT WHO LEFT 3 WEEKS AGO?': 'Void',
    'Will call back.': 'Rescheduled',
    "Tenant not interested. Won't empty loft.": 'Customer Refusal',
    "Won't answer door.": 'Customer Refusal',
    "Tenant 'Doesn't want anything to do with LHP'": 'Customer Refusal',
    "Loft full. Tenant won't empty.": 'Access Issues',
    'Covered in foliage': 'Access Issues',
    'Customer not home for appointment.': 'No Answer',
    'Blown in bead': 'Not Viable',
    'Distance to property to far from road.': 'Access Issues',
    'LOFT FULL, CUSTOMER UNABLE TO CLEAR': 'Access Issues',
    'Stuff against rear wall. Will call when removed.': 'Access Issues',
    'Will call when rubbish is removed': 'Access Issues',
    'Mid Terrace': 'unknown',
    'Tile Hung areas.': 'Not Viable',
    'REFUSED / UNABLE TO CLEAR LOFT': 'Customer Refusal',
    'Calling back on Monday (19.02)': 'Rescheduled',
    'Solid Wall': 'Not Viable',
    'FAULTY PHONE NUMBER, 3 X KNOCK, LETTER LEFT ON FIRST ATTEMPT, NO REPLY OR CALL BACK': 'No Answer',
    'Not interested': 'Customer Refusal',
    'ACCESS DENIED': 'Customer Refusal',
    'Covered in Ivy.': 'Access Issues',
    'UNABLE TO GENERATE SAP GAIN WITH EXTENSIONS FRONT AND REAR': 'Not Viable',
    'Extension on the property.': 'Not Viable',
    "Covered in Ivy. Can't remove it.": 'Access Issues',
    'Booked in, but not in when called back': 'No Answer',
    'EXCESSIVE IVY ON WALLS (SEE PICS)': 'Access Issues',
    'Moved out': 'Void',
    'Buying the property. Not interested.': 'Customer Refusal',
    'Not been to yet': 'No Answer',
    'CUSTOMER STATES LOFT WAS INSULATED A FEW MONTHS AGO BY LHP': 'Customer Refusal',
    'Will try again.': 'No Answer',
    'HOUSE MARTINS NESTING IN EAVES OF 3 ADJOINING PROPERTIES': 'Access Issues',
    'Told me to call back': 'Rescheduled',
    'CUSTOMER SAYS PROPERTY ALREADY REFUSED AT PREVIOUS SURVEY, NO REASON GIVEN': 'Customer Refusal',
    "Won't answer the door.": 'Customer Refusal',
    'Tenant not interested.': 'Customer Refusal',
    'Keep trying, keeps putting me off.': 'Customer Refusal',
    'Already insulated.': 'Not Viable',
    'Works all day.': 'No Answer',
    'PROPERTY COVER IN FOILAGE AND SHRUBS': 'Access Issues',
    'ACCESS IVY GROWTH, LEAN TO / CONSERVATORY IN WAY OF REAR': 'Not Viable',
    "Tenant unwell. Doesn't want survey.": 'No Answer',
    'Wont empty loft.': 'Access Issues',
    'LOFT FULLY BOARDED AS PREVIOUSLY DISCUSSED WITH CUSTOMER BY PREVIOUS SURVEYOR': 'Access Issues',
    "Property can't be done.": 'Not Viable',
    'Works everyday. Will call.': 'No Answer',
    'A LOT OF FOLIAGE IN WAY, PROPERTY LOOKS EMPTY FROM OUTSIDE?': 'Void',
    "Very old tenant. Said they didn't want it.": 'Customer Refusal',
    'Covered in ivy. Unable to remove.': 'Access Issues',
    'Climbers on walls': 'Access Issues',
    'Will not remove foliage': 'Access Issues',
    'Not Interested.': 'Customer Refusal',
    'OFF GAS': 'unknown',
    'Tenant not interested': 'Customer Refusal',
    'Will call me. Left my number.': 'Rescheduled',
    'Keep trying but keeps putting me off': 'Customer Refusal',
    'Moving out.': 'Void',
    # Was 'Recheduled', which is not a canonical label
    'Booked in': 'Rescheduled',
    # Was 'Customr Refusal', which is not a canonical label
    'Refused Survey': 'Customer Refusal',
    'Big dogs running around front garden.': 'Access Issues',
    'CUSTOMER HAS CLADDED WALL AT REAR IN CONSERVATORY, REFUSED INTERNAL DRILL': 'Customer Refusal',
    'Booked in.': 'Rescheduled',
    'WRONG ADDRESS?': 'unknown',
    'Works everyday. Will call me.': 'No Answer',
    'Will not remove foliage.': 'Access Issues',
}
|
||||
|
|
@ -194,5 +194,63 @@ PROPERTY_MAPPING = {
|
|||
'Maisonette 2 Ext. Wall': 'maisonette',
|
||||
'5 Ext. Wall Flat': 'flat',
|
||||
'Bungalow Semi Detached': 'bungalow',
|
||||
'COMINT': 'unknown'
|
||||
'COMINT': 'unknown',
|
||||
'12 SBEDSIT': 'bedsit',
|
||||
'01 HOUSE': 'house',
|
||||
'05 BEDSIT': 'bedsit',
|
||||
'14 SFLAT': 'flat',
|
||||
'09 PBEDSIT': 'bedsit',
|
||||
'10 PBUNGALOW': 'bungalow',
|
||||
'13 SBUNGALOW': 'bungalow',
|
||||
'11 PFLAT': 'flat',
|
||||
'02 FLAT': 'flat',
|
||||
'04 MAISONETTE': 'maisonette',
|
||||
'01 HOUSE MID': 'house',
|
||||
'03 BUNGALOW': 'bungalow',
|
||||
'Flat?': 'flat',
|
||||
'Bungalow ET': 'bungalow',
|
||||
'House. SD': 'house',
|
||||
'Bungalow SD': 'bungalow',
|
||||
'Bungalow D': 'bungalow',
|
||||
'House D': 'house',
|
||||
'House SD': 'house',
|
||||
'House ET': 'house',
|
||||
'Bungalow MT': 'bungalow',
|
||||
'House MT': 'house',
|
||||
'House. MT': 'house',
|
||||
'': 'unknown',
|
||||
'GROUND FLOOR BEDSIT': 'bedsit',
|
||||
'HOUSE 1 LIVING ROOM': 'house',
|
||||
'MAISONETTE OVER SHOP': 'maisonette',
|
||||
'GROUND FLOOR FLAT': 'flat',
|
||||
'SECOND FLOOR FLAT': 'flat',
|
||||
'FIRST FLOOR FLAT': 'flat',
|
||||
'GROUND FL MAISONETTE': 'maisonette',
|
||||
'HOUSE 2 LIVING ROOMS': 'house',
|
||||
'FLAT OVER SHOP': 'flat',
|
||||
'House With Integral Garage': 'house',
|
||||
'Flat Over Parking/Accessway': 'flat',
|
||||
'Flat Over Binstore': 'flat',
|
||||
'Flat Over Garage': 'flat',
|
||||
'House With Independent Garage': 'house',
|
||||
'Studio': 'flat',
|
||||
'Bedsit bungalow terraced': 'bedsit',
|
||||
'Terraced house': 'house',
|
||||
'Sheltered flat': 'flat',
|
||||
'APD Bungalow': 'bungalow',
|
||||
'Flat with partition': 'flat',
|
||||
'Bungalow detached': 'bungalow',
|
||||
'APD flat': 'flat',
|
||||
'Sheltered warden flat': 'flat',
|
||||
'Bungalow semi detached': 'bungalow',
|
||||
'Sheltered bedsit': 'bedsit',
|
||||
'Sheltered bungalow terraced': 'bungalow',
|
||||
'Sheltered bungalow semi detached': 'bungalow',
|
||||
'Bungalow terraced': 'bungalow',
|
||||
'Maisonette flat': 'maisonette',
|
||||
'Sheltered bedsit disabled': 'bedsit',
|
||||
'Bedsit bungalow semi detached': 'bedsit',
|
||||
'Bedsit Flat': 'bedsit',
|
||||
'Semi detached house': 'house',
|
||||
'Unit': 'unknown'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
|
|||
"pitched unknown access to loft",
|
||||
"piched unknown insulation",
|
||||
"pitched insulated",
|
||||
# Trailing comma is required: without it this literal is implicitly concatenated with the next entry
"pitched less than 100mm insulation",
|
||||
"another dwelling above",
|
||||
"flat unknown insulation",
|
||||
"unknown insulated",
|
||||
|
|
@ -23,5 +24,18 @@ ROOF_CONSTRUCTION_MAPPINGS = {
|
|||
'2018 onwards': 'unknown',
|
||||
'Pitched (vaulted ceiling)': 'pitched insulated',
|
||||
np.nan: "unknown",
|
||||
None: "unknown"
|
||||
None: "unknown",
|
||||
'Unknown': 'unknown',
|
||||
'270mm': 'pitched insulated',
|
||||
'300mm+': 'pitched insulated',
|
||||
'100mm': 'pitched less than 100mm insulation',
|
||||
'250mm': 'pitched insulated',
|
||||
'300mm': 'pitched insulated',
|
||||
'No Loft space': 'pitched no access to loft',
|
||||
'75mm': 'pitched less than 100mm insulation',
|
||||
'150mm': 'pitched insulated',
|
||||
'No Loft Hatch': 'pitched unknown access to loft',
|
||||
'200mm': 'pitched insulated',
|
||||
'0-49mm': 'pitched less than 100mm insulation',
|
||||
'50mm': 'pitched less than 100mm insulation',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -212,12 +212,17 @@ WALL_CONSTRUCTION_MAPPINGS = {
|
|||
'Cornish': 'system built',
|
||||
'Rwate': 'system built',
|
||||
'Hill Presweld Steel': 'system built',
|
||||
|
||||
'Cavity Filled Cavity': 'filled cavity',
|
||||
'Cavity Unknown': 'cavity unknown insulation',
|
||||
'Cavity Filled Cavity (internal)': 'filled cavity',
|
||||
'': 'unknown',
|
||||
'Cavity Internal Insulation': 'filled cavity',
|
||||
'Cavity As Built': "uninsulated cavity"
|
||||
|
||||
'Cavity As Built': "uninsulated cavity",
|
||||
'Non Trad Large Panel System': 'system built',
|
||||
'Non Trad Cornish': 'system built',
|
||||
'Non Trad Reema': 'system built',
|
||||
'Traditional Cavity Brickwork': 'cavity unknown insulation',
|
||||
'System build (undefined)': 'system built',
|
||||
'Non Trad Wimpey': 'system built',
|
||||
'Non Trad Wates': 'system built'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ class SearchEpc:
|
|||
|
||||
self.address1 = address1
|
||||
self.postcode = postcode
|
||||
self.full_address = full_address
|
||||
self.full_address = full_address if full_address is not None else self.address1
|
||||
self.uprn = uprn
|
||||
self.house_number = self.get_house_number(self.address1)
|
||||
self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
|
||||
|
|
@ -265,9 +265,7 @@ class SearchEpc:
|
|||
|
||||
for retry in range(self.max_retries):
|
||||
try:
|
||||
|
||||
response = self.client.domestic.call(method="get", url=url, params=params)
|
||||
|
||||
if response:
|
||||
self.data = response
|
||||
return {
|
||||
|
|
@ -368,8 +366,11 @@ class SearchEpc:
|
|||
unique_property_types = {r["property-type"] for r in rows}
|
||||
|
||||
# We allow for variation in property type across flats/maisonettes
|
||||
if (len(uprns) == 1) and ((len(unique_property_types) == 1) or unique_property_types == {"Flat", "Maisonette"}):
|
||||
return rows
|
||||
# If we know that we have a flat/maisonette, we allow for both property types
|
||||
if property_type in ["Flat", "Maisonette"]:
|
||||
if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
|
||||
) or unique_property_types == {"Flat", "Maisonette"}):
|
||||
return rows
|
||||
|
||||
if property_type is not None:
|
||||
# We can do a filter on the property type
|
||||
|
|
@ -388,11 +389,27 @@ class SearchEpc:
|
|||
|
||||
# We check if post town is included in the address
|
||||
if any([r["posttown"].lower() in address.lower() for r in rows]):
|
||||
best_match = process.extractOne(
|
||||
best_match1 = process.extractOne(
|
||||
address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
|
||||
)
|
||||
# Get all of the scores
|
||||
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
|
||||
best_match2 = process.extractOne(
|
||||
address, [", ".join([r["address"]]) for r in rows], score_cutoff=0
|
||||
)
|
||||
# Pick the largest score
|
||||
if best_match1[1] >= best_match2[1]:
|
||||
# Get all of the scores
|
||||
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]]
|
||||
else:
|
||||
# Get all of the scores
|
||||
rows_filtered = [r for r in rows if r["address"] == best_match2[0]]
|
||||
|
||||
# If we have multiple, we filter on newest lodgment date
|
||||
if len(rows_filtered) > 1:
|
||||
rows_filtered = [
|
||||
r for r in rows_filtered
|
||||
if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered])
|
||||
]
|
||||
|
||||
else:
|
||||
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
|
||||
# Get the UPRN for the best match
|
||||
|
|
|
|||
708
etl/customers/Community Housing/reconciliation.py
Normal file
708
etl/customers/Community Housing/reconciliation.py
Normal file
|
|
@ -0,0 +1,708 @@
|
|||
"""
|
||||
This script is used to reconcile the data from the Community Housing project, to understand the differences in
|
||||
the various asset lists, and the work that has been conducted
|
||||
"""
|
||||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from asset_list.AssetList import AssetList
|
||||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
# Data preparation
|
||||
outcomes_1 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme Reconciliation/Outcomes "
|
||||
"Community Housing.xlsx",
|
||||
sheet_name="Sheet1",
|
||||
)
|
||||
outcomes_2 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme Reconciliation/Outcomes "
|
||||
"Community Housing.xlsx",
|
||||
sheet_name="ECO4 + PV",
|
||||
)
|
||||
outcomes_2["Type of Funding"] = "ECO4 Solar"
|
||||
|
||||
combined_outcomes = pd.concat([outcomes_1, outcomes_2], ignore_index=True)
|
||||
combined_outcomes.columns = [
|
||||
'Surveyor', 'Housing Association', 'No.', 'Address', 'Postcode', 'Outcome', 'Type of Funding', "Notes",
|
||||
'Previous letter sent Date:', 'Date Letter sent', 'Installer'
|
||||
]
|
||||
# Store
|
||||
combined_outcomes.to_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme "
|
||||
"Reconciliation/combined_outcomes.xlsx",
|
||||
)
|
||||
|
||||
################################################################################################
|
||||
# Config for asset list standardisation
|
||||
################################################################################################
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme Reconciliation"
|
||||
data_filename = "Community Housing - Original Asset List Copy for Reconciliation.xlsx"
|
||||
sheet_name = "Assets"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = "Full Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Build_Date"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Asset_Type1"
|
||||
landlord_built_form = "Asset_Classification"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = "Heat Source Static"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Asset_Reference"
|
||||
landlord_sap = None
|
||||
outcomes_filename = [
|
||||
os.path.join(data_folder, "combined_outcomes.xlsx"),
|
||||
]
|
||||
outcomes_sheetname = ["Sheet1"]
|
||||
outcomes_postcode = ["Postcode"]
|
||||
outcomes_houseno = ["No."]
|
||||
outcomes_id = [None]
|
||||
outcomes_address = ["Address"]
|
||||
master_filepaths = [
|
||||
os.path.join(data_folder, "Submissions - for analysis.csv"),
|
||||
]
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = "community community|community housing|mr community|david lindwood"
|
||||
|
||||
manual_uprn_map = {}
|
||||
|
||||
asset_list = AssetList(
|
||||
local_filepath=os.path.join(data_folder, data_filename),
|
||||
header=0,
|
||||
sheet_name=sheet_name,
|
||||
address1_colname=address1_column,
|
||||
postcode_colname=postcode_column,
|
||||
landlord_property_id=landlord_property_id,
|
||||
full_address_colname=fulladdress_column,
|
||||
full_address_cols_to_concat=address_cols_to_concat,
|
||||
missing_postcodes_method=missing_postcodes_method,
|
||||
address1_extraction_method=address1_method,
|
||||
landlord_year_built=landlord_year_built,
|
||||
landlord_uprn=landlord_os_uprn,
|
||||
landlord_property_type=landlord_property_type,
|
||||
landlord_built_form=landlord_built_form,
|
||||
landlord_wall_construction=landlord_wall_construction,
|
||||
landlord_roof_construction=landlord_roof_construction,
|
||||
landlord_heating_system=landlord_heating_system,
|
||||
landlord_existing_pv=landlord_existing_pv,
|
||||
landlord_sap=landlord_sap,
|
||||
phase=phase
|
||||
)
|
||||
asset_list.init_standardise()
|
||||
|
||||
asset_list.apply_standardiation()
|
||||
|
||||
# We now flag properties that have been treated under existing programmes
|
||||
asset_list.flag_outcomes(
|
||||
outcomes_filepaths=outcomes_filename,
|
||||
outcomes_sheetname=outcomes_sheetname,
|
||||
outcomes_address=outcomes_address,
|
||||
outcomes_postcode=outcomes_postcode,
|
||||
outcomes_houseno=outcomes_houseno,
|
||||
outcomes_id=outcomes_id
|
||||
)
|
||||
|
||||
if pd.isnull(asset_list.outcomes["domna_property_id"]).sum() == 1:
|
||||
# We fix this one manually
|
||||
asset_list.outcomes["domna_property_id"] = asset_list.outcomes["domna_property_id"].fillna(
|
||||
"29walternashroadeastbirchencoppicekidderminsterdy117ea-caa3a8d92ea9"
|
||||
)
|
||||
else:
|
||||
raise Exception("Something went wrong")
|
||||
|
||||
asset_list.flag_survey_master(
|
||||
master_filepaths=master_filepaths,
|
||||
master_to_asset_list_filepath=master_to_asset_list_filepath
|
||||
)
|
||||
|
||||
master_surveyed = asset_list.master_surveyed
|
||||
scheme_map = {
|
||||
"ECO4 A/W": "ECO4",
|
||||
'ECO4 GBIS': "GBIS",
|
||||
'ECO4 - REMEDIAL CWI ONLY': "ECO4 Remedial",
|
||||
"ECO4 GBIS REMEDIAL": "GBIS Remedial",
|
||||
'ECO4 - Remedial CWI Only': "ECO4 Remedial",
|
||||
'ECO4 GBIS Remedial': "GBIS Remedial"
|
||||
}
|
||||
master_surveyed["funding_scheme"] = master_surveyed["funding_scheme"].map(scheme_map)
|
||||
master_surveyed["survey_reference"] = master_surveyed["funding_scheme"] + ": " + master_surveyed["measure_mix"]
|
||||
master_surveyed = master_surveyed.merge(
|
||||
asset_list.standardised_asset_list[["domna_property_id", "landlord_property_id"]],
|
||||
how="left",
|
||||
on="landlord_property_id",
|
||||
)
|
||||
if pd.isnull(master_surveyed["domna_property_id"]).sum():
|
||||
raise ValueError("Some of the master surveyed properties do not have a domna_property_id")
|
||||
# Flag anything in outcomes that has been listed as surveyed, that is NOT in the master_surveyed sheet
# NOTE(review): only the lower-case label "surveyed" is matched (the original list repeated it twice).
# Confirm the "Outcome" column is lower-cased before this point; otherwise "Surveyed" rows are missed.
surveyed_outcomes = asset_list.outcomes[
    asset_list.outcomes["Outcome"].isin(["surveyed"])
]
# Take an explicit copy so the column assignment below writes to an independent frame rather than to a
# slice of asset_list.outcomes (avoids pandas' chained-assignment warning and ambiguity)
outcomes_not_in_master = surveyed_outcomes[
    ~surveyed_outcomes["domna_property_id"].isin(master_surveyed["domna_property_id"])
].copy()
# Rows with no funding type recorded get an explicit placeholder label
outcomes_not_in_master["Type of Funding"] = outcomes_not_in_master["Type of Funding"].fillna("Work Type Not Filled In")
|
||||
|
||||
asset_list.flag_ecosurv(
|
||||
ecosurv_landlords=ecosurv_landlords,
|
||||
landlords_to_ignore=[
|
||||
"Watford Community housing", "Eastlight Community housing", "Mr Tower Hamlets Community Housing"
|
||||
]
|
||||
)
|
||||
|
||||
# These are properties NOT on the Community Housing asset list that were sold under the wrong HA
|
||||
# asset_list.ecosurv_no_match.to_csv(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme "
|
||||
# "Reconciliation/Ecosurv - properties sold to Community Housing, not belonging to them.csv",
|
||||
# index=False
|
||||
# )
|
||||
|
||||
# We read in the works, split by sold to SGEC and on-hold
|
||||
billed_to_installer = pd.read_csv(
|
||||
os.path.join(
|
||||
data_folder, "Community Housing Deck of works", "SGEC BILLED -Table 1.csv"
|
||||
),
|
||||
)
|
||||
billed_to_installer["billed"] = True
|
||||
|
||||
not_billed_to_installer = pd.read_csv(
|
||||
os.path.join(
|
||||
data_folder, "Community Housing Deck of works", "ON HOLD -Table 1.csv"
|
||||
),
|
||||
)
|
||||
not_billed_to_installer["billed"] = False
|
||||
|
||||
sgec_billings = pd.concat(
|
||||
[billed_to_installer, not_billed_to_installer],
|
||||
)
|
||||
sgec_billings = sgec_billings.reset_index(drop=True)
|
||||
sgec_billings["row_id"] = sgec_billings.index
|
||||
|
||||
# We match these two lists back to the domna_property_id. They SHOULD match to submissions
# The billing exports do not use consistent headers, so each column name is resolved by checking which
# variant is present in billed_to_installer
scheme_col = (
    "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
    "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in billed_to_installer.columns else "AFFORDABLE WARMTH"
)
postcode_col = "POSTCODE" if "POSTCODE" in billed_to_installer.columns else "Post Code"
house_no_col = 'NO.' if 'NO.' in billed_to_installer.columns else "NO"
# The original conditional returned this same header from both branches, so it is assigned directly.
# NOTE(review): if an alternative header exists in other exports, add it here as a fallback.
property_type_col = "PROPERTY TYPE As per table emailed"
measure_mix_col = "MEASURE COMBO"
|
||||
manual_corrections = {
|
||||
"30+DY12 1EB": "73440300",
|
||||
"32+DY12 1EB": "73440320",
|
||||
"1+DY11 7ES": "20150010",
|
||||
"12+DY11 7EP": "9460120",
|
||||
"72+DY11 7PA": "88520720",
|
||||
"39+DY13 0DR": "44250390",
|
||||
"43+DY11 7EF": "2460430",
|
||||
"45+DY11 7EG": "2460450",
|
||||
"47+DY11 7EG": "2460470",
|
||||
"49+DY11 7EG": "2460490",
|
||||
"11+DY13 0HB": "87320110",
|
||||
"4+DY130HA": "87320040"
|
||||
}
|
||||
# Resolve every SGEC billing row to a domna_property_id. For each row we try, in order:
#   1) a manual correction keyed on "<house no>+<postcode>",
#   2) an exact house number + postcode match in the master surveyed list,
#   3) a normalised postcode + extracted house number match in the standardised asset list.
# A row that cannot be resolved to exactly one property raises a ValueError.
billed_lookup = []
for _, row in tqdm(sgec_billings.iterrows(), total=len(sgec_billings)):
    # Read through the column names resolved above, consistent with the asset-list fallback below
    postcode = row[postcode_col]
    houseno = row[house_no_col]
    # Same "<house no>+<postcode>" format as the keys of manual_corrections
    lookup_key = f"{houseno}+{postcode}"

    # We need to correct some records
    landlord_pid = manual_corrections.get(lookup_key)
    if landlord_pid:
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["landlord_property_id"] == landlord_pid)
        ]
        if df.shape[0] != 1:
            raise ValueError(
                f"Expected exactly one match for {landlord_pid} in the standardised asset list, "
                f"found {df.shape[0]}"
            )
        billed_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    df = master_surveyed[
        (master_surveyed["original_house_no"] == houseno) &
        (master_surveyed["original_postcode"] == postcode)
    ]
    if df.shape[0] != 1:
        # Try a search on the asset list
        postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()

        df = asset_list.standardised_asset_list[
            (
                asset_list.standardised_asset_list[asset_list.STANDARD_POSTCODE]
                .str.strip().str.lower().str.replace(" ", "") == postcode_no_space
            )
        ].copy()

        # House numbers can be read as floats (e.g. 12.0); normalise to a lower-case string
        house_no = row[house_no_col]
        if isinstance(house_no, float):
            house_no = str(int(house_no)).lower()
        else:
            house_no = str(house_no).lower()

        df["house_no"] = df.apply(
            lambda x: SearchEpc.get_house_number(
                str(x[asset_list.STANDARD_ADDRESS_1]), str(x[asset_list.STANDARD_POSTCODE])
            ),
            axis=1
        )
        df = df[df["house_no"].str.lower() == house_no].copy()

        if df.shape[0] == 1:
            billed_lookup.append(
                {
                    "domna_property_id": df["domna_property_id"].values[0],
                    "row_id": row["row_id"],
                }
            )
            continue

        raise ValueError(
            f"Expected exactly one match for {lookup_key} in the master surveyed list or the asset list, "
            f"found {df.shape[0]} in the asset list"
        )

    billed_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )

# One record per billing row, keyed on row_id for the merge back onto sgec_billings
billed_lookup = pd.DataFrame(billed_lookup)
|
||||
|
||||
sgec_billings = sgec_billings.merge(
|
||||
billed_lookup,
|
||||
how="left",
|
||||
on="row_id"
|
||||
)
|
||||
|
||||
# We get the asset list that Community Housing thinks they sent Warmfront
|
||||
|
||||
master_data_sheet = pd.read_excel(
|
||||
os.path.join(
|
||||
data_folder, "Warmfront.xlsx"
|
||||
),
|
||||
sheet_name="Asset Stock List (3)",
|
||||
)
|
||||
master_data_sheet["Asset_Reference"] = master_data_sheet["Asset_Reference"].astype(str)
|
||||
|
||||
# 1) We check that all of the properties in the asset list we have on file are in the asset list that Community Housing
|
||||
# believe they sent Warmfront
|
||||
if not asset_list.standardised_asset_list[
|
||||
~asset_list.standardised_asset_list["landlord_property_id"].isin(
|
||||
master_data_sheet["Asset_Reference"].astype(str).values
|
||||
)
|
||||
].empty:
|
||||
raise ValueError("Some of the properties in the asset list are not in the Warmfront asset list")
|
||||
|
||||
# This column documents whether or not the property is in the asset list that the WFT were sent
|
||||
# There are 189 properties that were never sent to WFT, but all properties are accounted for in the asset list
|
||||
master_data_sheet["Is Property in WFT Asset List"] = master_data_sheet["Asset_Reference"].astype(str).isin(
|
||||
asset_list.standardised_asset_list["landlord_property_id"].astype(str).values
|
||||
)
|
||||
|
||||
# We now merge on the Warmfront findings
|
||||
master_data_sheet = master_data_sheet.merge(
|
||||
asset_list.standardised_asset_list[["landlord_property_id", "non-intrusives: ECO Eligibility"]],
|
||||
how="left",
|
||||
left_on="Asset_Reference",
|
||||
right_on="landlord_property_id"
|
||||
)
|
||||
master_data_sheet["non-intrusives: ECO Eligibility"] = master_data_sheet["non-intrusives: ECO Eligibility"].fillna(
|
||||
"Not in original asset list"
|
||||
)
|
||||
|
||||
# SGEC did a number of CIGA checks. We match these onto the master data sheet
|
||||
|
||||
# TODO: Need to split the programme into historical 2023 and 2024 (there was a cutoff date in late 2024 which seemed
|
||||
# to be the start of the new programme)
|
||||
# Seems like there were 2 main checks - it also seems like this was a 2 phase programme, where these CIGA checks
|
||||
# correspond to phase 2
|
||||
ciga_checks_1 = pd.read_excel(
|
||||
os.path.join(
|
||||
data_folder, "CIGA Checks", "2 CIGA Check WFT 14102024 x1073.xlsx"
|
||||
),
|
||||
sheet_name="Worksheet"
|
||||
)
|
||||
ciga_checks_1 = ciga_checks_1[~pd.isnull(ciga_checks_1["Postcode"])]
|
||||
ciga_checks_1["request"] = "1073 properties"
|
||||
ciga_checks_2 = pd.read_excel(
|
||||
os.path.join(
|
||||
data_folder, "CIGA Checks", "2 CIGA Check 01112024 x125.xlsx"
|
||||
),
|
||||
sheet_name="Worksheet"
|
||||
)
|
||||
ciga_checks_2 = ciga_checks_2[~pd.isnull(ciga_checks_2["Postcode"])]
|
||||
ciga_checks_2["request"] = "125 flats"
|
||||
|
||||
cigas = pd.concat([ciga_checks_1, ciga_checks_2], ignore_index=True)
|
||||
cigas["row_id"] = cigas.index
|
||||
|
||||
# We add some temp columns to allow for easier matching
|
||||
asset_list.standardised_asset_list["house_no"] = asset_list.standardised_asset_list.apply(
|
||||
lambda x: SearchEpc.get_house_number(
|
||||
str(x["domna_full_address"]), str(x["domna_postcode"])
|
||||
),
|
||||
axis=1
|
||||
)
|
||||
|
||||
manual_fixes = {
|
||||
"2 Austcliffe Road Cookley, Kidderminster": "2250020",
|
||||
'5 Brett Young Close, Kidderminster': "9800050"
|
||||
}
|
||||
incorrect_ciga_return = [
|
||||
"19 Wood Street, Kidderminster",
|
||||
"nan Charles Street",
|
||||
"53 Harold Evers Way, Kidderminster",
|
||||
'63 Harold Evers way'
|
||||
]
|
||||
# Resolve every CIGA check row to exactly one property in the standardised asset list.
# Each resolved row contributes {"domna_property_id", "row_id"} to `ciga_lookup`; rows whose
# address is listed in `incorrect_ciga_return` are skipped and get no lookup entry.
ciga_lookup = []
for _, row in tqdm(cigas.iterrows(), total=len(cigas)):

    # Addresses with a manual override are pinned directly to a landlord property id
    if manual_fixes.get(row["Matched Address"]):
        ll_pid = manual_fixes[row["Matched Address"]]
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["landlord_property_id"] == ll_pid)
        ]
        ciga_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    # str() on both parts: either address field can be NaN (a float), which str.join rejects
    if (row["Matched Address"] in incorrect_ciga_return) or (
        " ".join([str(row["Address1"]), str(row["Address2"])]) in incorrect_ciga_return
    ):
        continue

    # First attempt: the submitted postcode plus house number
    df = asset_list.standardised_asset_list[
        (asset_list.standardised_asset_list["domna_postcode"] == row["Postcode"])
    ]

    df = df[(df["house_no"].astype(str) == str(row["Address1"]))]

    # Second attempt: the postcode CIGA matched against, plus house number
    if df.empty:
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["domna_postcode"] == row["Matched Postcode"])
        ]
        df = df[(df["house_no"].astype(str) == str(row["Address1"]))]

    # Still ambiguous: narrow by address text (commas removed, lower-cased), falling back to Address2
    if df.shape[0] > 1:
        df = asset_list.standardised_asset_list[
            (asset_list.standardised_asset_list["domna_full_address"].str.lower().str.replace(",", "").str.contains(
                row["Matched Address"].lower().replace(",", ""), na=False))
        ]
        if df.empty:
            df = asset_list.standardised_asset_list[
                (asset_list.standardised_asset_list["domna_full_address"].str.lower().str.replace(",", "").str.contains(
                    row["Address2"].lower().replace(",", ""), na=False))
            ]

        df = df[(df["house_no"].astype(str) == str(row["Address1"]))]

    if df.shape[0] != 1:
        # This fires for zero matches as well as several, so report the real count.
        # (The original message was missing its f-prefix and printed the placeholder literally.)
        raise Exception(
            f"Expected exactly one match for {row['Address1']} in the asset list, found {df.shape[0]}"
        )

    ciga_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )
|
||||
|
||||
ciga_lookup = pd.DataFrame(ciga_lookup)
|
||||
|
||||
cigas = cigas.merge(
|
||||
ciga_lookup,
|
||||
how="left",
|
||||
on="row_id"
|
||||
)
|
||||
cigas = cigas[~pd.isnull(cigas["domna_property_id"])]
|
||||
|
||||
cigas = cigas.merge(
|
||||
asset_list.standardised_asset_list[["domna_property_id", "landlord_property_id"]],
|
||||
how="left",
|
||||
on="domna_property_id"
|
||||
)
|
||||
|
||||
# Note 4 entries in the CIGA checks did NOT match to the asset list (were for properties not owned by Community Housing)
|
||||
master_data_sheet = master_data_sheet.merge(
|
||||
cigas[["landlord_property_id", "Guarantee", "request"]].rename(
|
||||
columns={"request": "CIGA request batch"}
|
||||
),
|
||||
how="left",
|
||||
on="landlord_property_id"
|
||||
)
|
||||
|
||||
# Fill missing survey_reference with funding_scheme
|
||||
master_surveyed["survey_reference"] = master_surveyed["survey_reference"].fillna(
|
||||
master_surveyed["funding_scheme"]
|
||||
)
|
||||
|
||||
# Surveys from the master surveyed list whose property does not appear in sgec_billings.
# .copy() gives an independent frame, so adding the status column below is not a chained
# assignment on a filtered slice of master_surveyed (avoids SettingWithCopyWarning).
master_surveyed_to_merge = master_surveyed[
    ~master_surveyed["domna_property_id"].isin(sgec_billings["domna_property_id"].values)
].copy()
master_surveyed_to_merge["Survey Status"] = "Surveyed, Submitted, not on SGEC Deck of Works"
|
||||
|
||||
# We now merge on what we've surveyed and submitted
|
||||
master_data_sheet = master_data_sheet.merge(
|
||||
master_surveyed_to_merge[
|
||||
["landlord_property_id", "survey_reference", "submission_date", "cancelled", "Survey Status"]
|
||||
].rename(
|
||||
columns={
|
||||
"survey_reference": "Survey Type", "submission_date": "Survey Date",
|
||||
"cancelled": "Was the Install Cancelled?"
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
on="landlord_property_id"
|
||||
)
|
||||
|
||||
# We now deduce the status of the work based on sgec_billings
|
||||
sgec_billings = sgec_billings.merge(
|
||||
asset_list.standardised_asset_list[["landlord_property_id", "domna_property_id"]],
|
||||
how="left",
|
||||
on="domna_property_id"
|
||||
)
|
||||
|
||||
dupe_ids = sgec_billings[sgec_billings["domna_property_id"].duplicated()]["domna_property_id"]
|
||||
# We sort by domna_property_id and billed (where true should be first) and take the first instance
|
||||
sgec_billings = sgec_billings.sort_values(
|
||||
["domna_property_id", "billed"], ascending=[True, False]
|
||||
)
|
||||
sgec_billings = sgec_billings.drop_duplicates(
|
||||
subset=["domna_property_id"],
|
||||
keep="first"
|
||||
)
|
||||
|
||||
sgec_billings["Survey Type"] = (
|
||||
sgec_billings["SUBMISSION TYPE - ECO4,GBIS,SHDF,EPC or OTHER"].map(scheme_map) + ": " +
|
||||
sgec_billings["MEASURE COMBO"]
|
||||
)
|
||||
sgec_billings["Survey Type"] = sgec_billings["Survey Type"].fillna(
|
||||
sgec_billings["SUBMISSION TYPE - ECO4,GBIS,SHDF,EPC or OTHER"].map(scheme_map)
|
||||
)
|
||||
sgec_billings["Survey Date"] = sgec_billings['SUBMISSION DATE']
|
||||
sgec_billings["Was the Install Cancelled?"] = (
|
||||
sgec_billings["INSTALLED"].astype(str).str.lower().str.contains("cancel")
|
||||
)
|
||||
|
||||
sgec_billings['Survey Status'] = np.where(
|
||||
sgec_billings["billed"] == True,
|
||||
"Surveyed, Submitted, on SGEC Deck of Works",
|
||||
"Surveyed, not submitted to SGEC, on SGEC Deck of Works"
|
||||
)
|
||||
|
||||
master_data_sheet = master_data_sheet.merge(
|
||||
sgec_billings[
|
||||
["landlord_property_id", "Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"]],
|
||||
how="left",
|
||||
on="landlord_property_id",
|
||||
suffixes=("", "_y")
|
||||
)
|
||||
|
||||
for col in ["Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"]:
|
||||
master_data_sheet[col] = np.where(
|
||||
pd.isnull(master_data_sheet[col]) & ~pd.isnull(master_data_sheet[col + "_y"]),
|
||||
master_data_sheet[col + "_y"],
|
||||
master_data_sheet[col]
|
||||
)
|
||||
master_data_sheet = master_data_sheet.drop(columns=[col + "_y"])
|
||||
|
||||
outcomes_not_in_master = outcomes_not_in_master.merge(
|
||||
asset_list.standardised_asset_list[["landlord_property_id", "domna_property_id"]],
|
||||
how="left",
|
||||
left_on="domna_property_id",
|
||||
right_on="domna_property_id"
|
||||
)
|
||||
# We also filter out any that were in the SGEC billings
|
||||
outcomes_not_in_master = outcomes_not_in_master[
|
||||
~outcomes_not_in_master["domna_property_id"].isin(sgec_billings["domna_property_id"].values)
|
||||
]
|
||||
|
||||
# We now merge on outcomes. There are a small number of surveyed outcomes that were not submitted
|
||||
master_data_sheet = master_data_sheet.merge(
|
||||
outcomes_not_in_master[["landlord_property_id", 'Type of Funding', "Date Letter sent"]],
|
||||
how="left",
|
||||
on="landlord_property_id",
|
||||
)
|
||||
master_data_sheet["Survey Status"] = np.where(
|
||||
pd.isnull(master_data_sheet["Survey Type"]) & ~pd.isnull(master_data_sheet["Type of Funding"]),
|
||||
"Surveyed, On Outcomes, not submitted",
|
||||
master_data_sheet["Survey Status"]
|
||||
)
|
||||
|
||||
master_data_sheet["Survey Type"] = np.where(
|
||||
pd.isnull(master_data_sheet["Survey Type"]) & ~pd.isnull(master_data_sheet["Type of Funding"]),
|
||||
master_data_sheet["Type of Funding"],
|
||||
master_data_sheet["Survey Type"]
|
||||
)
|
||||
master_data_sheet["Survey Date"] = np.where(
|
||||
pd.isnull(master_data_sheet["Survey Date"]) & ~pd.isnull(master_data_sheet["Date Letter sent"]),
|
||||
master_data_sheet["Date Letter sent"],
|
||||
master_data_sheet["Survey Date"]
|
||||
)
|
||||
master_data_sheet = master_data_sheet.drop(columns=["Type of Funding", "Date Letter sent"])
|
||||
|
||||
# We now need to compare the submissions that SGEC have sent us, because the deck of works is likely incorrect given
|
||||
# the number of properties that have been received by SGEC
|
||||
|
||||
# We have submissions from the following dates:
|
||||
# - 18/11/2024
|
||||
# - 10/03/2025
|
||||
# - A sheet that claims to be 25/11/2024 but has 18/11/2024 as the submission date
|
||||
# - 16/12/2024
|
||||
# - 02/12/2024
|
||||
# - 10/02/2025
|
||||
sgec_received_submissions = []
|
||||
for filename in [
|
||||
"4x108 18.11.24 - RT MASTERS SGEC INVOICE.xlsx",
|
||||
"4x144 COMMUNITY HOUSING TOTAL PROJECT INV 10032025.xlsx",
|
||||
"4x19 25.11.2024 - RT Master SGEC.xlsx",
|
||||
"4x37 16.12.2024 - SGEC INVOICED.xlsx",
|
||||
"4x60 02.12.2024 - RT SGEC INV.xlsx",
|
||||
"4x78 10.02.2025 MASTERS - SGEC INVOICED-CORRECT.xlsx"
|
||||
]:
|
||||
data = pd.read_excel(
|
||||
os.path.join(
|
||||
data_folder, "SGEC Received Submissions", filename
|
||||
),
|
||||
)
|
||||
data["filename"] = filename
|
||||
sgec_received_submissions.append(data)
|
||||
|
||||
sgec_received_submissions = pd.concat(sgec_received_submissions)
|
||||
sgec_received_submissions = sgec_received_submissions.reset_index(drop=True)
|
||||
sgec_received_submissions["row_id"] = sgec_received_submissions.index
|
||||
|
||||
manual_fix = {
|
||||
"5a+DY10 3JR": "6856005A",
|
||||
'12+DY10 3JR': "78900120",
|
||||
"9+DY10 3JR": "86280090",
|
||||
'10+DY10 3JL': "86280100",
|
||||
"66+DY10 3JS": "68560660",
|
||||
"70+DY10 3JS": "68560700",
|
||||
"72+DY10 3JS": "68560720",
|
||||
"12+DY10 3JP": "86280120",
|
||||
"2A+DY11 5TZ": "6872002A",
|
||||
"3A+DY11 5TZ": "6872003A",
|
||||
"4A+DY11 5TZ": "6872004A"
|
||||
}
|
||||
sgec_received_submissions_lookup = []
|
||||
for _, row in tqdm(sgec_received_submissions.iterrows(), total=len(sgec_received_submissions)):
|
||||
|
||||
_key = "+".join([str(row["NO."]), str(row["Post Code"])])
|
||||
|
||||
if manual_fix.get(_key) is not None:
|
||||
ll_pid = manual_fix[_key]
|
||||
sgec_received_submissions_lookup.append(
|
||||
{
|
||||
"row_id": row["row_id"],
|
||||
"landlord_property_id": ll_pid,
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
match = sgec_billings[
|
||||
(sgec_billings['NO.'].astype(str) == str(row['NO.'])) &
|
||||
(sgec_billings['Post Code'] == row['Post Code'])
|
||||
]
|
||||
|
||||
if match.shape[0] > 1:
|
||||
raise Exception(f"something went wrong {_key} {row['Street / Block Name']}")
|
||||
|
||||
if match.shape[0] == 1:
|
||||
sgec_received_submissions_lookup.append(
|
||||
{
|
||||
"row_id": row["row_id"],
|
||||
"landlord_property_id": match["landlord_property_id"].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
match = master_surveyed[
|
||||
(master_surveyed['original_house_no'].astype(str) == str(row['NO.'])) &
|
||||
(master_surveyed['original_postcode'] == row['Post Code'])
|
||||
]
|
||||
|
||||
if match.shape[0] > 1:
|
||||
raise Exception(f"something went wrong 2 {_key} {row['Street / Block Name']}")
|
||||
|
||||
if match.shape[0] == 0:
|
||||
raise Exception(f"No match {_key} {row['Street / Block Name']}")
|
||||
|
||||
sgec_received_submissions_lookup.append(
|
||||
{
|
||||
"row_id": row["row_id"],
|
||||
"landlord_property_id": match["landlord_property_id"].values[0],
|
||||
}
|
||||
)
|
||||
|
||||
sgec_received_submissions_lookup = pd.DataFrame(sgec_received_submissions_lookup)
|
||||
sgec_received_submissions = sgec_received_submissions.merge(
|
||||
sgec_received_submissions_lookup[["row_id", "landlord_property_id"]],
|
||||
how="left",
|
||||
on="row_id"
|
||||
)
|
||||
|
||||
sgec_received_submissions["Survey Type"] = (
|
||||
sgec_received_submissions["SUBMISSION TYPE - ECO4,GBIS,SHDF,EPC or OTHER"].map(scheme_map) + ": " +
|
||||
sgec_received_submissions["MEASURE COMBO"]
|
||||
)
|
||||
|
||||
sgec_received_submissions["Survey Type"] = sgec_received_submissions["Survey Type"].fillna(
|
||||
sgec_received_submissions["SUBMISSION TYPE - ECO4,GBIS,SHDF,EPC or OTHER"].map(scheme_map)
|
||||
)
|
||||
sgec_received_submissions["Survey Date"] = sgec_received_submissions['SUBMISSION DATE']
|
||||
sgec_received_submissions["Was the Install Cancelled?"] = (
|
||||
sgec_received_submissions["INSTALLED"].astype(str).str.lower().str.contains("cancel")
|
||||
)
|
||||
sgec_received_submissions['Survey Status'] = "Submission sent to SGEC, Confirmed by SGEC"
|
||||
sgec_received_submissions["Survey Received by SGEC"] = True
|
||||
|
||||
# We now merge on the submissions that SGEC have sent us
|
||||
master_data_sheet = master_data_sheet.merge(
|
||||
sgec_received_submissions[
|
||||
[
|
||||
"landlord_property_id", "Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status",
|
||||
"Survey Received by SGEC"
|
||||
]
|
||||
],
|
||||
how="left",
|
||||
on="landlord_property_id",
|
||||
suffixes=("", "_y")
|
||||
)
|
||||
|
||||
# Fill in the gaps
|
||||
for col in ["Survey Type", "Survey Date", "Was the Install Cancelled?", "Survey Status"]:
|
||||
master_data_sheet[col] = np.where(
|
||||
pd.isnull(master_data_sheet[col]) & ~pd.isnull(master_data_sheet[col + "_y"]),
|
||||
master_data_sheet[col + "_y"],
|
||||
master_data_sheet[col]
|
||||
)
|
||||
master_data_sheet = master_data_sheet.drop(columns=[col + "_y"])
|
||||
|
||||
if master_data_sheet["Asset_Reference"].duplicated().sum():
|
||||
raise ValueError("There are duplicates in the asset reference column")
|
||||
|
||||
# Drop this at the end
|
||||
master_data_sheet = master_data_sheet.drop(columns=["landlord_property_id"])
|
||||
|
||||
master_data_sheet.to_excel(
|
||||
os.path.join(
|
||||
data_folder, "Draft Results.xlsx"
|
||||
),
|
||||
)
|
||||
51
etl/customers/Westward/Route March Reconciliation.py
Normal file
51
etl/customers/Westward/Route March Reconciliation.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import pandas as pd
|
||||
|
||||
# Workbooks compared tab-by-tab: the original route march prep and the client's revised list
ORIGINAL_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward/Route March/WESTWARD - Route March Prep.xlsx"
)
REVISED_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward/Route March/WESTWARD - GBIS List revised for "
    "Domna.xlsx"
)
STANDARDISED_ASSET_LIST = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward/WESTWARD - completed list - "
    "08.05.2025 - Standardised - Client Review.xlsx"
)
OUTPUT_CSV = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward/Route March/client_revisions.csv"

tabs = [
    "Straight Fill", "Solar PV - Straight Fill", "RDF CIGA checks", "Solar PV - RDF CIGA Checks",
    "AT BUILD", "Solar PV - AT BUILD"
]


def _review_tab(tab):
    """Return the "Place ref" rows of one tab of the original workbook, tagged with the client's decision.

    A place ref that is also present on the same tab of the revised workbook is marked
    "Retain in programme"; every other place ref is marked "Remove from programme".
    """
    original_list = pd.read_excel(ORIGINAL_WORKBOOK, sheet_name=tab)
    revised_list = pd.read_excel(REVISED_WORKBOOK, sheet_name=tab)

    # Everything that survived into the revised list is being kept
    retained = revised_list[["Place ref"]].assign(**{"Client Review": "Retain in programme"})

    reviewed = original_list[["Place ref"]].assign(Tab=tab).merge(retained, how="left", on="Place ref")
    # No match in the revised list means the client dropped the property
    reviewed["Client Review"] = reviewed["Client Review"].fillna("Remove from programme")
    return reviewed


programme_revisions = pd.concat([_review_tab(tab) for tab in tabs])

# Read in the standardised asset list and create the column to append to that
al = pd.read_excel(STANDARDISED_ASSET_LIST, sheet_name="Standardised Asset List")

review_lookup = programme_revisions[["Place ref", "Client Review"]]
client_revisions = pd.merge(
    al[["landlord_property_id"]],
    review_lookup,
    how="left",
    left_on="landlord_property_id",
    right_on="Place ref",
)

# Properties that appear on none of the tabs have no decision yet
client_revisions["Client Review"] = client_revisions["Client Review"].fillna("Needs Review")
client_revisions["Client Review Date"] = "08/05/2025"

client_revisions.to_csv(OUTPUT_CSV, index=False)
|
||||
|
|
@ -20,9 +20,9 @@ def app():
|
|||
"ventilation": 350,
|
||||
"Room Roof Insulation": 210,
|
||||
"Loft insulation": 15,
|
||||
"Internal wall insulation": 215,
|
||||
"Internal wall insulation": 131,
|
||||
"External wall insulation": 298.35,
|
||||
"Solid wall insulation": 215,
|
||||
"Solid wall insulation": 131,
|
||||
"LEDs": 35, # per light
|
||||
"Flat Roof Insulation": 195,
|
||||
"Double Glazing": 1140,
|
||||
|
|
@ -71,149 +71,10 @@ def app():
|
|||
"Ground Floor Flat": 10
|
||||
}
|
||||
|
||||
# If we have a flat, we won't use the 199m2 floor area
|
||||
floor_areas = [73, 97, 199]
|
||||
# We remove age bracket, as we ended up with 360 combinations
|
||||
# age_brackets = ["1945-1970", "1971-2002", "Post 2002"]
|
||||
wall_type = ["cavity", "non-cavity"]
|
||||
roof_type = ["pitched", "other"]
|
||||
planning_constraints = [True, False]
|
||||
|
||||
# This is the list of all combinations of the above variables
|
||||
combinations_untrimmed = product(
|
||||
*[
|
||||
dwelling_types, floor_areas, wall_type, roof_type, planning_constraints
|
||||
]
|
||||
)
|
||||
|
||||
# TODO: Possibly need to add an additional cost for immersion hot water
|
||||
combinations = []
|
||||
for comb in combinations_untrimmed:
|
||||
if "Flat" in comb[0] and comb[1] == 199:
|
||||
continue
|
||||
|
||||
# If we have a flat, not too much difference if it's in a conservation area or not
|
||||
if "Flat" in comb[0] and comb[4] is True:
|
||||
continue
|
||||
combinations.append(comb)
|
||||
|
||||
risk_matrix = []
|
||||
for combination in combinations:
|
||||
n_floors = num_floors_map[combination[0]]
|
||||
bf = built_form_map[combination[0]]
|
||||
pt = "House" if "Flat" not in combination[0] else "Flat"
|
||||
# Model the home as a box
|
||||
ground_floor_area = combination[1] / n_floors
|
||||
perimeter = np.sqrt(ground_floor_area) * 4
|
||||
|
||||
# This is the amount of insulation required
|
||||
external_wall_area = estimate_external_wall_area(
|
||||
num_floors=n_floors,
|
||||
floor_height=2.5,
|
||||
perimeter=perimeter,
|
||||
built_form=bf
|
||||
)
|
||||
|
||||
n_rooms = np.floor(combination[1] / 15)
|
||||
|
||||
n_windows = estimate_windows(
|
||||
property_type=pt,
|
||||
built_form=bf,
|
||||
construction_age_band="",
|
||||
floor_area=combination[1],
|
||||
number_habitable_rooms=n_rooms
|
||||
)
|
||||
|
||||
# We determine the exact upgrade pathway for this combination, guided by the generic upgrade pathway
|
||||
combination_upgrade_pathway = []
|
||||
for upgrade in upgrade_path:
|
||||
if upgrade == "wall_insulation":
|
||||
if combination[2] == "cavity":
|
||||
combination_upgrade_pathway.append("cavity_wall_insulation")
|
||||
else:
|
||||
combination_upgrade_pathway.append("solid_wall_insulation")
|
||||
continue
|
||||
|
||||
if upgrade == "roof_insulation":
|
||||
if combination[3] == "pitched":
|
||||
combination_upgrade_pathway.append("loft_insulation")
|
||||
else:
|
||||
combination_upgrade_pathway.append("non_pitched_roof_insualtion")
|
||||
continue
|
||||
|
||||
if upgrade == "ventilation":
|
||||
combination_upgrade_pathway.append("ventilation")
|
||||
continue
|
||||
|
||||
if upgrade == "low_energy_lighting":
|
||||
combination_upgrade_pathway.append("low_energy_lighting")
|
||||
continue
|
||||
|
||||
if upgrade == "windows":
|
||||
if not combination[4]:
|
||||
combination_upgrade_pathway.append("double_glazing")
|
||||
else:
|
||||
combination_upgrade_pathway.append("secondary_glazing")
|
||||
continue
|
||||
|
||||
if upgrade == "heating":
|
||||
if combination[0] in ["Semi Detached House", "Detached House"]:
|
||||
combination_upgrade_pathway.append("high_heat_retention_storage")
|
||||
else:
|
||||
combination_upgrade_pathway.append("air_source_heat_pump")
|
||||
continue
|
||||
|
||||
if upgrade == "solar":
|
||||
if combination[0] in ["Semi Detached House", "Detached House", "Mid Terrace House"]:
|
||||
combination_upgrade_pathway.append("solar_pv")
|
||||
continue
|
||||
|
||||
combination_costs = []
|
||||
for measure in combination_upgrade_pathway:
|
||||
unit_cost = pricing_matrix[measure]
|
||||
# Wall insulation
|
||||
if measure in ["cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"]:
|
||||
cost = unit_cost * external_wall_area
|
||||
elif measure in ["loft_insulation"]:
|
||||
cost = unit_cost * ground_floor_area
|
||||
elif measure == "ventilation":
|
||||
if combination[1] == 73:
|
||||
cost = unit_cost * 2
|
||||
elif combination[1] == 97:
|
||||
cost = unit_cost * 3
|
||||
else:
|
||||
cost = unit_cost * 4
|
||||
elif measure == "low_energy_lighting":
|
||||
n_lights = lighting_count[combination[0]]
|
||||
if combination[1] == 73:
|
||||
inflation = 1
|
||||
elif combination[1] == 97:
|
||||
inflation = 1.2
|
||||
else:
|
||||
inflation = 1.5
|
||||
cost = unit_cost * n_lights * inflation
|
||||
elif measure in ["double_glazing", "secondary_glazing"]:
|
||||
cost = unit_cost * n_windows
|
||||
elif measure == "high_heat_retention_storage":
|
||||
cost = unit_cost * n_rooms
|
||||
elif measure in ["air_source_heat_pump", "solar_pv"]:
|
||||
cost = unit_cost
|
||||
else:
|
||||
raise NotImplementedError("Implement: %s" % measure)
|
||||
|
||||
combination_costs.append(
|
||||
{
|
||||
"measure": measure,
|
||||
"cost": cost
|
||||
}
|
||||
)
|
||||
|
||||
combination_costs = pd.DataFrame(combination_costs)
|
||||
|
||||
contingency = 0.26
|
||||
|
||||
epr_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Risk Matrix/EPR Data.xlsx", header=1
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Risk Matrix/EPR Data V2.xlsx", header=1
|
||||
)
|
||||
epr_data["Measure added"].value_counts()
|
||||
epr_data["row_id"] = epr_data.index
|
||||
|
|
@ -318,6 +179,6 @@ def app():
|
|||
)
|
||||
|
||||
with pd.ExcelWriter(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Risk Matrix/risk_matrix.xlsx") as writer:
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Risk Matrix/risk_matrix V2.xlsx") as writer:
|
||||
risk_matrix.to_excel(writer, sheet_name="Risk Matrix", index=False)
|
||||
pricing_df.to_excel(writer, sheet_name="Pricing Assumptions", index=False)
|
||||
|
|
|
|||
134
etl/customers/mhs/flag_pilot.py
Normal file
134
etl/customers/mhs/flag_pilot.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
"""
|
||||
On the standardised asset list, this script will flag the pilot assets.
|
||||
"""
|
||||
import pandas as pd
|
||||
import os
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
PILOT_PROJECT_CODE = "MHS-000-PILOT"
|
||||
MHS_PHASE_1_PROJECT_CODE = "MHS-001"
|
||||
|
||||
asset_list = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build - "
|
||||
"Standardised.xlsx",
|
||||
sheet_name="Standardised Asset List",
|
||||
)
|
||||
flat_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build - "
|
||||
"Standardised.xlsx",
|
||||
sheet_name="Flat Data",
|
||||
)
|
||||
|
||||
pilot = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS 334 x Pilot reviewed - KB notes end column.xlsx"
|
||||
)
|
||||
ciga_checks = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS CIGA Check 03042025_201.xlsx"
|
||||
)
|
||||
ciga_checks["row_id"] = ciga_checks.index
|
||||
|
||||
asset_list["project_code"] = None
|
||||
|
||||
asset_list["project_code"] = np.where(
|
||||
asset_list["landlord_property_id"].isin(pilot["Place Reference"]),
|
||||
PILOT_PROJECT_CODE,
|
||||
asset_list["project_code"],
|
||||
)
|
||||
# We now flag the next phase of the programme
|
||||
asset_list["project_code"] = np.where(
|
||||
(~pd.isnull(asset_list["cavity_reason"]) | ~pd.isnull(asset_list["solar_reason"])) & pd.isnull(
|
||||
asset_list["project_code"]),
|
||||
MHS_PHASE_1_PROJECT_CODE,
|
||||
asset_list["project_code"],
|
||||
)
|
||||
|
||||
# We now flag the CIGA checks
|
||||
manual_fixes = {
|
||||
"123 Columbine Close, Rochester": "2213861230"
|
||||
}
|
||||
# Resolve every CIGA check row to exactly one row of the asset list.
# Each row contributes {"domna_property_id", "row_id"} to `ciga_lookup`.
ciga_lookup = []
for _, row in tqdm(ciga_checks.iterrows(), total=len(ciga_checks)):

    # Addresses with a manual override are pinned directly to a landlord property id
    if manual_fixes.get(row["Matched Address"]):
        ll_pid = manual_fixes[row["Matched Address"]]
        df = asset_list[
            (asset_list["landlord_property_id"].astype(str) == ll_pid)
        ]
        ciga_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    # First attempt: the submitted postcode plus first address line
    df = asset_list[
        (asset_list["domna_postcode"] == row["Postcode"])
    ]

    df = df[
        (df["domna_address_1"].astype(str) == str(row["Address1"]))
    ]

    # Second attempt: the postcode CIGA matched against, plus first address line
    if df.empty:
        df = asset_list[
            (asset_list["domna_postcode"] == row["Matched Postcode"])
        ]
        df = df[(df["domna_address_1"].astype(str) == str(row["Address1"]))]

    # Still ambiguous: narrow by address text (commas removed, lower-cased), falling back to Address2
    if df.shape[0] > 1:
        df = asset_list[
            (asset_list["domna_full_address"].str.lower().str.replace(",", "").str.contains(
                row["Matched Address"].lower().replace(",", ""), na=False))
        ]
        if df.empty:
            df = asset_list[
                (asset_list["domna_full_address"].str.lower().str.replace(",", "").str.contains(
                    row["Address2"].lower().replace(",", ""), na=False))
            ]

        df = df[(df["domna_address_1"].astype(str) == str(row["Address1"]))]

    if df.shape[0] != 1:
        # This fires for zero matches as well as several, so report the real count.
        # (The original message was missing its f-prefix and printed the placeholder literally.)
        raise Exception(
            f"Expected exactly one match for {row['Address1']} in the asset list, found {df.shape[0]}"
        )

    ciga_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )
|
||||
|
||||
ciga_lookup = pd.DataFrame(ciga_lookup)
|
||||
|
||||
ciga_lookup = ciga_lookup.merge(
|
||||
ciga_checks[["row_id", "Guarantee"]].rename(
|
||||
columns={"Guarantee": "ciga_guarantee"}
|
||||
), how="left", on="row_id"
|
||||
)
|
||||
ciga_lookup["ciga_check_complete"] = True
|
||||
|
||||
asset_list = asset_list.merge(
|
||||
ciga_lookup[["domna_property_id", "ciga_guarantee"]],
|
||||
how="left",
|
||||
on="domna_property_id"
|
||||
)
|
||||
|
||||
# Check we matched addresses correctly
|
||||
# match_check = ciga_lookup.merge(
|
||||
# ciga_checks, how="left", on="row_id"
|
||||
# ).merge(
|
||||
# asset_list[["domna_property_id", "domna_full_address"]], how="left", on="domna_property_id"
|
||||
# )
|
||||
# match_check = match_check[["Matched Address", "domna_full_address"]]
|
||||
|
||||
# Save
|
||||
|
||||
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/12052025 MHS Standardised Asset List - "
|
||||
"programme.xlsx")
|
||||
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
|
||||
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||||
flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
|
||||
60
etl/customers/mhs/prepare_data.py
Normal file
60
etl/customers/mhs/prepare_data.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
"""
|
||||
The data held on file for MHS is fairly incomplete, where not every single property has an observation
|
||||
"""
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
from docutils.utils.math.tex2mathml_extern import blahtexml
|
||||
|
||||
asset_list = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for analysis.xlsx",
|
||||
sheet_name="Data"
|
||||
)
|
||||
# When this list was checked, an observation was made per postcode, and so we need to extrapolate those findings
|
||||
inspections_observatons = asset_list[["UPRN", "Postcode", "ManagementGroup", "WFT Findings"]].copy()
|
||||
|
||||
# Extrapolate the per-postcode inspection findings to every property in that postcode.
#  - If the first row of a postcode group has no finding, every missing finding in the group
#    becomes "Property not inspected" (this also covers groups with no findings at all).
#  - Otherwise each missing finding takes the most recent finding above it in the group.
NOT_INSPECTED = "Property not inspected"

# Build the groupby once and reuse it for the progress-bar total
postcode_groups = inspections_observatons.groupby("Postcode")

populated = []
for _, group in tqdm(postcode_groups, total=postcode_groups.ngroups):

    # Work on an independent frame rather than assigning into a slice handed out by groupby
    group = group.copy()
    findings = group["WFT Findings"]

    if pd.isnull(findings.iloc[0]):
        group["WFT Findings"] = findings.fillna(NOT_INSPECTED)
    else:
        # The first value is populated, so a forward fill leaves no gaps
        group["WFT Findings"] = findings.ffill()

    populated.append(group)

populated = pd.concat(populated)
|
||||
|
||||
missed = populated[~populated["UPRN"].isin(asset_list["UPRN"].values)]
|
||||
|
||||
asset_list = asset_list.drop(columns=["WFT Findings"]).merge(
|
||||
populated.drop(columns=["Postcode", "ManagementGroup"]), how="left", on="UPRN"
|
||||
)
|
||||
|
||||
# Store the data
|
||||
asset_list.to_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build.xlsx"
|
||||
)
|
||||
28
etl/customers/unitas/preparing_programme_rebuild.py
Normal file
28
etl/customers/unitas/preparing_programme_rebuild.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
"""
|
||||
Simple script to tidy up the unitas asset list
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
ASSET_LIST_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/UNITAS - Asset List.xlsx"
MAPPING_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/unitas-mapped-categories.csv"
OUTPUT_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/unitas_asset_list_for_analysis.xlsx"

df = pd.read_excel(ASSET_LIST_PATH, sheet_name="Asset List")

# Lower-case then strip the raw finding text before it is used as the join key
lowered_findings = df["Warmfront Finding"].str.lower()
df["Warmfront Finding"] = lowered_findings.str.strip()

mapping = pd.read_csv(MAPPING_PATH)

# Keep only the join key and the mapped category, exposing the category as "WFT Findings"
category_lookup = mapping[["non-intrusives: WFT Findings", 'mapped_category']]
category_lookup = category_lookup.rename(columns={"mapped_category": "WFT Findings"})

al = pd.merge(
    df,
    category_lookup,
    how="left",
    left_on="Warmfront Finding",
    right_on="non-intrusives: WFT Findings",
)

al.to_excel(OUTPUT_PATH, index=False)
|
||||
Loading…
Add table
Reference in a new issue