diff --git a/.idea/Model.iml b/.idea/Model.iml
index c6561970..09f2e496 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..fb10c6b0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 4b7a11ec..ad3087c3 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -4,8 +4,8 @@ import re
import tiktoken
from pprint import pprint
from datetime import datetime
+import asset_list.hubspot.config as hubspot_config
-from numpy.ma.core import masked_not_equal
from openai import OpenAI
import numpy as np
import pandas as pd
@@ -29,6 +29,7 @@ from recommendations.recommendation_utils import (
)
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
logger = setup_logger()
@@ -279,6 +280,7 @@ class AssetList:
STANDARD_HEATING_SYSTEM = "landlord_heating_system"
STANDARD_EXISTING_PV = "landlord_existing_pv"
STANDARD_SAP = "landlord_sap_rating"
+ STANDARD_BLOCK_REFERENCE = "landlord_block_reference"
DOMNA_PROPERTY_ID = "domna_property_id"
@@ -292,6 +294,13 @@ class AssetList:
"Any further surveyor notes", 'Surveyors Name'
]
+ NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [
+ "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?",
+ "Does the property have cladding?", "Gable Wall Obstructions",
+ "Does the property have foliage that needs removal?",
+ "Potential unsafe environment", "Date of Inspection", "Borescoped?"
+ ]
+
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@@ -342,6 +351,40 @@ class AssetList:
"cavity wall, as built, partial insulation",
]
+ # Work type prefixes:
+ # Empties
+ EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity"
+ EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002'
+ EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled"
+ EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other"
+ EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build"
+ EPC_EMPTY_INSPECTIONS_NON_CAVITY = "EPC Shows Empty Cavity, inspections show non-cavity build"
+ EPC_EMPTY = "EPC Shows Empty Cavity"
+ LANDLORD_EMPTY_INSPECTIONS_OTHER = ("Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or "
+ "Non-cavity")
+ # Extraction
+ EXTRACTION_NON_INTRUSIVE = "Non-Intrusive Data Shows Cavity Extraction"
+
+ # Solar
+ SOLAR_ELIGIBLE = "Solar Eligible"
+ SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below"
+ SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade"
+
+ CRM_HISTORICAL_CAVITY_PRODUCT = {
+ "id": 156989182176, "unit_price": 0, "name": "Historical ECO Cavity"
+ }
+
+ CRM_PRODUCTS = {
+ "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity - ECO4"},
+ "Extract & Fill - ECO4": {"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"},
+ "Solar PV - ECO4": {"id": 82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"},
+ "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"},
+ "Solar PV + Heating Upgrade - ECO4": {
+ "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4"
+ },
+ "Historical ECO Cavity": CRM_HISTORICAL_CAVITY_PRODUCT
+ }
+
def __init__(
self,
local_filepath,
@@ -362,6 +405,7 @@ class AssetList:
landlord_heating_system=None,
landlord_existing_pv=None,
landlord_sap=None,
+ landlord_block_reference=None,
phase=False,
header=0
):
@@ -375,7 +419,7 @@ class AssetList:
self.standardised_asset_list = self.raw_asset_list.copy()
# Will be used to store aggregated figures against the various work types
self.work_type_figures = {}
- self.flat_data = None
+ self.block_analysis_df = None
self.duplicated_addresses = None
self.contact_details = None
self.contact_detail_fields = None
@@ -386,6 +430,7 @@ class AssetList:
self.unmatched_submissions = pd.DataFrame()
self.ecosurv = None
self.ecosurv_no_match = pd.DataFrame()
+ self.geographical_areas = pd.DataFrame()
# When this is True, we intend to break the programme into multiple phases. We may need to review
# how this is structured in the future, as depending on how we get future data, we may need to
@@ -400,6 +445,10 @@ class AssetList:
self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns
+ self.new_format_non_insturives_present = (
+ "Has the property been re-walled?" in self.raw_asset_list.columns
+ )
+
# Names of columns
self.landlord_property_id = landlord_property_id
self.address1_colname = address1_colname
@@ -414,6 +463,7 @@ class AssetList:
self.landlord_heating_system = landlord_heating_system
self.landlord_existing_pv = landlord_existing_pv
self.landlord_sap = landlord_sap
+ self.landlord_block_reference = landlord_block_reference
# parameters for cleaning
self.full_address_cols_to_concat = full_address_cols_to_concat
@@ -479,6 +529,23 @@ class AssetList:
self.standardised_asset_list["Archetype"].copy()
)
+ self.prefixes_to_products = {
+ # Empty
+ self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+ self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+ self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+ self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+ self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+ self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+ self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"],
+ # Extraction
+ self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"],
+ # Solar
+ self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"],
+ self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"],
+ self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"],
+ }
+
def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"):
if method not in self.ADDRESS_1_CLEANING_METHODS:
@@ -660,6 +727,7 @@ class AssetList:
self.landlord_heating_system,
self.landlord_existing_pv,
self.landlord_sap,
+ self.landlord_block_reference,
]
# Keep just non-null variables (e.g landlord may not provide uprn
self.keep_variables = [v for v in variables if v is not None]
@@ -677,6 +745,7 @@ class AssetList:
self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM,
self.landlord_existing_pv: self.STANDARD_EXISTING_PV,
self.landlord_sap: self.STANDARD_SAP,
+ self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE
}
self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
@@ -687,6 +756,9 @@ class AssetList:
if self.non_intrusives_eligibility:
non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN)
+ if self.new_format_non_insturives_present:
+ non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
+
if self.old_format_non_intrusives_present:
# We check if we have the ECO Eligibility column, which we might not have
non_intrusive_columns = [
@@ -920,7 +992,7 @@ class AssetList:
self.STANDARD_YEAR_BUILT,
self.STANDARD_WALL_CONSTRUCTION,
self.STANDARD_HEATING_SYSTEM,
- self.STANDARD_EXISTING_PV
+ self.STANDARD_BLOCK_REFERENCE,
] if v not in self.standardised_asset_list.columns
]
for v in missing_variables:
@@ -931,6 +1003,38 @@ class AssetList:
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str)
)
+ # Clean up the standard SAP column, which can be problematic
+ if self.landlord_sap is not None:
+ self.standardised_asset_list[self.STANDARD_SAP] = (
+ self.standardised_asset_list[self.STANDARD_SAP]
+ .astype(str)
+ .str.replace('\xa0', ' ', regex=False)
+ .str.strip()
+ )
+ self.standardised_asset_list[self.STANDARD_SAP] = np.where(
+ self.standardised_asset_list[self.STANDARD_SAP] == "",
+ None,
+ self.standardised_asset_list[self.STANDARD_SAP]
+ )
+ self.standardised_asset_list[self.STANDARD_SAP] = (
+ self.standardised_asset_list[self.STANDARD_SAP].astype(float)
+ )
+ # If it's zero, we set it to None
+ self.standardised_asset_list[self.STANDARD_SAP] = np.where(
+ self.standardised_asset_list[self.STANDARD_SAP] == 0,
+ None,
+ self.standardised_asset_list[self.STANDARD_SAP]
+ )
+
+ has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum()
+
+ # Perform block splitting, ahead of fetching the EPC data
+ # If we have blocks of flats without a landlord block reference, we create one
+ self.fill_landlord_block_reference(has_blocks_of_flats)
+
+ # If we have blocks of flats, we split these out into individual units.
+ self.split_blocks()
+
def merge_data(self, df: pd.DataFrame):
"""
Used to insert data into the standardised asset list, based on the domna property id
@@ -1147,7 +1251,7 @@ class AssetList:
processed_age_band, how="left"
)
- def identify_worktypes(self, cleaned):
+ def identify_worktypes(self):
if self.landlord_sap is not None:
# We add a SAP category for all work type identification
@@ -1176,6 +1280,13 @@ class AssetList:
)
)
+ self.standardised_asset_list["SAP Category"] = np.where(
+ pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) &
+ pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]),
+ "SAP Unknown",
+ self.standardised_asset_list["SAP Category"]
+ )
+
else:
# We add a SAP category for all work type identification
# We break into 4 categories (54 or less, 55-68, 69-74, 75 or more)
@@ -1196,6 +1307,11 @@ class AssetList:
),
)
)
+ self.standardised_asset_list["SAP Category"] = np.where(
+ pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]),
+ "SAP Unknown",
+ self.standardised_asset_list["SAP Category"]
+ )
# Before we being, we identify if a property has solar already as we use this
# for identifying cavity jobs
@@ -1426,13 +1542,22 @@ class AssetList:
)
)
+ # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the
+ # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the
+ # landlord data suggests otherwise (e.g. there's a gas boiler), we default to what the landlord has told us
self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] = (
- self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains(
- "electric storage heaters|room heaters"
+ (
+ self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains(
+ "electric storage heaters|room heaters"
+ ) & (
+ self.standardised_asset_list[
+ self.EPC_API_DATA_NAMES["mainheatcont-description"]
+ ] != "Controls for high heat retention storage heaters"
+ )
) & (
- self.standardised_asset_list[
- self.EPC_API_DATA_NAMES["mainheatcont-description"]
- ] != "Controls for high heat retention storage heaters"
+ ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
+ ["district heating", "communal heating", "communal gas boiler"]
+ ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ")
)
)
@@ -1501,19 +1626,9 @@ class AssetList:
else:
self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False
- # We merge on the u-value for average thermal transmittance
- walls_uvalue_data = pd.DataFrame(cleaned["walls-description"])
- walls_uvalue_data = walls_uvalue_data[
- ~pd.isnull(walls_uvalue_data["thermal_transmittance"])
- ][["original_description", "thermal_transmittance"]].rename(
- columns={
- "original_description": self.EPC_API_DATA_NAMES["walls-description"],
- "thermal_transmittance": "walls_u_value"
- }
- )
- self.standardised_asset_list = self.standardised_asset_list.merge(
- walls_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["walls-description"]
- )
+ self.standardised_asset_list["walls_u_value"] = self.standardised_asset_list[
+ self.EPC_API_DATA_NAMES["walls-description"]
+ ].apply(lambda x: WallAttributes(x).process()["thermal_transmittance"] if not pd.isnull(x) else None)
self.standardised_asset_list["solar_epc_walls_insulated"] = (
(
@@ -1526,16 +1641,20 @@ class AssetList:
)
)
- # We merge on the u-value for average thermal transmittance
- roof_data = pd.DataFrame(cleaned["roof-description"])[
- ["original_description", "thermal_transmittance", "is_pitched", "is_loft"]
- ].rename(
- columns={
- "original_description": self.EPC_API_DATA_NAMES["roof-description"],
- "thermal_transmittance": "roof_u_value",
- }
- )
-
+ roof_data = []
+ for desc in self.standardised_asset_list[
+ self.EPC_API_DATA_NAMES["roof-description"]
+ ].unique():
+ if pd.isnull(desc):
+ continue
+ roof_data.append(
+ {
+ self.EPC_API_DATA_NAMES["roof-description"]: desc,
+ **RoofAttributes(desc).process()
+ }
+ )
+ roof_data = pd.DataFrame(roof_data)
+ roof_data = roof_data.rename(columns={"thermal_transmittance": "roof_u_value"})
self.standardised_asset_list = self.standardised_asset_list.merge(
roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
)
@@ -1683,10 +1802,10 @@ class AssetList:
self.standardised_asset_list["cavity_reason"] = None
empty_cavity_map = {
- "non_intrusive_indicates_empty_cavity": "Non-Intrusive Data Shows Empty Cavity: ",
- "non_intrusive_indicates_empty_cavity_has_solar": "Non-Intrusive Data Shows Empty Cavity - property "
+ "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + ": ",
+ "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property "
"already has solar: ",
- "non_intrusive_indicates_empty_cavity_no_year_filter": f"Non-Intrusive Data Shows Empty Cavity, "
+ "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, "
f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ",
}
@@ -1711,7 +1830,7 @@ class AssetList:
)) &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[
+ f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[
"SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
@@ -1723,7 +1842,7 @@ class AssetList:
self.standardised_asset_list['non_intrusive_indicates_cavity_extraction'] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- "EPC Shows Empty Cavity, inspections show filled or other: " + self.standardised_asset_list[
+ f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + self.standardised_asset_list[
"SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
@@ -1735,7 +1854,7 @@ class AssetList:
(self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[
+ f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[
"SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
@@ -1747,8 +1866,7 @@ class AssetList:
(self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list[
- "SAP Category"],
+ f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + self.standardised_asset_list["SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
else:
@@ -1758,7 +1876,7 @@ class AssetList:
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- "EPC Shows Empty Cavity: " + self.standardised_asset_list["SAP Category"],
+ f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
@@ -1768,10 +1886,12 @@ class AssetList:
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- "EPC Shows Empty Cavity, inspections show non-cavity build: " + self.standardised_asset_list[
- "SAP Category"],
+ f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + self.standardised_asset_list["SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
+
+ # Work type prefixes
+
# Landlord data: The landlord's data indicates that the wall is an uninsulated cavity wall, but EPC and
# inspections show filled
self.standardised_asset_list["cavity_reason"] = np.where(
@@ -1781,7 +1901,7 @@ class AssetList:
~self.standardised_asset_list["epc_indicates_empty_cavity"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- "Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or Non-cavity: " +
+ f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " +
self.standardised_asset_list["SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
@@ -1792,7 +1912,7 @@ class AssetList:
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- "Non-Intrusive Data Shows Cavity Extraction: " + self.standardised_asset_list["SAP Category"],
+ f"{self.EXTRACTION_NON_INTRUSIVE}: " + self.standardised_asset_list["SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
@@ -1801,7 +1921,7 @@ class AssetList:
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
- f"Non-Intrusive Data Shows Cavity Extraction, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " +
+ f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " +
self.standardised_asset_list["SAP Category"],
self.standardised_asset_list["cavity_reason"]
)
@@ -1814,11 +1934,9 @@ class AssetList:
# Map of variables and fill values for the solar_reason variable
# ordering of this map is important, where we flag our prioritised work types first
solar_reason_map = {
- "solar_eligible": "Solar Eligible: ",
- "solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below: ",
- "solar_eligible_needs_heating_upgrade": (
- "Solar Eligible, Needs Heating Upgrade: "
- )
+ "solar_eligible": f"{self.SOLAR_ELIGIBLE}: ",
+ "solar_eligible_solid_wall_uninsulated": f"{self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED}: ",
+ "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: "
}
for variable, reason in solar_reason_map.items():
@@ -1864,17 +1982,18 @@ class AssetList:
for col in ["cavity_reason", "solar_reason"]:
self.standardised_asset_list[col] = np.where(
(
- (~pd.isnull(self.standardised_asset_list["submission_date"]))
+ (~pd.isnull(self.standardised_asset_list["submission_status"]))
),
None,
self.standardised_asset_list[col]
)
- if self.ecosurv is not None:
+ if self.ecosurv is not None and "ecosurv_install_status" in self.standardised_asset_list.columns:
+ # If we didn't match anything to ecosurv, the ecosurv_install_status won't exist
for col in ["cavity_reason", "solar_reason"]:
self.standardised_asset_list[col] = np.where(
(
- (~pd.isnull(self.standardised_asset_list["ecosurv_reference"]))
+ (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"]))
),
None,
self.standardised_asset_list[col]
@@ -1911,42 +2030,301 @@ class AssetList:
self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work)
]
- def flat_analysis(self):
-
- # We need to deduce the building name - we strip out the house number
-
- # We want to deduce if flats have 50% of the properties below C75
- # We group by postcode and property type
- grouped = self.standardised_asset_list.groupby(
- [self.STANDARD_POSTCODE, self.STANDARD_PROPERTY_TYPE]
+ # Finally, direct operations feedback has suggested that if a property is a flat that has a SAP rating of
+ # 76 or above, it's unlikely to be eligible for anything, so we flag its cavity reason accordingly
+ self.standardised_asset_list["cavity_reason"] = np.where(
+ (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") &
+ (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"),
+ self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)",
+ self.standardised_asset_list["cavity_reason"]
)
- flat_data = []
- for _, group in grouped:
- if "flat" in group[self.STANDARD_PROPERTY_TYPE].values:
- num_flats = group[self.STANDARD_PROPERTY_TYPE].shape[0]
- num_below_c75 = group[
- self.EPC_API_DATA_NAMES["current-energy-efficiency"]
- ].lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum()
- # Check if any flats are below C69
- num_flats_below_c69 = group[
- self.EPC_API_DATA_NAMES["current-energy-efficiency"]
- ].lt(69).sum()
+ # Split cavity_reason on the colon and check if the first part is equal to one of the prefixes listed below
+ # that indicate empties
+ self.standardised_asset_list["identified_empty_cavity"] = (
+ self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin(
+ [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY]
+ )
+ )
- flat_data.append(
- {
- "Postcode": group[self.STANDARD_POSTCODE].iloc[0],
- "Property Type": "Flat",
- "Number of Flats with EPC": num_flats,
- "Number of Flats below C75": num_below_c75,
- "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats),
- "Number of Flats Below C69": num_flats_below_c69,
- }
+ def fill_landlord_block_reference(self, has_blocks_of_flats):
+ if not has_blocks_of_flats:
+ return
+
+ # If we have blocks of flats, we fill the landlord_block_reference field with address 1 + postcode
+ self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where(
+ (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats") & (
+ pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])
+ ),
+ self.standardised_asset_list[self.STANDARD_ADDRESS_1] + " " +
+ self.standardised_asset_list[self.STANDARD_POSTCODE],
+ self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]
+ )
+
+ def split_blocks(self):
+ """
+ Where we have a single row that is a block of flats, we split this into multiple rows,
+ one for each unit. The data that we have will be copied across rows
+ :return:
+ """
+
+ blocks = self.standardised_asset_list[
+ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
+ ].copy()
+
+ if blocks.empty:
+ return
+
+ RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b')
+ NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc.
+
+ expanded_rows = []
+
+ for _, row in blocks.iterrows():
+ addr = str(row[self.STANDARD_ADDRESS_1])
+
+ # 1 ─ Range (e.g. 1-7)
+ m_range = RANGE_RE.search(addr)
+ if m_range:
+ start, end = m_range.groups()
+ start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0])
+ if start > end or (end - start) > 100:
+ raise ValueError(f"Suspicious range '{addr}'")
+ for n in range(start, end + 1):
+ new = row.copy()
+ new_addr = RANGE_RE.sub(str(n), addr, count=1)
+ original_full_address = new[self.STANDARD_FULL_ADDRESS]
+ new_full_address = original_full_address.replace(addr, new_addr)
+ new[self.STANDARD_ADDRESS_1] = new_addr
+ new[self.STANDARD_FULL_ADDRESS] = new_full_address
+ new[self.STANDARD_PROPERTY_TYPE] = "flat"
+ # Keep a record of the previous address 1
+ new["block_address1"] = addr
+ new["block_full_address"] = original_full_address
+ new["is_expended_block"] = True
+ # We update the full address
+
+ new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}"
+ expanded_rows.append(new)
+ continue
+
+ # 2 ─ Explicit list (e.g. 1, 2, 5 Block)
+ nums = NUM_RE.findall(addr)
+ if len(nums) > 1 and ',' in addr:
+ for n in nums:
+ new = row.copy()
+ new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only
+ new[self.STANDARD_ADDRESS_1] = new_addr
+ new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}"
+ expanded_rows.append(new)
+ continue
+
+ # 3 ─ Single number or no number, treat as individual dwelling
+ if (len(nums) == 1) or not nums:
+ expanded_rows.append(row)
+ continue
+
+ # Anything else with digits is unrecognised
+ raise NotImplementedError(f"Unhandled block format: '{addr}'")
+
+ expanded_blocks = pd.DataFrame(expanded_rows)
+
+ # We drop the blocks from the standardised asset list and append on the expanded blocks
+ self.standardised_asset_list = self.standardised_asset_list[
+ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
+ ]
+
+ self.standardised_asset_list = pd.concat(
+ [self.standardised_asset_list, expanded_blocks],
+ ignore_index=True
+ )
+
+ # As a final clean up, for any blocks that are size 1, we clear the block reference (so no project code)
+ sizes = (
+ expanded_blocks
+ .groupby(self.STANDARD_BLOCK_REFERENCE)[self.DOMNA_PROPERTY_ID]
+ .nunique()
+ .reset_index()
+ )
+ size_1 = sizes[sizes[self.DOMNA_PROPERTY_ID] <= 1]
+ # Remove the size 1 blocks from the standardised asset list
+ self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where(
+ self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(
+ size_1[self.STANDARD_BLOCK_REFERENCE].values
+ ),
+ None,
+ self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]
+ )
+
+ def label_property_status(self):
+ """
+ This function is designed to be run after identify_worktypes() has been run, and will create a "hubspot_status"
+ column, which will note where each property is (to be surveyed, surveyed, installed), using the stages we
+ recognise within hubspot
+ :return:
+ """
+
+ # For anything that is ready to go, that gets set to ready to be scheduled
+ self.standardised_asset_list["hubspot_status"] = np.where(
+ ~pd.isnull(self.standardised_asset_list["cavity_reason"]) |
+ ~pd.isnull(self.standardised_asset_list["solar_reason"]),
+ hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label,
+ None
+ )
+
+ # we step through the process of flagging completed surveys
+
+ # We utilise submissions, ecosurv and outcomes to define the hubspot status
+ # We'll take the maximum of these three columns, based on the enum integer value
+ label_to_enum = {e.label: e for e in hubspot_config.HubspotProcessStatus}
+
+ def get_max_status_from_columns(row):
+ status_candidates = []
+ for col in ["submission_status", "ecosurv_install_status", "outcome_status"]:
+ label = row.get(col)
+ if label in label_to_enum:
+ status_candidates.append(label_to_enum[label])
+ if not status_candidates:
+ return row["hubspot_status"] # fallback to existing status if no updates
+ return max(status_candidates).label
+
+ self.standardised_asset_list["hubspot_status"] = self.standardised_asset_list.apply(
+ get_max_status_from_columns, axis=1
+ )
+
+ self.standardised_asset_list["project_code"] = None
+ # if we have any blocks, where work is eligible, we flag them now
+ # These blocks may be referenced via the landlord_block_reference field, or by property types being
+ # blocks of flats
+ has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]))
+
+ if has_landlord_block_reference:
+ # For blocks that have a 50% allocation, we create project codes
+ self.block_analysis()
+ # Find any block refs with at least 50% empties
+ viable_empty_blocks = self.block_analysis_df[
+ self.block_analysis_df['Percentage of Empties'] >= 0.50
+ ]
+
+ if not viable_empty_blocks.empty:
+ project_code_lookup = viable_empty_blocks[["Block Reference"]].copy()
+ self.standardised_asset_list = self.standardised_asset_list.merge(
+ project_code_lookup, how="left", left_on=self.STANDARD_BLOCK_REFERENCE, right_on="Block Reference"
)
+ self.standardised_asset_list["project_code"] = np.where(
+ ~pd.isnull(self.standardised_asset_list["Block Reference"]),
+ self.standardised_asset_list["Block Reference"],
+ self.standardised_asset_list["project_code"]
+ )
+ self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"])
- flat_data = pd.DataFrame(flat_data)
+ def analyse_geographies(self):
+ cavity_programme = (
+ self.standardised_asset_list[["domna_postcode", "cavity_reason"]]
+ .groupby(["domna_postcode"])["cavity_reason"]
+ .count()
+ .reset_index()
+ )
+ solar_programme = (
+ self.standardised_asset_list[["domna_postcode", "solar_reason"]]
+ .groupby(["domna_postcode"])["solar_reason"]
+ .count()
+ .reset_index()
+ )
+ postcodes = (
+ self.standardised_asset_list[["domna_postcode", "landlord_property_id"]]
+ .groupby("domna_postcode")["landlord_property_id"]
+ .count()
+ .reset_index()
+ .rename(columns={"landlord_property_id": "n_properties"})
+ )
+ geographical_areas = postcodes.merge(cavity_programme, how="left", on="domna_postcode").merge(
+ solar_programme, how="left", on="domna_postcode"
+ ).fillna(0)
+ geographical_areas["coverage"] = (
+ (
+ geographical_areas["solar_reason"] + geographical_areas["cavity_reason"]
+ ) / geographical_areas["n_properties"] * 100
+ )
- self.flat_data = flat_data
+ geographical_areas = geographical_areas.sort_values("coverage", ascending=False)
+ self.geographical_areas = geographical_areas
+
+ def block_analysis(self):
+
+ # Reverse mapping: label -> enum
+ LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus}
+
+ # Threshold status - anything that is at this stage or beyond is considered surveyed
+ threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value
+
+ block_analysis = []
+ for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE):
+
+ cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100
+
+ if all(cavity_breakdown.index == "No Eligibility"):
+ continue
+
+ # We check the % of empty vs not empty as right now, we're focused on empty
+ n_empties = (
+ (group["identified_empty_cavity"] == True) &
+ (~pd.isnull(group["cavity_reason"])) &
+ (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False))
+ ).sum()
+
+ works = group["hubspot_status"]
+ above_threshold = works.map(LABEL_TO_ENUM.get).dropna()
+ count_above = (above_threshold >= threshold).sum()
+ proportion_surveyed = count_above / len(works)
+ proportion_empty = n_empties / len(works)
+ # We auto-populate any blocks that have greater than 50% proportion empty
+
+ block_analysis.append(
+ {
+ "Block Reference": block_reference,
+ "Proportion of properties suryeyed": proportion_surveyed,
+ "Percentage of Empties": proportion_empty,
+ **cavity_breakdown.to_dict(),
+ }
+ )
+
+ block_analysis = pd.DataFrame(block_analysis)
+ block_analysis = block_analysis.fillna(0)
+
+ # We flag which blocks are eligible for works. We need at least 50% empties
+ block_analysis["Eligible for Works"] = (
+ block_analysis["Percentage of Empties"] >= 0.50
+ )
+ block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False)
+
+ # For properties that are NOT eligible, we should update the cavity reason
+ ineligible_blocks = block_analysis[
+ ~block_analysis["Eligible for Works"]
+ ]["Block Reference"].values
+
+ eligible_blocks = block_analysis[
+ block_analysis["Eligible for Works"]
+ ]["Block Reference"].values
+
+ self.standardised_asset_list["cavity_reason"] = np.where(
+ self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks),
+ self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)",
+ self.standardised_asset_list["cavity_reason"]
+ )
+
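+ # If the property is in a block of flats that is eligible, but the property itself is not eligible, we flag this.
+ # NOTE(review): the only criterion applied below is that the property is in an eligible block of flats;
+ # the property's own eligibility is not checked here - confirm this is intended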
+
+ self.standardised_asset_list["cavity_reason"] = np.where(
+ self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks),
+ self.standardised_asset_list["cavity_reason"]
+ + " " + "(Flat in block with more than 50% eligible, but not eligible itself)",
+ self.standardised_asset_list["cavity_reason"]
+ )
+
+ self.block_analysis_df = block_analysis
@staticmethod
def split_full_name(x):
@@ -1970,6 +2348,8 @@ class AssetList:
sheet_name,
landlord_property_id,
phone_number_column=None,
+ secondary_phone_number_column=None,
+ secondary_contact_full_name=None,
email_column=None,
fullname_column=None,
firstname_column=None,
@@ -1979,6 +2359,8 @@ class AssetList:
self.contact_detail_fields = {
"landlord_property_id": landlord_property_id,
"phone_number": phone_number_column,
+ "secondary_phone_number": secondary_phone_number_column,
+ "secondary_contact_full_name": secondary_contact_full_name,
"email": email_column,
"fullname": fullname_column,
"firstname": firstname_column,
@@ -1986,7 +2368,8 @@ class AssetList:
}
details_colnames = [
- phone_number_column, email_column, fullname_column, firstname_column, lastname_column
+ phone_number_column, secondary_phone_number_column, email_column, fullname_column, firstname_column,
+ lastname_column
]
# We'll fill them
none_details = [x for x in details_colnames if x is None]
@@ -2007,68 +2390,113 @@ class AssetList:
*contact_details[fullname_column].apply(self.split_full_name)
)
else:
- raise NotImplementedError("Implement me")
+ contact_details["title"] = None
self.contact_details = contact_details
- def prepare_for_crm(self, company_domain, crm_pipeline_name, first_dealstage, assigned_surveyors):
+ @classmethod
+ def load_standardised_asset_list(cls, filepath, sheet_name, header):
"""
- This function prepares the data for upload into Hubspot
+ This function is designed to load the standardised asset list from a file
:return:
"""
# This is a placeholder for now
+ # instantiate the class
+ instance = cls(
+ local_filepath=filepath,
+ sheet_name=sheet_name,
+ address1_colname=cls.STANDARD_ADDRESS_1,
+ postcode_colname=cls.STANDARD_POSTCODE,
+ full_address_colname=cls.STANDARD_FULL_ADDRESS,
+ landlord_property_id=cls.STANDARD_LANDLORD_PROPERTY_ID,
+ full_address_cols_to_concat=[],
+ missing_postcodes_method=None,
+ address1_extraction_method=None,
+ landlord_year_built=cls.STANDARD_YEAR_BUILT,
+ landlord_uprn=cls.STANDARD_UPRN,
+ landlord_property_type=cls.STANDARD_PROPERTY_TYPE,
+ landlord_built_form=cls.STANDARD_BUILT_FORM,
+ landlord_wall_construction=cls.STANDARD_WALL_CONSTRUCTION,
+ landlord_roof_construction=cls.STANDARD_ROOF_CONSTRUCTION,
+ landlord_heating_system=cls.STANDARD_HEATING_SYSTEM,
+ landlord_existing_pv=cls.STANDARD_EXISTING_PV,
+ landlord_sap=cls.STANDARD_SAP,
+ landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE,
+ phase=False,
+ header=header
+ )
+ return instance
+ def prepare_for_crm(self, company_domain, installer_name, reconcile_programme=False):
+ """
+ This function prepares the data for upload into Hubspot
+ :param company_domain: The company domain name to be used in the CRM
+ :param installer_name: The name of the installer to be used in the CRM
+ :param reconcile_programme: If True, will include all properties with a project code, regardless of status
+ :raises ValueError: If the installer name is not valid or if there are missing products
+ :return:
+ """
# This maps the opportunities as we reference them, to the product data as stored in Hubspot
- product_lookup_table = {
- "Non-Intrusive Data Showed Cavity Extraction": {
- "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500
- },
- "Non-Intrusive Data Showed Empty Cavity": {
- "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
- },
- "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed": {
- "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
- },
- "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed": {
- "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500
- },
- "EPC Data Showed Empty Cavity": {
- "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
- },
- "Solid Floor, Insulated, No Solar": {
- "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
- },
- "Solid Floor, Insulated, Needs Loft": {
- "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
- },
- "Other Floor, Insulated, No Solar": {
- "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
- },
- "Other Floor, Insulated, Needs Loft": {
- "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
- }
- }
+ if not hubspot_config.Installer.is_valid_value(installer_name):
+ raise ValueError(f"Installer name {installer_name} is not valid. Please check the installer name.")
+
# We check if all products are covered in the lookup table
- cavity_products = self.standardised_asset_list["cavity_reason"].unique()
- solar_products = self.standardised_asset_list["solar_reason"].unique()
- # Check if there any options not in out lookup table
- if (
- any(x for x in cavity_products if x not in product_lookup_table) or
- any(x for x in solar_products if x not in product_lookup_table)
- ):
- raise ValueError("We have products not referenced in the lookup table - check this")
+ cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist()
+ cavity_products = [x for x in cavity_products if not pd.isnull(x)]
+ solar_products = self.standardised_asset_list["solar_reason"].unique().tolist()
+ solar_products = [x for x in solar_products if not pd.isnull(x)]
+
+ product_map = {}
+ for identified_product in cavity_products + solar_products:
+ if pd.isnull(identified_product):
+ continue
+
+ matched_product = None
+ for product_prefix, crm_product in self.prefixes_to_products.items():
+ if identified_product.startswith(product_prefix):
+ matched_product = crm_product
+
+ product_map[identified_product] = matched_product
+
+ # For each cavity and solar product, we iterate through the prefixes and map to the products
programme_data = self.standardised_asset_list.copy()
+ programme_data["domna_full_address"] = (
+ programme_data["domna_full_address"].str.replace(";", ", ", regex=False).str.replace(" ", "")
+ )
- # Exclusions - these are properties we won't treat for the moment
- product_exclusions = [
- "Other Floor, Insulated, No Solar",
- "Other Floor, Insulated, Needs Loft"
- ]
- if product_exclusions:
- logger.warning("Excluding products: %s", product_exclusions)
+ # Format the two date columns
+ programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce")
+ programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime(
+ programme_data[self.EPC_API_DATA_NAMES["inspection-date"]],
+ errors="coerce"
+ )
+ # Convert to dd/mm/yyyy format
+ programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y")
+ programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = (
+ programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y")
+ )
- programme_data = programme_data[programme_data["solar_reason"].isin(product_exclusions) == False]
+ # We take rows that have a surveyor and a date for the survey
+ # We include properties under 3 circumstances:
+ # 1) The hubspot status is ready to be scheduled and there is an assigned surveyor and week for survey
+ # 2) The hubspot status is something else, meaning this has been included in an existing programme
+ # 3) reconcile programme is true, and therefore all properties with a project code will be included
+
+ if reconcile_programme:
+ programme_data = programme_data[~pd.isnull(programme_data["project_code"])]
+ else:
+ ready_to_be_scheduled = (
+ (
+ programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
+ ) & (~pd.isnull(programme_data["survey_date"]))
+ )
+ # completed_works = (
+ # (programme_data["hubspot_status"] !=
+ # hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label) &
+ # (~pd.isnull(programme_data["hubspot_status"]))
+ # )
+ programme_data = programme_data[ready_to_be_scheduled]
# Merge on the contact details
programme_data = programme_data.merge(
@@ -2081,26 +2509,47 @@ class AssetList:
programme_data["Company Domain Name "] = company_domain
# Append the product data onto the programme data
programme_data["cavity_product"] = programme_data["cavity_reason"].map(
- lambda x: product_lookup_table.get(x, {"name": None})["name"]
+ lambda x: product_map.get(x, {"name": None})["name"]
)
programme_data["solar_product"] = programme_data["solar_reason"].map(
- lambda x: product_lookup_table.get(x, {"name": None})["name"]
+ lambda x: product_map.get(x, {"name": None})["name"]
)
- programme_data["domna_product"] = programme_data["solar_reason"].copy()
+ # We check if we have any missings
+ cavity_missing = pd.isnull(programme_data[~pd.isnull(programme_data["cavity_reason"])]["cavity_product"]).sum()
+ solar_missing = pd.isnull(programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"]).sum()
+
+ if cavity_missing > 0 or solar_missing > 0:
+ raise ValueError(
+ f"We have {cavity_missing} cavity products and {solar_missing} solar products that are not "
+ "mapped to a product in the lookup table. Please check the mapping."
+ )
+
+ programme_data["domna_product"] = programme_data["solar_product"].copy()
programme_data["domna_product"] = np.where(
pd.isnull(programme_data["domna_product"]),
- programme_data["solar_product"],
+ programme_data["cavity_product"],
programme_data["domna_product"]
)
# We filter just on rows where we have a product
- programme_data = programme_data[
- ~pd.isnull(programme_data["domna_product"])
- ]
- programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
+ if reconcile_programme:
+ # We include historical works, which will include historical cavity so we set these as extraction (as
+ # this is the main work mix)
+ programme_data["domna_product"] = programme_data["domna_product"].fillna(
+ self.CRM_HISTORICAL_CAVITY_PRODUCT["name"]
+ )
+ else:
+ # We shouldn't have any missing products
+ programme_data = programme_data[
+ ~pd.isnull(programme_data["survey_date"])
+ ]
+
+ if pd.isnull(programme_data["domna_product"]).sum():
+ raise ValueError("Missing products")
+ programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
product_df = (
- pd.DataFrame(product_lookup_table).T[["name", "id", "unit_price"]]
+ pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]]
.reset_index()
.rename(
columns={
@@ -2115,28 +2564,98 @@ class AssetList:
product_df['Quantity '] = 1
# Append on the product data
- programme_data = programme_data.merge(
- product_df,
- how="left",
- on="domna_product",
- )
+ programme_data = programme_data.merge(product_df, how="left", on="domna_product")
# Add in deal and pipeline information
- programme_data["dealname"] = programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data[
- "domna_product"]
- programme_data['Pipeline '] = crm_pipeline_name
- programme_data['Deal Stage '] = first_dealstage
+ programme_data["dealname"] = (
+ programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"]
+ )
+ programme_data['Pipeline '] = hubspot_config.CRM_PIPELINE_NAME
programme_data['Associations: Listing'] = "Property Owner"
- programme_data = programme_data.merge(
- assigned_surveyors.rename(
- columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID}
- ), how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
+ # We determine which column we should use for the UPRN
+ if self.STANDARD_UPRN not in programme_data.columns:
+ uprn_column = self.EPC_API_DATA_NAMES["uprn"]
+ # If we're working from the EPC, we don't have this information if the EPC is estimated
+ programme_data[uprn_column] = np.where(
+ programme_data["estimated"] == True, None, programme_data[uprn_column]
+ )
+ else:
+ # Use the value that has the most coverage
+ uprn_column = "hubspot_uprn"
+ programme_data[uprn_column] = programme_data[self.STANDARD_UPRN].fillna(
+ programme_data[self.EPC_API_DATA_NAMES["uprn"]]
+ )
+
+ # Add in some columns if we have them
+ date_of_inspections = (
+ "Non-Intrusives: Date of Inspection" if
+ "Non-Intrusives: Date of Inspection" in programme_data.columns else None
)
+ # Amend the property type and built form columns
+ programme_data["hubspot_property_type"] = programme_data[self.STANDARD_PROPERTY_TYPE].copy()
+ programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy()
+
+ def _replace_property_description_data(programme_data, column_name):
+ """
+ Helper function to replace property type or built form data with a specified value.
+ """
+
+ if column_name == "hubspot_property_type":
+ valid_values = ["house", "bungalow", "flat", "maisonette"]
+ epc_fill_col = "property-type"
+ elif column_name == "hubspot_built_form":
+ valid_values = ["detached", "semi-detached", "mid-terrace", "end-terrace"]
+ epc_fill_col = "built-form"
+ else:
+ raise ValueError(f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or "
+ f"'hubspot_built_form'.")
+
+ # Any value that is not one of the valid values for this column is set to None
+ programme_data[column_name] = np.where(
+ ~programme_data[column_name].isin(valid_values),
+ None,
+ programme_data[column_name]
+ )
+ # We fill with the EPC property type
+ programme_data[column_name] = np.where(
+ pd.isnull(programme_data[column_name]),
+ programme_data[self.EPC_API_DATA_NAMES[epc_fill_col]],
+ programme_data[column_name]
+ )
+
+ programme_data[column_name] = programme_data[column_name].fillna("unknown")
+
+ return programme_data
+
+ # Clean up the property type and built form columns
+ programme_data = _replace_property_description_data(programme_data, "hubspot_property_type")
+ programme_data = _replace_property_description_data(programme_data, "hubspot_built_form")
+
+ # We accommodate the old vs new inspections format
+ if "non-intrusives: WFT Findings" in programme_data.columns:
+ # We have the old format - we only have notes
+ non_intrusives_surveyor_notes = "non-intrusives: WFT Findings"
+ non_intrusives_construction = None
+ non_intrusives_insulated = None
+ non_intrusives_insulation_material = None
+ non_intrusives_ciga_check_required = None
+ non_intrusives_pv_access = None
+ non_intrusives_roof_orientation = None
+ non_intrusives_surveyor_name = None
+ else:
+ non_intrusives_surveyor_notes = 'non-intrusives: Any further surveyor notes'
+ non_intrusives_construction = "non-intrusives: Construction"
+ non_intrusives_insulated = "non-intrusives: Insulated"
+ non_intrusives_insulation_material = "non-intrusives: Material"
+ non_intrusives_ciga_check_required = 'non-intrusives: CIGA Check Required'
+ non_intrusives_pv_access = 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES'
+ non_intrusives_roof_orientation = 'non-intrusives: OFF GAS - ROOF ORIENTATION'
+ non_intrusives_surveyor_name = 'non-intrusives: Surveyors Name'
+
# This maps the hubspot schema to the template. Anything that is not covered in this will be flagged
schema_mappings = {
- 'Name ': self.DOMNA_PROPERTY_ID, # TODO: Maybe change this?
'Company Domain Name ': 'Company Domain Name ',
'Email ': (
self.contact_detail_fields["email"] if self.contact_detail_fields["email"] else None
@@ -2150,49 +2669,42 @@ class AssetList:
'Phone ': (
self.contact_detail_fields["phone_number"] if self.contact_detail_fields["phone_number"] else None
), # TODO: Review
+ 'Secondary Phone ': (
+ self.contact_detail_fields["secondary_phone_number"] if
+ self.contact_detail_fields["secondary_phone_number"] else None
+ ),
+ "Secondary Contact Full Name ": (
+ self.contact_detail_fields["secondary_contact_full_name"] if
+ self.contact_detail_fields["secondary_contact_full_name"] else None
+ ),
'Full Address ': self.STANDARD_FULL_ADDRESS,
'Address 1 ': self.STANDARD_ADDRESS_1,
'Address 2 ': None, # TODO: Don't have this for the moment
'Postcode ': self.STANDARD_POSTCODE,
- 'Property Type ': self.STANDARD_PROPERTY_TYPE,
- 'Property Sub Type ': None, # TODO: Don't have this for the moment
+ 'Property Type ': "hubspot_property_type",
+ 'Property Sub Type ': "hubspot_built_form",
'Bedroom(s) ': None, # TODO: Don't have this for the moment
'Domna Property ID ': self.DOMNA_PROPERTY_ID,
- 'National UPRN ': (
- self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"]
- ),
+ # We populate this with the column that we have
+ 'National UPRN ': uprn_column,
'Owner Property ID ': self.STANDARD_LANDLORD_PROPERTY_ID,
'Wall Construction ': self.STANDARD_WALL_CONSTRUCTION,
'Heating System ': self.STANDARD_HEATING_SYSTEM,
'Year Built ': self.STANDARD_YEAR_BUILT,
'Boiler Make ': None, # TODO: Don't have this for the moment
'Boiler Model ': None, # TODO: Don't have this for the moment
- 'Non-Intrusives: Date Checked ': None,
- # TODO: Don't have this for the moment
- 'Non-Intrusives: Wall Type ': (
- "non-intrusives: Construction" if self.non_intrusives_present else None
- ),
- 'Non-intrusives: Insulation ': (
- "non-intrusives: Insulated" if self.non_intrusives_present else None
- ),
- 'Non-intrusives: Insulation Material ': (
- "non-intrusives: Material" if self.non_intrusives_present else None
- ),
- 'Non-Intrusives: CIGA Check Required ': (
- 'non-intrusives: CIGA Check Required' if self.non_intrusives_present else None
- ),
- 'Non-Intrusives: PV Access Issues ': (
- 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' if self.non_intrusives_present else None
- ),
- 'Non-Intrusives: Roof Orientation ': (
- 'non-intrusives: OFF GAS - ROOF ORIENTATION' if self.non_intrusives_present else None
- ),
- 'Non-Intrusives: Surveyor Notes ': (
- 'non-intrusives: Any further surveyor notes' if self.non_intrusives_present else None
- ),
- 'Non-Intrusives: Surveyor Name ': (
- 'non-intrusives: Surveyors Name' if self.non_intrusives_present else None
- ),
+ 'Non-Intrusives: Date Checked ': date_of_inspections,
+ 'Non-Intrusives: Wall Type ': non_intrusives_construction,
+ 'Non-intrusives: Insulation ': non_intrusives_insulated,
+ 'Non-intrusives: Insulation Material ':
+ non_intrusives_insulation_material,
+ 'Non-Intrusives: CIGA Check Required ':
+ non_intrusives_ciga_check_required,
+ 'Non-Intrusives: PV Access Issues ': non_intrusives_pv_access,
+ 'Non-Intrusives: Roof Orientation ':
+ non_intrusives_roof_orientation,
+ 'Non-Intrusives: Surveyor Notes ': non_intrusives_surveyor_notes,
+ 'Non-Intrusives: Surveyor Name ': non_intrusives_surveyor_name,
'CIGA: Date Requested ': None, # TODO: Don't have this for the moment
'CIGA: Cavity Guarantee Found ': None,
'Last EPC: Is Estimated ': self.EPC_API_DATA_NAMES["estimated"],
@@ -2209,18 +2721,24 @@ class AssetList:
'Last EPC: Floor ': self.EPC_API_DATA_NAMES["floor-description"],
'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"],
'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"],
- 'Deal Stage ': 'Deal Stage ',
'Pipeline ': 'Pipeline ',
- 'Expected Commencement Date ': None, # TODO: Need to set this,
+ 'Expected Commencement Date ': "survey_date",
'Deal Name ': "dealname", # Need to create this,
'Product ID ': 'Product ID ',
'Name ': 'Name ',
'Unit price ': 'Unit price ',
'Quantity ': 'Quantity ',
- 'Deal Owner': 'surveyor_email',
- 'Amount ': 'Unit price ',
+ 'Deal Owner': 'surveyor',
+ 'Project Code ': 'project_code',
+ 'Associations: Listing': 'Associations: Listing',
+ 'Deal Stage ': "hubspot_status",
}
+ # We sometimes miss columns if the landlord never provided them, so we create them as empty columns
+ missed_mapping_cols = [c for c in schema_mappings.values() if c not in programme_data.columns if c is not None]
+ for c in missed_mapping_cols:
+ programme_data[c] = None
+
# We now create the finalised dataset to be uploaded into Hubspot
variables_required = list(schema_mappings.values())
variables_required = [v for v in variables_required if v is not None]
@@ -2235,6 +2753,27 @@ class AssetList:
columns={v: k for k, v in schema_mappings.items() if v is not None}
)
+ programme_data['Installer '] = installer_name
+ programme_data['Name '] = (
+ programme_data['Full Address '] + " ," + programme_data['Postcode ']
+ )
+ # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing
+ programme_data['Listing Owner Email '] = programme_data['Deal Owner']
+ programme_data['Amount '] = 0
+ programme_data["Deal Owner"] = np.where(
+ ~pd.isnull(programme_data["Deal Owner"]),
+ programme_data["Deal Owner"].astype(str).str.lower(),
+ programme_data["Deal Owner"]
+ )
+
+ # We make sure we have all of the columns that we need
+ missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns]
+ if missed_columns:
+ raise ValueError(
+ f"We have the following columns that are not in the programme data: {missed_columns}. "
+ "Please check the mapping and ensure all required columns are present."
+ )
+
self.hubspot_data = programme_data
def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None):
@@ -2324,13 +2863,63 @@ class AssetList:
logger.info("Matched %s properties to ecosurv data", len(matched))
logger.info("%s properties in Ecosurv remain unmatched", len(unmatched))
- # We now match
+ if not matched:
+ return
+
+ # We now match
matched = pd.DataFrame(matched)
# We'll possibly have duplicates here, where properties have been sold twice. Ww de-dupe
if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum():
# It doesn't matter too much which record we take
matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
+ # We merge on the status of the property
+ matched = matched.merge(
+ self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename(
+ columns={
+ "Reference": "ecosurv_reference",
+ "Status": "ecosurv_status",
+ "Lead Status": "ecosurv_lead_status",
+ "Tags": "ecosurv_tags",
+ "Installer": "ecosurv_installer"
+ }
+ ), how="left", on="ecosurv_reference"
+ )
+
+ matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER
+
+ # This mapping is ordered by process order, where lodgment is the final step so if we have an indication
+ # that the property is ready for lodgement, we set the status to that. We then proceed through the other
+ # statuses where the penultimate status is install complete
+ mapping = {
+ "Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED,
+ "TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE,
+ "Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+ "Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+ "Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+ "Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+ "Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+ "COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+ "Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+ "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+ "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+ "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+ "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER
+ }
+
+ def get_max_status(tag_str):
+ if pd.isna(tag_str):
+ return None
+ matched_statuses = []
+ for tag, status in mapping.items():
+ if tag in tag_str:
+ matched_statuses.append(status)
+ if not matched_statuses:
+ return None
+ return max(matched_statuses).label
+
+ matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status)
+
self.standardised_asset_list = self.standardised_asset_list.merge(
matched,
how="left",
@@ -2380,7 +2969,7 @@ class AssetList:
# Perform the remap
outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary)
- outcomes["Outcome"] = outcomes["Outcome"].str.lower()
+ outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip()
logger.info("Matching outcomes to asset list")
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
@@ -2507,7 +3096,7 @@ class AssetList:
else:
raise NotImplementedError("Invalid date in outcomes - implement me")
- notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes"
+ notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes"
lookup = lookup.merge(
self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id"
@@ -2542,12 +3131,13 @@ class AssetList:
apply(get_latest_note).
reset_index(drop=True)
)
- latest_note = latest_note[["domna_property_id", notes_col]]
+ latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename(
+ columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"}
+ )
pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index()
- pivot_df = pivot_df.merge(
- visit_counts, how="left", on="domna_property_id"
- )
+ pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id")
+ pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id")
# We want the latest note
@@ -2558,15 +3148,32 @@ class AssetList:
self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values)
self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id")
+ # We flag the outcome status, based on the outcome
+ pivot_df["outcome_status"] = None
+
+ if "surveyed" in pivot_df.columns:
+ pivot_df["outcome_status"] = np.where(
+ pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
+ pivot_df["outcome_status"]
+ )
+
+ if "installer refusal" in pivot_df.columns:
+ pivot_df["outcome_status"] = np.where(
+ pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label,
+ pivot_df["outcome_status"]
+ )
+
+ pivot_df["outcome_status"] = np.where(
+ pivot_df["latest_outcome"].isin(["see notes"]) &
+ (pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label),
+ hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label,
+ pivot_df["outcome_status"]
+ )
+
# We merge out pivoted outcomes onto the asset list
self.standardised_asset_list = self.standardised_asset_list.merge(
pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
)
- # Merge the latest note
- self.standardised_asset_list = self.standardised_asset_list.merge(
- latest_note.rename(columns={notes_col: "Latest Route March Note"}),
- how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
- )
if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum():
raise ValueError("Duplicates appreared - something went wrong")
@@ -2576,6 +3183,7 @@ class AssetList:
def flag_survey_master(
self,
master_filepaths,
+ master_id_colnames,
master_to_asset_list_filepath=None
):
# TODO: This probably needs further expansion
@@ -2591,7 +3199,7 @@ class AssetList:
logger.info("Getting masters and merging onto asset list")
master_surveyed = []
unmatched_submissions = []
- for filepath in master_filepaths:
+ for idx, filepath in enumerate(master_filepaths):
master_data = pd.read_csv(filepath)
# Strip columns
master_data.columns = [c.strip() for c in master_data.columns]
@@ -2618,22 +3226,6 @@ class AssetList:
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
)
- # if "UPRN" in master_data.columns:
- # # We just need to check if any were cancelled
- # master_to_append = master_data[
- # ["UPRN", install_col, submission_col]
- # ].rename(
- # columns={
- # "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
- # install_col: "survey_status",
- # submission_col: "submission_date"
- # }
- # )
- # master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
- #
- # master_surveyed.append(master_to_append)
- # continue
-
master_data["row_id"] = master_data.index
self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
@@ -2643,21 +3235,33 @@ class AssetList:
axis=1
)
- scheme_col = (
- "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" if
- "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns else "AFFORDABLE WARMTH"
- )
- postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code"
- house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO"
- property_type_col = (
- "PROPERTY TYPE As per table emailed" if
- "PROPERTY TYPE As per table emailed" in
- master_data.columns else "PROPERTY TYPE As per table emailed"
- )
- measure_mix_col = "MEASURE COMBO"
+ if "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns:
+ scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION"
+ elif "AFFORDABLE WARMTH" in master_data.columns:
+ scheme_col = "AFFORDABLE WARMTH"
+ else:
+ scheme_col = "OFFICE USE ONLY"
+
+ postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code"
+ if 'NO.' in master_data.columns:
+ house_no_col = 'NO.'
+ elif "NO" in master_data.columns:
+ house_no_col = 'NO'
+ else:
+ house_no_col = "NUMBER"
+
+ if "PROPERTY TYPE As per table emailed" in master_data.columns:
+ property_type_col = "PROPERTY TYPE As per table emailed"
+ elif "PROPERTY TYPE As per table emailed" in master_data.columns:
+ property_type_col = "PROPERTY TYPE As per table emailed"
+ else:
+ property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)"
+
+ measure_mix_col = "MEASURE COMBO"
+ installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
+ installer_col = "INSTALLER"
+ town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area'
- # Otherwise, we need to match algorithmically
- has_property_id = "UPRN" in master_data.columns
logger.info("Matching master data to asset list")
matched = []
unmatched = []
@@ -2670,13 +3274,22 @@ class AssetList:
if pd.isnull(row[postcode_col]):
continue
- # if has_property_id:
- # submission_uprn = row["UPRN"]
- #
- # if not pd.isnull(submission_uprn):
- # df = self.standardised_asset_list[
- # self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn
- # ]
+ if master_id_colnames[idx] is not None:
+ # Filter the standardised asset list on this
+ df = self.standardised_asset_list[
+ self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]]
+ ]
+ if df.shape[0] == 1:
+ matched.append(
+ {
+ "row_id": row["row_id"],
+ "original_house_no": original_house_no,
+ "original_street": original_street,
+ "original_postcode": original_postcode,
+ self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
+ }
+ )
+ continue
postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
@@ -2721,6 +3334,7 @@ class AssetList:
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
}
)
+ continue
if house_no in df["house_no"].values:
df = df[df["house_no"] == house_no]
@@ -2736,7 +3350,8 @@ class AssetList:
df = df[
df[self.STANDARD_FULL_ADDRESS].str.lower().apply(
lambda x: process.extractOne(
- " ".join([row[house_no_col], row["Street / Block Name"], row["TOWN"]]).lower(),
+ " ".join(
+ [row[house_no_col], row["Street / Block Name"], row[town_colname]]).lower(),
x
)[1]
) > 90
@@ -2781,18 +3396,31 @@ class AssetList:
self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no")
# We match the "UPRN" which is the landlords ID, onto the master sheet
+
+ if measure_mix_col not in master_data.columns:
+ master_data[measure_mix_col] = "Measure mix not recorded"
+
matched = pd.DataFrame(matched)
- master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge(
+ master_to_append = master_data[
+ [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col]
+ ].merge(
matched, how="left", on="row_id"
).rename(
columns={
scheme_col: "funding_scheme",
measure_mix_col: "measure_mix",
install_col: "survey_status",
- submission_col: "submission_date"
+ submission_col: "submission_date",
+ installer_notes_col: "submission_installer_notes",
+ installer_col: "submission_installer"
}
)
- master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
+ master_to_append["submission_cancelled"] = (
+ master_to_append["survey_status"].str.lower().str.contains("cancel")
+ )
+ master_to_append["submission_installed"] = (
+ master_to_append["survey_status"].str.lower().str.contains("installed")
+ )
master_surveyed.append(master_to_append)
unmatched_df = master_data[
master_data["row_id"].isin(unmatched)
@@ -2828,7 +3456,21 @@ class AssetList:
].astype(str)
# We de-dupe crudely on landlord property id
- self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
+ self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy()
+
+ # We now add the submission status, based on the hubspot stages
+ self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label
+ self.master_surveyed["submission_status"] = np.where(
+ self.master_surveyed["submission_cancelled"] == True,
+ hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label,
+ self.master_surveyed["submission_status"]
+ )
+
+ self.master_surveyed["submission_status"] = np.where(
+ self.master_surveyed["submission_installed"] == True,
+ hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label,
+ self.master_surveyed["submission_status"]
+ )
self.standardised_asset_list = self.standardised_asset_list.merge(
self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
diff --git a/asset_list/app.py b/asset_list/app.py
index bb898c09..7c0023ce 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -2,8 +2,6 @@ import os
import json
import pandas as pd
from pprint import pprint
-import msgpack
-from utils.s3 import read_from_s3
from asset_list.AssetList import AssetList
from asset_list.mappings.property_type import PROPERTY_MAPPING
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
@@ -62,98 +60,227 @@ def app():
Property UPRN
"""
- # Thurrock
- data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
- data_filename = "THURROCK COUNCIL - For analysis.xlsx"
- sheet_name = "Assets"
+ # NCHA
+ data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
+ data_filename = "Energy Information MASTER June 2025.xlsx"
+ sheet_name = "Data"
postcode_column = 'Postcode'
- fulladdress_column = "Full Address"
+ fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
- landlord_year_built = "Construction Date"
+ landlord_year_built = "Build Date (HAR10)"
landlord_os_uprn = None
- landlord_property_type = "Property Type"
- landlord_built_form = "Property Subtype"
- landlord_wall_construction = None
+ landlord_property_type = "Property Type (HAR10)"
+ landlord_built_form = "Build Form (EPC)"
+ landlord_wall_construction = "Wall Description"
landlord_roof_construction = None
- landlord_heating_system = "Main Heating Type"
+ landlord_heating_system = "HEAT Code"
landlord_existing_pv = None
- landlord_property_id = "Property Reference"
- landlord_sap = None
- outcomes_filename = []
- outcomes_sheetname = []
- outcomes_postcode = []
- outcomes_houseno = []
- outcomes_id = []
- outcomes_address = []
+ landlord_property_id = "Place ref"
+ landlord_sap = "EPC SAP"
+ outcomes_filename = None
+ outcomes_sheetname = None
+ outcomes_postcode = None
+ outcomes_houseno = None
+ outcomes_id = None
+ outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
+ asset_list_header = 0
+ landlord_block_reference = None
+ master_id_colnames = []
- # Medway
- data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
- data_filename = "MEDWAY Asset List.xlsx"
- sheet_name = "Asset list"
- postcode_column = 'Postcode'
- fulladdress_column = None
- address1_column = "House Number"
- address1_method = None
- address_cols_to_concat = ["House Number", "Street 1"]
- missing_postcodes_method = None
- landlord_year_built = "Year Built"
- landlord_os_uprn = None
- landlord_property_type = "Property Type - Academy"
- landlord_built_form = "Property Type - Academy"
- landlord_wall_construction = None
- landlord_roof_construction = None
- landlord_heating_system = None
- landlord_existing_pv = None
- landlord_property_id = "Row ID"
- landlord_sap = None
- outcomes_filename = []
- outcomes_sheetname = []
- outcomes_postcode = []
- outcomes_houseno = []
- outcomes_id = []
- outcomes_address = []
- master_filepaths = []
- master_to_asset_list_filepath = None
- phase = False
- ecosurv_landlords = None
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
+ # data_filename = "07.04 CALICO - Final List.xlsx"
+ # asset_list_header = 2
+ # sheet_name = "Final List"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = None
+ # address1_column = "Property Number / Name"
+ # address1_method = None
+ # address_cols_to_concat = [
+ # "Property Number / Name",
+ # "Street",
+ # "Town"
+ # ]
+ # missing_postcodes_method = None
+ # landlord_year_built = "NROSH Estimated Build Date"
+ # landlord_os_uprn = None
+ # landlord_property_type = "Asset Type"
+ # landlord_built_form = None
+ # landlord_wall_construction = "Wall Type"
+ # landlord_heating_system = "Boiler Type"
+ # landlord_existing_pv = None
+ # landlord_property_id = "Asset Reference"
+ # outcomes_filename = []
+ # outcomes_sheetname = []
+ # outcomes_postcode = []
+ # outcomes_houseno = []
+ # outcomes_id = []
+ # outcomes_address = []
+ # master_filepaths = []
+ # master_id_colnames = []
+ # master_to_asset_list_filepath = None
+ # landlord_roof_construction = None
+ # landlord_block_reference = None
+ # landlord_sap = "Current Efficiency Rating - Score"
+ # phase = None
+ # ecosurv_landlords = None
- # MHS
- data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
- data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
- sheet_name = "Sheet1"
- postcode_column = 'Postcode'
- fulladdress_column = "FullAddress"
- address1_column = None
- address1_method = "house_number_extraction"
- address_cols_to_concat = []
- missing_postcodes_method = None
- landlord_year_built = "BuiltInYear"
- landlord_os_uprn = None
- landlord_property_type = "AssetType"
- landlord_built_form = "PropertyType"
- landlord_wall_construction = None
- landlord_roof_construction = None
- landlord_heating_system = None
- landlord_existing_pv = None
- landlord_property_id = "UPRN"
- landlord_sap = None
- outcomes_filename = []
- outcomes_sheetname = []
- outcomes_postcode = []
- outcomes_houseno = []
- outcomes_id = []
- outcomes_address = []
- master_filepaths = []
- master_to_asset_list_filepath = None
- phase = False
- ecosurv_landlords = None
+ # data_folder = (
+ # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset
+ # List"
+ # )
+ # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
+ # sheet_name = "Assets"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = "Address"
+ # address1_column = None
+ # address1_method = "house_number_extraction"
+ # address_cols_to_concat = []
+ # missing_postcodes_method = None
+ # landlord_year_built = "Build Year"
+ # landlord_os_uprn = None
+ # landlord_property_type = "Property Archetype"
+ # landlord_built_form = None
+ # landlord_wall_construction = None
+ # landlord_heating_system = "Heating Fuel Type"
+ # landlord_existing_pv = None
+ # landlord_property_id = "Uprn - DO NOT DELETE"
+ # outcomes_filename = [
+ # os.path.join(data_folder, "RT - LiveWest.xlsx")
+ # ]
+ # outcomes_sheetname = ["Feedback"]
+ # outcomes_postcode = ["Poscode"]
+ # outcomes_houseno = ["No."]
+ # outcomes_id = ["UPRN"]
+ # outcomes_address = ["Address"]
+ # master_filepaths = [
+ # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
+ # Master "
+ # "- redacted for analysis/CAVITY-Table 1.csv"
+ # ]
+ # master_id_colnames = [None]
+ # master_to_asset_list_filepath = None
+ # landlord_roof_construction = None
+ # landlord_block_reference = None
+ # landlord_sap = None
+ # phase = None
+ # ecosurv_landlords = "livewest|live west"
+
+ # data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
+ # "2025/Livewest Asset List (Original) - csv")
+ # data_filename = "Report-Table 1.csv"
+ # sheet_name = None
+ # postcode_column = 'Postcode'
+ # fulladdress_column = "T1_Address"
+ # address1_column = None
+ # address1_method = "house_number_extraction"
+ # address_cols_to_concat = []
+ # missing_postcodes_method = None
+ # landlord_year_built = "Build Yr"
+ # landlord_os_uprn = None
+ # landlord_property_type = "T1_AssetType"
+ # landlord_built_form = "T1_AssetType"
+ # landlord_wall_construction = "Wall Type Cavity"
+ # landlord_heating_system = "Heating Fuel"
+ # landlord_existing_pv = None
+ # landlord_property_id = "T1_UPRN"
+ # outcomes_filename = [
+ # os.path.join(data_folder, "RT - LiveWest.xlsx")
+ # ]
+ # outcomes_address = ["Address"]
+ # outcomes_sheetname = ["Feedback"]
+ # outcomes_postcode = ["Poscode"]
+ # outcomes_houseno = ["No."]
+ # outcomes_id = ["UPRN"]
+ # master_filepaths = [
+ # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
+ # Master "
+ # "- redacted for analysis/CAVITY-Table 1.csv"
+ # ]
+ # master_id_colnames = [None]
+ # master_to_asset_list_filepath = None
+ # landlord_roof_construction = None
+ # landlord_block_reference = None
+ # landlord_sap = None
+ # phase = None
+ # ecosurv_landlords = "livewest|live west"
+
+ # Stori
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru"
+ # data_filename = "Asset list - for analysis.xlsx"
+ # sheet_name = "SAP and Costs Calculations"
+ # postcode_column = 'Postcode'
+ # fulladdress_column = "Address1"
+ # address1_column = None
+ # address1_method = "house_number_extraction"
+ # address_cols_to_concat = []
+ # missing_postcodes_method = None
+ # landlord_year_built = "Age"
+ # landlord_os_uprn = None
+ # landlord_property_type = "TYPE"
+ # landlord_built_form = "AGE / DETACHMENT"
+ # landlord_wall_construction = "WALL"
+ # landlord_roof_construction = "LOFT INSULATION"
+ # landlord_heating_system = "BOILER"
+ # landlord_existing_pv = "SOLAR PV"
+ # landlord_property_id = "UPRN"
+ # landlord_sap = "Current SAP Rating"
+ # landlord_block_reference = None
+ # outcomes_filename = []
+ # outcomes_sheetname = []
+ # outcomes_postcode = []
+ # outcomes_houseno = []
+ # outcomes_id = []
+ # outcomes_address = []
+ # master_filepaths = []
+ # master_to_asset_list_filepath = None
+ # master_id_colnames = []
+ # phase = False
+ # ecosurv_landlords = None
+
+ # Thrive - reconciliation
+ # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
+ # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
+ # sheet_name = "Sheet1"
+ # postcode_column = 'postcode'
+ # fulladdress_column = "full_address"
+ # address1_column = "address_line_1"
+ # address1_method = None
+ # address_cols_to_concat = []
+ # missing_postcodes_method = None
+ # landlord_year_built = "age_band_calculated"
+ # landlord_os_uprn = None
+ # landlord_property_type = "property_type"
+ # landlord_built_form = "build_form"
+ # landlord_wall_construction = None
+ # landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
+ # landlord_heating_system = "heating_type_updated"
+ # landlord_existing_pv = None
+ # landlord_property_id = "thrive_property_id"
+ # landlord_sap = "sap_rating_updated"
+ # landlord_block_reference = "block_reference"
+ # outcomes_filename = [
+ # os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
+ # ]
+ # outcomes_sheetname = ["Sheet1"]
+ # outcomes_postcode = ["postcode"]
+ # outcomes_houseno = ["No."]
+ # outcomes_id = ["thrive_property_id"]
+ # outcomes_address = ["address"]
+ # master_filepaths = [
+ # os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
+ # os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
+ # ]
+ # master_to_asset_list_filepath = None
+ # master_id_colnames = ["thrive_property_id", "thrive_property_id"]
+ # phase = False
+ # ecosurv_landlords = "thrive"
# Southern Midlands
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
@@ -182,40 +309,12 @@ def app():
# master_filepaths = []
# master_to_asset_list_filepath = None
- data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
- data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
- sheet_name = "CHECKED"
- postcode_column = 'Postcode'
- fulladdress_column = None
- address1_column = "AddressLine1"
- address1_method = None
- address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
- missing_postcodes_method = None
- landlord_year_built = None
- landlord_os_uprn = None
- landlord_property_type = "Archetype (PFP)"
- landlord_built_form = "Archetype (PFP)"
- landlord_wall_construction = None
- landlord_roof_construction = None
- landlord_heating_system = None
- landlord_existing_pv = None
- landlord_property_id = "Uprn"
- outcomes_filename = None
- outcomes_sheetname = None
- outcomes_postcode = None
- outcomes_houseno = None
- outcomes_id = None
- master_filepaths = []
- master_to_asset_list_filepath = None
- landlord_sap = None
- phase = None
-
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
asset_list = AssetList(
local_filepath=os.path.join(data_folder, data_filename),
- header=0,
+ header=asset_list_header,
sheet_name=sheet_name,
address1_colname=address1_column,
postcode_colname=postcode_column,
@@ -233,6 +332,7 @@ def app():
landlord_heating_system=landlord_heating_system,
landlord_existing_pv=landlord_existing_pv,
landlord_sap=landlord_sap,
+ landlord_block_reference=landlord_block_reference,
phase=phase
)
asset_list.init_standardise()
@@ -294,7 +394,8 @@ def app():
asset_list.flag_survey_master(
master_filepaths=master_filepaths,
- master_to_asset_list_filepath=master_to_asset_list_filepath
+ master_to_asset_list_filepath=master_to_asset_list_filepath,
+ master_id_colnames=master_id_colnames,
)
asset_list.flag_ecosurv(ecosurv_landlords)
@@ -306,7 +407,7 @@ def app():
epc_api_only = False
force_retrieve_data = False
skip = None # Used to skip already completed chunks
- chunk_size = 5000
+ chunk_size = 2000
filename = "Chunk {i}.csv"
download_folder = os.path.join(data_folder, "Chunks")
if not os.path.exists(download_folder):
@@ -486,59 +587,12 @@ def app():
)
asset_list.merge_data(epc_df)
-
asset_list.extract_attributes()
+ asset_list.identify_worktypes()
- cleaned = read_from_s3(
- s3_file_name="cleaned_epc_data/cleaned.bson",
- bucket_name="retrofit-data-dev"
- )
- cleaned = msgpack.unpackb(cleaned, raw=False)
-
- asset_list.identify_worktypes(cleaned)
-
- pprint(asset_list.work_type_figures)
-
- asset_list.flat_analysis()
-
- asset_list.load_contact_details(
- local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
- sheet_name="Report 1",
- landlord_property_id=asset_list.landlord_property_id,
- phone_number_column='Property Current Tel. Number',
- fullname_column='Proeprty Current Occupant',
- firstname_column=None,
- lastname_column=None,
- email_column=None, # TODO - we need this
- )
-
- # Convert to a format suitable for CRM
- # TODO: TEMP
- assigned_surveyors = pd.DataFrame(
- [
- {
- asset_list.landlord_property_id: "02610001",
- "week_commencing": "10/10/2025",
- "surveyor_name": "Khalim Conn-Kowlessar",
- "surveyor_email": "khalim@domna.homes",
- }
- ]
- )
-
- # TODO: Sort the output by postcode
-
- company_domain = "ealing.gov.uk"
- crm_pipeline_name = "Survey Management"
- first_dealstage = "READY TO BEGIN SCHEDULING"
- # TODO - temp, upload to either SharePoint or AWS
-
- asset_list.prepare_for_crm(
- assigned_surveyors=assigned_surveyors,
- company_domain=company_domain,
- crm_pipeline_name=crm_pipeline_name,
- first_dealstage=first_dealstage
- )
- hubspot_data = asset_list.hubspot_data
+ # We now flag the status of the property
+ asset_list.label_property_status()
+ asset_list.analyse_geographies()
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
@@ -546,7 +600,8 @@ def app():
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
- asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
+ if asset_list.block_analysis_df is not None:
+ asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
# If we have outcomes, we add a tab with the outcomes
if not asset_list.outcomes_for_output.empty:
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
@@ -560,5 +615,5 @@ def app():
if not asset_list.ecosurv_no_match.empty:
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
- # Store the Hubspot export as a csv
- hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)
+ if not asset_list.geographical_areas.empty:
+ asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
diff --git a/asset_list/hubspot/config.py b/asset_list/hubspot/config.py
new file mode 100644
index 00000000..5110fb5f
--- /dev/null
+++ b/asset_list/hubspot/config.py
@@ -0,0 +1,85 @@
+from enum import IntEnum, Enum
+
+CRM_PIPELINE_NAME = 'Operations - Housing Associations'
+
+
+class HubspotProcessStatus(IntEnum):
+ def __new__(cls, value, label):  # each member below is declared as (int value, label string)
+ obj = int.__new__(cls, value)
+ obj._value_ = value
+ obj.label = label  # label string kept on the member next to its int value
+ return obj
+
+ # The numerical values of this enum aren't important in themselves, but they define the order of operations
+
+ # This is the first stage, where a survey is ready to go
+ READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED"
+ # Access to the property wasn't obtained, and it needs sign off
+ SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
+ # The survey has been completed. We don't have any update as to whether the property has been installed
+ SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
+ # The property turned out to be ineligible
+ NOT_VIABLE = 4, "NOT VIABLE"
+ # The property is with the installer. This will likely be the default for historic programmes
+ SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER"
+ # The property has been installed
+ INSTALL_COMPLETE = 6, "INSTALL COMPLETE"
+ # The install has completed and lodgement is complete
+ LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"
+ # The property has been cancelled
+ INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"
+
+
+class Installer(Enum):
+ SCIS = "SCIS"
+ JJ_CRUMP = "J & J CRUMP"
+ SGEC = "SGEC"
+
+ @classmethod
+ def is_valid_value(cls, value):
+ """
+ Return True if ``value`` is the value of one of the members above (e.g. "SCIS"), otherwise False.
+ """
+ return value in cls._value2member_map_  # Enum's internal value -> member mapping
+
+
+CRM_UPLOAD_COLUMNS = [  # NOTE(review): 'Name ' is listed twice and most headers end in a space - confirm against the real upload template
+ 'Name ', 'Associations: Listing', 'Company Domain Name ',
+ 'Email ', 'First Name ', 'Last Name ',
+ 'Phone ', 'Secondary Phone ',
+ 'Secondary Contact Full Name ',
+ 'Listing Owner Email ',
+ 'Full Address ', 'Address 1 ',
+ 'Address 2 ', 'Postcode ',
+ 'Property Type ', 'Property Sub Type ',
+ 'Bedroom(s) ', 'Domna Property ID ',
+ 'National UPRN ', 'Owner Property ID ',
+ 'Wall Construction ', 'Heating System ',
+ 'Year Built ', 'Boiler Make ',
+ 'Boiler Model ',
+ 'Non-Intrusives: Date Checked ',
+ 'Non-Intrusives: Wall Type ',
+ 'Non-intrusives: Insulation ',
+ 'Non-intrusives: Insulation Material ',
+ 'Non-Intrusives: CIGA Check Required ',
+ 'Non-Intrusives: PV Access Issues ',
+ 'Non-Intrusives: Roof Orientation ',
+ 'Non-Intrusives: Surveyor Notes ',
+ 'Non-Intrusives: Surveyor Name ',
+ 'CIGA: Date Requested ',
+ 'CIGA: Cavity Guarantee Found ',
+ 'Last EPC: Is Estimated ',
+ 'Last EPC: EPC Rating ',
+ 'Last EPC: SAP Rating ',
+ 'Last EPC: Main Heating Description ',
+ 'Last EPC: Heating Controls ',
+ 'Last EPC: Lodgement Date ',
+ 'Last EPC: Floor Area ', 'Last EPC: Wall ',
+ 'Last EPC: Roof ', 'Last EPC: Floor ',
+ 'Last EPC: Room Height ',
+ 'Last EPC: Age Band ', 'Deal Stage ',
+ 'Pipeline ', 'Expected Commencement Date ',
+ 'Deal Name ', 'Project Code ',
+ 'Product ID ', 'Name ', 'Unit price ',
+ 'Quantity ', 'Deal Owner', 'Amount ', 'Installer '
+]
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
new file mode 100644
index 00000000..eed6d7e7
--- /dev/null
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -0,0 +1,91 @@
+import os
+import pandas as pd
+from asset_list.AssetList import AssetList
+
+
+def app():
+    """Build the HubSpot upload CSV from a standardised asset list and its contact details.
+
+    TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after
+    review. So, we will need to update the hubspot status for these entries and set them to None, if they
+    were previously being set to ready for scheduling. We don't want to just filter on rows where
+    cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
+    them
+
+    TODO: If we wish to upload deals in batches
+
+    :return: None. The CSV is written to the same folder as the asset list.
+    :raises ValueError: if any row left after filtering has no project code or no deal stage.
+    """
+    # inputs:
+    reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
+    customer_domain = "https://sandwell.gov.uk"  # NOTE(review): includes the scheme - confirm prepare_for_crm expects that
+    installer_name = "J & J CRUMP"
+    asset_list_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
+        "Standardised.xlsx"
+    )
+    asset_list_sheet_name = "Proposed Program"
+    asset_list_header = 1
+
+    contact_details_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
+    )
+    contacts_sheet_name = "Sheet1"
+    contacts_landlord_property_id = "landlord_property_id"
+    contacts_phone_number_column = "phone_number"
+    contacts_secondary_phone_number_column = "secondary_phone_number"
+    contacts_secondary_contact_full_name = "secondary_contact_full_name"
+    contacts_email_column = "email"
+    contacts_fullname_column = "fullname"
+    contacts_firstname_column = "firstname"
+    contacts_lastname_column = "lastname"
+
+    existing_programme_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv"
+    )
+
+    asset_list = AssetList.load_standardised_asset_list(
+        asset_list_filepath, asset_list_sheet_name, asset_list_header
+    )
+    asset_list.load_contact_details(
+        local_filepath=contact_details_filepath,
+        sheet_name=contacts_sheet_name,
+        landlord_property_id=contacts_landlord_property_id,
+        phone_number_column=contacts_phone_number_column,
+        secondary_phone_number_column=contacts_secondary_phone_number_column,
+        secondary_contact_full_name=contacts_secondary_contact_full_name,
+        email_column=contacts_email_column,
+        fullname_column=contacts_fullname_column,
+        firstname_column=contacts_firstname_column,
+        lastname_column=contacts_lastname_column
+    )
+
+    asset_list.prepare_for_crm(
+        company_domain=customer_domain,
+        installer_name=installer_name,
+        reconcile_programme=reconcile_programme
+    )
+
+    # Drop rows whose Domna Property ID already appears in the existing programme export
+    existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
+    asset_list.hubspot_data = asset_list.hubspot_data[
+        ~asset_list.hubspot_data["Domna Property ID "].isin(
+            existing_programme['Domna Property ID'].values
+        )
+    ]
+
+    # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
+    directory, filename = os.path.split(asset_list_filepath)
+    name, _ = os.path.splitext(filename)
+    output_filename = f"{name} - Hubspot Upload.csv"
+    output_filepath = os.path.join(directory, output_filename)
+    # Refuse to write the file if any remaining row lacks a project code or a deal stage
+    if pd.isnull(asset_list.hubspot_data['Project Code ']).any():
+        raise ValueError("Some rows have missing project codes. Fix these before uploading to HubSpot.")
+
+    if pd.isnull(asset_list.hubspot_data['Deal Stage ']).any():
+        raise ValueError("Some rows have missing deal stages. Fix these before uploading to HubSpot.")
+
+    # Just store locally
+    asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py
index 116c3203..45e45c54 100644
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@@ -331,4 +331,33 @@ BUILT_FORM_MAPPINGS = {
'Low Rise': 'low rise',
'Upper Floor': 'top-floor',
'High Rise': 'high rise',
+
+ '2012 ONWARDS DETACHED': 'detached',
+ '1950-66 END TERRACE': 'end-terrace',
+ '1976-82 MID TERRACED': 'mid-terrace',
+ '1950-66 MID TERRACE': 'mid-terrace',
+ '1991-95 DETACHED': 'detached',
+ '1976-82 END TERRACED': 'end-terrace',
+ '1967-75 DETACHED': 'detached',
+ 'PRE 1900 DETACHED': 'detached',
+ 'PRE 1900 MID TERRACE': 'mid-terrace',
+ '1900 DET': 'detached',
+ '1967-75 MID TERR': 'mid-terrace',
+ '1930-49 SEMI DET': 'semi-detached',
+ '1900-29 SEMI DET': 'semi-detached',
+ '1900-29 MID TERR': 'mid-terrace',
+ '1983- 90 MID TERR': 'mid-terrace',
+ '1976-82 MID TERR': 'mid-terrace',
+ '1983-90 END TERR': 'end-terrace',
+ '1991-95 SEMI DET': 'semi-detached',
+ '1983-90 SEMI DET': 'semi-detached',
+ '1991-95 MID TERR': 'mid-terrace',
+ '1950-66 SEMI DET': 'semi-detached',
+ '1900 MID TERR': 'mid-terrace',
+ '1967-75 SEMI DET': 'semi-detached',
+ '1983- 90 SEMI DET': 'semi-detached',
+ '1983-90 MID TERR': 'mid-terrace',
+ '1976-82 SEMI DET': 'semi-detached',
+ 'PRE 1900 MID TERR': 'mid-terrace'
+
}
diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py
index 51f5f922..e67fafb4 100644
--- a/asset_list/mappings/exising_pv.py
+++ b/asset_list/mappings/exising_pv.py
@@ -16,5 +16,6 @@ EXISTING_PV_MAPPINGS = {
'PV: 25% roof area, PV: 3.6kWp array': 'already has PV',
'PV: 10% roof area, PV: 2kWp array': 'already has PV',
'PV: 50% roof area': 'already has PV',
- 'Solar PV': 'already has PV'
+ 'Solar PV': 'already has PV',
+ 'SOLAR PV': 'already has PV'
}
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index 92f59f2c..1a46c429 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -27,7 +27,7 @@ STANDARD_HEATING_SYSTEMS = {
"electric ceiling",
"electric underfloor",
"no heating",
- "non-electric underfloor"
+ "non-electric underfloor",
}
HEATING_MAPPINGS = {
@@ -292,4 +292,39 @@ HEATING_MAPPINGS = {
'Communal Heating': 'communal heating',
'No Data': 'unknown',
'Boiler System': 'gas condensing boiler',
+ 'Storage heating': 'electric storage heaters',
+ 'Storage heating (HHRSH)': 'high heat retention storage heaters',
+
+ 'ELECTRIC BOILER': 'electric boiler',
+ 'STORAGE HEATERS': 'electric storage heaters',
+ 'GREENSTAR 24I JUNIOR': 'gas combi boiler',
+ 'generic cond combi post98': 'gas condensing combi',
+ 'SAP TABLE REG COND +98 NO PICTURE OF BOILER': 'gas condensing boiler',
+ 'ECO TEC PRO 28 H COMBI A': 'gas combi boiler',
+ 'GREENSTAR 25I ErP': 'gas combi boiler',
+ 'IDEAL LOGIC MAX COMBI C30': 'gas combi boiler',
+ 'ECO TEC PRO 28 (286/5-3)': 'gas combi boiler',
+ 'IDEAL LOGIC HEAT 30': 'gas boiler, radiators',
+ 'WORCESTER 240': 'gas boiler, radiators',
+ 'ECO TEC PRO 24 (246/5-3)': 'gas combi boiler',
+ 'ECO TEC PRO 28 (OLD)': 'gas combi boiler',
+ 'LOGIC COMBI2 C30': 'gas combi boiler',
+ 'GREENSTAR 28I JUNIOR': 'gas combi boiler',
+ 'WORCESTER 24i': 'gas combi boiler',
+ 'GREENSTAR 30I ErP': 'gas combi boiler',
+ '25 CDI': 'gas combi boiler',
+ 'GREENSTAR 28CDI COMPACT ErP': 'gas combi boiler',
+ 'GREENSTAR 24 RI': 'gas boiler, radiators',
+ 'BAXI COMBI 105 HE': 'gas combi boiler',
+ 'ECO TEC PRO 28 (OLD TYPE)': 'gas combi boiler',
+ 'WORCESTER 28 SI ll RSF': 'gas combi boiler',
+ 'GREENSTAR 30SI COMPACT ErP': 'gas combi boiler',
+ 'SAP TABLE REG COND +98 NO PICTURE OF CYLINDER': 'gas condensing boiler',
+ 'WORCESTER 24 SI ll RSF': 'gas combi boiler',
+ 'GREENSTAR 4000': 'gas combi boiler',
+ 'GREENSTAR 24i JUNIOR': 'gas combi boiler',
+ 'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler',
+ 'GREENSTAR 30SI COMPACT': 'gas combi boiler',
+ 'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler',
+ 'Not applicable for this asset type': 'unknown'
}
diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py
index b705d6ef..bdb6580e 100644
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@@ -252,5 +252,19 @@ PROPERTY_MAPPING = {
'Bedsit bungalow semi detached': 'bedsit',
'Bedsit Flat': 'bedsit',
'Semi detached house': 'house',
- 'Unit': 'unknown'
+ 'Unit': 'unknown',
+ 'HOUSE (3 STOREY)': 'house',
+ 'FLAT GROUND FLOOR': 'flat',
+ 'FLAT TOP FLOOR': 'flat',
+
+ 'SHARED HOUSE': 'house',
+ 'MAISONETTE': 'maisonette',
+ 'DIRECT ACCESS HOSTEL': 'other',
+ 'Day centre': 'other',
+ 'Care home': 'other',
+ 'BLOCK (Communal)': 'block of flats',
+ 'SHOP': 'other',
+ 'Office Block': 'other',
+ 'BLOCK (Non-Communal)': 'block of flats',
+ 'Refuge': 'other'
}
diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py
index 03d6f9af..13359ded 100644
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
"pitched unknown access to loft",
"piched unknown insulation",
"pitched insulated",
- "pitched less than 100mm insulation"
+ "pitched less than 100mm insulation",
"another dwelling above",
"flat unknown insulation",
"unknown insulated",
@@ -38,4 +38,18 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'200mm': 'pitched insulated',
'0-49mm': 'pitched less than 100mm insulation',
'50mm': 'pitched less than 100mm insulation',
+ '': 'unknown',
+ 'NR': 'unknown',
+ 'Non-joist': 'unknown',
+ '25mm': 'pitched less than 100mm insulation',
+ '400mm+': 'pitched insulated',
+ '12mm': 'pitched less than 100mm insulation',
+
+ '150MM': 'pitched insulated',
+ '200MM': 'pitched insulated',
+ '250MM': 'pitched insulated',
+ '100MM': 'pitched less than 100mm insulation',
+ 'U/K': 'unknown',
+ 'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation',
+ 'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation'
}
diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py
index 5e32531f..2e0a332f 100644
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@@ -224,5 +224,31 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Traditional Cavity Brickwork': 'cavity unknown insulation',
'System build (undefined)': 'system built',
'Non Trad Wimpey': 'system built',
- 'Non Trad Wates': 'system built'
+ 'Non Trad Wates': 'system built',
+
+ 'CAVITY FILLED 270MM': 'filled cavity',
+ 'CAVITY FILLED 270MM': 'filled cavity',
+ 'CAVITY FILLED 250MM': 'filled cavity',
+ 'CAVITY FILLED 260MM': 'filled cavity',
+ 'CAVITY FILLED 260MM': 'filled cavity',
+ 'SOLID A/B 220MM': 'solid brick unknown insulation',
+ 'CAVITY A/B 300MM': "uninsulated cavity",
+ 'CAVITY A/B 250MM': "uninsulated cavity",
+ 'CAVITY A/B 260MM': "uninsulated cavity",
+ 'CAVITY A/B 270MM': "uninsulated cavity",
+ 'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation',
+ 'CAVITY EWI': 'filled cavity',
+ 'SANDSTONE/CAVITY EXT': 'sandstone or limestone',
+ 'SYSTEM BUILD 100MM EWI': 'system built',
+ 'CAVITY A/B 260MM': "uninsulated cavity",
+ 'CAVITY A/B 270MM': "uninsulated cavity",
+ 'CAVITY A/B 250MM': "uninsulated cavity",
+ 'System': 'system built',
+ 'Sandstone/Limestone': 'sandstone or limestone',
+ 'No Fines': 'system built',
+ 'Granite/Whinstone': 'granite or whinstone',
+ 'Not applicable to this asset type': 'unknown',
+ 'Steel Frame': 'system built',
+ 'Solid Wall As Built': 'uninsulated solid brick',
+ 'Solid As Built': 'uninsulated solid brick'
}
diff --git a/asset_list/utils.py b/asset_list/utils.py
index ff9db3f8..1678b8e9 100644
--- a/asset_list/utils.py
+++ b/asset_list/utils.py
@@ -79,7 +79,13 @@ def get_data(
uprn=uprn
)
# Force the skipping of estimating the EPC
- searcher.ordnance_survey_client.property_type = None
+ # We check if the property was split
+ if home.get("is_expended_block"):
+ searcher.ordnance_survey_client.property_type = "Flat"
+ searcher.property_type = "Flat"
+ searcher.set_strict_property_type_search()
+ else:
+ searcher.ordnance_survey_client.property_type = None
searcher.ordnance_survey_client.built_form = None
searcher.find_property(skip_os=True)
diff --git a/backend/Funding.py b/backend/Funding.py
index f5f85b9f..49d2d293 100644
--- a/backend/Funding.py
+++ b/backend/Funding.py
@@ -5,7 +5,7 @@ from typing import List
from backend.app.plan.schemas import HousingType
-class Funding:
+class FundingOld:
"""
Given a property, this class identifies if the home is possibly eligible for funding under
the various funding schemes. It will also calculate the expected amount of funding available
@@ -411,3 +411,190 @@ class Funding:
self.gbis()
# self.eco4()
self.whlg()
+
+
+class Funding:
+    """
+    New class to handle funding calculation.
+
+    The instance holds the ABS rates for each tenure / wall type combination, the ECO4 full project scores
+    matrix and the WHLG eligible postcodes. `check_funding` derives the SAP and floor area bands for a
+    property, stores them on the instance and uses them to look up the full project score.
+    """
+
+    def __init__(
+            self,
+            tenure: HousingType,
+            social_cavity_abs_rate: float,
+            social_solid_abs_rate: float,
+            private_cavity_abs_rate: float,
+            private_solid_abs_rate: float,
+            project_scores_matrix,
+            whlg_eligible_postcodes
+    ):
+        """
+        :param tenure: tenure of the property; `check_funding` compares it against "Private" and "Social"
+        :param social_cavity_abs_rate: ABS rate for social cavity properties (stored, not used yet)
+        :param social_solid_abs_rate: ABS rate for social solid wall properties (stored, not used yet)
+        :param private_cavity_abs_rate: ABS rate applied to private properties when `is_cavity` is True
+        :param private_solid_abs_rate: ABS rate applied to private properties when `is_cavity` is False
+        :param project_scores_matrix: table with the columns "Floor Area Segment", "Starting Band",
+            "Finishing Band" and "Cost Savings"; it is filtered with boolean masks, so a pandas DataFrame
+            is expected
+        :param whlg_eligible_postcodes: warm homes local grant eligible postcodes (stored, not used yet)
+        """
+        self.tenure = tenure
+        self.social_cavity_abs_rate = social_cavity_abs_rate
+        self.social_solid_abs_rate = social_solid_abs_rate
+        self.private_cavity_abs_rate = private_cavity_abs_rate
+        self.private_solid_abs_rate = private_solid_abs_rate
+
+        # The bands are populated by check_funding before the matrix lookup
+        self.starting_sap_band = None
+        self.ending_sap_band = None
+        self.floor_area_band = None
+        self.project_scores_matrix = project_scores_matrix
+        self.whlg_eligible_postcodes = whlg_eligible_postcodes
+
+    @staticmethod
+    def get_sap_band(sap_score_number):
+        """
+        Maps a SAP score onto its band label
+        :param sap_score_number: numeric SAP score
+        :return: the band label, e.g. "Low_C", or None when the score falls in no band (below 1)
+        """
+        # (label, inclusive lower bound, exclusive upper bound)
+        bands = [
+            ("High_A", 96, float("inf")),
+            ("Low_A", 92, 96),
+            ("High_B", 86, 92),
+            ("Low_B", 81, 86),
+            ("High_C", 74.5, 81),
+            ("Low_C", 69, 74.5),
+            ("High_D", 61.5, 69),
+            ("Low_D", 55, 61.5),
+            ("High_E", 46.5, 55),
+            ("Low_E", 39, 46.5),
+            ("High_F", 29.5, 39),
+            ("Low_F", 21, 29.5),
+            ("High_G", 10.5, 21),
+            ("Low_G", 1, 10.5),
+        ]
+
+        for band, lower, upper in bands:
+            if lower <= sap_score_number < upper:
+                return band
+
+        return None
+
+    @staticmethod
+    def get_floor_area_band(floor_area):
+        """
+        Maps a floor area onto its floor area segment label. The label is compared against the
+        "Floor Area Segment" column of the project scores matrix
+        :param floor_area: total floor area of the property
+        :return: one of "0-72", "73-97", "98-199" or "200+"
+        """
+        if floor_area <= 72:
+            return "0-72"
+
+        if floor_area <= 97:
+            return "73-97"
+
+        if floor_area <= 199:
+            return "98-199"
+
+        # Same label as the classify_floor_area helpers in the customer scripts, which join on the matrix
+        return "200+"
+
+    @staticmethod
+    def eco4_prs_eligibility(
+            starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
+    ):
+        """
+        Handles the eligibility criteria for private rental properties under eco
+
+        Help to heat group:
+        1) EPC E - G
+        2) Must receive one of SWI, FTCH, renewable heating or DHC
+        3) Tenant must be on benefits
+
+        We don't consider the tenant being on benefits - we just notify the end user that this is a requirement
+
+        :param starting_sap: SAP score before the works; 54 or below is treated as EPC E - G
+        :param measures: measure identifiers in the package, e.g. "solar_pv"
+        :param mainheat_description: description of the main heating system
+        :param heating_control_description: description of the heating controls
+        :return: True if the package meets the criteria checked here, otherwise False
+        """
+        meets_epc = starting_sap <= 54
+        has_solid_wall = "internal_wall_insulation" in measures or "external_wall_insulation" in measures
+
+        # The reference strings below are lower case, so both descriptions are compared case-insensitively
+        mainheat = mainheat_description.lower()
+        heating_controls = heating_control_description.lower()
+
+        # We check if the property has a heating system that means solar pv counts as a renewable heating system
+        has_eligible_electric_heating = any(
+            heating in mainheat
+            for heating in ("air source heat pump", "ground source heat pump", "boiler and radiators, electric")
+        ) or (
+            "electric storage heaters" in mainheat
+            and heating_controls == "controls for high heat retention storage heaters"
+        )
+
+        # Counts as renewable heating
+        solar_renewable_heating = has_eligible_electric_heating and "solar_pv" in measures
+        # Is a renewable heating
+        ashp = "air_source_heat_pump" in measures
+
+        return meets_epc and (solar_renewable_heating or ashp or has_solid_wall)
+
+    def calculate_full_project_abs(self):
+        """
+        Looks up the full project score for the bands stored on the instance by check_funding
+        :return: the "Cost Savings" value of the first matching row of the project scores matrix
+        :raises ValueError: if no row matches the floor area, starting and finishing bands
+        """
+
+        # Filter the project scores matrix
+        data = self.project_scores_matrix[
+            (self.project_scores_matrix["Floor Area Segment"] == self.floor_area_band) &
+            (self.project_scores_matrix["Starting Band"] == self.starting_sap_band) &
+            (self.project_scores_matrix["Finishing Band"] == self.ending_sap_band)
+        ]
+
+        if data.empty:
+            raise ValueError("Missing abs rate, check the project scores matrix")
+
+        return data["Cost Savings"].values[0]
+
+    def check_funding(
+            self, measures: List,
+            starting_sap: int,
+            ending_sap: int,
+            floor_area: float,
+            mainheat_description: str,
+            heating_control_description: str,
+            is_cavity: bool
+    ):
+        """
+        Given a list of measures, this function will check if the package of measures is fundable
+        :param measures: measure identifiers in the package
+        :param starting_sap: SAP score before the works
+        :param ending_sap: SAP score after the works
+        :param floor_area: total floor area of the property
+        :param mainheat_description: description of the main heating system
+        :param heating_control_description: description of the heating controls
+        :param is_cavity: True to apply the cavity ABS rate, False to apply the solid wall ABS rate
+        :return: the estimated ECO4 funding (full project score multiplied by the ABS rate) for a Private
+            property, or None when the package is not ECO4 eligible
+        :raises NotImplementedError: for SAP movements or tenures that are not handled yet
+        :raises ValueError: if the project scores matrix has no row for the derived bands
+        """
+
+        # If it's an E or D, should get to an EPC C
+        if starting_sap >= 55 and ending_sap < 69:
+            raise NotImplementedError("This property doesn't have sufficient SAP movement")
+
+        # Boolean `and` is required here: `&` binds tighter than `<=` and would turn this into a chained
+        # comparison against `38 & ending_sap`
+        if starting_sap <= 38 and ending_sap <= 55:
+            # F or G should get to D
+            raise NotImplementedError("Implement F or G to D eligibility")
+
+        self.starting_sap_band = self.get_sap_band(starting_sap)
+        self.ending_sap_band = self.get_sap_band(ending_sap)
+        self.floor_area_band = self.get_floor_area_band(floor_area)
+
+        ########################
+        # Private
+        ########################
+        # 1) ECO4
+        # 2) GBIS
+
+        if self.tenure == "Private":
+            is_eco4_eligible = self.eco4_prs_eligibility(
+                starting_sap=starting_sap,
+                measures=measures,
+                mainheat_description=mainheat_description,
+                heating_control_description=heating_control_description
+            )
+
+            # TODO - GBIS eligibility needs to be implemented
+            # 1) Package has to include an insulation measure
+            # 2) We should use the funding for the measure that has the largest partial project score
+
+            if not is_eco4_eligible:
+                return None
+
+            eco4_abs = self.calculate_full_project_abs()
+            # We estimate rates now
+            abs_rate = self.private_cavity_abs_rate if is_cavity else self.private_solid_abs_rate
+            return eco4_abs * abs_rate
+
+        ########################
+        # Social
+        ########################
+        # 1) ECO4
+        # 2) GBIS
+
+        if self.tenure == "Social":
+            raise NotImplementedError("Funding for Social housing is not implemented yet")
+
+        raise NotImplementedError("Only implemented for Private or Social housing")
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 0010191a..16dd8f04 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -160,6 +160,9 @@ class SearchEpc:
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
but can be used to find the epc for the home, if address1 and postcode are insufficient
+
+ If you wish to run a strict property type search, please run set_strict_property_type_search()
+
:param address1: string, propery's address line 1
:param postcode: string, propery's postcode
:param full_address: string, optional parameter, the full address of the property
@@ -189,6 +192,7 @@ class SearchEpc:
self.older_epcs = None
self.full_sap_epc = None
self.metadata = None
+ self.strict_property_type_search = False
# These are the address and postcode values, which we store in the database
self.address_clean = None
@@ -199,6 +203,14 @@ class SearchEpc:
self.property_type = property_type
self.fast = fast
+ def set_strict_property_type_search(self):
+ """
+ This method sets the strict property type search flag to True. When this flag is set and a property type
+ has been provided, the lower-cased property type is added to the search parameters under the
+ "property-type" key, so the search will only return results that match the specified property type.
+ The flag is initialised to False and has no effect while property_type is not set.
+ :return: None
+ """
+ self.strict_property_type_search = True
+
@staticmethod
def get_house_number(address: str, postcode=None) -> str | None:
"""
@@ -315,6 +327,8 @@ class SearchEpc:
address_params["address"] = self.address1
if self.postcode:
address_params["postcode"] = self.postcode
+ if self.strict_property_type_search and self.property_type:
+ address_params["property-type"] = self.property_type.lower()
# We attempt the search with uprn params
@@ -365,11 +379,16 @@ class SearchEpc:
unique_property_types = {r["property-type"] for r in rows}
+ is_just_a_house = (len(unique_property_types) == 1) & (
+ ("House" in unique_property_types) | ("Bungalow" in unique_property_types)
+ )
+
# We allow for variation in property type across flats/maisonettes
# If we know that we have a flat/maisonette, we allow for both property types
- if property_type in ["Flat", "Maisonette"]:
- if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
- ) or unique_property_types == {"Flat", "Maisonette"}):
+ # Make sure we have not JUST a house, or not JUST a flat/maisonette
+ if property_type in ["Flat", "Maisonette"] and not is_just_a_house:
+ if (((len(uprns) == 1) and ((len(unique_property_types) == 1)
+ ) or unique_property_types == {"Flat", "Maisonette"})):
return rows
if property_type is not None:
@@ -424,6 +443,8 @@ class SearchEpc:
return rows
+ raise ValueError("property type and address cannot both be None, at least one must be provided")
+
@staticmethod
def format_address(newest_epc):
"""
@@ -702,6 +723,18 @@ class SearchEpc:
exclude_old=exclude_old
)
+ # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build
+ # so we avoid comparing it to new builds
+ # TODO - this is experimental
+ newer_age_bands = [
+ "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011",
+ "England and Wales: 2012 onwards"
+ ]
+
+ if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum():
+ # We have some older age bands, so we need to filter them out
+ epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy()
+
# If we have missing lodgment date, we fill it with inspection-date
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
# If we still have missing dates, we set it to the mean of the non NA dates
diff --git a/backend/engine/engine.py b/backend/engine/engine.py
index 58c3dc8e..5316fd03 100644
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest):
)
# if we have a remote assment data type, we pull the additional data and include it
- if body.event_type == "remote_assessment":
+ if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]):
logger.info("Retrieving find my epc data")
try:
property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(
diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py
new file mode 100644
index 00000000..311ab589
--- /dev/null
+++ b/backend/tests/test_funding.py
@@ -0,0 +1,52 @@
+import pytest
+import pandas as pd
+from utils.s3 import read_csv_from_s3
+from backend.Funding import Funding
+
+
+def get_funding_data():
+    """
+    Loads the two funding reference datasets from S3: the eco project scores matrix and the
+    warm homes local grant eligible postcodes
+    :return: tuple of (project_scores_matrix, whlg_eligible_postcodes), both as DataFrames
+    """
+    bucket = "retrofit-data-dev"
+
+    # The matrix columns are renamed positionally and the score is forced to float
+    scores = pd.DataFrame(
+        read_csv_from_s3(bucket_name=bucket, filepath="funding/ECO4 Full Project Scores Matrix.csv")
+    )
+    scores.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
+    scores["Cost Savings"] = scores["Cost Savings"].astype(float)
+
+    postcodes = pd.DataFrame(
+        read_csv_from_s3(bucket_name=bucket, filepath="funding/whlg eligible postcodes.csv")
+    )
+
+    return scores, postcodes
+
+
+class TestFunding:
+ # Tests for the Funding class. The reference data is loaded from S3 through get_funding_data.
+
+ def test_prs(self):
+ # Runs check_funding for a "Private" tenure with a package of internal wall insulation and solar pv.
+ # NOTE(review): there are no assertions, so this only checks that the call does not raise.
+ eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data()
+ funding = Funding(
+ project_scores_matrix=eco_project_scores_matrix,
+ whlg_eligible_postcodes=whlg_eligible_postcodes,
+ social_cavity_abs_rate=13.5,
+ social_solid_abs_rate=17,
+ private_cavity_abs_rate=13.5,
+ private_solid_abs_rate=17,
+ tenure="Private",
+ )
+
+ measures_1 = ["internal_wall_insulation", "solar_pv"]
+ # starting_sap=54 satisfies the `starting_sap <= 54` check in eco4_prs_eligibility
+ funding.check_funding(
+ measures=measures_1,
+ starting_sap=54,
+ ending_sap=69,
+ floor_area=73,
+ mainheat_description="Boiler and radiators, mains gas",
+ heating_control_description="Programmer, room thermostat and TRVs",
+ is_cavity=True
+ )
diff --git a/etl/customers/Futures Housing/validation_surveys.py b/etl/customers/Futures Housing/validation_surveys.py
new file mode 100644
index 00000000..1f8e6cfa
--- /dev/null
+++ b/etl/customers/Futures Housing/validation_surveys.py
@@ -0,0 +1,167 @@
+import pandas as pd
+
+
+def get_band(sap_score_number):
+    """
+    Maps a SAP score onto its band label.
+    :param sap_score_number: numeric SAP score
+    :return: the band label, e.g. "Low_C", or None when the score falls in no band
+    """
+    # (label, inclusive lower bound, exclusive upper bound), highest band first
+    thresholds = (
+        ("High_A", 96, float("inf")),
+        ("Low_A", 92, 96),
+        ("High_B", 86, 92),
+        ("Low_B", 81, 86),
+        ("High_C", 74.5, 81),
+        ("Low_C", 69, 74.5),
+        ("High_D", 61.5, 69),
+        ("Low_D", 55, 61.5),
+        ("High_E", 46.5, 55),
+        ("Low_E", 39, 46.5),
+        ("High_F", 29.5, 39),
+        ("Low_F", 21, 29.5),
+        ("High_G", 10.5, 21),
+        ("Low_G", 1, 10.5),
+    )
+
+    return next(
+        (label for label, low, high in thresholds if low <= sap_score_number < high),
+        None,
+    )
+
+
+def classify_floor_area(floor_area):
+    """
+    Maps a floor area onto its floor area band label.
+    :param floor_area: total floor area of the property
+    :return: one of "0-72", "73-97", "98-199" or "200+"
+    """
+    # (inclusive upper bound, label), checked in ascending order
+    for upper_bound, label in ((72, "0-72"), (97, "73-97"), (199, "98-199")):
+        if floor_area <= upper_bound:
+            return label
+
+    return "200+"
+
+
+asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx",
+ sheet_name="Standardised Asset List"
+)
+
+asset_list["starting_sap_band"] = asset_list["epc_sap_score_on_register"].apply(get_band)
+asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(classify_floor_area)
+
+# Objective:
+# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below
+#
+# Therefore:
+# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying
+# to determine which EPC C properties or above will qualify, just how much works will cost for properties that do
+# qualify
+# 2) We cannot survey everything, so before we undertake too much risk we should produce some costings for each of the
+# archetypes
+#
+# Driving Factors:
+# 1) Floor area band & starting SAP band - this will determine how much funding is produced
+# 2) Heating system - this will determine if the property needs a heating upgrade or not
+
+
+# One archetype per (floor area band, starting SAP band, heating system) for properties with a SAP score of
+# 68 or below, ordered by the number of properties they cover
+archetypes = asset_list[asset_list["epc_sap_score_on_register"] <= 68].groupby(
+    ["floor_area_band", "starting_sap_band", "landlord_heating_system"]
+)["landlord_property_id"].nunique().reset_index()
+archetypes = archetypes.rename(columns={"landlord_property_id": "n_properties"})
+archetypes = archetypes.sort_values("n_properties", ascending=False)
+archetypes["running_total"] = archetypes["n_properties"].cumsum()
+archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100
+
+archetypes["is_electric"] = archetypes["landlord_heating_system"] != "boiler - other fuel"
+archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin(
+    ["boiler - other fuel", "electric storage heaters"]
+)
+archetypes = archetypes.reset_index(drop=True)
+# The id has to be assigned before any subset is taken, so that the electric and oil subsets below carry the
+# same archetype_id that is merged onto the asset list
+archetypes["archetype_id"] = archetypes.index
+
+# Right now, they don't want to treat the oil properties so we'll exclude them for the moment
+electric_heated_archetypes = (
+    archetypes[archetypes["landlord_heating_system"] != "boiler - other fuel"].copy().reset_index(drop=True)
+)
+electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum()
+electric_heated_archetypes["cumulative_percentage"] = (
+    electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100
+)
+
+# The main properties that need validation surveys are properties that require a heating upgrade.
+# archetype_id is already present on the subset, so no merge back onto archetypes is needed
+electric_heated_archetypes = electric_heated_archetypes[electric_heated_archetypes["needs_heating_upgrade"]]
+
+oil_archetypes = archetypes[
+    archetypes["landlord_heating_system"] == "boiler - other fuel"
+].copy().reset_index(drop=True)
+
+asset_list = asset_list.merge(
+    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
+    how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
+)
+
+properties_for_verification = asset_list[
+ asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values)
+].copy()
+properties_for_verification["postal_region"] = properties_for_verification["domna_postcode"].str.split(" ").str[
+ 0].str.strip()
+
+properties_for_verification["epc_age"] = (
+ pd.Timestamp.now() - pd.to_datetime(properties_for_verification["epc_inspection_date"])
+).dt.days
+
+# We also survey 2 oil heated properties, so we take the 2 most prevalent archetypes
+archetypes_for_survey = pd.concat(
+ [electric_heated_archetypes, oil_archetypes.head(2)]
+)
+
+# Take the property with the oldest EPC, by region. Prioritise estimated properties
+sample = []
+for _, config in archetypes_for_survey.iterrows():
+ properties = asset_list[
+ (asset_list["archetype_id"] == config["archetype_id"]) &
+ (asset_list["floor_area_band"] == config["floor_area_band"]) &
+ (asset_list["starting_sap_band"] == config["starting_sap_band"])
+ ]
+
+ if pd.isnull(properties["epc_inspection_date"]).sum():
+ sample_property = properties[pd.isnull(properties["epc_inspection_date"])].head(1).to_dict("records")
+ else:
+ # Take the property with the oldest EPC
+ sample_property = properties.sort_values("epc_inspection_date", ascending=True).head(1).to_dict("records")
+
+ sample.extend(sample_property)
+
+sample = pd.DataFrame(sample)
+
+sample = sample[
+ [
+ "landlord_property_id", "epc_inspection_date", "epc_sap_score_on_register", "starting_sap_band",
+ "floor_area_band", "landlord_heating_system", "domna_postcode", "domna_full_address", "archetype_id"
+ ]
+]
+
+archetypes = asset_list[["landlord_property_id", "archetype_id"]].copy()
+archetypes["archetype_id"] = archetypes["archetype_id"].astype(str)
+
+filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx"
+# Store the data in two tabs. One with the archetype id of each property and the second with the survey sample
+
+with pd.ExcelWriter(filename) as writer:
+ archetypes.to_excel(writer, sheet_name="Archetypes", index=False)
+ sample.to_excel(writer, sheet_name="Survey Sample", index=False)
+
+# We store this
+
+# Questions:
+# 1) If futures are considering changing properties that have oil heating systems, we could include them and
+# we have 39 total archetypes. Otherwise, we have 25 archetypes
+# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're
+# using
+
+# Recommendations:
+# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover
+#
diff --git a/etl/customers/cambridge/surveys.py b/etl/customers/cambridge/surveys.py
new file mode 100644
index 00000000..2aa52d6f
--- /dev/null
+++ b/etl/customers/cambridge/surveys.py
@@ -0,0 +1,24 @@
+import pandas as pd
+from backend.ml_models.Valuation import PropertyValuation
+from backend.app.utils import sap_to_epc
+
+# Load the survey results
+surveys = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/Survey Data.xlsx",
+    sheet_name="Survey data",
+)
+
+# Rounded average valuation increase per surveyed property, moving from the pre-works EPC to the
+# scenario 1 post-works EPC
+increases = []
+for _, survey in surveys.iterrows():
+    epc_before = sap_to_epc(survey["Pre SAP"])
+    epc_after = sap_to_epc(survey["Scenario 1 Post SAP"])
+
+    valuation = PropertyValuation.estimate_valuation_improvement(
+        survey["Valuation"],
+        epc_before,
+        epc_after,
+        total_cost=None
+    )
+    increases.append(round(valuation["average_increase"]))
diff --git a/etl/customers/l_and_g/risk_matrix.py b/etl/customers/l_and_g/risk_matrix.py
index c800117e..8f5451fc 100644
--- a/etl/customers/l_and_g/risk_matrix.py
+++ b/etl/customers/l_and_g/risk_matrix.py
@@ -81,6 +81,7 @@ def app():
# We need to calculate the costs
cost_data = []
for _, row in epr_data.iterrows():
+
epc = row["EPC"][0]
sap = int(row["EPC"][1:])
diff --git a/etl/customers/places_for_people/abs.py b/etl/customers/places_for_people/abs.py
new file mode 100644
index 00000000..aa85a93f
--- /dev/null
+++ b/etl/customers/places_for_people/abs.py
@@ -0,0 +1,199 @@
+"""
+This script is to calculate the ABS for the Places for People London project
+"""
+
+import os
+import pandas as pd
+
+# London
+pfp_london_cav = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+ "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx",
+ sheet_name="Cav Route",
+ header=1
+)
+pfp_london_cav = pfp_london_cav.rename(columns={"Route": "Route March"})
+pfp_london_pv = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+ "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx",
+ sheet_name="PV Route",
+ header=1
+)
+pfp_london_pv = pfp_london_pv.rename(columns={"Route": "Route March"})
+pfp_london_cav["location"] = "London"
+pfp_london_pv["location"] = "London"
+# East
+pfp_east_cav = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+ "rates/PFP_east_reviewed_standarised_15052025.xlsx",
+ sheet_name="Cav Route",
+ header=1
+)
+pfp_east_cav = pfp_east_cav.rename(columns={"Route": "Route March"})
+pfp_east_pv = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+ "rates/PFP_east_reviewed_standarised_15052025.xlsx",
+ sheet_name="PV Route",
+ header=1
+)
+pfp_east_pv = pfp_east_pv.rename(columns={"Route": "Route March"})
+pfp_east_cav["location"] = "East"
+pfp_east_pv["location"] = "East"
+# North east
+pfp_north_east_cav = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+ "rates/PFP_north_east_reviewed_standardised_15052025.xlsx",
+ sheet_name="Cav Route",
+ header=1
+)
+pfp_north_east_cav = pfp_north_east_cav.rename(columns={"Route": "Route March"})
+pfp_north_east_pv = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+ "rates/PFP_north_east_reviewed_standardised_15052025.xlsx",
+ sheet_name="PV Route",
+ header=1
+)
+pfp_north_east_pv = pfp_north_east_pv.rename(columns={"Route": "Route March"})
+pfp_north_east_cav["location"] = "North East"
+pfp_north_east_pv["location"] = "North East"
+# North West
+pfp_north_west_cav = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+ "rates/PFP_north_west_reviewed_standardised_15052025.xlsx",
+ sheet_name="Cav Route",
+ header=1
+)
+pfp_north_west_cav = pfp_north_west_cav.rename(columns={"Route": "Route March"})
+pfp_north_west_pv = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+ "rates/PFP_north_west_reviewed_standardised_15052025.xlsx",
+ sheet_name="PV Route",
+ header=1
+)
+pfp_north_west_pv = pfp_north_west_pv.rename(columns={"Route": "Route March"})
+pfp_north_west_cav["location"] = "North West"
+pfp_north_west_pv["location"] = "North West"
+
+cav_route = pd.concat(
+ [
+ pfp_london_cav,
+ pfp_east_cav,
+ pfp_north_east_cav,
+ pfp_north_west_cav
+ ]
+)
+solar_route = pd.concat(
+ [
+ pfp_london_pv,
+ pfp_east_pv,
+ pfp_north_east_pv,
+ pfp_north_west_pv
+ ]
+)
+
+
+def get_band(sap_score_number):
+    """
+    Maps a SAP score onto its band label.
+    :param sap_score_number: numeric SAP score
+    :return: the band label, e.g. "Low_C", or None when the score falls in no band
+    """
+    labels = [
+        "High_A", "Low_A", "High_B", "Low_B", "High_C", "Low_C", "High_D",
+        "Low_D", "High_E", "Low_E", "High_F", "Low_F", "High_G", "Low_G",
+    ]
+    # Inclusive lower bound of each band, in the same order as the labels
+    lower_bounds = [96, 92, 86, 81, 74.5, 69, 61.5, 55, 46.5, 39, 29.5, 21, 10.5, 1]
+    # The bands are contiguous: each exclusive upper bound is the lower bound of the band above
+    upper_bounds = [float("inf")] + lower_bounds[:-1]
+
+    for label, lower, upper in zip(labels, lower_bounds, upper_bounds):
+        if lower <= sap_score_number < upper:
+            return label
+
+    return None
+
+
+def classify_floor_area(floor_area):
+    """
+    Maps a floor area onto its floor area band label.
+    :param floor_area: total floor area of the property
+    :return: one of "0-72", "73-97", "98-199" or "200+"
+    """
+    return (
+        "0-72" if floor_area <= 72
+        else "73-97" if floor_area <= 97
+        else "98-199" if floor_area <= 199
+        else "200+"
+    )
+
+
+# We classify the abs bounds
+solar_route["starting_abs_band"] = solar_route["epc_sap_score_on_register"].apply(get_band)
+solar_route["ending_abs_band_scenario1"] = "High_C"
+solar_route["ending_abs_band_scenario2"] = "Low_B"
+solar_route["epc_total_floor_area"] = solar_route["epc_total_floor_area"].fillna(90)
+solar_route["floor_area_band"] = solar_route["epc_total_floor_area"].apply(classify_floor_area)
+
+# We classify the abs bounds
+cav_route["epc_sap_score_on_register"] = cav_route["epc_sap_score_on_register"].fillna(68)
+cav_route["starting_abs_band"] = cav_route["epc_sap_score_on_register"].apply(get_band)
+cav_route["floor_area_band"] = cav_route["epc_total_floor_area"].apply(classify_floor_area)
+cav_route["ending_abs_band"] = "Low_C"
+
+abs_matrix = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
+)
+
+cav_route = cav_route.merge(
+ abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
+ how="left",
+ left_on=["starting_abs_band", "ending_abs_band", "floor_area_band"],
+ right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
+)
+solar_route = solar_route.merge(
+ abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
+ how="left",
+ left_on=["starting_abs_band", "ending_abs_band_scenario1", "floor_area_band"],
+ right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
+)
+cav_route["ABS Rate"] = cav_route["ABS Rate"].fillna(0)
+solar_route["ABS Rate"] = solar_route["ABS Rate"].fillna(0)
+
+cav_abs_agg = (
+ cav_route.groupby("Route March").agg(
+ {
+ "ABS Rate": "sum",
+ "landlord_property_id": "count",
+ }
+ ).reset_index()
+)
+cav_abs_agg["Week Number"] = cav_abs_agg["Route March"].str.extract(r"(\d+)").astype(int)
+cav_abs_agg = cav_abs_agg.sort_values("Week Number", ascending=True)
+cav_abs_agg = cav_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"})
+
+solar_abs_agg = (
+ solar_route.groupby("Route March").agg(
+ {
+ "ABS Rate": "sum",
+ "landlord_property_id": "count",
+ }
+ ).reset_index()
+)
+solar_abs_agg["Week Number"] = solar_abs_agg["Route March"].str.extract(r"(\d+)").astype(int)
+solar_abs_agg = solar_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"})
+solar_abs_agg = solar_abs_agg.sort_values("Week Number", ascending=True)
+
+# We store the data
+# Store as an excel
+filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/pfp programme rates.xlsx"
+# Store the data in four tabs: the solar and cavity ABS aggregated by route march, then the cavity and solar data
+
+with pd.ExcelWriter(filename) as writer:
+ solar_abs_agg.to_excel(writer, sheet_name="Solar ABS", index=False)
+ cav_abs_agg.to_excel(writer, sheet_name="Cav ABS", index=False)
+
+ cav_route.to_excel(writer, sheet_name="Cavity data", index=False)
+ solar_route.to_excel(writer, sheet_name="Solar data", index=False)
diff --git a/etl/customers/thrive/Programme Analysis.py b/etl/customers/thrive/Programme Analysis.py
index 521cfd30..2d6a0d69 100644
--- a/etl/customers/thrive/Programme Analysis.py
+++ b/etl/customers/thrive/Programme Analysis.py
@@ -8,6 +8,8 @@ address the following concerns:
"""
import pandas as pd
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
# This is Thrive's list of properties and when they should have been surveyed
thrive_tracker = pd.read_excel(
@@ -51,27 +53,10 @@ original_columns = {
}
original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
-original_asset_list["Data Source"] = "Thrive Tracker"
+original_asset_list["Data Source"] = "Original Asset List"
+original_asset_list = original_asset_list.drop_duplicates()
# We append on the missed properties, with the information we have
-# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
-# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
-# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
-# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
-# 'Special Requirements ', 'CIGA', 'Date CIGA check received',
-# 'Proposed Progamme', 'New Proposed Programme',
-# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
-# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
-# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
-# 'Date Submitted to installer', 'PRRN Number',
-# 'Loft insulation required? (Thrive)', 'Date booked ',
-# 'Completed\n(yes/no)', 'Date Completed',
-# 'Vents installed?\n(number and location)',
-# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
-# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
-# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
-# 'PRRN Submitted '
-
missed_properties["Full Address"] = (
missed_properties["#"].astype(str) + ", " +
missed_properties["Adress Line 1"].astype(str) + ", " +
@@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
missed_properties["ECO Eligibility"] = "Property Not Inspected"
missed_properties["Data Source"] = "Thrive Tracker"
+# We find the duplicated ids in original_asset_list
+dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
+dupes = original_asset_list[
+ original_asset_list["thrive_property_id"].isin(dupe_ids)
+].copy()
+dupes = dupes.sort_values("thrive_property_id")
+
+original_asset_list = original_asset_list.rename(
+ columns={
+ "detailed_property_type": "build_form"
+ }
+)
+
master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)
# We were provided with a data update for a sample of properties. We update the data with this information
@@ -103,12 +101,339 @@ data_update = pd.read_excel(
header=0
)
-new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
+new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
+new_properties["Full Address"] = (
+ new_properties["#"].astype(str) + ", " +
+ new_properties["Adress Line 1"].astype(str) + ", " +
+ new_properties["Postcode"].astype(str)
+)
+new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
+new_properties["WFT Findings"] = "Property Not Inspected"
+new_properties["ECO Eligibility"] = "Property Not Inspected"
+new_properties["Data Source"] = "13.05.2025 Data Update"
+
+master_list = pd.concat([new_properties, master_list])
+
+# We append any new data on heating type, assumed loft insulation thickness, and SAP rating, based on the data update
+master_list = master_list.merge(
+ data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
+ columns={
+ "Heating Type": "heating_type_updated",
+ "Assumed mm ": "assumed_loft_insulation_thickness_updated",
+ "SAP": "sap_rating_updated"
+ }
+ ),
+ how="left",
+ left_on="thrive_property_id",
+ right_on="UPRN"
+)
+
+# Where the data update has no value, we fall back to the existing value
+master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
+master_list["assumed_loft_insulation_thickness_updated"] = master_list[
+ "assumed_loft_insulation_thickness_updated"
+].fillna(master_list["assumed_loft_insulation_thickness"])
+master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
+
+assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
+
+master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
+ thrive_tracker["UPRN"].astype(str).values
+)
+
+# This is the asset list - call it the master asset list, updated May 2025
+master_list = master_list.drop(columns=["UPRN"])
+master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
+# master_list.to_excel(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+# "Complete - Updated May 2025.xlsx",
+# )
+
+master_list["house_number_TEMP"] = master_list.apply(
+ lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
+ axis=1
+)
+
+# We add in the status of the property
+# TODO: Add the status of the property from the Thrive tracker
+outcomes = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
+ "24-March25.xlsx",
+ header=0
+)
+outcomes["row_id"] = outcomes.index
+
+# We have ids which have the same phone number, but different UPRN, so we exclude these from the tracker used for matching
+tracker_for_matching = thrive_tracker[
+ ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
+].copy()
+tracker_for_matching["Full Address"] = (
+ tracker_for_matching["#"].astype(str) + ", " +
+ tracker_for_matching["Adress Line 1"].astype(str) + ", " +
+ tracker_for_matching["Postcode"].astype(str)
+)
+
+outcomes_id_lookup = []
+for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
+
+ hn = str(x["No."])
+ address = x["Address"]
+ postcode = x["Postcode"]
+ contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
+ contact_no = None if contact_no == "nan" else contact_no
+
+ if address == "292 Micklefield Road":
+ hn = "292"
+
+ if (address == "Micklefield Road") & (hn == "302"):
+ hn = "292"
+
+ if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+ hn = "103a"
+
+ if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+ hn = "105a"
+
+ if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+ hn = "107a"
+
+ #
+ # # We match this to the tracker
+ # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no]
+ # # Many of the phone numbers don't have a leading zero in the tracker so we add them
+ # if (m1.shape[0] != 1) and not pd.isnull(contact_no):
+ # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")]
+ #
+ # if m1.shape[0] > 1:
+ # raise ValueError(
+ # f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+ # )
+
+ # if m1.empty:
+ m1 = tracker_for_matching[
+ (tracker_for_matching["#"].astype(str) == hn) &
+ (tracker_for_matching["Postcode"] == postcode)
+ ]
+
+ if m1.empty:
+ # Some properties aren't in the tracker, we match to the master list
+ m1 = master_list[
+ (master_list["house_number_TEMP"].astype(str) == hn) &
+ (master_list["postcode"] == postcode)
+ ]
+ outcomes_id_lookup.append(
+ {
+ "row_id": x["row_id"],
+ "thrive_property_id": m1["thrive_property_id"].values[0],
+ "address": m1["full_address"].values[0],
+ "postcode": m1["postcode"].values[0],
+ }
+ )
+ continue
+
+ if m1.shape[0] != 1:
+ raise ValueError(
+ f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+ )
+
+ # We record the tracker match (UPRN, address, postcode) for this outcomes row
+ outcomes_id_lookup.append(
+ {
+ "row_id": x["row_id"],
+ "thrive_property_id": m1["UPRN"].values[0],
+ "address": m1["Full Address"].values[0],
+ "postcode": m1["Postcode"].values[0],
+ }
+ )
+
+outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
+outcomes = outcomes.merge(
+ outcomes_id_lookup,
+ how="left",
+ left_on="row_id",
+ right_on="row_id"
+)
+
+outcomes = outcomes.drop(columns=["row_id"])
+outcomes = outcomes.rename(
+ columns={
+ "Outcomes": "Outcome",
+ "Notes (If 'no "
+ "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
+ }
+)
+# Store the corrected outcomes
+# outcomes.to_excel(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
+# April 24-March25 - Corrected.xlsx",
+# index=False
+# )
-data_update = = data_update[["UPRN", ""]]
-# TODO: Flag the Thrive priorities and create a separate project code for these
-# TODO: Add the general project code
-# TODO: Add the thrive
\ No newline at end of file
+def parse_date(value):
+ # Strip any 'W.C' or 'w/c' prefix and clean whitespace
+ value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()
+ try:
+ # Try parsing the date with dayfirst=True
+ return pd.to_datetime(value, dayfirst=True, errors='coerce')
+ except Exception:
+ return pd.NaT
+
+
+outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
+
+# Next step - match the submissions master to the asset list. We will append on the UPRN
+eco3_submissions = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+ "ECO3.csv",
+ header=0
+)
+eco3_submissions["row_id"] = eco3_submissions.index
+
+eco4_submissions = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+ "ECO4.csv",
+ header=0
+)
+eco4_submissions["row_id"] = eco4_submissions.index
+
+# List of properties never on the asset list
+not_on_master = [
+ "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL",
+ "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA",
+ "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN",
+ "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN",
+ "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN",
+ "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN",
+ "25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN",
+ "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN",
+ '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN',
+ '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN',
+ '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN',
+ '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN'
+]
+
+eco3_remap = {
+ "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
+ "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
+ "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
+ "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
+ "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
+ "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
+ "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
+ "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
+ "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
+ "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+ "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+ "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
+ '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
+ '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
+ '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
+ '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
+ '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
+ '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
+ '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
+ '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
+ '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
+ '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
+ '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
+ '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
+ '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
+}
+
+eco3_lookup = []
+for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)):
+ hn = row["NO "]
+ pc = row["Post Code"]
+ street = row["Street / Block Name"]
+ key = f"{hn}+{street}+{pc}"
+ if key in not_on_master:
+ continue
+
+ if key in eco3_remap:
+ hn, street, pc = eco3_remap[key]
+ # The submission's address details differ from the asset list, so we use the remapped values
+
+ # We filter the asset list, because it's hard to know how accurate this is
+ m1 = master_list[
+ (master_list["house_number_TEMP"].astype(str) == hn) &
+ (master_list["postcode"] == pc)
+ ]
+
+ if m1.shape[0] != 1:
+ raise ValueError(
+ f"Error for {key} in the tracker"
+ )
+
+ eco3_lookup.append(
+ {
+ "row_id": row["row_id"],
+ "thrive_property_id": m1["thrive_property_id"].values[0],
+ "submission_house_number": row["NO "],
+ "submission_address1": row["Street / Block Name"],
+ "submission_postcode": row["Post Code"],
+ }
+ )
+
+eco4_lookup = []
+for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)):
+ hn = row["NO."]
+ pc = row["Post Code"]
+ street = row["Street / Block Name"]
+ key = f"{hn}+{street}+{pc}"
+ if key in not_on_master:
+ continue
+
+ if key in eco3_remap:
+ hn, street, pc = eco3_remap[key]
+ # The submission's address details differ from the asset list, so we use the remapped values
+
+ # We filter the asset list, because it's hard to know how accurate this is
+ m1 = master_list[
+ (master_list["house_number_TEMP"].astype(str) == hn) &
+ (master_list["postcode"].str.lower() == pc.lower())
+ ]
+
+ if m1.shape[0] != 1:
+ raise ValueError(
+ f"Error for {key} in the tracker"
+ )
+
+ eco4_lookup.append(
+ {
+ "row_id": row["row_id"],
+ "thrive_property_id": m1["thrive_property_id"].values[0],
+ "submission_house_number": row["NO."],
+ "submission_address1": row["Street / Block Name"],
+ "submission_postcode": row["Post Code"],
+ }
+ )
+
+# We match the lookups back to the submission sheets
+eco3_lookup = pd.DataFrame(eco3_lookup)
+eco3_submissions = eco3_submissions.merge(
+ eco3_lookup,
+ how="left",
+ on="row_id",
+)
+
+eco4_lookup = pd.DataFrame(eco4_lookup)
+eco4_submissions = eco4_submissions.merge(
+ eco4_lookup,
+ how="left",
+ on="row_id",
+)
+
+# Store
+eco3_submissions.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+ "ECO3 - with IDS.csv",
+ index=False
+)
+eco4_submissions.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+ "ECO4 - with IDS.csv",
+ index=False
+)
diff --git a/etl/customers/thrive/Project codes.py b/etl/customers/thrive/Project codes.py
new file mode 100644
index 00000000..01a15497
--- /dev/null
+++ b/etl/customers/thrive/Project codes.py
@@ -0,0 +1,130 @@
+"""
+This script takes the standardised asset list and appends the project codes.
+We also review the existing install status, in case anything is wrong.
+"""
+import pandas as pd
+import numpy as np
+
+standardised_asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+ "Complete - Updated May 2025 - Standardised.xlsx",
+ sheet_name="Standardised Asset List",
+)
+
+project_code_allocations = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
+ "Warmfront).xlsx",
+ sheet_name="Master Tracker",
+ header=1
+)
+
+programme_codes = project_code_allocations[
+ ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ]
+].copy()
+programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy()
+programme_codes["programme_reference"] = np.where(
+ pd.isnull(programme_codes["programme_reference"]),
+ programme_codes["Proposed Progamme"],
+ programme_codes["programme_reference"]
+)
+
+PROJECT_CODE_MAP = {
+ 'Phase 2': "THRIVE-002",
+ 'Phase 3': "THRIVE-003",
+ 'Phase 4': "THRIVE-004",
+ 'Phase 5': "THRIVE-005",
+ 'Phase 6': "THRIVE-006",
+ 'Phase 7': "THRIVE-007",
+ 'Phase 8': "THRIVE-008",
+ 'Phase 9': "THRIVE-009",
+ 'Phase 10': "THRIVE-010",
+ "Week 1": "THRIVE-WEEK-001",
+ "Week 2": "THRIVE-WEEK-002",
+ "Week 4": "THRIVE-WEEK-004",
+ "Week 7": "THRIVE-WEEK-007",
+}
+programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP)
+
+thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy()
+
+standardised_asset_list = standardised_asset_list.merge(
+ programme_codes[["UPRN", "project_code", "programme_reference"]],
+ how="left",
+ left_on="landlord_property_id",
+ right_on="UPRN",
+).merge(
+ thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]],
+ how="left",
+ on="UPRN",
+)
+
+standardised_asset_list = standardised_asset_list.drop(columns=["UPRN"])
+
+# We fill the project code for historical completions
+standardised_asset_list["project_code"] = np.where(
+ pd.isnull(standardised_asset_list["project_code"]) & (
+ standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED"
+ ) & (
+ ~pd.isnull(standardised_asset_list["hubspot_status"])
+ ),
+ "THRIVE-HISTORICAL",
+ standardised_asset_list["project_code"]
+)
+
+# Store as an excel
+filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - "
+ "reconciled.xlsx")
+# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
+# Other tabs:
+block_analysis = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+ "Complete - Updated May 2025 - Standardised.xlsx",
+ sheet_name="Block Analysis",
+)
+outcomes = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+ "Complete - Updated May 2025 - Standardised.xlsx",
+ sheet_name="Outcomes",
+)
+unmatched_submissions = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+ "Complete - Updated May 2025 - Standardised.xlsx",
+ sheet_name="Unmatched Submissions",
+)
+unmatched_ecosurv = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+ "Complete - Updated May 2025 - Standardised.xlsx",
+ sheet_name="Unmatched Ecosurv",
+)
+
+with pd.ExcelWriter(filename) as writer:
+ standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+ block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False)
+ # We add a tab with the outcomes
+ outcomes.to_excel(writer, sheet_name="Outcomes", index=False)
+ unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
+ unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
+
+# A check, just comparing against the master tracker to make sure I have all of the installs
+asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+ "Complete - Updated May 2025 - Standardised.xlsx",
+ sheet_name="Standardised Asset List",
+)
+
+master_tracker = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
+ "Warmfront).xlsx",
+ sheet_name="Master Tracker",
+ header=1
+)
+
+df = asset_list[["landlord_property_id", "hubspot_status"]].merge(
+ master_tracker[~pd.isnull(master_tracker['Date Completed'])][["UPRN", "Date Completed"]],
+ how="inner",
+ left_on="landlord_property_id",
+ right_on="UPRN"
+)
+
+df["hubspot_status"].value_counts()
+df[df["hubspot_status"] == "SUBMITTED TO INSTALLER"]
diff --git a/recommendations/tests/test_lighting_recommendations.py b/recommendations/tests/test_lighting_recommendations.py
index dbb621e7..5fb914a8 100644
--- a/recommendations/tests/test_lighting_recommendations.py
+++ b/recommendations/tests/test_lighting_recommendations.py
@@ -49,6 +49,6 @@ class TestLightingRecommendations:
'lighting in all '
'fixed outlets',
'low-energy-lighting': 100},
- 'total': 240.24, 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3,
- 'preliminaries': 14.3, 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4,
- 'labour_cost': 63.0, 'survey': False}]
+ 'total': 188.76000000000002, 'subtotal': 157.3, 'vat': 31.460000000000004, 'contingency': 14.3,
+ 'material': 80.0, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0, 'survey': False}
+ ]