diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..96ad7a95 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..fb10c6b0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 0dedc1fd..48ea22f4 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -4,6 +4,8 @@ import re
import tiktoken
from pprint import pprint
from datetime import datetime
+
+from docutils.utils.math.tex2mathml_extern import blahtexml
from openai import OpenAI
import numpy as np
import pandas as pd
@@ -663,7 +665,10 @@ class AssetList:
non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN)
if self.old_format_non_intrusives_present:
- non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES
+ # We check if we have the ECO Eligibility column, which we might not have
+ non_intrusive_columns = [
+ c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in self.standardised_asset_list.columns
+ ]
self.keep_variables += non_intrusive_columns
@@ -731,7 +736,7 @@ class AssetList:
'PIMSS EMPTY'
]
- if pd.isnull(date_str) or date_str in known_errors:
+ if pd.isnull(date_str) or date_str in known_errors or (date_str == 0):
return None
if isinstance(date_str, str):
@@ -752,6 +757,10 @@ class AssetList:
if isinstance(date_str, datetime):
return date_str.year
+ if isinstance(date_str, float):
+ if str(int(date_str)).isdigit() & (len(str(int(date_str))) == 4):
+ return int(date_str)
+
# Check if date_str is a year itself
if str(date_str).isdigit() & (len(str(date_str)) == 4):
return int(date_str)
@@ -1325,7 +1334,7 @@ class AssetList:
)
self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] = (
self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
- ["electric storage heaters", "room heaters", "electric radiators"]
+ ["electric storage heaters", "room heaters", "electric radiators", "no heating"]
)
)
@@ -2099,6 +2108,9 @@ class AssetList:
nomatch = []
for _, x in tqdm(self.outcomes.iterrows(), total=len(self.outcomes)):
+ if pd.isnull(x[outcomes_address]):
+ continue
+
# Check if we have an id
oid = x[outcomes_id] if outcomes_id is not None else None
@@ -2120,6 +2132,8 @@ class AssetList:
address_clean = x[outcomes_address].lower().replace(",", "").replace(" ", " ")
+ self.outcomes["Outcome"] = self.outcomes["Outcome"].str.lower()
+
matched = self.standardised_asset_list[
(self.standardised_asset_list[
self.STANDARD_FULL_ADDRESS
@@ -2140,7 +2154,9 @@ class AssetList:
].copy()
if not matched.empty:
matched["houseno"] = matched.apply(
- lambda x: SearchEpc.get_house_number(x[self.STANDARD_ADDRESS_1], x[self.STANDARD_POSTCODE]),
+ lambda x: SearchEpc.get_house_number(
+ str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
+ ),
axis=1
)
@@ -2155,8 +2171,6 @@ class AssetList:
}
)
continue
- elif matched.shape[0] > 1:
- raise NotImplementedError("Check me")
elif not matched.empty:
# Use levenstein distance to match
matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
@@ -2254,19 +2268,123 @@ class AssetList:
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
)
- # We just need to check if any were cancelled
- master_to_append = master_data[
- ["UPRN", install_col, submission_col]
- ].rename(
+ if "UPRN" in master_data.columns:
+ # We just need to check if any were cancelled
+ master_to_append = master_data[
+ ["UPRN", install_col, submission_col]
+ ].rename(
+ columns={
+ "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
+ install_col: "survey_status",
+ submission_col: "submission_date"
+ }
+ )
+ master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
+
+ master_surveyed.append(master_to_append)
+ continue
+
+ master_data["row_id"] = master_data.index
+
+ self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
+ lambda x: SearchEpc.get_house_number(
+ str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
+ ),
+ axis=1
+ )
+
+ # Otherwise, we need to match algorithmically
+ logger.info("Matching master data to asset list")
+ matched = []
+ unmatched = []
+ for _, row in tqdm(master_data.iterrows(), total=len(master_data)):
+ if pd.isnull(row["POSTCODE"]):
+ continue
+ postcode_no_space = row["POSTCODE"].strip().replace(" ", "").lower()
+
+ df = self.standardised_asset_list[
+ (
+ self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip().str.lower().str.replace(" ",
+ "")
+ == postcode_no_space
+ )
+ ]
+
+ house_no = row["NO"]
+
+ if house_no in df["house_no"].values:
+ df = df[df["house_no"] == house_no]
+ if df.shape[0] != 1:
+ # No unique candidate yet: narrow by street name, else fall back to fuzzy matching
+
+ if any(df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])):
+ df = df[
+ df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])
+ ]
+ else:
+ # Levenshtein distance
+ df = df[
+ df[self.STANDARD_FULL_ADDRESS].str.lower().apply(
+ lambda x: process.extractOne(
+ " ".join([row["NO"], row["Street / Block Name"], row["TOWN"]]).lower(),
+ x
+ )[1]
+ ) > 90
+ ]
+
+ if df.shape[0] == 0:
+ unmatched.append(row["row_id"])
+ continue
+
+ if any(df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
+ " ".join([row["NO"], row["Street / Block Name"]]).lower()
+ )):
+ df = df[
+ df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
+ " ".join([row["NO"], row["Street / Block Name"]]).lower()
+ )
+ ]
+
+ if any(
+ df[self.STANDARD_PROPERTY_TYPE].str.contains(
+ row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower()
+ )
+ ):
+ # We ignore "block of flats" entries
+ df = df[
+ df[self.STANDARD_PROPERTY_TYPE].str.contains(
+ row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower()
+ ) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")
+ ]
+
+ if df.shape[0] != 1:
+ # We have zero or multiple matches
+ raise NotImplementedError("FIX ME")
+ matched.append(
+ {
+ "row_id": row["row_id"],
+ self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
+ }
+ )
+
+ self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no")
+
+ # We match the "UPRN", which is the landlord's ID, onto the master sheet
+ matched = pd.DataFrame(matched)
+ master_to_append = master_data[["row_id", install_col, submission_col]].merge(
+ matched, how="left", on="row_id"
+ ).rename(
columns={
- "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
install_col: "survey_status",
submission_col: "submission_date"
}
)
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
-
master_surveyed.append(master_to_append)
+ unmatched_df = master_data[
+ master_data["row_id"].isin(unmatched)
+ ]
+ submissions_unmatched.append(unmatched_df)
master_surveyed = pd.concat(master_surveyed)
master_surveyed = master_surveyed[~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID])]
diff --git a/asset_list/app.py b/asset_list/app.py
index ae4b3cef..ee74b337 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -89,6 +89,42 @@ def app():
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
# - Or the insulation required is loft/cavity (floors should be solid)
+ # Bromford
+ data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
+ "Rebuild/Prepared data/")
+ data_filename = "asset_list.xlsx"
+ sheet_name = "Sheet1"
+ postcode_column = 'PostCode'
+ fulladdress_column = "FullAddress"
+ address1_column = None
+ address1_method = "house_number_extraction"
+ address_cols_to_concat = []
+ missing_postcodes_method = None
+ landlord_year_built = "ConYear"
+ landlord_os_uprn = None
+ landlord_property_type = "AssetTypeDesc"
+ landlord_built_form = "PropTypeDesc"
+ landlord_wall_construction = "Construction type"
+ landlord_roof_construction = None
+ landlord_heating_system = "Heating Type"
+ landlord_existing_pv = None
+ landlord_property_id = "Asset"
+ landlord_sap = None
+ outcomes_filename = "outcomes.xlsx"
+ outcomes_sheetname = "Sheet1"
+ outcomes_postcode = "Postcode"
+ outcomes_houseno = "No"
+ outcomes_id = None
+ outcomes_address = "Address"
+ master_filepaths = [
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
+ "3 submissions.csv",
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
+ "4 submissions.csv",
+ ]
+ master_to_asset_list_filepath = None
+ phase = False
+
# Torus
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
data_filename = "Torus Property Asset List - Phase 1.xlsx"
diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py
index cabd970e..e103f794 100644
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@@ -107,5 +107,42 @@ BUILT_FORM_MAPPINGS = {
'Semi-detached': 'semi-detached',
'Detached': 'detached',
'Flat / maisonette': 'unknown',
- '2014 onwards': 'unknown'
+ '2014 onwards': 'unknown',
+
+ 'Semi Detached': 'semi-detached',
+ 'End Terraced': 'end-terrace',
+ 'Basement': 'basement',
+ 'No': 'unknown',
+ 'Mid Terrace': 'mid-terrace',
+ 'Link Detached': 'detached',
+ 'Mid Terraced': 'mid-terrace',
+ 'Ground Floor': 'ground floor',
+ 'End Terrace': 'end-terrace',
+ 'Sheltrd Semi Det': 'semi-detached',
+ 'Shop': 'unknown',
+ 'Fourth Floor': 'mid-floor',
+ 'Terraced': 'mid-terrace',
+ 'Leasehold Terr': 'mid-terrace',
+ 'Room': 'unknown',
+ 'Second Floor': 'mid-floor',
+ 'Third Floor': 'mid-floor',
+ 'Office': 'unknown',
+ 'First Floor Over Arch': 'ground floor',
+ '16-25 IND-PPL': 'unknown',
+ 'Seventh Floor': 'top-floor',
+ 'Sheltered': 'unknown',
+ 'Shelt Bung End': 'end-terrace',
+ 'Room In Shared Accommodation': 'unknown',
+ 'Sheltred Bung Terrace': 'mid-terrace',
+ 'Garage In Block': 'unknown',
+ 'First Floor': 'ground floor',
+ 'First Floor Over Garage': 'ground floor',
+ 'Leasehold': 'unknown',
+ 'Sheltred Bung': 'unknown',
+ 'Garage': 'unknown',
+ 'Sixth Floor': 'top-floor',
+ 'Sheltered Bung': 'semi-detached',
+ 'Guest': 'unknown',
+ 'Fifth Floor': 'mid-floor'
+
}
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index 42326575..7f2f81f2 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -23,7 +23,8 @@ STANDARD_HEATING_SYSTEMS = {
'gas combi boiler',
'unknown',
"electric ceiling",
- "electric underfloor"
+ "electric underfloor",
+ "no heating"
}
HEATING_MAPPINGS = {
@@ -87,7 +88,7 @@ HEATING_MAPPINGS = {
'Heat pump (air) Electricity': 'air source heat pump',
'Room heaters Electricity': 'electric radiators',
'Room heaters Oil': 'room heaters',
- 'No heating system ND': 'unknown',
+ 'No heating system ND': 'no heating',
'Heat pump (wet) Electricity': 'ground source heat pump',
'Room heaters Biomass': 'room heaters',
'ND Solid fuel': 'unknown',
@@ -98,11 +99,11 @@ HEATING_MAPPINGS = {
'Storage heating Electricity': 'electric storage heaters',
'ND Electricity': 'unknown',
'Community heating Community (non-gas)': 'district heating',
- 'No heating system N/A': 'unknown',
+ 'No heating system N/A': 'no heating',
'Boiler Solid fuel': 'boiler - other fuel',
'Community heating Community (mains gas)': 'communal gas boiler',
'Boiler Biomass': 'boiler - other fuel',
- 'No heating system Mains gas': 'unknown',
+ 'No heating system Mains gas': 'no heating',
'Storage heaters': 'electric storage heaters',
'Air Source': 'air source heat pump',
@@ -170,5 +171,36 @@ HEATING_MAPPINGS = {
'Heat pump (wet)': 'air source heat pump',
'Electric ceiling heating': 'electric ceiling',
'Electric under floor heating': 'electric underfloor',
- 'Community heating': 'district heating'
+ 'Community heating': 'district heating',
+
+ 'Wet - Radiators Air Source Heat Pump': 'air source heat pump',
+ 'Wet - Radiators Electric': 'electric boiler',
+ 'Storage Heaters': 'high heat retention storage heaters',
+ 'Wet - Radiators Oil': 'oil boiler',
+ 'Communal Wet - Radiators Gas': 'communal gas boiler',
+ 'Electric - Storage/Panel Heaters Electric': 'electric storage heaters',
+ 'Gas Central Heating': 'gas combi boiler',
+ 'Wet - Radiators Solar': 'other',
+ 'Electric - Storage/Panel Heaters LPG': 'electric storage heaters',
+ 'No Heating Solid': 'no heating',
+ 'Wet - Underfloor Gas': 'gas condensing boiler',
+ 'No Heating Electric': 'no heating',
+ 'Oil Fired Central Heating': 'oil boiler',
+ 'Warm Air Gas': 'other',
+ 'Communal Boilers': 'communal gas boiler',
+ 'Wet - Radiators Gas': 'gas combi boiler',
+ 'Wet - Radiators Solid': 'solid fuel',
+ 'Wet - Radiators LPG': 'other',
+ 'No Heating Gas': 'no heating',
+ 'No Heating': 'no heating',
+ 'Panel Heaters': 'electric radiators',
+ 'Rointe Electric Heating': 'electric storage heaters',
+ 'Underfloor Heating': 'electric underfloor',
+ 'Air Source Heating': 'air source heat pump',
+ 'Warm Air Electric': 'other',
+ 'Communal Wet - Radiators Electric': 'communal gas boiler',
+ 'Wet - Underfloor Solar': 'other',
+ 'No Heating Required Gas': 'unknown',
+ 'Electric - Storage/Panel Heaters Gas': 'electric storage heaters',
+ 'Electric - Storage/Panel Heaters Solid': 'electric storage heaters'
}
diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py
index f208081a..dc8dbf21 100644
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@@ -151,5 +151,32 @@ PROPERTY_MAPPING = {
'Flat: Enclosed End Terrace: Mid Floor': 'flat',
'Flat: Enclosed End Terrace: Ground Floor': 'flat',
'Flat: Enclosed Mid Terrace: Top Floor': 'flat',
- '2013 onwards': 'unknown'
+ '2013 onwards': 'unknown',
+
+ 'House 2 Storey': 'house',
+ 'Bung': 'bungalow',
+ 'House 3 Storey': 'house',
+ 'Shared Flat': 'flat',
+ 'd': 'unknown',
+ 'Mais': 'maisonette',
+ 'e': 'unknown',
+ 'Shared House': 'house',
+ 'House 4 Storey': 'house',
+ 'Shared Bungalow': 'bungalow',
+ 'Detch': 'house',
+ 'Shop': 'other',
+ 'Terr': 'house',
+ 'Terrace': 'house',
+ 'Description': 'unknown',
+ 'Hse': 'house',
+ 'Room': 'other',
+ 'Office': 'other',
+ 'Room In Shared Accommodation': 'other',
+ 'Apartment': 'flat',
+ 'm': 'unknown',
+ 'Garage': 'other',
+ 'Parking Space': 'other',
+ 'Community Centre': 'other',
+ 'Communal Facility': 'other',
+ 'Semi': 'house'
}
diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py
index b98a773c..a95f0529 100644
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@@ -22,5 +22,6 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'ND (inferred)': 'unknown',
'2018 onwards': 'unknown',
'Pitched (vaulted ceiling)': 'pitched insulated',
- np.nan: "unknown"
+ np.nan: "unknown",
+ None: "unknown"
}
diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py
index 128e84af..c327338a 100644
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@@ -157,5 +157,14 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Timber frame': 'timber frame unknown insulation',
'2017 onwards': 'new build - average thermal transmittance',
'ND (inferred)': 'unknown',
- 'Flat / maisonette': 'other'
+ 'Flat / maisonette': 'other',
+
+ 'Other': 'other',
+ 'Timber Frame': 'timber frame unknown insulation',
+ 'Cavity Wall': 'cavity unknown insulation',
+ 'Non-Traditional': 'system built',
+ 'PRC': 'system built',
+ 'Cross Wall': 'system built',
+ 'Solid Wall': 'solid brick unknown insulation',
+ 'Traditional': 'other'
}
diff --git a/backend/Property.py b/backend/Property.py
index 424242fd..52e8c213 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -107,7 +107,10 @@ class Property:
# cost and instead, provide a message that the measure has already been installed
self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
- self.non_invasive_recommendations = non_invasive_recommendations
+ self.non_invasive_recommendations = (
+ non_invasive_recommendations['recommendations'] if
+ non_invasive_recommendations else []
+ )
# This is a list of measures that have been recommended for the property
if isinstance(measures, list):
self.measures = measures
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 4237472d..5db3d4d1 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -83,7 +83,8 @@ class PlanTriggerRequest(BaseModel):
exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
# This is a list of measures that we want to be included, if they are options
- required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
+ # Default to empty
+ required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=[], min_length=1)
scenario_name: Optional[str] = ""
multi_plan: Optional[bool] = False
diff --git a/etl/customers/bromford/data_cleanup.py b/etl/customers/bromford/data_cleanup.py
new file mode 100644
index 00000000..45429523
--- /dev/null
+++ b/etl/customers/bromford/data_cleanup.py
@@ -0,0 +1,192 @@
+"""
+12th April 2025
+This script attempts to clean up the various pieces of data we have for Bromford, with the intention of producing a
+standardised asset list
+"""
+
+import pandas as pd
+
+# Step 1
+# The inspections data is spread across three different files. We attempt to produce one finalised asset list, with
+# comprehensive inspections
+
+# Primary asset list
+asset_list = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford Asset "
+ "List.xlsx",
+ sheet_name="Asset List"
+)
+
+# First inspections file (BROMFORD MDS)
+inspections_1 = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+ "MDS.xlsx",
+ sheet_name="Data list"
+)
+inspections_1["Heating Type"] = (inspections_1["Heating Type"] + " " + inspections_1["Heating fuel"]).str.strip()
+
+inspections_2 = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+ "MERLIN LANE.xlsx",
+ sheet_name="Report"
+)
+inspections_2["AssetTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[-1]
+inspections_2["PropTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[:-1].str.join(" ")
+
+inspections_3 = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+ "SEVERN VALE - KLARKE.xlsx",
+ sheet_name="Asset report"
+)
+
+inspections_3["FullAddress"] = inspections_3["T1_Address1"] + ", " + inspections_3["T1_Address2"]
+
+# On inspections 3, we have multiple sheets which describe the heating
+heating_systems = []
+for sheet_name in [
+ "Storage Heaters", "No Heating", "Underfloor Heating", "Rointe Electric Heating", "Air Source Heating",
+ "Gas Central Heating", "Electric Boiler", "Oil Fired Central Heating",
+ "Communal Boilers", "Panel Heaters"
+]:
+ df = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
+ "Rebuild/Inspections/BROMFORD "
+ "SEVERN VALE - KLARKE.xlsx",
+ sheet_name=sheet_name
+ )
+ df = df[["UPRN"]]
+ df["Heating Type"] = sheet_name
+ heating_systems.append(df)
+
+heating_systems = pd.concat(heating_systems)
+# Some UPRNs appear on several heating sheets; we cannot tell which is correct, so we keep the first
+heating_systems = heating_systems.drop_duplicates("UPRN")
+heating_systems = heating_systems.rename(columns={"UPRN": "Asset"})
+heating_systems["Asset"] = heating_systems["Asset"].astype(int)
+
+inspections_3 = inspections_3.merge(heating_systems, how="left", on="Asset")
+
+# Create a consolidated inspections sheet
+inspections = pd.concat(
+ [
+ inspections_1[["Asset", "Construction type", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+ inspections_2[["Asset", "Construction type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+ inspections_3[["Asset", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+ ]
+)
+
+inspections_address_data = pd.concat(
+ [
+ inspections_1[
+ ["Asset", "FullAddress", "PostCode", "ConYear", "Beds", "AssetTypeDesc", "PropTypeDesc", 'ManAreaDesc', ]
+ ],
+ inspections_2[
+ ['Asset', 'FullAddress', 'AccomType', "AssetTypeDesc", "PropTypeDesc", 'ConYear', 'Postcode']
+ ].rename(columns={"Postcode": "PostCode"}),
+ inspections_3[
+ ['Asset', "FullAddress", 'T1_Postcode', 'T1_Build Year', 'T1_AssetType']
+ ].rename(
+ columns={"T1_Postcode": "PostCode", "T1_Build Year": "ConYear", "T1_AssetType": "AssetTypeDesc"}
+ ),
+ ]
+)
+
+# Remove some error values
+inspections = inspections[~inspections["Asset"].isin(
+ [
+ "They're all green partial fill they're all green this",
+ "South Staffordshire District Council",
+ 'Blk Milton Crt F9-10, Perton, Wolverhampton'
+ ]
+)]
+
+inspections["Asset"] = inspections["Asset"].astype(str)
+asset_list["Asset"] = asset_list["Asset"].astype(str)
+inspections_address_data["Asset"] = inspections_address_data["Asset"].astype(str)
+inspections['WFT Findings'] = inspections['WFT Findings'].replace(r'^\s*$', pd.NA, regex=True)
+
+# We have some cases where the inspections data has dupes on Asset (the ID column). We take the instance that is
+# populated
+inspections = inspections.sort_values(by='WFT Findings', na_position='last')
+inspections = inspections.drop_duplicates(subset='Asset', keep='first')
+
+# We have dupes in the asset list
+asset_list = asset_list.drop_duplicates("Asset")
+
+# Merge on
+missed_asset_ids = inspections[
+ ~inspections["Asset"].isin(asset_list["Asset"].values)
+]["Asset"].values
+
+missed_assets = inspections_address_data[
+ inspections_address_data["Asset"].isin(missed_asset_ids)
+]
+missed_assets = missed_assets.drop_duplicates("Asset")
+
+# We produce a larger asset list
+asset_list = pd.concat([asset_list, missed_assets])
+
+asset_list = asset_list.merge(
+ inspections, how="left", on="Asset"
+)
+asset_list["WFT Findings"] = asset_list["WFT Findings"].fillna("No Inspections Note")
+
+# Store
+# asset_list.to_excel(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
+# "data/asset_list.xlsx"
+# )
+
+# We now prepare outcomes into a single file
+pv_outcomes = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford PV "
+ "Outcomes.csv",
+ encoding='cp1252'
+)
+pv_outcomes["measure_type"] = "solar"
+
+other_outcomes = pd.read_excel(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/(Bromford) "
+ "15.04.2024.xlsx",
+ sheet_name="ECO4 & GBIS",
+ header=1
+)
+other_outcomes["measure_type"] = "cwi"
+
+combined_outcomes = pd.concat(
+ [
+ other_outcomes[["NO", "ADDRESS", "POSTCODE", "WEEK COMMENCING", "OUTCOMES", "NOTES"]].rename(
+ columns={
+ "NO": "No", "ADDRESS": "Address", "POSTCODE": "Postcode", "WEEK COMMENCING": "Week Commencing",
+ "OUTCOMES": "Outcome", "NOTES": "Notes"
+ }
+ ),
+ pv_outcomes[['No', 'Address', 'Postcode', "Week Commencing", "Outcome", "Notes"]]
+ ]
+)
+
+# Store
+# combined_outcomes.to_excel(
+# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
+# "data/outcomes.xlsx"
+# )
+
+# Submissions sheets (ECO 3 and ECO 4)
+eco3_submissions = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 Submissions.csv",
+ encoding='cp1252'
+)
+# Get rid of the unnamed columns
+unnamed_columns = [c for c in eco3_submissions.columns if "Unnamed: " in c]
+eco3_submissions = eco3_submissions.drop(columns=unnamed_columns)
+# Store
+eco3_submissions.to_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 submissions.csv",
+ index=False
+)
+
+eco4_submissions = pd.read_csv(
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 4 submissions.csv",
+)
+
+same_cols = [c for c in eco4_submissions.columns if c in eco3_submissions.columns]
diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py
index 7e15c1f4..a8805a71 100644
--- a/etl/customers/remote_assessments/app.py
+++ b/etl/customers/remote_assessments/app.py
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
-PORTFOLIO_ID = 140
+PORTFOLIO_ID = 141
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
@@ -19,17 +19,20 @@ def app():
asset_list = [
{
- "address": "Brow Cottage",
- "postcode": "YO18 7PZ",
- "uprn": 10007630752,
- "property_type": "House",
- "built_form": "Semi-Detached",
+ "address": "196 Merrow Street",
+ "postcode": "SE17 2NP",
+ "uprn": 200003423454,
"patch": True
},
{
- "address": "Wyburn",
- "postcode": "DT1 2LL",
- "uprn": 100040630290
+ "address": "65 Liverpool Grove",
+ "postcode": "SE17 2HP",
+ "uprn": 200003423194
+ },
+ {
+ "address": "2 Brettell Street",
+ "postcode": "SE17 2NZ",
+ "uprn": 200003423607
},
]
asset_list = pd.DataFrame(asset_list)
@@ -71,12 +74,16 @@ def app():
valuation_data = [
{
- "valuation": 469_000,
- "uprn": 10007630752,
+ "valuation": 339_000,
+ "uprn": 200003423454,
},
{
- "valuation": 373_000,
- "uprn": 100040630290
+ "valuation": 374_000,
+ "uprn": 200003423194
+ },
+ {
+ "valuation": 719_000,
+ "uprn": 200003423607
},
]
# Store valuation data to s3
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index e97f0202..76087a76 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1,7 +1,7 @@
import os
import re
import openpyxl
-import Levenshtein
+from fuzzywuzzy import fuzz
from pathlib import Path
import msgpack
from datetime import datetime
@@ -2771,7 +2771,8 @@ class DataLoader:
match_to = [x.replace(" ", "") for x in match_to]
# Perform matching between full key and match_to
- distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+ distances = [100 - fuzz.ratio(matching_string, s) for s in match_to]
+
best_match_index = distances.index(min(distances))
# We might want to consider a threshold for the distance, however for the momeny,
# we don't consider this for the moment
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 2e044e12..0e73cffe 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -635,7 +635,7 @@ class Recommendations:
# By limiting here, we don't change the value in current_phase_values. This means that the
# future recommendations won't have an impact that is too large
li_sap_limit = RoofRecommendations.get_loft_insulation_sap_limit(
- property_instance.data["roof-energy-eff"], property_instance.data["extension-count"]
+ property_instance.data["roof-energy-eff"], property_instance.roof["insulation_thickness"]
)
if li_sap_limit is not None:
property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit)
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 5f9707d9..cd7f82c4 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -64,16 +64,16 @@ class RoofRecommendations:
)
@classmethod
- def get_loft_insulation_sap_limit(cls, roof_energy_eff, extension_count):
+ def get_loft_insulation_sap_limit(cls, roof_energy_eff, existing_thickness):
"""
Get the SAP limit for loft insulation
:param roof_energy_eff:
:return:
"""
- if extension_count == 0:
- # No limit
- return None
+ if str(existing_thickness).isdigit():
+ if float(existing_thickness) >= 250:
+ return 0
if roof_energy_eff in ["Good", "Very Good"]:
return 1