From 3cfe938e273ab6e75a54ced3da5f970fd9c658eb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 13 Apr 2025 21:39:35 +0100 Subject: [PATCH] adding matching from submissions sheet to asset list --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 142 +++++++++++-- asset_list/app.py | 36 ++++ asset_list/mappings/built_form.py | 39 +++- asset_list/mappings/heating_systems.py | 42 +++- asset_list/mappings/property_type.py | 29 ++- asset_list/mappings/roof.py | 3 +- asset_list/mappings/walls.py | 11 +- backend/Property.py | 5 +- backend/app/plan/schemas.py | 3 +- etl/customers/bromford/data_cleanup.py | 192 ++++++++++++++++++ etl/customers/remote_assessments/app.py | 33 +-- .../ha_15_32/ha_analysis_batch_3.py | 5 +- recommendations/Recommendations.py | 2 +- recommendations/RoofRecommendations.py | 8 +- 16 files changed, 509 insertions(+), 45 deletions(-) create mode 100644 etl/customers/bromford/data_cleanup.py diff --git a/.idea/Model.iml b/.idea/Model.iml index df6c4faa..96ad7a95 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 0dedc1fd..48ea22f4 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -4,6 +4,8 @@ import re import tiktoken from pprint import pprint from datetime import datetime + +from docutils.utils.math.tex2mathml_extern import blahtexml from openai import OpenAI import numpy as np import pandas as pd @@ -663,7 +665,10 @@ class AssetList: non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN) if self.old_format_non_intrusives_present: - non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES + # We check if we have the ECO Eligibility column, which we might not have + non_intrusive_columns = [ + c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in 
self.standardised_asset_list.columns + ] self.keep_variables += non_intrusive_columns @@ -731,7 +736,7 @@ class AssetList: 'PIMSS EMPTY' ] - if pd.isnull(date_str) or date_str in known_errors: + if pd.isnull(date_str) or date_str in known_errors or (date_str == 0): return None if isinstance(date_str, str): @@ -752,6 +757,10 @@ class AssetList: if isinstance(date_str, datetime): return date_str.year + if isinstance(date_str, float): + if str(int(date_str)).isdigit() & (len(str(int(date_str))) == 4): + return int(date_str) + # Check if date_str is a year itself if str(date_str).isdigit() & (len(str(date_str)) == 4): return int(date_str) @@ -1325,7 +1334,7 @@ class AssetList: ) self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] = ( self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - ["electric storage heaters", "room heaters", "electric radiators"] + ["electric storage heaters", "room heaters", "electric radiators", "no heating"] ) ) @@ -2099,6 +2108,9 @@ class AssetList: nomatch = [] for _, x in tqdm(self.outcomes.iterrows(), total=len(self.outcomes)): + if pd.isnull(x[outcomes_address]): + continue + # Check if we have an id oid = x[outcomes_id] if outcomes_id is not None else None @@ -2120,6 +2132,8 @@ class AssetList: address_clean = x[outcomes_address].lower().replace(",", "").replace(" ", " ") + self.outcomes["Outcome"] = self.outcomes["Outcome"].str.lower() + matched = self.standardised_asset_list[ (self.standardised_asset_list[ self.STANDARD_FULL_ADDRESS @@ -2140,7 +2154,9 @@ class AssetList: ].copy() if not matched.empty: matched["houseno"] = matched.apply( - lambda x: SearchEpc.get_house_number(x[self.STANDARD_ADDRESS_1], x[self.STANDARD_POSTCODE]), + lambda x: SearchEpc.get_house_number( + str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE]) + ), axis=1 ) @@ -2155,8 +2171,6 @@ class AssetList: } ) continue - elif matched.shape[0] > 1: - raise NotImplementedError("Check me") elif not matched.empty: # 
Use levenstein distance to match matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE] @@ -2254,19 +2268,123 @@ class AssetList: "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS" ) - # We just need to check if any were cancelled - master_to_append = master_data[ - ["UPRN", install_col, submission_col] - ].rename( + if "UPRN" in master_data.columns: + # We just need to check if any were cancelled + master_to_append = master_data[ + ["UPRN", install_col, submission_col] + ].rename( + columns={ + "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID, + install_col: "survey_status", + submission_col: "submission_date" + } + ) + master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") + + master_surveyed.append(master_to_append) + continue + + master_data["row_id"] = master_data.index + + self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply( + lambda x: SearchEpc.get_house_number( + str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE]) + ), + axis=1 + ) + + # Otherwise, we need to match algorithmically + logger.info("Matching master data to asset list") + matched = [] + unmatched = [] + for _, row in tqdm(master_data.iterrows(), total=len(master_data)): + if pd.isnull(row["POSTCODE"]): + continue + postcode_no_space = row["POSTCODE"].strip().replace(" ", "").lower() + + df = self.standardised_asset_list[ + ( + self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip().str.lower().str.replace(" ", + "") + == postcode_no_space + ) + ] + + house_no = row["NO"] + + if house_no in df["house_no"].values: + df = df[df["house_no"] == house_no] + if df.shape[0] != 1: + # Levenstein distance + + if any(df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])): + df = df[ + df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"]) + ] + else: + # Levenstein distance + df = df[ + 
df[self.STANDARD_FULL_ADDRESS].str.lower().apply( + lambda x: process.extractOne( + " ".join([row["NO"], row["Street / Block Name"], row["TOWN"]]).lower(), + x + )[1] + ) > 90 + ] + + if df.shape[0] == 0: + unmatched.append(row["row_id"]) + continue + + if any(df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( + " ".join([row["NO"], row["Street / Block Name"]]).lower() + )): + df = df[ + df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( + " ".join([row["NO"], row["Street / Block Name"]]).lower() + ) + ] + + if any( + df[self.STANDARD_PROPERTY_TYPE].str.contains( + row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower() + ) + ): + # We ignore "block of flats" entries + df = df[ + df[self.STANDARD_PROPERTY_TYPE].str.contains( + row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower() + ) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats") + ] + + if df.shape[0] != 1: + # We have multiple matches + raise NotImplementedError("FIX ME") + matched.append( + { + "row_id": row["row_id"], + self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + } + ) + + self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no") + + # We match the "UPRN" which is the landlords ID, onto the master sheet + matched = pd.DataFrame(matched) + master_to_append = master_data[["row_id", install_col, submission_col]].merge( + matched, how="left", on="row_id" + ).rename( columns={ - "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID, install_col: "survey_status", submission_col: "submission_date" } ) master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") - master_surveyed.append(master_to_append) + unmatched_df = master_data[ + master_data["row_id"].isin(unmatched) + ] + submissions_unmatched.append(unmatched_df) master_surveyed = pd.concat(master_surveyed) master_surveyed = master_surveyed[~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID])] diff --git 
a/asset_list/app.py b/asset_list/app.py index ae4b3cef..ee74b337 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -89,6 +89,42 @@ def app(): # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid) # - Or the insulation required is loft/cavity (floors should be solid) + # Bromford + data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme " + "Rebuild/Prepared data/") + data_filename = "asset_list.xlsx" + sheet_name = "Sheet1" + postcode_column = 'PostCode' + fulladdress_column = "FullAddress" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "ConYear" + landlord_os_uprn = None + landlord_property_type = "AssetTypeDesc" + landlord_built_form = "PropTypeDesc" + landlord_wall_construction = "Construction type" + landlord_roof_construction = None + landlord_heating_system = "Heating Type" + landlord_existing_pv = None + landlord_property_id = "Asset" + landlord_sap = None + outcomes_filename = "outcomes.xlsx" + outcomes_sheetname = "Sheet1" + outcomes_postcode = "Postcode" + outcomes_houseno = "No" + outcomes_id = None + outcomes_address = "Address" + master_filepaths = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO " + "3 submissions.csv", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO " + "4 submissions.csv", + ] + master_to_asset_list_filepath = None + phase = False + # Torus data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1" data_filename = "Torus Property Asset List - Phase 1.xlsx" diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index cabd970e..e103f794 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -107,5 +107,42 @@ 
BUILT_FORM_MAPPINGS = { 'Semi-detached': 'semi-detached', 'Detached': 'detached', 'Flat / maisonette': 'unknown', - '2014 onwards': 'unknown' + '2014 onwards': 'unknown', + + 'Semi Detached': 'semi-detached', + 'End Terraced': 'end-terrace', + 'Basement': 'basement', + 'No': 'unknown', + 'Mid Terrace': 'mid-terrace', + 'Link Detached': 'detached', + 'Mid Terraced': 'mid-terrace', + 'Ground Floor': 'ground floor', + 'End Terrace': 'end-terrace', + 'Sheltrd Semi Det': 'semi-detached', + 'Shop': 'unknown', + 'Fourth Floor': 'mid-floor', + 'Terraced': 'mid-terrace', + 'Leasehold Terr': 'mid-terrace', + 'Room': 'unknown', + 'Second Floor': 'mid-floor', + 'Third Floor': 'mid-floor', + 'Office': 'unknown', + 'First Floor Over Arch': 'ground floor', + '16-25 IND-PPL': 'unknown', + 'Seventh Floor': 'top-floor', + 'Sheltered': 'unknown', + 'Shelt Bung End': 'end-terrace', + 'Room In Shared Accommodation': 'unknown', + 'Sheltred Bung Terrace': 'mid-terrace', + 'Garage In Block': 'unknown', + 'First Floor': 'ground floor', + 'First Floor Over Garage': 'ground floor', + 'Leasehold': 'unknown', + 'Sheltred Bung': 'unknown', + 'Garage': 'unknown', + 'Sixth Floor': 'top-floor', + 'Sheltered Bung': 'semi-detached', + 'Guest': 'unknown', + 'Fifth Floor': 'mid-floor' + } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 42326575..7f2f81f2 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -23,7 +23,8 @@ STANDARD_HEATING_SYSTEMS = { 'gas combi boiler', 'unknown', "electric ceiling", - "electric underfloor" + "electric underfloor", + "no heating" } HEATING_MAPPINGS = { @@ -87,7 +88,7 @@ HEATING_MAPPINGS = { 'Heat pump (air) Electricity': 'air source heat pump', 'Room heaters Electricity': 'electric radiators', 'Room heaters Oil': 'room heaters', - 'No heating system ND': 'unknown', + 'No heating system ND': 'no heating', 'Heat pump (wet) Electricity': 'ground source heat pump', 'Room 
heaters Biomass': 'room heaters', 'ND Solid fuel': 'unknown', @@ -98,11 +99,11 @@ HEATING_MAPPINGS = { 'Storage heating Electricity': 'electric storage heaters', 'ND Electricity': 'unknown', 'Community heating Community (non-gas)': 'district heating', - 'No heating system N/A': 'unknown', + 'No heating system N/A': 'no heating', 'Boiler Solid fuel': 'boiler - other fuel', 'Community heating Community (mains gas)': 'communal gas boiler', 'Boiler Biomass': 'boiler - other fuel', - 'No heating system Mains gas': 'unknown', + 'No heating system Mains gas': 'no heating', 'Storage heaters': 'electric storage heaters', 'Air Source': 'air source heat pump', @@ -170,5 +171,36 @@ HEATING_MAPPINGS = { 'Heat pump (wet)': 'air source heat pump', 'Electric ceiling heating': 'electric ceiling', 'Electric under floor heating': 'electric underfloor', - 'Community heating': 'district heating' + 'Community heating': 'district heating', + + 'Wet - Radiators Air Source Heat Pump': 'air source heat pump', + 'Wet - Radiators Electric': 'electric boiler', + 'Storage Heaters': 'high heat retention storage heaters', + 'Wet - Radiators Oil': 'oil boiler', + 'Communal Wet - Radiators Gas': 'communal gas boiler', + 'Electric - Storage/Panel Heaters Electric': 'electric storage heaters', + 'Gas Central Heating': 'gas combi boiler', + 'Wet - Radiators Solar': 'other', + 'Electric - Storage/Panel Heaters LPG': 'electric storage heaters', + 'No Heating Solid': 'no heating', + 'Wet - Underfloor Gas': 'gas condensing boiler', + 'No Heating Electric': 'no heating', + 'Oil Fired Central Heating': 'oil boiler', + 'Warm Air Gas': 'other', + 'Communal Boilers': 'communal gas boiler', + 'Wet - Radiators Gas': 'gas combi boiler', + 'Wet - Radiators Solid': 'solid fuel', + 'Wet - Radiators LPG': 'other', + 'No Heating Gas': 'no heating', + 'No Heating': 'no heating', + 'Panel Heaters': 'electric radiators', + 'Rointe Electric Heating': 'electric storage heaters', + 'Underfloor Heating': 'electric 
underfloor', + 'Air Source Heating': 'air source heat pump', + 'Warm Air Electric': 'other', + 'Communal Wet - Radiators Electric': 'communal gas boiler', + 'Wet - Underfloor Solar': 'other', + 'No Heating Required Gas': 'unknown', + 'Electric - Storage/Panel Heaters Gas': 'electric storage heaters', + 'Electric - Storage/Panel Heaters Solid': 'electric storage heaters' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index f208081a..dc8dbf21 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -151,5 +151,32 @@ PROPERTY_MAPPING = { 'Flat: Enclosed End Terrace: Mid Floor': 'flat', 'Flat: Enclosed End Terrace: Ground Floor': 'flat', 'Flat: Enclosed Mid Terrace: Top Floor': 'flat', - '2013 onwards': 'unknown' + '2013 onwards': 'unknown', + + 'House 2 Storey': 'house', + 'Bung': 'bungalow', + 'House 3 Storey': 'house', + 'Shared Flat': 'flat', + 'd': 'unknown', + 'Mais': 'maisonette', + 'e': 'unknown', + 'Shared House': 'house', + 'House 4 Storey': 'house', + 'Shared Bungalow': 'bungalow', + 'Detch': 'house', + 'Shop': 'other', + 'Terr': 'house', + 'Terrace': 'house', + 'Description': 'unknown', + 'Hse': 'house', + 'Room': 'other', + 'Office': 'other', + 'Room In Shared Accommodation': 'other', + 'Apartment': 'flat', + 'm': 'unknown', + 'Garage': 'other', + 'Parking Space': 'other', + 'Community Centre': 'other', + 'Communal Facility': 'other', + 'Semi': 'house' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index b98a773c..a95f0529 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -22,5 +22,6 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'ND (inferred)': 'unknown', '2018 onwards': 'unknown', 'Pitched (vaulted ceiling)': 'pitched insulated', - np.nan: "unknown" + np.nan: "unknown", + None: "unknown" } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 128e84af..c327338a 100644 --- a/asset_list/mappings/walls.py +++ 
b/asset_list/mappings/walls.py @@ -157,5 +157,14 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Timber frame': 'timber frame unknown insulation', '2017 onwards': 'new build - average thermal transmittance', 'ND (inferred)': 'unknown', - 'Flat / maisonette': 'other' + 'Flat / maisonette': 'other', + + 'Other': 'other', + 'Timber Frame': 'timber frame unknown insulation', + 'Cavity Wall': 'cavity unknown insulation', + 'Non-Traditional': 'system built', + 'PRC': 'system built', + 'Cross Wall': 'system built', + 'Solid Wall': 'solid brick unknown insulation', + 'Traditional': 'other' } diff --git a/backend/Property.py b/backend/Property.py index 424242fd..52e8c213 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -107,7 +107,10 @@ class Property: # cost and instead, provide a message that the measure has already been installed self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else [] - self.non_invasive_recommendations = non_invasive_recommendations + self.non_invasive_recommendations = ( + non_invasive_recommendations['recommendations'] if + non_invasive_recommendations else [] + ) # This is a list of measures that have been recommended for the property if isinstance(measures, list): self.measures = measures diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 4237472d..5db3d4d1 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -83,7 +83,8 @@ class PlanTriggerRequest(BaseModel): exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1) inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1) # This is a list of measures that we want to be included, if they are options - required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1) + # Default to empty + required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=[], min_length=1) scenario_name: 
Optional[str] = "" multi_plan: Optional[bool] = False diff --git a/etl/customers/bromford/data_cleanup.py b/etl/customers/bromford/data_cleanup.py new file mode 100644 index 00000000..45429523 --- /dev/null +++ b/etl/customers/bromford/data_cleanup.py @@ -0,0 +1,192 @@ +""" +12th April 2025 +This script attempts to clean up the various pieces of data we have for Bromford, with the intention of producing a +standardised asset list +""" + +import pandas as pd + +# Step 1 +# The inspections data is spread across three different files. We attempt to produce one finalised asset list, with +# comprehensive inspections + +# Primary asset list +asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford Asset " + "List.xlsx", + sheet_name="Asset List" +) + +# +inspections_1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD " + "MDS.xlsx", + sheet_name="Data list" +) +inspections_1["Heating Type"] = (inspections_1["Heating Type"] + " " + inspections_1["Heating fuel"]).str.strip() + +inspections_2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD " + "MERLIN LANE.xlsx", + sheet_name="Report" +) +inspections_2["AssetTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[-1] +inspections_2["PropTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[:-1].str.join(" ") + +inspections_3 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD " + "SEVERN VALE - KLARKE.xlsx", + sheet_name="Asset report" +) + +inspections_3["FullAddress"] = inspections_3["T1_Address1"] + ", " + inspections_3["T1_Address2"] + +# On inspections 3, we have multiple sheets which describe the heating +heating_systems = [] +for sheet_name in [ + "Storage Heaters", "No Heating", 
"Underfloor Heating", "Rointe Electric Heating", "Air Source Heating", + "Gas Central Heating", "Electric Boiler", "Oil Fired Central Heating", + "Communal Boilers", "Panel Heaters" +]: + df = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme " + "Rebuild/Inspections/BROMFORD " + "SEVERN VALE - KLARKE.xlsx", + sheet_name=sheet_name + ) + df = df[["UPRN"]] + df["Heating Type"] = sheet_name + heating_systems.append(df) + +heating_systems = pd.concat(heating_systems) +# We have no clue which one is correct, we have some dupes +heating_systems = heating_systems.drop_duplicates("UPRN") +heating_systems = heating_systems.rename(columns={"UPRN": "Asset"}) +heating_systems["Asset"] = heating_systems["Asset"].astype(int) + +inspections_3 = inspections_3.merge(heating_systems, how="left", on="Asset") + +# Create a consolidated inspections sheet +inspections = pd.concat( + [ + inspections_1[["Asset", "Construction type", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]], + inspections_2[["Asset", "Construction type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]], + inspections_3[["Asset", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]], + ] +) + +inspections_address_data = pd.concat( + [ + inspections_1[ + ["Asset", "FullAddress", "PostCode", "ConYear", "Beds", "AssetTypeDesc", "PropTypeDesc", 'ManAreaDesc', ] + ], + inspections_2[ + ['Asset', 'FullAddress', 'AccomType', "AssetTypeDesc", "PropTypeDesc", 'ConYear', 'Postcode'] + ].rename(columns={"Postcode": "PostCode"}), + inspections_3[ + ['Asset', "FullAddress", 'T1_Postcode', 'T1_Build Year', 'T1_AssetType'] + ].rename( + columns={"T1_Postcode": "PostCode", "T1_Build Year": "ConYear", "T1_AssetType": "AssetTypeDesc"} + ), + ] +) + +# Remove some error values +inspections = inspections[~inspections["Asset"].isin( + [ + "They're all green partial fill they're all green this", + "South Staffordshire District Council", + 'Blk Milton Crt 
F9-10, Perton, Wolverhampton' + ] +)] + +inspections["Asset"] = inspections["Asset"].astype(str) +asset_list["Asset"] = asset_list["Asset"].astype(str) +inspections_address_data["Asset"] = inspections_address_data["Asset"].astype(str) +inspections['WFT Findings'] = inspections['WFT Findings'].replace(r'^\s*$', pd.NA, regex=True) + +# We have some cases where the inspections data has dupes on Asset (the ID column). We take the instance that is +# populated +inspections = inspections.sort_values(by='WFT Findings', na_position='last') +inspections = inspections.drop_duplicates(subset='Asset', keep='first') + +# We have dupes in the asset list +asset_list = asset_list.drop_duplicates("Asset") + +# Merge on +missed_asset_ids = inspections[ + ~inspections["Asset"].isin(asset_list["Asset"].values) +]["Asset"].values + +missed_assets = inspections_address_data[ + inspections_address_data["Asset"].isin(missed_asset_ids) +] +missed_assets = missed_assets.drop_duplicates("Asset") + +# We produce a larger asset list +asset_list = pd.concat([asset_list, missed_assets]) + +asset_list = asset_list.merge( + inspections, how="left", on="Asset" +) +asset_list["WFT Findings"] = asset_list["WFT Findings"].fillna("No Inspections Note") + +# Store +# asset_list.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared " +# "data/asset_list.xlsx" +# ) + +# We now prepare outcomes into a single file +pv_outcomes = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford PV " + "Outcomes.csv", + encoding='cp1252' +) +pv_outcomes["measure_type"] = "solar" + +other_outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/(Bromford) " + "15.04.2024.xlsx", + sheet_name="ECO4 & GBIS", + header=1 +) +other_outcomes["measure_type"] = "cwi" + +combined_outcomes = pd.concat( + [ + other_outcomes[["NO", "ADDRESS", "POSTCODE", 
"WEEK COMMENCING", "OUTCOMES", "NOTES"]].rename( + columns={ + "NO": "No", "ADDRESS": "Address", "POSTCODE": "Postcode", "WEEK COMMENCING": "Week Commencing", + "OUTCOMES": "Outcome", "NOTES": "Notes" + } + ), + pv_outcomes[['No', 'Address', 'Postcode', "Week Commencing", "Outcome", "Notes"]] + ] +) + +# Store +# combined_outcomes.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared " +# "data/outcomes.xlsx" +# ) + +# Submissions sheet - +eco3_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 Submissions.csv", + encoding='cp1252' +) +# Get rid of the unnamed columns +unnamed_columns = [c for c in eco3_submissions.columns if "Unnamed: " in c] +eco3_submissions = eco3_submissions.drop(columns=unnamed_columns) +# Store +eco3_submissions.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 submissions.csv", + index=False +) + +eco4_submissions = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 4 submissions.csv", +) + +same_cols = [c for c in eco4_submissions.columns if c in eco3_submissions.columns] diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py index 7e15c1f4..a8805a71 100644 --- a/etl/customers/remote_assessments/app.py +++ b/etl/customers/remote_assessments/app.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv from utils.s3 import save_csv_to_s3 from etl.find_my_epc.AssetListEpcData import AssetListEpcData -PORTFOLIO_ID = 140 +PORTFOLIO_ID = 141 USER_ID = 8 load_dotenv(dotenv_path="backend/.env") @@ -19,17 +19,20 @@ def app(): asset_list = [ { - "address": "Brow Cottage", - "postcode": "YO18 7PZ", - "uprn": 10007630752, - "property_type": "House", - "built_form": "Semi-Detached", + "address": "196 Merrow Street", + "postcode": "SE17 2NP", + "uprn": 200003423454, 
"patch": True }, { - "address": "Wyburn", - "postcode": "DT1 2LL", - "uprn": 100040630290 + "address": "65 Liverpool Grove", + "postcode": "SE17 2HP", + "uprn": 200003423194 + }, + { + "address": "2 Brettell Street", + "postcode": "SE17 2NZ", + "uprn": 200003423607 }, ] asset_list = pd.DataFrame(asset_list) @@ -71,12 +74,16 @@ def app(): valuation_data = [ { - "valuation": 469_000, - "uprn": 10007630752, + "valuation": 339_000, + "uprn": 200003423454, }, { - "valuation": 373_000, - "uprn": 100040630290 + "valuation": 374_000, + "uprn": 200003423194 + }, + { + "valuation": 719_000, + "uprn": 200003423607 }, ] # Store valuation data to s3 diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e97f0202..76087a76 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1,7 +1,7 @@ import os import re import openpyxl -import Levenshtein +from fuzzywuzzy import fuzz from pathlib import Path import msgpack from datetime import datetime @@ -2771,7 +2771,8 @@ class DataLoader: match_to = [x.replace(" ", "") for x in match_to] # Perform matching between full key and match_to - distances = [Levenshtein.distance(matching_string, s) for s in match_to] + distances = [100 - fuzz.ratio(matching_string, s) for s in match_to] + best_match_index = distances.index(min(distances)) # We might want to consider a threshold for the distance, however for the momeny, # we don't consider this for the moment diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 2e044e12..0e73cffe 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -635,7 +635,7 @@ class Recommendations: # By limiting here, we don't change the value in current_phase_values. 
This means that the # future recommendations won't have an impact that is too large li_sap_limit = RoofRecommendations.get_loft_insulation_sap_limit( - property_instance.data["roof-energy-eff"], property_instance.data["extension-count"] + property_instance.data["roof-energy-eff"], property_instance.roof["insulation_thickness"] ) if li_sap_limit is not None: property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit) diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 5f9707d9..cd7f82c4 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -64,16 +64,16 @@ class RoofRecommendations: ) @classmethod - def get_loft_insulation_sap_limit(cls, roof_energy_eff, extension_count): + def get_loft_insulation_sap_limit(cls, roof_energy_eff, existing_thickness): """ Get the SAP limit for loft insulation :param roof_energy_eff: :return: """ - if extension_count == 0: - # No limit - return None + if str(existing_thickness).isdigit(): + if float(existing_thickness) >= 250: + return 0 if roof_energy_eff in ["Good", "Very Good"]: return 1