diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index c74a0b1f..e8a9dfaa 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -331,6 +331,9 @@ class SearchEpc: if row["lmk-key"] not in seen and not seen.add(row["lmk-key"]) ] + if data: + api_response["msg"] = self.SUCCESS + return api_response["msg"] def filter_rows(self, rows, property_type=None, address=None): diff --git a/etl/route_march_data_pull/app.py b/etl/route_march_data_pull/app.py index ee6a46d3..57239989 100644 --- a/etl/route_march_data_pull/app.py +++ b/etl/route_march_data_pull/app.py @@ -4,6 +4,7 @@ from BaseUtility import Definitions import pandas as pd import numpy as np from tqdm import tqdm +from datetime import datetime from dotenv import load_dotenv from backend.SearchEpc import SearchEpc @@ -172,7 +173,10 @@ def extract_address1(asset_list, full_address_col, postcode_col, method="first_t def process_age_band(x, year_built_column): - year_built = float(x[year_built_column]) + if isinstance(x[year_built_column], datetime): + year_built = x[year_built_column].year + else: + year_built = float(x[year_built_column]) if pd.isnull(x["Property Age Band"]) or ( x["Property Age Band"] in Definitions.DATA_ANOMALY_MATCHES @@ -195,6 +199,12 @@ def process_age_band(x, year_built_column): if year_built < 2007: return "EPC Age Band is older than Year Built" + if x["Property Age Band"] == "England and Wales: 2012 onwards": + if year_built >= 2012: + return "EPC Age Band Matches Year Built" + if year_built < 2012: + return "EPC Age Band is older than Year Built" + if x["Property Age Band"] == "England and Wales: before 1900": if year_built < 1900: return "EPC Age Band Matches Year Built" @@ -206,7 +216,7 @@ def process_age_band(x, year_built_column): # so we extract the lower and upper date age_band = x["Property Age Band"].split(": ")[1] lower_date, upper_date = age_band.split("-") - if year_built <= float(upper_date) and year_built <= float(upper_date): + if year_built <= float(upper_date) and year_built >= float(lower_date): return "EPC Age Band Matches Year Built" if year_built > float(upper_date): @@ -269,28 +279,33 @@ def app(): # - Or the insulation required is loft/cavity (floors should be solid) # For Westward - # DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward" - # DATA_FILENAME = "WESTWARD - completed list..xlsx" - # SHEET_NAME = "Sheet1" - # POSTCODE_COLUMN = "WFT EDIT Postcode" - # FULLADDRESS_COLUMN = "Address" - # ADDRESS1_COLUMN = None - # ADDRESS1_METHOD = "house_number_extraction" - # ADDRESS_COLS_TO_CONCAT = [] - # MISSING_POSTCODES_METHOD = None - # PROPERTY_YEAR_BUILT = "Build date" - # UPRN_COLUMN = "UPRN" - DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" - DATA_FILENAME = "Warmfront data- Colchester Borough Homes (Complete).xlsx" + DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward" + DATA_FILENAME = "WESTWARD - completed list..xlsx" SHEET_NAME = "Sheet1" - POSTCODE_COLUMN = 'Full Address.1' - FULLADDRESS_COLUMN = "Full Address" + POSTCODE_COLUMN = "WFT EDIT Postcode" + FULLADDRESS_COLUMN = "Address" ADDRESS1_COLUMN = None - ADDRESS1_METHOD = "first_word" + ADDRESS1_METHOD = "house_number_extraction" ADDRESS_COLS_TO_CONCAT = [] MISSING_POSTCODES_METHOD = None - PROPERTY_YEAR_BUILT = "Build Date" - UPRN_COLUMN = None + PROPERTY_YEAR_BUILT = "Build date" + UPRN_COLUMN = "UPRN" + # If we have the non-intrusives data, this should be true + HAS_NON_INTRUSIVES = True + + # DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" + # DATA_FILENAME = "Warmfront data- Colchester Borough Homes (Complete).xlsx" + # SHEET_NAME = "Sheet1" + # POSTCODE_COLUMN = 'Full Address.1' + # FULLADDRESS_COLUMN = "Full Address" + # ADDRESS1_COLUMN = None + # ADDRESS1_METHOD = "first_word" + # ADDRESS_COLS_TO_CONCAT = [] + # MISSING_POSTCODES_METHOD = None + # PROPERTY_YEAR_BUILT = "Build Date" + # UPRN_COLUMN = None + # # If we have the non-intrusives data, this should be true + # HAS_NON_INTRUSIVES = True # Maps addresses to uprn in problematic cases MANUAL_UPRN_MAP = {} @@ -358,6 +373,20 @@ def app(): asset_list[UPRN_COLUMN] = asset_list[UPRN_COLUMN].apply(convert_uprn) + # We attempt to process the year built column + if PROPERTY_YEAR_BUILT is not None: + # We check if we have a datetime + if isinstance(asset_list[PROPERTY_YEAR_BUILT].iloc[0], datetime): + # We treat any string columns - with common values we see + datetime_remap = { + "Pre 1900": datetime(year=1899, month=12, day=31), + } + asset_list[PROPERTY_YEAR_BUILT] = asset_list[PROPERTY_YEAR_BUILT].replace(datetime_remap) + + asset_list[PROPERTY_YEAR_BUILT] = pd.to_datetime(asset_list[PROPERTY_YEAR_BUILT]) + # Convert this to year + asset_list[PROPERTY_YEAR_BUILT] = asset_list[PROPERTY_YEAR_BUILT].dt.year + # We check for duplicated addresses asset_list["deduper"] = asset_list[FULLADDRESS_COLUMN] + asset_list[POSTCODE_COLUMN] if asset_list["deduper"].duplicated().sum(): @@ -579,11 +608,35 @@ def app(): # 3) If we have year in the asset list, we flag entries where the built year is different from the # EPC Age band if PROPERTY_YEAR_BUILT is not None: - raise Exception("THIS WAS WRONG!") asset_list["Does Age Match EPC Age Band?"] = asset_list.apply( lambda x: process_age_band(x, PROPERTY_YEAR_BUILT), axis=1 ) + if HAS_NON_INTRUSIVES: + # Empty cavity: + # 1) Has been flagged on the non-intrusives as being empty or partially filled + # 2) The age is before 1995 + # 3) Remove anything that likley has access issues + asset_list["Suitable for Cavity Fill"] = ( + (asset_list["Construction"] == "CAVITY") & + asset_list["Insulated"].isin(["EMPTY", "PARTIAL"]) & + ( + (asset_list[PROPERTY_YEAR_BUILT] <= 1995) # TODO, Or if the EPC age band is < 1995 + ) + ) + + # asset_list["Suitable for Extraction"] = + asset_list[ + (asset_list["Construction"] == "Cavity") & + asset_list["Insulated"].isin(["RETRO DRILLED"]) & + ( + (asset_list[PROPERTY_YEAR_BUILT] <= 1995) + ) & + ( + asset_list[] + ) + ] + # 4) Flag properties that look like they're good candidates for solar installs # Firstly, flag if the fabric is completely done