debugging epc searcher

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-19 10:12:22 +00:00
parent 0de14c4e28
commit 55d2df1787
2 changed files with 77 additions and 21 deletions

View file

@ -331,6 +331,9 @@ class SearchEpc:
if row["lmk-key"] not in seen and not seen.add(row["lmk-key"]) if row["lmk-key"] not in seen and not seen.add(row["lmk-key"])
] ]
if data:
api_response["msg"] = self.SUCCESS
return api_response["msg"] return api_response["msg"]
def filter_rows(self, rows, property_type=None, address=None): def filter_rows(self, rows, property_type=None, address=None):

View file

@ -4,6 +4,7 @@ from BaseUtility import Definitions
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
from datetime import datetime
from dotenv import load_dotenv from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc from backend.SearchEpc import SearchEpc
@ -172,7 +173,10 @@ def extract_address1(asset_list, full_address_col, postcode_col, method="first_t
def process_age_band(x, year_built_column): def process_age_band(x, year_built_column):
year_built = float(x[year_built_column]) if isinstance(x[year_built_column], datetime):
year_built = x[year_built_column].year
else:
year_built = float(x[year_built_column])
if pd.isnull(x["Property Age Band"]) or ( if pd.isnull(x["Property Age Band"]) or (
x["Property Age Band"] in Definitions.DATA_ANOMALY_MATCHES x["Property Age Band"] in Definitions.DATA_ANOMALY_MATCHES
@ -195,6 +199,12 @@ def process_age_band(x, year_built_column):
if year_built < 2007: if year_built < 2007:
return "EPC Age Band is older than Year Built" return "EPC Age Band is older than Year Built"
if x["Property Age Band"] == "England and Wales: 2012 onwards":
if year_built >= 2012:
return "EPC Age Band Matches Year Built"
if year_built < 2012:
return "EPC Age Band is older than Year Built"
if x["Property Age Band"] == "England and Wales: before 1900": if x["Property Age Band"] == "England and Wales: before 1900":
if year_built < 1900: if year_built < 1900:
return "EPC Age Band Matches Year Built" return "EPC Age Band Matches Year Built"
@ -206,7 +216,7 @@ def process_age_band(x, year_built_column):
# so we extract the lower and upper date # so we extract the lower and upper date
age_band = x["Property Age Band"].split(": ")[1] age_band = x["Property Age Band"].split(": ")[1]
lower_date, upper_date = age_band.split("-") lower_date, upper_date = age_band.split("-")
if year_built <= float(upper_date) and year_built <= float(upper_date): if year_built <= float(upper_date) and year_built >= float(lower_date):
return "EPC Age Band Matches Year Built" return "EPC Age Band Matches Year Built"
if year_built > float(upper_date): if year_built > float(upper_date):
@ -269,28 +279,33 @@ def app():
# - Or the insulation required is loft/cavity (floors should be solid) # - Or the insulation required is loft/cavity (floors should be solid)
# For Westward # For Westward
# DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward" DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
# DATA_FILENAME = "WESTWARD - completed list..xlsx" DATA_FILENAME = "WESTWARD - completed list..xlsx"
# SHEET_NAME = "Sheet1"
# POSTCODE_COLUMN = "WFT EDIT Postcode"
# FULLADDRESS_COLUMN = "Address"
# ADDRESS1_COLUMN = None
# ADDRESS1_METHOD = "house_number_extraction"
# ADDRESS_COLS_TO_CONCAT = []
# MISSING_POSTCODES_METHOD = None
# PROPERTY_YEAR_BUILT = "Build date"
# UPRN_COLUMN = "UPRN"
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
DATA_FILENAME = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
SHEET_NAME = "Sheet1" SHEET_NAME = "Sheet1"
POSTCODE_COLUMN = 'Full Address.1' POSTCODE_COLUMN = "WFT EDIT Postcode"
FULLADDRESS_COLUMN = "Full Address" FULLADDRESS_COLUMN = "Address"
ADDRESS1_COLUMN = None ADDRESS1_COLUMN = None
ADDRESS1_METHOD = "first_word" ADDRESS1_METHOD = "house_number_extraction"
ADDRESS_COLS_TO_CONCAT = [] ADDRESS_COLS_TO_CONCAT = []
MISSING_POSTCODES_METHOD = None MISSING_POSTCODES_METHOD = None
PROPERTY_YEAR_BUILT = "Build Date" PROPERTY_YEAR_BUILT = "Build date"
UPRN_COLUMN = None UPRN_COLUMN = "UPRN"
# If we have the non-intrusives data, this should be true
HAS_NON_INTRUSIVES = True
# DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
# DATA_FILENAME = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
# SHEET_NAME = "Sheet1"
# POSTCODE_COLUMN = 'Full Address.1'
# FULLADDRESS_COLUMN = "Full Address"
# ADDRESS1_COLUMN = None
# ADDRESS1_METHOD = "first_word"
# ADDRESS_COLS_TO_CONCAT = []
# MISSING_POSTCODES_METHOD = None
# PROPERTY_YEAR_BUILT = "Build Date"
# UPRN_COLUMN = None
# # If we have the non-intrusives data, this should be true
# HAS_NON_INTRUSIVES = True
# Maps addresses to uprn in problematic cases # Maps addresses to uprn in problematic cases
MANUAL_UPRN_MAP = {} MANUAL_UPRN_MAP = {}
@ -358,6 +373,20 @@ def app():
asset_list[UPRN_COLUMN] = asset_list[UPRN_COLUMN].apply(convert_uprn) asset_list[UPRN_COLUMN] = asset_list[UPRN_COLUMN].apply(convert_uprn)
# We attempt to process the year built column
if PROPERTY_YEAR_BUILT is not None:
# We check if we have a datetime
if isinstance(asset_list[PROPERTY_YEAR_BUILT].iloc[0], datetime):
# We remap common string placeholders we see in this column (e.g. "Pre 1900") to real dates
datetime_remap = {
"Pre 1900": datetime(year=1899, month=12, day=31),
}
asset_list[PROPERTY_YEAR_BUILT] = asset_list[PROPERTY_YEAR_BUILT].replace(datetime_remap)
asset_list[PROPERTY_YEAR_BUILT] = pd.to_datetime(asset_list[PROPERTY_YEAR_BUILT])
# Convert this to year
asset_list[PROPERTY_YEAR_BUILT] = asset_list[PROPERTY_YEAR_BUILT].dt.year
# We check for duplicated addresses # We check for duplicated addresses
asset_list["deduper"] = asset_list[FULLADDRESS_COLUMN] + asset_list[POSTCODE_COLUMN] asset_list["deduper"] = asset_list[FULLADDRESS_COLUMN] + asset_list[POSTCODE_COLUMN]
if asset_list["deduper"].duplicated().sum(): if asset_list["deduper"].duplicated().sum():
@ -579,11 +608,35 @@ def app():
# 3) If we have year in the asset list, we flag entries where the built year is different from the # 3) If we have year in the asset list, we flag entries where the built year is different from the
# EPC Age band # EPC Age band
if PROPERTY_YEAR_BUILT is not None: if PROPERTY_YEAR_BUILT is not None:
raise Exception("THIS WAS WRONG!")
asset_list["Does Age Match EPC Age Band?"] = asset_list.apply( asset_list["Does Age Match EPC Age Band?"] = asset_list.apply(
lambda x: process_age_band(x, PROPERTY_YEAR_BUILT), axis=1 lambda x: process_age_band(x, PROPERTY_YEAR_BUILT), axis=1
) )
if HAS_NON_INTRUSIVES:
# Empty cavity:
# 1) Has been flagged on the non-intrusives as being empty or partially filled
# 2) The age is before 1995
# 3) Remove anything that likely has access issues
asset_list["Suitable for Cavity Fill"] = (
(asset_list["Construction"] == "CAVITY") &
asset_list["Insulated"].isin(["EMPTY", "PARTIAL"]) &
(
(asset_list[PROPERTY_YEAR_BUILT] <= 1995) # TODO, Or if the EPC age band is < 1995
)
)
# asset_list["Suitable for Extraction"] =
asset_list[
(asset_list["Construction"] == "Cavity") &
asset_list["Insulated"].isin(["RETRO DRILLED"]) &
(
(asset_list[PROPERTY_YEAR_BUILT] <= 1995)
) &
(
asset_list[]
)
]
# 4) Flag properties that look like they're good candidates for solar installs # 4) Flag properties that look like they're good candidates for solar installs
# Firstly, flag if the fabric is completely done # Firstly, flag if the fabric is completely done