mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
debugging epc searcher
This commit is contained in:
parent
0de14c4e28
commit
55d2df1787
2 changed files with 77 additions and 21 deletions
|
|
@ -331,6 +331,9 @@ class SearchEpc:
|
||||||
if row["lmk-key"] not in seen and not seen.add(row["lmk-key"])
|
if row["lmk-key"] not in seen and not seen.add(row["lmk-key"])
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if data:
|
||||||
|
api_response["msg"] = self.SUCCESS
|
||||||
|
|
||||||
return api_response["msg"]
|
return api_response["msg"]
|
||||||
|
|
||||||
def filter_rows(self, rows, property_type=None, address=None):
|
def filter_rows(self, rows, property_type=None, address=None):
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from BaseUtility import Definitions
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from backend.SearchEpc import SearchEpc
|
from backend.SearchEpc import SearchEpc
|
||||||
|
|
@ -172,7 +173,10 @@ def extract_address1(asset_list, full_address_col, postcode_col, method="first_t
|
||||||
|
|
||||||
|
|
||||||
def process_age_band(x, year_built_column):
|
def process_age_band(x, year_built_column):
|
||||||
year_built = float(x[year_built_column])
|
if isinstance(x[year_built_column], datetime):
|
||||||
|
year_built = x[year_built_column].year
|
||||||
|
else:
|
||||||
|
year_built = float(x[year_built_column])
|
||||||
|
|
||||||
if pd.isnull(x["Property Age Band"]) or (
|
if pd.isnull(x["Property Age Band"]) or (
|
||||||
x["Property Age Band"] in Definitions.DATA_ANOMALY_MATCHES
|
x["Property Age Band"] in Definitions.DATA_ANOMALY_MATCHES
|
||||||
|
|
@ -195,6 +199,12 @@ def process_age_band(x, year_built_column):
|
||||||
if year_built < 2007:
|
if year_built < 2007:
|
||||||
return "EPC Age Band is older than Year Built"
|
return "EPC Age Band is older than Year Built"
|
||||||
|
|
||||||
|
if x["Property Age Band"] == "England and Wales: 2012 onwards":
|
||||||
|
if year_built >= 2012:
|
||||||
|
return "EPC Age Band Matches Year Built"
|
||||||
|
if year_built < 2012:
|
||||||
|
return "EPC Age Band is older than Year Built"
|
||||||
|
|
||||||
if x["Property Age Band"] == "England and Wales: before 1900":
|
if x["Property Age Band"] == "England and Wales: before 1900":
|
||||||
if year_built < 1900:
|
if year_built < 1900:
|
||||||
return "EPC Age Band Matches Year Built"
|
return "EPC Age Band Matches Year Built"
|
||||||
|
|
@ -206,7 +216,7 @@ def process_age_band(x, year_built_column):
|
||||||
# so we extract the lower and upper date
|
# so we extract the lower and upper date
|
||||||
age_band = x["Property Age Band"].split(": ")[1]
|
age_band = x["Property Age Band"].split(": ")[1]
|
||||||
lower_date, upper_date = age_band.split("-")
|
lower_date, upper_date = age_band.split("-")
|
||||||
if year_built <= float(upper_date) and year_built <= float(upper_date):
|
if year_built <= float(upper_date) and year_built >= float(lower_date):
|
||||||
return "EPC Age Band Matches Year Built"
|
return "EPC Age Band Matches Year Built"
|
||||||
|
|
||||||
if year_built > float(upper_date):
|
if year_built > float(upper_date):
|
||||||
|
|
@ -269,28 +279,33 @@ def app():
|
||||||
# - Or the insulation required is loft/cavity (floors should be solid)
|
# - Or the insulation required is loft/cavity (floors should be solid)
|
||||||
|
|
||||||
# For Westward
|
# For Westward
|
||||||
# DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
|
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
|
||||||
# DATA_FILENAME = "WESTWARD - completed list..xlsx"
|
DATA_FILENAME = "WESTWARD - completed list..xlsx"
|
||||||
# SHEET_NAME = "Sheet1"
|
|
||||||
# POSTCODE_COLUMN = "WFT EDIT Postcode"
|
|
||||||
# FULLADDRESS_COLUMN = "Address"
|
|
||||||
# ADDRESS1_COLUMN = None
|
|
||||||
# ADDRESS1_METHOD = "house_number_extraction"
|
|
||||||
# ADDRESS_COLS_TO_CONCAT = []
|
|
||||||
# MISSING_POSTCODES_METHOD = None
|
|
||||||
# PROPERTY_YEAR_BUILT = "Build date"
|
|
||||||
# UPRN_COLUMN = "UPRN"
|
|
||||||
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
|
||||||
DATA_FILENAME = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
|
||||||
SHEET_NAME = "Sheet1"
|
SHEET_NAME = "Sheet1"
|
||||||
POSTCODE_COLUMN = 'Full Address.1'
|
POSTCODE_COLUMN = "WFT EDIT Postcode"
|
||||||
FULLADDRESS_COLUMN = "Full Address"
|
FULLADDRESS_COLUMN = "Address"
|
||||||
ADDRESS1_COLUMN = None
|
ADDRESS1_COLUMN = None
|
||||||
ADDRESS1_METHOD = "first_word"
|
ADDRESS1_METHOD = "house_number_extraction"
|
||||||
ADDRESS_COLS_TO_CONCAT = []
|
ADDRESS_COLS_TO_CONCAT = []
|
||||||
MISSING_POSTCODES_METHOD = None
|
MISSING_POSTCODES_METHOD = None
|
||||||
PROPERTY_YEAR_BUILT = "Build Date"
|
PROPERTY_YEAR_BUILT = "Build date"
|
||||||
UPRN_COLUMN = None
|
UPRN_COLUMN = "UPRN"
|
||||||
|
# If we have the non-intrusives data, this should be true
|
||||||
|
HAS_NON_INTRUSIVES = True
|
||||||
|
|
||||||
|
# DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
||||||
|
# DATA_FILENAME = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
||||||
|
# SHEET_NAME = "Sheet1"
|
||||||
|
# POSTCODE_COLUMN = 'Full Address.1'
|
||||||
|
# FULLADDRESS_COLUMN = "Full Address"
|
||||||
|
# ADDRESS1_COLUMN = None
|
||||||
|
# ADDRESS1_METHOD = "first_word"
|
||||||
|
# ADDRESS_COLS_TO_CONCAT = []
|
||||||
|
# MISSING_POSTCODES_METHOD = None
|
||||||
|
# PROPERTY_YEAR_BUILT = "Build Date"
|
||||||
|
# UPRN_COLUMN = None
|
||||||
|
# # If we have the non-intrusives data, this should be true
|
||||||
|
# HAS_NON_INTRUSIVES = True
|
||||||
|
|
||||||
# Maps addresses to uprn in problematic cases
|
# Maps addresses to uprn in problematic cases
|
||||||
MANUAL_UPRN_MAP = {}
|
MANUAL_UPRN_MAP = {}
|
||||||
|
|
@ -358,6 +373,20 @@ def app():
|
||||||
|
|
||||||
asset_list[UPRN_COLUMN] = asset_list[UPRN_COLUMN].apply(convert_uprn)
|
asset_list[UPRN_COLUMN] = asset_list[UPRN_COLUMN].apply(convert_uprn)
|
||||||
|
|
||||||
|
# We attempt to process the year built column
|
||||||
|
if PROPERTY_YEAR_BUILT is not None:
|
||||||
|
# We check if we have a datetime
|
||||||
|
if isinstance(asset_list[PROPERTY_YEAR_BUILT].iloc[0], datetime):
|
||||||
|
# We treat any string columns - with common values we see
|
||||||
|
datetime_remap = {
|
||||||
|
"Pre 1900": datetime(year=1899, month=12, day=31),
|
||||||
|
}
|
||||||
|
asset_list[PROPERTY_YEAR_BUILT] = asset_list[PROPERTY_YEAR_BUILT].replace(datetime_remap)
|
||||||
|
|
||||||
|
asset_list[PROPERTY_YEAR_BUILT] = pd.to_datetime(asset_list[PROPERTY_YEAR_BUILT])
|
||||||
|
# Convert this to year
|
||||||
|
asset_list[PROPERTY_YEAR_BUILT] = asset_list[PROPERTY_YEAR_BUILT].dt.year
|
||||||
|
|
||||||
# We check for duplicated addresses
|
# We check for duplicated addresses
|
||||||
asset_list["deduper"] = asset_list[FULLADDRESS_COLUMN] + asset_list[POSTCODE_COLUMN]
|
asset_list["deduper"] = asset_list[FULLADDRESS_COLUMN] + asset_list[POSTCODE_COLUMN]
|
||||||
if asset_list["deduper"].duplicated().sum():
|
if asset_list["deduper"].duplicated().sum():
|
||||||
|
|
@ -579,11 +608,35 @@ def app():
|
||||||
# 3) If we have year in the asset list, we flag entries where the built year is different from the
|
# 3) If we have year in the asset list, we flag entries where the built year is different from the
|
||||||
# EPC Age band
|
# EPC Age band
|
||||||
if PROPERTY_YEAR_BUILT is not None:
|
if PROPERTY_YEAR_BUILT is not None:
|
||||||
raise Exception("THIS WAS WRONG!")
|
|
||||||
asset_list["Does Age Match EPC Age Band?"] = asset_list.apply(
|
asset_list["Does Age Match EPC Age Band?"] = asset_list.apply(
|
||||||
lambda x: process_age_band(x, PROPERTY_YEAR_BUILT), axis=1
|
lambda x: process_age_band(x, PROPERTY_YEAR_BUILT), axis=1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if HAS_NON_INTRUSIVES:
|
||||||
|
# Empty cavity:
|
||||||
|
# 1) Has been flagged on the non-intrusives as being empty or partially filled
|
||||||
|
# 2) The age is before 1995
|
||||||
|
# 3) Remove anything that likely has access issues
|
||||||
|
asset_list["Suitable for Cavity Fill"] = (
|
||||||
|
(asset_list["Construction"] == "CAVITY") &
|
||||||
|
asset_list["Insulated"].isin(["EMPTY", "PARTIAL"]) &
|
||||||
|
(
|
||||||
|
(asset_list[PROPERTY_YEAR_BUILT] <= 1995) # TODO, Or if the EPC age band is < 1995
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# asset_list["Suitable for Extraction"] =
|
||||||
|
asset_list[
|
||||||
|
(asset_list["Construction"] == "Cavity") &
|
||||||
|
asset_list["Insulated"].isin(["RETRO DRILLED"]) &
|
||||||
|
(
|
||||||
|
(asset_list[PROPERTY_YEAR_BUILT] <= 1995)
|
||||||
|
) &
|
||||||
|
(
|
||||||
|
asset_list[]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
# 4) Flag properties that look like they're good candidates for solar installs
|
# 4) Flag properties that look like they're good candidates for solar installs
|
||||||
# Firstly, flag if the fabric is completely done
|
# Firstly, flag if the fabric is completely done
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue