Handling missing dates in SearchEpc class

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-27 16:45:37 +00:00
parent b26e44b465
commit eb216e55d3
2 changed files with 11 additions and 5 deletions

View file

@ -30,7 +30,7 @@ vartypes = {
'environment-impact-potential': "Int64",
'glazed-type': 'str',
'heating-cost-current': 'float',
'address3': 'str',
# 'address3': 'str',
'mainheatcont-description': 'str',
'sheating-energy-eff': 'str',
'property-type': 'str',
@ -40,7 +40,7 @@ vartypes = {
'mechanical-ventilation': 'str',
'hot-water-cost-current': 'str',
'county': 'str',
'postcode': 'str',
# 'postcode': 'str',
'solar-water-heating-flag': 'str',
'constituency': 'str',
'co2-emissions-potential': 'float',
@ -55,7 +55,7 @@ vartypes = {
# 'inspection-date': str,
'mains-gas-flag': 'str',
'co2-emiss-curr-per-floor-area': 'float',
'address1': 'str',
# 'address1': 'str',
'heat-loss-corridor': 'str',
'flat-storey-count': "Int64",
'constituency-label': 'str',
@ -67,7 +67,7 @@ vartypes = {
'roof-description': 'str',
'floor-energy-eff': 'str',
'number-habitable-rooms': 'float',
'address2': 'str',
# 'address2': 'str',
'hot-water-env-eff': 'str',
'posttown': 'str',
'mainheatc-energy-eff': 'str',
@ -98,7 +98,7 @@ vartypes = {
# 'lodgement-date',
'extension-count': "Int64",
'mainheatc-env-eff': 'str',
'lmk-key': 'str',
# 'lmk-key': 'str',
'wind-turbine-count': "Int64",
'tenure': 'str',
'floor-level': 'str',
@ -575,6 +575,11 @@ class SearchEpc:
property_type=property_type
)
# If we have a missing lodgement date, we fill it with the inspection-date
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
# If we still have missing dates, we set them to the mean of the non-NA dates
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
# For each attribute, we need to determine the datatype and use an appropriate method
# to estimate.
estimated_epc = {}

View file

@ -1135,6 +1135,7 @@ def get_epc_data(
scoring_data = []
nodata = []
failed_model_rows = []
# Failed at index 13691
for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
if property_meta["matching_postcode"] is None: