# Maps each EPC column that takes part in estimation to the pandas dtype string
# it is cast to before interpolation. The dtype also selects the estimation
# strategy used by SearchEpc:
#   "Int64" -> weighted mean rounded to an integer
#   "float" -> weighted mean
#   "str"   -> category with the largest summed weight
#
# Commented-out columns are deliberately left out of the estimate.
# NOTE(review): presumably because identifiers, full addresses and dates
# cannot be meaningfully interpolated - confirm before re-enabling any of them.
vartypes = {
    "low-energy-fixed-light-count": "Int64",
    # 'address': 'str',
    # 'uprn-source': 'str',
    "floor-height": "float",
    "heating-cost-potential": "float",
    "unheated-corridor-length": "float",
    "hot-water-cost-potential": "float",
    "construction-age-band": "str",
    "potential-energy-rating": "str",
    "mainheat-energy-eff": "str",
    "windows-env-eff": "str",
    "lighting-energy-eff": "str",
    "environment-impact-potential": "Int64",
    "glazed-type": "str",
    "heating-cost-current": "float",
    "address3": "str",
    "mainheatcont-description": "str",
    "sheating-energy-eff": "str",
    "property-type": "str",
    "local-authority-label": "str",
    "fixed-lighting-outlets-count": "Int64",
    "energy-tariff": "str",
    "mechanical-ventilation": "str",
    # Numeric like every other "*-cost-*" column; typing it as "str" routed it
    # through the categorical (weighted-mode) estimator.
    "hot-water-cost-current": "float",
    "county": "str",
    "postcode": "str",
    "solar-water-heating-flag": "str",
    "constituency": "str",
    "co2-emissions-potential": "float",
    "number-heated-rooms": "float",
    "floor-description": "str",
    "energy-consumption-potential": "float",
    "local-authority": "str",
    "built-form": "str",
    "number-open-fireplaces": "Int64",
    "windows-description": "str",
    "glazed-area": "str",
    # 'inspection-date': str,
    "mains-gas-flag": "str",
    "co2-emiss-curr-per-floor-area": "float",
    "address1": "str",
    "heat-loss-corridor": "str",
    "flat-storey-count": "Int64",
    "constituency-label": "str",
    "roof-energy-eff": "str",
    "total-floor-area": "float",
    "building-reference-number": "str",
    # NOTE(review): "environment-impact-potential" is "Int64" while this
    # "current" counterpart is "float" - confirm which is intended.
    "environment-impact-current": "float",
    "co2-emissions-current": "float",
    "roof-description": "str",
    "floor-energy-eff": "str",
    "number-habitable-rooms": "float",
    "address2": "str",
    "hot-water-env-eff": "str",
    "posttown": "str",
    "mainheatc-energy-eff": "str",
    "main-fuel": "str",
    "lighting-env-eff": "str",
    "windows-energy-eff": "str",
    "floor-env-eff": "str",
    "sheating-env-eff": "str",
    "lighting-description": "str",
    "roof-env-eff": "str",
    "walls-energy-eff": "str",
    "photo-supply": "float",
    "lighting-cost-potential": "float",
    "mainheat-env-eff": "str",
    "multi-glaze-proportion": "float",
    "main-heating-controls": "str",
    # 'lodgement-datetime',
    "flat-top-storey": "str",
    "current-energy-rating": "str",
    "secondheat-description": "str",
    "walls-env-eff": "str",
    "transaction-type": "str",
    # 'uprn': "Int64",
    # NOTE(review): "potential-energy-efficiency" is "Int64" while this
    # "current" counterpart is "float" - confirm which is intended.
    "current-energy-efficiency": "float",
    "energy-consumption-current": "float",
    "mainheat-description": "str",
    "lighting-cost-current": "float",
    # 'lodgement-date',
    "extension-count": "Int64",
    "mainheatc-env-eff": "str",
    "lmk-key": "str",
    "wind-turbine-count": "Int64",
    "tenure": "str",
    "floor-level": "str",
    "potential-energy-efficiency": "Int64",
    "hot-water-energy-eff": "str",
    "low-energy-lighting": "float",
    "walls-description": "str",
    "hotwater-description": "str",
}
@staticmethod
def _estimate_int(estimation_data, key):
    """Estimate an integer attribute as the rounded weighted mean.

    :param estimation_data: DataFrame holding the attribute column ``key`` and
        a numeric ``"weight"`` column; rows with a missing attribute value are
        expected to have been removed by the caller.
    :param key: Name of the attribute column to estimate.
    :return: The weighted mean rounded with the built-in ``round`` as a plain
        ``int``, or ``None`` when ``estimation_data`` has no rows.
    """
    if estimation_data.empty:
        return None

    # Cast explicitly: a nullable "Int64" column may otherwise reach NumPy as
    # an object array, depending on the pandas version.
    values = estimation_data[key].astype("float64").to_numpy()
    weights = estimation_data["weight"].astype("float64").to_numpy()
    return int(round(float(np.average(values, weights=weights))))


@staticmethod
def _estimate_float(estimation_data, key):
    """Estimate a continuous attribute as the weighted mean.

    :param estimation_data: DataFrame holding the attribute column ``key`` and
        a numeric ``"weight"`` column; rows with a missing attribute value are
        expected to have been removed by the caller.
    :param key: Name of the attribute column to estimate.
    :return: The weighted mean as a plain ``float``, or ``None`` when
        ``estimation_data`` has no rows.
    """
    if estimation_data.empty:
        return None

    values = estimation_data[key].astype("float64").to_numpy()
    weights = estimation_data["weight"].astype("float64").to_numpy()
    return float(np.average(values, weights=weights))


@staticmethod
def _estimate_str(estimation_data, key):
    """Estimate a categorical attribute as the weighted mode.

    Weights are summed per distinct value of ``key`` and the value with the
    largest total wins. When several values share the largest total, the one
    that sorts first is returned so the result is deterministic.

    :param estimation_data: DataFrame holding the attribute column ``key`` and
        a numeric ``"weight"`` column.
    :param key: Name of the attribute column to estimate.
    :return: The winning category, or ``None`` when there is nothing to
        aggregate.
    """
    # groupby sorts the group labels, and idxmax returns the first label that
    # reaches the maximum, which together give the tie-break described above.
    totals = estimation_data.groupby(key)["weight"].sum()
    if totals.empty:
        return None

    return totals.idxmax()