Working on EPC error estimation method

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-02 18:05:29 +00:00
parent e46a3d0a19
commit 81621eb1de
2 changed files with 23 additions and 9 deletions

View file

@ -7,6 +7,7 @@ import pandas as pd
import numpy as np
from epc_api.client import EpcClient
from backend.OrdnanceSurvey import OrdnanceSuveyClient
from BaseUtility import Definitions
from utils.logger import setup_logger
from typing import List
from fuzzywuzzy import process
@ -441,10 +442,15 @@ class SearchEpc:
epc_data["numeric_house_number"] = epc_data["house_number"].apply(
lambda house_num: self.extract_numeric_housenumber_part(house_num)
)
epc_data["house_number_distance"] = abs(
epc_data["numeric_house_number"] - self.numeric_house_number
)
epc_data["weight"] = 1 / epc_data["house_number_distance"]
if self.numeric_house_number is None:
# If we don't have a house number, we treat all weights as equal
epc_data["weight"] = 1
else:
epc_data["house_number_distance"] = abs(
epc_data["numeric_house_number"] - self.numeric_house_number
)
epc_data["weight"] = 1 / epc_data["house_number_distance"]
epc_built_form = self._estimate_str(key="built-form", estimation_data=epc_data)
epc_property_type = self._estimate_str(key="property-type", estimation_data=epc_data)
@ -504,9 +510,10 @@ class SearchEpc:
for key, vartype in vartypes.items():
epc_data[key] = np.where(pd.isnull(epc_data[key]), None, epc_data[key])
epc_data[key] = np.where(epc_data[key] == "", None, epc_data[key])
epc_data[key] = epc_data[key].astype(vartype)
estimation_data = epc_data[[key, "weight", "lodgement-datetime"]]
estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
estimation_data[key] = estimation_data[key].astype(vartype)
if estimation_data.shape[0] == 0:
estimated_epc[key] = None

View file

@ -32,6 +32,9 @@ def check_numeric_performance(estimated_value, actual_value):
if actual_value == 0 and estimated_value == 0:
return 0
if actual_value == 0 and estimated_value != 0:
return 1
return abs(estimated_value - actual_value) / actual_value
@ -91,18 +94,22 @@ def app():
numeric_performance = {key: value for key, value in numeric_performance.items() if value is not None}
# Get an average
numeric_performance = sum(numeric_performance.values()) / len(numeric_performance)
numeric_success = 1 - numeric_performance
# categorical performance
categorical_performance = {
key: 0 if estimated_epc[key] != epc[key] else 1 for key, value in str_var_types.items()
}
# Get an average
categorical_performance = sum(categorical_performance.values()) / len(categorical_performance)
categorical_success = sum(categorical_performance.values()) / len(categorical_performance)
results.append(
{
"uprn": epc["uprn"],
"numeric_performance": numeric_performance,
"categorical_performance": categorical_performance
"numeric_success": numeric_success,
"categorical_success": categorical_success,
"property_type": epc["property-type"],
"built_form": epc["built-form"],
"tenure": epc["tenure"],
}
)