mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
working on epc error estimation method
This commit is contained in:
parent
e46a3d0a19
commit
81621eb1de
2 changed files with 23 additions and 9 deletions
|
|
@ -7,6 +7,7 @@ import pandas as pd
|
|||
import numpy as np
|
||||
from epc_api.client import EpcClient
|
||||
from backend.OrdnanceSurvey import OrdnanceSuveyClient
|
||||
from BaseUtility import Definitions
|
||||
from utils.logger import setup_logger
|
||||
from typing import List
|
||||
from fuzzywuzzy import process
|
||||
|
|
@ -441,10 +442,15 @@ class SearchEpc:
|
|||
epc_data["numeric_house_number"] = epc_data["house_number"].apply(
|
||||
lambda house_num: self.extract_numeric_housenumber_part(house_num)
|
||||
)
|
||||
epc_data["house_number_distance"] = abs(
|
||||
epc_data["numeric_house_number"] - self.numeric_house_number
|
||||
)
|
||||
epc_data["weight"] = 1 / epc_data["house_number_distance"]
|
||||
|
||||
if self.numeric_house_number is None:
|
||||
# If we don't have a house number, we treat all weights as equal
|
||||
epc_data["weight"] = 1
|
||||
else:
|
||||
epc_data["house_number_distance"] = abs(
|
||||
epc_data["numeric_house_number"] - self.numeric_house_number
|
||||
)
|
||||
epc_data["weight"] = 1 / epc_data["house_number_distance"]
|
||||
|
||||
epc_built_form = self._estimate_str(key="built-form", estimation_data=epc_data)
|
||||
epc_property_type = self._estimate_str(key="property-type", estimation_data=epc_data)
|
||||
|
|
@ -504,9 +510,10 @@ class SearchEpc:
|
|||
for key, vartype in vartypes.items():
|
||||
epc_data[key] = np.where(pd.isnull(epc_data[key]), None, epc_data[key])
|
||||
epc_data[key] = np.where(epc_data[key] == "", None, epc_data[key])
|
||||
epc_data[key] = epc_data[key].astype(vartype)
|
||||
estimation_data = epc_data[[key, "weight", "lodgement-datetime"]]
|
||||
estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
|
||||
estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
|
||||
estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
|
||||
estimation_data[key] = estimation_data[key].astype(vartype)
|
||||
|
||||
if estimation_data.shape[0] == 0:
|
||||
estimated_epc[key] = None
|
||||
|
|
|
|||
|
|
@ -32,6 +32,9 @@ def check_numeric_performance(estimated_value, actual_value):
|
|||
if actual_value == 0 and estimated_value == 0:
|
||||
return 0
|
||||
|
||||
if actual_value == 0 and estimated_value != 0:
|
||||
return 1
|
||||
|
||||
return abs(estimated_value - actual_value) / actual_value
|
||||
|
||||
|
||||
|
|
@ -91,18 +94,22 @@ def app():
|
|||
numeric_performance = {key: value for key, value in numeric_performance.items() if value is not None}
|
||||
# Get an average
|
||||
numeric_performance = sum(numeric_performance.values()) / len(numeric_performance)
|
||||
numeric_success = 1 - numeric_performance
|
||||
|
||||
# categorical performance
|
||||
categorical_performance = {
|
||||
key: 0 if estimated_epc[key] != epc[key] else 1 for key, value in str_var_types.items()
|
||||
}
|
||||
# Get an average
|
||||
categorical_performance = sum(categorical_performance.values()) / len(categorical_performance)
|
||||
categorical_success = sum(categorical_performance.values()) / len(categorical_performance)
|
||||
|
||||
results.append(
|
||||
{
|
||||
"uprn": epc["uprn"],
|
||||
"numeric_performance": numeric_performance,
|
||||
"categorical_performance": categorical_performance
|
||||
"numeric_success": numeric_success,
|
||||
"categorical_success": categorical_success,
|
||||
"property_type": epc["property-type"],
|
||||
"built_form": epc["built-form"],
|
||||
"tenure": epc["tenure"],
|
||||
}
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue