mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
211 lines
7.3 KiB
Python
211 lines
7.3 KiB
Python
import os
|
|
import time
|
|
from epc_api.client import EpcClient
|
|
from utils.logger import setup_logger
|
|
from typing import List
|
|
from fuzzywuzzy import process
|
|
|
|
# Module-level logger, built by the project's setup_logger helper and used by SearchEpc below.
logger = setup_logger()
|
|
|
|
|
|
class SearchEpc:
    """
    Given address information about a home, this class is responsible for retrieving the EPC data
    associated with the property.

    For a home, we might have address lines 1, 2, 3 and 4, as well as a postcode.

    Often, simply searching the EPC database with address line 1 and postcode will be enough to find
    the property, but there are some cases where this is not true and we might need to utilise other
    combinations of information about the home to find the property.

    Typical usage is to call ``search()`` and, once it reports status 200, call ``retrieve()``.
    """

    # Default number of search attempts, used when the caller does not pass max_retries
    MAX_RETRIES = 5

    SUCCESS = {
        "status": 200,
        "message": "success",
        "error": None
    }

    # NOTE(review): NODATA is not referenced by any method of this class; search() reports an empty
    # response with status 204 rather than 201 - confirm which code callers expect before using it.
    NODATA = {
        "status": 201,
        "message": "No data",
        "error": None
    }

    def __init__(
        self,
        address1: str,
        postcode: str,
        address2: str = None,
        address3: str = None,
        address4: str = None,
        max_retries: int = None,
        uprn: int = None,
        size=None,
    ):
        """
        Address line 1 and postcode are mandatory fields. The other address lines are optional
        but can be used to find the epc for the home, if address1 and postcode are insufficient

        :param address1: string, property's address line 1
        :param postcode: string, property's postcode
        :param address2: string, optional, property's address line 2
        :param address3: string, optional, property's address line 3
        :param address4: string, optional, property's address line 4
        :param max_retries: int, optional, number of attempts to make when searching the api. If not
            provided, defaults to MAX_RETRIES
        :param uprn: int, optional, the uprn of the property. When set, search() looks the property up
            by uprn instead of by address and postcode
        :param size: int, optional, the number of results to return. If not provided, defaults to 25 which
            is the api's default
        """

        self.address1 = address1
        self.postcode = postcode
        self.address2 = address2
        self.address3 = address3
        self.address4 = address4
        self.uprn = uprn

        self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES

        # The api token is read from the environment
        self.client = EpcClient(auth_token=os.getenv("EPC_AUTH_TOKEN"))

        # Populated by search() once the api returns a non-empty response
        self.data = None

        self.size = size if size is not None else 25

    def search(self):
        """
        Query the EPC api for this property, retrying when the call raises.

        When a uprn is set the lookup is made by uprn, otherwise by address line 1 and postcode.
        A non-empty response is stored on ``self.data`` for retrieve() to use.

        :return: dict with the keys "status", "message" and "error", where status is one of
            200 - data was found and stored on self.data
            204 - the api call succeeded but returned nothing
            500 - every attempt raised, or no attempt was allowed; "error" holds the reason
        """
        for attempt in range(self.max_retries):
            try:
                if self.uprn:
                    # We use the direct call method, since the client's search function does not
                    # accept uprn as a parameter. The url is joined as a string (not with os.path)
                    # so that the separator is always a forward slash.
                    url = self.client.domestic.host.rstrip("/") + "/search"
                    response = self.client.domestic.call(method="get", url=url, params={"uprn": self.uprn})
                else:
                    response = self.client.domestic.search(
                        params={"address": self.address1, "postcode": self.postcode}, size=self.size
                    )
            except Exception as e:
                # Broad on purpose: the exception types raised by the client are not known here
                if attempt < self.max_retries - 1:
                    # Not the last attempt, so record the failure and wait 3 seconds before retrying
                    logger.warning(
                        "EPC search attempt %s of %s failed: %s", attempt + 1, self.max_retries, e
                    )
                    time.sleep(3)
                    continue

                # Last attempt failed as well, so we give up
                return {
                    "status": 500,
                    "message": "Could not retrieve EPC data",
                    "error": str(e)
                }

            if attempt > 0:
                logger.info("Failed previous attempt but retry successful")

            if response:
                self.data = response
                # Return a copy so that callers cannot mutate the shared class-level dict
                return dict(self.SUCCESS)

            # The call worked but the api had nothing for this property
            # TODO: Make a call to OS uprn service and get the address' uprn, just in case there is an
            # issue with how we are searching the api
            return {
                "status": 204,
                "message": "no data",
                "error": None
            }

        # Only reachable when max_retries is zero or negative, i.e. the loop never ran
        return {
            "status": 500,
            "message": "Could not retrieve EPC data",
            "error": "no search attempt was made (max_retries={})".format(self.max_retries)
        }

    @staticmethod
    def filter_rows(rows, property_type=None, address=None):
        """
        Given the results from the EPC api, attempts to reduce the number of rows when they span
        more than one uprn.

        This method should not be used when property_type and address are both not None; if both
        are given, only property_type is applied.

        :param rows: list of epc rows (dicts) as returned by the api
        :param property_type: optional, keep only the rows whose "property-type" equals this value
        :param address: optional, keep only the rows whose "address" is the closest fuzzy match to
            this value
        :return: the filtered rows, or the rows unchanged when no filter was given, when there is
            nothing to disambiguate, or when the filter would remove every row
        """
        if (property_type is None) and (address is None):
            return rows

        if not rows:
            # Nothing to filter, and fuzzy matching against no choices yields no match
            return rows

        uprns = {r["uprn"] for r in rows}
        if len(uprns) == 1:
            return rows

        logger.error("Multiple UPRNS found - we should use an alternate method of searching - TODO")

        if property_type is not None:
            # We can do a filter on the property type
            rows_filtered = [r for r in rows if r["property-type"] == property_type]
            return rows_filtered or rows

        # We can do a filter on the address, keeping the rows that share the best matching address
        best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
        if best_match is None:
            return rows

        rows_filtered = [r for r in rows if r["address"] == best_match[0]]
        return rows_filtered or rows

    def retrieve(self, property_type=None, address=None):
        """
        Given a successful search, this method will format the data and return it

        :param property_type: optional, used to narrow the rows down when they span multiple uprns
        :param address: optional, used to narrow the rows down when they span multiple uprns
        :return: tuple of (newest_epc, older_epcs, full_sap_epc), where full_sap_epc is the first row
            whose "transaction-type" is "new dwelling", or an empty dict when there is none
        :raises ValueError: if search() has not stored any data yet
        """

        if self.data is None:
            raise ValueError("data is missing, run search first")

        rows = self.data["rows"]

        # We perform some checks on the rows
        # Firstly, we should only have 1 uprn so if we have multiple, we'll need to filter down the
        # property further. The two filters are applied one after the other, since filter_rows
        # should not be given both at once
        rows = self.filter_rows(rows, property_type=property_type, address=None)
        rows = self.filter_rows(rows, property_type=None, address=address)

        # We now check for a full sap epc:
        full_sap_epc = [r for r in rows if r["transaction-type"] == "new dwelling"]
        full_sap_epc = full_sap_epc[0] if full_sap_epc else {}

        # Finally, we identify the newest epc and the rest, and then return
        newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)

        return newest_epc, older_epcs, full_sap_epc

    @staticmethod
    def filter_newest_epc(list_of_epcs: List):
        """
        Splits a list of epcs into the most recently lodged one and all of the others.

        :param list_of_epcs: list of epc rows (dicts), each with "lodgement-datetime" and "lmk-key"
        :return: tuple of (newest_epc, older_epcs); ({}, []) when the list is empty
        """
        if not list_of_epcs:
            return {}, []

        # It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
        # were lodged at the exact same time. In this case, we will take the first one, which is what
        # max() does when several items are maximal
        newest_epc = max(list_of_epcs, key=lambda epc: epc["lodgement-datetime"])

        older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_epc["lmk-key"]]

        return newest_epc, older_epcs
|