import os
import time
from operator import itemgetter
from typing import Any, Dict, List, Optional, Tuple

from fuzzywuzzy import process

from epc_api.client import EpcClient
from utils.logger import setup_logger

logger = setup_logger()


class SearchEpc:
    """
    Given address information about a home, this class is responsible for retrieving the EPC data
    associated to the property.

    For a home, we might have address lines 1, 2, 3 and 4, as well as a postcode. Often, simply
    searching the EPC database with address line 1 and postcode will be enough to find the property,
    but there are some cases where this is not true and we might need to utilise other combinations
    about the home to find the property.

    Typical usage is to call :meth:`search` first (which stores the raw api response on
    ``self.data``) and then :meth:`retrieve` to pick out the relevant certificates.
    """

    MAX_RETRIES = 5
    # Seconds to wait between two failed attempts in `search`
    RETRY_DELAY_SECONDS = 3

    SUCCESS = {
        "status": 200,
        "message": "success",
        "error": None
    }

    # NOTE(review): `search` reports an empty response with status 204 rather than with this
    # constant - confirm which of the two external callers rely on before unifying them
    NODATA = {
        "status": 201,
        "message": "No data",
        "error": None
    }

    def __init__(
        self,
        address1: str,
        postcode: str,
        address2: Optional[str] = None,
        address3: Optional[str] = None,
        address4: Optional[str] = None,
        max_retries: Optional[int] = None,
        uprn: Optional[int] = None,
        size: Optional[int] = None,
    ):
        """
        Address lines 1 and postcode are mandatory fields. The other address lines are optional but
        can be used to find the epc for the home, if address1 and postcode are insufficient

        :param address1: string, property's address line 1
        :param postcode: string, property's postcode
        :param address2: string, optional, property's address line 2
        :param address3: string, optional, property's address line 3
        :param address4: string, optional, property's address line 4
        :param max_retries: int, optional, number of attempts to make when searching the api.
            If not provided, defaults to MAX_RETRIES
        :param uprn: int, optional, the uprn of the property. When set (and truthy), the search is
            done by uprn instead of by address and postcode
        :param size: int, optional, the number of results to return. If not provided, defaults to 25
            which is the api's default
        """
        self.address1 = address1
        self.postcode = postcode
        self.address2 = address2
        self.address3 = address3
        self.address4 = address4
        self.uprn = uprn

        self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES

        self.client = EpcClient(auth_token=os.getenv("EPC_AUTH_TOKEN"))
        # Raw api response, populated by a successful call to `search`
        self.data = None
        self.size = size if size is not None else 25

    def _request(self):
        """
        Performs a single call to the EPC api, by uprn when one was provided and by address line 1
        and postcode otherwise

        :return: whatever the underlying client returns for the request
        """
        if self.uprn:
            # We use the direct call method, since uprn is not exposed as a valid parameter of the
            # client's search function. The url is joined with "/" explicitly: os.path.join would
            # insert a backslash on Windows.
            # NOTE(review): assumes `client.domestic.host` is a string - confirm
            url = f"{str(self.client.domestic.host).rstrip('/')}/search"
            return self.client.domestic.call(method="get", url=url, params={"uprn": self.uprn})

        return self.client.domestic.search(
            params={"address": self.address1, "postcode": self.postcode}, size=self.size
        )

    def search(self) -> Dict[str, Any]:
        """
        Queries the EPC api, retrying when the request raises, and stores a non-empty response on
        ``self.data``

        :return: dict with "status", "message" and "error" keys:
            - status 200 when a non-empty response was received
            - status 204 when the api answered but returned nothing
            - status 500 when every attempt raised (or no attempt could be made); "error" then
              holds a description of the failure
        """
        for attempt in range(self.max_retries):
            try:
                response = self._request()
            except Exception as e:
                logger.warning(f"EPC search attempt {attempt + 1} of {self.max_retries} failed: {e}")
                if attempt < self.max_retries - 1:
                    # Not the last attempt, so wait a little before trying again
                    time.sleep(self.RETRY_DELAY_SECONDS)
                    continue
                return {
                    "status": 500,
                    "message": "Could not retrieve EPC data",
                    "error": str(e)
                }

            if not response:
                # TODO: Make a call to OS uprn service and get the address' uprn, just in case there
                #  is an issue with how we are searching the api
                return {
                    "status": 204,
                    "message": "no data",
                    "error": None
                }

            self.data = response
            if attempt > 0:
                logger.info("Failed previous attempt but retry successful")
            # Return a copy so that callers cannot mutate the shared class-level constant
            return dict(self.SUCCESS)

        # Only reachable when max_retries < 1, i.e. the loop above never ran
        return {
            "status": 500,
            "message": "Could not retrieve EPC data",
            "error": "No attempt was made because max_retries is less than 1"
        }

    @staticmethod
    def filter_rows(rows, property_type=None, address=None):
        """
        Given the results from the EPC api, attempts to reduce the number of rows when they belong
        to more than one uprn.

        This method should not be used when property_type and address are both not None: in that
        case only the property_type filter is applied.

        :param rows: list of dicts, the rows returned by the EPC api
        :param property_type: string, optional, keep only the rows with this "property-type"
        :param address: string, optional, keep only the rows whose "address" is the closest fuzzy
            match to this value
        :return: the filtered rows, or the rows unchanged when there is nothing to filter on, when
            they all share a single uprn, or when the filter would remove every row
        """
        if not rows or (property_type is None and address is None):
            return rows

        uprns = {r["uprn"] for r in rows}
        if len(uprns) == 1:
            return rows

        logger.error("Multiple UPRNS found - we should use an alternate method of searching - TODO")

        if property_type is not None:
            # We can do a filter on the property type
            rows_filtered = [r for r in rows if r["property-type"] == property_type]
            return rows_filtered if rows_filtered else rows

        # We can do a filter on the address, keeping the rows closest to the one requested
        best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
        if best_match is None:
            # Nothing could be matched, so there is nothing to filter on
            return rows

        rows_filtered = [r for r in rows if r["address"] == best_match[0]]
        return rows_filtered if rows_filtered else rows

    def retrieve(self, property_type=None, address=None) -> Tuple[Dict, List, Dict]:
        """
        Given a successful search, this method will pick the relevant EPCs out of the stored data

        :param property_type: string, optional, used to narrow the rows down when multiple uprns
            were returned
        :param address: string, optional, used to narrow the rows down when multiple uprns were
            returned
        :return: tuple of (newest epc, list of the older epcs, first epc whose "transaction-type"
            is "new dwelling", or {} if there is none)
        :raises ValueError: if search has not stored any data yet
        """
        if self.data is None:
            raise ValueError("data is missing, run search first")

        rows = self.data["rows"]

        # We perform some checks on the rows
        # Firstly, we should only have 1 uprn so if we have multiple, we'll need to filter down the
        # property further. The two filters are applied one after the other since filter_rows only
        # honours one of them per call
        rows = self.filter_rows(rows, property_type=property_type, address=None)
        rows = self.filter_rows(rows, property_type=None, address=address)

        # We now check for a full sap epc:
        full_sap_epc = next((r for r in rows if r["transaction-type"] == "new dwelling"), {})

        # Finally, we identify the newest epc and the rest, and then return
        newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)

        return newest_epc, older_epcs, full_sap_epc

    @staticmethod
    def filter_newest_epc(list_of_epcs: List):
        """
        Splits a list of EPCs into the most recently lodged one and the rest

        :param list_of_epcs: list of dicts, each with "lodgement-datetime" and "lmk-key" keys
        :return: tuple of (newest epc, list of the remaining epcs), or ({}, []) for an empty list
        """
        if not list_of_epcs:
            return {}, []

        # It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs
        # that were lodged at the exact same time. In this case, we will take the first one, which
        # is what max does when several items compare equal
        newest_epc = max(list_of_epcs, key=itemgetter("lodgement-datetime"))

        older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_epc["lmk-key"]]
        return newest_epc, older_epcs