From 97a7aadcf720d0f31dea72b74aa51bc14ef8f445 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 3 Jan 2024 11:47:06 +0000 Subject: [PATCH] changed api retrieval to get desired property types first --- backend/SearchEpc.py | 27 ++++++++++++++++++++++----- etl/testing_data/estimate_epc.py | 2 ++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 5504d573..701029be 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -425,10 +425,22 @@ class SearchEpc: :param property_type: The 'property-type' value to be used for filtering the EPC data. :return: """ + + property_type_api_map = { + "Bungalow": "bungalow", + "Flat": "flat", + "House": "house", + "Maisonette": "maisonette", + "Park Home": "park home", + } + postcode = initial_postcode while postcode: # Fetch data from EPC API - epc_response = self.get_epc(params={"postcode": postcode}, size=100) + params = {"postcode": postcode} + if property_type: + params["property-type"] = property_type_api_map[property_type] + epc_response = self.get_epc(params=params, size=100) if epc_response["status"] == 200: epc_data = pd.DataFrame(self.data["rows"]) @@ -460,8 +472,13 @@ class SearchEpc: # missing, so we fill with 1 epc_data["weight"] = epc_data["weight"].fillna(1) - epc_built_form = self._estimate_str(key="built-form", estimation_data=epc_data) epc_property_type = self._estimate_str(key="property-type", estimation_data=epc_data) + estimation_property_type = epc_property_type if property_type == "" else property_type + + epc_built_form = self._estimate_str( + key="built-form", + estimation_data=epc_data[epc_data["property-type"] == estimation_property_type] + ) if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]: estimation_built_form = "End-Terraced" @@ -470,10 +487,10 @@ class SearchEpc: else: estimation_built_form = built_form - estimation_property_type = epc_property_type if property_type == "" else property_type - # We handle some edge cases experiences with maisonettes - if built form is detatched, just filter # on maisonette + # We also add some additional logic for bungalows, because they are far less common than other + # property types if (estimation_property_type == "Maisonette") & ( estimation_built_form in ["Detached", "Semi-Detached"] ): @@ -488,7 +505,7 @@ class SearchEpc: return epc_data # Return the filtered data if it's not empty # Shorten the postcode by one character for the next iteration - postcode = postcode[:-1] + postcode = postcode[:-1].rstrip() # If loop finishes without a valid response, raise an exception raise Exception("Unable to find postcode data after trimming - investigate me") diff --git a/etl/testing_data/estimate_epc.py b/etl/testing_data/estimate_epc.py index a6c6ea6c..7a03edc3 100644 --- a/etl/testing_data/estimate_epc.py +++ b/etl/testing_data/estimate_epc.py @@ -142,6 +142,8 @@ def app(): avg_numeric_succes = results_df["numeric_success"].median() avg_categorical_sucess = results_df["categorical_success"].median() + # Before changing the search methodology: 0.7963985988175015, 0.5348837209302325 + # Group by tenure by_tenure = results_df.groupby("tenure").agg( {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}