changed api retrieval to get desired property types first

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-03 11:47:06 +00:00
parent aca882f10c
commit 97a7aadcf7
2 changed files with 24 additions and 5 deletions

View file

@ -425,10 +425,22 @@ class SearchEpc:
:param property_type: The 'property-type' value to be used for filtering the EPC data.
:return:
"""
property_type_api_map = {
"Bungalow": "bungalow",
"Flat": "flat",
"House": "house",
"Maisonette": "maisonette",
"Park Home": "park home",
}
postcode = initial_postcode
while postcode:
# Fetch data from EPC API
epc_response = self.get_epc(params={"postcode": postcode}, size=100)
params = {"postcode": postcode}
if property_type:
params["property-type"] = property_type_api_map[property_type]
epc_response = self.get_epc(params=params, size=100)
if epc_response["status"] == 200:
epc_data = pd.DataFrame(self.data["rows"])
@ -460,8 +472,13 @@ class SearchEpc:
# missing, so we fill with 1
epc_data["weight"] = epc_data["weight"].fillna(1)
epc_built_form = self._estimate_str(key="built-form", estimation_data=epc_data)
epc_property_type = self._estimate_str(key="property-type", estimation_data=epc_data)
estimation_property_type = epc_property_type if property_type == "" else property_type
epc_built_form = self._estimate_str(
key="built-form",
estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
)
if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
estimation_built_form = "End-Terraced"
@ -470,10 +487,10 @@ class SearchEpc:
else:
estimation_built_form = built_form
estimation_property_type = epc_property_type if property_type == "" else property_type
# We handle some edge cases experienced with maisonettes - if built form is detached, just filter
# on maisonette
# We also add some additional logic for bungalows, because they are far less common than other
# property types
if (estimation_property_type == "Maisonette") & (
estimation_built_form in ["Detached", "Semi-Detached"]
):
@ -488,7 +505,7 @@ class SearchEpc:
return epc_data # Return the filtered data if it's not empty
# Shorten the postcode by one character for the next iteration
postcode = postcode[:-1]
postcode = postcode[:-1].rstrip()
# If loop finishes without a valid response, raise an exception
raise Exception("Unable to find postcode data after trimming - investigate me")

View file

@ -142,6 +142,8 @@ def app():
avg_numeric_succes = results_df["numeric_success"].median()
avg_categorical_sucess = results_df["categorical_success"].median()
# Before changing the search methodology: 0.7963985988175015, 0.5348837209302325
# Group by tenure
by_tenure = results_df.groupby("tenure").agg(
{"numeric_success": "median", "categorical_success": "median", "uprn": "count"}