diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 6fbdb3ea..1d5c2296 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -440,6 +440,8 @@ class SearchEpc: params = {"postcode": postcode} if property_type: params["property-type"] = property_type_api_map[property_type] + + # We take the 100 nearest homes of the relevant type, so as not to pull in too many irrelevant homes epc_response = self.get_epc(params=params, size=100) if epc_response["status"] == 200: diff --git a/etl/testing_data/estimate_epc.py b/etl/testing_data/estimate_epc.py index 7a03edc3..9e460678 100644 --- a/etl/testing_data/estimate_epc.py +++ b/etl/testing_data/estimate_epc.py @@ -142,12 +142,44 @@ def app(): avg_numeric_succes = results_df["numeric_success"].median() avg_categorical_sucess = results_df["categorical_success"].median() - # Before changing the search methodology: 0.7963985988175015, 0.5348837209302325 + # With 20 nearest homes + # 0.7718100840549558 + # 0.5116279069767442 + # 100 nearest homes + # 0.7859617377809409 + # 0.5348837209302325 # Group by tenure by_tenure = results_df.groupby("tenure").agg( {"numeric_success": "median", "categorical_success": "median", "uprn": "count"} ) + pd.set_option('display.max_rows', 500) + pd.set_option('display.max_columns', 500) + pd.set_option('display.width', 1000) + + # With 20 nearest homes + # numeric_success categorical_success uprn + # tenure + # NO DATA! 0.847840 0.581395 278 + # Not defined - use in the case of a new dwelling... 0.930282 0.651163 617 + # Owner-occupied 0.770330 0.511628 2588 + # Rented (private) 0.791885 0.558140 1232 + # owner-occupied 0.741088 0.488372 10912 + # rental (private) 0.749064 0.488372 3252 + # rental (social) 0.822109 0.581395 3878 + # unknown 0.895840 0.627907 1820 + + # 100 nearest homes + # tenure + # NO DATA! 0.899566 0.604651 233 + # Not defined - use in the case of a new dwelling... 
0.927518 0.674419 608 + # Owner-occupied 0.777026 0.511628 3167 + # Rented (private) 0.805646 0.534884 1316 + # owner-occupied 0.762180 0.488372 10835 + # rental (private) 0.760503 0.511628 3181 + # rental (social) 0.830057 0.604651 3705 + # unknown 0.899948 0.627907 1571 + # By property type - we also want to see how many properties we have for each property type by_property_type = results_df.groupby("property_type").agg( {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}