building out aggregation

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-02 18:23:52 +00:00
parent 748dc14271
commit 2156f6b076
2 changed files with 20 additions and 1 deletions

View file

@ -437,6 +437,7 @@ class SearchEpc:
if not epc_data.empty:
# Further processing of the EPC data
epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'])
epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
epc_data["numeric_house_number"] = epc_data["house_number"].apply(
@ -505,7 +506,6 @@ class SearchEpc:
built_form=built_form,
property_type=property_type
)
epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'])
# For each attribute, we need to determine the datatype and use an appropriate method
# to estimate.

View file

@ -113,3 +113,22 @@ def app():
"tenure": epc["tenure"],
}
)
# Get aggregate performance figures
# Load the accumulated `results` records into a DataFrame so the two success
# metrics can be summarised overall and then per group.
# NOTE(review): presumably `results` holds one record per property (the
# "uprn" count below is used as a group size) - confirm against the loop above.
results_df = pd.DataFrame(results)
# Overall headline figures across every record.
# NOTE(review): despite the "avg_" prefix these hold the MEDIAN, not the mean,
# and both names are misspelled ("succes" / "sucess"). Left unchanged here in
# case code outside this hunk refers to them.
avg_numeric_succes = results_df["numeric_success"].median()
avg_categorical_sucess = results_df["categorical_success"].median()
# Group by tenure
# Each grouped table below has the same three columns: the median of each
# success metric and the count of "uprn" values in the group (pandas "count"
# excludes missing values, so rows with a null uprn are not counted).
by_tenure = results_df.groupby("tenure").agg(
{"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
)
# By property type - we also want to see how many properties we have for each property type
by_property_type = results_df.groupby("property_type").agg(
{"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
)
# By property_type & built form
# Grouping on two keys yields one row per (property_type, built_form) pair
# that actually occurs in the data.
by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
{"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
)