From c806ef71516d7fda620f854262b7360937b48b10 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 29 Nov 2024 15:12:14 +0000 Subject: [PATCH] modified the hug postcodes data --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/access_reporting/app.py | 0 etl/customers/gla/hug_postcodes.py | 29 ++++++++++++++++++++++++++++- 4 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 etl/access_reporting/app.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 9b63b142..df6c4faa 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index acd935c1..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/access_reporting/app.py b/etl/access_reporting/app.py new file mode 100644 index 00000000..e69de29b diff --git a/etl/customers/gla/hug_postcodes.py b/etl/customers/gla/hug_postcodes.py index 85783d62..ac2d1e3c 100644 --- a/etl/customers/gla/hug_postcodes.py +++ b/etl/customers/gla/hug_postcodes.py @@ -3,6 +3,7 @@ import pandas as pd from pathlib import Path from tqdm import tqdm from etl.epc.settings import EARLIEST_EPC_DATE +from etl.spatial.OpenUprnClient import OpenUprnClient src_file_path = inspect.getfile(lambda: None) @@ -22,6 +23,7 @@ for directory in tqdm(epc_directories): data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE] data = data[~pd.isnull(data["uprn"])] + data["uprn"] = data["uprn"].astype(int) # Take just the newest EPC per uprn, based on lodgement-date data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn") # Take EPC D and below @@ -31,16 +33,41 @@ for directory in tqdm(epc_directories): # Take homes that don't have a gas boiler off_gas = data[~data["main-fuel"].str.contains("mains gas", case=False, na=False)] + if off_gas.empty: + continue + + # Remote properties with conservation area issues + uprns = off_gas["uprn"].unique() + # Get data + ca_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev") + off_gas = off_gas.merge( + ca_data[["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]].rename( + columns={"UPRN": "uprn"} + ), + how="left", + on="uprn", + ) + # Remove any restricted units + off_gas = off_gas[ + (off_gas["conservation_status"] != True) + & (off_gas["is_listed_building"] != True) + & (off_gas["is_heritage_building"] != True) + ] + region_summary = off_gas.groupby("postal_region").size().reset_index(name="count") aggregation.append(region_summary) postal_region_aggregation = pd.concat(aggregation) +# Re-aggregate +postal_region_aggregation = postal_region_aggregation.groupby("postal_region")["count"].sum().reset_index() + postal_region_aggregation = postal_region_aggregation.sort_values("count", ascending=False) postal_region_aggregation = postal_region_aggregation.rename( columns={"postal_region": "Postcode Region", "count": "Number of Homes"} ) postal_region_aggregation.to_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/GLA/Off Gas EPC D-G Postal Regions.xlsx", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/GLA/Off Gas EPC D-G Postal Regions - without conservation " + "area.xlsx", index=False )