diff --git a/.idea/Model.iml b/.idea/Model.iml
index 9b63b142..df6c4faa 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index acd935c1..50cad4ca 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/access_reporting/app.py b/etl/access_reporting/app.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/customers/gla/hug_postcodes.py b/etl/customers/gla/hug_postcodes.py
index 85783d62..ac2d1e3c 100644
--- a/etl/customers/gla/hug_postcodes.py
+++ b/etl/customers/gla/hug_postcodes.py
@@ -3,6 +3,7 @@ import pandas as pd
from pathlib import Path
from tqdm import tqdm
from etl.epc.settings import EARLIEST_EPC_DATE
+from etl.spatial.OpenUprnClient import OpenUprnClient
src_file_path = inspect.getfile(lambda: None)
@@ -22,6 +23,7 @@ for directory in tqdm(epc_directories):
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
data = data[~pd.isnull(data["uprn"])]
+ data["uprn"] = data["uprn"].astype(int)
# Take just the newest EPC per uprn, based on lodgement-date
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
# Take EPC D and below
@@ -31,16 +33,41 @@ for directory in tqdm(epc_directories):
# Take homes that don't have a gas boiler
off_gas = data[~data["main-fuel"].str.contains("mains gas", case=False, na=False)]
+ if off_gas.empty:
+ continue
+
+ # Remove properties with conservation-area, listed-building or heritage restrictions
+ uprns = off_gas["uprn"].unique()
+ # Fetch spatial data (conservation / listed / heritage flags) for these UPRNs
+ ca_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")
+ off_gas = off_gas.merge(
+ ca_data[["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]].rename(
+ columns={"UPRN": "uprn"}
+ ),
+ how="left",
+ on="uprn",
+ )
+ # Remove any restricted units; rows with no spatial match (NaN flags after the left merge) presumably stay - verify
+ off_gas = off_gas[
+ (off_gas["conservation_status"] != True)
+ & (off_gas["is_listed_building"] != True)
+ & (off_gas["is_heritage_building"] != True)
+ ]
+
region_summary = off_gas.groupby("postal_region").size().reset_index(name="count")
aggregation.append(region_summary)
postal_region_aggregation = pd.concat(aggregation)
+# Re-aggregate: sum the per-directory counts so each postal region appears only once
+postal_region_aggregation = postal_region_aggregation.groupby("postal_region")["count"].sum().reset_index()
+
postal_region_aggregation = postal_region_aggregation.sort_values("count", ascending=False)
postal_region_aggregation = postal_region_aggregation.rename(
columns={"postal_region": "Postcode Region", "count": "Number of Homes"}
)
postal_region_aggregation.to_excel(
- "/Users/khalimconn-kowlessar/Documents/hestia/Customers/GLA/Off Gas EPC D-G Postal Regions.xlsx",
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/GLA/Off Gas EPC D-G Postal Regions - without conservation "
+ "area.xlsx",
index=False
)