Model/etl/spatial/SpecialBuildingsClient.py

114 lines
5.5 KiB
Python

import geopandas as gpd
from shapely.geometry import Point
from utils.logger import setup_logger
from etl.spatial.ConservationAreaClient import read_shapefile_from_s3
from datatypes.datatypes import OpenUprnCoordinateData
# Module-level logger obtained from the project's setup_logger helper; used by SpecialBuildingsClient below.
logger = setup_logger()
class SpecialBuildingsClient:
    """
    This class reads in data from Historic England, which can be used to determine if specific buildings are
    listed or heritage buildings.

    The datasets are only loaded when read() is called; until then both dataset attributes are None, so read()
    must be called before any of the is_* methods.
    """

    def __init__(self, historic_england_listed_buildings_path, historic_england_heritage_buildings_path, bucket):
        """
        Store where the datasets live. Nothing is read until read() is called.
        :param historic_england_listed_buildings_path: S3 key of the listed buildings shapefile
        :param historic_england_heritage_buildings_path: S3 key of the heritage buildings shapefile
        :param bucket: name of the S3 bucket that holds both shapefiles
        """
        self.historic_england_listed_buildings_path = historic_england_listed_buildings_path
        self.historic_england_heritage_buildings_path = historic_england_heritage_buildings_path
        self.bucket = bucket

        # Populated by read()
        self.historic_england_listed_buildings = None
        self.historic_england_heritage_buildings = None

    def read(self) -> None:
        """
        Read both Historic England shapefiles from S3 and store them on the instance.
        """
        logger.info("Reading in historic england listed buildings shapefile")
        self.historic_england_listed_buildings = read_shapefile_from_s3(
            bucket_name=self.bucket, s3_file_key=self.historic_england_listed_buildings_path
        )

        logger.info("Reading in historic england heritage buildings shapefile")
        self.historic_england_heritage_buildings = read_shapefile_from_s3(
            bucket_name=self.bucket, s3_file_key=self.historic_england_heritage_buildings_path
        )

        # Convert the gov data to british national grid co-ordinates
        # NOTE(review): only the heritage layer is reprojected here; presumably the listed buildings layer is
        # already in EPSG:27700 - confirm against the source data.
        self.historic_england_heritage_buildings = self.historic_england_heritage_buildings.to_crs("EPSG:27700")

    @staticmethod
    def _names_at_location(buildings, coordinates, name_column):
        """
        Return the name_column values of every geometry in buildings that contains the location given by
        coordinates, or None when no geometry contains it.
        """
        # Convert the coordinates to a Shapely Point object
        point = Point(coordinates.X_COORDINATE, coordinates.Y_COORDINATE)
        contains_point = buildings.contains(point)
        if not contains_point.any():
            return None
        # The name column is only looked up when there is at least one match
        return buildings.loc[contains_point, name_column]

    @staticmethod
    def _flag_uprns_within(uprn_gdf, buildings, flag_column):
        """
        Add a boolean flag_column to uprn_gdf (in place) that is True for every row whose UPRN has at least one
        geometry lying within a geometry of buildings, and return uprn_gdf.
        """
        # An inner join keeps only the rows that matched, so there is no need to inspect the join's right-index
        # column, whose name depends on the geopandas version and on whether the right index is named.
        matched = gpd.sjoin(uprn_gdf, buildings, how="inner", predicate="within")
        matched_uprns = matched["UPRN"].unique()
        # Matching is done by UPRN value, so every row sharing a matched UPRN is flagged
        uprn_gdf[flag_column] = uprn_gdf["UPRN"].isin(matched_uprns)
        return uprn_gdf

    def is_listed_building(self, coordinates: OpenUprnCoordinateData) -> bool:
        """
        Check if a location specified by British National Grid coordinates is a listed building.
        :param coordinates: object in the OpenUprnCoordinateData format; its X_COORDINATE and Y_COORDINATE
        attributes are used to build the point
        :return: True if the location is within a listed building geometry, False otherwise
        """
        names = self._names_at_location(self.historic_england_listed_buildings, coordinates, "Name")
        if names is None:
            return False
        logger.info(f"The location is within the following listed buildings: {names.values}")
        return True

    def is_listed_building_vectorised(self, uprn_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        """
        Check every row of a GeoDataFrame against the Historic England listed buildings data.
        :param uprn_gdf: GeoDataFrame with a "UPRN" column; it is modified in place
        :return: the same GeoDataFrame, with a boolean 'is_listed_building' column added
        """
        return self._flag_uprns_within(uprn_gdf, self.historic_england_listed_buildings, 'is_listed_building')

    def is_heritage_building_at_risk(self, coordinates: OpenUprnCoordinateData) -> bool:
        """
        Check if a location specified by British National Grid coordinates is a heritage building at risk.
        :param coordinates: object in the OpenUprnCoordinateData format; its X_COORDINATE and Y_COORDINATE
        attributes are used to build the point
        :return: True if the location is within a heritage building at risk geometry, False otherwise
        """
        names = self._names_at_location(self.historic_england_heritage_buildings, coordinates, "EntryName")
        if names is None:
            return False
        logger.info(f"The location is within the following heritage buildings at risk: {names.values}")
        return True

    def is_heritage_building_at_risk_vectorised(self, uprn_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        """
        Check every row of a GeoDataFrame against the Historic England heritage buildings data.
        :param uprn_gdf: GeoDataFrame with a "UPRN" column; it is modified in place
        :return: the same GeoDataFrame, with a boolean 'is_heritage_building' column added
        """
        return self._flag_uprns_within(uprn_gdf, self.historic_england_heritage_buildings, 'is_heritage_building')