allow postcode filtering

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-10 18:51:12 +01:00
parent a953a1f0ee
commit f53ce8b430
2 changed files with 21 additions and 12 deletions

View file

@ -1,9 +1,12 @@
"""
This script performs some basic analysis to identify EPC data for postcodes specified in the Warmer Homes Local Grant
"""
from nis import match
import inspect
import requests
import json
import pandas as pd
from pathlib import Path
from etl.ownership.Ownership import Ownership
postcodes = pd.read_excel(
@ -19,7 +22,7 @@ postcodes.columns = ['postcode', 'Local Authority']
postcodes = postcodes.drop([0, 1])
# Since there are a large number of postcodes (425k), let's just take a few examples
# Take postcodes that begin with "BN15"
postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
# postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
# The Local Authority is Adur, so let's get the EPC data for this area
# epc_data = pd.read_csv(
@ -39,11 +42,14 @@ postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
# ratings_distribution["Percentage"] = ratings_distribution["Count"] / ratings_distribution["Count"].sum() * 100
# Can we identify the owners of these units so we can contact them?
file_src = inspect.getfile(lambda x: None)
DATA_DIRECTORY = Path(file_src).parent / "local_data" / "all-domestic-certificates"
epc_paths = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
epc_paths = [str(entry / "certificates.csv") for entry in epc_paths]
ownership = Ownership(
epc_paths=[
"/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223"
"-Adur/certificates.csv"
],
epc_paths=epc_paths,
domestic_ownership_path="/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv",
overseas_ownership_path="/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv",
land_registry_path="/Users/khalimconn-kowlessar/Downloads/pp-complete.csv",
@ -53,11 +59,11 @@ ownership = Ownership(
portfolio_value=0,
excluded_owners=[],
excluded_uprns=[],
save=False
save=True
)
# Data will be found at ownership/gla-proposal
ownership.source_epc_properties(column_filters={})
ownership.source_epc_properties(column_filters={}, postcodes=postcodes["postcode"].str.lower().tolist())
# Step 2: Get company ownership data
ownership.load_company_ownership()
@ -83,9 +89,6 @@ owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum()
companies_house_api_key = "1d9c2877-3271-4642-80ed-a6170971653f"
import requests
import json
company_number = "13197205"
url = f'https://api.company-information.service.gov.uk/company/{company_number}'

View file

@ -175,7 +175,7 @@ class Ownership:
# Prepare the final outputs:
self.create_final_matches()
def source_epc_properties(self, column_filters=None):
def source_epc_properties(self, column_filters=None, postcodes=None):
"""
This function will filter the epc data as specified by column filters, searching across all of the EPC tables
:param column_filters: Dictionary with column names as keys and list of acceptable values as values. This
@ -183,6 +183,7 @@ class Ownership:
{"column_name": ["value1", "value2", ...]}, where column_name is the name of the column
in the EPC data and ["value1", "value2", ...] is a list of acceptable values for that
column. If a column is not found in the EPC data, an exception is raised.
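:param postcodes: Optional list of lower-case postcodes. When provided, only EPC rows whose POSTCODE (lower-cased) appears in this list are kept; when None, no postcode filtering is applied.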
"""
column_filters = {} if column_filters is None else column_filters
@ -206,6 +207,11 @@ class Ownership:
else:
raise Exception(f"Column {column} not found in data. column_filters is malformed")
if postcodes is not None:
epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes)]
if epc_data.empty:
continue
data.append(epc_data)
self.epc_data = pd.concat(data, ignore_index=True)