From f53ce8b4302482ce54785e9da807c6b6ad9296b3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 10 Oct 2024 18:51:12 +0100 Subject: [PATCH] allow postcode filtering --- etl/customers/gla/proposal_investigation.py | 25 ++++++++++++--------- etl/ownership/Ownership.py | 8 ++++++- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/etl/customers/gla/proposal_investigation.py b/etl/customers/gla/proposal_investigation.py index 57df0554..776bbc59 100644 --- a/etl/customers/gla/proposal_investigation.py +++ b/etl/customers/gla/proposal_investigation.py @@ -1,9 +1,12 @@ """ This script performs some basic analysis to identify EPC data for postcodes specified in the Warmer Homes Local Grant """ -from nis import match +import inspect +import requests +import json import pandas as pd +from pathlib import Path from etl.ownership.Ownership import Ownership postcodes = pd.read_excel( @@ -19,7 +22,7 @@ postcodes.columns = ['postcode', 'Local Authority'] postcodes = postcodes.drop([0, 1]) # Since there are a large number of potcodes (425k), let's just take a few examples # Take postcodes that begin with "BN15" -postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")] +# postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")] # The Local Authority is Adur, so let's get the EPC data for this area # epc_data = pd.read_csv( @@ -39,11 +42,14 @@ postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")] # ratings_distribution["Percentage"] = ratings_distribution["Count"] / ratings_distribution["Count"].sum() * 100 # Can we identify the owners of these units so we can contact them? 
+ +file_src = inspect.getfile(lambda x: None) +DATA_DIRECTORY = Path(file_src).parent / "local_data" / "all-domestic-certificates" +epc_paths = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()] +epc_paths = [str(entry / "certificates.csv") for entry in epc_paths] + ownership = Ownership( - epc_paths=[ - "/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223" - "-Adur/certificates.csv" - ], + epc_paths=epc_paths, domestic_ownership_path="/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv", overseas_ownership_path="/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv", land_registry_path="/Users/khalimconn-kowlessar/Downloads/pp-complete.csv", @@ -53,11 +59,11 @@ ownership = Ownership( portfolio_value=0, excluded_owners=[], excluded_uprns=[], - save=False + save=True ) # Data will be found at ownership/gla-proposal -ownership.source_epc_properties(column_filters={}) +ownership.source_epc_properties(column_filters={}, postcodes=postcodes["postcode"].str.lower().tolist()) # Step 2: Get company ownership data ownership.load_company_ownership() @@ -83,9 +89,6 @@ owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum() companies_house_api_key = "1d9c2877-3271-4642-80ed-a6170971653f" -import requests -import json - company_number = "13197205" url = f'https://api.company-information.service.gov.uk/company/{company_number}' diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py index 52181452..2c04ac8a 100644 --- a/etl/ownership/Ownership.py +++ b/etl/ownership/Ownership.py @@ -175,7 +175,7 @@ class Ownership: # Prepare the final outputs: self.create_final_matches() - def source_epc_properties(self, column_filters=None): + def source_epc_properties(self, column_filters=None, postcodes=None): """ This function will filter the epc data as specified by column filters, searching across all of the EPC tables :param column_filters: Dictionary with column names 
as keys and list of acceptable values as values. This @@ -183,6 +183,7 @@ class Ownership: {"column_name": ["value1", "value2", ...]}, where column_name is the name of the column in the EPC data and ["value1", "value2", ...] is a list of acceptable values for that column. If a column is not found in the EPC data, an exception is raised. + :param postcodes: Optional list of lower-case postcodes to keep. EPC rows whose POSTCODE, once lower-cased, is not in this list are dropped; None (the default) disables postcode filtering """ column_filters = {} if column_filters is None else column_filters @@ -206,6 +207,11 @@ class Ownership: else: raise Exception(f"Column {column} not found in data. column_filters is malformed") + if postcodes is not None: + epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes)] + if epc_data.empty: + continue + data.append(epc_data) self.epc_data = pd.concat(data, ignore_index=True)