diff --git a/etl/customers/gla/proposal_investigation.py b/etl/customers/gla/proposal_investigation.py
index e36d82b8..57df0554 100644
--- a/etl/customers/gla/proposal_investigation.py
+++ b/etl/customers/gla/proposal_investigation.py
@@ -74,3 +74,47 @@ matches = ownership.matched_addresses.copy()
 matches = matches[matches["epc_postcode"].str.lower().isin(postcodes["postcode"].str.lower())]
 # Remove any social transactions
 matches = matches[~matches["TENURE"].isin(["Rented (social)", "rental (social)"])]
+
+matches.head()
+owners_count = matches.groupby(['Proprietor Name (1)', 'Company Registration No. (1)']).size().reset_index()
+owners_count.columns = ['Owner', 'Owner Registration #', 'Count']
+owners_count = owners_count.sort_values('Count', ascending=False)
+owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum() * 100
+
+import json
+import os
+
+import requests
+
+# The Companies House API key is a credential, so it is read from the environment
+# instead of being committed to the repository.
+# NOTE(review): any key that was previously committed in this file should be revoked.
+companies_house_api_key = os.environ.get("COMPANIES_HOUSE_API_KEY")
+if not companies_house_api_key:
+    raise RuntimeError("Set the COMPANIES_HOUSE_API_KEY environment variable before running this script.")
+
+# Without a timeout, requests can block indefinitely on an unresponsive server.
+request_timeout_seconds = 30
+
+company_number = "13197205"
+url = f'https://api.company-information.service.gov.uk/company/{company_number}'
+
+# Make the API request (the key is sent as the basic-auth username with an empty password)
+response = requests.get(url, auth=(companies_house_api_key, ''), timeout=request_timeout_seconds)
+
+# Check if the request was successful
+if response.status_code == 200:
+    company_data = response.json()
+    # Pretty-print the fetched data
+    print(json.dumps(company_data, indent=4))
+else:
+    print(f"Failed to fetch data. Status code: {response.status_code}")
+
+psc_url = f'https://api.company-information.service.gov.uk/company/{company_number}/persons-with-significant-control'
+psc_response = requests.get(psc_url, auth=(companies_house_api_key, ''), timeout=request_timeout_seconds)
+# Only decode the body on success, mirroring the company lookup above
+if psc_response.status_code == 200:
+    psc_data = psc_response.json()
+else:
+    psc_data = None
+    print(f"Failed to fetch data. Status code: {psc_response.status_code}")
diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 2079391c..52181452 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -488,11 +488,11 @@ class Ownership:
                 house_no = house_no.replace(",", "")
             if house_no is None:
-                # It's hard for us to get a reliable match
-                # filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
-                # if filtered.shape[0] > 1:
-                #     raise Exception("No valid - maybe we should do levenstein?")
-                continue
+                # If the house number is missing, we usually have a named property, so we look for that name as a
+                # case-insensitive literal substring and only accept the result when exactly one row matches
+                filtered = filtered[filtered["Property Address"].str.lower().str.contains(address["ADDRESS"].lower(), regex=False, na=False)]
+                if filtered.shape[0] != 1:
+                    continue
             else: