mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
working on gla proposal
This commit is contained in:
parent
f53ce8b430
commit
722a3dba55
2 changed files with 72 additions and 34 deletions
|
|
@ -10,16 +10,18 @@ from pathlib import Path
|
|||
from etl.ownership.Ownership import Ownership
|
||||
|
||||
postcodes = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes.xlsx", sheet_name='Eligible postcodes'
|
||||
"/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes_RP edit.xlsx", sheet_name='Eligible postcodes'
|
||||
)
|
||||
# Take just the first two columns
|
||||
# Take just the first three columns
|
||||
postcodes = postcodes[
|
||||
['List of eligible postcodes via the IMD Income Decile 1-2 pathway', 'Unnamed: 1']
|
||||
['List of eligible postcodes via the IMD Income Decile 1-2 pathway', 'Unnamed: 1', 'Unnamed: 2']
|
||||
]
|
||||
|
||||
postcodes.columns = ['postcode', 'Local Authority']
|
||||
postcodes.columns = ['postcode', 'Local Authority', 'London Borough?']
|
||||
# Drop the first two rows (index labels 0 and 1)
|
||||
postcodes = postcodes.drop([0, 1])
|
||||
# Take just the London Boroughs
|
||||
postcodes = postcodes[postcodes["London Borough?"] == "Yes"]
|
||||
# Since there are a large number of postcodes (425k), let's just take a few examples
|
||||
# Take postcodes that begin with "BN15"
|
||||
# postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
|
||||
|
|
@ -74,22 +76,46 @@ ownership.prepare_for_matching()
|
|||
# Step 4: Match EPC data to ownership data
|
||||
ownership.match()
|
||||
|
||||
from utils.s3 import save_excel_to_s3
|
||||
|
||||
# Save the data to S3
|
||||
save_excel_to_s3(
|
||||
df=ownership.matched_addresses,
|
||||
bucket_name=ownership.bucket,
|
||||
file_key=ownership.matched_addresses_pre_filter_filepath
|
||||
)
|
||||
|
||||
# We have the matches, which we now need to match to the postcodes
|
||||
matches = ownership.matched_addresses.copy()
|
||||
# filter matches on the postcodes we're interested in
|
||||
matches = matches[matches["epc_postcode"].str.lower().isin(postcodes["postcode"].str.lower())]
|
||||
# Remove any social transactions
|
||||
matches = matches[~matches["TENURE"].isin(["Rented (social)", "rental (social)"])]
|
||||
matches = matches[~matches["TENURE"].isin(
|
||||
["Rented (social)", "rental (social)",
|
||||
"Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is not to be "
|
||||
"used for an existing dwelling", "NO DATA!"])
|
||||
]
|
||||
# Look at the EPC ratings
|
||||
epc_ratings = matches.groupby(["CURRENT_ENERGY_RATING"]).size().reset_index()
|
||||
epc_ratings.columns = ["EPC Rating", "Count"]
|
||||
epc_ratings["Percentage"] = epc_ratings["Count"] / epc_ratings["Count"].sum() * 100
|
||||
|
||||
# Take properties that are below an EPC C rating, as defined by the guidance and remove any new builds
|
||||
matches = matches[matches["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"])]
|
||||
# 11,694 properties
|
||||
|
||||
matches.head()
|
||||
owners_count = matches.groupby(['Proprietor Name (1)', 'Company Registration No. (1)']).size().reset_index()
|
||||
owners_count.columns = ['Owner', 'Owner Registration #', 'Count']
|
||||
owners_count = owners_count.sort_values('Count', ascending=False)
|
||||
owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum() * 100
|
||||
|
||||
# Take an example postal region
|
||||
matches = matches.sort_values("epc_postcode", ascending=True)
|
||||
example = matches[matches["epc_postcode"].str.startswith("BR1 ")].copy()
|
||||
|
||||
import os

# Companies House API key, sent as the HTTP basic-auth username on each request.
# Read from the environment so the credential is never committed to the repo;
# the key that was previously hard-coded on this line is in version history and
# should be rotated. Falls back to an empty string when the variable is unset,
# in which case the API request is expected to be rejected and reported by the
# status-code check that follows it.
companies_house_api_key = os.environ.get("COMPANIES_HOUSE_API_KEY", "")
|
||||
|
||||
company_number = "13197205"
|
||||
company_number = example.head(1)["Company Registration No. (1)"].values[0]
|
||||
url = f'https://api.company-information.service.gov.uk/company/{company_number}'
|
||||
|
||||
# Make the API request
|
||||
|
|
@ -102,7 +128,17 @@ if response.status_code == 200:
|
|||
print(json.dumps(company_data, indent=4))
|
||||
else:
|
||||
print(f"Failed to fetch data. Status code: {response.status_code}")
|
||||
# Try prepending a zero to the company number
|
||||
company_number = f"0{company_number}"
|
||||
url = f'https://api.company-information.service.gov.uk/company/{company_number}'
|
||||
response = requests.get(url, auth=(companies_house_api_key, ''))
|
||||
company_data = response.json()
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
pprint(company_data)
|
||||
|
||||
psc_url = f'https://api.company-information.service.gov.uk/company/{company_number}/persons-with-significant-control'
|
||||
psc_response = requests.get(psc_url, auth=(companies_house_api_key, ''))
|
||||
psc_data = psc_response.json()
|
||||
pprint(psc_data)
|
||||
|
|
|
|||
|
|
@ -161,16 +161,17 @@ class Ownership:
|
|||
# Step 5: Match land registry data to existing matches
|
||||
self.match_with_land_registry()
|
||||
# We store this data in s3 before we perform any filtering
|
||||
save_excel_to_s3(
|
||||
df=self.matched_addresses,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.matched_addresses_pre_filter_filepath
|
||||
)
|
||||
save_excel_to_s3(
|
||||
df=self.combined_matching_lookup,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.combined_matching_lookup_pre_filter_filepath
|
||||
)
|
||||
if self.save:
|
||||
save_excel_to_s3(
|
||||
df=self.matched_addresses,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.matched_addresses_pre_filter_filepath
|
||||
)
|
||||
save_excel_to_s3(
|
||||
df=self.combined_matching_lookup,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.combined_matching_lookup_pre_filter_filepath
|
||||
)
|
||||
|
||||
# Prepare the final outputs:
|
||||
self.create_final_matches()
|
||||
|
|
@ -1013,25 +1014,26 @@ class Ownership:
|
|||
if self.portfolio_properties["UPRN"].nunique() != self.portfolio_epc_data["UPRN"].nunique():
|
||||
raise ValueError("Portfolio properties and epc data don't match")
|
||||
|
||||
logger.info("Storing final outpus")
|
||||
# Store data
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_owners,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_owners_filepath,
|
||||
)
|
||||
if self.save:
|
||||
logger.info("Storing final outpus")
|
||||
# Store data
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_owners,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_owners_filepath,
|
||||
)
|
||||
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_properties,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_properties_filepath,
|
||||
)
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_properties,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_properties_filepath,
|
||||
)
|
||||
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_epc_data,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_epc_data_filepath,
|
||||
)
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_epc_data,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_epc_data_filepath,
|
||||
)
|
||||
|
||||
def get_asset_list(self):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue