mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
setting up code for gla proposal
This commit is contained in:
parent
ecf05369dd
commit
2d7e9a3cc9
3 changed files with 88 additions and 7 deletions
0
etl/customers/gla/__init__.py
Normal file
0
etl/customers/gla/__init__.py
Normal file
76
etl/customers/gla/proposal_investigation.py
Normal file
76
etl/customers/gla/proposal_investigation.py
Normal file
|
|
@ -0,0 +1,76 @@
"""
Exploratory analysis for the GLA proposal.

Loads the postcodes listed as eligible for the Warmer Homes Local Grant (WHLG), narrows them down to a small
sample (postcodes beginning with "BN15") and matches the EPC records for that area against company ownership
data, so that the owners of the matched properties can be identified.

This is a flat script: every step runs at module level and reads from the hard-coded local paths below.
"""

import pandas as pd

from etl.ownership.Ownership import Ownership

# Local input files. These are machine-specific absolute paths; edit them here when running elsewhere.
WHLG_POSTCODES_PATH = "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes.xlsx"
EPC_CERTIFICATES_PATH = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223"
    "-Adur/certificates.csv"
)
DOMESTIC_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv"
OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv"
LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"

postcodes = pd.read_excel(WHLG_POSTCODES_PATH, sheet_name='Eligible postcodes')
# Take just the first two columns
postcodes = postcodes[
    ['List of eligible postcodes via the IMD Income Decile 1-2 pathway', 'Unnamed: 1']
]

postcodes.columns = ['postcode', 'Local Authority']
# Drop the first two rows (index labels 0 and 1) - presumably header residue from the sheet; TODO confirm
postcodes = postcodes.drop([0, 1])
# Since there are a large number of postcodes (425k), let's just take a few examples
# Take postcodes that begin with "BN15". na=False treats empty / non-string cells as "no match" so that the
# boolean mask never contains NaN (which would make the row selection raise).
postcodes = postcodes[postcodes["postcode"].str.startswith("BN15", na=False)]

# The Local Authority is Adur, so let's get the EPC data for this area
# (Earlier exploratory analysis of the EPC ratings, currently disabled.)
# epc_data = pd.read_csv(
#     "/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223-Adur"
#     "/certificates.csv", low_memory=False
# )
# # Filter on these postcodes
# epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes["postcode"].str.lower())]
# epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
# # Take the newest EPC for each UPRN, based on LODGEMENT_DATE
# epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
# epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
#
# # Let's look at the breakdown of EPC ratings. We want the count and the % of the total
# ratings_distribution = epc_data.groupby("CURRENT_ENERGY_RATING").size().reset_index()
# ratings_distribution.columns = ["Rating", "Count"]
# ratings_distribution["Percentage"] = ratings_distribution["Count"] / ratings_distribution["Count"].sum() * 100

# Can we identify the owners of these units so we can contact them?
ownership = Ownership(
    epc_paths=[EPC_CERTIFICATES_PATH],
    domestic_ownership_path=DOMESTIC_OWNERSHIP_PATH,
    overseas_ownership_path=OVERSEAS_OWNERSHIP_PATH,
    land_registry_path=LAND_REGISTRY_PATH,
    project_name="gla-proposal",
    bucket="retrofit-data-dev",
    average_property_value=0,
    portfolio_value=0,
    excluded_owners=[],
    excluded_uprns=[],
    # Exploratory run: with save=False the EPC data is not uploaded to S3 by source_epc_properties
    save=False
)

# Step 1: Source the EPC properties (no column filters).
# With save=True this data would be stored under ownership/gla-proposal in the bucket.
ownership.source_epc_properties(column_filters={})

# Step 2: Get company ownership data
ownership.load_company_ownership()

# Step 3: Prepare data for matching
ownership.prepare_for_matching()

# Step 4: Match EPC data to ownership data
ownership.match()

# We have the matches, which we now need to match to the postcodes
matches = ownership.matched_addresses.copy()
# Filter matches on the postcodes we're interested in (case-insensitive comparison)
matches = matches[matches["epc_postcode"].str.lower().isin(postcodes["postcode"].str.lower())]
# Remove any socially rented properties, based on the EPC TENURE value
matches = matches[~matches["TENURE"].isin(["Rented (social)", "rental (social)"])]
|
||||
|
|
@ -61,6 +61,7 @@ class Ownership:
|
|||
portfolio_value: float,
|
||||
excluded_owners: List[str] = None,
|
||||
excluded_uprns: List[int] = None,
|
||||
save=True
|
||||
):
|
||||
"""
|
||||
|
||||
|
|
@ -115,6 +116,8 @@ class Ownership:
|
|||
f"ownership/{self.project_name}/{self.run_timestamp}/portfolio_epc_data.xlsx"
|
||||
)
|
||||
|
||||
self.save = save
|
||||
|
||||
# Data
|
||||
self.epc_data = None
|
||||
self.ownership_data = None
|
||||
|
|
@ -210,12 +213,13 @@ class Ownership:
|
|||
if self.excluded_uprns:
|
||||
self.epc_data = self.epc_data[~self.epc_data["UPRN"].astype(float).isin(self.excluded_uprns)]
|
||||
|
||||
# We now store the data in s3
|
||||
save_excel_to_s3(
|
||||
df=self.epc_data,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.epc_data_filepath
|
||||
)
|
||||
if self.save:
|
||||
# We now store the data in s3
|
||||
save_excel_to_s3(
|
||||
df=self.epc_data,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.epc_data_filepath
|
||||
)
|
||||
|
||||
def load_company_ownership(self):
|
||||
"""
|
||||
|
|
@ -590,7 +594,8 @@ class Ownership:
|
|||
"CURRENT_ENERGY_RATING",
|
||||
"POSTCODE",
|
||||
"LODGEMENT_DATE",
|
||||
"TRANSACTION_TYPE"
|
||||
"TRANSACTION_TYPE",
|
||||
"TENURE",
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue