Merge pull request #356 from Hestia-Homes/gla-proposal

Gla proposal
This commit is contained in:
KhalimCK 2024-10-16 10:20:22 +01:00 committed by GitHub
commit f0219b079b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 275 additions and 42 deletions

View file

@ -1204,7 +1204,7 @@ class Property:
return False
suitable_house = self.data["property-type"] == "House" and self.data["built-form"] in [
"Detached", "Semi-Detached",
"Detached", "Semi-Detached", "End-Terrace",
]
suitable_bungalow = self.data["property-type"] == "Bungalow" and self.data["built-form"] in [

View file

@ -543,7 +543,11 @@ async def trigger_plan(body: PlanTriggerRequest):
representative_recommendations = {}
for p in tqdm(input_properties):
recommender = Recommendations(
property_instance=p, materials=materials, exclusions=body.exclusions, inclusions=body.inclusions
property_instance=p,
materials=materials,
exclusions=body.exclusions,
inclusions=body.inclusions,
default_u_values=body.default_u_values
)
property_recommendations, property_representative_recommendations = recommender.recommend()

View file

@ -89,6 +89,9 @@ class PlanTriggerRequest(BaseModel):
# if False, allows optimisation to be switched off
optimise: Optional[bool] = True
# If True, uses default u-values for models
default_u_values: Optional[bool] = True
_allowed_goals = {"Increasing EPC"}
_allowed_housing_types = {"Social", "Private"}

View file

@ -103,6 +103,8 @@ class PropertyValuation:
# Vander Elliot Intrusive surveys
12103116: 1_537_000,
12103117: 1_404_000,
# GLA Proposal
100020606627: 409_000
}
# We base our valuation uplifts on a number of sources

View file

View file

@ -0,0 +1,38 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
# Identifiers used to namespace the uploaded asset list in S3.
portfolio_id = 110
user_id = 8

# Single-property asset list, held as a DataFrame so it can be written out as CSV.
asset_list = pd.DataFrame(
    [
        {"address": "4, King Henrys Drive", "postcode": "CR0 0PA"},
    ]
)

# The object key is <user_id>/<portfolio_id>/asset_list.csv in the dev inputs bucket.
filename = f"{user_id}/{portfolio_id}/asset_list.csv"
save_csv_to_s3(
    dataframe=asset_list,
    bucket_name="retrofit-plan-inputs-dev",
    file_name=filename,
)

# Request payload that points at the uploaded asset list via trigger_file_path.
# NOTE(review): presumably this is the body for the plan-trigger endpoint - confirm against the API.
body1 = dict(
    portfolio_id=str(portfolio_id),
    housing_type="Private",
    goal="Increasing EPC",
    goal_value="A",
    trigger_file_path=filename,
    already_installed_file_path="",
    patches_file_path="",
    non_invasive_recommendations_file_path="",
    inclusions=[
        "cavity_wall_insulation",
        "loft_insulation",
        "air_source_heat_pump",
        "solar_pv",
    ],
    budget=None,
    scenario_name="Whole House",
    multi_plan=False,
)
print(body1)

View file

@ -0,0 +1,173 @@
"""
This script performs some basic analysis to identify EPC data for postcodes specified in the Warmer Homes Local Grant
"""
import inspect
import requests
import json
import pandas as pd
from pathlib import Path
from etl.ownership.Ownership import Ownership
# Load the "Eligible postcodes" sheet of the WHLG workbook from the local Downloads folder.
eligible_sheet = pd.read_excel(
    "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes_RP edit.xlsx",
    sheet_name='Eligible postcodes',
)

# Only three columns are needed; the sheet's own headers are replaced with short names below.
eligible_sheet = eligible_sheet[
    [
        'List of eligible postcodes via the IMD Income Decile 1-2 pathway',
        'Unnamed: 1',
        'Unnamed: 2',
    ]
]
eligible_sheet.columns = ['postcode', 'Local Authority', 'London Borough?']

# Drop the rows labelled 0 and 1 (presumably leftover header rows from the sheet - confirm).
eligible_sheet = eligible_sheet.drop(index=[0, 1])

# Keep just the postcodes flagged as belonging to a London borough.
postcodes = eligible_sheet[eligible_sheet["London Borough?"].eq("Yes")]
# Since there are a large number of postcodes (425k), let's just take a few examples
# Take postcodes that begin with "BN15"
# postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
# The Local Authority is Adur, so let's get the EPC data for this area
# epc_data = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223-Adur"
# "/certificates.csv", low_memory=False
# )
# # Filter on these postcodes
# epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes["postcode"].str.lower())]
# epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
# # Take the newest EPC for each UPRN, based on LODGEMENT_DATE
# epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
# epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
#
# # Let's look at the breakdown of EPC ratings. We want the count and the % of the total
# ratings_distribution = epc_data.groupby("CURRENT_ENERGY_RATING").size().reset_index()
# ratings_distribution.columns = ["Rating", "Count"]
# ratings_distribution["Percentage"] = ratings_distribution["Count"] / ratings_distribution["Count"].sum() * 100
# Can we identify the owners of these units so we can contact them?
# Resolve the path of this file via a lambda defined here, then locate the local EPC exports
# under <this file's directory>/local_data/all-domestic-certificates.
file_src = inspect.getfile(lambda x: None)
DATA_DIRECTORY = Path(file_src).parent / "local_data" / "all-domestic-certificates"

# One certificates.csv path per sub-directory of the data directory.
epc_paths = [
    str(folder / "certificates.csv")
    for folder in DATA_DIRECTORY.iterdir()
    if folder.is_dir()
]

# Outputs are keyed under ownership/gla-proposal in the bucket below.
ownership = Ownership(
    project_name="gla-proposal",
    bucket="retrofit-data-dev",
    epc_paths=epc_paths,
    domestic_ownership_path="/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv",
    overseas_ownership_path="/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv",
    land_registry_path="/Users/khalimconn-kowlessar/Downloads/pp-complete.csv",
    average_property_value=0,
    portfolio_value=0,
    excluded_owners=[],
    excluded_uprns=[],
    save=True,
)

# Step 1: Source the EPC records, restricted to the (lower-cased) eligible postcodes
eligible_postcodes = postcodes["postcode"].str.lower().tolist()
ownership.source_epc_properties(column_filters={}, postcodes=eligible_postcodes)

# Step 2: Get company ownership data
ownership.load_company_ownership()

# Step 3: Prepare data for matching
ownership.prepare_for_matching()

# Step 4: Match EPC data to ownership data
ownership.match()
# --- Analysis of the matched EPC / ownership records ---
# This section is written notebook-style: several bare expressions below only display a value
# when run interactively and have no effect when the file is executed as a script.
from utils.s3 import save_excel_to_s3, read_excel_from_s3
# Save the data to S3
# save_excel_to_s3(
# df=ownership.matched_addresses,
# bucket_name=ownership.bucket,
# file_key=ownership.matched_addresses_pre_filter_filepath
# )
# Read in matches from a previously stored run (the timestamp is part of the object key)
# NOTE(review): the frame loaded here is replaced by the next assignment to `matches`, so this
# read currently has no effect on the analysis - confirm which of the two sources is intended.
matches = read_excel_from_s3(
bucket_name=ownership.bucket,
file_key="ownership/gla-proposal/2024-10-10 19:02:34.131365/matched_addresses_pre_filter.xlsx",
header_row=0
)
# Work on a copy of the in-memory matches so the Ownership instance's frame is left untouched
matches = ownership.matched_addresses.copy()
# Filter matches on the postcodes we're interested in (case-insensitive comparison)
matches = matches[matches["epc_postcode"].str.lower().isin(postcodes["postcode"].str.lower())]
# Drop rows whose TENURE is social rented, the "Not defined ..." placeholder, or "NO DATA!"
matches = matches[~matches["TENURE"].isin(
["Rented (social)", "rental (social)",
"Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is not to be "
"used for an existing dwelling", "NO DATA!"])
]
# Flag private rented sector (PRS) properties; both spellings of the tenure value are accepted
matches["is_prs"] = matches["TENURE"].isin(["rental (private)", "Rented (private)"])
# Look at the EPC ratings: count per rating and share of the total as a percentage
epc_ratings = matches.groupby(["CURRENT_ENERGY_RATING"]).size().reset_index()
epc_ratings.columns = ["EPC Rating", "Count"]
epc_ratings["Percentage"] = epc_ratings["Count"] / epc_ratings["Count"].sum() * 100
# Take properties that are below an EPC C rating (D-G), as defined by the guidance
# NOTE(review): the original comment also mentioned removing new builds, but no such filter is
# applied in this statement - confirm whether one is still needed.
matches = matches[matches["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"])]
# 11,694 properties
# Bare expression: number of distinct postcodes remaining (only displayed when run interactively)
matches["epc_postcode"].nunique()
# 6899
# Count properties per (owner name, company registration number), largest owners first,
# with each owner's share of the total as a percentage
owners_count = matches.groupby(['Proprietor Name (1)', 'Company Registration No. (1)']).size().reset_index()
owners_count.columns = ['Owner', 'Owner Registration #', 'Count']
owners_count = owners_count.sort_values('Count', ascending=False)
owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum() * 100
# Take an example postal region
matches = matches.sort_values("epc_postcode", ascending=True)
# BR1, BR5
# Postcodes starting with "CR0 " (trailing space included in the prefix), private rentals only
example = matches[matches["epc_postcode"].str.startswith("CR0 ")].copy()
example = example[example["TENURE"].isin(["rental (private)", "Rented (private)"])]
# Widen the pandas display limits so the preview below is not truncated
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Bare expression: preview the first four example rows (only displayed when run interactively)
example[
["epc_address", "epc_postcode", "CURRENT_ENERGY_RATING", "CURRENT_ENERGY_EFFICIENCY", "Proprietor Name (1)",
"Company Registration No. (1)"]
].head(4)
# Cast UPRN to int on the EPC frame before joining on it
# NOTE(review): presumably done so the key dtype matches example["UPRN"] - confirm
ownership.epc_data["UPRN"] = ownership.epc_data["UPRN"].astype(int)
# Left join so every example row is kept, adding the built form, property type, wall and roof
# descriptions from the EPC data
example = example.merge(
ownership.epc_data[["UPRN", "BUILT_FORM", "PROPERTY_TYPE", "WALLS_DESCRIPTION", "ROOF_DESCRIPTION"]],
on="UPRN",
how="left"
)
# E-rated private rentals from the example region
# NOTE(review): `example` was already restricted to these tenure values above, so the second
# filter looks like a no-op - confirm
z = example[example["CURRENT_ENERGY_RATING"] == "E"]
z = z[z["TENURE"].isin(["rental (private)", "Rented (private)"])]
# --- Companies House lookup for the first example property's owner ---
import os
from pprint import pprint

# The API key is read from the environment so that no credential is stored in source control.
# A missing variable raises KeyError here, before any request is made.
# NOTE(review): any key previously committed to the repository should be treated as exposed
# and rotated.
companies_house_api_key = os.environ["COMPANIES_HOUSE_API_KEY"]

# Upper bound (seconds) on each HTTP call so the script cannot hang on an unresponsive server
companies_house_timeout = 30

# Registration number of the owner of the first row in `example`
company_number = example.head(1)["Company Registration No. (1)"].values[0]
url = f'https://api.company-information.service.gov.uk/company/{company_number}'

# Make the API request; the key is sent as the basic-auth username with an empty password
response = requests.get(url, auth=(companies_house_api_key, ''), timeout=companies_house_timeout)

# Check if the request was successful
if response.status_code == 200:
    company_data = response.json()
    # Pretty-print the fetched data
    print(json.dumps(company_data, indent=4))
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")

# Try prepending a zero to the beginning of the company number
# NOTE(review): presumably the leading zero was lost from the ownership data - confirm
company_number = f"0{company_number}"
url = f'https://api.company-information.service.gov.uk/company/{company_number}'
response = requests.get(url, auth=(companies_house_api_key, ''), timeout=companies_house_timeout)
company_data = response.json()
pprint(company_data)

# Fetch the persons with significant control for the zero-prefixed company number
psc_url = f'https://api.company-information.service.gov.uk/company/{company_number}/persons-with-significant-control'
psc_response = requests.get(psc_url, auth=(companies_house_api_key, ''), timeout=companies_house_timeout)
psc_data = psc_response.json()
pprint(psc_data)

View file

@ -61,6 +61,7 @@ class Ownership:
portfolio_value: float,
excluded_owners: List[str] = None,
excluded_uprns: List[int] = None,
save=True
):
"""
@ -115,6 +116,8 @@ class Ownership:
f"ownership/{self.project_name}/{self.run_timestamp}/portfolio_epc_data.xlsx"
)
self.save = save
# Data
self.epc_data = None
self.ownership_data = None
@ -158,21 +161,22 @@ class Ownership:
# Step 5: Match land registry data to existing matches
self.match_with_land_registry()
# We store this data in s3 before we perform any filtering
save_excel_to_s3(
df=self.matched_addresses,
bucket_name=self.bucket,
file_key=self.matched_addresses_pre_filter_filepath
)
save_excel_to_s3(
df=self.combined_matching_lookup,
bucket_name=self.bucket,
file_key=self.combined_matching_lookup_pre_filter_filepath
)
if self.save:
save_excel_to_s3(
df=self.matched_addresses,
bucket_name=self.bucket,
file_key=self.matched_addresses_pre_filter_filepath
)
save_excel_to_s3(
df=self.combined_matching_lookup,
bucket_name=self.bucket,
file_key=self.combined_matching_lookup_pre_filter_filepath
)
# Prepare the final outputs:
self.create_final_matches()
def source_epc_properties(self, column_filters=None):
def source_epc_properties(self, column_filters=None, postcodes=None):
"""
This function will filter the epc data as specified by column filters, searching across all of the EPC tables
:param column_filters: Dictionary with column names as keys and list of acceptable values as values. This
@ -180,6 +184,7 @@ class Ownership:
{"column_name": ["value1", "value2", ...]}, where column_name is the name of the column
in the EPC data and ["value1", "value2", ...] is a list of acceptable values for that
column. If a column is not found in the EPC data, an exception is raised.
:param postcodes: A list of postcodes to filter the data on
"""
column_filters = {} if column_filters is None else column_filters
@ -203,6 +208,11 @@ class Ownership:
else:
raise Exception(f"Column {column} not found in data. column_filters is malformed")
if postcodes is not None:
epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes)]
if epc_data.empty:
continue
data.append(epc_data)
self.epc_data = pd.concat(data, ignore_index=True)
@ -210,12 +220,13 @@ class Ownership:
if self.excluded_uprns:
self.epc_data = self.epc_data[~self.epc_data["UPRN"].astype(float).isin(self.excluded_uprns)]
# We now store the data in s3
save_excel_to_s3(
df=self.epc_data,
bucket_name=self.bucket,
file_key=self.epc_data_filepath
)
if self.save:
# We now store the data in s3
save_excel_to_s3(
df=self.epc_data,
bucket_name=self.bucket,
file_key=self.epc_data_filepath
)
def load_company_ownership(self):
"""
@ -484,11 +495,11 @@ class Ownership:
house_no = house_no.replace(",", "")
if house_no is None:
# It's hard for us to get a reliable match
# filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
# if filtered.shape[0] > 1:
# raise Exception("No valid - maybe we should do levenstein?")
continue
# If the house number is missing, it means that we usually have a named property so we look for an
# exact match on that name
filtered = filtered[filtered["Property Address"].str.lower().str.contains(address["ADDRESS"].lower())]
if filtered.shape[0] != 1:
continue
else:
@ -590,7 +601,8 @@ class Ownership:
"CURRENT_ENERGY_RATING",
"POSTCODE",
"LODGEMENT_DATE",
"TRANSACTION_TYPE"
"TRANSACTION_TYPE",
"TENURE",
]
].rename(
columns={
@ -1002,25 +1014,26 @@ class Ownership:
if self.portfolio_properties["UPRN"].nunique() != self.portfolio_epc_data["UPRN"].nunique():
raise ValueError("Portfolio properties and epc data don't match")
logger.info("Storing final outpus")
# Store data
save_excel_to_s3(
df=self.portfolio_owners,
bucket_name=self.bucket,
file_key=self.portfolio_owners_filepath,
)
if self.save:
logger.info("Storing final outpus")
# Store data
save_excel_to_s3(
df=self.portfolio_owners,
bucket_name=self.bucket,
file_key=self.portfolio_owners_filepath,
)
save_excel_to_s3(
df=self.portfolio_properties,
bucket_name=self.bucket,
file_key=self.portfolio_properties_filepath,
)
save_excel_to_s3(
df=self.portfolio_properties,
bucket_name=self.bucket,
file_key=self.portfolio_properties_filepath,
)
save_excel_to_s3(
df=self.portfolio_epc_data,
bucket_name=self.bucket,
file_key=self.portfolio_epc_data_filepath,
)
save_excel_to_s3(
df=self.portfolio_epc_data,
bucket_name=self.bucket,
file_key=self.portfolio_epc_data_filepath,
)
def get_asset_list(self):
"""