mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
commit
f0219b079b
8 changed files with 275 additions and 42 deletions
|
|
@ -1204,7 +1204,7 @@ class Property:
|
|||
return False
|
||||
|
||||
suitable_house = self.data["property-type"] == "House" and self.data["built-form"] in [
|
||||
"Detached", "Semi-Detached",
|
||||
"Detached", "Semi-Detached", "End-Terrace",
|
||||
]
|
||||
|
||||
suitable_bungalow = self.data["property-type"] == "Bungalow" and self.data["built-form"] in [
|
||||
|
|
|
|||
|
|
@ -543,7 +543,11 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
representative_recommendations = {}
|
||||
for p in tqdm(input_properties):
|
||||
recommender = Recommendations(
|
||||
property_instance=p, materials=materials, exclusions=body.exclusions, inclusions=body.inclusions
|
||||
property_instance=p,
|
||||
materials=materials,
|
||||
exclusions=body.exclusions,
|
||||
inclusions=body.inclusions,
|
||||
default_u_values=body.default_u_values
|
||||
)
|
||||
property_recommendations, property_representative_recommendations = recommender.recommend()
|
||||
|
||||
|
|
|
|||
|
|
@ -89,6 +89,9 @@ class PlanTriggerRequest(BaseModel):
|
|||
# if False, allows optimisation to be switched off
|
||||
optimise: Optional[bool] = True
|
||||
|
||||
# If True, uses default u-values for models
|
||||
default_u_values: Optional[bool] = True
|
||||
|
||||
_allowed_goals = {"Increasing EPC"}
|
||||
|
||||
_allowed_housing_types = {"Social", "Private"}
|
||||
|
|
|
|||
|
|
@ -103,6 +103,8 @@ class PropertyValuation:
|
|||
# Vander Elliot Intrusive surveys
|
||||
12103116: 1_537_000,
|
||||
12103117: 1_404_000,
|
||||
# GLA Proposal
|
||||
100020606627: 409_000
|
||||
}
|
||||
|
||||
# We base our valuation uplifts on a number of sources
|
||||
|
|
|
|||
0
etl/customers/gla/__init__.py
Normal file
0
etl/customers/gla/__init__.py
Normal file
38
etl/customers/gla/example_model_outputs.py
Normal file
38
etl/customers/gla/example_model_outputs.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
asset_list = [
|
||||
{
|
||||
"address": "4, King Henrys Drive",
|
||||
"postcode": "CR0 0PA"
|
||||
},
|
||||
]
|
||||
portfolio_id = 110
|
||||
user_id = 8
|
||||
|
||||
asset_list = pd.DataFrame(asset_list)
|
||||
|
||||
filename = f"{user_id}/{portfolio_id}/asset_list.csv"
|
||||
save_csv_to_s3(
|
||||
dataframe=asset_list,
|
||||
bucket_name="retrofit-plan-inputs-dev",
|
||||
file_name=filename
|
||||
)
|
||||
|
||||
body1 = {
|
||||
"portfolio_id": str(portfolio_id),
|
||||
"housing_type": "Private",
|
||||
"goal": "Increasing EPC",
|
||||
"goal_value": "A",
|
||||
"trigger_file_path": filename,
|
||||
"already_installed_file_path": "",
|
||||
"patches_file_path": "",
|
||||
"non_invasive_recommendations_file_path": "",
|
||||
"inclusions": [
|
||||
"cavity_wall_insulation", "loft_insulation", "air_source_heat_pump", "solar_pv"
|
||||
],
|
||||
"budget": None,
|
||||
"scenario_name": "Whole House",
|
||||
"multi_plan": False,
|
||||
}
|
||||
print(body1)
|
||||
173
etl/customers/gla/proposal_investigation.py
Normal file
173
etl/customers/gla/proposal_investigation.py
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
"""
|
||||
This script performs some basic analysis to identify EPC data (and the matching property owners) for the postcodes listed as eligible for the Warmer Homes Local Grant (WHLG).
|
||||
"""
|
||||
|
||||
import inspect
|
||||
import requests
|
||||
import json
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from etl.ownership.Ownership import Ownership
|
||||
|
||||
postcodes = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes_RP edit.xlsx", sheet_name='Eligible postcodes'
|
||||
)
|
||||
# Take just the first three columns
|
||||
postcodes = postcodes[
|
||||
['List of eligible postcodes via the IMD Income Decile 1-2 pathway', 'Unnamed: 1', 'Unnamed: 2']
|
||||
]
|
||||
|
||||
postcodes.columns = ['postcode', 'Local Authority', 'London Borough?']
|
||||
# Drop the first two rows (index 0 and 1)
|
||||
postcodes = postcodes.drop([0, 1])
|
||||
# Take just the London Boroughs
|
||||
postcodes = postcodes[postcodes["London Borough?"] == "Yes"]
|
||||
# Since there are a large number of postcodes (425k), let's just take a few examples
|
||||
# Take postcodes that begin with "BN15"
|
||||
# postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
|
||||
|
||||
# The Local Authority is Adur, so let's get the EPC data for this area
|
||||
# epc_data = pd.read_csv(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223-Adur"
|
||||
# "/certificates.csv", low_memory=False
|
||||
# )
|
||||
# # Filter on these postcodes
|
||||
# epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes["postcode"].str.lower())]
|
||||
# epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
|
||||
# # Take the newest EPC for each UPRN, based on LODGEMENT_DATE
|
||||
# epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
|
||||
# epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
|
||||
#
|
||||
# # Let's look at the breakdown of EPC ratings. We want the count and the % of the total
|
||||
# ratings_distribution = epc_data.groupby("CURRENT_ENERGY_RATING").size().reset_index()
|
||||
# ratings_distribution.columns = ["Rating", "Count"]
|
||||
# ratings_distribution["Percentage"] = ratings_distribution["Count"] / ratings_distribution["Count"].sum() * 100
|
||||
|
||||
# Can we identify the owners of these units so we can contact them?
|
||||
|
||||
file_src = inspect.getfile(lambda x: None)
|
||||
DATA_DIRECTORY = Path(file_src).parent / "local_data" / "all-domestic-certificates"
|
||||
epc_paths = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
|
||||
epc_paths = [str(entry / "certificates.csv") for entry in epc_paths]
|
||||
|
||||
ownership = Ownership(
|
||||
epc_paths=epc_paths,
|
||||
domestic_ownership_path="/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv",
|
||||
overseas_ownership_path="/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv",
|
||||
land_registry_path="/Users/khalimconn-kowlessar/Downloads/pp-complete.csv",
|
||||
project_name="gla-proposal",
|
||||
bucket="retrofit-data-dev",
|
||||
average_property_value=0,
|
||||
portfolio_value=0,
|
||||
excluded_owners=[],
|
||||
excluded_uprns=[],
|
||||
save=True
|
||||
)
|
||||
|
||||
# Data will be found at ownership/gla-proposal
|
||||
ownership.source_epc_properties(column_filters={}, postcodes=postcodes["postcode"].str.lower().tolist())
|
||||
|
||||
# Step 2: Get company ownership data
|
||||
ownership.load_company_ownership()
|
||||
|
||||
# Step 3: Prepare data for matching
|
||||
ownership.prepare_for_matching()
|
||||
|
||||
# Step 4: Match EPC data to ownership data
|
||||
ownership.match()
|
||||
|
||||
from utils.s3 import save_excel_to_s3, read_excel_from_s3
|
||||
|
||||
# Save the data to S3
|
||||
# save_excel_to_s3(
|
||||
# df=ownership.matched_addresses,
|
||||
# bucket_name=ownership.bucket,
|
||||
# file_key=ownership.matched_addresses_pre_filter_filepath
|
||||
# )
|
||||
|
||||
# Read in matches
|
||||
matches = read_excel_from_s3(
|
||||
bucket_name=ownership.bucket,
|
||||
file_key="ownership/gla-proposal/2024-10-10 19:02:34.131365/matched_addresses_pre_filter.xlsx",
|
||||
header_row=0
|
||||
)
|
||||
|
||||
# We have the matches, which we now need to match to the postcodes
|
||||
matches = ownership.matched_addresses.copy()
|
||||
# filter matches on the postcodes we're interested in
|
||||
matches = matches[matches["epc_postcode"].str.lower().isin(postcodes["postcode"].str.lower())]
|
||||
# Remove social-rented properties, plus records whose tenure is undefined or missing
|
||||
matches = matches[~matches["TENURE"].isin(
|
||||
["Rented (social)", "rental (social)",
|
||||
"Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is not to be "
|
||||
"used for an existing dwelling", "NO DATA!"])
|
||||
]
|
||||
matches["is_prs"] = matches["TENURE"].isin(["rental (private)", "Rented (private)"])
|
||||
# Look at the EPC ratings
|
||||
epc_ratings = matches.groupby(["CURRENT_ENERGY_RATING"]).size().reset_index()
|
||||
epc_ratings.columns = ["EPC Rating", "Count"]
|
||||
epc_ratings["Percentage"] = epc_ratings["Count"] / epc_ratings["Count"].sum() * 100
|
||||
|
||||
# Take properties that are below an EPC C rating (D-G), as defined by the guidance. NOTE(review): new builds are not removed by this filter - confirm whether a separate step is needed
|
||||
matches = matches[matches["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"])]
|
||||
# 11,694 properties
|
||||
matches["epc_postcode"].nunique()
|
||||
# 6899
|
||||
|
||||
owners_count = matches.groupby(['Proprietor Name (1)', 'Company Registration No. (1)']).size().reset_index()
|
||||
owners_count.columns = ['Owner', 'Owner Registration #', 'Count']
|
||||
owners_count = owners_count.sort_values('Count', ascending=False)
|
||||
owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum() * 100
|
||||
|
||||
# Take an example postal region
|
||||
matches = matches.sort_values("epc_postcode", ascending=True)
|
||||
# BR1, BR5
|
||||
example = matches[matches["epc_postcode"].str.startswith("CR0 ")].copy()
|
||||
example = example[example["TENURE"].isin(["rental (private)", "Rented (private)"])]
|
||||
|
||||
pd.set_option('display.max_rows', 500)
|
||||
pd.set_option('display.max_columns', 500)
|
||||
pd.set_option('display.width', 1000)
|
||||
example[
|
||||
["epc_address", "epc_postcode", "CURRENT_ENERGY_RATING", "CURRENT_ENERGY_EFFICIENCY", "Proprietor Name (1)",
|
||||
"Company Registration No. (1)"]
|
||||
].head(4)
|
||||
|
||||
ownership.epc_data["UPRN"] = ownership.epc_data["UPRN"].astype(int)
|
||||
example = example.merge(
|
||||
ownership.epc_data[["UPRN", "BUILT_FORM", "PROPERTY_TYPE", "WALLS_DESCRIPTION", "ROOF_DESCRIPTION"]],
|
||||
on="UPRN",
|
||||
how="left"
|
||||
)
|
||||
z = example[example["CURRENT_ENERGY_RATING"] == "E"]
|
||||
z = z[z["TENURE"].isin(["rental (private)", "Rented (private)"])]
|
||||
|
||||
companies_house_api_key = "1d9c2877-3271-4642-80ed-a6170971653f"
|
||||
|
||||
company_number = example.head(1)["Company Registration No. (1)"].values[0]
|
||||
url = f'https://api.company-information.service.gov.uk/company/{company_number}'
|
||||
|
||||
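# Make the API request. NOTE(review): the Companies House API key above is hard-coded in source; it should be loaded from the environment and the committed key rotated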
|
||||
response = requests.get(url, auth=(companies_house_api_key, ''))
|
||||
|
||||
# Check if the request was successful
|
||||
if response.status_code == 200:
|
||||
company_data = response.json()
|
||||
# Pretty-print the fetched data
|
||||
print(json.dumps(company_data, indent=4))
|
||||
else:
|
||||
print(f"Failed to fetch data. Status code: {response.status_code}")
|
||||
# Try prepending a zero to the company number
|
||||
company_number = f"0{company_number}"
|
||||
url = f'https://api.company-information.service.gov.uk/company/{company_number}'
|
||||
response = requests.get(url, auth=(companies_house_api_key, ''))
|
||||
company_data = response.json()
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
pprint(company_data)
|
||||
|
||||
psc_url = f'https://api.company-information.service.gov.uk/company/{company_number}/persons-with-significant-control'
|
||||
psc_response = requests.get(psc_url, auth=(companies_house_api_key, ''))
|
||||
psc_data = psc_response.json()
|
||||
pprint(psc_data)
|
||||
|
|
@ -61,6 +61,7 @@ class Ownership:
|
|||
portfolio_value: float,
|
||||
excluded_owners: List[str] = None,
|
||||
excluded_uprns: List[int] = None,
|
||||
save=True
|
||||
):
|
||||
"""
|
||||
|
||||
|
|
@ -115,6 +116,8 @@ class Ownership:
|
|||
f"ownership/{self.project_name}/{self.run_timestamp}/portfolio_epc_data.xlsx"
|
||||
)
|
||||
|
||||
self.save = save
|
||||
|
||||
# Data
|
||||
self.epc_data = None
|
||||
self.ownership_data = None
|
||||
|
|
@ -158,21 +161,22 @@ class Ownership:
|
|||
# Step 5: Match land registry data to existing matches
|
||||
self.match_with_land_registry()
|
||||
# We store this data in s3 before we perform any filtering
|
||||
save_excel_to_s3(
|
||||
df=self.matched_addresses,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.matched_addresses_pre_filter_filepath
|
||||
)
|
||||
save_excel_to_s3(
|
||||
df=self.combined_matching_lookup,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.combined_matching_lookup_pre_filter_filepath
|
||||
)
|
||||
if self.save:
|
||||
save_excel_to_s3(
|
||||
df=self.matched_addresses,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.matched_addresses_pre_filter_filepath
|
||||
)
|
||||
save_excel_to_s3(
|
||||
df=self.combined_matching_lookup,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.combined_matching_lookup_pre_filter_filepath
|
||||
)
|
||||
|
||||
# Prepare the final outputs:
|
||||
self.create_final_matches()
|
||||
|
||||
def source_epc_properties(self, column_filters=None):
|
||||
def source_epc_properties(self, column_filters=None, postcodes=None):
|
||||
"""
|
||||
This function will filter the epc data as specified by column filters, searching across all of the EPC tables
|
||||
:param column_filters: Dictionary with column names as keys and list of acceptable values as values. This
|
||||
|
|
@ -180,6 +184,7 @@ class Ownership:
|
|||
{"column_name": ["value1", "value2", ...]}, where column_name is the name of the column
|
||||
in the EPC data and ["value1", "value2", ...] is a list of acceptable values for that
|
||||
column. If a column is not found in the EPC data, an exception is raised.
|
||||
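:param postcodes: Optional list of lowercase postcodes to keep. The EPC POSTCODE column is lowercased before the comparison, so values that are not lowercase will never match. If None, no postcode filtering is applied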
|
||||
"""
|
||||
|
||||
column_filters = {} if column_filters is None else column_filters
|
||||
|
|
@ -203,6 +208,11 @@ class Ownership:
|
|||
else:
|
||||
raise Exception(f"Column {column} not found in data. column_filters is malformed")
|
||||
|
||||
if postcodes is not None:
|
||||
epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes)]
|
||||
if epc_data.empty:
|
||||
continue
|
||||
|
||||
data.append(epc_data)
|
||||
|
||||
self.epc_data = pd.concat(data, ignore_index=True)
|
||||
|
|
@ -210,12 +220,13 @@ class Ownership:
|
|||
if self.excluded_uprns:
|
||||
self.epc_data = self.epc_data[~self.epc_data["UPRN"].astype(float).isin(self.excluded_uprns)]
|
||||
|
||||
# We now store the data in s3
|
||||
save_excel_to_s3(
|
||||
df=self.epc_data,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.epc_data_filepath
|
||||
)
|
||||
if self.save:
|
||||
# We now store the data in s3
|
||||
save_excel_to_s3(
|
||||
df=self.epc_data,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.epc_data_filepath
|
||||
)
|
||||
|
||||
def load_company_ownership(self):
|
||||
"""
|
||||
|
|
@ -484,11 +495,11 @@ class Ownership:
|
|||
house_no = house_no.replace(",", "")
|
||||
|
||||
if house_no is None:
|
||||
# It's hard for us to get a reliable match
|
||||
# filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
|
||||
# if filtered.shape[0] > 1:
|
||||
# raise Exception("No valid - maybe we should do levenstein?")
|
||||
continue
|
||||
# If the house number is missing, it means that we usually have a named property so we look for an
|
||||
# unambiguous (exactly one candidate), case-insensitive substring match on that name
|
||||
filtered = filtered[filtered["Property Address"].str.lower().str.contains(address["ADDRESS"].lower())]
|
||||
if filtered.shape[0] != 1:
|
||||
continue
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -590,7 +601,8 @@ class Ownership:
|
|||
"CURRENT_ENERGY_RATING",
|
||||
"POSTCODE",
|
||||
"LODGEMENT_DATE",
|
||||
"TRANSACTION_TYPE"
|
||||
"TRANSACTION_TYPE",
|
||||
"TENURE",
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
|
|
@ -1002,25 +1014,26 @@ class Ownership:
|
|||
if self.portfolio_properties["UPRN"].nunique() != self.portfolio_epc_data["UPRN"].nunique():
|
||||
raise ValueError("Portfolio properties and epc data don't match")
|
||||
|
||||
logger.info("Storing final outpus")
|
||||
# Store data
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_owners,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_owners_filepath,
|
||||
)
|
||||
if self.save:
|
||||
logger.info("Storing final outpus")
|
||||
# Store data
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_owners,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_owners_filepath,
|
||||
)
|
||||
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_properties,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_properties_filepath,
|
||||
)
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_properties,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_properties_filepath,
|
||||
)
|
||||
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_epc_data,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_epc_data_filepath,
|
||||
)
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_epc_data,
|
||||
bucket_name=self.bucket,
|
||||
file_key=self.portfolio_epc_data_filepath,
|
||||
)
|
||||
|
||||
def get_asset_list(self):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue