mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
working on electrical consumption estimates
This commit is contained in:
parent
d07e54ce88
commit
bd610c8881
6 changed files with 157 additions and 94 deletions
|
|
@ -1321,3 +1321,16 @@ class Property:
|
|||
self.hot_water_energy_source = self.heating_energy_source
|
||||
else:
|
||||
raise Exception("Investiage me")
|
||||
|
||||
def is_ashp_valid(self, exclusions):
    """
    Decide whether an air source heat pump (ASHP) may be recommended for this property.

    :param exclusions: collection of measure names that must not be recommended
    :return: True if an ASHP is a valid recommendation, otherwise False
    """
    measure = "air_source_heat_pump"

    # A non-invasive recommendation wins outright: it is checked before the exclusions
    if measure in self.non_invasive_recommendations:
        return True

    if measure in exclusions:
        return False

    # Both lookups happen up front, in this order, so a missing key surfaces
    # regardless of the outcome of the property-type check
    property_type_ok = self.data["property-type"] in ["House", "Bungalow"]
    already_has_ashp = self.main_heating["has_air_source_heat_pump"]

    if not property_type_ok:
        return False

    return not already_has_ashp
|
||||
|
|
|
|||
|
|
@ -520,11 +520,37 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
# rating to the target SAP rating (ie 69C)
|
||||
# TODO: Update this!
|
||||
energy_consumption = energy_consumption_client.estimate_new_consumption(
|
||||
current_rating=p.data["current-energy-rating"],
|
||||
target_rating="C",
|
||||
current_energy_efficiency=p.data["current-energy-efficiency"],
|
||||
target_efficiency="69",
|
||||
current_consumption=p.current_adjusted_energy
|
||||
)
|
||||
|
||||
def convert_to_electric_consumption(self, p, energy_consumption, assumed_ashp_efficiency, exclusions):
    """
    Estimate the electricity demand of a property, electrifying the heating where an ASHP is viable.

    Only properties whose main fuel is mains gas and for which ``p.is_ashp_valid`` returns True are
    converted. Every other case (electric already, gas without a viable ASHP, or any other fuel)
    returns ``energy_consumption`` unchanged, so the caller always receives a number.

    :param self: not used by this function
    :param p: property object exposing ``main_fuel``, ``energy_consumption_estimates`` and ``is_ashp_valid``
    :param energy_consumption: consumption estimate returned when no conversion applies
    :param assumed_ashp_efficiency: assumed ASHP efficiency as a percentage (e.g. 300 means 300%)
    :param exclusions: measures that must not be recommended; forwarded to ``p.is_ashp_valid``
    :return: the estimated consumption after any ASHP conversion
    """
    fuel_type = p.main_fuel["fuel_type"]

    # If the primary fuel is not gas, or an ASHP cannot be installed, we don't adjust the consumption.
    # The validity check is evaluated once, and only for gas properties.
    if fuel_type != "mains gas" or not p.is_ashp_valid(exclusions=exclusions):
        return energy_consumption

    # The primary fuel is gas and an ASHP is valid, so we adjust the heating and hot water consumption
    # to reflect the 200-400% efficiency of an ASHP with electrified heating, so that the solar panel
    # can cover heating generation.
    adjusted = p.energy_consumption_estimates["adjusted"]
    systems_consumption = adjusted["heating"] + adjusted["hot_water"]
    adjusted_consumption = systems_consumption / (assumed_ashp_efficiency / 100)

    # Lighting and appliances are carried over without any efficiency adjustment
    return adjusted_consumption + adjusted["lighting"] + adjusted["appliances"]
|
||||
|
||||
# TODO: Should energy_consumption be adjusted to just the electricity requirement?
|
||||
# We should align our calculation of required energy consumption with expectations around decarbonising
|
||||
# heating and hot water, so worst case we should take just the electrical consumption of the property
|
||||
|
|
|
|||
|
|
@ -507,31 +507,36 @@ class EnergyConsumptionModel:
|
|||
return prediction
|
||||
|
||||
@staticmethod
def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
    """
    Work out the percentage fall in average consumption between two energy efficiency scores.

    :param start_efficiency: the current energy efficiency score (int or str)
    :param end_efficiency: the target energy efficiency score (int or str)
    :param consumption_averages: dataframe with "current-energy-efficiency" and "total_consumption" columns
    :return: the percentage decrease, floored at 0
    :raises IndexError: if either efficiency score has no row in consumption_averages
    """
    # Scores can arrive as ints or strings, so both sides are compared in string form
    efficiency_labels = consumption_averages["current-energy-efficiency"].astype(str)

    def _average_for(efficiency):
        # Look up the average total consumption recorded for a single efficiency score
        matches = consumption_averages.loc[efficiency_labels == str(efficiency), "total_consumption"]
        if matches.empty:
            raise IndexError(f"No consumption average available for energy efficiency {efficiency!r}")
        return matches.values[0]

    start_consumption = _average_for(start_efficiency)
    end_consumption = _average_for(end_efficiency)

    # A zero baseline cannot be reduced any further, and dividing by it is undefined
    if start_consumption == 0:
        return 0

    percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
    # percentage_decrease cannot be negative
    if percentage_decrease < 0:
        percentage_decrease = 0
    return percentage_decrease
|
||||
|
||||
def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
    """
    Given the consumption_averages dataset, which is produced as a result of the data_combining.py script,
    for the energy kwh models, this function will estimate the new consumption based on the current
    consumption and the expected reduction in consumption from the current energy efficiency to the
    target energy efficiency.

    :param current_energy_efficiency: the property's current energy efficiency score
    :param target_efficiency: the energy efficiency score being targeted
    :param current_consumption: the property's current consumption
    :return: the current consumption scaled down by the expected percentage decrease
    """
    percentage_decrease = self.calculate_percentage_decrease(
        start_efficiency=current_energy_efficiency,
        end_efficiency=target_efficiency,
        consumption_averages=self.consumption_averages,
    )
    new_consumption = current_consumption * (1 - percentage_decrease / 100)
    return new_consumption
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ def app():
|
|||
|
||||
# We also estimate the energy consumption reduction from this data, by band
|
||||
df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
|
||||
consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()
|
||||
consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
|
||||
|
||||
# Save the consumption averages back to s3
|
||||
save_dataframe_to_s3_parquet(
|
||||
|
|
|
|||
|
|
@ -11,7 +11,10 @@ from utils.s3 import read_dataframe_from_s3_parquet
|
|||
# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
|
||||
# Therefore value_of_F * 1.15 = value_of_D * 1.03
|
||||
# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
|
||||
PROPERTY_VALUE_ESTIMATE = 213_165
|
||||
PROPERTY_VALUE_ESTIMATE = 200_000
|
||||
|
||||
# UPRNs of properties we need
|
||||
MANUAL_EXCLUSIONS = []
|
||||
|
||||
|
||||
def aggregate_matches(matching_lookup, company_ownership, properties):
|
||||
|
|
@ -283,6 +286,36 @@ def filter_land_registry(properties):
|
|||
)
|
||||
|
||||
|
||||
def is_substring(x, match_string):
    """
    Check whether ``x`` appears inside the lower-cased ``match_string``.

    ``x`` itself is not lower-cased here, so it only matches if it is already lower case.

    :param x: the candidate substring; null values never match
    :param match_string: the text to search in; null values never match
    :return: True if x is contained in match_string.lower(), otherwise False
    """
    # Either side being missing means there is nothing to match (and NaN has no .lower())
    if pd.isnull(x) or pd.isnull(match_string):
        return False
    return x in match_string.lower()
|
||||
|
||||
|
||||
def house_number_match(paon, house_number):
    """
    Compare a paon against a house number, numerically where possible.

    :param paon: the paon value to compare
    :param house_number: the house number to compare it with
    :return: True if the two values match, otherwise False
    """
    # Firstly try and convert to numeric
    try:
        paon_numeric = int(paon)
        house_number_numeric = int(house_number)
    except (TypeError, ValueError, OverflowError):
        # If we can't convert both to numeric (e.g. "12a", None, NaN), we do an equality
        return paon == house_number
    return paon_numeric == house_number_numeric
|
||||
|
||||
|
||||
def check_equalities(lr_filtered):
    """
    Report whether every row of ``lr_filtered`` shares the same paon, saon and street.

    Each column is compared against its first row. A null first saon counts as equal only when
    every saon is null. paon and street use a plain ``==``, so null values there never compare equal.
    An empty frame has nothing that differs, so all three flags are True.

    :param lr_filtered: dataframe with "paon", "saon" and "street" columns
    :return: tuple of booleans (all_paon_equal, all_saon_equal, all_street_equal)
    """
    # Without this guard the first-row lookups below raise IndexError on an empty frame
    if len(lr_filtered) == 0:
        return True, True, True

    def _all_match_first(column):
        # True when every value in the column equals the value in the first row
        values = lr_filtered[column]
        return bool((values == values.iloc[0]).all())

    saon = lr_filtered["saon"]
    if pd.isnull(saon.iloc[0]):
        all_saon_equal = bool(saon.isnull().all())
    else:
        all_saon_equal = _all_match_first("saon")

    return _all_match_first("paon"), all_saon_equal, _all_match_first("street")
|
||||
|
||||
|
||||
def app():
|
||||
"""
|
||||
This script is for scoping property ownership for EPC F & G rated properties in Birmingham, for Goldman Sachs
|
||||
|
|
@ -292,8 +325,8 @@ def app():
|
|||
# https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
|
||||
# =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
|
||||
# is actually listed in two local authorities causing us to think it's an EPC F & G property, but
|
||||
# it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
|
||||
# and performing a singular filter for most recent EPC by UPRN
|
||||
# it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating
|
||||
# together and performing a singular filter for most recent EPC by UPRN
|
||||
# paths = [
|
||||
# "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
|
||||
# "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
|
||||
|
|
@ -356,10 +389,6 @@ def app():
|
|||
# Take the newest UPRN
|
||||
properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
|
||||
|
||||
# TODO: Do we want to filter properties based on lodgement dates?
|
||||
# E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because
|
||||
# this could be indicative of a sale happening, and the land registry data may not have caught up yet
|
||||
|
||||
# Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
|
||||
# property itself
|
||||
starting_terms = [
|
||||
|
|
@ -461,6 +490,8 @@ def app():
|
|||
|
||||
# freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx")
|
||||
# leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx")
|
||||
# freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
|
||||
# leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
|
||||
|
||||
# The approximate matches aren't very good
|
||||
freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
|
||||
|
|
@ -483,7 +514,9 @@ def app():
|
|||
"ADDRESS1",
|
||||
"CURRENT_ENERGY_EFFICIENCY",
|
||||
"CURRENT_ENERGY_RATING",
|
||||
"POSTCODE"
|
||||
"POSTCODE",
|
||||
"LODGEMENT_DATE",
|
||||
"TRANSACTION_TYPE"
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
|
|
@ -501,7 +534,7 @@ def app():
|
|||
"Postcode",
|
||||
"Company Registration No. (1)",
|
||||
"Proprietor Name (1)",
|
||||
|
||||
"Date Proprietor Added",
|
||||
]
|
||||
],
|
||||
how="left", on="Title Number"
|
||||
|
|
@ -531,35 +564,6 @@ def app():
|
|||
land_registry["saon"] = land_registry["saon"].str.lower().str.strip()
|
||||
land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])
|
||||
|
||||
def is_substring(x, match_string):
    """
    Check whether ``x`` appears inside the lower-cased ``match_string``.

    ``x`` itself is not lower-cased here, so it only matches if it is already lower case.

    :param x: the candidate substring; null values never match
    :param match_string: the text to search in; null values never match
    :return: True if x is contained in match_string.lower(), otherwise False
    """
    # Either side being missing means there is nothing to match (and NaN has no .lower())
    if pd.isnull(x) or pd.isnull(match_string):
        return False
    return x in match_string.lower()
|
||||
|
||||
def house_number_match(paon, house_number):
    """
    Compare a paon against a house number, numerically where possible.

    :param paon: the paon value to compare
    :param house_number: the house number to compare it with
    :return: True if the two values match, otherwise False
    """
    # Firstly try and convert to numeric
    try:
        paon_numeric = int(paon)
        house_number_numeric = int(house_number)
    except (TypeError, ValueError, OverflowError):
        # If we can't convert both to numeric (e.g. "12a", None, NaN), we do an equality
        return paon == house_number
    return paon_numeric == house_number_numeric
|
||||
|
||||
def check_equalities(lr_filtered):
    """
    Report whether every row of ``lr_filtered`` shares the same paon, saon and street.

    Each column is compared against its first row. A null first saon counts as equal only when
    every saon is null. paon and street use a plain ``==``, so null values there never compare equal.
    An empty frame has nothing that differs, so all three flags are True.

    :param lr_filtered: dataframe with "paon", "saon" and "street" columns
    :return: tuple of booleans (all_paon_equal, all_saon_equal, all_street_equal)
    """
    # Without this guard the first-row lookups below raise IndexError on an empty frame
    if len(lr_filtered) == 0:
        return True, True, True

    def _all_match_first(column):
        # True when every value in the column equals the value in the first row
        values = lr_filtered[column]
        return bool((values == values.iloc[0]).all())

    saon = lr_filtered["saon"]
    if pd.isnull(saon.iloc[0]):
        all_saon_equal = bool(saon.isnull().all())
    else:
        all_saon_equal = _all_match_first("saon")

    return _all_match_first("paon"), all_saon_equal, _all_match_first("street")
|
||||
|
||||
land_registry_matches = []
|
||||
for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)):
|
||||
|
||||
|
|
@ -779,13 +783,25 @@ def app():
|
|||
).drop(columns=["uprn"])
|
||||
|
||||
# Flag anything that sold in the last year
|
||||
# TODO: Decide on what this logic should be!
|
||||
matched_addresses["sold_recently"] = (
|
||||
matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1)
|
||||
)
|
||||
|
||||
# Drop anything that sold recently
|
||||
matched_addresses = matched_addresses[~matched_addresses["sold_recently"]]
|
||||
matched_addresses["sale_lodged_recently"] = (
|
||||
(pd.to_datetime(matched_addresses["LODGEMENT_DATE"]) >= pd.Timestamp.now() - pd.DateOffset(months=12)) &
|
||||
(matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
|
||||
)
|
||||
|
||||
# Drop rows on the booleans
|
||||
matched_addresses = matched_addresses[
|
||||
~matched_addresses["sold_recently"] &
|
||||
~matched_addresses["sale_lodged_recently"]
|
||||
]
|
||||
|
||||
# Filter combined_matching_lookup accordingly
|
||||
combined_matching_lookup = combined_matching_lookup[
|
||||
combined_matching_lookup["UPRN"].isin(matched_addresses["UPRN"])
|
||||
]
|
||||
|
||||
# shared_freehold_match = pd.DataFrame(shared_freehold_match)
|
||||
# Store these files
|
||||
|
|
@ -807,45 +823,19 @@ def app():
|
|||
properties=properties
|
||||
)
|
||||
|
||||
investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
|
||||
investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
|
||||
|
||||
investment_20m_properties = matched_addresses[
|
||||
matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"])
|
||||
]
|
||||
|
||||
investment_50m_properties = matched_addresses[
|
||||
matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
|
||||
]
|
||||
|
||||
# Merge on the owner
|
||||
al_rayan = investment_50m_properties[
|
||||
investment_50m_properties["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")]
|
||||
|
||||
portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])]
|
||||
portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])]
|
||||
|
||||
# investment_20m_properties.to_excel("investment_20m_properties 28th July.xlsx", index=False)
|
||||
# Storing data
|
||||
# investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False)
|
||||
|
||||
z = pd.read_excel("investment_50m_properties 28th May.xlsx")
|
||||
new = investment_50m_properties[~investment_50m_properties["UPRN"].isin(z["UPRN"])]
|
||||
new_al_rayan = new[
|
||||
new["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")
|
||||
]
|
||||
new_al_rayan = new_al_rayan.merge(
|
||||
properties[["UPRN", "LODGEMENT_DATE"]],
|
||||
how="left",
|
||||
on="UPRN"
|
||||
).merge(
|
||||
company_ownership[["Title Number", "Date Proprietor Added"]],
|
||||
how="left",
|
||||
on="Title Number",
|
||||
)
|
||||
|
||||
# Store the EPC data
|
||||
portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
|
||||
portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
|
||||
# portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 29th July.xlsx", index=False)
|
||||
|
||||
# We check if any of these properties are in a conservation area
|
||||
valuations = pd.read_excel("property value.xlsx")
|
||||
|
|
@ -891,6 +881,48 @@ def company_aggregation():
|
|||
aggregation.to_excel("Company ownership aggregation.xlsx")
|
||||
|
||||
|
||||
def extract_price_info(text):
    """
    Pull the low, estimated and high price figures out of a block of text.

    :param text: the text to search for an "Estimated price" section
    :return: dict with the valuation, lower bound and upper bound, or None if no section is found
    """
    # Use regex to find the relevant price information
    found = re.search(r'Estimated price\n\nLow£([\d,]+)k\n\n£([\d,]+)k\n\nHigh£([\d,]+)k', text)
    if found is None:
        return None

    # The captured figures are in thousands and may contain comma separators;
    # the groups come back in the order low, estimate, high
    low_price, est_price, high_price = (
        int(figure.replace(',', '')) * 1000 for figure in found.groups()
    )

    return {
        'Zoopla Valuation': est_price,
        'Zoopla Lower Bound': low_price,
        'Zoopla Upper Bound': high_price
    }
|
||||
|
||||
|
||||
def get_valuations(portfolio_epc_data_50m):
    """
    Fetch a price estimate for every property in the portfolio, one request per UPRN.

    :param portfolio_epc_data_50m: dataframe with a "UPRN" column
    :return: list of dicts, one per property, holding the UPRN plus whatever price fields were
        extracted (just the UPRN when no estimate could be found in the response)
    """
    # This gets blocked pretty quickly by Zoopla
    import requests
    import time
    from tqdm import tqdm

    valuation_data = []
    for _, property_data in tqdm(portfolio_epc_data_50m.iterrows(), total=len(portfolio_epc_data_50m)):
        uprn = property_data["UPRN"]
        response = requests.get(
            f"https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/{uprn}/",
            # requests waits indefinitely by default, so a stalled connection would hang the run
            timeout=30,
        )

        # extract_price_info returns None when no estimate is present; unpacking None would raise
        pricing = extract_price_info(response.text) or {}
        valuation_data.append(
            {
                "UPRN": uprn,
                **pricing
            }
        )

        # Pause between requests
        time.sleep(2)

    return valuation_data
|
||||
|
||||
|
||||
def prepare_anonymised_data():
|
||||
investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0)
|
||||
investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0)
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ class HeatingRecommender:
|
|||
# In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
|
||||
# and either allow or prevent the recommendation of an air source heat pump
|
||||
|
||||
if self.is_ashp_valid(exclusions=exclusions):
|
||||
if self.property.is_ashp_valid(exclusions=exclusions):
|
||||
self.recommend_air_source_heat_pump(
|
||||
phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
|
||||
)
|
||||
|
|
@ -186,19 +186,6 @@ class HeatingRecommender:
|
|||
description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi "
|
||||
"boiler")
|
||||
|
||||
def is_ashp_valid(self, exclusions):
    """
    Decide whether an air source heat pump (ASHP) may be recommended for the wrapped property.

    :param exclusions: collection of measure names that must not be recommended
    :return: True if an ASHP is a valid recommendation, otherwise False
    """
    measure = "air_source_heat_pump"

    # A non-invasive recommendation wins outright: it is checked before the exclusions
    if measure in self.property.non_invasive_recommendations:
        return True

    if measure in exclusions:
        return False

    # Both lookups happen up front, in this order, so a missing key surfaces
    # regardless of the outcome of the property-type check
    property_type_ok = self.property.data["property-type"] in ["House", "Bungalow"]
    already_has_ashp = self.property.main_heating["has_air_source_heat_pump"]

    if not property_type_ok:
        return False

    return not already_has_ashp
|
||||
|
||||
def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
|
||||
"""
|
||||
This method will implement the recommendation for an air source heat pump
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue