working on electrical consumption estimates

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-29 14:29:07 +01:00
parent d07e54ce88
commit bd610c8881
6 changed files with 157 additions and 94 deletions

View file

@ -1321,3 +1321,16 @@ class Property:
self.hot_water_energy_source = self.heating_energy_source
else:
raise Exception("Investiage me")
def is_ashp_valid(self, exclusions):
    """
    Decide whether an air source heat pump can be recommended for this property.

    The checks run in a fixed order: a non-invasive recommendation for a heat pump always allows it,
    then an explicit exclusion rules it out, and otherwise the property must be a House or Bungalow
    that does not already have an air source heat pump.
    :param exclusions: container that is tested for the "air_source_heat_pump" entry
    :return: True when an air source heat pump is a valid recommendation, False otherwise
    """
    measure = "air_source_heat_pump"
    # The non-invasive recommendation is checked first, so it overrides an exclusion
    if measure in self.non_invasive_recommendations:
        return True
    if measure in exclusions:
        return False
    property_type_ok = self.data["property-type"] in ["House", "Bungalow"]
    already_fitted = self.main_heating["has_air_source_heat_pump"]
    return not (already_fitted or not property_type_ok)

View file

@ -520,11 +520,37 @@ async def trigger_plan(body: PlanTriggerRequest):
# rating to the target SAP rating (ie 69C)
# TODO: Update this!
energy_consumption = energy_consumption_client.estimate_new_consumption(
current_rating=p.data["current-energy-rating"],
target_rating="C",
current_energy_efficiency=p.data["current-energy-efficiency"],
target_efficiency="69",
current_consumption=p.current_adjusted_energy
)
def convert_to_electric_consumption(self, p, energy_consumption, assumed_ashp_efficiency, exclusions):
    """
    Estimate the electrical consumption of a property once its heating is electrified.

    Only a mains gas property for which an air source heat pump (ASHP) is valid gets converted: its
    heating and hot water consumption is divided by the assumed ASHP efficiency and the lighting and
    appliance consumption is added on top. Every other property (already electric, gas without a
    valid ASHP, or any other fuel) gets energy_consumption back unchanged, so the function never
    falls through and returns None.
    :param p: the property; must expose main_fuel["fuel_type"], is_ashp_valid(exclusions=...) and
        energy_consumption_estimates["adjusted"] with heating, hot_water, lighting and appliances
    :param energy_consumption: the consumption estimate returned when no conversion applies
    :param assumed_ashp_efficiency: assumed ASHP efficiency as a percentage (e.g. 300 for 300%)
    :param exclusions: passed through to p.is_ashp_valid
    :return: the estimated electrical consumption
    """
    gas_heated = p.main_fuel["fuel_type"] == "mains gas"
    # is_ashp_valid is only consulted for gas properties, and only once
    if not (gas_heated and p.is_ashp_valid(exclusions=exclusions)):
        # Nothing to convert: the primary fuel is already electricity, or no heat pump will be fitted
        return energy_consumption

    # We adjust the heating and hot water consumption to reflect the 200-400% efficiency of an ASHP
    # with electrified heating, so that the solar panel can cover heating generation.
    adjusted = p.energy_consumption_estimates["adjusted"]
    systems_consumptions = adjusted["heating"] + adjusted["hot_water"]
    adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100)
    return adjusted_consumption + adjusted["lighting"] + adjusted["appliances"]
# TODO: Should energy_consumption be adjusted to just the electricity requirement?
# We should align our calculation of required energy consumption with expectations around decarbonising
# heating and hot water, so worst case we should take just the electrical consumption of the property

View file

@ -507,31 +507,36 @@ class EnergyConsumptionModel:
return prediction
# NOTE(review): this span looks like a rendered diff with the +/- markers stripped, so the replaced and
# the replacement version of the signature and of each lookup filter sit on consecutive lines - confirm
# against the real file before relying on it.
@staticmethod
def calculate_percentage_decrease(start_rating, end_rating, consumption_averages):
def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
# Computes the percentage by which "total_consumption" falls between the row of consumption_averages
# matched by the start value and the row matched by the end value, floored at 0.
# First "total_consumption" value among the rows matching the start value
start_consumption = consumption_averages.loc[
consumption_averages["current-energy-rating"] == start_rating, "total_consumption"
# The efficiency column and the requested value are both compared as strings
consumption_averages["current-energy-efficiency"].astype(str) == str(start_efficiency), "total_consumption"
].values[0]
# First "total_consumption" value among the rows matching the end value
end_consumption = consumption_averages.loc[
consumption_averages["current-energy-rating"] == end_rating, "total_consumption"
consumption_averages["current-energy-efficiency"].astype(str) == str(end_efficiency), "total_consumption"
].values[0]
# NOTE(review): .values[0] will fail if no row matches either value - confirm callers guarantee a match
# Drop in consumption expressed as a percentage of the starting consumption
percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
# percentage_decrease cannot be negative
if percentage_decrease < 0:
percentage_decrease = 0
return percentage_decrease
# NOTE(review): this span looks like a rendered diff with the +/- markers stripped, so two versions of the
# signature, of the docstring parameters and of the call arguments sit on consecutive lines - confirm
# against the real file before relying on it.
def estimate_new_consumption(self, current_rating, target_rating, current_consumption):
def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
"""
Given the consumption_averages dataset, which is produced as a result of the data_combining.py script,
for the energy kwh models, this function will estimate the new consumption based on the current consumption,
based on the expected reduction in consumption from the current rating to the target rating.
:param current_rating: start value handed to calculate_percentage_decrease (first signature shown)
:param target_rating: end value handed to calculate_percentage_decrease (first signature shown)
:param current_energy_efficiency: start value handed to calculate_percentage_decrease (second signature shown)
:param target_efficiency: end value handed to calculate_percentage_decrease (second signature shown)
:param current_consumption: the consumption that the percentage decrease is applied to
:param df: NOTE(review): neither signature shown takes a df parameter - likely stale
:return: current_consumption * (1 - percentage_decrease / 100)
"""
# The averages used for the lookup come from self.consumption_averages
percentage_decrease = self.calculate_percentage_decrease(
current_rating, target_rating, self.consumption_averages
start_efficiency=current_energy_efficiency,
end_efficiency=target_efficiency,
consumption_averages=self.consumption_averages
)
# Scale the current consumption down by the percentage decrease
new_consumption = current_consumption * (1 - percentage_decrease / 100)
return new_consumption

View file

@ -94,7 +94,7 @@ def app():
# We also estimate the energy consumption reduction from this data, by band
df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()
consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
# Save the consumption averages back to s3
save_dataframe_to_s3_parquet(

View file

@ -11,7 +11,10 @@ from utils.s3 import read_dataframe_from_s3_parquet
# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
# Therefore value_of_F * 1.15 = value_of_D * 1.03
# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
PROPERTY_VALUE_ESTIMATE = 213_165
PROPERTY_VALUE_ESTIMATE = 200_000
# UPRNs of properties we need
MANUAL_EXCLUSIONS = []
def aggregate_matches(matching_lookup, company_ownership, properties):
@ -283,6 +286,36 @@ def filter_land_registry(properties):
)
def is_substring(x, match_string):
    """
    Check whether x occurs inside the lower-cased match_string.

    Only match_string is lower-cased; x is used as given. A null x (as judged by pd.isnull) never
    matches.
    :param x: the candidate substring, possibly null
    :param match_string: the string to search in
    :return: True if x is found in match_string.lower(), otherwise False
    """
    return False if pd.isnull(x) else x in match_string.lower()
def house_number_match(paon, house_number):
    """
    Compare a paon value with a house number, numerically where possible.

    Both values are converted with int() and compared as integers. If either conversion fails
    (for example "12a" or None), the two original values are compared for plain equality instead.
    :param paon: the paon value to compare
    :param house_number: the house number to compare it against
    :return: result of the integer comparison, or of paon == house_number when conversion fails
    """
    try:
        return int(paon) == int(house_number)
    except (TypeError, ValueError, OverflowError):
        # Only the errors int() raises for unconvertible input are handled here, so unrelated
        # failures are no longer silently swallowed
        return paon == house_number
def check_equalities(lr_filtered):
    """
    Report whether every row of lr_filtered shares the first row's paon, saon and street.

    For saon, a null first value means the check is instead that every saon is null.
    :param lr_filtered: DataFrame with "paon", "saon" and "street" columns and at least one row
    :return: tuple (all_paon_equal, all_saon_equal, all_street_equal)
    """
    def _all_match_first(column):
        # True when each value in the column equals the column's first value
        values = lr_filtered[column]
        return all(values == values.values[0])

    paon_same = _all_match_first("paon")
    first_saon = lr_filtered["saon"].values[0]
    if pd.isnull(first_saon):
        saon_same = all(pd.isnull(lr_filtered["saon"]))
    else:
        saon_same = _all_match_first("saon")
    street_same = _all_match_first("street")
    return paon_same, saon_same, street_same
def app():
"""
This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
@ -292,8 +325,8 @@ def app():
# https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
# =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
# is actually listed in two local authorities causing us to think it's an EPC F & G property, but it's
# it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
# and performing a singular filter for most recent EPC by UPRN
# it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating
# together and performing a singular filter for most recent EPC by UPRN
# paths = [
# "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
# "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@ -356,10 +389,6 @@ def app():
# Take the newest UPRN
properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
# TODO: Do we want to filter properties based on lodgement dates?
# E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because
# this could be indicative of a sale happening, and the land registry data may not have caught up yet
# Remove entries where the address begins with the term "land adjoining", or other records that don't reference
# the property itself
starting_terms = [
@ -461,6 +490,8 @@ def app():
# freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx")
# leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx")
# freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
# leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
# The approximate matches aren't very good
freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
@ -483,7 +514,9 @@ def app():
"ADDRESS1",
"CURRENT_ENERGY_EFFICIENCY",
"CURRENT_ENERGY_RATING",
"POSTCODE"
"POSTCODE",
"LODGEMENT_DATE",
"TRANSACTION_TYPE"
]
].rename(
columns={
@ -501,7 +534,7 @@ def app():
"Postcode",
"Company Registration No. (1)",
"Proprietor Name (1)",
"Date Proprietor Added",
]
],
how="left", on="Title Number"
@ -531,35 +564,6 @@ def app():
land_registry["saon"] = land_registry["saon"].str.lower().str.strip()
land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])
def is_substring(x, match_string):
    """
    Check whether x occurs inside the lower-cased match_string.

    Only match_string is lower-cased; x is used as given. A null x (as judged by pd.isnull) never
    matches.
    :param x: the candidate substring, possibly null
    :param match_string: the string to search in
    :return: True if x is found in match_string.lower(), otherwise False
    """
    return False if pd.isnull(x) else x in match_string.lower()
def house_number_match(paon, house_number):
    """
    Compare a paon value with a house number, numerically where possible.

    Both values are converted with int() and compared as integers. If either conversion fails
    (for example "12a" or None), the two original values are compared for plain equality instead.
    :param paon: the paon value to compare
    :param house_number: the house number to compare it against
    :return: result of the integer comparison, or of paon == house_number when conversion fails
    """
    try:
        return int(paon) == int(house_number)
    except (TypeError, ValueError, OverflowError):
        # Only the errors int() raises for unconvertible input are handled here, so unrelated
        # failures are no longer silently swallowed
        return paon == house_number
def check_equalities(lr_filtered):
    """
    Report whether every row of lr_filtered shares the first row's paon, saon and street.

    For saon, a null first value means the check is instead that every saon is null.
    :param lr_filtered: DataFrame with "paon", "saon" and "street" columns and at least one row
    :return: tuple (all_paon_equal, all_saon_equal, all_street_equal)
    """
    def _all_match_first(column):
        # True when each value in the column equals the column's first value
        values = lr_filtered[column]
        return all(values == values.values[0])

    paon_same = _all_match_first("paon")
    first_saon = lr_filtered["saon"].values[0]
    if pd.isnull(first_saon):
        saon_same = all(pd.isnull(lr_filtered["saon"]))
    else:
        saon_same = _all_match_first("saon")
    street_same = _all_match_first("street")
    return paon_same, saon_same, street_same
land_registry_matches = []
for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)):
@ -779,13 +783,25 @@ def app():
).drop(columns=["uprn"])
# Flag anything that sold in the last year
# TODO: Decide on what this logic should be!
matched_addresses["sold_recently"] = (
matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1)
)
# Drop anything that sold recently
matched_addresses = matched_addresses[~matched_addresses["sold_recently"]]
matched_addresses["sale_lodged_recently"] = (
(pd.to_datetime(matched_addresses["LODGEMENT_DATE"]) >= pd.Timestamp.now() - pd.DateOffset(months=12)) &
(matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
)
# Drop rows on the booleans
matched_addresses = matched_addresses[
~matched_addresses["sold_recently"] &
~matched_addresses["sale_lodged_recently"]
]
# Filter combined_matching_lookup accordingly
combined_matching_lookup = combined_matching_lookup[
combined_matching_lookup["UPRN"].isin(matched_addresses["UPRN"])
]
# shared_freehold_match = pd.DataFrame(shared_freehold_match)
# Store these files
@ -807,45 +823,19 @@ def app():
properties=properties
)
investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
investment_20m_properties = matched_addresses[
matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"])
]
investment_50m_properties = matched_addresses[
matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
]
# Merge on the owner
al_rayan = investment_50m_properties[
investment_50m_properties["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")]
portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])]
portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])]
# investment_20m_properties.to_excel("investment_20m_properties 28th July.xlsx", index=False)
# Storing data
# investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False)
z = pd.read_excel("investment_50m_properties 28th May.xlsx")
new = investment_50m_properties[~investment_50m_properties["UPRN"].isin(z["UPRN"])]
new_al_rayan = new[
new["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")
]
new_al_rayan = new_al_rayan.merge(
properties[["UPRN", "LODGEMENT_DATE"]],
how="left",
on="UPRN"
).merge(
company_ownership[["Title Number", "Date Proprietor Added"]],
how="left",
on="Title Number",
)
# Store the EPC data
portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
# portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 29th July.xlsx", index=False)
# We check if any of these properties are in a conservation area
valuations = pd.read_excel("property value.xlsx")
@ -891,6 +881,48 @@ def company_aggregation():
aggregation.to_excel("Company ownership aggregation.xlsx")
def extract_price_info(text):
    """
    Pull the estimated, low and high prices out of the "Estimated price" section of a page's text.

    The three figures are written in thousands (e.g. "£275k"), optionally with commas, and are
    returned as whole pounds.
    :param text: the page text to search
    :return: dict with "Zoopla Valuation", "Zoopla Lower Bound" and "Zoopla Upper Bound", or None
        when the price section is not found
    """
    found = re.search(r'Estimated price\n\nLow£([\d,]+)k\n\n£([\d,]+)k\n\nHigh£([\d,]+)k', text)
    if found is None:
        return None
    # Groups appear in the text as low, estimate, high
    low_price, est_price, high_price = (
        int(figure.replace(',', '')) * 1000 for figure in found.groups()
    )
    return {
        'Zoopla Valuation': est_price,
        'Zoopla Lower Bound': low_price,
        'Zoopla Upper Bound': high_price,
    }
def get_valuations(portfolio_epc_data_50m):
    """
    Look up a Zoopla valuation for every property in the given EPC data.

    Each UPRN's Zoopla page is requested via the r.jina.ai URL prefix and parsed with
    extract_price_info. This gets blocked pretty quickly by Zoopla, so a page without a recognisable
    price section is expected: such a property is recorded with its UPRN only rather than
    aborting the whole run.
    :param portfolio_epc_data_50m: DataFrame with a "UPRN" column, one row per property
    :return: list of dicts, one per row, holding "UPRN" plus the price fields when they were found
    """
    import requests
    import time
    from tqdm import tqdm

    valuation_data = []
    for _, property_data in tqdm(portfolio_epc_data_50m.iterrows(), total=len(portfolio_epc_data_50m)):
        uprn = property_data["UPRN"]
        response = requests.get(
            f"https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/{uprn}/",
            # Without a timeout a stalled connection would hang the loop indefinitely
            timeout=30,
        )
        # extract_price_info returns None when the page has no price section
        pricing = extract_price_info(response.text)
        valuation_data.append(
            {
                "UPRN": uprn,
                **(pricing or {})
            }
        )
        # Pause between requests to slow down how quickly we get blocked
        time.sleep(2)
    return valuation_data
def prepare_anonymised_data():
investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0)
investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0)

View file

@ -116,7 +116,7 @@ class HeatingRecommender:
# In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
# and either allow or prevent the recommendation of an air source heat pump
if self.is_ashp_valid(exclusions=exclusions):
if self.property.is_ashp_valid(exclusions=exclusions):
self.recommend_air_source_heat_pump(
phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
)
@ -186,19 +186,6 @@ class HeatingRecommender:
description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi "
"boiler")
def is_ashp_valid(self, exclusions):
    """
    Decide whether an air source heat pump can be recommended for the recommender's property.

    The checks run in a fixed order: a non-invasive recommendation for a heat pump always allows it,
    then an explicit exclusion rules it out, and otherwise the property must be a House or Bungalow
    that does not already have an air source heat pump.
    :param exclusions: container that is tested for the "air_source_heat_pump" entry
    :return: True when an air source heat pump is a valid recommendation, False otherwise
    """
    measure = "air_source_heat_pump"
    # The non-invasive recommendation is checked first, so it overrides an exclusion
    if measure in self.property.non_invasive_recommendations:
        return True
    if measure in exclusions:
        return False
    property_type_ok = self.property.data["property-type"] in ["House", "Bungalow"]
    already_fitted = self.property.main_heating["has_air_source_heat_pump"]
    return not (already_fitted or not property_type_ok)
def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
"""
This method will implement the recommendation for an air source heat pump