working on electrical consumption estimates

2026-07-27 23:35:01 +00:00 · 2024-07-29 14:29:07 +01:00 · 2024-07-29 14:29:07 +01:00 · bd610c8881
commit bd610c8881
parent d07e54ce88
6 changed files with 157 additions and 94 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@ -1321,3 +1321,16 @@ class Property:
                self.hot_water_energy_source = self.heating_energy_source
            else:
                raise Exception("Investiage me")
+
+    def is_ashp_valid(self, exclusions):
+        """
+        Decide whether an air source heat pump (ASHP) is a valid recommendation for this property.
+
+        An ASHP listed in the non-invasive recommendations always wins; otherwise an explicit exclusion rules
+        it out. Failing both, the property must be a House or Bungalow that does not already have an ASHP.
+        :param exclusions: collection of excluded measure names
+        :return: True if an ASHP can be recommended, False otherwise
+        """
+        measure = "air_source_heat_pump"
+
+        # The non-invasive recommendation is checked first, so it overrides an exclusion
+        if measure in self.non_invasive_recommendations:
+            return True
+        if measure in exclusions:
+            return False
+
+        # Both lookups happen before the decision, so a missing key surfaces whatever the property type is
+        property_type_ok = self.data["property-type"] in ["House", "Bungalow"]
+        already_has_ashp = self.main_heating["has_air_source_heat_pump"]
+
+        if not property_type_ok:
+            return False
+        return not already_has_ashp
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -520,11 +520,37 @@ async def trigger_plan(body: PlanTriggerRequest):
                # rating to the target SAP rating (ie 69C)
                # TODO: Update this!
                energy_consumption = energy_consumption_client.estimate_new_consumption(
-                    current_rating=p.data["current-energy-rating"],
-                    target_rating="C",
+                    current_energy_efficiency=p.data["current-energy-efficiency"],
+                    target_efficiency="69",
                    current_consumption=p.current_adjusted_energy
                )

+                def convert_to_electric_consumption(self, p, energy_consumption, assumed_ashp_efficiency, exclusions):
+                    """
+                    Estimate the electricity demand of a property once its heating is electrified with an ASHP.
+
+                    Only mains gas properties for which an ASHP is valid are adjusted; every other property has
+                    its consumption returned unchanged.
+                    :param self: unused, kept so the existing signature is unchanged
+                    :param p: property exposing main_fuel, energy_consumption_estimates and is_ashp_valid
+                    :param energy_consumption: estimated consumption, returned as-is when no adjustment applies
+                    :param assumed_ashp_efficiency: ASHP efficiency as a percentage (e.g. 300 for 300%); must be
+                        non-zero because it is used as a divisor
+                    :param exclusions: excluded measures, forwarded to p.is_ashp_valid
+                    :return: the estimated electrical consumption
+                    """
+                    fuel_type = p.main_fuel["fuel_type"]
+
+                    # If the primary fuel is already electricity, we don't need to adjust the consumption.
+                    # Fuels other than mains gas have no conversion defined here, so they are also returned
+                    # unchanged instead of falling through and implicitly returning None.
+                    if fuel_type != "mains gas":
+                        return energy_consumption
+
+                    # A gas property that can't take an ASHP keeps its estimate (validity is checked once)
+                    if not p.is_ashp_valid(exclusions=exclusions):
+                        return energy_consumption
+
+                    # The primary fuel is gas and an ASHP is valid, so we adjust the heating and hot water
+                    # consumption to reflect the 200-400% efficiency of an ASHP with electrified heating, so
+                    # that the solar panel can cover heating generation.
+                    adjusted = p.energy_consumption_estimates["adjusted"]
+                    systems_consumptions = adjusted["heating"] + adjusted["hot_water"]
+                    adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100)
+
+                    # Lighting and appliances are already electric, so they are added back unscaled
+                    return adjusted_consumption + adjusted["lighting"] + adjusted["appliances"]
+
                # TODO: Should energy_consumption be adjusted to just the electricity requirement?
                # We should align our calculation of required energy consumption with expectations around decarbonising
                # heating and hot water, so worse case we should take just the electrical consumption of the property
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@ -507,31 +507,36 @@ class EnergyConsumptionModel:
        return prediction

    @staticmethod
-    def calculate_percentage_decrease(start_rating, end_rating, consumption_averages):
+    def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):

        start_consumption = consumption_averages.loc[
-            consumption_averages["current-energy-rating"] == start_rating, "total_consumption"
+            consumption_averages["current-energy-efficiency"].astype(str) == str(start_efficiency), "total_consumption"
        ].values[0]
+
        end_consumption = consumption_averages.loc[
-            consumption_averages["current-energy-rating"] == end_rating, "total_consumption"
+            consumption_averages["current-energy-efficiency"].astype(str) == str(end_efficiency), "total_consumption"
        ].values[0]

        percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
+        # percentage_decrease cannot be negative
+        if percentage_decrease < 0:
+            percentage_decrease = 0
        return percentage_decrease

-    def estimate_new_consumption(self, current_rating, target_rating, current_consumption):
+    def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
        """
        Given the consumption_averages dataset, which is produced as a result of the data_combining.py script,
        for the energy kwh models, this function will estimate the new consumption based on the current consumption,
        based on the expected reduction in consumption from the current rating to the target rating.
-        :param current_rating:
-        :param target_rating:
+        :param current_energy_efficiency:
+        :param target_efficiency:
        :param current_consumption:
-        :param df:
        :return:
        """
        percentage_decrease = self.calculate_percentage_decrease(
-            current_rating, target_rating, self.consumption_averages
+            start_efficiency=current_energy_efficiency,
+            end_efficiency=target_efficiency,
+            consumption_averages=self.consumption_averages
        )
        new_consumption = current_consumption * (1 - percentage_decrease / 100)
        return new_consumption
--- a/etl/bill_savings/data_combining.py
+++ b/etl/bill_savings/data_combining.py
@ -94,7 +94,7 @@ def app():

    # We also estimate the energy consumption reduction from this data, by band
    df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
-    consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()
+    consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()

    # Save the consumption averages back to s3
    save_dataframe_to_s3_parquet(
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@ -11,7 +11,10 @@ from utils.s3 import read_dataframe_from_s3_parquet
 # The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
 # Therefore value_of_F * 1.15 = value_of_D * 1.03
 # Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
-PROPERTY_VALUE_ESTIMATE = 213_165
+PROPERTY_VALUE_ESTIMATE = 200_000
+
+# UPRNs of properties we need to exclude manually
+MANUAL_EXCLUSIONS = []


 def aggregate_matches(matching_lookup, company_ownership, properties):
@ -283,6 +286,36 @@ def filter_land_registry(properties):
    )


+def is_substring(x, match_string):
+    """
+    Check whether x occurs inside the lower-cased match_string.
+
+    :param x: candidate substring, used exactly as given (it is not lower-cased here)
+    :param match_string: string to search within; lower-cased before the check
+    :return: False when x is null, otherwise whether x is contained in match_string.lower()
+    """
+    if not pd.isnull(x):
+        return x in match_string.lower()
+    return False
+
+
+def house_number_match(paon, house_number):
+    """
+    Compare a PAON with a house number, numerically where possible.
+
+    :param paon: value from the paon field
+    :param house_number: house number to compare against
+    :return: True when both convert to the same integer, or, if either cannot be converted,
+        when the two raw values are equal
+    """
+    # First try to convert both sides to numeric
+    try:
+        numeric_pair = (int(paon), int(house_number))
+    except Exception:  # noqa
+        # If we can't convert both to numeric, fall back to a plain equality check
+        return paon == house_number
+
+    return numeric_pair[0] == numeric_pair[1]
+
+
+def check_equalities(lr_filtered):
+    """
+    Report whether every row of lr_filtered shares the first row's paon, saon and street.
+
+    :param lr_filtered: dataframe with "paon", "saon" and "street" columns and at least one row
+    :return: tuple of booleans (all_paon_equal, all_saon_equal, all_street_equal)
+    """
+    paon = lr_filtered["paon"]
+    paon_uniform = all(paon == paon.values[0])
+
+    # A null never compares equal to itself, so when the first saon is null we require every saon to be null
+    saon = lr_filtered["saon"]
+    first_saon = saon.values[0]
+    saon_flags = pd.isnull(saon) if pd.isnull(first_saon) else saon == first_saon
+    saon_uniform = all(saon_flags)
+
+    street = lr_filtered["street"]
+    street_uniform = all(street == street.values[0])
+
+    return paon_uniform, saon_uniform, street_uniform
+
+
 def app():
    """
    This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
@ -292,8 +325,8 @@ def app():
    #       https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
    #       =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
    #       is actually listed in two local authorities causing us to think it's an EPC F & G property, but it's
-    #       it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
-    #       and performing a singular filter for most recent EPC by UPRN
+    #       it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating
+    #       together and performing a singular filter for most recent EPC by UPRN
    # paths = [
    #     "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
    #     "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@ -356,10 +389,6 @@ def app():
    # Take the newest UPRN
    properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")

-    # TODO: Do we want to filter properties based on lodgement dates?
-    #       E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because
-    #       this could be indicative of a sale happening, and the land registry data may not have caught up yet
-
    # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
    # the property itself
    starting_terms = [
@ -461,6 +490,8 @@ def app():

    # freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx")
    # leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx")
+    # freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
+    # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")

    # The approximate matches aren't very good
    freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
@ -483,7 +514,9 @@ def app():
                "ADDRESS1",
                "CURRENT_ENERGY_EFFICIENCY",
                "CURRENT_ENERGY_RATING",
-                "POSTCODE"
+                "POSTCODE",
+                "LODGEMENT_DATE",
+                "TRANSACTION_TYPE"
            ]
        ].rename(
            columns={
@ -501,7 +534,7 @@ def app():
                "Postcode",
                "Company Registration No. (1)",
                "Proprietor Name (1)",
-
+                "Date Proprietor Added",
            ]
        ],
        how="left", on="Title Number"
@ -531,35 +564,6 @@ def app():
    land_registry["saon"] = land_registry["saon"].str.lower().str.strip()
    land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])

-    def is_substring(x, match_string):
-
-        if pd.isnull(x):
-            return False
-
-        return x in match_string.lower()
-
-    def house_number_match(paon, house_number):
-        # Firstly try and convert to numberic
-        try:
-            paon_numeric = int(paon)
-            house_number_numeric = int(house_number)
-            return paon_numeric == house_number_numeric
-        except Exception as e:  # noqa
-            # If we can't convert both to numeric, we do an equality
-
-            return paon == house_number
-
-    def check_equalities(lr_filtered):
-        all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0])
-        if pd.isnull(lr_filtered["saon"].values[0]):
-            all_saon_equal = all(pd.isnull(lr_filtered["saon"]))
-        else:
-            all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0])
-
-        all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0])
-
-        return all_paon_equal, all_saon_equal, all_street_equal
-
    land_registry_matches = []
    for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)):

@ -779,13 +783,25 @@ def app():
    ).drop(columns=["uprn"])

    # Flag anything that sold in the last year
-    # TODO: Decide on what this logic should be!
    matched_addresses["sold_recently"] = (
        matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1)
    )

-    # Drop anything that sold recently
-    matched_addresses = matched_addresses[~matched_addresses["sold_recently"]]
+    matched_addresses["sale_lodged_recently"] = (
+        (pd.to_datetime(matched_addresses["LODGEMENT_DATE"]) >= pd.Timestamp.now() - pd.DateOffset(months=12)) &
+        (matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
+    )
+
+    # Drop rows on the booleans
+    matched_addresses = matched_addresses[
+        ~matched_addresses["sold_recently"] &
+        ~matched_addresses["sale_lodged_recently"]
+        ]
+
+    # Filter combined_matching_lookup accordingly
+    combined_matching_lookup = combined_matching_lookup[
+        combined_matching_lookup["UPRN"].isin(matched_addresses["UPRN"])
+    ]

    # shared_freehold_match = pd.DataFrame(shared_freehold_match)
    # Strore these files
@ -807,45 +823,19 @@ def app():
        properties=properties
    )

-    investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
    investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]

-    investment_20m_properties = matched_addresses[
-        matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"])
-    ]
-
    investment_50m_properties = matched_addresses[
        matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
    ]

-    # Merge on the owner
-    al_rayan = investment_50m_properties[
-        investment_50m_properties["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")]
-
    portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])]
-    portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])]

-    # investment_20m_properties.to_excel("investment_20m_properties 28th July.xlsx", index=False)
+    # Storing data
    # investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False)

-    z = pd.read_excel("investment_50m_properties 28th May.xlsx")
-    new = investment_50m_properties[~investment_50m_properties["UPRN"].isin(z["UPRN"])]
-    new_al_rayan = new[
-        new["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")
-    ]
-    new_al_rayan = new_al_rayan.merge(
-        properties[["UPRN", "LODGEMENT_DATE"]],
-        how="left",
-        on="UPRN"
-    ).merge(
-        company_ownership[["Title Number", "Date Proprietor Added"]],
-        how="left",
-        on="Title Number",
-    )
-
    # Store the EPC data
-    portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
-    portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
+    # portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 29th July.xlsx", index=False)

    # We check if any of these properties are in a conservation area
    valuations = pd.read_excel("property value.xlsx")
@ -891,6 +881,48 @@ def company_aggregation():
    aggregation.to_excel("Company ownership aggregation.xlsx")


+def extract_price_info(text):
+    """
+    Pull the low / estimated / high price figures out of a block of page text.
+
+    The figures are expected in thousands (e.g. "£200k") and are returned in pounds.
+    :param text: page text to search
+    :return: dict with 'Zoopla Valuation', 'Zoopla Lower Bound' and 'Zoopla Upper Bound', or None when
+        the price section is not found
+    """
+    # Use regex to find the relevant price information
+    match = re.search(r'Estimated price\n\nLow£([\d,]+)k\n\n£([\d,]+)k\n\nHigh£([\d,]+)k', text)
+    if match is None:
+        return None
+
+    # Groups arrive in page order: low, estimate, high; strip thousands separators and scale from "k"
+    low_price, est_price, high_price = (
+        int(figure.replace(',', '')) * 1000 for figure in match.groups()
+    )
+
+    return {
+        'Zoopla Valuation': est_price,
+        'Zoopla Lower Bound': low_price,
+        'Zoopla Upper Bound': high_price
+    }
+
+
+def get_valuations(portfolio_epc_data_50m, request_delay=2, timeout=30):
+    """
+    Look up a Zoopla valuation for every property in the portfolio, one request per UPRN.
+
+    A property whose page cannot be fetched or parsed is still recorded, with only its UPRN, so one
+    blocked request does not discard the valuations already collected.
+    :param portfolio_epc_data_50m: dataframe with a "UPRN" column
+    :param request_delay: seconds to sleep after each request
+    :param timeout: seconds to wait for each HTTP response before giving up on that property
+    :return: list of dicts, each holding "UPRN" plus the pricing fields when they were found
+    """
+    # This gets blocked pretty quickly by Zoopla
+    import requests
+    import time
+    from tqdm import tqdm
+
+    valuation_data = []
+    for _, property_data in tqdm(portfolio_epc_data_50m.iterrows(), total=len(portfolio_epc_data_50m)):
+        uprn = property_data["UPRN"]
+        try:
+            # Without a timeout a stalled connection would hang the whole run
+            response = requests.get(
+                f"https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/{uprn}/",
+                timeout=timeout,
+            )
+        except requests.RequestException:
+            pricing = None
+        else:
+            pricing = extract_price_info(response.text)
+
+        # extract_price_info returns None when the price section is missing (e.g. once we are blocked);
+        # unpacking None would raise TypeError, so fall back to an empty mapping
+        valuation_data.append(
+            {
+                "UPRN": uprn,
+                **(pricing or {})
+            }
+        )
+
+        time.sleep(request_delay)
+
+    return valuation_data
+
+
 def prepare_anonymised_data():
    investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0)
    investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0)
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@ -116,7 +116,7 @@ class HeatingRecommender:
        # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
        # and either allow or prevent the recommendation of an air source heat pump

-        if self.is_ashp_valid(exclusions=exclusions):
+        if self.property.is_ashp_valid(exclusions=exclusions):
            self.recommend_air_source_heat_pump(
                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
            )
@ -186,19 +186,6 @@ class HeatingRecommender:
            description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi "
                           "boiler")

-    def is_ashp_valid(self, exclusions):
-
-        if "air_source_heat_pump" in self.property.non_invasive_recommendations:
-            return True
-
-        if "air_source_heat_pump" in exclusions:
-            return False
-
-        suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
-        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-
-        return suitable_property_type and not has_air_source_heat_pump
-
    def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
        """
        This method will implement the recommendation for an air source heat pump