Merge pull request #612 from Hestia-Homes/eco-eligiblity-bug

minor debugging
2026-07-27 23:35:01 +00:00 · 2025-12-13 22:16:34 +08:00 · 2025-12-13 22:16:34 +08:00 · 6fdde5ee40
commit 6fdde5ee40
parent 7dcabfd6ed 395ab0e083
14 changed files with 722 additions and 217 deletions
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -59,25 +59,26 @@ def app():
    Property UPRN
    """

-    # Lambeth:
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
-    data_filename = "lambeth_sw2_leigham court estate.xlsx"
+    # Peabody data for cleaning
+    data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+                   "Project/data_validation")
+    data_filename = "to_standardise_uprns.xlsx"
    sheet_name = "Sheet1"
    postcode_column = 'Postcode'
-    address1_column = "Address"
+    address1_column = "Address 1"
    address1_method = None
    fulladdress_column = None
-    address_cols_to_concat = ["Address"]
+    address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
    missing_postcodes_method = None
    landlord_year_built = None
    landlord_os_uprn = None
-    landlord_property_type = None
-    landlord_built_form = None
+    landlord_property_type = "Type"
+    landlord_built_form = "Attachment"
    landlord_wall_construction = None
    landlord_roof_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
-    landlord_property_id = "row_id"
+    landlord_property_id = "Org Ref"
    landlord_sap = None
    outcomes_filename = None
    outcomes_sheetname = None
@ -93,6 +94,40 @@ def app():
    asset_list_header = 0
    landlord_block_reference = None

+    # Lambeth:
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
+    # data_filename = "lambeth_sw2_leigham court estate.xlsx"
+    # sheet_name = "Sheet1"
+    # postcode_column = 'Postcode'
+    # address1_column = "Address"
+    # address1_method = None
+    # fulladdress_column = None
+    # address_cols_to_concat = ["Address"]
+    # missing_postcodes_method = None
+    # landlord_year_built = None
+    # landlord_os_uprn = None
+    # landlord_property_type = None
+    # landlord_built_form = None
+    # landlord_wall_construction = None
+    # landlord_roof_construction = None
+    # landlord_heating_system = None
+    # landlord_existing_pv = None
+    # landlord_property_id = "row_id"
+    # landlord_sap = None
+    # outcomes_filename = None
+    # outcomes_sheetname = None
+    # outcomes_postcode = None
+    # outcomes_houseno = None
+    # outcomes_id = None
+    # outcomes_address = None
+    # master_filepaths = []
+    # master_id_colnames = []
+    # master_to_asset_list_filepath = None
+    # phase = False
+    # ecosurv_landlords = None
+    # asset_list_header = 0
+    # landlord_block_reference = None
+
    # Maps addresses to uprn in problematic cases
    manual_uprn_map = {}

@ -230,22 +265,22 @@ def app():
        )

        # We now retrieve any failed properties
-        chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
-        epc_data_failed, _, _ = get_data(
-            df=chunk_failed,
-            row_id_name=asset_list.DOMNA_PROPERTY_ID,
-            uprn_column=AssetList.STANDARD_UPRN,
-            fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
-            address1_column=AssetList.STANDARD_ADDRESS_1,
-            postcode_column=AssetList.STANDARD_POSTCODE,
-            property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
-            built_form_column=AssetList.STANDARD_BUILT_FORM,
-            manual_uprn_map=manual_uprn_map,
-            epc_api_only=epc_api_only,
-            epc_auth_token=EPC_AUTH_TOKEN
-        )
-
-        epc_data_chunk.extend(epc_data_failed)
+        # chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
+        # epc_data_failed, _, _ = get_data(
+        #     df=chunk_failed,
+        #     row_id_name=asset_list.DOMNA_PROPERTY_ID,
+        #     uprn_column=AssetList.STANDARD_UPRN,
+        #     fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
+        #     address1_column=AssetList.STANDARD_ADDRESS_1,
+        #     postcode_column=AssetList.STANDARD_POSTCODE,
+        #     property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
+        #     built_form_column=AssetList.STANDARD_BUILT_FORM,
+        #     manual_uprn_map=manual_uprn_map,
+        #     epc_api_only=epc_api_only,
+        #     epc_auth_token=EPC_AUTH_TOKEN
+        # )
+        #
+        # epc_data_chunk.extend(epc_data_failed)

        # Append the failed data to the main data
        # Store the chunk locally as a csv
@ -422,3 +457,7 @@ def app():

        if not asset_list.geographical_areas.empty:
            asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
+
+        # Store dupes
+        if not asset_list.duplicated_addresses.empty:
+            asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@ -458,6 +458,12 @@ BUILT_FORM_MAPPINGS = {
    'Maisonette: Detached: Mid Floor': 'detached',
    'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace',

-    'House: EnclosedMidTerrace': 'enclosed mid-terrace'
+    'House: EnclosedMidTerrace': 'enclosed mid-terrace',
+
+    'EnclosedMidTerrace': 'enclosed mid-terrace',
+    'EnclosedEndTerrace': 'enclosed end-terrace',
+    'EndTerrace': 'end-terrace',
+    'SemiDetached': 'semi-detached',
+    'MidTerrace': 'mid-terrace'

 }
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@ -1,4 +1,4 @@
-from sqlalchemy import insert, delete, text
+from sqlalchemy import insert, delete, select
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import SQLAlchemyError
 from backend.app.db.models.recommendations import (
@ -242,20 +242,26 @@ def chunked(iterable, size=100):
        yield iterable[i:i + size]


+# def fast_delete_recommendations(session, chunk):
+#     placeholders = ",".join(["(:p{})".format(i) for i in range(len(chunk))])
+#     params = {f"p{i}": chunk[i] for i in range(len(chunk))}
+#
+#     sql = text(f"""
+#         WITH ids(property_id) AS (
+#             VALUES {placeholders}
+#         )
+#         DELETE FROM recommendation r
+#         USING ids
+#         WHERE r.property_id = ids.property_id;
+#     """)
+#
+#     session.execute(sql, params, execution_options={"synchronize_session": False})
+
 def fast_delete_recommendations(session, chunk):
-    placeholders = ",".join(["(:p{})".format(i) for i in range(len(chunk))])
-    params = {f"p{i}": chunk[i] for i in range(len(chunk))}
-
-    sql = text(f"""
-        WITH ids(property_id) AS (
-            VALUES {placeholders}
-        )
-        DELETE FROM recommendation r
-        USING ids
-        WHERE r.property_id = ids.property_id;
-    """)
-
-    session.execute(sql, params, execution_options={"synchronize_session": False})
+    session.execute(
+        delete(Recommendation)
+        .where(Recommendation.property_id.in_(chunk))
+    )


 def clear_portfolio(session: Session, portfolio_id: int, batch_size=100):
@ -362,11 +368,19 @@ def clear_portfolio(session: Session, portfolio_id: int, batch_size=100):
    # --------------------------
    # Recommendations (fast delete)
    # --------------------------
-    rec_chunks = list(chunked(property_ids, batch_size))
+    # rec_chunks = list(chunked(property_ids, batch_size * 5))  # larger chunks for fast delete
+    # total = len(rec_chunks)
+    # for i, chunk in enumerate(rec_chunks, start=1):
+    #     print_progress("Deleting Recommendations", i, total)
+    #     fast_delete_recommendations(session, chunk)
+    rec_chunks = list(chunked(recommendation_ids, batch_size))
    total = len(rec_chunks)
    for i, chunk in enumerate(rec_chunks, start=1):
        print_progress("Deleting Recommendations", i, total)
-        fast_delete_recommendations(session, chunk)
+        session.execute(
+            delete(Recommendation)
+            .where(Recommendation.id.in_(chunk))
+        )

    # --------------------------
    # Inspections
@ -412,3 +426,114 @@ def clear_portfolio(session: Session, portfolio_id: int, batch_size=100):

    session.commit()
    print("Portfolio cleared.")
+
+
+def clear_portfolio_in_batches(
+    session: Session,
+    portfolio_id: int,
+    property_batch_size: int = 10
+):
+    # Fetch all property IDs once
+    property_ids = [
+        pid for (pid,) in
+        session.query(PropertyModel.id)
+        .filter(PropertyModel.portfolio_id == portfolio_id)
+        .all()
+    ]
+
+    def delete_for_property_batch(prop_ids):
+        # ----------------------------
+        # Recommendations → PlanRecommendations
+        # ----------------------------
+        rec_subq = (
+            select(Recommendation.id)
+            .where(Recommendation.property_id.in_(prop_ids))
+        )
+
+        session.execute(
+            delete(PlanRecommendations)
+            .where(PlanRecommendations.recommendation_id.in_(rec_subq))
+        )
+
+        session.execute(
+            delete(RecommendationMaterials)
+            .where(RecommendationMaterials.recommendation_id.in_(rec_subq))
+        )
+
+        session.execute(
+            delete(Recommendation)
+            .where(Recommendation.property_id.in_(prop_ids))
+        )
+
+        # ----------------------------
+        # Inspections
+        # ----------------------------
+        session.execute(
+            delete(InspectionModel)
+            .where(InspectionModel.property_id.in_(prop_ids))
+        )
+
+        # ----------------------------
+        # Plans (scoped to these properties)
+        # ----------------------------
+        plan_subq = (
+            select(Plan.id)
+            .where(Plan.property_id.in_(prop_ids))
+        )
+
+        session.execute(
+            delete(PlanRecommendations)
+            .where(PlanRecommendations.plan_id.in_(plan_subq))
+        )
+
+        session.execute(
+            delete(FundingPackageMeasures)
+            .where(
+                FundingPackageMeasures.funding_package_id.in_(
+                    select(FundingPackage.id)
+                    .where(FundingPackage.plan_id.in_(plan_subq))
+                )
+            )
+        )
+
+        session.execute(
+            delete(FundingPackage)
+            .where(FundingPackage.plan_id.in_(plan_subq))
+        )
+
+        session.execute(
+            delete(Plan)
+            .where(Plan.id.in_(plan_subq))
+        )
+
+        # ----------------------------
+        # Property-scoped auxiliary tables
+        # ----------------------------
+        session.execute(
+            delete(PropertyDetailsEpcModel)
+            .where(PropertyDetailsEpcModel.property_id.in_(prop_ids))
+        )
+
+        session.execute(
+            delete(PropertyTargetsModel)
+            .where(PropertyTargetsModel.property_id.in_(prop_ids))
+        )
+
+        # ----------------------------
+        # Properties (last)
+        # ----------------------------
+        session.execute(
+            delete(PropertyModel)
+            .where(PropertyModel.id.in_(prop_ids))
+        )
+
+    # -------- BATCH DELETE LOOP --------
+    property_chunks = list(chunked(property_ids, property_batch_size))
+    total_batches = len(property_chunks)
+
+    for i, prop_ids in enumerate(property_chunks, start=1):
+        print(f"Deleting batch {i}/{total_batches} ({len(prop_ids)} properties)")
+        delete_for_property_batch(prop_ids)
+        session.commit()
+
+    print("Portfolio cleared in batches.")
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@ -662,7 +662,9 @@ async def model_engine(body: PlanTriggerRequest):
                address1 = config.get("domna_address_1", None)

            address1 = str(int(address1)) if isinstance(address1, float) else str(address1)
-            full_address = config.get("domna_full_address") if body.file_format == "domna_asset_list" else None
+            full_address = config.get("domna_full_address", "") if body.file_format == "domna_asset_list" else None
+            if not isinstance(full_address, str):  # Catch for when the full address is nan
+                full_address = None
            heating_system = parse_heating_system(config)

            associated_uprns = []
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@ -290,6 +290,14 @@ class AnnualBillSavings:
            # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
            return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.ELECTRICITY_PRICE_CAP

+        if fuel in ['Oil + Solar Thermal']:
+            # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
+            price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Kerosene"].squeeze()
+            cost_per_kwh = cls.cost_per_kwh(
+                price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
+            )
+            return (kwh / cop) * cost_per_kwh * assumptions.SOLAR_CONSUMPTION_PROPORTION
+
        if fuel == "LPG + Solar Thermal":
            # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
            price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "LPG"].squeeze()
--- a/backend/tests/test_integration.py
+++ b/backend/tests/test_integration.py
@ -82,6 +82,12 @@ costs_by_floor_area = epc_data[
    ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT",
       "HOT_WATER_COST_CURRENT"]].copy()

+epc_data = epc_data[
+    (epc_data["MAINHEAT_DESCRIPTION"].str.contains("SAP05:") == False) &
+    (~epc_data["LIGHTING_COST_CURRENT"].isin([None, ""])) &
+    (~pd.isnull(epc_data["LIGHTING_COST_CURRENT"]))
+    ]
+
 costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns]
 for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
    costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"]
@ -92,8 +98,8 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[

 epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]

-sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample(
-    10000).reset_index(drop=True)
+sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2008-01-01"].drop_duplicates("UPRN").sample(
+    50000).reset_index(drop=True)

 # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type
 # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used
@ -163,6 +169,8 @@ mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_pred
 mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"])

 # TODO: We might want to implement this generally, via an ETL process
+for x in cleaned["mainheat-description"]:
+    x["has_wood_chips"] = False
 for p in input_properties:
    for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
        if pd.isnull(p.data[col]):
@ -313,6 +321,10 @@ for p in tqdm(input_properties):
    if not recommendations.get(p.id):
        continue

+    # Temp allow to skip
+    if not isinstance(recommendations.get(p.id)[0], list):
+        continue
+
    # we need to double unlist because we have a list of lists
    property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs}
    property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures]
@ -336,32 +348,32 @@ for p in tqdm(input_properties):
    )
    gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages)

-    funding = Funding(
-        tenure=body.housing_type,
-        project_scores_matrix=project_scores_matrix,
-        partial_project_scores_matrix=partial_project_scores_matrix,
-        whlg_eligible_postcodes=whlg_eligible_postcodes,
-        eco4_social_cavity_abs_rate=13,
-        eco4_social_solid_abs_rate=17,
-        eco4_private_cavity_abs_rate=13,
-        eco4_private_solid_abs_rate=17,
-        gbis_social_cavity_abs_rate=21,
-        gbis_social_solid_abs_rate=25,
-        gbis_private_cavity_abs_rate=21,
-        gbis_private_solid_abs_rate=28,
-    )
-
-    li_thickness = convert_thickness_to_numeric(
-        p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"]
-    )
-    current_wall_u_value = p.walls["thermal_transmittance"]
-    if current_wall_u_value is None:
-        current_wall_u_value = get_wall_u_value(
-            clean_description=p.walls["clean_description"],
-            age_band=p.age_band,
-            is_granite_or_whinstone=p.walls["is_granite_or_whinstone"],
-            is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"],
-        )
+    # funding = Funding(
+    #     tenure=body.housing_type,
+    #     project_scores_matrix=project_scores_matrix,
+    #     partial_project_scores_matrix=partial_project_scores_matrix,
+    #     whlg_eligible_postcodes=whlg_eligible_postcodes,
+    #     eco4_social_cavity_abs_rate=13,
+    #     eco4_social_solid_abs_rate=17,
+    #     eco4_private_cavity_abs_rate=13,
+    #     eco4_private_solid_abs_rate=17,
+    #     gbis_social_cavity_abs_rate=21,
+    #     gbis_social_solid_abs_rate=25,
+    #     gbis_private_cavity_abs_rate=21,
+    #     gbis_private_solid_abs_rate=28,
+    # )
+    #
+    # li_thickness = convert_thickness_to_numeric(
+    #     p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"]
+    # )
+    # current_wall_u_value = p.walls["thermal_transmittance"]
+    # if current_wall_u_value is None:
+    #     current_wall_u_value = get_wall_u_value(
+    #         clean_description=p.walls["clean_description"],
+    #         age_band=p.age_band,
+    #         is_granite_or_whinstone=p.walls["is_granite_or_whinstone"],
+    #         is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"],
+    #     )

    # We insert the innovation uplift
    measures_to_optimise_with_uplift = deepcopy(measures_to_optimise)
@ -369,35 +381,39 @@ for p in tqdm(input_properties):
    # TODO: Turn this into a function and store the innovation uplift
    for group in measures_to_optimise_with_uplift:
        for r in group:
-
-            if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating",
-                             "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]:
-                (
-                    r["partial_project_score"],
-                    r["partial_project_funding"],
-                    r["innovation_uplift"],
-                    r["uplift_project_score"],
-                ) = (
-                    0, 0, 0, 0
-                )
-                continue
-
-            (
-                r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
-                r["uplift_project_score"]
-            ) = funding.get_innovation_uplift(
-                measure=r,
-                starting_sap=int(p.data["current-energy-efficiency"]),
-                floor_area=p.floor_area,
-                is_cavity=p.walls["is_cavity_wall"],
-                current_wall_uvalue=current_wall_u_value,
-                is_partial="partial" in p.walls["clean_description"].lower(),
-                existing_li_thickness=li_thickness,
-                mainheating=p.main_heating,
-                main_fuel=p.main_fuel,
-                mainheat_energy_eff=p.data["mainheat-energy-eff"],
+            (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
+             r["uplift_project_score"]) = (
+                0, 0, 0, 0
            )

+            # if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating",
+            #                  "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]:
+            #     (
+            #         r["partial_project_score"],
+            #         r["partial_project_funding"],
+            #         r["innovation_uplift"],
+            #         r["uplift_project_score"],
+            #     ) = (
+            #         0, 0, 0, 0
+            #     )
+            #     continue
+            #
+            # (
+            #     r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
+            #     r["uplift_project_score"]
+            # ) = funding.get_innovation_uplift(
+            #     measure=r,
+            #     starting_sap=int(p.data["current-energy-efficiency"]),
+            #     floor_area=p.floor_area,
+            #     is_cavity=p.walls["is_cavity_wall"],
+            #     current_wall_uvalue=current_wall_u_value,
+            #     is_partial="partial" in p.walls["clean_description"].lower(),
+            #     existing_li_thickness=li_thickness,
+            #     mainheating=p.main_heating,
+            #     main_fuel=p.main_fuel,
+            #     mainheat_energy_eff=p.data["mainheat-energy-eff"],
+            # )
+
            if r["already_installed"]:
                # if already installed, we zero out the uplift and funding
                (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
@ -411,7 +427,7 @@ for p in tqdm(input_properties):
    )

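    # When the goal is Increasing EPC, we can run the funding optimiser
    # NOTE(review): the condition below compares against "Switch off" rather than "Increasing EPC",
    # which appears to disable this branch temporarily - confirm before merging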
-    if body.goal == "Increasing EPC":
+    if body.goal == "Switch off":

        solutions = optimise_with_funding_paths(
            p=p,
@ -481,37 +497,43 @@ for p in tqdm(input_properties):
            ROOF_INSULATION_MEASURES
        )

-        funding.check_funding(
-            measures=solution,
-            starting_sap=int(p.data["current-energy-efficiency"]),
-            ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]),
-            floor_area=p.floor_area,
-            mainheat_description=p.main_heating["clean_description"],
-            heating_control_description=p.main_heating_controls["clean_description"],
-            is_cavity=p.walls["is_cavity_wall"],
-            current_wall_uvalue=current_wall_u_value,
-            is_partial="partial" in p.walls["clean_description"].lower(),
-            existing_li_thickness=li_thickness,
-            mainheating=p.main_heating,
-            main_fuel=p.main_fuel,
-            mainheat_energy_eff=p.data["mainheat-energy-eff"],
-            has_wall_insulation_recommendation=has_wall_insulation_recommendation,
-            has_roof_insulation_recommendation=has_roof_insulation_recommendation,
-        )
+        # funding.check_funding(
+        #     measures=solution,
+        #     starting_sap=int(p.data["current-energy-efficiency"]),
+        #     ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]),
+        #     floor_area=p.floor_area,
+        #     mainheat_description=p.main_heating["clean_description"],
+        #     heating_control_description=p.main_heating_controls["clean_description"],
+        #     is_cavity=p.walls["is_cavity_wall"],
+        #     current_wall_uvalue=current_wall_u_value,
+        #     is_partial="partial" in p.walls["clean_description"].lower(),
+        #     existing_li_thickness=li_thickness,
+        #     mainheating=p.main_heating,
+        #     main_fuel=p.main_fuel,
+        #     mainheat_energy_eff=p.data["mainheat-energy-eff"],
+        #     has_wall_insulation_recommendation=has_wall_insulation_recommendation,
+        #     has_roof_insulation_recommendation=has_roof_insulation_recommendation,
+        # )

        # Determine the scheme
        scheme = "none"
-        if funding.eco4_eligible:
-            scheme = "eco4"
-        if scheme == "none" and funding.gbis_eligible:
-            scheme = "gbis"
+        # if funding.eco4_eligible:
+        #     scheme = "eco4"
+        # if scheme == "none" and funding.gbis_eligible:
+        #     scheme = "gbis"

-        funded_measures = solution if scheme in ["gbis", "eco4"] else []
-        project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs
-        total_uplift = funding.eco4_uplift
-        full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs
-        partial_project_score = funding.partial_project_abs
-        uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift
+        funded_measures = []
+        # funded_measures = solution if scheme in ["gbis", "eco4"] else []
+        # project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs
+        project_funding = 0
+        # total_uplift = funding.eco4_uplift
+        total_uplift = 0
+        # full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs
+        full_project_score = 0
+        # partial_project_score = funding.partial_project_abs
+        partial_project_score = 0
+        # uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift
+        uplift_project_score = 0

    selected = {r["id"] for r in solution}

--- a/etl/customers/lincs_rural/get_missed.py
+++ b/etl/customers/lincs_rural/get_missed.py
@ -0,0 +1,47 @@
+# After going back to Lincs rural, they gave us some additional data that we can use to try to fetch missed UPRNs again
+import pandas as pd
+
+# missed = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx",
+#     sheet_name="Missed Properties"
+# )
+# missed = missed[~pd.isnull(missed["rrn"])]
+
+prepared = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+updated_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes - Copy.xlsx",
+    sheet_name="PROPERTY EPC RATINGS"
+)
+updated_data = updated_data[~pd.isnull(updated_data["Property Ref."])]
+
+missed = updated_data[~updated_data["Property Ref."].isin(prepared["landlord_property_id"].values.tolist())].copy()
+# missed.to_csv("/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_uprn.csv")
+# We'll grab the UPRNs manually and then pull them in, and prepare for ARA
+
+missing_uprns = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_uprn.csv")
+
+missing_uprns["landlord_property_id"] = missing_uprns["Property Ref."].copy()
+missing_uprns["domna_property_id"] = missing_uprns["Property Ref."].copy()
+missing_uprns["domna_address_1"] = missing_uprns['Unnamed: 1'].str.split(",").str[0].str.strip()
+missing_uprns["postcode"] = missing_uprns['Unnamed: 1'].str.split(",").str[-1].str.strip()
+missing_uprns["landlord_property_type"] = "unknown"
+missing_uprns["landlord_built_form"] = "unknown"
+missing_uprns["domna_full_address"] = missing_uprns['Unnamed: 1'].copy()
+
+missed_standardised_for_ara = missing_uprns[
+    ['landlord_property_id', 'domna_address_1', 'landlord_property_type', 'landlord_built_form', 'postcode',
+     'domna_property_id', 'UPRN']
+].rename(
+    columns={"UPRN": "epc_os_uprn"}
+)
+
+# Store
+missed_standardised_for_ara.to_excel(
+    "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_standardised_ara_nov_2025.xlsx",
+    index=False,
+    sheet_name="Standardised Asset List"
+)
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
--- a/Project/b_data_cleanse.py
+++ b/Project/b_data_cleanse.py
@ -0,0 +1,147 @@
+"""
+We have found, within the Peabody data, a large volume of properties with missing and incorrect
+UPRNS and incorrect address data. We want to flag these records and also find the missing UPRNS where we can
+
+We also have duplicate UPRNS that should be flagged
+"""
+import json
+import time
+import os
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from dotenv import load_dotenv
+from asset_list.utils import get_data_for_property
+from utils.logger import setup_logger
+from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+
+logger = setup_logger()
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+sustainability_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Sustainability"
+)
+property_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Properties"
+)
+
+missing_uprns = sustainability_data[pd.isnull(sustainability_data['UPRN'])].copy()
+
+# Any UPRNS that are non-numeric or that start with a leading 0 are invalid
+non_numeric_uprns = sustainability_data[
+    ~sustainability_data['UPRN'].astype(str).str.match(r'^[1-9][0-9]*$') & ~pd.isnull(sustainability_data['UPRN'])
+    ].copy()
+# 70 properties
+leading_zero_uprns = sustainability_data[
+    sustainability_data['UPRN'].astype(str).str.startswith('0')
+].copy()
+
+# Flag duplicates
+duplicate_uprns = sustainability_data[
+    sustainability_data.duplicated(subset=['UPRN'], keep=False) & ~pd.isnull(sustainability_data['UPRN'])
+    ].copy()
+
+# Store this data
+# missing_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting
+# Project/data_validation/missing_uprns.csv", index=False)
+# non_numeric_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting
+# Project/data_validation/non_numeric_uprns.csv", index=False)
+# leading_zero_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting
+# Project/data_validation/leading_zero_uprns.csv", index=False)
+# duplicate_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting
+# Project/data_validation/duplicate_uprns.csv", index=False)
+
+# Take everything remaining
+data_needing_validation = sustainability_data[
+    ~sustainability_data["Org Ref"].isin(
+        missing_uprns["Org Ref"].values.tolist() + non_numeric_uprns["Org Ref"].values.tolist() +
+        leading_zero_uprns["Org Ref"].values.tolist() + duplicate_uprns["Org Ref"].values.tolist()
+    )
+].copy()
+
+# TODO: We should build a SAL for UPRNS that are missing, invalid or duplicated
+
+# We check UPRN validity against our OS data
+uprn_filenames = read_dataframe_from_s3_parquet(
+    bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
+)
+
+# We're going to:
+# 1) Grab a filename
+# 2) Read it in
+# 3) Check which UPRNS from our data are in that file
+# 4) Keep a record of which UPRNS were found where
+
+for uprn_file in tqdm(uprn_filenames['filenames'].values, total=len(uprn_filenames)):
+    spatial_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key=f"spatial/{uprn_file}"
+    )
+
+    uprns_in_file = data_needing_validation[
+        data_needing_validation['UPRN'].astype('Int64').isin(spatial_data['UPRN'].astype('Int64').values)
+    ].copy()
+
+    print("Found {} UPRNS in file {}".format(len(uprns_in_file), uprn_file))
+    if len(uprns_in_file) > 0:
+        # Store the found UPRNS in the validation cache
+        data_to_store = uprns_in_file[["Org Ref", "UPRN"]].copy()
+        data_to_store["Source File"] = uprn_file
+        # Store
+        data_to_store.to_csv(
+            "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+            f"Project/data_validation/validation_cache/{uprn_file.split('.parquet')[0]}_found_uprns.csv",
+            index=False
+        )
+
+# Get all of the files:
+storage_locations = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+                     "Project/data_validation/validation_cache")
+# List contents
+folder_contents = os.listdir(storage_locations)
+# Grab files and concatenate
+all_found_uprns = []
+for file in folder_contents:
+    if file.endswith("_found_uprns.csv"):
+        df = pd.read_csv(os.path.join(storage_locations, file))
+        all_found_uprns.append(df)
+
+all_found_uprns = pd.concat(all_found_uprns)
+
+# We now flag any UPRNS that were not found in any of the OS datasets
+os_missed_uprns = data_needing_validation[
+    ~data_needing_validation['Org Ref'].isin(all_found_uprns['Org Ref'].values.tolist())
+].copy()
+
+# store
+os_missed_uprns.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+    "Project/data_validation/os_missed_uprns.csv",
+    index=False
+)
+
+# Now build a larger table for standardisation
+to_standardised = pd.concat(
+    [missing_uprns, non_numeric_uprns, leading_zero_uprns, duplicate_uprns, os_missed_uprns]
+)
+
+to_standardised.to_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+    "Project/data_validation/to_standardise_uprns.xlsx",
+    index=False)
+
+# We prepare a finalised dataset to work with, that excludes all problematic properties and leaves us with
+# properties for which we have the data we need
+
+finalised_data = sustainability_data[
+    ~sustainability_data["Org Ref"].isin(
+        to_standardised["Org Ref"].values.tolist()
+    )
+].copy()
+
+# Prepare with the column formats we need, as analogous to a_data_prep where we defined an initial working sample
--- a/Project/c_finalised_modelling_data.py
+++ b/Project/c_finalised_modelling_data.py
@ -0,0 +1,114 @@
+import pandas as pd
+
+# import pandas as pd
+#
+# sal = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+#     "Project/data_validation/to_standardise_uprns - Standardised.xlsx",
+#     sheet_name="Standardised Asset List"
+# )
+#
+# # Quick breakdown of missingness
+# missing = sal[
+#     pd.isnull(sal["estimated"]) | (sal["estimated"] == True) | pd.isnull(sal["epc_os_uprn"])
+#     ]
+#
+# fetched = sal[(sal["estimated"] == False) | ~pd.isnull(sal["epc_os_uprn"])].copy()
+# fetched = fetched[
+#     ["landlord_property_id", "domna_address_1", "domna_postcode", "domna_full_address", "epc_address1",
+#      "epc_postcode", "epc_address", "landlord_property_type", "epc_property_type"]
+# ]
+#
+# known_issues = [
+#
+# ]
+#
+# # Missed postcodes
+# missed_postcode_agg = missing.groupby("domna_postcode").size().reset_index(name="count")
+# missed_postcode_agg = missed_postcode_agg.sort_values("count", ascending=False)
+#
+# multi_missed_postcode = missed_postcode_agg[missed_postcode_agg["count"] > 1]
+
+### Prepare
+sustainability_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Sustainability"
+)
+
+# Masks / views shared by the checks below
+uprn_is_null = pd.isnull(sustainability_data['UPRN'])
+uprn_as_text = sustainability_data['UPRN'].astype(str)
+
+# Records to exclude: no UPRN at all
+missing_uprns = sustainability_data[uprn_is_null].copy()
+
+# UPRN present, but its text form is not digits-only with a non-zero first digit
+non_numeric_uprns = sustainability_data[~uprn_as_text.str.match(r'^[1-9][0-9]*$') & ~uprn_is_null].copy()
+# (original note: 70 properties)
+leading_zero_uprns = sustainability_data[uprn_as_text.str.startswith('0')].copy()
+
+# Every row whose non-null UPRN occurs more than once (keep=False marks all occurrences)
+duplicate_uprns = sustainability_data[
+    sustainability_data.duplicated(subset=['UPRN'], keep=False) & ~uprn_is_null
+].copy()
+
+# Records previously saved as not valid based on the OS data
+os_missed_uprns = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+    "Project/data_validation/os_missed_uprns.csv",
+)
+
+# Gather the "Org Ref" of every flagged record, then keep only the rows that were never flagged
+excluded_org_refs = []
+for flagged in (missing_uprns, non_numeric_uprns, leading_zero_uprns, duplicate_uprns, os_missed_uprns):
+    excluded_org_refs.extend(flagged["Org Ref"].unique().tolist())
+
+modelling_data = sustainability_data[~sustainability_data["Org Ref"].isin(excluded_org_refs)].copy()
+
+# Need to prepare for upload
+# Both id columns are copies of "Org Ref"; the assignment aligns on the row index, so only the
+# rows kept in modelling_data receive values.
+# NOTE(review): landlord_property_id is not in the column selection below, so it is dropped - confirm.
+modelling_data["landlord_property_id"] = sustainability_data["Org Ref"].copy()
+modelling_data["domna_property_id"] = sustainability_data["Org Ref"].copy()
+
+modelling_data = modelling_data.rename(
+    columns={  # without columns= the mapping is applied to the index labels, not the column names
+        "Address 1": "domna_address_1",
+        "Postcode": "postcode",
+        "Type": "landlord_property_type",
+        "Attachment": "landlord_built_form",
+        "Heating": "landlord_heating_system",
+        "UPRN": "epc_os_uprn"
+    }
+)
+
+modelling_data = modelling_data[
+    [
+        "domna_address_1", "Address 2", "Address 3", "postcode", "landlord_property_type",
+        "landlord_built_form", "landlord_heating_system", "epc_os_uprn", "Total Floor Area (m2)",
+        "domna_property_id"  # domna_full_address is only derived further down, so it cannot be selected yet
+    ]
+]
+
+# Translate the source built-form codes into the hyphenated labels.
+# Series.map with a dict turns any value that is not one of these keys into NaN.
+built_form_labels = {
+    "MidTerrace": "Mid-Terrace",
+    "EndTerrace": "End-Terrace",
+    "SemiDetached": "Semi-Detached",
+    "Detached": "Detached",
+    "EnclosedEndTerrace": "Enclosed End-Terrace",
+    "EnclosedMidTerrace": "Enclosed Mid-Terrace",
+}
+modelling_data["landlord_built_form"] = modelling_data["landlord_built_form"].map(built_form_labels)
+
+
+def make_full_address(x):
+    """Join the non-empty address parts of a row into one comma-separated string.
+
+    :param x: row (or mapping) exposing 'domna_address_1', 'Address 2' and 'Address 3'
+    :return: the parts that are neither null nor '' joined with ", "; '' when none remain
+    """
+    parts = [x['domna_address_1'], x['Address 2'], x['Address 3']]
+    # str() each part: a cell holding a number rather than text would otherwise make join() raise
+    return ", ".join(str(part) for part in parts if not pd.isnull(part) and part != '')
+
+
+# Build the single-line address for every row
+modelling_data["domna_full_address"] = modelling_data.apply(make_full_address, axis=1)
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -1,6 +0,0 @@
-"""
-We have found, within the Peabody data, a large volume of properties with missing and incorrects
-UPRNS and incorrect address data. We want to flag these records and also find missings where we can
-
-We also have duplicate UPRNS that should be flagged
-"""
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@ -844,7 +844,7 @@ class TrainingDataset(BaseDataset):

        # Make sure they are all efficiency columns
        if any(~missings.index.str.contains("energy_eff")):
-            raise ValueError("Non efficiency columns are missing")
+            raise ValueError(f"Non efficiency columns are missing {missings.index}")

        for m in missings.index:
            self.df[m] = self.df[m].fillna("NO_RATING")
--- a/etl/webscrape/Zoopla.py
+++ b/etl/webscrape/Zoopla.py
@ -15,25 +15,10 @@ os.makedirs(CACHE_DIR, exist_ok=True)


 def random_delay():
-    """Pause randomly between requests (0.5–2 s)."""
    time.sleep(random.uniform(0.5, 2))


-def extract_feature(soup, icon_id):
-    tag = soup.find("use", href=f"#{icon_id}")
-    if tag:
-        parent = tag.find_parent("div", class_="_1pbf8i53")
-        if parent:
-            text = parent.get_text(strip=True)
-            return text
-    return None
-
-
 def extract_embedded_json(text):
-    """
-    Extract embedded property JSON containing attributes, energy, estimates, and sales history.
-    """
-    # Try to grab everything after "attributes"
    match = re.search(
        r'"attributes"\s*:\s*\{.*?\}\s*,.*?"historicSales".*?\]',
        text,
@ -48,13 +33,16 @@ def extract_embedded_json(text):
        except json.JSONDecodeError:
            pass

-    # fallback for independent keys
    result = {}
    for key in [
        "attributes", "energy", "rentEstimate",
        "saleEstimate", "saleHistory", "historicSales"
    ]:
-        key_match = re.search(rf'"{key}"\s*:\s*(\{{.*?\}}|\[.*?\])', text, re.DOTALL)
+        key_match = re.search(
+            rf'"{key}"\s*:\s*(\{{.*?\}}|\[.*?\])',
+            text,
+            re.DOTALL
+        )
        if key_match:
            try:
                result[key] = json.loads(key_match.group(1))
@ -64,28 +52,23 @@ def extract_embedded_json(text):


 def scrape_all_estimates(session, url):
-    """Scrape valuation estimates for one Zoopla property URL."""
    resp = session.get(url, impersonate=random.choice(ENGINES))
    html = resp.text
-    page_source = BeautifulSoup(resp.text, "html.parser")
-    estimates = page_source.find_all("div", {"data-testid": "sale-estimate"})
-
+    soup = BeautifulSoup(html, "html.parser")
+    estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
    data = extract_embedded_json(html)

-    is_blocked = len(estimates) == 0
-
    return {
        "estimates": estimates,
-        "is_blocked": is_blocked,
+        "is_blocked": len(estimates) == 0,
        "response_html": html,
-        "attributes": data.get("attributes"),
-        "rent": data.get("rentEstimate"),
-        "historicSales": data.get("historicSales"),
+        "attributes": data.get("attributes", {}),
+        "rentEstimate": data.get("rentEstimate", {}),
+        "historicSales": data.get("historicSales", []),
    }


 def extract_estimates(estimates):
-    """Extract low, mid, and high estimates from parsed HTML."""
    est = estimates[0]
    low = est.find("span", {"data-testid": "low-estimate-blurred"}).text
    mid = est.find("p", {"data-testid": "estimate-blurred"}).text
@ -94,110 +77,123 @@ def extract_estimates(estimates):


 def cache_path_for_url(url):
-    """Return a deterministic local cache path for a URL."""
    uprn = url.split("/")[-2]
    return os.path.join(CACHE_DIR, f"{uprn}.html")


+def parse_cached_html(url, html):
+    """Rebuild a result row from already-downloaded page HTML.
+
+    Returns None when the HTML contains no "sale-estimate" block; otherwise a dict with the URL,
+    the three estimates, the embedded attributes and rent estimate, and the first historic sale.
+    """
+    page = BeautifulSoup(html, "html.parser")
+    estimate_nodes = page.find_all("div", {"data-testid": "sale-estimate"})
+    embedded = extract_embedded_json(html)
+    # A missing or empty sales list falls back to one empty record so indexing [0] is safe
+    sales = embedded.get("historicSales") or [{}]
+
+    if not estimate_nodes:
+        return None
+
+    low, mid, high = extract_estimates(estimate_nodes)
+
+    # Later updates overwrite earlier keys, matching the order of a dict display with ** unpacking
+    row = {
+        "URL": url,
+        "Low Estimate": low,
+        "Middle Estimate": mid,
+        "High Estimate": high,
+    }
+    row.update(embedded.get("attributes", {}))
+    row.update(embedded.get("rentEstimate", {}))
+    row.update(sales[0])
+    return row
+
+
 def parallel_task(url):
-    """Main worker function executed in each process."""
    cache_path = cache_path_for_url(url)

-    # Use cached file if it exists
    if os.path.exists(cache_path):
-        html = open(cache_path, "r").read()
-        page_source = BeautifulSoup(html, "html.parser")
-        estimates = page_source.find_all("div", {"data-testid": "sale-estimate"})
-        data = extract_embedded_json(html)
-        history_sales = data.get("historicSales", [{}])
-        if len(history_sales) == 0:
-            history_sales = [{}]
+        with open(cache_path, "r", encoding="utf-8") as f:
+            html = f.read()
+        cached = parse_cached_html(url, html)
+        if cached:
+            return cached

-        if estimates:
-            low, mid, high = extract_estimates(estimates)
-            return {
-                "URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high,
-                **data.get("attributes", {}), **data.get("rentEstimate", {}),
-                **history_sales[0]
-            }
-
-    # Otherwise scrape live
    with StealthSession() as session:
-        attempts = 0
-        while attempts < 5:
+        for attempt in range(5):
            output = scrape_all_estimates(session, url)
+
            if not output["is_blocked"] and output["estimates"]:
-                open(cache_path, "w").write(output["html"])
+                html = output.get("response_html")
+                if html:
+                    with open(cache_path, "w", encoding="utf-8") as f:
+                        f.write(html)
+
+                history = output.get("historicSales") or [{}]
                low, mid, high = extract_estimates(output["estimates"])
-                history_sales = output.get("historicSales", [{}])
-                if len(history_sales) == 0:
-                    history_sales = [{}]
+
                return {
-                    "URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high,
+                    "URL": url,
+                    "Low Estimate": low,
+                    "Middle Estimate": mid,
+                    "High Estimate": high,
                    **output.get("attributes", {}),
-                    **output.get("rent", {}),
-                    **history_sales[0]
+                    **output.get("rentEstimate", {}),
+                    **history[0],
                }
-            attempts += 1
-            print(f"[Attempt {attempts}] Blocked or empty for {url}")
+
            random_delay()

-        # If still blocked, return placeholders
-        return {"URL": url, "Low Estimate": None, "Middle Estimate": None, "High Estimate": None}
+    return {
+        "URL": url,
+        "Low Estimate": None,
+        "Middle Estimate": None,
+        "High Estimate": None,
+    }


 def parse_price(p):
-    if p is None:
+    if not p:
        return None

    p = p.replace("£", "").strip().lower()
-    if not p:
-        return None
    if p.endswith("k"):
        return float(p[:-1]) * 1_000
-    elif p.endswith("m"):
+    if p.endswith("m"):
        return float(p[:-1]) * 1_000_000
-    else:
-        try:
-            return float(p.replace(",", ""))
-        except ValueError:
-            return None
+
+    try:
+        return float(p.replace(",", ""))
+    except ValueError:
+        return None


 if __name__ == "__main__":
-    # Load portfolio
    asset_list = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/October 2025 AL portfolio/22.10 AL Portfolio - "
-        "Standardised - partial UPRN fill.xlsx",
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+        "Project/modelling_sample.xlsx",
        sheet_name="Standardised Asset List"
    )
+
    asset_list = asset_list[~pd.isnull(asset_list["epc_os_uprn"])]
+    asset_list = asset_list.drop_duplicates("epc_os_uprn")
    asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str)
+
    uprns = asset_list["epc_os_uprn"].tolist()
    urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns]

-    # Limit concurrency to avoid blocks
-    with Pool(processes=2) as pool:  # fewer processes = fewer fingerprints
+    with Pool(processes=2) as pool:
        estimates_list = list(
            tqdm(pool.imap(parallel_task, urls), total=len(urls))
        )

    df = pd.DataFrame(estimates_list)
-
-    print(df.head())
-
    df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/")
    df["valuation"] = df["Middle Estimate"].apply(parse_price)

    df.to_csv("zoopla_estimates.csv", index=False)

-    # Merge with asset list
    merged = asset_list.merge(
        df[["uprn", "valuation"]],
        left_on="epc_os_uprn",
        right_on="uprn",
        how="left"
    )
+
    merged.to_excel(
        "20251029 AL Portfolio - Standardised - with valuations.xlsx",
        index=False
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@ -11,8 +11,8 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod

 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 388
-SCENARIOS = [803]
+PORTFOLIO_ID = 404
+SCENARIOS = [829]


 def get_data(portfolio_id, scenario_ids):
@ -121,7 +121,8 @@ recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures

 df = properties_df[
    [
-        "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
+        "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
+        "heating", "windows",
        "current_epc_rating",
        "current_sap_points", "total_floor_area", "number_of_rooms",
    ]
@ -143,7 +144,7 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3

 # asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
 asset_list = read_excel_from_s3(
-    bucket_name="retrofit-plan-inputs-dev", file_key='2/388/20251208T203603925Z/asset_list.xlsx',
+    bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx",
    header_row=0, sheet_name="Standardised Asset List"
 )
 asset_list = pd.DataFrame(asset_list)
@ -201,11 +202,15 @@ asset_list = asset_list.merge(
 )

 # For exporting
-asset_list.to_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-    "Project/20251209_sample_package_data.xlsx",
+df.to_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - "
+    "with ID.xlsx",
    index=False
 )
+# asset_list.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx",
+#     index=False
+# )

 condition_costs = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",