diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 8ba1467a..f3ea0100 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -134,10 +134,18 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un body_dict = { "task_id": "test", "subtask_id": "test", - "portfolio_id": 647, + "portfolio_id": 655, "scenario_ids": [], "default_plans_only": True, } + + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 655, + "scenario_ids": [1174], + "default_plans_only": False, + } :param event: Lambda event containing export request details :param context: Lambda context (not used in this handler but included for completeness) :return: HTTP response indicating success or failure of the export operation @@ -159,54 +167,6 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un with db_read_session() as session: exported_files = process_export(payload, session) - # Merge with input - raw_input1 = pd.read_excel( - "/Users/khalimconn-kowlessar/Downloads/eon - 20260323 address sanitisation - Standardised.xlsx", - sheet_name="Standardised Asset List", - ) - raw_input2 = pd.read_excel( - "/Users/khalimconn-kowlessar/Downloads/eon - 20260323 address sanitisation - Standardised.xlsx", - sheet_name="Addresses needing validation", - ) - raw_input = pd.concat([raw_input1, raw_input2], ignore_index=True) - raw_input["epc_os_uprn"] = np.where( - pd.isnull(raw_input["epc_os_uprn"]), - raw_input["ordnance_survey_uprn"], - raw_input["epc_os_uprn"], - ) - raw_input["epc_os_uprn"] = raw_input["epc_os_uprn"].astype(int) - - left_df = raw_input[ - ["epc_os_uprn", "domna_address_1", "landlord_property_type", "landlord_property_type"]].copy() - - combined = left_df.merge( - exported_files["default_plans"], how="right", - left_on="epc_os_uprn", right_on="uprn" - ) - raw_addresses = pd.read_excel( - "/Users/khalimconn-kowlessar/Downloads/North Tyneside Council. EPC D and Below with Type (1).xlsx") - raw_addresses = raw_addresses[["UPRN", "Address 1", "Postcode"]] - raw_addresses["Address 1"] = raw_addresses["Address 1"].str.replace(" ", " ") - raw_addresses = raw_addresses.drop_duplicates("Address 1") - - combined2 = combined.merge( - raw_addresses, how="left", left_on="domna_address_1", right_on="Address 1" - ) - - combined2 = combined2.drop(columns=["landlord_property_id"]) - combined2 = combined2.rename(columns={"UPRN": "landlord_property_id"}) - combined2["epc_os_uprn"] = combined2["epc_os_uprn"].astype("Int64") - combined2.to_excel("/Users/khalimconn-kowlessar/Downloads/EON - recommended measures for review.xlsx") - - removed = raw_addresses[~raw_addresses["UPRN"].isin(combined2["landlord_property_id"])] - - df2 = pd.read_excel( - "/Users/khalimconn-kowlessar/Downloads/20260330 EON - recommended measures for review (1).xlsx" - ) - removed2 = raw_addresses[~raw_addresses["UPRN"].isin(df2["landlord_property_id"])] - - raw_addresses[raw_addresses["Address 1"].duplicated()] - # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url _ = exported_files return { diff --git a/backend/tests/test_rebaselining_pipeline.py b/backend/tests/test_rebaselining_pipeline.py index 9fbe1f35..a0c62f34 100644 --- a/backend/tests/test_rebaselining_pipeline.py +++ b/backend/tests/test_rebaselining_pipeline.py @@ -2,6 +2,10 @@ import os import pickle import pandas as pd import pytest +from datetime import datetime +from backend.ml_models.api import ModelApi +from backend.app.utils import sap_to_epc +from backend.app.config import get_prediction_buckets def load_sample_certificates(): @@ -60,12 +64,6 @@ def load_cleaning_data(): @pytest.mark.integration def test_rebaselining_pipeline_with_real_data(): - import pandas as pd - from datetime import datetime - from backend.ml_models.api import ModelApi - from backend.app.utils import sap_to_epc - from backend.app.config import get_prediction_buckets - df = load_sample_certificates() cleaning_data = load_cleaning_data() input_properties = [make_property_from_row(row, cleaning_data=cleaning_data) for _, row in df.iterrows()]