remove redundant code

This commit is contained in:
Khalim Conn-Kowlessar 2026-03-30 18:47:13 +01:00
parent 354c8fcb27
commit 1173066888
2 changed files with 13 additions and 55 deletions

View file

@@ -134,10 +134,18 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un
body_dict = {
"task_id": "test",
"subtask_id": "test",
"portfolio_id": 647,
"portfolio_id": 655,
"scenario_ids": [],
"default_plans_only": True,
}
body_dict = {
"task_id": "test",
"subtask_id": "test",
"portfolio_id": 655,
"scenario_ids": [1174],
"default_plans_only": False,
}
:param event: Lambda event containing export request details
:param context: Lambda context (not used in this handler but included for completeness)
:return: HTTP response indicating success or failure of the export operation
@@ -159,54 +167,6 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un
with db_read_session() as session:
exported_files = process_export(payload, session)
# Merge with input
raw_input1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/eon - 20260323 address sanitisation - Standardised.xlsx",
sheet_name="Standardised Asset List",
)
raw_input2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/eon - 20260323 address sanitisation - Standardised.xlsx",
sheet_name="Addresses needing validation",
)
raw_input = pd.concat([raw_input1, raw_input2], ignore_index=True)
raw_input["epc_os_uprn"] = np.where(
pd.isnull(raw_input["epc_os_uprn"]),
raw_input["ordnance_survey_uprn"],
raw_input["epc_os_uprn"],
)
raw_input["epc_os_uprn"] = raw_input["epc_os_uprn"].astype(int)
left_df = raw_input[
["epc_os_uprn", "domna_address_1", "landlord_property_type", "landlord_property_type"]].copy()
combined = left_df.merge(
exported_files["default_plans"], how="right",
left_on="epc_os_uprn", right_on="uprn"
)
raw_addresses = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/North Tyneside Council. EPC D and Below with Type (1).xlsx")
raw_addresses = raw_addresses[["UPRN", "Address 1", "Postcode"]]
raw_addresses["Address 1"] = raw_addresses["Address 1"].str.replace(" ", " ")
raw_addresses = raw_addresses.drop_duplicates("Address 1")
combined2 = combined.merge(
raw_addresses, how="left", left_on="domna_address_1", right_on="Address 1"
)
combined2 = combined2.drop(columns=["landlord_property_id"])
combined2 = combined2.rename(columns={"UPRN": "landlord_property_id"})
combined2["epc_os_uprn"] = combined2["epc_os_uprn"].astype("Int64")
combined2.to_excel("/Users/khalimconn-kowlessar/Downloads/EON - recommended measures for review.xlsx")
removed = raw_addresses[~raw_addresses["UPRN"].isin(combined2["landlord_property_id"])]
df2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/20260330 EON - recommended measures for review (1).xlsx"
)
removed2 = raw_addresses[~raw_addresses["UPRN"].isin(df2["landlord_property_id"])]
raw_addresses[raw_addresses["Address 1"].duplicated()]
# TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url
_ = exported_files
return {

View file

@@ -2,6 +2,10 @@ import os
import pickle
import pandas as pd
import pytest
from datetime import datetime
from backend.ml_models.api import ModelApi
from backend.app.utils import sap_to_epc
from backend.app.config import get_prediction_buckets
def load_sample_certificates():
@@ -60,12 +64,6 @@ def load_cleaning_data():
@pytest.mark.integration
def test_rebaselining_pipeline_with_real_data():
import pandas as pd
from datetime import datetime
from backend.ml_models.api import ModelApi
from backend.app.utils import sap_to_epc
from backend.app.config import get_prediction_buckets
df = load_sample_certificates()
cleaning_data = load_cleaning_data()
input_properties = [make_property_from_row(row, cleaning_data=cleaning_data) for _, row in df.iterrows()]