From 17b8f22840bbf431f440c6795fa5bab6a346db2c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 13 Apr 2026 14:59:00 +0000 Subject: [PATCH 1/4] fix BOM issue --- asset_list/app.py | 18 +++--- backend/address2UPRN/main.py | 9 ++- backend/export/property_scenarios/main.py | 70 ++++++++++++----------- 3 files changed, 52 insertions(+), 45 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index 5794eaf3..b0030667 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -74,23 +74,23 @@ def app(): """ data_folder = "/workspaces/model/asset_list" - data_filename = "Calico ARA Upload Review.xlsx" - sheet_name = "Sheet1" - postcode_column = "Postcode" - address1_column = "Units" + data_filename = "Waverley UPRN Match.xlsx" + sheet_name = "in" + postcode_column = "postcode_clean" + address1_column = "domna_found_address" address1_method = None - fulladdress_column = "Units" - address_cols_to_concat = ["Units"] + fulladdress_column = "domna_found_address" + address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None # Good to include if landlord gave + landlord_os_uprn = "domna_found_uprn" + landlord_property_type = "Property Type 1" # Good to include if landlord gave landlord_built_form = None # Good to include if landlord gave landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "llid" + landlord_property_id = "WBC Ref" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 97e2037a..647d46be 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -351,9 +351,9 @@ def handler(event, context, local=False): { "body": json.dumps( { - "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - "sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d", - "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-18T11:47:00.822579_f95467f5.csv", + "sub_task_id": "d7363c83-2ef7-4474-b30f-980fd587350c", + "task_id": "a042af13-8b57-4709-ad22-ecac1ccca4bd", + "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/essex/Copy of EPC register Essex(August 2025)(in) (2).csv", } ) } @@ -424,6 +424,9 @@ def handler(event, context, local=False): bucket, key = parse_s3_uri(s3_uri) csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) + df.columns = [ + c.lstrip("\ufeff") for c in df.columns + ] # strip BOM from column names logger.info(f"Loaded {len(df)} rows from S3") except Exception as s3_error: logger.error(f"Failed to read data from S3: {s3_error}") diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index f3ea0100..b179531d 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -26,15 +26,14 @@ def has_solar_with_battery(materials_list: Optional[List[Dict[str, Any]]]) -> bo :return: """ for m in materials_list or []: - if ( - m.get("type") == "solar_pv" - and m.get("includes_battery") is True - ): + if m.get("type") == "solar_pv" and m.get("includes_battery") is True: return True return False -def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]: +def process_export( + payload: ExportRequest, session: Session +) -> Dict[Union[str, int], pd.DataFrame]: export_files: Dict[Union[str, int], pd.DataFrame] = {} db_methods = DbMethods(session) @@ -52,7 +51,9 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, logger.info("Retrieved %s plans for export", len(plans_df)) if plans_df.empty: - logger.info("Empty plans dataframe - no plans to export. Returning empty export.") + logger.info( + "Empty plans dataframe - no plans to export. Returning empty export." + ) return export_files plan_ids: List[int] = plans_df["id"].tolist() recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids) @@ -61,13 +62,12 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, recommendations_df = db_methods.attach_materials(recommendations_df) - recommendations_df["has_solar_with_battery"] = ( - recommendations_df["materials"].apply(has_solar_with_battery) - ) + recommendations_df["has_solar_with_battery"] = recommendations_df[ + "materials" + ].apply(has_solar_with_battery) - _filter = ( - (recommendations_df["measure_type"] == "solar_pv") - & (recommendations_df["has_solar_with_battery"]) + _filter = (recommendations_df["measure_type"] == "solar_pv") & ( + recommendations_df["has_solar_with_battery"] ) recommendations_df.loc[_filter, "measure_type"] = ( @@ -83,10 +83,13 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, else: scenario_recs = recommendations_df[ recommendations_df["scenario_id"] == group_key - ] + ] if scenario_recs.empty: - logger.info("No recommendations found for group_key %s - skipping export for this group", group_key) + logger.info( + "No recommendations found for group_key %s - skipping export for this group", + group_key, + ) continue measures_df: pd.DataFrame = scenario_recs[ @@ -99,14 +102,12 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, values="estimated_cost", ).reset_index() - pivot["total_retrofit_cost"] = ( - pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1) - ) + pivot["total_retrofit_cost"] = pivot.drop( + columns=["property_id", "plan_name"] + ).sum(axis=1) post_sap: pd.DataFrame = ( - scenario_recs.groupby("property_id")[["sap_points"]] - .sum() - .reset_index() + scenario_recs.groupby("property_id")[["sap_points"]].sum().reset_index() ) df: pd.DataFrame = ( @@ -117,7 +118,9 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, df["sap_points"] = df["sap_points"].fillna(0) df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] - df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc) + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply( + sap_to_epc + ) export_files[group_key] = df @@ -128,22 +131,17 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, # Lambda Handler # ============================================================ -def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]: + +def handler( + event: Mapping[str, Any], context: Optional[Any] +) -> Mapping[str, Union[int, str]]: """ Example event: body_dict = { "task_id": "test", "subtask_id": "test", - "portfolio_id": 655, - "scenario_ids": [], - "default_plans_only": True, - } - - body_dict = { - "task_id": "test", - "subtask_id": "test", - "portfolio_id": 655, - "scenario_ids": [1174], + "portfolio_id": 670, + "scenario_ids": [1199], "default_plans_only": False, } :param event: Lambda event containing export request details @@ -167,8 +165,14 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un with db_read_session() as session: exported_files = process_export(payload, session) + output_path = f"/tmp/export_{payload.portfolio_id}.xlsx" + with pd.ExcelWriter(output_path, engine="openpyxl") as writer: + for group_key, df in exported_files.items(): + sheet_name = str(group_key)[:31] # Excel sheet name limit + df.to_excel(writer, sheet_name=sheet_name, index=False) + + logger.info("Exported to %s", output_path) # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url - _ = exported_files return { "statusCode": 200, "body": json.dumps({}), From 98c9a1df74f9330ef88542b76918cbc71915f310 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 13 Apr 2026 15:15:42 +0000 Subject: [PATCH 2/4] added fix for utf --- backend/address2UPRN/main.py | 3 --- utils/s3.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 647d46be..79c0de69 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -424,9 +424,6 @@ def handler(event, context, local=False): bucket, key = parse_s3_uri(s3_uri) csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) - df.columns = [ - c.lstrip("\ufeff") for c in df.columns - ] # strip BOM from column names logger.info(f"Loaded {len(df)} rows from S3") except Exception as s3_error: logger.error(f"Failed to read data from S3: {s3_error}") diff --git a/utils/s3.py b/utils/s3.py index 242e0db5..930e2e15 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -330,7 +330,7 @@ def read_csv_from_s3(bucket_name: str, filepath: str) -> list[dict[str, str]]: body = s3_object["Body"].read() # Use StringIO to create a file-like object from the string - csv_data = StringIO(body.decode("utf-8")) + csv_data = StringIO(body.decode("utf-8-sig")) # Use csv library to read it into a list of dictionaries reader = csv.DictReader(csv_data) From ef366f1cd5eade72cd479767699c7bb9cca96df7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 13 Apr 2026 15:20:10 +0000 Subject: [PATCH 3/4] be the same as main --- backend/export/property_scenarios/main.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index b179531d..0ab59e27 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -140,8 +140,16 @@ def handler( body_dict = { "task_id": "test", "subtask_id": "test", - "portfolio_id": 670, - "scenario_ids": [1199], + "portfolio_id": 655, + "scenario_ids": [], + "default_plans_only": True, + } + + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 655, + "scenario_ids": [1174], "default_plans_only": False, } :param event: Lambda event containing export request details @@ -165,14 +173,8 @@ def handler( with db_read_session() as session: exported_files = process_export(payload, session) - output_path = f"/tmp/export_{payload.portfolio_id}.xlsx" - with pd.ExcelWriter(output_path, engine="openpyxl") as writer: - for group_key, df in exported_files.items(): - sheet_name = str(group_key)[:31] # Excel sheet name limit - df.to_excel(writer, sheet_name=sheet_name, index=False) - - logger.info("Exported to %s", output_path) # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url + _ = exported_files return { "statusCode": 200, "body": json.dumps({}), From 9c1181475ea5b4ba41d3b2c2e1f87ca73b3f003b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 13 Apr 2026 15:20:55 +0000 Subject: [PATCH 4/4] be the same as main --- backend/export/property_scenarios/main.py | 50 ++++++++++------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 0ab59e27..f3ea0100 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -26,14 +26,15 @@ def has_solar_with_battery(materials_list: Optional[List[Dict[str, Any]]]) -> bo :return: """ for m in materials_list or []: - if m.get("type") == "solar_pv" and m.get("includes_battery") is True: + if ( + m.get("type") == "solar_pv" + and m.get("includes_battery") is True + ): return True return False -def process_export( - payload: ExportRequest, session: Session -) -> Dict[Union[str, int], pd.DataFrame]: +def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]: export_files: Dict[Union[str, int], pd.DataFrame] = {} db_methods = DbMethods(session) @@ -51,9 +52,7 @@ def process_export( logger.info("Retrieved %s plans for export", len(plans_df)) if plans_df.empty: - logger.info( - "Empty plans dataframe - no plans to export. Returning empty export." - ) + logger.info("Empty plans dataframe - no plans to export. Returning empty export.") return export_files plan_ids: List[int] = plans_df["id"].tolist() recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids) @@ -62,12 +61,13 @@ def process_export( recommendations_df = db_methods.attach_materials(recommendations_df) - recommendations_df["has_solar_with_battery"] = recommendations_df[ - "materials" - ].apply(has_solar_with_battery) + recommendations_df["has_solar_with_battery"] = ( + recommendations_df["materials"].apply(has_solar_with_battery) + ) - _filter = (recommendations_df["measure_type"] == "solar_pv") & ( - recommendations_df["has_solar_with_battery"] + _filter = ( + (recommendations_df["measure_type"] == "solar_pv") + & (recommendations_df["has_solar_with_battery"]) ) recommendations_df.loc[_filter, "measure_type"] = ( @@ -83,13 +83,10 @@ def process_export( else: scenario_recs = recommendations_df[ recommendations_df["scenario_id"] == group_key - ] + ] if scenario_recs.empty: - logger.info( - "No recommendations found for group_key %s - skipping export for this group", - group_key, - ) + logger.info("No recommendations found for group_key %s - skipping export for this group", group_key) continue measures_df: pd.DataFrame = scenario_recs[ @@ -102,12 +99,14 @@ def process_export( values="estimated_cost", ).reset_index() - pivot["total_retrofit_cost"] = pivot.drop( - columns=["property_id", "plan_name"] - ).sum(axis=1) + pivot["total_retrofit_cost"] = ( + pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1) + ) post_sap: pd.DataFrame = ( - scenario_recs.groupby("property_id")[["sap_points"]].sum().reset_index() + scenario_recs.groupby("property_id")[["sap_points"]] + .sum() + .reset_index() ) df: pd.DataFrame = ( @@ -118,9 +117,7 @@ def process_export( df["sap_points"] = df["sap_points"].fillna(0) df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] - df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply( - sap_to_epc - ) + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc) export_files[group_key] = df @@ -131,10 +128,7 @@ def process_export( # Lambda Handler # ============================================================ - -def handler( - event: Mapping[str, Any], context: Optional[Any] -) -> Mapping[str, Union[int, str]]: +def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]: """ Example event: body_dict = {