From 17b8f22840bbf431f440c6795fa5bab6a346db2c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 13 Apr 2026 14:59:00 +0000 Subject: [PATCH] fix BOM issue --- asset_list/app.py | 18 +++--- backend/address2UPRN/main.py | 9 ++- backend/export/property_scenarios/main.py | 70 ++++++++++++----------- 3 files changed, 52 insertions(+), 45 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index 5794eaf3..b0030667 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -74,23 +74,23 @@ def app(): """ data_folder = "/workspaces/model/asset_list" - data_filename = "Calico ARA Upload Review.xlsx" - sheet_name = "Sheet1" - postcode_column = "Postcode" - address1_column = "Units" + data_filename = "Waverley UPRN Match.xlsx" + sheet_name = "in" + postcode_column = "postcode_clean" + address1_column = "domna_found_address" address1_method = None - fulladdress_column = "Units" - address_cols_to_concat = ["Units"] + fulladdress_column = "domna_found_address" + address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None # Good to include if landlord gave + landlord_os_uprn = "domna_found_uprn" + landlord_property_type = "Property Type 1" # Good to include if landlord gave landlord_built_form = None # Good to include if landlord gave landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "llid" + landlord_property_id = "WBC Ref" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 97e2037a..647d46be 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -351,9 +351,9 @@ def handler(event, context, local=False): { "body": json.dumps( { - "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - "sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d", - "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-18T11:47:00.822579_f95467f5.csv", + "sub_task_id": "d7363c83-2ef7-4474-b30f-980fd587350c", + "task_id": "a042af13-8b57-4709-ad22-ecac1ccca4bd", + "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/essex/Copy of EPC register Essex(August 2025)(in) (2).csv", } ) } @@ -424,6 +424,9 @@ def handler(event, context, local=False): bucket, key = parse_s3_uri(s3_uri) csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) + df.columns = [ + c.lstrip("\ufeff") for c in df.columns + ] # strip BOM from column names logger.info(f"Loaded {len(df)} rows from S3") except Exception as s3_error: logger.error(f"Failed to read data from S3: {s3_error}") diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index f3ea0100..b179531d 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -26,15 +26,14 @@ def has_solar_with_battery(materials_list: Optional[List[Dict[str, Any]]]) -> bo :return: """ for m in materials_list or []: - if ( - m.get("type") == "solar_pv" - and m.get("includes_battery") is True - ): + if m.get("type") == "solar_pv" and m.get("includes_battery") is True: return True return False -def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]: +def process_export( + payload: ExportRequest, session: Session +) -> Dict[Union[str, int], pd.DataFrame]: export_files: Dict[Union[str, int], pd.DataFrame] = {} db_methods = DbMethods(session) @@ -52,7 +51,9 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, logger.info("Retrieved %s plans for export", len(plans_df)) if plans_df.empty: - logger.info("Empty plans dataframe - no plans to export. Returning empty export.") + logger.info( + "Empty plans dataframe - no plans to export. Returning empty export." + ) return export_files plan_ids: List[int] = plans_df["id"].tolist() recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids) @@ -61,13 +62,12 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, recommendations_df = db_methods.attach_materials(recommendations_df) - recommendations_df["has_solar_with_battery"] = ( - recommendations_df["materials"].apply(has_solar_with_battery) - ) + recommendations_df["has_solar_with_battery"] = recommendations_df[ + "materials" + ].apply(has_solar_with_battery) - _filter = ( - (recommendations_df["measure_type"] == "solar_pv") - & (recommendations_df["has_solar_with_battery"]) + _filter = (recommendations_df["measure_type"] == "solar_pv") & ( + recommendations_df["has_solar_with_battery"] ) recommendations_df.loc[_filter, "measure_type"] = ( @@ -83,10 +83,13 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, else: scenario_recs = recommendations_df[ recommendations_df["scenario_id"] == group_key - ] + ] if scenario_recs.empty: - logger.info("No recommendations found for group_key %s - skipping export for this group", group_key) + logger.info( + "No recommendations found for group_key %s - skipping export for this group", + group_key, + ) continue measures_df: pd.DataFrame = scenario_recs[ @@ -99,14 +102,12 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, values="estimated_cost", ).reset_index() - pivot["total_retrofit_cost"] = ( - pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1) - ) + pivot["total_retrofit_cost"] = pivot.drop( + columns=["property_id", "plan_name"] + ).sum(axis=1) post_sap: pd.DataFrame = ( - scenario_recs.groupby("property_id")[["sap_points"]] - .sum() - .reset_index() + scenario_recs.groupby("property_id")[["sap_points"]].sum().reset_index() ) df: pd.DataFrame = ( @@ -117,7 +118,9 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, df["sap_points"] = df["sap_points"].fillna(0) df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] - df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc) + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply( + sap_to_epc + ) export_files[group_key] = df @@ -128,22 +131,17 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, # Lambda Handler # ============================================================ -def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]: + +def handler( + event: Mapping[str, Any], context: Optional[Any] +) -> Mapping[str, Union[int, str]]: """ Example event: body_dict = { "task_id": "test", "subtask_id": "test", - "portfolio_id": 655, - "scenario_ids": [], - "default_plans_only": True, - } - - body_dict = { - "task_id": "test", - "subtask_id": "test", - "portfolio_id": 655, - "scenario_ids": [1174], + "portfolio_id": 670, + "scenario_ids": [1199], "default_plans_only": False, } :param event: Lambda event containing export request details @@ -167,8 +165,14 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un with db_read_session() as session: exported_files = process_export(payload, session) + output_path = f"/tmp/export_{payload.portfolio_id}.xlsx" + with pd.ExcelWriter(output_path, engine="openpyxl") as writer: + for group_key, df in exported_files.items(): + sheet_name = str(group_key)[:31] # Excel sheet name limit + df.to_excel(writer, sheet_name=sheet_name, index=False) + + logger.info("Exported to %s", output_path) # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url - _ = exported_files return { "statusCode": 200, "body": json.dumps({}),