Fix BOM issue: strip the UTF-8 byte-order mark from CSV column names

This commit is contained in:
Jun-te Kim 2026-04-13 14:59:00 +00:00
parent 4ea7ef0e80
commit 17b8f22840
3 changed files with 52 additions and 45 deletions

View file

@ -74,23 +74,23 @@ def app():
"""
data_folder = "/workspaces/model/asset_list"
data_filename = "Calico ARA Upload Review.xlsx"
sheet_name = "Sheet1"
postcode_column = "Postcode"
address1_column = "Units"
data_filename = "Waverley UPRN Match.xlsx"
sheet_name = "in"
postcode_column = "postcode_clean"
address1_column = "domna_found_address"
address1_method = None
fulladdress_column = "Units"
address_cols_to_concat = ["Units"]
fulladdress_column = "domna_found_address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = None # Good to include if landlord gave
landlord_os_uprn = "domna_found_uprn"
landlord_property_type = "Property Type 1" # Good to include if landlord gave
landlord_built_form = None # Good to include if landlord gave
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "llid"
landlord_property_id = "WBC Ref"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None

View file

@ -351,9 +351,9 @@ def handler(event, context, local=False):
{
"body": json.dumps(
{
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
"sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d",
"s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-18T11:47:00.822579_f95467f5.csv",
"sub_task_id": "d7363c83-2ef7-4474-b30f-980fd587350c",
"task_id": "a042af13-8b57-4709-ad22-ecac1ccca4bd",
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/essex/Copy of EPC register Essex(August 2025)(in) (2).csv",
}
)
}
@ -424,6 +424,9 @@ def handler(event, context, local=False):
bucket, key = parse_s3_uri(s3_uri)
csv_data = read_csv_from_s3_dict(bucket, key)
df = pd.DataFrame(csv_data)
df.columns = [
c.lstrip("\ufeff") for c in df.columns
] # strip BOM from column names
logger.info(f"Loaded {len(df)} rows from S3")
except Exception as s3_error:
logger.error(f"Failed to read data from S3: {s3_error}")

View file

@ -26,15 +26,14 @@ def has_solar_with_battery(materials_list: Optional[List[Dict[str, Any]]]) -> bo
:return:
"""
for m in materials_list or []:
if (
m.get("type") == "solar_pv"
and m.get("includes_battery") is True
):
if m.get("type") == "solar_pv" and m.get("includes_battery") is True:
return True
return False
def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]:
def process_export(
payload: ExportRequest, session: Session
) -> Dict[Union[str, int], pd.DataFrame]:
export_files: Dict[Union[str, int], pd.DataFrame] = {}
db_methods = DbMethods(session)
@ -52,7 +51,9 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
logger.info("Retrieved %s plans for export", len(plans_df))
if plans_df.empty:
logger.info("Empty plans dataframe - no plans to export. Returning empty export.")
logger.info(
"Empty plans dataframe - no plans to export. Returning empty export."
)
return export_files
plan_ids: List[int] = plans_df["id"].tolist()
recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids)
@ -61,13 +62,12 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
recommendations_df = db_methods.attach_materials(recommendations_df)
recommendations_df["has_solar_with_battery"] = (
recommendations_df["materials"].apply(has_solar_with_battery)
)
recommendations_df["has_solar_with_battery"] = recommendations_df[
"materials"
].apply(has_solar_with_battery)
_filter = (
(recommendations_df["measure_type"] == "solar_pv")
& (recommendations_df["has_solar_with_battery"])
_filter = (recommendations_df["measure_type"] == "solar_pv") & (
recommendations_df["has_solar_with_battery"]
)
recommendations_df.loc[_filter, "measure_type"] = (
@ -83,10 +83,13 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
else:
scenario_recs = recommendations_df[
recommendations_df["scenario_id"] == group_key
]
]
if scenario_recs.empty:
logger.info("No recommendations found for group_key %s - skipping export for this group", group_key)
logger.info(
"No recommendations found for group_key %s - skipping export for this group",
group_key,
)
continue
measures_df: pd.DataFrame = scenario_recs[
@ -99,14 +102,12 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
values="estimated_cost",
).reset_index()
pivot["total_retrofit_cost"] = (
pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1)
)
pivot["total_retrofit_cost"] = pivot.drop(
columns=["property_id", "plan_name"]
).sum(axis=1)
post_sap: pd.DataFrame = (
scenario_recs.groupby("property_id")[["sap_points"]]
.sum()
.reset_index()
scenario_recs.groupby("property_id")[["sap_points"]].sum().reset_index()
)
df: pd.DataFrame = (
@ -117,7 +118,9 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
df["sap_points"] = df["sap_points"].fillna(0)
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc)
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(
sap_to_epc
)
export_files[group_key] = df
@ -128,22 +131,17 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
# Lambda Handler
# ============================================================
def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]:
def handler(
event: Mapping[str, Any], context: Optional[Any]
) -> Mapping[str, Union[int, str]]:
"""
Example event:
body_dict = {
"task_id": "test",
"subtask_id": "test",
"portfolio_id": 655,
"scenario_ids": [],
"default_plans_only": True,
}
body_dict = {
"task_id": "test",
"subtask_id": "test",
"portfolio_id": 655,
"scenario_ids": [1174],
"portfolio_id": 670,
"scenario_ids": [1199],
"default_plans_only": False,
}
:param event: Lambda event containing export request details
@ -167,8 +165,14 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un
with db_read_session() as session:
exported_files = process_export(payload, session)
output_path = f"/tmp/export_{payload.portfolio_id}.xlsx"
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
for group_key, df in exported_files.items():
sheet_name = str(group_key)[:31] # Excel sheet name limit
df.to_excel(writer, sheet_name=sheet_name, index=False)
logger.info("Exported to %s", output_path)
# TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url
_ = exported_files
return {
"statusCode": 200,
"body": json.dumps({}),