From d51af4112599344df73effa02a5bebde1c717406 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 20 Feb 2026 12:12:43 +0000 Subject: [PATCH 01/19] tweaked inputs for standardised asset list --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 288 +++++++++++++------------ asset_list/app.py | 59 ++--- asset_list/mappings/built_form.py | 103 ++++++++- asset_list/mappings/heating_systems.py | 19 +- asset_list/mappings/property_type.py | 5 +- asset_list/mappings/roof.py | 4 +- asset_list/mappings/walls.py | 5 +- 9 files changed, 298 insertions(+), 189 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 28e17e2a..5f354a27 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -36,14 +36,13 @@ from dotenv import load_dotenv logger = setup_logger() load_dotenv(dotenv_path="../backend/.env") - # OpenAI API Key (set this in your environment variables for security) -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "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") +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", + "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") class DataRemapper: def __init__(self, standard_values, standard_map=None, max_tokens=1000): - print(f"{OPENAI_API_KEY}") """ Initialize the remapper with standard values and a predefined mapping. 
@@ -1298,8 +1297,8 @@ class AssetList: self.standardised_asset_list[ self.ATTRIBUTE_HAS_SOLAR ] = self.standardised_asset_list[ - self.FIND_EPC_DATA_NAMES["Solar photovoltaics"] - ] | ~self.standardised_asset_list[ + self.FIND_EPC_DATA_NAMES["Solar photovoltaics"] + ] | ~self.standardised_asset_list[ self.EPC_API_DATA_NAMES["photo-supply"] ].isin( ["0.0", 0, None, "", np.nan] @@ -1317,7 +1316,7 @@ class AssetList: property_type=( str(x[self.STANDARD_PROPERTY_TYPE]).title() if str(x[self.STANDARD_PROPERTY_TYPE]).title() - in accepted_epc_property_types + in accepted_epc_property_types else ( x[self.EPC_API_DATA_NAMES["property-type"]] if not pd.isnull( @@ -1375,9 +1374,9 @@ class AssetList: self.standardised_asset_list.apply( lambda x: estimate_perimeter( floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]] - / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] - / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], ), axis=1, ) @@ -1462,7 +1461,7 @@ class AssetList: year_lower_bound = ( 2007 if x[self.EPC_API_DATA_NAMES["construction-age-band"]] - == "England and Wales: 2007 onwards" + == "England and Wales: 2007 onwards" else 2012 ) @@ -1517,7 +1516,7 @@ class AssetList: age_band_matches = ( "EPC Age Band Matches Year Built" if x[self.STANDARD_YEAR_BUILT] - == int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]) + == int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]) else "EPC Age Band is different from Year Built" ) @@ -1547,7 +1546,7 @@ class AssetList: age_band_matches = ( "EPC Age Band Matches Year Built" if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date)) - and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date)) + and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date)) else ( "EPC Age Band is older than Year Built" if x[self.STANDARD_YEAR_BUILT] > float(upper_date) @@ -1719,22 +1718,22 @@ class AssetList: if 
self.non_intrusives_present: if self.new_format_non_insturives_present_v2: non_intrusives_wall_filter = ( - self.standardised_asset_list["non-intrusives: Construction"] - == "CAVITY" - ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( ["EMPTY", "PARTIAL", "EMPTY CAVITY"] ) else: non_intrusives_wall_filter = ( - self.standardised_asset_list["non-intrusives: Construction"] - == "CAVITY" - ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( ["EMPTY", "PARTIAL"] ) elif self.old_format_non_intrusives_present: non_intrusives_wall_filter = self.standardised_asset_list[ - "non-intrusives: WFT Findings" - ].str.lower().str.strip().isin( + "non-intrusives: WFT Findings" + ].str.lower().str.strip().isin( [ "empty cavity", "partial fill", @@ -1744,18 +1743,18 @@ class AssetList: "empty cav", ] ) | ( - ( - self.standardised_asset_list["non-intrusives: WFT Findings"] - .str.lower() - .str.strip() - .str.contains("empty cavity|partial fill") - & ~self.standardised_asset_list["non-intrusives: WFT Findings"] - .astype(str) - .str.lower() - .str.strip() - .str.contains("major access issues") - ) - ) + ( + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .str.contains("empty cavity|partial fill") + & ~self.standardised_asset_list["non-intrusives: WFT Findings"] + .astype(str) + .str.lower() + .str.strip() + .str.contains("major access issues") + ) + ) else: # We set the filter to False, as we have no non-intrusives non_intrusives_wall_filter = False @@ -1767,12 +1766,12 @@ class AssetList: ) else: year_built_filter = ( - self.standardised_asset_list[self.STANDARD_YEAR_BUILT] - <= self.EMPTY_CAVITY_YEAR_THRESHOLD - ) | 
( - self.standardised_asset_list["epc_year_upper_bound"] - <= self.EMPTY_CAVITY_YEAR_THRESHOLD - ) + self.standardised_asset_list[self.STANDARD_YEAR_BUILT] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) | ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) # Criteria: # The property isn't a bedsit @@ -1813,8 +1812,8 @@ class AssetList: ] = ( ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity_has_solar" - ] + "non_intrusive_indicates_empty_cavity_has_solar" + ] & ( ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( ["bedsit"] @@ -1890,8 +1889,8 @@ class AssetList: .str.lower() .isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS) | self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( - ["uninsulated cavity"] - ) + ["uninsulated cavity"] + ) ) ###################################################### @@ -1928,8 +1927,8 @@ class AssetList: extraction_wall_filter = ( extraction_wall_filter & ~self.standardised_asset_list[ - "non-intrusives: Eligibility (Red/Yellow/Green)" - ].isin(["RED"]) + "non-intrusives: Eligibility (Red/Yellow/Green)" + ].isin(["RED"]) ) self.standardised_asset_list[ @@ -2025,26 +2024,26 @@ class AssetList: self.standardised_asset_list[ "solar_epc_data_indicates_correct_heating_system" ] = ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"] - ] - .str.lower() - .str.contains( - "air source heat pump|ground source heat pump|boiler and radiators, electric" - ) - ) | ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"] - ] - .str.lower() - .str.contains("electric storage heaters") - & ( self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheatcont-description"] + self.EPC_API_DATA_NAMES["mainheat-description"] ] - == "Controls for high heat retention storage heaters" + .str.lower() + .str.contains( + "air source heat pump|ground 
source heat pump|boiler and radiators, electric" + ) + ) | ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .str.contains("electric storage heaters") + & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] + == "Controls for high heat retention storage heaters" + ) ) - ) # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the @@ -2052,25 +2051,25 @@ class AssetList: self.standardised_asset_list[ "solar_epc_data_indicates_requires_heating_upgrade" ] = ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"] - ] - .str.lower() - .str.contains("electric storage heaters|room heaters") - & ( self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheatcont-description"] + self.EPC_API_DATA_NAMES["mainheat-description"] ] - != "Controls for high heat retention storage heaters" + .str.lower() + .str.contains("electric storage heaters|room heaters") + & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] + != "Controls for high heat retention storage heaters" + ) + ) & ( + ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + ["district heating", "communal heating", "communal gas boiler"] + ) + & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] + .astype(str) + .str.contains("gas ") ) - ) & ( - ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - ["district heating", "communal heating", "communal gas boiler"] - ) - & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] - .astype(str) - .str.contains("gas ") - ) # Basic check - both of the previous two shouldn't be true simultaneously if ( @@ -2150,8 +2149,8 @@ class AssetList: self.standardised_asset_list[ "solar_non_intrusives_walls_insulated" ] = 
self.standardised_asset_list[ - "non-intrusives: WFT Findings" - ].str.lower().str.strip().isin( + "non-intrusives: WFT Findings" + ].str.lower().str.strip().isin( [ "retro drilled", "retro filled", @@ -2160,8 +2159,8 @@ class AssetList: "retro drilled and filled", ] ) | self.standardised_asset_list[ - "non-intrusives: WFT Findings" - ].str.lower().str.strip().str.contains( + "non-intrusives: WFT Findings" + ].str.lower().str.strip().str.contains( "retro drilled" ) else: @@ -2178,14 +2177,19 @@ class AssetList: ) self.standardised_asset_list["solar_epc_walls_insulated"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]] - .str.lower() - .str.contains("|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS)) - ) | ( - self.standardised_asset_list["walls_u_value"].apply( - lambda x: x <= 0.7 if not pd.isnull(x) else False - ) - ) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES[ + "walls-description"]] + .str.lower() + .str.contains("|".join( + self.EPC_INSULATED_WALLS_SUBSTRINGS)) + ) | ( + self.standardised_asset_list[ + "walls_u_value"].apply( + lambda x: x <= 0.7 if not pd.isnull( + x) else False + ) + ) roof_data = [] for desc in self.standardised_asset_list[ @@ -2227,20 +2231,20 @@ class AssetList: self.standardised_asset_list[ "solar_epc_loft_needs_topup" ] = self.standardised_asset_list[ - self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS - ].apply( + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].apply( lambda x: int(x) < 200 if str(x).isdigit() else False ) | ( - ( - self.standardised_asset_list["is_loft"] - | self.standardised_asset_list["is_pitched"] + ( + self.standardised_asset_list["is_loft"] + | self.standardised_asset_list["is_pitched"] + ) + & ( + self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].isin(["below average", "none"]) + ) ) - & ( - self.standardised_asset_list[ - self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS - ].isin(["below average", "none"]) - ) - ) 
self.standardised_asset_list["epc_has_floor_recommendation"] = ( self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False) @@ -2249,15 +2253,16 @@ class AssetList: # Check if the boiler is electric # We check if it contains both the terms boiler & electric self.standardised_asset_list["has_electric_boiler"] = ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"] - ] - .str.lower() - .isin(["boiler and radiators, electric"]) - ) | ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] - == "electric boiler" - ) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .isin(["boiler and radiators, electric"]) + ) | ( + self.standardised_asset_list[ + self.STANDARD_HEATING_SYSTEM] + == "electric boiler" + ) #################################### # Check solar eligibility @@ -2395,11 +2400,11 @@ class AssetList: empty_cavity_map = { "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE - + ": ", + + ": ", "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property " - "already has solar: ", + "already has solar: ", "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, " - f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", + f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", } for variable, description in empty_cavity_map.items(): self.standardised_asset_list["cavity_reason"] = np.where( @@ -2415,8 +2420,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & ( self.standardised_asset_list["non-intrusives: WFT Findings"] .str.lower() @@ -2441,8 +2446,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + 
"non_intrusive_indicates_empty_cavity" + ] & self.standardised_asset_list[ "non_intrusive_indicates_cavity_extraction" ] @@ -2457,8 +2462,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & ( self.standardised_asset_list["non-intrusives: Insulated"] == "RETRO DRILLED" @@ -2474,8 +2479,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & ( self.standardised_asset_list["non-intrusives: Insulated"] == "FILLED AT BUILD" @@ -2491,8 +2496,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"], @@ -2636,7 +2641,7 @@ class AssetList: identified_work = self.standardised_asset_list[ ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | ~pd.isnull(self.standardised_asset_list["solar_reason"]) - ][self.DOMNA_PROPERTY_ID].values + ][self.DOMNA_PROPERTY_ID].values if self.DOMNA_PROPERTY_ID in self.outcomes.columns: self.outcomes_for_output = self.outcomes[ @@ -2671,12 +2676,12 @@ class AssetList: blocks_of_flats = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" - ] + ] non_blocks_of_flats = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" - ] + ] # Produce some aggregate figures self.work_type_figures = { @@ -2719,7 +2724,7 @@ class AssetList: blocks = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" - ].copy() + 
].copy() if blocks.empty: return @@ -2856,7 +2861,7 @@ class AssetList: self.standardised_asset_list = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" - ] + ] self.standardised_asset_list = pd.concat( [self.standardised_asset_list, expanded_blocks], ignore_index=True @@ -2936,7 +2941,7 @@ class AssetList: # find any block refs with more than 50% emptires viable_empty_blocks = self.block_analysis_df[ self.block_analysis_df["Percentage of Empties"] >= 0.50 - ] + ] if not viable_empty_blocks.empty: project_code_lookup = viable_empty_blocks[["Block Reference"]].copy() @@ -3175,7 +3180,7 @@ class AssetList: contact_details = pd.read_excel(local_filepath, sheet_name=sheet_name)[ [self.contact_detail_fields["landlord_property_id"]] + details_colnames - ] + ] contact_details = contact_details[ ~pd.isnull( contact_details[self.contact_detail_fields["landlord_property_id"]] @@ -3568,10 +3573,13 @@ class AssetList: "Non-Intrusives: Date Checked ": date_of_inspections, "Non-Intrusives: Wall Type ": non_intrusives_construction, "Non-intrusives: Insulation ": non_intrusives_insulated, - "Non-intrusives: Insulation Material ": non_intrusives_insulation_material, - "Non-Intrusives: CIGA Check Required ": non_intrusives_ciga_check_required, + "Non-intrusives: Insulation Material ": + non_intrusives_insulation_material, + "Non-Intrusives: CIGA Check Required ": + non_intrusives_ciga_check_required, "Non-Intrusives: PV Access Issues ": non_intrusives_pv_access, - "Non-Intrusives: Roof Orientation ": non_intrusives_roof_orientation, + "Non-Intrusives: Roof Orientation ": + non_intrusives_roof_orientation, "Non-Intrusives: Surveyor Notes ": non_intrusives_surveyor_notes, "Non-Intrusives: Surveyor Name ": non_intrusives_surveyor_name, "CIGA: Date Requested ": None, # TODO: Don't have this for the moment @@ -3748,8 +3756,8 @@ class AssetList: # We compare address line 1 to full address if any( df[self.STANDARD_FULL_ADDRESS] - 
.str.lower() - .str.contains(row["Address Line 1"].lower(), na=False) + .str.lower() + .str.contains(row["Address Line 1"].lower(), na=False) ): df = df[ df[self.STANDARD_FULL_ADDRESS] @@ -3989,7 +3997,7 @@ class AssetList: matched = matched[ matched["houseno"].astype(str) == house_no_to_match - ] + ] if matched.shape[0] == 1: lookup_i.append( { @@ -4014,7 +4022,7 @@ class AssetList: )[0] matched = matched[ matched[self.STANDARD_FULL_ADDRESS] == best_match - ] + ] lookup_i.append( { "row_id": x["row_id"], @@ -4325,7 +4333,7 @@ class AssetList: df = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]] - ] + ] if df.shape[0] == 1: matched.append( { @@ -4431,7 +4439,7 @@ class AssetList: )[1] ) > 90 - ] + ] if df.shape[0] == 0: unmatched.append(row["row_id"]) @@ -4439,8 +4447,8 @@ class AssetList: if any( df[self.STANDARD_FULL_ADDRESS] - .str.lower() - .str.contains( + .str.lower() + .str.contains( " ".join( [row[house_no_col], row["Street / Block Name"]] ).lower() @@ -4467,7 +4475,7 @@ class AssetList: row[property_type_col].split(" ")[-1].lower() ) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats") - ] + ] if df.shape[0] != 1: # We have multiple matches - it's likely because the landlord has a duplicate diff --git a/asset_list/app.py b/asset_list/app.py index 3e492118..0b792270 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -13,12 +13,11 @@ from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc -load_dotenv(dotenv_path="../backend/.env") +load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) - OPENAI_API_KEY = os.getenv( "OPENAI_API_KEY", ) @@ -74,24 +73,25 @@ def app(): Property UPRN """ - data_folder = "/workspaces/model/asset_list" - data_filename = "assests.xlsx" + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed" + # data_filename = "For 
Modelling - Final - reviewed.xlsx" + data_filename = "Missed Properties - with address.xlsx" sheet_name = "Sheet1" postcode_column = "Postcode" - address1_column = "Address" - address1_method = "house_number_extraction" - fulladdress_column = None - address_cols_to_concat = ["Address"] + address1_column = "address1" + address1_method = None + fulladdress_column = "address1" + address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = "UPRN" - landlord_property_type = "Archetype" - landlord_built_form = "Bedroom Count" - landlord_wall_construction = "Wall Insulation Type" - landlord_roof_construction = "Roof Type" - landlord_heating_system = "Boiler Type" + landlord_property_type = "Type" + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "Tab" + landlord_property_id = "Reference" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -243,7 +243,7 @@ def app(): if skip is not None and not force_retrieve_data: if i <= skip: continue - chunk = asset_list.standardised_asset_list[i : i + chunk_size] + chunk = asset_list.standardised_asset_list[i: i + chunk_size] epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, @@ -386,7 +386,7 @@ def app(): # Retrieve just the data we need epc_df = epc_df[ [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys()) - ].rename(columns=asset_list.EPC_API_DATA_NAMES) + ].rename(columns=asset_list.EPC_API_DATA_NAMES) # Look for columns not in the find my EPC data, which will have happened if we didn't # retrieve it in the first place @@ -403,16 +403,12 @@ def app(): find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) - ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), + 
].rename(columns=asset_list.FIND_EPC_DATA_NAMES), how="left", on=asset_list.DOMNA_PROPERTY_ID, ) asset_list.merge_data(epc_df) - # asset_list.standardised_asset_list = asset_list.standardised_asset_list[ - # asset_list.standardised_asset_list["domna_full_address"] - # != "120 Airdrie Crescent, Burnley, Lancashire" - # ] asset_list.extract_attributes() asset_list.identify_worktypes() @@ -426,27 +422,6 @@ def app(): os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" ) - # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data - - # Determine inspections priority - # solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][ - # "domna_postcode"].unique() - # asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( - # solar_jobs - # ) - # # Same for cav - # cavity_jobs = asset_list.standardised_asset_list[ - # ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"]) - # ]["domna_postcode"].unique() - # asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( - # cavity_jobs - # ) - # # We prioritise properties that are in solar areas and cavity areas - # import numpy as np - # asset_list.standardised_asset_list["inspection_priority"] = np.where( - # asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"], - # 1, 2 - # ) with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel( diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index d6466539..4842450d 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -528,6 +528,107 @@ BUILT_FORM_MAPPINGS = { 'House: Semi Detached: Top Floor': 'semi-detached', 'House: End Terrace: Ground Floor': 'end-terrace', 'Maisonette: 
Enclosed End Terrace: Mid Floor': 'enclosed end-terrace', - 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace' + 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace', + '2 BED MID TERRACED HOUSE': 'mid-terrace', + '4 BED SEMI DETACHED-PARLOURED': 'semi-detached', + '2 BED END TERRACED HOUSE': 'end-terrace', + '3 BED MID TERRACED HOUSE': 'mid-terrace', + '3 BED SEMI DETACHED HOUSE': 'semi-detached', + '3 BED MID TERRACE - PARLOURED': 'mid-terrace', + '3 BED END TERRACE - PARLOURED': 'end-terrace', + '4 BED+ END TERRACED HOUSE': 'end-terrace', + '3 BED END TERRACED HOUSE': 'end-terrace', + '3 BED SEMI DETACHED-PARLOURED': 'semi-detached', + '4 BED+ END TERRACE - PARLOURED': 'end-terrace', + '2 BED SEMI DETACHED HOUSE': 'semi-detached', + '3 BED DETACHED HOUSE': 'detached', + '2 BED GRD FLR COTT FLT-CNT STR': 'ground floor', + '2 BED 1ST FLOOR WALKUP FLAT': 'mid-floor', + '1 BED GRD FL COTT FLAT-OWN ENT': 'ground floor', + '1 BED 1ST FL WALK UP DECK ACC': 'mid-floor', + '2 BED MAISONETTE UPPER COM ENT': 'mid-floor', + '2 BED GRD FLR COTT FLT OWN ENT': 'ground floor', + '1 BED BUNGALOW': 'unknown', + '2 BED GRD FL COTT FLT-OWN ENTR': 'ground floor', + '1 BED 1ST FL COTT FLT-CNT STR': 'mid-floor', + '1 BED GRD FL WALK UP OWN ENT': 'ground floor', + '1 BED GRD FLOOR WALKUP FLAT': 'ground floor', + '2 BED GRD FLOOR WALKUP FLAT': 'ground floor', + '2 BED 1ST FLR FLT-SHELTERED': 'mid-floor', + '2 BED BUNGALOW': 'unknown', + '2 BED GRD FLR COTT FLT(P)-1950': 'ground floor', + + 'Ground Floor Front Left': 'ground floor', + 'End-Terrace House': 'end-terrace', + 'Ground floor': 'ground floor', + 'Ground Floor Front Right': 'ground floor', + 'End Terrace (GII List)': 'end-terrace', + 'Semi Detached House': 'semi-detached', + 'Ground Floor Right': 'ground floor', + 'PB Ground Floor Flat': 'ground floor', + 'Basement and Ground Floor': 'ground floor', + 'Semi-detached bungalow': 'detached', + 'Detached Cottage': 'detached', + 'Lower & Ground Floor': 'ground floor', + 'Ground 
FLoor Flat': 'ground floor', + 'ground floor': 'ground floor', + 'Ground Floor Left': 'ground floor', + 'Semi-detached House': 'detached', + 'Basement & Lower Ground': 'basement', + 'Semi-Detached House': 'detached', + 'Ground floor flat -': 'ground floor', + 'Basement Flat': 'basement', + 'semi-detached bungalow': 'semi-detached', + 'Lower Ground Floor Flat': 'ground floor', + 'Ground floor Flat': 'ground floor', + 'Ground Floor flat': 'ground floor', + 'Ground': 'ground floor', + 'Semi detached Bungalow': 'semi-detached', + 'ground floor flat': 'ground floor', + 'Mid terrace House': 'mid-terrace', + 'Raised Ground Floor': 'ground floor', + 'Basement Floor': 'basement', + 'Second floor flat': 'mid-floor', + 'Fourth Floor Flat': 'mid-floor', + 'First/Second Maisonette': 'mid-floor', + 'Ground/First': 'ground floor', + 'First and Second Floor': 'mid-floor', + 'Terrace House': 'mid-terrace', + '1st/2nd Floor Maisonette': 'mid-floor', + 'Semi-det House': 'semi-detached', + 'First': 'mid-floor', + 'Ground & First Floor': 'ground floor', + 'End of Terrace House': 'end-terrace', + '2nd Floor Purpose Built': 'mid-floor', + 'First/Second Floor Maison': 'mid-floor', + 'GFF purpose built': 'ground floor', + 'Second': 'mid-floor', + 'Semi-det House (GII List)': 'semi-detached', + '3rd and 4th Floor': 'mid-floor', + 'First Floor flat': 'mid-floor', + 'Mid-Terrace House': 'mid-terrace', + '1st & 2nd Floors': 'mid-floor', + 'Ground/first floor': 'ground floor', + 'FFF purpose built': 'mid-floor', + 'Second floor': 'mid-floor', + 'Second/Third floor': 'mid-floor', + 'First floor Flat': 'mid-floor', + 'First floor': 'mid-floor', + 'Lower Ground Flat': 'basement', + 'First Floor Rear Flat': 'mid-floor', + 'First & Second Floor': 'mid-floor', + 'Ground & Lower Ground': 'basement', + 'First Floor Rear': 'mid-floor', + 'First & Second': 'mid-floor', + 'First Floor Front': 'mid-floor', + 'First & Second Floors': 'mid-floor', + 'First/Second Floor': 'mid-floor', + 'Sem-detach house': 
'semi-detached', + 'Second Floor Flat (Top)': 'top-floor', + '3 FloorTerrace House': 'mid-terrace', + 'First floor flat': 'mid-floor', + 'First & Second Floor Flat': 'mid-floor', + 'First Floor Purpose Built': 'mid-floor', + 'Purpose built First Floor': 'mid-floor', } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 272d6279..5f962108 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -498,6 +498,23 @@ HEATING_MAPPINGS = { 'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler', 'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators', - 'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler' + 'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler', + + 'IDEAL ISAR HE30': 'gas combi boiler', + 'WORCESTER GREENSTAR 25 SI': 'gas combi boiler', + 'POTTERTON PROMAX COMBI 28 HE PLUS': 'gas combi boiler', + 'WORCESTER GREENSTAR 28I JUNIOR': 'gas combi boiler', + 'BAXI ASSURE 25 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX COMBI 28 HE PLUS A': 'gas combi boiler', + 'WORCESTER GREENSTAR 30 SI': 'gas combi boiler', + 'POTTERTON SUPRIMA 40L': 'gas boiler, radiators', + 'POTTERTON ASSURE 30 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX 28 COMBI ERP': 'gas combi boiler', + 'BAXI ASSURE 30 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX 18 SYSTEM ERP': 'gas boiler, radiators', + 'POTTERTON PROMAX COMBI 33 HE PLUS A': 'gas combi boiler', + 'POTTERTON SUPRIMA 40 HE': 'gas boiler, radiators', + 'FERROLI MODENA 102': 'gas boiler, radiators', + 'POTTERTON PROMAX COMBI 24 HE PLUS A': 'gas combi boiler' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 177a7549..71788c25 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -444,6 +444,9 @@ PROPERTY_MAPPING = { 'Warden Bungalow': 
'bungalow', 'Warden Flat': 'flat', 'Upper Floor Flat': 'flat', - 'Extracare Scheme': 'other' + 'Extracare Scheme': 'other', + + 'SHELTERED': 'unknown', + 'PARLOUR': 'unknown', } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 70cc8742..192238e0 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -320,6 +320,8 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Pitched (slates or tiles) access to loft, 100mm': 'pitched insulated', 'Pitched (slates or tiles) no loft access, 200mm': 'pitched insulated', 'Pitched (slates or tiles) access to loft, 200mm': 'pitched insulated', - 'Pitched (slates or tiles) access to loft, 50mm': 'pitched less than 100mm insulation' + 'Pitched (slates or tiles) access to loft, 50mm': 'pitched less than 100mm insulation', + + 'Pitched roofs': 'pitched unknown insulation', } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 1a252b33..c369204d 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -369,6 +369,9 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Solid Brick, As built': 'solid brick unknown insulation', 'System built, As built': 'system built unknown insulation', 'Timber frame, As built': 'timber frame unknown insulation', - 'Cavity, As built': 'cavity unknown insulation' + 'Cavity, As built': 'cavity unknown insulation', + 'FILLED CAVITY': 'filled cavity', + 'EXTERNAL': 'insulated solid brick', + 'AS BUILT': 'other' } From 3e0444b3a7228ea165ff8de39f6c8bfdbde1fa35 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 20 Feb 2026 17:01:09 +0000 Subject: [PATCH 02/19] working on export logic --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- .../export/property_scenarios/db_functions.py | 205 ++++++++++++++++++ .../export/property_scenarios/input_schema.py | 33 +++ backend/export/property_scenarios/main.py | 154 +++++++++++++ 5 files changed, 394 insertions(+), 2 deletions(-) create mode 100644 backend/export/property_scenarios/db_functions.py 
create mode 100644 backend/export/property_scenarios/input_schema.py create mode 100644 backend/export/property_scenarios/main.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py new file mode 100644 index 00000000..f527e738 --- /dev/null +++ b/backend/export/property_scenarios/db_functions.py @@ -0,0 +1,205 @@ +from typing import List, Any, Dict, Optional +import pandas as pd +from sqlalchemy import func +from sqlalchemy.orm import Session +from collections import defaultdict + +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, +) +from backend.app.db.models.portfolio import ( + PropertyModel, + PropertyDetailsEpcModel, +) +from utils.logger import setup_logger + +logger = setup_logger() + + +class DbMethods: + + def __init__(self, session: Session): + self.session = session + + def get_properties(self, portfolio_id: int) -> pd.DataFrame: + """ + Function to fetch the property data, for property scenario exports + :param portfolio_id: + :return: + """ + query = ( + self.session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) + + data = [ + { + **{ + col.name: getattr(row.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(row.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } + for row in query + ] + + return pd.DataFrame(data) + + def get_latest_plans( + 
self, + portfolio_id: int, + scenario_ids: Optional[List[int]] = None, + default_only: bool = False, + ) -> pd.DataFrame: + """ + Fetch latest plans. + + Modes: + 1) Scenario mode: latest per (scenario_id, property_id) + 2) Default mode: latest default plan per property (ignores scenario_ids) + + """ + + # ----------------------------- + # Sanity checks + # ----------------------------- + if default_only and scenario_ids: + # Override scenario_ids to make it explicit that they will be ignored in the query + scenario_ids = None + + if not default_only and not scenario_ids: + raise ValueError( + "Either scenario_ids must be provided " + "or default_only must be True." + ) + + # ----------------------------- + # Filter on just the default plans - we ignore the scenario ids. NOTE - this is specific to postgres + # and relies on DISTINCT ON behaviour. + # ----------------------------- + if default_only: + # Latest default plan per property (ignore scenarios entirely) + # DISTINCT ON (property_id) keeps the first row per property, + # ordered by created_at DESC so we get the newest one. + + plans_query = ( + self.session.query(PlanModel) + .filter(PlanModel.is_default.is_(True)) + .distinct(PlanModel.property_id) + .order_by( + PlanModel.property_id, + PlanModel.created_at.desc(), + ) + ) + + else: + # Latest plan per (scenario_id, property_id) + # DISTINCT ON (scenario_id, property_id) keeps the newest + # plan per scenario/property combination. 
+ + plans_query = ( + self.session.query(PlanModel) + .filter(PlanModel.scenario_id.in_(scenario_ids)) + .distinct( + PlanModel.scenario_id, + PlanModel.property_id, + ) + .order_by( + PlanModel.scenario_id, + PlanModel.property_id, + PlanModel.created_at.desc(), + ) + ) + + logger.info("Fetching plans") + plans = plans_query.all() + + return pd.DataFrame( + [ + { + col.name: getattr(plan, col.name) + for col in PlanModel.__table__.columns + } + for plan in plans + ] + ) + + def get_recommendations(self, plan_ids: List[int]) -> pd.DataFrame: + + if not plan_ids: + return pd.DataFrame() + + recs_query = ( + self.session.query( + Recommendation, + PlanModel.scenario_id, + ) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + .all() + ) + + data = [ + { + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, + "scenario_id": r.scenario_id, + } + for r in recs_query + ] + + return pd.DataFrame(data) + + def attach_materials(self, recommendations_df: pd.DataFrame) -> pd.DataFrame: + + if recommendations_df.empty: + recommendations_df["materials"] = [] + return recommendations_df + + rec_ids = recommendations_df["id"].tolist() + + materials_query = ( + self.session.query(RecommendationMaterials) + .filter(RecommendationMaterials.recommendation_id.in_(rec_ids)) + .all() + ) + + materials_map: Dict[int, List[Dict[str, Any]]] = defaultdict(list) + + for m in materials_query: + materials_map[m.recommendation_id].append( + { + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + } + ) + + recommendations_df["materials"] = recommendations_df["id"].apply( + lambda x: 
materials_map.get(x, []) + ) + + return recommendations_df diff --git a/backend/export/property_scenarios/input_schema.py b/backend/export/property_scenarios/input_schema.py new file mode 100644 index 00000000..4ef704a3 --- /dev/null +++ b/backend/export/property_scenarios/input_schema.py @@ -0,0 +1,33 @@ +from typing import Optional, Union, List +from pydantic import BaseModel, model_validator + + +class ExportRequest(BaseModel): + # uuid which maps to a specific export request, used for tracking and logging + task_id: Union[str, None] + # uuid which maps to a specific export operation, used for tracking and logging. subtask is the child of the + # task, where the work has been distributed across workers + subtask_id: Union[str, None] + # associated portfolio id for the export request + portfolio_id: int + # list of scenario ids to export + scenario_ids: List[int] + # boolean which will overwrite the scenario ids. If this is true, we will only export the default plan for each + # property and will ignore the scenario ids + default_plans_only: Optional[bool] = False + + @model_validator(mode="after") + def validate_default_plan_override(self): + """ + If default_plans_only is True and scenario_ids were provided, + we allow execution but make it explicit that scenario_ids + will be ignored. + """ + if self.default_plans_only and self.scenario_ids: + # We do NOT raise — we allow execution. + # We just mark the object so the handler can log/return a warning. 
+ object.__setattr__(self, "_scenario_ids_ignored", True) + else: + object.__setattr__(self, "_scenario_ids_ignored", False) + + return self diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py new file mode 100644 index 00000000..88ebf326 --- /dev/null +++ b/backend/export/property_scenarios/main.py @@ -0,0 +1,154 @@ +import json +from typing import List, Optional, Any, Mapping + +import pandas as pd +from sqlalchemy.orm import Session + +from backend.export.property_scenarios.input_schema import ExportRequest +from backend.export.property_scenarios.db_functions import DbMethods +from backend.app.db.connection import db_engine +from backend.app.utils import sap_to_epc +from utils.logger import setup_logger + +logger = setup_logger() + + +def process_export(config: ExportRequest) -> List[str]: + exported_files: List[str] = [] + + with Session(bind=db_engine) as session: + + db_methods = DbMethods(session) + + properties_df = db_methods.get_properties(config.portfolio_id) + + plans_df = db_methods.get_latest_plans( + portfolio_id=config.portfolio_id, + scenario_ids=config.scenario_ids, + default_only=config.default_plans_only, + ) + + if plans_df.empty: + return exported_files + + recommendations_df = db_methods.get_recommendations( + plans_df["id"].tolist() + ) + + recommendations_df = db_methods.attach_materials(recommendations_df) + + for scenario_id in config.scenario_ids: + + scenario_recs = recommendations_df[ + recommendations_df["scenario_id"] == scenario_id + ] + + if scenario_recs.empty: + continue + + measures_df = scenario_recs[ + ["property_id", "measure_type", "estimated_cost"] + ].drop_duplicates() + + pivot = measures_df.pivot( + index="property_id", + columns="measure_type", + values="estimated_cost", + ).reset_index() + + pivot["total_retrofit_cost"] = ( + pivot.drop(columns=["property_id"]).sum(axis=1) + ) + + post_sap = ( + scenario_recs.groupby("property_id")[["sap_points"]] + .sum() + .reset_index() 
+ ) + + df = ( + properties_df + .merge(pivot, how="left", on="property_id") + .merge(post_sap, how="left", on="property_id") + ) + + df["sap_points"] = df["sap_points"].fillna(0) + df["predicted_post_works_sap"] = ( + df["current_sap_points"] + df["sap_points"] + ) + df["predicted_post_works_epc"] = df[ + "predicted_post_works_sap" + ].apply(sap_to_epc) + + filename = ( + f"/tmp/{config.scenario_names[scenario_id]} - " + f"{config.project_name}.xlsx" + ) + + with pd.ExcelWriter(filename) as writer: + df.to_excel(writer, sheet_name="properties", index=False) + + exported_files.append(filename) + + return exported_files + + +# ============================================================ +# Lambda Handler +# ============================================================ + +def handler(event: dict, context: Optional[Any]) -> Mapping[str, int | str]: + """ + Lambda event should have the following structure: + 1) task id - unique identifier for the export task (optional, can be used for tracking/logging) + 2) subtask id - unique identifier for the specific export operation (optional, can be used for tracking/logging) + 2) portfolio id - id of the portfolio to export + 3) scenario ids - list of scenario ids to export + 4) default_plans_only - flag indicating if we should only consider default plans for export (optional, + defaults to False) + :param event: + :param context: + :return: + """ + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + + # body_dict = { + # "task_id": "test", + # "subtask_id": "test", + # "portfolio_id": 569, + # "scenario_ids": [], + # "default_plans_only": True, + # } + + logger.debug("Validating request body") + payload = ExportRequest.model_validate(body_dict) + + if payload._scenario_ids_ignored: + logger.warning( + "Received scenario_ids in request body but they will be ignored " + "because default_plans_only is set to True" + ) + + logger.debug("Successfully validated request body") + 
process_export(payload) + + # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url + + return { + "statusCode": 200, + "body": json.dumps({}), + } + + except Exception as e: + logger.error(f"Failed to process record: {e}") + return { + "statusCode": 500, + "body": json.dumps({"message": "Failed to process export request"}), + } + + return { + "statusCode": 201, + "body": json.dumps({"message": "No records to process"}), + } From bf3d6f4d515c22a60be6e2154a1ac6893bc4dc00 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 23 Feb 2026 12:13:59 +0000 Subject: [PATCH 03/19] working on integration test --- .idea/Model.iml | 3 + backend/app/db/base.py | 3 + backend/app/db/models/addresses.py | 4 +- backend/app/db/models/condition.py | 4 +- backend/app/db/models/energy_assessments.py | 8 +- backend/app/db/models/epc.py | 5 +- backend/app/db/models/funding.py | 5 +- backend/app/db/models/inspections.py | 4 +- backend/app/db/models/materials.py | 3 +- .../app/db/models/non_intrusive_surveys.py | 4 +- backend/app/db/models/portfolio.py | 11 +- backend/app/db/models/recommendations.py | 7 +- backend/app/db/models/solar.py | 4 +- backend/app/db/models/users.py | 4 +- backend/app/db/models/whlg.py | 3 +- backend/export/README.md | 155 ++++++++++ .../export/property_scenarios/db_functions.py | 12 +- backend/export/property_scenarios/main.py | 148 +++++----- backend/export/tests/conftest.py | 55 ++++ backend/export/tests/test_export.py | 274 ++++++++++++++++++ pytest.ini | 2 + test.requirements.txt | 4 +- 22 files changed, 602 insertions(+), 120 deletions(-) create mode 100644 backend/app/db/base.py create mode 100644 backend/export/README.md create mode 100644 backend/export/tests/conftest.py create mode 100644 backend/export/tests/test_export.py diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..1e51ede4 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -10,4 +10,7 @@ + + \ No newline at end of file diff --git 
a/backend/app/db/base.py b/backend/app/db/base.py new file mode 100644 index 00000000..59be7030 --- /dev/null +++ b/backend/app/db/base.py @@ -0,0 +1,3 @@ +from sqlalchemy.orm import declarative_base + +Base = declarative_base() diff --git a/backend/app/db/models/addresses.py b/backend/app/db/models/addresses.py index 51e9540f..a813f58d 100644 --- a/backend/app/db/models/addresses.py +++ b/backend/app/db/models/addresses.py @@ -7,9 +7,7 @@ from sqlalchemy import ( func, UniqueConstraint, ) -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class PostcodeSearch(Base): diff --git a/backend/app/db/models/condition.py b/backend/app/db/models/condition.py index 77043366..96f601a7 100644 --- a/backend/app/db/models/condition.py +++ b/backend/app/db/models/condition.py @@ -7,12 +7,12 @@ from sqlalchemy import ( String, Enum as SqlEnum, ) -from sqlalchemy.orm import declarative_base, relationship +from sqlalchemy.orm import relationship from backend.condition.domain.aspect_type import AspectType from backend.condition.domain.element_type import ElementType -Base = declarative_base() +from backend.app.db.base import Base ElementTypeDb = SqlEnum( ElementType, diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py index 46912c9b..65879c39 100644 --- a/backend/app/db/models/energy_assessments.py +++ b/backend/app/db/models/energy_assessments.py @@ -1,10 +1,8 @@ -from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.dialects.postgresql import ENUM as PgEnum import enum from datetime import datetime - -Base = declarative_base() +from backend.app.db.base import Base +from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey +from sqlalchemy.dialects.postgresql import ENUM as PgEnum class EnergyAssessment(Base): 
diff --git a/backend/app/db/models/epc.py b/backend/app/db/models/epc.py index 5a216040..ff0b40a0 100644 --- a/backend/app/db/models/epc.py +++ b/backend/app/db/models/epc.py @@ -4,11 +4,8 @@ from sqlalchemy import ( String, JSON, TIMESTAMP, - UniqueConstraint, ) -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class EpcStore(Base): diff --git a/backend/app/db/models/funding.py b/backend/app/db/models/funding.py index a7417e14..19e8203d 100644 --- a/backend/app/db/models/funding.py +++ b/backend/app/db/models/funding.py @@ -3,20 +3,17 @@ import enum from sqlalchemy import ( Column, Integer, - String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey, ) -from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func +from backend.app.db.base import Base from backend.app.db.models.recommendations import PlanModel from backend.app.db.models.materials import MaterialType, Material -Base = declarative_base() - class SchemeEnum(enum.Enum): eco4 = "eco4" diff --git a/backend/app/db/models/inspections.py b/backend/app/db/models/inspections.py index 473f8a02..2a42f589 100644 --- a/backend/app/db/models/inspections.py +++ b/backend/app/db/models/inspections.py @@ -9,11 +9,9 @@ from sqlalchemy import ( Enum, ForeignKey, ) -from sqlalchemy.ext.declarative import declarative_base +from backend.app.db.base import Base from backend.app.db.models.portfolio import PropertyModel -Base = declarative_base() - # ------------------------------------------------------------------- # ENUM DEFINITIONS (equivalent to drizzle pgEnum calls) diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 8a524491..101ac021 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -1,10 +1,9 @@ import enum from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, Boolean -from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func 
-Base = declarative_base() +from backend.app.db.base import Base class MaterialType(enum.Enum): diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py index bc2d8adc..bbfb7a54 100644 --- a/backend/app/db/models/non_intrusive_surveys.py +++ b/backend/app/db/models/non_intrusive_surveys.py @@ -1,7 +1,5 @@ from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class NonIntrusiveSurvey(Base): diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index f6a99a97..9eb26597 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -4,6 +4,7 @@ import datetime from sqlalchemy import ( Column, Integer, + BigInteger, Text, Boolean, Float, @@ -12,12 +13,10 @@ from sqlalchemy import ( ForeignKey, CheckConstraint, ) -from sqlalchemy.ext.declarative import declarative_base +from backend.app.db.base import Base from backend.app.db.models.users import UserModel # noqa from backend.app.db.models.materials import MaterialType -Base = declarative_base() - class PortfolioStatus(enum.Enum): SCOPING = "scoping" @@ -32,7 +31,7 @@ class PortfolioStatus(enum.Enum): NEEDS_REVIEW = "needs review" -class PortfolioGoal(enum.Enum): # TODO: Move to domain? +class PortfolioGoal(enum.Enum): # TODO: Move to domain? 
VALUATION_IMPROVEMENT = "Valuation Improvement" INCREASING_EPC = "Increasing EPC" REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions" @@ -116,9 +115,9 @@ class PropertyModel(Base): id = Column(Integer, primary_key=True, autoincrement=True) portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) creation_status = Column(Enum(PropertyCreationStatus), nullable=False) - uprn = Column(Integer) + uprn = Column(BigInteger) landlord_property_id = Column(Text) - building_reference_number = Column(Integer) + building_reference_number = Column(BigInteger) status = Column( Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False, diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 538b11e3..9352eeb2 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -1,3 +1,4 @@ +import enum from typing import Iterable, List, NamedTuple, Optional, Type from sqlalchemy import ( Column, @@ -9,17 +10,15 @@ from sqlalchemy import ( ForeignKey, Enum, ) -from sqlalchemy.orm import declarative_base, Mapped, mapped_column +from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql import func from datetime import datetime +from backend.app.db.base import Base from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel from backend.app.db.models.materials import Material from backend.app.db.models.portfolio import Epc from datatypes.enums import QuantityUnits -import enum - -Base = declarative_base() def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]: diff --git a/backend/app/db/models/solar.py b/backend/app/db/models/solar.py index 88372bd3..dc1846f3 100644 --- a/backend/app/db/models/solar.py +++ b/backend/app/db/models/solar.py @@ -2,9 +2,7 @@ import datetime import pytz from enum import Enum as PyEnum from sqlalchemy import Column, Integer, Float, DateTime, JSON, BigInteger, ForeignKey, Enum, 
Boolean -from sqlalchemy.ext.declarative import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class Solar(Base): diff --git a/backend/app/db/models/users.py b/backend/app/db/models/users.py index 6e243815..7952b9b7 100644 --- a/backend/app/db/models/users.py +++ b/backend/app/db/models/users.py @@ -1,8 +1,6 @@ from sqlalchemy import Column, Integer, String, DateTime -from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.sql import func - -Base = declarative_base() +from backend.app.db.base import Base class UserModel(Base): diff --git a/backend/app/db/models/whlg.py b/backend/app/db/models/whlg.py index 29d907e4..5c5b7172 100644 --- a/backend/app/db/models/whlg.py +++ b/backend/app/db/models/whlg.py @@ -1,4 +1,3 @@ -import uuid from typing import Optional from sqlmodel import SQLModel, Field @@ -12,4 +11,4 @@ class Whlg(SQLModel, table=True): index=True, ) - postcode: str = Field(nullable=False) \ No newline at end of file + postcode: str = Field(nullable=False) diff --git a/backend/export/README.md b/backend/export/README.md new file mode 100644 index 00000000..a98154fc --- /dev/null +++ b/backend/export/README.md @@ -0,0 +1,155 @@ +# 🧪 Running Tests in PyCharm (macOS + pytest-postgresql) + +Our test suite uses `pytest` and `pytest-postgresql`, which +automatically spins up a temporary PostgreSQL instance. + +On Linux (including GitHub Actions), PostgreSQL binaries are installed +in standard system locations.\ +On macOS (Homebrew), they are not --- so PyCharm needs a small +configuration tweak to locate `pg_ctl`. + +This guide explains how to run and debug tests locally in PyCharm +without modifying test code. + +------------------------------------------------------------------------ + +## ✅ Prerequisites + +1. Install PostgreSQL via Homebrew: + +``` bash +brew install postgresql +``` + +2. 
Confirm `pg_ctl` exists: + +``` bash +which pg_ctl +``` + +Typical output: + + /opt/homebrew/bin/pg_ctl + +------------------------------------------------------------------------ + +# 🚀 Running Tests in PyCharm + +## Step 1 --- Create a PyCharm pytest Run Configuration + +1. Open the test file. +2. Click the green ▶ next to the test. +3. Choose **"Edit Run Configuration..."** + +You should see something like: + +- **Target:** `backend/export/tests/test_export.py` +- **Working directory:** Project root (e.g.`Model/`) + +------------------------------------------------------------------------ + +## Step 2 --- Add Required Override (macOS Only) + +In the Run Configuration: + +### ➜ "Additional Arguments" + +Add: + + --override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl + +This tells `pytest-postgresql` where `pg_ctl` lives on macOS. + +Without this, PyCharm may fail with: + + ExecutableMissingException: Could not found pg_config executable + +------------------------------------------------------------------------ + +## Step 3 --- Run or Debug + +You can now: + +- Click ▶ Run\ +- Click 🐞 Debug\ +- Set breakpoints normally + +The temporary PostgreSQL instance will start automatically. + +------------------------------------------------------------------------ + +# 🔍 Why This Is Needed + +`pytest-postgresql` defaults to a Linux-style path: + + /usr/lib/postgresql//bin/pg_ctl + +That path exists on Ubuntu (CI), but not on macOS. 
+ +On macOS, Homebrew installs PostgreSQL in: + + /opt/homebrew/bin/ + +The `--override-ini` flag safely overrides the executable path +**locally**, without modifying: + +- test files\ +- `conftest.py`\ +- `pytest.ini`\ +- CI configuration + +This ensures: + +- ✅ Tests still work in GitHub Actions\ +- ✅ Tests still work for Linux users\ +- ✅ macOS developers can debug in PyCharm\ +- ✅ No repository-specific hacks are required + +------------------------------------------------------------------------ + +# 🛠 Optional: Using a Local `.env` File + +If you prefer not to hardcode the override in the run configuration: + +1. Create a local file: + +```{=html} + +``` + + .env.local + +2. Add: + +```{=html} + +``` + + PYTEST_ADDOPTS=--override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl + +3. In PyCharm: + - Open the Run Configuration + - Add `.env.local` under **"Paths to .env files"** + +------------------------------------------------------------------------ + +# 🧪 Running Tests via Terminal (Recommended for CI Parity) + +For normal execution outside PyCharm: + +``` bash +make test +``` + +These already work without additional configuration. + +------------------------------------------------------------------------ + +# 🧠 Summary + +Environment Works Without Override? Needs `--override-ini`? 
+ ------------------------ ------------------------- ------------------------- +GitHub Actions (Linux) ✅ Yes ❌ No +Linux local ✅ Yes ❌ No +macOS terminal (tox) ✅ Yes ❌ No +macOS PyCharm debugger ❌ No ✅ Yes diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py index f527e738..8b29ab0e 100644 --- a/backend/export/property_scenarios/db_functions.py +++ b/backend/export/property_scenarios/db_functions.py @@ -1,6 +1,5 @@ from typing import List, Any, Dict, Optional import pandas as pd -from sqlalchemy import func from sqlalchemy.orm import Session from collections import defaultdict @@ -95,7 +94,10 @@ class DbMethods: plans_query = ( self.session.query(PlanModel) - .filter(PlanModel.is_default.is_(True)) + .filter( + PlanModel.portfolio_id == portfolio_id, + PlanModel.is_default.is_(True) + ) .distinct(PlanModel.property_id) .order_by( PlanModel.property_id, @@ -110,7 +112,10 @@ class DbMethods: plans_query = ( self.session.query(PlanModel) - .filter(PlanModel.scenario_id.in_(scenario_ids)) + .filter( + PlanModel.portfolio_id == portfolio_id, + PlanModel.scenario_id.in_(scenario_ids) + ) .distinct( PlanModel.scenario_id, PlanModel.property_id, @@ -138,6 +143,7 @@ class DbMethods: def get_recommendations(self, plan_ids: List[int]) -> pd.DataFrame: if not plan_ids: + logger.info("No plan ids provided") return pd.DataFrame() recs_query = ( diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 88ebf326..d2d89916 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -1,96 +1,98 @@ import json -from typing import List, Optional, Any, Mapping +from typing import Optional, Any, Mapping, Dict, Union import pandas as pd from sqlalchemy.orm import Session from backend.export.property_scenarios.input_schema import ExportRequest from backend.export.property_scenarios.db_functions import DbMethods -from 
backend.app.db.connection import db_engine +from backend.app.db.connection import db_read_session from backend.app.utils import sap_to_epc from utils.logger import setup_logger logger = setup_logger() -def process_export(config: ExportRequest) -> List[str]: - exported_files: List[str] = [] +def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]: + export_files: Dict[Union[str, int], pd.DataFrame] = {} - with Session(bind=db_engine) as session: + db_methods = DbMethods(session) - db_methods = DbMethods(session) + properties_df = db_methods.get_properties(payload.portfolio_id) - properties_df = db_methods.get_properties(config.portfolio_id) + logger.info("Retrieved %s properties for export", len(properties_df)) - plans_df = db_methods.get_latest_plans( - portfolio_id=config.portfolio_id, - scenario_ids=config.scenario_ids, - default_only=config.default_plans_only, - ) + plans_df = db_methods.get_latest_plans( + portfolio_id=payload.portfolio_id, + scenario_ids=payload.scenario_ids, + default_only=payload.default_plans_only, + ) - if plans_df.empty: - return exported_files + logger.info("Retrieved %s plans for export", len(plans_df)) - recommendations_df = db_methods.get_recommendations( - plans_df["id"].tolist() - ) + if plans_df.empty: + return export_files - recommendations_df = db_methods.attach_materials(recommendations_df) + recommendations_df = db_methods.get_recommendations( + plans_df["id"].tolist() + ) - for scenario_id in config.scenario_ids: + recommendations_df = db_methods.attach_materials(recommendations_df) + if payload.default_plans_only: + group_keys = [None] # Single export, no scenario grouping + else: + group_keys = payload.scenario_ids + + for group_key in group_keys: + + if payload.default_plans_only: + scenario_recs = recommendations_df + export_label = "default_plans" + else: scenario_recs = recommendations_df[ - recommendations_df["scenario_id"] == scenario_id + recommendations_df["scenario_id"] == 
group_key ] + export_label = group_key - if scenario_recs.empty: - continue + if scenario_recs.empty: + continue - measures_df = scenario_recs[ - ["property_id", "measure_type", "estimated_cost"] - ].drop_duplicates() + measures_df: pd.DataFrame = scenario_recs[ + ["property_id", "measure_type", "estimated_cost"] + ].drop_duplicates() - pivot = measures_df.pivot( - index="property_id", - columns="measure_type", - values="estimated_cost", - ).reset_index() + pivot = measures_df.pivot( + index="property_id", + columns="measure_type", + values="estimated_cost", + ).reset_index() - pivot["total_retrofit_cost"] = ( - pivot.drop(columns=["property_id"]).sum(axis=1) - ) + pivot["total_retrofit_cost"] = ( + pivot.drop(columns=["property_id"]).sum(axis=1) + ) - post_sap = ( - scenario_recs.groupby("property_id")[["sap_points"]] - .sum() - .reset_index() - ) + post_sap = ( + scenario_recs.groupby("property_id")[["sap_points"]] + .sum() + .reset_index() + ) - df = ( - properties_df - .merge(pivot, how="left", on="property_id") - .merge(post_sap, how="left", on="property_id") - ) + df = ( + properties_df.rename(columns={"solar_pv": "existing_solar_pv"}) + .merge(pivot, how="left", on="property_id") + .merge(post_sap, how="left", on="property_id") + ) - df["sap_points"] = df["sap_points"].fillna(0) - df["predicted_post_works_sap"] = ( - df["current_sap_points"] + df["sap_points"] - ) - df["predicted_post_works_epc"] = df[ - "predicted_post_works_sap" - ].apply(sap_to_epc) + df["sap_points"] = df["sap_points"].fillna(0) + df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] + df["predicted_post_works_epc"] = df[ + "predicted_post_works_sap" + ].apply(sap_to_epc) - filename = ( - f"/tmp/{config.scenario_names[scenario_id]} - " - f"{config.project_name}.xlsx" - ) + export_files[export_label] = df - with pd.ExcelWriter(filename) as writer: - df.to_excel(writer, sheet_name="properties", index=False) - - exported_files.append(filename) - - return 
exported_files + return export_files # ============================================================ @@ -106,22 +108,23 @@ def handler(event: dict, context: Optional[Any]) -> Mapping[str, int | str]: 3) scenario ids - list of scenario ids to export 4) default_plans_only - flag indicating if we should only consider default plans for export (optional, defaults to False) - :param event: - :param context: - :return: + + Example event: + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 569, + "scenario_ids": [], + "default_plans_only": True, + } + :param event: Lambda event containing export request details + :param context: Lambda context (not used in this handler but included for completeness) + :return: HTTP response indicating success or failure of the export operation """ for record in event.get("Records", []): try: body_dict = json.loads(record["body"]) - # body_dict = { - # "task_id": "test", - # "subtask_id": "test", - # "portfolio_id": 569, - # "scenario_ids": [], - # "default_plans_only": True, - # } - logger.debug("Validating request body") payload = ExportRequest.model_validate(body_dict) @@ -132,7 +135,8 @@ def handler(event: dict, context: Optional[Any]) -> Mapping[str, int | str]: ) logger.debug("Successfully validated request body") - process_export(payload) + with db_read_session() as session: + exported_files = process_export(payload, session) # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url diff --git a/backend/export/tests/conftest.py b/backend/export/tests/conftest.py new file mode 100644 index 00000000..10bfa971 --- /dev/null +++ b/backend/export/tests/conftest.py @@ -0,0 +1,55 @@ +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from backend.app.db.base import Base + + +@pytest.fixture(scope="function") +def engine(postgresql): + """ + Create a SQLAlchemy engine bound to the ephemeral + pytest-postgresql database. 
+ """ + + # Build SQLAlchemy URL from psycopg connection info + connection_string = ( + f"postgresql+psycopg://" + f"{postgresql.info.user}:" + f"{postgresql.info.password}@" + f"{postgresql.info.host}:" + f"{postgresql.info.port}/" + f"{postgresql.info.dbname}" + ) + + engine = create_engine(connection_string) + + # Create tables for each test (this fixture is function-scoped) + Base.metadata.create_all(engine) + + # Yield splits this fixture into two phases: 1) setup and 2) teardown, the latter of which runs once the + # test using the fixture has completed + yield engine + + # Clean up after each test + Base.metadata.drop_all(engine) + engine.dispose() + + +@pytest.fixture(scope="function") +def db_session(engine): + """ + Provides a clean transactional session per test. + + Rolls back after each test to keep isolation. + """ + + connection = engine.connect() + transaction = connection.begin() + + session = sessionmaker(bind=connection)() + + yield session + + session.close() + transaction.rollback() + connection.close() diff --git a/backend/export/tests/test_export.py b/backend/export/tests/test_export.py new file mode 100644 index 00000000..eb82333d --- /dev/null +++ b/backend/export/tests/test_export.py @@ -0,0 +1,274 @@ +import pandas as pd +import numpy as np +from pathlib import Path +import time + +from backend.export.property_scenarios.main import process_export +from backend.export.property_scenarios.input_schema import ExportRequest +from backend.app.db.models.portfolio import PropertyModel, Epc, Portfolio, PortfolioStatus, PortfolioGoal, \ + PropertyCreationStatus, PropertyDetailsEpcModel +from backend.app.db.models.recommendations import PlanModel, Recommendation, PlanRecommendations +from utils.logger import setup_logger + +FIXTURE_PATH = Path("backend/export/tests/fixtures") +logger = setup_logger() + + +def load_csv(name: str) -> pd.DataFrame: + df = pd.read_csv(FIXTURE_PATH / name) + df = df.replace({np.nan: None}) + return df + + +def 
test_default_export_integration(db_session): + # ---------------------------------------- + # 1) Load csvs + # ---------------------------------------- + t0 = time.perf_counter() + portfolio_df = load_csv("portfolio_569.csv") + properties_df = load_csv("properties_569.csv") + property_details_epc_df = load_csv("property_details_epc_569.csv") + plans_df = load_csv("plans_569.csv") + plan_recs_df = load_csv("plan_recs_569.csv") + recommendations_df = load_csv("recommendations_569.csv") + + # Shrink down recommendations_df to speed up the data load. For this test, we only need + # default recommendations so let's focus on those. We filter on where default is true + recommendations_df = recommendations_df[ + recommendations_df["default"] + ] + valid_rec_ids = recommendations_df["id"].unique() + + plan_recs_df = plan_recs_df[ + plan_recs_df["recommendation_id"].isin(valid_rec_ids) + ] + + logger.info( + "Loaded CSVs in %.2f seconds | properties=%s plans=%s recs=%s", + time.perf_counter() - t0, + len(properties_df), + len(plans_df), + len(recommendations_df), + ) + + logger.info("Starting database load") + db_load_t0 = time.perf_counter() + + # ---------------------------------------- + # 2) Insert test portfolio + # ---------------------------------------- + + portfolios = [] + for row in portfolio_df.itertuples(index=False): + portfolios.append( + Portfolio( + id=row.id, + name=row.name, + status=PortfolioStatus[row.status.split(".")[-1]], + goal=PortfolioGoal[row.goal.split(".")[-1]] if row.goal else None, + ) + ) + + db_session.bulk_save_objects(portfolios) + db_session.flush() + # ---------------------------------------- + # 3) Insert test property + # ---------------------------------------- + + properties = [] + + for row in properties_df.itertuples(index=False): + row_dict = row._asdict() + + row_dict["uprn"] = int(row_dict["uprn"]) if row_dict.get("uprn") else None + row_dict["building_reference_number"] = ( + int(row_dict["building_reference_number"]) + if 
row_dict.get("building_reference_number") + else None + ) + + prop = PropertyModel(**{ + col: row_dict[col] + for col in PropertyModel.__table__.columns.keys() + if col in row_dict + }) + + prop.creation_status = PropertyCreationStatus[ + row_dict["creation_status"].split(".")[-1] + ] + prop.status = PortfolioStatus[row_dict["status"].split(".")[-1]] + + if row_dict.get("current_epc_rating"): + prop.current_epc_rating = Epc[ + row_dict["current_epc_rating"].split(".")[-1] + ] + + properties.append(prop) + + db_session.bulk_save_objects(properties) + db_session.flush() + + # ---------------------------------------- + # 4) Insert property details - EPC + # ---------------------------------------- + + property_lookup = { + prop.uprn: prop + for prop in db_session.query(PropertyModel).all() + } + + epc_rows = [] + + for row in property_details_epc_df.itertuples(index=False): + row_dict = row._asdict() + + uprn = int(row_dict["uprn"]) if row_dict.get("uprn") else None + property_obj = property_lookup.get(uprn) + + if not property_obj: + continue # skip if property not found + + # Build only fields that exist on the model + epc_data = { + col.name: row_dict[col.name] + for col in PropertyDetailsEpcModel.__table__.columns + if col.name in row_dict and col.name not in ["id", "property_id", "portfolio_id"] + } + + epc = PropertyDetailsEpcModel( + property_id=property_obj.id, + portfolio_id=property_obj.portfolio_id, + **epc_data, + ) + + epc_rows.append(epc) + + db_session.bulk_save_objects(epc_rows) + db_session.flush() + + # ---------------------------------------- + # 4) Insert default plan + # ---------------------------------------- + + plans = [] + + for row in plans_df.itertuples(index=False): + row_dict = row._asdict() + + if row_dict.get("post_epc_rating"): + row_dict["post_epc_rating"] = Epc[ + row_dict["post_epc_rating"].split(".")[-1] + ] + + row_dict["scenario_id"] = None + + plan = PlanModel(**{ + col: row_dict[col] + for col in 
PlanModel.__table__.columns.keys() + if col in row_dict + }) + + plans.append(plan) + + db_session.bulk_save_objects(plans) + db_session.flush() + + # ---------------------------------------- + # 5) Insert recommendation + # ---------------------------------------- + + recs = [ + Recommendation(**{ + col: row[col] + for col in Recommendation.__table__.columns.keys() + if col in row + }) + for _, row in recommendations_df.iterrows() + ] + + db_session.bulk_save_objects(recs) + db_session.flush() + + # ---------------------------------------- + # 6) Insert PlanRecommendations + # ---------------------------------------- + links = [ + PlanRecommendations( + plan_id=row.plan_id, + recommendation_id=row.recommendation_id, + ) + for row in plan_recs_df.itertuples(index=False) + ] + + db_session.bulk_save_objects(links) + db_session.commit() + logger.info("Inserted all data in %.2f seconds", time.perf_counter() - db_load_t0) + + # ---------------------------------------- + # 6) Build payload + # ---------------------------------------- + + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 569, + "scenario_ids": [], + "default_plans_only": True, + } + + payload = ExportRequest.model_validate(body_dict) + + # ---------------------------------------- + # 7) Call process_export + # ---------------------------------------- + + logger.info( + "Recommendation count in DB: %s", + db_session.query(Recommendation).count() + ) + + logger.info( + "Default + not installed count: %s", + db_session.query(Recommendation) + .filter( + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False) + ) + .count() + ) + + logger.info("Starting process_export") + process_t0 = time.perf_counter() + + result = process_export(payload, session=db_session) + + logger.info("process_export finished in %.2f seconds", time.perf_counter() - process_t0) + + # ---------------------------------------- + # 8) Assertions + # 
---------------------------------------- + + assert "default_plans" in result + + df = result["default_plans"] + + assert not df.empty + + # This test was generated on a real portfolio and so we check the things we expect to do + + # 1) All packages are "compliant", where in this case, the properties should get to EPC C + + failed = df[df["predicted_post_works_sap"] < 69] + failed_property_types = failed["property_type"].value_counts().to_dict() + assert failed_property_types["Flat"] == 113 + assert failed_property_types["House"] == 8 + assert failed_property_types["Bungalow"] == 4 + assert failed_property_types["Maisonette"] == 1 + # Check the houses + + assert failed.shape[0] + + # Errors for me: + # - should get to EPC C: https://ara.domna.homes/portfolio/569/building-passport/661051/plans + # - Why doesn't this get to a C, under the plan?: + # https://ara.domna.homes/portfolio/569/building-passport/660447/plans/1603913 diff --git a/pytest.ini b/pytest.ini index 9c9f8234..7bef3884 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,6 @@ [pytest] pythonpath = . 
+log_cli = true +log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests diff --git a/test.requirements.txt b/test.requirements.txt index d31371a6..d8b8b777 100644 --- a/test.requirements.txt +++ b/test.requirements.txt @@ -2,4 +2,6 @@ pytest mock pytest-cov pytest-mock -dotenv \ No newline at end of file +dotenv +psycopg[binary] +pytest-postgresql \ No newline at end of file From 3d18827961468d18dad0ab7f595391e1ea198197 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 13:36:02 +0000 Subject: [PATCH 04/19] added plan name to export --- backend/export/property_scenarios/db_functions.py | 2 ++ backend/export/property_scenarios/main.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py index 8b29ab0e..a27806c2 100644 --- a/backend/export/property_scenarios/db_functions.py +++ b/backend/export/property_scenarios/db_functions.py @@ -150,6 +150,7 @@ class DbMethods: self.session.query( Recommendation, PlanModel.scenario_id, + PlanModel.name ) .join( PlanRecommendations, @@ -171,6 +172,7 @@ class DbMethods: for col in Recommendation.__table__.columns }, "scenario_id": r.scenario_id, + "plan_name": r.name, } for r in recs_query ] diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index d2d89916..56886a8b 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -59,17 +59,17 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, continue measures_df: pd.DataFrame = scenario_recs[ - ["property_id", "measure_type", 
"estimated_cost"] + ["property_id", "measure_type", "plan_name", "estimated_cost"] ].drop_duplicates() pivot = measures_df.pivot( - index="property_id", + index=["property_id", "plan_name"], columns="measure_type", values="estimated_cost", ).reset_index() pivot["total_retrofit_cost"] = ( - pivot.drop(columns=["property_id"]).sum(axis=1) + pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1) ) post_sap = ( From 43796d339ecab4bb7257236dba7117aea2e31579 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 13:49:12 +0000 Subject: [PATCH 05/19] revert load dotenv path for sal --- asset_list/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asset_list/app.py b/asset_list/app.py index 0b792270..a97bb8e0 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -13,7 +13,7 @@ from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc -load_dotenv(dotenv_path="backend/.env") +load_dotenv(dotenv_path="../backend/.env") EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) From 042140afecd1cc739a27ec74e249dad7fe15498a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:10:07 +0000 Subject: [PATCH 06/19] added export to pytest.ini --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 7bef3884..608d5e0c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,4 +3,4 @@ pythonpath = . 
log_cli = true log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests From 2e5ae82d3b23eecda9ae07853d809041892ca5b4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:13:19 +0000 Subject: [PATCH 07/19] added additional testing packages to dev container --- .devcontainer/backend/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index 9814c8d4..8fbb6120 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -18,5 +18,7 @@ sqlmodel pytest==9.0.2 pytest-cov==7.0.0 ipykernel>=6.25,<7 +dotenv +psycopg[binary] # Formatting black==26.1.0 \ No newline at end of file From bf865811c05a9bd26dc86b8cd8727cadf5a5d7ac Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:20:04 +0000 Subject: [PATCH 08/19] added handler typing --- backend/export/property_scenarios/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 56886a8b..eb97df2f 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -99,7 +99,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, # Lambda Handler # ============================================================ -def handler(event: dict, context: Optional[Any]) -> 
Mapping[str, int | str]: +def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, int | str]: """ Lambda event should have the following structure: 1) task id - unique identifier for the export task (optional, can be used for tracking/logging) From e645f90b0efda881201c79874e609945b4c3f374 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:45:52 +0000 Subject: [PATCH 09/19] fixing stict typing issues --- .gitignore | 5 ++- .idea/watcherTasks.xml | 25 +++++++++++ .../export/property_scenarios/input_schema.py | 7 +++- backend/export/property_scenarios/main.py | 41 +++++++++---------- pyproject.toml | 3 ++ pyrightconfig.json | 8 ++++ 6 files changed, 65 insertions(+), 24 deletions(-) create mode 100644 .idea/watcherTasks.xml create mode 100644 pyproject.toml create mode 100644 pyrightconfig.json diff --git a/.gitignore b/.gitignore index 6268360b..68e66052 100644 --- a/.gitignore +++ b/.gitignore @@ -279,4 +279,7 @@ cache/ *.png *.pptx -local_data* \ No newline at end of file +local_data* + +# pyright local config +pyrightconfig.json \ No newline at end of file diff --git a/.idea/watcherTasks.xml b/.idea/watcherTasks.xml new file mode 100644 index 00000000..2a14ba99 --- /dev/null +++ b/.idea/watcherTasks.xml @@ -0,0 +1,25 @@ + + + + + + + + \ No newline at end of file diff --git a/backend/export/property_scenarios/input_schema.py b/backend/export/property_scenarios/input_schema.py index 4ef704a3..2d925fc0 100644 --- a/backend/export/property_scenarios/input_schema.py +++ b/backend/export/property_scenarios/input_schema.py @@ -1,5 +1,5 @@ from typing import Optional, Union, List -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, model_validator, PrivateAttr class ExportRequest(BaseModel): @@ -15,7 +15,10 @@ class ExportRequest(BaseModel): # boolean which will overwrite the scenario ids. 
If this is true, we will only export the default plan for each # property and will ignore the scenario ids default_plans_only: Optional[bool] = False - + + # Private attribute to indicate whether scenario_ids should be ignored due to default_plans_only being True + _scenario_ids_ignored: bool = PrivateAttr(default=False) + @model_validator(mode="after") def validate_default_plan_override(self): """ diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index eb97df2f..50754f6f 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -1,5 +1,5 @@ import json -from typing import Optional, Any, Mapping, Dict, Union +from typing import Optional, Any, Mapping, Dict, Union, List import pandas as pd from sqlalchemy.orm import Session @@ -13,6 +13,12 @@ from utils.logger import setup_logger logger = setup_logger() +def choose_group_keys(payload: ExportRequest) -> List[int]: + if payload.default_plans_only: + return [] # Single export, no scenario grouping + return payload.scenario_ids or [] + + def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]: export_files: Dict[Union[str, int], pd.DataFrame] = {} @@ -22,33 +28,28 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, logger.info("Retrieved %s properties for export", len(properties_df)) - plans_df = db_methods.get_latest_plans( + plans_df: pd.DataFrame = db_methods.get_latest_plans( portfolio_id=payload.portfolio_id, scenario_ids=payload.scenario_ids, - default_only=payload.default_plans_only, + default_only=bool(payload.default_plans_only), ) logger.info("Retrieved %s plans for export", len(plans_df)) if plans_df.empty: return export_files - - recommendations_df = db_methods.get_recommendations( - plans_df["id"].tolist() - ) + plan_ids: List[int] = plans_df["id"].tolist() + recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids) 
recommendations_df = db_methods.attach_materials(recommendations_df) - if payload.default_plans_only: - group_keys = [None] # Single export, no scenario grouping - else: - group_keys = payload.scenario_ids + group_keys: List[Union[str, int]] = choose_group_keys(payload) for group_key in group_keys: if payload.default_plans_only: scenario_recs = recommendations_df - export_label = "default_plans" + export_label: Union[str, int] = "default_plans" else: scenario_recs = recommendations_df[ recommendations_df["scenario_id"] == group_key @@ -62,7 +63,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, ["property_id", "measure_type", "plan_name", "estimated_cost"] ].drop_duplicates() - pivot = measures_df.pivot( + pivot: pd.DataFrame = measures_df.pivot( index=["property_id", "plan_name"], columns="measure_type", values="estimated_cost", @@ -72,13 +73,13 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1) ) - post_sap = ( + post_sap: pd.DataFrame = ( scenario_recs.groupby("property_id")[["sap_points"]] .sum() .reset_index() ) - df = ( + df: pd.DataFrame = ( properties_df.rename(columns={"solar_pv": "existing_solar_pv"}) .merge(pivot, how="left", on="property_id") .merge(post_sap, how="left", on="property_id") @@ -86,9 +87,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, df["sap_points"] = df["sap_points"].fillna(0) df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] - df["predicted_post_works_epc"] = df[ - "predicted_post_works_sap" - ].apply(sap_to_epc) + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc) export_files[export_label] = df @@ -99,7 +98,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, # Lambda Handler # ============================================================ -def handler(event: Mapping[str, Any], context: 
Optional[Any]) -> Mapping[str, int | str]: +def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]: """ Lambda event should have the following structure: 1) task id - unique identifier for the export task (optional, can be used for tracking/logging) @@ -128,7 +127,7 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, in logger.debug("Validating request body") payload = ExportRequest.model_validate(body_dict) - if payload._scenario_ids_ignored: + if getattr(payload, "_scenario_ids_ignored", False): logger.warning( "Received scenario_ids in request body but they will be ignored " "because default_plans_only is set to True" @@ -139,7 +138,7 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, in exported_files = process_export(payload, session) # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url - + _ = exported_files return { "statusCode": 200, "body": json.dumps({}), diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..72ec3f0c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.pyright] +reportUnknownMemberType = false +reportUnknownVariableType = false \ No newline at end of file diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 00000000..d4e0e2a4 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,8 @@ +{ + "typeCheckingMode": "strict", + "venvPath": "/Users/khalimconn-kowlessar/opt/anaconda3/envs/", + "venv": "Fastapi-backend", + "include": [ + "." 
+ ] +} \ No newline at end of file From f13bffec7c857f49e0c14b9eb5e59e90affdc48b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:51:26 +0000 Subject: [PATCH 10/19] implementing scenario_ids_ignored feedback --- backend/export/property_scenarios/input_schema.py | 4 ++++ backend/export/property_scenarios/main.py | 12 ++---------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/backend/export/property_scenarios/input_schema.py b/backend/export/property_scenarios/input_schema.py index 2d925fc0..f6fa5965 100644 --- a/backend/export/property_scenarios/input_schema.py +++ b/backend/export/property_scenarios/input_schema.py @@ -34,3 +34,7 @@ class ExportRequest(BaseModel): object.__setattr__(self, "_scenario_ids_ignored", False) return self + + @property + def scenario_ids_ignored(self) -> bool: + return self._scenario_ids_ignored diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 50754f6f..50fd808b 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -100,15 +100,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]: """ - Lambda event should have the following structure: - 1) task id - unique identifier for the export task (optional, can be used for tracking/logging) - 2) subtask id - unique identifier for the specific export operation (optional, can be used for tracking/logging) - 2) portfolio id - id of the portfolio to export - 3) scenario ids - list of scenario ids to export - 4) default_plans_only - flag indicating if we should only consider default plans for export (optional, - defaults to False) - - Exxample event: + Example event: body_dict = { "task_id": "test", "subtask_id": "test", @@ -127,7 +119,7 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un 
logger.debug("Validating request body") payload = ExportRequest.model_validate(body_dict) - if getattr(payload, "_scenario_ids_ignored", False): + if payload.scenario_ids_ignored: logger.warning( "Received scenario_ids in request body but they will be ignored " "because default_plans_only is set to True" From 1717e7b4c2308c947c1728b1fc647ab5659fcfe7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 17:30:39 +0000 Subject: [PATCH 11/19] fixing typing issues --- backend/app/db/base.py | 6 +- backend/app/db/models/recommendations.py | 46 +++++++-- .../export/property_scenarios/db_functions.py | 95 ++++++++++--------- 3 files changed, 93 insertions(+), 54 deletions(-) diff --git a/backend/app/db/base.py b/backend/app/db/base.py index 59be7030..fa2b68a5 100644 --- a/backend/app/db/base.py +++ b/backend/app/db/base.py @@ -1,3 +1,5 @@ -from sqlalchemy.orm import declarative_base +from sqlalchemy.orm import DeclarativeBase -Base = declarative_base() + +class Base(DeclarativeBase): + pass diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 9352eeb2..27d03303 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -54,19 +54,47 @@ class Recommendation(Base): class RecommendationMaterials(Base): __tablename__ = "recommendation_materials" - id = Column(BigInteger, primary_key=True, autoincrement=True) - recommendation_id = Column( - BigInteger, ForeignKey("recommendation.id"), nullable=False + id: Mapped[int] = mapped_column( + BigInteger, primary_key=True, autoincrement=True ) - material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - depth = Column(Float, nullable=False) - quantity = Column(Float, nullable=False) - quantity_unit = Column( + + recommendation_id: Mapped[int] = mapped_column( + BigInteger, + ForeignKey("recommendation.id"), + nullable=False, + ) 
+ + material_id: Mapped[int] = mapped_column( + BigInteger, + ForeignKey(Material.id), + nullable=False, + ) + + created_at: Mapped[datetime] = mapped_column( + TIMESTAMP, + nullable=False, + server_default=func.now(), + ) + + depth: Mapped[float] = mapped_column( + Float, + nullable=False, + ) + + quantity: Mapped[float] = mapped_column( + Float, + nullable=False, + ) + + quantity_unit: Mapped[QuantityUnits] = mapped_column( Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False, ) - estimated_cost = Column(Float, nullable=False) + + estimated_cost: Mapped[float] = mapped_column( + Float, + nullable=False, + ) class PlanTypeEnum(enum.Enum): # TODO: move this to domain? diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py index a27806c2..1527a989 100644 --- a/backend/export/property_scenarios/db_functions.py +++ b/backend/export/property_scenarios/db_functions.py @@ -1,6 +1,8 @@ -from typing import List, Any, Dict, Optional +from typing import List, Any, Dict, Optional, Tuple, Sequence import pandas as pd +from sqlalchemy import select from sqlalchemy.orm import Session +from sqlalchemy.engine import Row from collections import defaultdict from backend.app.db.models.recommendations import ( @@ -20,7 +22,7 @@ logger = setup_logger() class DbMethods: - def __init__(self, session: Session): + def __init__(self, session: Session) -> None: self.session = session def get_properties(self, portfolio_id: int) -> pd.DataFrame: @@ -29,28 +31,31 @@ class DbMethods: :param portfolio_id: :return: """ - query = ( - self.session.query(PropertyModel, PropertyDetailsEpcModel) + stmt = ( + select(PropertyModel, PropertyDetailsEpcModel) .join( PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id, ) - .filter(PropertyModel.portfolio_id == portfolio_id) - .all() + .where(PropertyModel.portfolio_id == portfolio_id) ) - data = [ + rows: Sequence[Row[Tuple[PropertyModel, 
PropertyDetailsEpcModel]]] = ( + self.session.execute(stmt).all() + ) + + data: List[Dict[str, Any]] = [ { **{ - col.name: getattr(row.PropertyModel, col.name) - for col in PropertyModel.__table__.columns + col.name: getattr(property_model, col.name) + for col in PropertyModel.__table__.columns.values() }, **{ - col.name: getattr(row.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns + col.name: getattr(epc_model, col.name) + for col in PropertyDetailsEpcModel.__table__.columns.values() }, } - for row in query + for property_model, epc_model in rows ] return pd.DataFrame(data) @@ -92,11 +97,11 @@ class DbMethods: # DISTINCT ON (property_id) keeps the first row per property, # ordered by created_at DESC so we get the newest one. - plans_query = ( - self.session.query(PlanModel) - .filter( + stmt = ( + select(PlanModel) + .where( PlanModel.portfolio_id == portfolio_id, - PlanModel.is_default.is_(True) + PlanModel.is_default.is_(True), ) .distinct(PlanModel.property_id) .order_by( @@ -110,11 +115,13 @@ class DbMethods: # DISTINCT ON (scenario_id, property_id) keeps the newest # plan per scenario/property combination. 
- plans_query = ( - self.session.query(PlanModel) - .filter( + assert scenario_ids is not None + + stmt = ( + select(PlanModel) + .where( PlanModel.portfolio_id == portfolio_id, - PlanModel.scenario_id.in_(scenario_ids) + PlanModel.scenario_id.in_(scenario_ids), ) .distinct( PlanModel.scenario_id, @@ -128,13 +135,14 @@ class DbMethods: ) logger.info("Fetching plans") - plans = plans_query.all() + + plans: Sequence[PlanModel] = self.session.scalars(stmt).all() return pd.DataFrame( [ { col.name: getattr(plan, col.name) - for col in PlanModel.__table__.columns + for col in PlanModel.__table__.columns.values() } for plan in plans ] @@ -146,35 +154,34 @@ class DbMethods: logger.info("No plan ids provided") return pd.DataFrame() - recs_query = ( - self.session.query( - Recommendation, - PlanModel.scenario_id, - PlanModel.name - ) + stmt = ( + select(Recommendation, PlanModel.scenario_id, PlanModel.name) .join( PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id, ) .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) - .filter( + .where( PlanRecommendations.plan_id.in_(plan_ids), Recommendation.default.is_(True), Recommendation.already_installed.is_(False), ) - .all() ) - data = [ + rows: Sequence[Tuple[Recommendation, Optional[int], Optional[str]]] = ( + self.session.execute(stmt).tuples().all() + ) + + data: List[Dict[str, Any]] = [ { **{ - col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns + col.name: getattr(rec_model, col.name) + for col in Recommendation.__table__.columns.values() }, - "scenario_id": r.scenario_id, - "plan_name": r.name, + "scenario_id": scenario_id, + "plan_name": plan_name, } - for r in recs_query + for rec_model, scenario_id, plan_name in rows ] return pd.DataFrame(data) @@ -185,12 +192,14 @@ class DbMethods: recommendations_df["materials"] = [] return recommendations_df - rec_ids = recommendations_df["id"].tolist() + rec_ids: List[int] = [int(x) for x in 
recommendations_df["id"].tolist()] - materials_query = ( - self.session.query(RecommendationMaterials) - .filter(RecommendationMaterials.recommendation_id.in_(rec_ids)) - .all() + stmt = select(RecommendationMaterials).where( + RecommendationMaterials.recommendation_id.in_(rec_ids) + ) + + materials_query: Sequence[RecommendationMaterials] = ( + self.session.scalars(stmt).all() ) materials_map: Dict[int, List[Dict[str, Any]]] = defaultdict(list) @@ -206,8 +215,8 @@ class DbMethods: } ) - recommendations_df["materials"] = recommendations_df["id"].apply( - lambda x: materials_map.get(x, []) + recommendations_df["materials"] = recommendations_df["id"].astype(int).apply( + lambda x: materials_map.get(int(x), []) ) return recommendations_df From 1df4fb781547caf89c2c1cd581d7044adcb2128c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 26 Feb 2026 16:53:23 +0000 Subject: [PATCH 12/19] working on export issues --- .idea/watcherTasks.xml | 2 +- backend/export/property_scenarios/main.py | 1 + backend/export/tests/test_export.py | 42 +++++++++++++++++------ sfr/principal_pitch/2_export_data.py | 15 ++++---- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/.idea/watcherTasks.xml b/.idea/watcherTasks.xml index 2a14ba99..60d7e26a 100644 --- a/.idea/watcherTasks.xml +++ b/.idea/watcherTasks.xml @@ -1,7 +1,7 @@ - +