diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 1ab50e8d..e0da2f2b 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -99,11 +99,11 @@ jobs: -out=lambdaplan - name: Terraform Apply - if: (inputs.terraform_apply == 'true' || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/main') && inputs.terraform_destroy != 'true' + if: inputs.terraform_apply == 'true' && inputs.terraform_destroy != 'true' working-directory: ${{ inputs.lambda_path }} run: terraform apply -auto-approve lambdaplan - name: Terraform Destroy - if: inputs.terraform_destroy == 'true' + if: inputs.terraform_destroy == 'true' && inputs.terraform_apply != 'true' working-directory: ${{ inputs.lambda_path }} run: terraform destroy -auto-approve diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 397eb6ee..1cdaaf79 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -16,6 +16,13 @@ jobs: outputs: stage: ${{ steps.set-stage.outputs.stage }} + terraform_apply: ${{ steps.set-stage.outputs.terraform_apply }} + + env: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} env: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} @@ -28,17 +35,18 @@ jobs: id: set-stage shell: bash run: | - echo "AWS_ACCESS_KEY_ID is set? ${AWS_ACCESS_KEY_ID:+yes}" - echo "AWS_SECRET_ACCESS_KEY is set? 
${AWS_SECRET_ACCESS_KEY:+yes}" - echo "AWS_REGION=$AWS_REGION" - echo "DEV_DB_HOST=$DEV_DB_HOST" - BRANCH="${GITHUB_REF_NAME}" if [[ "$BRANCH" == "prod" ]]; then echo "stage=prod" >> "$GITHUB_OUTPUT" - else + echo "terraform_apply=false" >> "$GITHUB_OUTPUT" + elif [[ "$BRANCH" == "dev" ]]; then echo "stage=dev" >> "$GITHUB_OUTPUT" + echo "terraform_apply=true" >> "$GITHUB_OUTPUT" + else + # Feature branch + echo "stage=dev" >> "$GITHUB_OUTPUT" + echo "terraform_apply=false" >> "$GITHUB_OUTPUT" fi # ============================================================ @@ -105,6 +113,7 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.address2uprn_image.outputs.image_digest }} + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -145,8 +154,7 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} - # This should not be deployed in production!!!! 
- terraform_apply: 'true' + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -186,9 +194,9 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.condition_etl_image.outputs.image_digest }} + # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} terraform_destroy: 'true' secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} \ No newline at end of file diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index d3a92463..cc6431b8 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -27,4 +27,4 @@ jobs: env: EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }} run: | - make test + make test \ No newline at end of file diff --git a/asset_list/app.py b/asset_list/app.py index da4eb6bb..43c653a7 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -69,6 +69,7 @@ def app(): Property UPRN """ +<<<<<<< HEAD data_folder = "/workspaces/model/asset_list/" data_filename = "manchester.xlsx" sheet_name = "PW0099 - Property List" @@ -77,6 +78,53 @@ def app(): address1_method = None fulladdress_column = None address_cols_to_concat = ["address"] +======= + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire" + data_filename = "ASPIRE ASSET LIST.xlsx" + sheet_name = "Asset List" + postcode_column = "Postcode" + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + 
landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "LLUPRN" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Peabody data for cleaning + data_folder = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation" + ) + data_filename = "to_standardise_uprns.xlsx" + sheet_name = "Sheet1" + postcode_column = "Postcode" + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address" + address_cols_to_concat = None +>>>>>>> d4064da36565f87c2b72d10e9f3604cc6c37bdb6 missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None @@ -238,7 +286,7 @@ def app(): if skip is not None and not force_retrieve_data: if i <= skip: continue - chunk = asset_list.standardised_asset_list[i : i + chunk_size] + chunk = asset_list.standardised_asset_list[i: i + chunk_size] epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, @@ -381,7 +429,7 @@ def app(): # Retrieve just the data we need epc_df = epc_df[ [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys()) - ].rename(columns=asset_list.EPC_API_DATA_NAMES) + ].rename(columns=asset_list.EPC_API_DATA_NAMES) # Look for columns not in the find my EPC data, which will have happened if we didn't # retrieve it in the first place @@ -398,7 +446,7 @@ def app(): find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) - 
].rename(columns=asset_list.FIND_EPC_DATA_NAMES), + ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), how="left", on=asset_list.DOMNA_PROPERTY_ID, ) diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 6f808c9a..177a7549 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -439,6 +439,11 @@ PROPERTY_MAPPING = { 'House: Semi Detached: Top Floor': 'house', 'House: End Terrace: Ground Floor': 'house', 'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette', - 'Flat: Enclosed Mid Terrace: Basement': 'flat' + 'Flat: Enclosed Mid Terrace: Basement': 'flat', + + 'Warden Bungalow': 'bungalow', + 'Warden Flat': 'flat', + 'Upper Floor Flat': 'flat', + 'Extracare Scheme': 'other' } diff --git a/backend/.env.local b/backend/.env.test similarity index 100% rename from backend/.env.local rename to backend/.env.test diff --git a/backend/Property.py b/backend/Property.py index 14f7e03f..6a84fc09 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1256,7 +1256,8 @@ class Property: "biodiesel": "Smokeless Fuel", "b30d": "B30K Biofuel", "coal": "Coal", - "oil": "Oil" + "oil": "Oil", + "unknown": None # Handle - anything post 2020 is electricity else gas } self.heating_energy_source = list({ @@ -1326,7 +1327,16 @@ class Property: if self.heating_energy_source == "Varied (Community Scheme)": if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown - self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]] + mapped_to = fuel_map[self.main_fuel["fuel_type"]] + if mapped_to is None and self.main_fuel["fuel_type"] == "unknown": + # Handle logic based on age band + if self.year_built >= 2020: + self.heating_energy_source = "Electricity" + else: + self.heating_energy_source = "Natural Gas (Community Scheme)" + + else: + self.heating_energy_source = mapped_to else: raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") diff --git 
a/backend/app/config.py b/backend/app/config.py index b335c215..41552ae5 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,8 +1,22 @@ +import os from functools import lru_cache -from pydantic_settings import BaseSettings +from pydantic_settings import BaseSettings, SettingsConfigDict from typing import Optional +def resolve_env_file() -> Optional[str]: + env = os.getenv("ENVIRONMENT", "local") + + if env == "local": + return "backend/.env" + + if env == "test": + return "backend/.env.test" + + # prod = no env file + return None + + class Settings(BaseSettings): API_KEY: str API_KEY_NAME: str = "X-API-KEY" @@ -41,8 +55,10 @@ class Settings(BaseSettings): AWS_SECRET_KEY_ID: Optional[str] = None AWS_DEFAULT_REGION: Optional[str] = None - class Config: - env_file = "backend/.env.local" + model_config = SettingsConfigDict( + env_file=resolve_env_file(), + env_file_encoding="utf-8", + ) @lru_cache() diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 33f391d4..10d7fb06 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -24,7 +24,7 @@ def get_cleaned(): cleaned = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-{environment}".format(environment=get_settings().ENVIRONMENT) + bucket_name=get_settings().DATA_BUCKET ) cleaned = msgpack.unpackb(cleaned, raw=False) diff --git a/backend/condition/handler/Dockerfile b/backend/condition/handler/Dockerfile index 031d981e..71556895 100644 --- a/backend/condition/handler/Dockerfile +++ b/backend/condition/handler/Dockerfile @@ -15,7 +15,7 @@ ENV DB_HOST=${DEV_DB_HOST} ENV DB_PORT=${DEV_DB_PORT} ENV DB_NAME=${DEV_DB_NAME} -COPY backend/.env.local backend/.env.local +COPY backend/.env.test backend/.env # ----------------------------- # Copy requirements FIRST (for Docker layer caching) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index e833eb89..69726604 100644 --- a/backend/engine/engine.py +++ 
b/backend/engine/engine.py @@ -978,13 +978,15 @@ async def model_engine(body: PlanTriggerRequest): recommendations_scoring_data.extend(p.recommendations_scoring_data) logger.info("Preparing data for scoring in sap change api") - recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop( - columns=[ - "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending" - ] - ) - # Temp putting this here + recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) + if not recommendations_scoring_data.empty: + recommendations_scoring_data = recommendations_scoring_data.drop( + columns=[ + "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending" + ] + ) + # TODO: Temp putting this here recommendations_scoring_data["is_post_sap10_ending"] = True all_predictions = await model_api.async_paginated_predictions( diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index daf4b715..440367b2 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -313,4 +313,15 @@ class ModelApi: logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}") await asyncio.sleep(2 ** attempts) # exponential backoff await self.close_aiohttp_session() + + # Ensure stable output structure for the DataFrame to be utilised by other functions downstream + for k in all_predictions.keys(): + if all_predictions[k].empty: + col_template = ['id', 'predictions', 'property_id', 'recommendation_id', 'phase'] if ( + extract_ids) else ['id', 'predictions'] + + all_predictions[k] = pd.DataFrame( + columns=col_template + ) + return all_predictions diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index 93a0b7b0..03cb2370 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -53,7 +53,7 @@ class OnboarderBase: ) else: self.data = read_from_s3(bucket_name=self.bucket_name, 
s3_file_name=self.input_file_name) - + def write(self): if self.data is None: raise ValueError("No data to write. Please run transform() before writing.") diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py index 3291e909..b4bb979d 100644 --- a/etl/bill_savings/KwhData.py +++ b/etl/bill_savings/KwhData.py @@ -196,6 +196,10 @@ class KwhData: if save and self.bucket is None: raise Exception("bucket not set, cannot save data") + if data.empty: + # If we have no data + return data + # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features # in anticipation of the new model diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index c6fea3b6..e470c1a3 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -1090,6 +1090,7 @@ class Recommendations: ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY + # kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][ kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id) ].merge( diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 71e47ba6..f88a672b 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -331,18 +331,18 @@ class RoofRecommendations: """ # Can a non-primary part satisfy loft insulation? 
- primary_needs_loft = component_needs[1]["needs_loft_insulation"] + primary_needs_loft = component_needs[0]["needs_loft_insulation"] secondary_needs_loft = any( - p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 1 + p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 0 ) if primary_needs_loft and not secondary_needs_loft: # Only option is loft return "loft" - primary_needs_sloping = component_needs[1]["needs_sloping_ceiling"] + primary_needs_sloping = component_needs[0]["needs_sloping_ceiling"] secondary_needs_sloping = any( - p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 1 + p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 0 ) if primary_needs_sloping and not secondary_needs_sloping: @@ -418,11 +418,13 @@ class RoofRecommendations: return needs_sloping, not needs_loft # Indicates that the property needs sloping ceiling as we only run # this in that case + roof_components = [x for x in find_my_epc_components if x["component_name"] == "Roof"] + extracted_roof_descriptions = { idx: { "description": component["description"], **RoofAttributes(component["description"]).process() - } for idx, component in enumerate(find_my_epc_components) if component["component_name"] == "Roof" + } for idx, component in enumerate(roof_components) } component_needs = {} diff --git a/utils/s3.py b/utils/s3.py index e70669d0..2e67d4f0 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -264,6 +264,7 @@ def save_excel_to_s3(df, bucket_name, file_key): def read_csv_from_s3(bucket_name, filepath): + logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'") s3 = boto3.client('s3') # Get the object from s3