diff --git a/backend/postcode_splitter/hackney.xlsx b/backend/postcode_splitter/hackney.xlsx deleted file mode 100644 index 64892f3a..00000000 Binary files a/backend/postcode_splitter/hackney.xlsx and /dev/null differ diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d417c8f1..d55f618a 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,10 +1,12 @@ import pandas as pd import requests -from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode +from backend.address2UPRN.main import ( + resolve_uprns_for_postcode_group, + get_epc_data_with_postcode, +) from tqdm import tqdm - def sanitise_postcode(postcode: str) -> str | None: """ Normalise postcode for grouping. @@ -51,11 +53,7 @@ def main(): # --- validate AFTER grouping (save API calls) --- # Get unique, non-null postcodes - unique_postcodes = ( - df["postcode_clean"] - .dropna() - .unique() - ) + unique_postcodes = df["postcode_clean"].dropna().unique() # Validate each postcode once, TODOadd a progress bar postcode_validity = { @@ -66,7 +64,6 @@ def main(): # Map validity back onto dataframe df["postcode_valid"] = df["postcode_clean"].map(postcode_validity) - results = [] for postcode, group_df in tqdm( @@ -98,17 +95,33 @@ def main(): results.append(tmp) final_df = pd.concat(results, ignore_index=True) - a = final_df[[ - "best_match_lexiscore","Address 1", - "best_match_address", "Postcode", - "UPRN", "best_match_uprn" - ]] # add levi score to viewing - b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing - b = b[[ - "best_match_lexiscore","Address 1", - "best_match_address", "Postcode", - "UPRN", "best_match_uprn" - ]] + a = final_df[ + [ + "best_match_lexiscore", + "Address 1", + "best_match_address", + "Postcode", + "UPRN", + "best_match_uprn", + ] + ] # add levi score to viewing + b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing + b = b[ + [ + "best_match_lexiscore", + "Address 1", + "best_match_address", + "Postcode", + "UPRN", + "best_match_uprn", + ] + ] + + +def handler(event, context): + print("hello Postcode splitter world") + return {"statusCode": 200, "body": "hello world"} + if __name__ == "__main__": main() diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 3ba78ef3..fa1007f2 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -311,4 +311,29 @@ module "address2uprn_registry" { output "address2uprn_repository_url" { value = module.address2uprn_registry.repository_url +} + + +################################################ +# Postcode Splitter – Lambda ECR +################################################ +module "postcodeSplitter_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "postcodeSplitter-terraform-state" + +} + +output "postcodeSplitter_state_bucket_name" { + value = module.postcodeSplitter_state_bucket.bucket_name +} + +module "postcodeSplitter_registry" { + source = "../modules/container_registry" + name = "postcodeSplitter" + stage = var.stage + +} + +output "postcodeSplitter_repository_url" { + value = module.postcodeSplitter_registry.repository_url } \ No newline at end of file