"""Resolve UPRNs for a spreadsheet of addresses, working one postcode at a time."""

from urllib.parse import quote

import pandas as pd
import requests
from tqdm import tqdm

from backend.address2UPRN.main import (
    resolve_uprns_for_postcode_group,
    get_epc_data_with_postcode,
)

POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"

# Columns placed side by side when reviewing match quality.
# NOTE(review): the "best_match_*" columns are presumably added by
# resolve_uprns_for_postcode_group -- confirm against that helper.
REVIEW_COLUMNS = [
    "best_match_lexiscore",
    "Address 1",
    "best_match_address",
    "Postcode",
    "UPRN",
    "best_match_uprn",
]


def sanitise_postcode(postcode: str | float | None) -> str | None:
    """
    Normalise a postcode so equivalent spellings group together.

    - Uppercase
    - Remove all whitespace (spaces, tabs, non-breaking spaces, ...)

    Returns None for missing values (None / NaN). Non-string cells are
    converted with ``str()`` before normalising.
    """
    if pd.isna(postcode):
        return None
    # str.split() with no argument splits on every Unicode whitespace
    # character, so joining the pieces removes all of them, not just " ".
    return "".join(str(postcode).upper().split())


def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Validate a postcode using postcodes.io.

    Expects a sanitised postcode (e.g. E84SQ).

    Returns True only when the service explicitly reports the postcode as
    valid. Empty input, network errors, HTTP error statuses and unparseable
    responses all yield False.
    """
    if not postcode_clean:
        return False

    # Percent-encode the value so characters such as "/" or "?" in dirty
    # spreadsheet data cannot alter the request path.
    url = POSTCODES_IO_VALIDATE_URL.format(
        postcode=quote(str(postcode_clean), safe="")
    )
    try:
        resp = requests.get(url, timeout=5)
        resp.raise_for_status()
        return resp.json().get("result") is True
    except (requests.RequestException, ValueError):
        # Network issues, rate limits, etc. ValueError covers a non-JSON body
        # on requests versions whose JSON error is not a RequestException.
        return False


def _unresolved_rows(
    group_df: pd.DataFrame, status: str, error: str | None = None
) -> pd.DataFrame:
    """Return a copy of *group_df* marked as unresolved with the given status."""
    rows = group_df.copy()
    rows["found_uprn"] = None
    rows["status"] = status
    if error is not None:
        rows["error"] = error
    return rows


def main(
    input_path: str = "hackney.xlsx",
    sheet_name: str = "Sustainability",
    max_rows: int | None = 500,
) -> None:
    """
    Load the address sheet, validate its postcodes and resolve UPRNs per postcode.

    Args:
        input_path: Excel workbook to read.
        sheet_name: Worksheet within the workbook.
        max_rows: Only the first ``max_rows`` rows are processed; None
            processes the whole sheet.

    Rows whose postcode is missing or fails validation are excluded from the
    per-postcode resolution step.

    NOTE(review): the combined result is neither returned nor written
    anywhere; it only exists as locals at the end of this function.
    """
    df = pd.read_excel(input_path, sheet_name=sheet_name)
    if max_rows is not None:
        # .copy() so the column assignments below write to an independent
        # frame rather than to a slice of the full sheet.
        df = df.head(max_rows).copy()

    # Sanitise postcodes
    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)

    # Validate each distinct postcode once to save API calls.
    unique_postcodes = df["postcode_clean"].dropna().unique()
    postcode_validity = {
        pc: is_valid_postcode(pc)
        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
    }

    # Build a strict boolean column. Mapping the dict directly would leave NaN
    # for rows with a missing postcode, and a mask containing NaN cannot be
    # used for boolean indexing.
    valid_postcodes = [pc for pc, ok in postcode_validity.items() if ok]
    df["postcode_valid"] = df["postcode_clean"].isin(valid_postcodes)

    results = []
    for postcode, group_df in tqdm(
        df[df["postcode_valid"]].groupby("postcode_clean"),
        desc="Resolving UPRNs by postcode",
    ):
        try:
            epc_df = get_epc_data_with_postcode(postcode)
            if epc_df.empty:
                results.append(_unresolved_rows(group_df, "no_epc_results"))
                continue

            results.append(
                resolve_uprns_for_postcode_group(
                    group_df=group_df,
                    epc_df=epc_df,
                )
            )
        except Exception as e:
            # Deliberately broad: one failing postcode must not abort the
            # whole run, so the failure is recorded on that group's rows.
            results.append(_unresolved_rows(group_df, "exception", error=str(e)))

    if not results:
        # Nothing survived validation; pd.concat raises on an empty list.
        return

    final_df = pd.concat(results, ignore_index=True)

    # Inspection views (handy in a debugger). reindex() fills any column that
    # no group produced with NaN instead of raising KeyError.
    review_view = final_df.reindex(columns=REVIEW_COLUMNS)
    # Same columns, restricted to rows with a positive match score.
    matched_view = review_view[review_view["best_match_lexiscore"] > 0]


def handler(event, context):
    """
    Placeholder handler: print a greeting and return a 200 response dict.

    NOTE(review): the (event, context) signature looks like an AWS Lambda
    entry point -- confirm. Neither argument is used.
    """
    print("hello Postcode splitter world")
    return {"statusCode": 200, "body": "hello world"}


if __name__ == "__main__":
    main()