import pandas as pd import requests def sanitise_postcode(postcode: str) -> str | None: """ Normalise postcode for grouping. - Uppercase - Remove all whitespace """ if pd.isna(postcode): return None return postcode.upper().replace(" ", "") def is_valid_postcode(postcode_clean: str) -> bool: """ Validate postcode using postcodes.io. Expects a sanitised postcode (e.g. E84SQ). Returns True if valid, False otherwise. """ POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate" if not postcode_clean: return False try: resp = requests.get( POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean), timeout=5, ) resp.raise_for_status() return resp.json().get("result", False) except requests.RequestException: # Network issues, rate limits, etc. return False def main(): df = pd.read_excel("hackney.xlsx") # Sanitise postcodes df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode) # --- validate AFTER grouping (save API calls) --- # Get unique, non-null postcodes unique_postcodes = ( df["postcode_clean"] .dropna() .unique() ) # Validate each postcode once postcode_validity = { pc: is_valid_postcode(pc) for pc in unique_postcodes } # Map validity back onto dataframe df["postcode_valid"] = df["postcode_clean"].map(postcode_validity) # Group only valid postcodes grouped = ( df[df["postcode_valid"]] .groupby("postcode_clean") ) # Example: count addresses per postcode postcode_counts = grouped.size().sort_values(ascending=False) for pc in sorted(unique_postcodes): pc_df = df[df["postcode_clean"] == pc] pd_df if __name__ == "__main__": main()