# Mirror of https://github.com/Hestia-Homes/Model.git
# Synced 2026-06-08 11:17:27 +00:00
# 81 lines, 1.9 KiB, Python
from urllib.parse import quote

import pandas as pd
import requests
|
|
|
|
|
|
|
|
|
|
def sanitise_postcode(postcode: str) -> str | None:
|
|
"""
|
|
Normalise postcode for grouping.
|
|
|
|
- Uppercase
|
|
- Remove all whitespace
|
|
"""
|
|
if pd.isna(postcode):
|
|
return None
|
|
|
|
return postcode.upper().replace(" ", "")
|
|
|
|
|
|
def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Validate a postcode using the postcodes.io ``/validate`` endpoint.

    Expects a sanitised postcode (e.g. E84SQ).

    Args:
        postcode_clean: Sanitised postcode; empty or None is treated as
            invalid without making a request.

    Returns:
        True only when the service reports the postcode as valid.
        False for empty input, for any request failure (network issue,
        timeout, HTTP error status such as a rate limit) and for a response
        body that is not the expected JSON object.
    """
    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"

    if not postcode_clean:
        return False

    # Percent-encode the value (including "/") so characters coming from the
    # spreadsheet cannot alter the path or query of the request URL.
    url = POSTCODES_IO_VALIDATE_URL.format(
        postcode=quote(str(postcode_clean), safe=""),
    )

    try:
        resp = requests.get(url, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError):
        # Best-effort check: network issues, rate limits, non-JSON bodies,
        # etc. are all reported as "not valid" rather than raised.
        return False

    if not isinstance(payload, dict):
        return False
    return bool(payload.get("result", False))
|
|
|
|
|
|
def main(path: str = "hackney.xlsx") -> None:
    """
    Load an address spreadsheet, validate its postcodes and report on them.

    Steps:
    - Read the workbook at ``path``; it must contain a ``Postcode`` column.
    - Add a ``postcode_clean`` column (normalised postcode) and a
      ``postcode_valid`` column (outcome of the postcodes.io check).
    - Print the number of rows per valid postcode, most frequent first.
    - Print the rows of every distinct non-null postcode (valid or not),
      in sorted postcode order.

    Args:
        path: Spreadsheet to read. Defaults to ``hackney.xlsx`` in the
            current working directory.
    """
    df = pd.read_excel(path)

    # Sanitise postcodes
    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)

    # Validate each distinct postcode once (not once per row) to save API
    # calls.
    unique_postcodes = df["postcode_clean"].dropna().unique()
    postcode_validity = {pc: is_valid_postcode(pc) for pc in unique_postcodes}

    # Map validity back onto the dataframe. Rows without a postcode are not
    # in the lookup; they are marked invalid so the column is strictly
    # boolean (a mask containing NaN cannot be used for row selection).
    df["postcode_valid"] = (
        df["postcode_clean"]
        .map(lambda pc: postcode_validity.get(pc, False))
        .astype(bool)
    )

    # Group only valid postcodes
    grouped = df[df["postcode_valid"]].groupby("postcode_clean")

    # Example: count rows per postcode
    postcode_counts = grouped.size().sort_values(ascending=False)
    print(postcode_counts)

    # groupby() skips missing keys and yields groups in sorted key order, so
    # this visits every distinct non-null postcode once without re-filtering
    # the whole frame for each postcode. Each frame carries the
    # ``postcode_valid`` column, so invalid postcodes remain identifiable.
    for _pc, pc_df in df.groupby("postcode_clean"):
        print(pc_df)
|
|
|
|
# Script entry point: run the pipeline only on direct execution, not on import.
if __name__ == "__main__":
    main()
|