Model/backend/app/db/functions/whlg_functions.py
2025-11-14 13:36:09 +00:00

80 lines
2.2 KiB
Python

from backend.app.db.connection import get_db_session
from backend.app.db.models.whlg import Whlg
def upsert_whlg_postcode(postcode: str):
"""
Manually upsert a postcode into the WHLG table.
No unique constraint is required.
"""
cleaned = postcode.lower().replace(" ", "")
with get_db_session() as session:
# Check if record exists
existing = session.query(Whlg).filter(Whlg.postcode == cleaned).first()
if existing:
return existing # nothing to update, just return it
# Insert a new row
record = Whlg(postcode=cleaned)
session.add(record)
session.commit()
session.refresh(record)
return record
# One time script to upload 400,000 records in one go with the pay
# of pandas and one insert
from backend.app.db.connection import get_db_session
from backend.app.db.models.whlg import Whlg
from sqlalchemy import select
from sqlalchemy.orm import Session
def upload_whlg_from_dataframe(df):
"""
FAST bulk insert of WHLG postcodes (400k+ rows).
No unique constraint needed.
"""
if "Postcode" not in df.columns:
raise ValueError("DataFrame must contain a 'Postcode' column")
# 1. Clean incoming postcodes
cleaned_postcodes = (
df["Postcode"]
.astype(str)
.str.lower()
.str.replace(" ", "", regex=False)
.dropna()
.unique()
.tolist()
)
with get_db_session() as session:
# 2. Fetch existing postcodes once (VERY FAST)
existing = session.exec(select(Whlg.postcode)).all()
existing_set = set(existing)
# 3. Determine which are new
new_postcodes = [
pc for pc in cleaned_postcodes if pc not in existing_set
]
if not new_postcodes:
return {"inserted": 0, "skipped_existing": len(cleaned_postcodes)}
# 4. Bulk insert new postcodes in one shot
objects = [Whlg(postcode=pc) for pc in new_postcodes]
session.bulk_save_objects(objects)
session.commit()
return {
"inserted": len(new_postcodes),
"skipped_existing": len(cleaned_postcodes) - len(new_postcodes),
"total_provided": len(cleaned_postcodes)
}