mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
80 lines
2.2 KiB
Python
80 lines
2.2 KiB
Python
from backend.app.db.connection import get_db_session
|
|
from backend.app.db.models.whlg import Whlg
|
|
|
|
|
|
def upsert_whlg_postcode(postcode: str):
|
|
"""
|
|
Manually upsert a postcode into the WHLG table.
|
|
No unique constraint is required.
|
|
"""
|
|
|
|
cleaned = postcode.lower().replace(" ", "")
|
|
|
|
with get_db_session() as session:
|
|
# Check if record exists
|
|
existing = session.query(Whlg).filter(Whlg.postcode == cleaned).first()
|
|
|
|
if existing:
|
|
return existing # nothing to update, just return it
|
|
|
|
# Insert a new row
|
|
record = Whlg(postcode=cleaned)
|
|
session.add(record)
|
|
session.commit()
|
|
session.refresh(record)
|
|
|
|
return record
|
|
|
|
|
|
# One time script to upload 400,000 records in one go with the pay
|
|
# of pandas and one insert
|
|
from backend.app.db.connection import get_db_session
|
|
from backend.app.db.models.whlg import Whlg
|
|
from sqlalchemy import select
|
|
from sqlalchemy.orm import Session
|
|
|
|
|
|
def upload_whlg_from_dataframe(df):
|
|
"""
|
|
FAST bulk insert of WHLG postcodes (400k+ rows).
|
|
No unique constraint needed.
|
|
"""
|
|
|
|
if "Postcode" not in df.columns:
|
|
raise ValueError("DataFrame must contain a 'Postcode' column")
|
|
|
|
# 1. Clean incoming postcodes
|
|
cleaned_postcodes = (
|
|
df["Postcode"]
|
|
.astype(str)
|
|
.str.lower()
|
|
.str.replace(" ", "", regex=False)
|
|
.dropna()
|
|
.unique()
|
|
.tolist()
|
|
)
|
|
|
|
with get_db_session() as session:
|
|
# 2. Fetch existing postcodes once (VERY FAST)
|
|
existing = session.exec(select(Whlg.postcode)).all()
|
|
existing_set = set(existing)
|
|
|
|
# 3. Determine which are new
|
|
new_postcodes = [
|
|
pc for pc in cleaned_postcodes if pc not in existing_set
|
|
]
|
|
|
|
if not new_postcodes:
|
|
return {"inserted": 0, "skipped_existing": len(cleaned_postcodes)}
|
|
|
|
# 4. Bulk insert new postcodes in one shot
|
|
objects = [Whlg(postcode=pc) for pc in new_postcodes]
|
|
|
|
session.bulk_save_objects(objects)
|
|
session.commit()
|
|
|
|
return {
|
|
"inserted": len(new_postcodes),
|
|
"skipped_existing": len(cleaned_postcodes) - len(new_postcodes),
|
|
"total_provided": len(cleaned_postcodes)
|
|
}
|