from backend.app.db.connection import get_db_session from backend.app.db.models.whlg import Whlg def upsert_whlg_postcode(postcode: str): """ Manually upsert a postcode into the WHLG table. No unique constraint is required. """ cleaned = postcode.lower().replace(" ", "") with get_db_session() as session: # Check if record exists existing = session.query(Whlg).filter(Whlg.postcode == cleaned).first() if existing: return existing # nothing to update, just return it # Insert a new row record = Whlg(postcode=cleaned) session.add(record) session.commit() session.refresh(record) return record # One time script to upload 400,000 records in one go with the pay # of pandas and one insert from backend.app.db.connection import get_db_session from backend.app.db.models.whlg import Whlg from sqlalchemy import select from sqlalchemy.orm import Session def upload_whlg_from_dataframe(df): """ FAST bulk insert of WHLG postcodes (400k+ rows). No unique constraint needed. """ if "Postcode" not in df.columns: raise ValueError("DataFrame must contain a 'Postcode' column") # 1. Clean incoming postcodes cleaned_postcodes = ( df["Postcode"] .astype(str) .str.lower() .str.replace(" ", "", regex=False) .dropna() .unique() .tolist() ) with get_db_session() as session: # 2. Fetch existing postcodes once (VERY FAST) existing = session.exec(select(Whlg.postcode)).all() existing_set = set(existing) # 3. Determine which are new new_postcodes = [ pc for pc in cleaned_postcodes if pc not in existing_set ] if not new_postcodes: return {"inserted": 0, "skipped_existing": len(cleaned_postcodes)} # 4. Bulk insert new postcodes in one shot objects = [Whlg(postcode=pc) for pc in new_postcodes] session.bulk_save_objects(objects) session.commit() return { "inserted": len(new_postcodes), "skipped_existing": len(cleaned_postcodes) - len(new_postcodes), "total_provided": len(cleaned_postcodes) }