""" This is a script for preparing a sample for testing the end to end process, so that when Spring send us data, we know it will work. """ import pandas as pd from utils.s3 import read_csv_from_s3 birmingham_epcs = pd.read_csv( "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/domestic-E08000025-Birmingham/certificates.csv" ) # We get the newest EPC, by UPRN and LODGEMENT_DATE birmingham_epcs['LODGEMENT_DATE'] = pd.to_datetime(birmingham_epcs['LODGEMENT_DATE']) birmingham_epcs = birmingham_epcs.sort_values( by=['UPRN', 'LODGEMENT_DATE'], ascending=[True, False] ).drop_duplicates(subset='UPRN') birmingham_epcs["postal_region"] = birmingham_epcs["POSTCODE"].str.split(" ").str[0] addressable_market = birmingham_epcs[ (birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])) & (birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01') & (birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])) & (birmingham_epcs['TENURE'].isin( ['rental (private)', 'Rented (private)'] )) ] # We take the Spring portfolio and remove the properties in their sample asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv') asset_list = pd.DataFrame(asset_list) asset_list["postal_region"] = asset_list["postcode"].str.split(" ").str[0] addressable_market = addressable_market[ ~addressable_market["UPRN"].astype(int).astype(str).isin(asset_list["uprn"].values) ] addressable_market = addressable_market[ addressable_market["postal_region"].isin(asset_list["postal_region"].unique()) ] # Take a sample of properties, EPC F or G, EPC lodged in 2025. We focus on houses/bingalows sample = birmingham_epcs[ (birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G'])) & (birmingham_epcs['LODGEMENT_DATE'] >= '2025-01-01') & (birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])) ] # Prepare the sample, with just the columns we would expect to receive from Spring # 1) UPRN # 2) Address # 3) Postcode # 4) Property type # 5) Built form # 6) Number of bedrooms (we'll simulate this) # 7) Number of bathrooms (we'll simulate this) # 8) Valuation (We'll simulate this, around 200,000) sample = sample[['UPRN', 'ADDRESS', 'POSTCODE', 'PROPERTY_TYPE', 'BUILT_FORM']].copy() sample['BEDROOMS'] = 3 # Simulating number of bedrooms sample['BATHROOMS'] = 1 # Simulating number of bathrooms sample['VALUATION'] = 200000 # Simulating valuation sample.columns = [x.lower() for x in sample.columns] # Store this as a excel sample.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/birmingham_sample.xlsx", index=False )