mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
71 lines
2.6 KiB
Python
71 lines
2.6 KiB
Python
"""
|
|
This is a script for preparing a sample for testing the end-to-end process, so that when Spring sends us
|
|
data, we know it will work.
|
|
"""
|
|
|
|
import pandas as pd
|
|
from utils.s3 import read_csv_from_s3
|
|
|
|
# Load the EPC certificates export for Birmingham (local authority code
# E08000025) from the local working copy of the download.
birmingham_epcs = pd.read_csv(
    filepath_or_buffer="/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/domestic-E08000025-Birmingham/certificates.csv",
)
|
|
|
|
# Keep only the newest EPC per property, identified by UPRN and ordered by
# LODGEMENT_DATE.
birmingham_epcs['LODGEMENT_DATE'] = pd.to_datetime(birmingham_epcs['LODGEMENT_DATE'])

# Certificates without a UPRN cannot be tied to a property. drop_duplicates
# treats missing values as equal to each other, so leaving them in would
# collapse every UPRN-less certificate into one arbitrary row, and that row
# would break the integer cast of UPRN further down. Drop them up front.
birmingham_epcs = (
    birmingham_epcs
    .dropna(subset=['UPRN'])
    # Newest certificate first within each UPRN, so keep='first' retains it.
    .sort_values(by=['UPRN', 'LODGEMENT_DATE'], ascending=[True, False])
    .drop_duplicates(subset='UPRN', keep='first')
)
|
|
|
|
# The postal region is everything before the first space in the postcode.
postcode_parts = birmingham_epcs["POSTCODE"].str.split(" ")
birmingham_epcs["postal_region"] = postcode_parts.str[0]
|
|
|
|
# Addressable market: privately rented houses and bungalows rated D-G whose
# latest EPC was lodged on or after 2020-01-01.
rated_d_to_g = birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])
lodged_since_2020 = birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01'
house_or_bungalow = birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])
# Both spellings of the private-rental tenure are matched.
privately_rented = birmingham_epcs['TENURE'].isin(['rental (private)', 'Rented (private)'])

addressable_market = birmingham_epcs[
    rated_d_to_g & lodged_since_2020 & house_or_bungalow & privately_rented
]
|
|
|
|
# Load the Spring portfolio (asset list) from S3 so that properties already
# in their sample can be removed from the addressable market.
# NOTE(review): the result is passed straight to pd.DataFrame, so presumably
# read_csv_from_s3 returns a list of row records - confirm.
asset_list = pd.DataFrame(
    read_csv_from_s3(
        bucket_name="retrofit-plan-inputs-dev",
        filepath='8/206/asset_list.csv',
    )
)
# Same postal-region derivation as for the EPC data: the part of the postcode
# before the first space.
asset_postcode_parts = asset_list["postcode"].str.split(" ")
asset_list["postal_region"] = asset_postcode_parts.str[0]
|
|
|
|
# Narrow the addressable market to properties that are NOT already in the
# Spring portfolio but sit in a postal region the portfolio covers.
# UPRN is cast to int and then to str before matching against the asset list.
# NOTE(review): presumably asset_list["uprn"] holds strings - confirm.
market_uprns = addressable_market["UPRN"].astype(int).astype(str)
already_in_portfolio = market_uprns.isin(asset_list["uprn"].values)
in_portfolio_region = addressable_market["postal_region"].isin(
    asset_list["postal_region"].unique()
)

addressable_market = addressable_market[~already_in_portfolio & in_portfolio_region]
|
|
|
|
# Take a sample of properties: EPC rated F or G, lodged on or after
# 2025-01-01, houses and bungalows only. The sample is drawn from the full
# de-duplicated EPC set (birmingham_epcs), not from addressable_market.
sample_is_f_or_g = birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G'])
sample_is_recent = birmingham_epcs['LODGEMENT_DATE'] >= '2025-01-01'
sample_is_house = birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])

sample = birmingham_epcs[sample_is_f_or_g & sample_is_recent & sample_is_house]
|
|
|
|
# Shape the sample so it carries only the columns we expect to receive from
# Spring:
#   1) UPRN
#   2) Address
#   3) Postcode
#   4) Property type
#   5) Built form
#   6) Number of bedrooms  (simulated)
#   7) Number of bathrooms (simulated)
#   8) Valuation           (simulated, around 200,000)
spring_columns = ['UPRN', 'ADDRESS', 'POSTCODE', 'PROPERTY_TYPE', 'BUILT_FORM']

# assign() returns a new frame, so the simulated constants never touch the
# underlying EPC data.
sample = sample[spring_columns].assign(
    BEDROOMS=3,        # simulated number of bedrooms
    BATHROOMS=1,       # simulated number of bathrooms
    VALUATION=200000,  # simulated valuation
)

# Column names are lower-cased for the exported file.
sample.columns = [name.lower() for name in sample.columns]
|
|
|
|
# Write the sample out as an Excel workbook, without the DataFrame index.
sample.to_excel(
    excel_writer="/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/birmingham_sample.xlsx",
    index=False,
)
|