Model/etl/customers/united living/get_data.py
2025-03-19 18:50:21 +00:00

73 lines
2.4 KiB
Python

import os
import pandas as pd
import numpy as np
from asset_list.utils import get_data
from backend.SearchEpc import SearchEpc
from etl.spatial.OpenUprnClient import OpenUprnClient
from dotenv import load_dotenv
# Load environment variables from the backend .env file. The path is relative,
# so it is resolved against the current working directory.
load_dotenv(dotenv_path="backend/.env")
# EPC API token read from the environment; os.getenv returns None when the
# variable is not set. app() passes this value to get_data().
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
# Default input workbook and output workbook for this customer run.
_DEFAULT_INPUT_PATH = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/"
    "Potential GMCA props 05.03.xlsx"
)
_DEFAULT_OUTPUT_PATH = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/"
    "Potential GMCA props 05.03 - data pulled.xlsx"
)

# UPRNs filled in manually, keyed by the exact "Address" value in the workbook.
# These are the rows the lookup reported no data for on the original run.
_MANUAL_UPRNS = {
    "108 Alexandra Street": 100011536830,  # row 53, OL6 9QP
    "301 Whiteacre Road": 100011557437,  # row 56, OL6 9QF
    "97 Princess Street": 100011551813,  # row 65, OL6 9QJ
}


def app(filepath=_DEFAULT_INPUT_PATH, output_path=_DEFAULT_OUTPUT_PATH):
    """Attach UPRNs and spatial flags to a property workbook and save the result.

    Steps:
      1. Read the workbook at ``filepath`` (needs "Address" and "Postcode"
         columns) and tag each row with a ``row_id`` and ``house_number``.
      2. Call ``get_data`` to look the properties up and take the ``uprn`` it
         returns for each ``row_id``.
      3. Apply the manual overrides in ``_MANUAL_UPRNS``.
      4. Fetch spatial data for the UPRNs from ``OpenUprnClient`` and
         left-merge it onto the properties.
      5. Fill missing conservation/listed/heritage flags with ``False`` and
         write the merged table to ``output_path``.

    Args:
        filepath: Path of the Excel workbook to read.
        output_path: Path of the Excel workbook to write.

    Raises:
        ValueError: If any row still has no UPRN after the manual overrides.
    """
    df = pd.read_excel(filepath)
    df["row_id"] = df.index
    df["house_number"] = df.apply(
        lambda row: SearchEpc.get_house_number(row["Address"], row["Postcode"]),
        axis=1,
    )

    # NOTE(review): the third return value is used as the collection of row ids
    # for which no data came back -- confirm against get_data's contract.
    properties_data, _, missing_row_ids = get_data(
        df=df,
        manual_uprn_map={},
        epc_auth_token=EPC_AUTH_TOKEN,
        uprn_column=None,
        fulladdress_column="Address",
        address1_column="house_number",
        postcode_column="Postcode",
        property_type_column=None,
        built_form_column=None,
        epc_api_only=True,
        row_id_name="row_id",
    )

    # Surface the rows that need a manual UPRN instead of silently discarding
    # the selection (a bare expression is a no-op outside a notebook/REPL).
    no_data = df[df["row_id"].isin(missing_row_ids)]
    if not no_data.empty:
        print("No data returned for:")
        print(no_data[["Address", "Postcode"]].to_string())

    data = df.merge(
        pd.DataFrame(properties_data)[["uprn", "row_id"]],
        how="left", left_on="row_id", right_on="row_id"
    )

    # Fill missing UPRNs from the manual lookup table.
    for address, uprn in _MANUAL_UPRNS.items():
        data["uprn"] = np.where(data["Address"] == address, uprn, data["uprn"])

    # Fail early with a useful message: the integer cast below cannot handle
    # missing values, and there is no point querying spatial data without them.
    unresolved = data[data["uprn"].isna()]
    if not unresolved.empty:
        raise ValueError(
            "No UPRN found for: "
            + ", ".join(unresolved["Address"].astype(str))
            + ". Add them to _MANUAL_UPRNS before re-running."
        )

    # We now get whether the property is listed, heritage or in a conservation area.
    # NOTE(review): the UPRNs are passed before the int cast, as in the original
    # ordering -- confirm the client accepts the pre-cast values.
    spatial_data = OpenUprnClient.get_spatial_data(
        uprns=data["uprn"].tolist(), bucket_name="retrofit-data-dev"
    )
    spatial_data = spatial_data.rename(columns={"UPRN": "uprn"})

    # Cast the merge key to int before joining on it.
    data["uprn"] = data["uprn"].astype(int)
    merged = data.merge(spatial_data, how="left", on="uprn")

    # Properties with no spatial match are treated as not flagged.
    flag_columns = ["conservation_status", "is_listed_building", "is_heritage_building"]
    merged[flag_columns] = merged[flag_columns].fillna(False)

    merged.to_excel(output_path, index=False)