Model/etl/customers/blakeridge_mill/data.py
Khalim Conn-Kowlessar 20e4b28e07 major bulk update
2025-07-14 10:38:15 +01:00

49 lines
1.6 KiB
Python

# Get units for postcodes WF17 8RA, WF17 8RB
import os
import pandas as pd
from epc_api.client import EpcClient
from dotenv import load_dotenv
# Read the EPC API token from the environment, after loading backend/.env.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# Postcodes whose domestic certificates are fetched.
postcodes = ["WF17 8RA", "WF17 8RB"]

client = EpcClient(auth_token=EPC_AUTH_TOKEN)

# Search filters that are sent explicitly as None; only the postcode is set.
_unused_filters = (
    "address",
    "local-authority",
    "property-type",
    "floor-area",
    "energy-band",
    "from-month",
    "from-year",
    "to-month",
    "to-year",
    'constituency',
)

# Accumulate the "rows" of every postcode search into one list.
# NOTE(review): size=1000 is presumably a per-request row cap; confirm whether
# the client paginates if a postcode can hold more certificates than that.
data = []
for postcode in postcodes:
    search_params = {"postcode": postcode, **dict.fromkeys(_unused_filters)}
    resp = client.domestic.search(params=search_params, size=1000)
    data += resp["rows"]

# One DataFrame row per returned record.
df = pd.DataFrame(data)
# Keep only the most recent record for each UPRN: parse the inspection dates,
# order each UPRN's rows newest-first, then keep the first row per UPRN.
# NOTE(review): rows sharing an empty or missing UPRN would be collapsed into a
# single row by drop_duplicates - confirm every record carries a UPRN.
df = (
    df.assign(**{"inspection-date": pd.to_datetime(df["inspection-date"])})
    .sort_values(by=["uprn", "inspection-date"], ascending=[True, False])
    .drop_duplicates(subset=["uprn"], keep="first")
)

# Write the de-duplicated table to a spreadsheet without the index column.
# NOTE(review): this is an absolute path inside one user's home directory, so
# the export only works on that machine.
_export_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Blakeridge Mill/blakeridge_mill_epc_data.xlsx"
df.to_excel(_export_path, index=False)
# Exclude the rows whose address is exactly the string below from the summary
# statistics. The explicit .copy() makes the filtered frame independent of the
# frame it was selected from, so the column assignment further down does not
# write to a slice (which triggers SettingWithCopyWarning on pandas versions
# without copy-on-write).
df = df[df["address"] != "The Tower Blakeridge Mill, Upper Blakeridge Lane"].copy()

# The statements below are bare expressions: their results are only displayed
# when run in an interactive session and are discarded when run as a script.

# Frequency of each wall and roof description.
df["walls-description"].value_counts()
df["roof-description"].value_counts()

# Mean of total-floor-area after casting the column to float.
df["total-floor-area"].astype(float).mean()

# Convert the efficiency score to numbers; unparseable values become NaN and
# are therefore ignored by the mean below.
df["current-energy-efficiency"] = pd.to_numeric(df["current-energy-efficiency"], errors="coerce")

# Mean efficiency score per transaction type, and how often each type occurs.
df.groupby("transaction-type")["current-energy-efficiency"].mean()
df["transaction-type"].value_counts()

# Built-form frequencies among rows whose transaction type is "rental".
df[df["transaction-type"] == "rental"]["built-form"].value_counts()