Model/etl/customers/lincs_rural/prepare_data.py
Khalim Conn-Kowlessar 8f7e9e0bde simplified fuel code
2025-12-10 19:17:19 +00:00

91 lines
2.9 KiB
Python

"""
Rough script to prepare the data for the Lincs Rural project: looks up each property's EPC reference (RRN) to find its UPRN.
"""
from tqdm import tqdm
import pandas as pd
import os
from dotenv import load_dotenv
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.SearchEpc import SearchEpc
# Pull variables from the backend's dotenv file into the process environment,
# then pick out the token that is later handed to the EPC search.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")

# Source workbook; rows are read from its "PROPERTY EPC RATINGS" sheet.
_master_list_path = (
    "/Users/khalimconn-kowlessar/Downloads/"
    "MASTER LIST EPCS UPDATED November 2025 Domna Homes.xlsx"
)
data = pd.read_excel(_master_list_path, sheet_name="PROPERTY EPC RATINGS")
# We have property RRNs (EPC certificate references) - we need the UPRN.
def _address_parts(*parts):
    """Return the usable address parts as strings, dropping blanks.

    ``None``, NaN and empty values are skipped so that joining the result
    never raises ``TypeError`` on a non-string part and never leaves a
    dangling separator when the second address line is empty.
    """
    usable = []
    for part in parts:
        # ``part != part`` is only true for NaN.
        if part is None or part != part:
            continue
        text = str(part)
        if text:
            usable.append(text)
    return usable


# Rows resolved successfully, in the standardised asset-list format.
standardised_ara_list = []
# Rows that failed at any step, with the reason, for manual follow-up.
missed = []
for _, x in tqdm(data.iterrows(), total=len(data)):
    # Read the identifiers once, outside the ``try``, so the error handler
    # below does not have to index the row again (and cannot fail doing so).
    property_ref = x["Property Ref."]
    rrn = x["EPC Ref."]
    try:
        # Fetch from find my epc
        retriever = RetrieveFindMyEpc(
            address="",
            postcode="",
            rrn=rrn,
            address_postal_town="",
        )
        find_epc_data = retriever.retrieve_newest_find_my_epc_data(rrn=rrn)

        # Build the address parts once; both the search string and the
        # fallback full address are derived from them.
        address_parts = _address_parts(
            find_epc_data["address1"],
            find_epc_data["address2"],
        )

        # Find the UPRN
        epc_searcher = SearchEpc(
            address1=str(find_epc_data["address1"]),
            postcode=str(find_epc_data["postcode"]),
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            property_type=None,
            fast=False,
            full_address=",".join(address_parts),
            max_retries=5,
        )
        epc_searcher.find_property(skip_os=True)

        newest_epc = epc_searcher.newest_epc
        # NOTE(review): presumably ``newest_epc`` stays None when the search
        # finds nothing - confirm against SearchEpc. Calling ``.get`` on None
        # would fail with an opaque AttributeError, so report it explicitly.
        if newest_epc is None:
            raise LookupError(f"No EPC record found for RRN {rrn}")

        # Append in format we need
        standardised_ara_list.append(
            {
                "landlord_property_id": property_ref,
                "domna_address_1": find_epc_data["address1"],
                "postcode": find_epc_data["postcode"],
                "landlord_property_type": newest_epc.get("property-type"),
                "landlord_built_form": newest_epc.get("built-form"),
                "landlord_heating_system": newest_epc.get("mainheat-description", ""),
                "epc_os_uprn": newest_epc.get("uprn"),
                "domna_property_id": property_ref,
                # Prefer the address on the EPC record; fall back to the
                # find-my-epc address lines when it is absent.
                "domna_full_address": newest_epc.get(
                    "address", ", ".join(address_parts)
                ),
            }
        )
    except Exception as e:
        # Deliberate best-effort: one bad row must not stop the whole run.
        # The failure is recorded so the property can be chased up later.
        missed.append({
            "property_ref": property_ref,
            "rrn": rrn,
            "error": str(e)
        })
# Turn both result lists into frames before writing anything out.
standardised_ara_df = pd.DataFrame(standardised_ara_list)
missed_df = pd.DataFrame(missed)

# (frame, destination workbook, sheet name) - written in this order:
# the standardised asset list first, then the properties that were missed.
_exports = (
    (
        standardised_ara_df,
        "/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx",
        "Standardised Asset List",
    ),
    (
        missed_df,
        "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx",
        "Missed Properties",
    ),
)
for _frame, _destination, _sheet in _exports:
    _frame.to_excel(_destination, index=False, sheet_name=_sheet)