Model/etl/customers/unitas/Audit_check.py
2024-06-03 18:17:31 +01:00

182 lines
6.4 KiB
Python

import pandas as pd
import os
from tqdm import tqdm
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def _fetch_epcs(**search_kwargs):
    """Look a property up with ``SearchEpc`` and return the EPCs it found.

    ``search_kwargs`` are the identifying arguments forwarded unchanged to
    ``SearchEpc`` (e.g. ``uprn``, ``address1``, ``postcode``, ``full_address``).

    Returns ``[newest_epc] + older_epcs``, or an empty list when the search
    yields no EPC at all.
    """
    searcher = SearchEpc(
        auth_token=EPC_AUTH_TOKEN,
        os_api_key="",
        property_type=None,
        fast=False,
        **search_kwargs
    )
    # Force the skipping of estimating the EPC
    searcher.ordnance_survey_client.property_type = None
    searcher.ordnance_survey_client.built_form = None
    searcher.find_property(skip_os=True)
    if searcher.newest_epc is None:
        return []
    return [searcher.newest_epc] + searcher.older_epcs


def _newest_eco_assessment(epcs):
    """Return the 'ECO assessment' EPC with the latest inspection date, or None."""
    eco_epcs = [epc for epc in epcs if epc['transaction-type'] == 'ECO assessment']
    # max() keeps the first of several equally-dated records, exactly as
    # taking element 0 of a stable descending sort would.
    return max(eco_epcs, key=lambda epc: epc['inspection-date'], default=None)


def app(
    rolling_master_csv=(
        "/Users/khalimconn-kowlessar/Downloads/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 16.5.24 K - PASSWORD "
        "PROTECTED/ECO 4 - PHASE 1-Table 1.csv"
    ),
    community_surveys_xlsx="/Users/khalimconn-kowlessar/Downloads/COMMUNITY HOUSING SURVEYS WITH A POST EPC.xlsx",
    birmingham_certificates_csv=(
        "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv"
    ),
):
    """Audit EPC floor heights for the Unitas survey sheets.

    The audit runs three independent steps:

    1. Read the rolling master sheet (SCIS rows with a usable UPRN), look up
       each UPRN's EPCs, and write two CSVs into the working directory: floor
       height from the newest EPC, and floor height from the newest
       'ECO assessment' EPC. The mean ECO floor height per surveyor is printed.
    2. Read the community housing sheet, look each row up by address, and
       print the mean ECO floor height per surveyor plus how many rows had a
       floor height, a SAP 54 EPC and a SAP 69 EPC.
    3. Read the Birmingham certificates extract and print the mean floor
       height of social-rented ECO assessment certificates, overall and for
       lodgements from 2022-01-01 onwards.

    Args:
        rolling_master_csv: Path of the rolling master CSV export.
        community_surveys_xlsx: Path of the community housing survey workbook.
        birmingham_certificates_csv: Path of the Birmingham certificates CSV.

    Returns:
        None. Results are printed and, for step 1, written to CSV.
    """
    # ---- Step 1: rolling master, looked up by UPRN -------------------------
    master = pd.read_csv(rolling_master_csv)
    master = master[master["INSTALLER"] == "SCIS"]
    master = master[
        [
            'UPRN', 'NO.', 'Street / Block Name', 'Town/Area', 'Post Code', 'Surveyor', "SUBMISSION DATE"
        ]
    ]
    master = master[~pd.isnull(master["UPRN"])]
    master = master[master["UPRN"] != "NOT ON ASSET LIST"]

    heights = []
    eco_assessment_epcs = []
    for _, row in tqdm(master.iterrows(), total=len(master)):
        epcs = _fetch_epcs(address1="", postcode="", uprn=str(int(row["UPRN"])))
        if not epcs:
            continue

        eco_epc = _newest_eco_assessment(epcs)
        if eco_epc is not None:
            eco_assessment_epcs.append(eco_epc)

        # epcs[0] is the newest EPC returned by the search
        heights.append({"uprn": row["UPRN"], "floor_height": epcs[0]["floor-height"]})

    heights_df = pd.DataFrame(heights)
    eco_assessment_epcs_df = pd.DataFrame(eco_assessment_epcs)

    merged_heights_df = master.merge(heights_df, left_on="UPRN", right_on="uprn", how="inner")
    merged_heights_df = merged_heights_df[merged_heights_df["floor_height"] != ""]

    merged_eco_assessment_epcs_df = master.merge(
        eco_assessment_epcs_df[["uprn", "floor-height"]], left_on="UPRN", right_on="uprn", how="inner"
    )
    # EPC records can carry an empty string for floor height; coerce those to
    # NaN (ignored by mean) rather than crashing as astype(float) would.
    merged_eco_assessment_epcs_df["floor-height"] = pd.to_numeric(
        merged_eco_assessment_epcs_df["floor-height"], errors="coerce"
    )
    print("Rolling master - mean ECO assessment floor height by surveyor:")
    print(merged_eco_assessment_epcs_df.groupby("Surveyor")["floor-height"].mean())

    # Store
    merged_heights_df.to_csv("Unitas 2022 heights - based on newest EPC.csv", index=False)
    merged_eco_assessment_epcs_df.to_csv("Unitas 2022 heights - based on ECO assessment EPC.csv", index=False)

    # ---- Step 2: a different sheet, looked up by address -------------------
    master = pd.read_excel(community_surveys_xlsx)
    # This sheet is joined on its row position rather than on UPRN
    master["row_number"] = master.index

    eco_assessment_epcs = []
    expected_pre = []
    expected_post = []
    biggest_floor_height = []
    for _, row in tqdm(master.iterrows(), total=len(master)):
        address_parts = [row["NO."], row["Street / Block Name"], row["Town/Area"], row["Post Code"]]
        # Blank cells arrive as NaN floats, which str.join rejects; skip them
        full_address = ", ".join(str(part) for part in address_parts if pd.notna(part))

        all_epcs = _fetch_epcs(
            address1=str(row["NO."]),
            postcode=str(row["Post Code"]),
            full_address=full_address
        )
        if not all_epcs:
            continue

        # Search for SAP 54s and 69s (collected as "expected pre" / "expected post")
        sap_54s = [epc for epc in all_epcs if epc["current-energy-efficiency"] == "54"]
        sap_69s = [epc for epc in all_epcs if epc["current-energy-efficiency"] == "69"]
        epc_heights = [float(epc["floor-height"]) for epc in all_epcs if epc["floor-height"] != ""]

        eco_epc = _newest_eco_assessment(all_epcs)
        if eco_epc is not None:
            eco_assessment_epcs.append({"row_number": row["row_number"], **eco_epc})
        if epc_heights:
            biggest_floor_height.append(
                {"row_number": row["row_number"], "floor_height": max(epc_heights)}
            )
        if sap_54s:
            expected_pre.append({"row_number": row["row_number"], **sap_54s[0]})
        if sap_69s:
            expected_post.append({"row_number": row["row_number"], **sap_69s[0]})

    expected_pre_df = pd.DataFrame(expected_pre)
    expected_post_df = pd.DataFrame(expected_post)
    heights_df = pd.DataFrame(biggest_floor_height)
    eco_assessment_epcs_df = pd.DataFrame(eco_assessment_epcs)

    # floor_height is already a float here (max of parsed values), so no
    # empty-string filtering is needed after the merge.
    merged_heights_df = master.merge(heights_df, on="row_number", how="inner")

    merged_eco_assessment_epcs_df = master.merge(
        eco_assessment_epcs_df[["row_number", "floor-height"]], on="row_number", how="inner"
    )
    merged_eco_assessment_epcs_df["floor-height"] = pd.to_numeric(
        merged_eco_assessment_epcs_df["floor-height"], errors="coerce"
    )
    print("Community housing - mean ECO assessment floor height by surveyor:")
    print(merged_eco_assessment_epcs_df.groupby("Surveyor")["floor-height"].mean())
    print("Community housing - rows with at least one EPC floor height:", len(merged_heights_df))
    print("Community housing - rows with a SAP 54 EPC:", len(expected_pre_df))
    print("Community housing - rows with a SAP 69 EPC:", len(expected_post_df))

    # ---- Step 3: Birmingham baseline ---------------------------------------
    # Check average floor height for social housing properties with ECO assessment EPCs in Birmingham
    sample = pd.read_csv(birmingham_certificates_csv)
    is_eco = sample["TRANSACTION_TYPE"] == "ECO assessment"
    is_social = sample["TENURE"].isin(["rental (social)", "Rented (social)"])
    # Copy so the column assignment below does not write into a slice
    sample = sample[is_eco & is_social].copy()
    sample["FLOOR_HEIGHT"] = sample["FLOOR_HEIGHT"].astype(float)

    print("Birmingham social ECO assessments - mean floor height:")
    print(sample["FLOOR_HEIGHT"].mean())
    print("Birmingham social ECO assessments lodged from 2022-01-01 - mean floor height:")
    print(sample[pd.to_datetime(sample["LODGEMENT_DATE"]) >= "2022-01-01"]["FLOOR_HEIGHT"].mean())