"""Ad-hoc analysis of EPC floor heights for surveyed properties.

Running ``app()`` performs three independent steps:

1. Look up EPCs by UPRN for the SCIS rows of the rolling master sheet and
   write two CSVs (floor height from the newest EPC, and from the newest
   "ECO assessment" EPC).
2. Look up EPCs by address for the community housing sheet and summarise
   ECO-assessment floor heights per surveyor.
3. Report the mean floor height of social-rented ECO-assessment certificates
   in the local Birmingham certificates extract.
"""
import os

import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm

from backend.SearchEpc import SearchEpc

load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# Input locations. These are machine-specific; edit them before running elsewhere.
ROLLING_MASTER_CSV = (
    "/Users/khalimconn-kowlessar/Downloads/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 16.5.24 K - PASSWORD "
    "PROTECTED/ECO 4 - PHASE 1-Table 1.csv"
)
COMMUNITY_HOUSING_XLSX = "/Users/khalimconn-kowlessar/Downloads/COMMUNITY HOUSING SURVEYS WITH A POST EPC.xlsx"
BIRMINGHAM_CERTIFICATES_CSV = (
    "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv"
)

ECO_TRANSACTION_TYPE = "ECO assessment"


def _lookup_epcs(**search_kwargs):
    """Return all EPCs found for one property, or ``[]`` when none is found.

    ``search_kwargs`` are the identifying arguments forwarded to ``SearchEpc``
    (``address1``/``postcode`` plus either ``uprn`` or ``full_address``).
    The returned list is ``newest_epc`` followed by ``older_epcs``.
    """
    searcher = SearchEpc(
        auth_token=EPC_AUTH_TOKEN,
        os_api_key="",
        property_type=None,
        fast=False,
        **search_kwargs,
    )
    # Force the skipping of estimating the EPC
    searcher.ordnance_survey_client.property_type = None
    searcher.ordnance_survey_client.built_form = None
    searcher.find_property(skip_os=True)

    if searcher.newest_epc is None:
        return []
    return [searcher.newest_epc] + searcher.older_epcs


def _newest_eco_assessment_epc(epcs):
    """Return the ECO-assessment EPC with the latest inspection date, or ``None``."""
    eco_epcs = [epc for epc in epcs if epc["transaction-type"] == ECO_TRANSACTION_TYPE]
    if not eco_epcs:
        return None
    # max() keeps the first of any tied dates, matching a stable descending sort.
    return max(eco_epcs, key=lambda epc: epc["inspection-date"])


def _merge_records(master, records, columns, **merge_keys):
    """Inner-join ``records`` (a list of dicts) onto ``master``.

    Only ``columns`` are taken from the records. When ``records`` is empty an
    empty frame with ``master``'s columns plus ``columns`` is returned, instead
    of failing on a column-less DataFrame.
    """
    if not records:
        empty = master.iloc[0:0].copy()
        for column in columns:
            if column not in empty.columns:
                empty[column] = pd.Series(dtype=object)
        return empty
    return master.merge(pd.DataFrame(records)[columns], how="inner", **merge_keys)


def _merge_eco_floor_heights(master, eco_assessment_epcs, key, **merge_keys):
    """Join ECO-assessment floor heights onto ``master`` as a numeric column.

    Blank or otherwise unparseable heights become NaN rather than raising, so
    they are simply ignored by ``mean()``.
    """
    merged = _merge_records(master, eco_assessment_epcs, [key, "floor-height"], **merge_keys)
    merged["floor-height"] = pd.to_numeric(merged["floor-height"], errors="coerce")
    return merged


def _analyse_rolling_master():
    """Look up EPCs by UPRN for SCIS rows of the rolling master and store the floor heights."""
    # Read in rolling master
    master = pd.read_csv(ROLLING_MASTER_CSV)
    master = master[master["INSTALLER"] == "SCIS"]
    master = master[
        [
            "UPRN",
            "NO.",
            "Street / Block Name",
            "Town/Area",
            "Post Code",
            "Surveyor",
            "SUBMISSION DATE",
        ]
    ]
    # Only rows with a usable UPRN can be looked up.
    master = master[~pd.isnull(master["UPRN"])]
    master = master[master["UPRN"] != "NOT ON ASSET LIST"]

    heights = []
    eco_assessment_epcs = []
    for _, row in tqdm(master.iterrows(), total=len(master)):
        epcs = _lookup_epcs(address1="", postcode="", uprn=str(int(row["UPRN"])))
        if not epcs:
            continue

        eco_epc = _newest_eco_assessment_epc(epcs)
        if eco_epc is not None:
            eco_assessment_epcs.append(eco_epc)

        # epcs[0] is the newest EPC for the property.
        heights.append({"uprn": row["UPRN"], "floor_height": epcs[0]["floor-height"]})

    merged_heights_df = _merge_records(
        master, heights, ["uprn", "floor_height"], left_on="UPRN", right_on="uprn"
    )
    merged_heights_df = merged_heights_df[merged_heights_df["floor_height"] != ""]

    # NOTE(review): this join assumes the "uprn" values in the EPC records are
    # comparable with the sheet's "UPRN" column - confirm the dtypes line up.
    merged_eco_assessment_epcs_df = _merge_eco_floor_heights(
        master, eco_assessment_epcs, "uprn", left_on="UPRN", right_on="uprn"
    )
    print(merged_eco_assessment_epcs_df.groupby("Surveyor")["floor-height"].mean())

    # Store
    merged_heights_df.to_csv("Unitas 2022 heights - based on newest EPC.csv", index=False)
    merged_eco_assessment_epcs_df.to_csv(
        "Unitas 2022 heights - based on ECO assessment EPC.csv", index=False
    )


def _analyse_community_housing():
    """Look up EPCs by address for the community housing sheet and summarise them."""
    # Read in a different sheet
    master = pd.read_excel(COMMUNITY_HOUSING_XLSX)
    # This sheet is matched by address, so the row index is the join key.
    master["row_number"] = master.index

    eco_assessment_epcs = []
    expected_pre = []
    expected_post = []
    biggest_floor_height = []
    for _, row in tqdm(master.iterrows(), total=len(master)):
        address_parts = [
            row["NO."],
            row["Street / Block Name"],
            row["Town/Area"],
            row["Post Code"],
        ]
        # Skip missing cells: str.join() raises TypeError on a NaN part.
        full_address = ", ".join(str(part) for part in address_parts if pd.notna(part))

        all_epcs = _lookup_epcs(
            address1=str(row["NO."]),
            postcode=str(row["Post Code"]),
            full_address=full_address,
        )
        if not all_epcs:
            continue

        row_number = row["row_number"]

        eco_epc = _newest_eco_assessment_epc(all_epcs)
        if eco_epc is not None:
            eco_assessment_epcs.append({"row_number": row_number, **eco_epc})

        floor_heights = [
            float(epc["floor-height"]) for epc in all_epcs if epc["floor-height"] != ""
        ]
        if floor_heights:
            biggest_floor_height.append(
                {"row_number": row_number, "floor_height": max(floor_heights)}
            )

        # Search for SAP 54s / 69s (first match in newest-then-older order).
        sap_54 = next((epc for epc in all_epcs if epc["current-energy-efficiency"] == "54"), None)
        if sap_54 is not None:
            expected_pre.append({"row_number": row_number, **sap_54})
        sap_69 = next((epc for epc in all_epcs if epc["current-energy-efficiency"] == "69"), None)
        if sap_69 is not None:
            expected_post.append({"row_number": row_number, **sap_69})

    expected_pre_df = pd.DataFrame(expected_pre)
    expected_post_df = pd.DataFrame(expected_post)
    print(f"Properties with a SAP 54 EPC: {len(expected_pre_df)}")
    print(f"Properties with a SAP 69 EPC: {len(expected_post_df)}")

    # Heights here are already floats, so no blank-string filtering is needed.
    merged_heights_df = _merge_records(
        master, biggest_floor_height, ["row_number", "floor_height"], on="row_number"
    )
    print(f"Properties with at least one recorded floor height: {len(merged_heights_df)}")

    merged_eco_assessment_epcs_df = _merge_eco_floor_heights(
        master, eco_assessment_epcs, "row_number", on="row_number"
    )
    print(merged_eco_assessment_epcs_df.groupby("Surveyor")["floor-height"].mean())


def _analyse_birmingham_sample():
    """Report mean floor height for social-rented ECO-assessment certificates in Birmingham."""
    # Check average floor height for social housing properties with ECO assessment EPCs in Birmingham
    sample = pd.read_csv(BIRMINGHAM_CERTIFICATES_CSV)
    is_eco = sample["TRANSACTION_TYPE"] == ECO_TRANSACTION_TYPE
    is_social = sample["TENURE"].isin(["rental (social)", "Rented (social)"])
    # .copy() so the column assignment below writes to an independent frame.
    sample = sample[is_eco & is_social].copy()
    sample["FLOOR_HEIGHT"] = sample["FLOOR_HEIGHT"].astype(float)

    print(sample["FLOOR_HEIGHT"].mean())
    lodged_since_2022 = sample[pd.to_datetime(sample["LODGEMENT_DATE"]) >= "2022-01-01"]
    print(lodged_since_2022["FLOOR_HEIGHT"].mean())


def app():
    """Run the three floor-height analyses in sequence.

    Reads the input files named by the module constants, queries the EPC
    service for each property, writes two CSVs to the working directory and
    prints the summary statistics. Returns ``None``.
    """
    _analyse_rolling_master()
    _analyse_community_housing()
    _analyse_birmingham_sample()