Merged Peter's code

This commit is contained in:
Jun-te Kim 2026-02-05 17:46:23 +00:00
parent 9c7678865f
commit 68a95d0296
7 changed files with 34 additions and 59 deletions

View file

@ -7,7 +7,7 @@ mangum==0.19.0
# AWS
boto3==1.35.44
# Data
openpyxl==3.1.2
openpyxl==3.1.5
# Basic
pytz
uvicorn[standard]

View file

@ -9,7 +9,7 @@ mangum==0.19.0
# AWS
boto3==1.35.44
# Data
openpyxl==3.1.2
openpyxl==3.1.5
# Basic
pytz
uvicorn[standard]

View file

@ -69,61 +69,24 @@ def app():
Property UPRN
"""
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
data_filename = "Domna SHF Wave 3 (3).xlsx"
sheet_name = "Domna Wave 3"
postcode_column = "Postcode"
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["Address 1"]
missing_postcodes_method = None
landlord_year_built = "Construction Years"
landlord_os_uprn = "UPRN"
landlord_property_type = "Type"
landlord_built_form = "Attachment"
landlord_wall_construction = "Wall type"
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Row ID"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Peabody data for cleaning
data_folder = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation"
)
data_filename = "to_standardise_uprns.xlsx"
data_folder = "/workspaces/model/asset_list/"
data_filename = "assets.xlsx"
sheet_name = "Sheet1"
postcode_column = "Postcode"
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = None
address1_column = "junte found address"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["junte found address"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_os_uprn = "juntes uprn"
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "LLUPRN"
landlord_property_id = "landlordid"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None

View file

@ -12,6 +12,7 @@ import re
# Auth token read from the EPC_AUTH_TOKEN environment variable, with a literal
# fallback used when the variable is unset (presumably for the EPC API, going
# by the name -- confirm).
# SECURITY NOTE(review): the fallback is a credential-like literal committed to
# source control. It should be rotated and supplied only via the environment.
# NOTE(review): because os.getenv is given a default here, it never returns
# None, so the `EPC_AUTH_TOKEN is None` check that follows cannot be true.
EPC_AUTH_TOKEN = os.getenv(
"EPC_AUTH_TOKEN",
"a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=",
)
if EPC_AUTH_TOKEN is None:
@ -300,7 +301,9 @@ def get_uprn_candidates(
)
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
def get_uprn(
user_inputed_address: str, postcode: str, return_address=False, return_EPC=False
):
"""
Return uprn (str)
Return False if failed to find a sensible matching epc
@ -331,8 +334,9 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
address = top_rank_df["address"].values[0]
lexiscore = float(top_rank_df["lexiscore"].values[0])
epc = top_rank_df["current-energy-rating"].values[0]
logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
# logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
# Safe to return the agreed UPRN
found_uprn = top_rank_df.iloc[0]["uprn"]
@ -340,7 +344,10 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
return None
if return_address:
return found_uprn, address
if return_EPC is False:
return found_uprn, address
else:
return found_uprn, address, epc
return found_uprn

View file

@ -5,12 +5,15 @@ from backend.address2UPRN.main import get_uprn
# Enable tqdm for pandas
tqdm.pandas()
df = pd.read_excel("address2.xlsx")
file_name = "brentwood.xlsx"
df = pd.read_excel(file_name)
def extract_uprn(row):
print(row["User Input"], row["Postcode"])
result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
user_input = "Address"
postcode = "Postcode"
result = get_uprn(row[user_input], row[postcode], return_address=True)
if result is None:
return pd.Series([None, None])
@ -19,6 +22,8 @@ def extract_uprn(row):
return pd.Series([uprn, found_address])
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
df[["juntes uprn", "junte found address", "junte found epc"]] = df.progress_apply(
extract_uprn, axis=1
)
df.to_excel("outputs2.xlsx", index=False)
df.to_excel(f"{file_name}_outputs.xlsx", index=False)

View file

@ -10,7 +10,7 @@ mangum==0.19.0
# AWS
boto3==1.35.44
# Data
openpyxl==3.1.2
openpyxl==3.1.5
# Basic
pytz
sqlmodel

View file

@ -28,12 +28,12 @@ from sqlalchemy import func
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 524
PORTFOLIO_ID = 506
SCENARIOS = [
1009,
987,
]
scenario_names = {
1009: "EPC C; Most Economic",
987: "EPC C",
}