merged peters code

2026-07-27 23:35:01 +00:00 · 2026-02-05 17:46:23 +00:00 · 2026-02-05 17:46:23 +00:00 · 68a95d0296
commit 68a95d0296
parent 9c7678865f
7 changed files with 34 additions and 59 deletions
--- a/.devcontainer/asset_list/requirements.txt
+++ b/.devcontainer/asset_list/requirements.txt
@@ -7,7 +7,7 @@ mangum==0.19.0
 # AWS
 boto3==1.35.44
 # Data
-openpyxl==3.1.2
+openpyxl==3.1.5
 # Basic
 pytz
 uvicorn[standard]
--- a/.devcontainer/backend/requirements.txt
+++ b/.devcontainer/backend/requirements.txt
@@ -9,7 +9,7 @@ mangum==0.19.0
 # AWS
 boto3==1.35.44
 # Data
-openpyxl==3.1.2
+openpyxl==3.1.5
 # Basic
 pytz
 uvicorn[standard]
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -69,61 +69,24 @@ def app():
    Property UPRN
    """

-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
-    data_filename = "Domna SHF Wave 3 (3).xlsx"
-    sheet_name = "Domna Wave 3"
-    postcode_column = "Postcode"
-    address1_column = "Address 1"
-    address1_method = None
-    fulladdress_column = None
-    address_cols_to_concat = ["Address 1"]
-    missing_postcodes_method = None
-    landlord_year_built = "Construction Years"
-    landlord_os_uprn = "UPRN"
-    landlord_property_type = "Type"
-    landlord_built_form = "Attachment"
-    landlord_wall_construction = "Wall type"
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "Row ID"
-    landlord_sap = None
-    outcomes_filename = None
-    outcomes_sheetname = None
-    outcomes_postcode = None
-    outcomes_houseno = None
-    outcomes_id = None
-    outcomes_address = None
-    master_filepaths = []
-    master_id_colnames = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
-    asset_list_header = 0
-    landlord_block_reference = None
-
-    # Peabody data for cleaning
-    data_folder = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-        "Project/data_validation"
-    )
-    data_filename = "to_standardise_uprns.xlsx"
+    data_folder = "/workspaces/model/asset_list/"
+    data_filename = "assets.xlsx"
    sheet_name = "Sheet1"
    postcode_column = "Postcode"
-    address1_column = None
-    address1_method = "house_number_extraction"
-    fulladdress_column = "Address"
-    address_cols_to_concat = None
+    address1_column = "junte found address"
+    address1_method = None
+    fulladdress_column = None
+    address_cols_to_concat = ["junte found address"]
    missing_postcodes_method = None
    landlord_year_built = None
-    landlord_os_uprn = None
+    landlord_os_uprn = "juntes uprn"
    landlord_property_type = None
    landlord_built_form = None
    landlord_wall_construction = None
    landlord_roof_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
-    landlord_property_id = "LLUPRN"
+    landlord_property_id = "landlordid"
    landlord_sap = None
    outcomes_filename = None
    outcomes_sheetname = None
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -12,6 +12,7 @@ import re

 EPC_AUTH_TOKEN = os.getenv(
    "EPC_AUTH_TOKEN",
+    "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=",
 )

 if EPC_AUTH_TOKEN is None:
@@ -300,7 +301,9 @@ def get_uprn_candidates(
    )


-def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
+def get_uprn(
+    user_inputed_address: str, postcode: str, return_address=False, return_EPC=False
+):
    """
    Return uprn (str)
    Return False if failed to find a sensible matching epc
@@ -331,8 +334,9 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False):

    address = top_rank_df["address"].values[0]
    lexiscore = float(top_rank_df["lexiscore"].values[0])
+    epc = top_rank_df["current-energy-rating"].values[0]

-    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
+    # logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
    # Safe to return the agreed UPRN
    found_uprn = top_rank_df.iloc[0]["uprn"]

@@ -340,7 +344,10 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
        return None

    if return_address:
-        return found_uprn, address
+        if return_EPC is False:
+            return found_uprn, address
+        else:
+            return found_uprn, address, epc
    return found_uprn


--- a/backend/address2UPRN/script.py
+++ b/backend/address2UPRN/script.py
@@ -5,12 +5,15 @@ from backend.address2UPRN.main import get_uprn
 # Enable tqdm for pandas
 tqdm.pandas()

-df = pd.read_excel("address2.xlsx")
+file_name = "brentwood.xlsx"
+
+df = pd.read_excel(file_name)


 def extract_uprn(row):
-    print(row["User Input"], row["Postcode"])
-    result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
+    user_input = "Address"
+    postcode = "Postcode"
+    result = get_uprn(row[user_input], row[postcode], return_address=True)

    if result is None:
        return pd.Series([None, None])
@@ -19,6 +22,8 @@ def extract_uprn(row):
    return pd.Series([uprn, found_address])


-df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
+df[["juntes uprn", "junte found address", "junte found epc"]] = df.progress_apply(
+    extract_uprn, axis=1
+)

-df.to_excel("outputs2.xlsx", index=False)
+df.to_excel(f"{file_name}_outputs.xlsx", index=False)
--- a/backend/app/requirements/requirements.txt
+++ b/backend/app/requirements/requirements.txt
@@ -10,7 +10,7 @@ mangum==0.19.0
 # AWS
 boto3==1.35.44
 # Data
-openpyxl==3.1.2
+openpyxl==3.1.5
 # Basic
 pytz
 sqlmodel
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@@ -28,12 +28,12 @@ from sqlalchemy import func

 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 524
+PORTFOLIO_ID = 506
 SCENARIOS = [
-    1009,
+    987,
 ]
 scenario_names = {
-    1009: "EPC C; Most Economic",
+    987: "EPC C",
 }