From 18396d94944d4ec130e20af340de561aeb2baa23 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 6 Feb 2026 15:45:25 +0000 Subject: [PATCH] temporary script built --- .devcontainer/asset_list/devcontainer.json | 3 ++- .devcontainer/backend/devcontainer.json | 3 ++- asset_list/app.py | 14 ++++++------- backend/address2UPRN/main.py | 17 +++++++++++++-- backend/address2UPRN/script.py | 24 +++++++++++++++------- sfr/principal_pitch/2_export_data.py | 10 +++++---- 6 files changed, 49 insertions(+), 22 deletions(-) diff --git a/.devcontainer/asset_list/devcontainer.json b/.devcontainer/asset_list/devcontainer.json index 4834d559..7c597859 100644 --- a/.devcontainer/asset_list/devcontainer.json +++ b/.devcontainer/asset_list/devcontainer.json @@ -22,7 +22,8 @@ "jgclark.vscode-todo-highlight", "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", - "ms-python.black-formatter" + "ms-python.black-formatter", + "GrapeCity.gc-excelviewer" ], "settings": { "files.defaultWorkspace": "/workspaces/model", diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index c672b1bf..377adf1e 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -22,7 +22,8 @@ "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", "ms-python.black-formatter", - "waderyan.gitblame" + "waderyan.gitblame", + "GrapeCity.gc-excelviewer" ], "settings": { "files.defaultWorkspace": "/workspaces/model", diff --git a/asset_list/app.py b/asset_list/app.py index 9bb0c1f4..da4eb6bb 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -70,23 +70,23 @@ def app(): """ data_folder = "/workspaces/model/asset_list/" - data_filename = "assets.xlsx" - sheet_name = "Sheet1" - postcode_column = "Postcode" - address1_column = "junte found address" + data_filename = "manchester.xlsx" + sheet_name = "PW0099 - Property List" + postcode_column = "post Code" + address1_column = "address" address1_method = None fulladdress_column = None - address_cols_to_concat = ["junte found address"] + address_cols_to_concat = ["address"] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = "juntes uprn" + landlord_os_uprn = None landlord_property_type = None landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "landlordid" + landlord_property_id = "UHTprop Ref" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 5f4fed74..1b3a6c8a 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -302,7 +302,11 @@ def get_uprn_candidates( def get_uprn( - user_inputed_address: str, postcode: str, return_address=False, return_EPC=False + user_inputed_address: str, + postcode: str, + return_address=False, + return_EPC=False, + return_score=True, ): """ Return uprn (str) @@ -335,6 +339,7 @@ def get_uprn( address = top_rank_df["address"].values[0] lexiscore = float(top_rank_df["lexiscore"].values[0]) epc = top_rank_df["current-energy-rating"].values[0] + score = float(top_rank_df["lexiscore"].values[0]) # logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}") # Safe to return the agreed UPRN @@ -347,7 +352,15 @@ def get_uprn( if return_EPC is False: return found_uprn, address else: - return found_uprn, address, epc + if return_score is False: + return found_uprn, address, epc + else: + return ( + found_uprn, + address, + epc, + score, + ) return found_uprn diff --git a/backend/address2UPRN/script.py b/backend/address2UPRN/script.py index 0582450b..59855dbc 100644 --- a/backend/address2UPRN/script.py +++ b/backend/address2UPRN/script.py @@ -5,7 +5,7 @@ from backend.address2UPRN.main import get_uprn # Enable tqdm for pandas tqdm.pandas() -file_name = "brentwood.xlsx" +file_name = "forhousing.xlsx" df = pd.read_excel(file_name) @@ -13,17 +13,27 @@ df = pd.read_excel(file_name) def extract_uprn(row): user_input = "Address" postcode = "Postcode" - result = get_uprn(row[user_input], row[postcode], return_address=True) + result = get_uprn( + row[user_input], + row[postcode], + return_address=True, + return_EPC=True, + return_score=True, + ) if result is None: - return pd.Series([None, None]) + return pd.Series([None, None, None, None]) - uprn, found_address = result - return pd.Series([uprn, found_address]) + uprn, found_address, epc, score = result + return pd.Series([uprn, found_address, epc, score]) -df[["juntes uprn", "junte found address", "junte found epc"]] = df.progress_apply( - extract_uprn, axis=1 +df[["juntes uprn", "junte found address", "junte found epc", "junte score"]] = ( + df.progress_apply(extract_uprn, axis=1) ) df.to_excel(f"{file_name}_outputs.xlsx", index=False) + +# TODO: add lexiscore +# TODO: run it +# TODO: give it to danny diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 4e8cd157..1841cf3f 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -28,14 +28,16 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 506 +PORTFOLIO_ID = 544 SCENARIOS = [ - 987, + 1027, ] scenario_names = { - 987: "EPC C", + 1027: "EPC C", } +project_name = "manchester" + def get_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() @@ -329,6 +331,6 @@ for scenario_id in SCENARIOS: df[df["predicted_post_works_sap"] == ""] # Create excel to store to - filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx" + filename = f"{scenario_names[scenario_id]} - {project_name}.xlsx" with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False)