save script

This commit is contained in:
Jun-te Kim 2026-02-03 23:23:57 +00:00
parent 14b108cd94
commit 0b67828665
6 changed files with 52 additions and 19 deletions

View file

@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
# # 4) Python deps - if you want to run asset list
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD asset_list/requirements.txt requirements.txt
RUN pip install -r requirements.txt
ADD .devcontainer/asset_list/requirements.txt requirements2.txt
ADD asset_list/requirements.txt requirements1.txt
RUN cat requirements1.txt requirements2.txt >> requirements.txt
RUN pip install -r requirements.txt
# 5) Workdir

View file

@ -15,10 +15,9 @@ uvicorn[standard]
pytest==9.0.2
pytest-cov==7.0.0
ipykernel>=6.25,<7
pydantic-settings<2
pyyaml>=6.0.1
pydantic>=1.10.7,<2
sqlmodel
# Formatting
black==26.1.0
dotenv
pydantic-settings

View file

@ -20,3 +20,21 @@ DATA_BUCKET="test"
EPC_AUTH_TOKEN="test"
ENGINE_SQS_URL="test"
ENERGY_ASSESSMENTS_BUCKET="test"
API_KEY="test"
SECRET_KEY="test"
ENVIRONMENT="test"
DATA_BUCKET="test"
PLAN_TRIGGER_BUCKET="test"
ENGINE_SQS_URL="test"
GOOGLE_SOLAR_API_KEY="test"
DB_HOST="test"
DB_PASSWORD="test"
DB_USERNAME="test"
DB_PORT="test"
DB_NAME="test"
SAP_PREDICTIONS_BUCKET="test"
CARBON_PREDICTIONS_BUCKET="test"
HEAT_PREDICTIONS_BUCKET="test"
HEATING_KWH_PREDICTIONS_BUCKET="test"
HOTWATER_KWH_PREDICTIONS_BUCKET="test"
ENERGY_ASSESSMENTS_BUCKET="test"

View file

@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
method="get",
params={"postcode": postcode},
)
if not search_resp or "rows" not in search_resp:
return pd.DataFrame()
results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])
@ -298,7 +300,7 @@ def get_uprn_candidates(
)
def get_uprn(user_inputed_address: str, postcode: str):
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
"""
Return uprn (str)
Return False if failed to find a sensible matching epc
@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, postcode: str):
if found_uprn == "":
return None
if return_address:
return found_uprn, address
return found_uprn

View file

@ -1,17 +1,24 @@
import pandas as pd
from tqdm import tqdm
from backend.address2UPRN.main import get_uprn
# Enable tqdm for pandas
tqdm.pandas()
df = pd.read_excel("address2.xlsx")
# use Address 1
junte_df = pd.read_excel("hackney_uprn_failures.xlsx")
def extract_uprn(row):
    """Look up the UPRN and matched address for one spreadsheet row.

    Args:
        row: A row exposing "User Input" and "Postcode" entries.

    Returns:
        pd.Series: Two values, ``[uprn, found_address]``. Both are ``None``
        when ``get_uprn`` reports that no match was found.
    """
    print(row["User Input"], row["Postcode"])
    result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
    # get_uprn signals failure with a falsy value: None in the code visible
    # here, and False according to its docstring. Neither can be unpacked
    # into (uprn, address), so treat every falsy result as "no match".
    if not result:
        return pd.Series([None, None])
    uprn, found_address = result
    return pd.Series([uprn, found_address])
# use domna_address_1
khalim_df = pd.read_excel("khalim_standard.xlsx")
combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1')
# Keep only the merged rows whose epc_os_uprn is not null
result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])]
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
df.to_excel("outputs2.xlsx", index=False)

View file

@ -2,6 +2,10 @@
This script prepares the data for the financial model
"""
from dotenv import load_dotenv
load_dotenv(".env.local")
import pandas as pd
import numpy as np
from backend.app.utils import sap_to_epc
@ -24,12 +28,12 @@ from sqlalchemy import func
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 502 # Peabody
PORTFOLIO_ID = 515
SCENARIOS = [
986,
997,
]
scenario_names = {
986: "EPC C",
997: "EPC C",
}