This commit is contained in:
Jun-te Kim 2026-01-27 22:31:37 +00:00
parent 1acf4f4d6a
commit 1468150a49
3 changed files with 3 additions and 189 deletions

View file

@ -58,39 +58,6 @@ def app():
Property UPRN
"""
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Warmfront/SCIS")
data_filename = "SCIS_Historic_Deemed_Combined_Workings.xlsx"
sheet_name = "SCIS"
postcode_column = 'POSTCODE'
address1_column = "NO"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["NO", "Street / Block Name", "Town/Area"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "PROPERTY TYPE As per table emailed"
landlord_built_form = "PROPERTY TYPE As per table emailed"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Row ID"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Peabody data for cleaning
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation")

View file

@ -1,7 +1,7 @@
from sqlalchemy import create_engine
from contextlib import contextmanager
from backend.app.config import get_settings
# from sqlmodel import Session
from sqlmodel import Session
connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
db_string = connection_string.format(

View file

@ -19,7 +19,7 @@ SCENARIOS = [
970,
]
scenario_names = {
970: "EPC C - Nosolid floor, EQI, IWI",
970: "EPC C - No solid floor, EQI, IWI",
}
@ -228,159 +228,6 @@ for scenario_id in SCENARIOS:
df["uprn"] = df["uprn"].astype(str)
# Create excel to store to
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
f"Project/Final SAL/{scenario_names[scenario_id]} - 20250113 final.xlsx")
filename = (f"{scenario_names[scenario_id]} - 20250113 final.xlsx")
with pd.ExcelWriter(filename) as writer:
df.to_excel(writer, sheet_name="properties", index=False)
# asset_list = pd.DataFrame(asset_list)
# asset_list = asset_list.rename(
# columns={
# "postcode": "domna_postcode"
# }
# )
# if "domna_full_address":
# # For Peabody
# asset_list["domna_full_address"] = asset_list["domna_address_1"]
#
# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
# asset_list = asset_list.merge(
# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
# how="left",
# on="uprn"
# )
# Get conservation area data from property details spatial. based on the UPRNs
def get_conservation_area_data(uprns):
session = sessionmaker(bind=db_engine)()
session.begin()
# Query to get conservation area data
spatial_query = session.query(
PropertyDetailsSpatial
).filter(
PropertyDetailsSpatial.uprn.in_(uprns) # Filter by UPRNs
).all()
# Transform spatial data to include all fields dynamically
spatial_data = [
{col.name: getattr(spatial, col.name) for col in PropertyDetailsSpatial.__table__.columns}
for spatial in spatial_query
]
session.close()
return pd.DataFrame(spatial_data)
uprns = asset_list[
~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "<NA>")
]["uprn"].astype(int).unique().tolist()
conservation_area_data = get_conservation_area_data(uprns)
conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str)
asset_list = asset_list.merge(
conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
how="left",
on="uprn"
)
# For exporting
df.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - "
"with ID.xlsx",
index=False
)
# asset_list.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx",
# index=False
# )
condition_costs = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
sheet_name="Prices - Khalim",
header=35
)
# Remove unnamed columns and reset index
condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')]
condition_costs = condition_costs.reset_index(drop=True)
# We now estimate condition cost
def simulate_condition(asset_list, condition_costs):
"""
This function is for testing, and will simulate condition cost from 1-10 for each property to see what the
costing array looks like.
:param df:
:return:
"""
condition_df = []
for _, row in asset_list.iterrows():
n_bathrooms = row["bathrooms"]
conditions = {}
for condition in reversed(range(1, 11)):
condition_cost = condition_costs[
condition_costs["Condition"] == condition
].drop(columns=["Condition"]).iloc[0]
# Each cost is scaled by floor area
condition_cost = condition_cost * row["total_floor_area"]
condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms
total_condition_cost = condition_cost.sum()
conditions["Condition " + str(condition)] = (total_condition_cost)
condition_df.append(
{
"uprn": row["uprn"],
**conditions
}
)
condition_df = pd.DataFrame(condition_df)
asset_list = asset_list.merge(
condition_df,
how="left",
on="uprn"
)
return asset_list
# asset_list = simulate_condition(asset_list, condition_costs)
# We calculate the condition cost based on the condition
for _, row in asset_list.iterrows():
condition = row["condition_score"]
if condition in [None, ""]:
continue
condition = int(float(condition))
condition_cost = condition_costs[
condition_costs["Condition"] == condition
].drop(columns=["Condition"]).iloc[0]
# Each cost is scaled by floor area
condition_cost = condition_cost * float(row["total_floor_area"])
n_bathrooms = row["n_bathrooms"]
condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
total_condition_cost = condition_cost.sum()
asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost
# Store output
asset_list.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
index=False
)
condition_cost_comparison = asset_list[
["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
]