mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
working on stonewater matching algorithm
This commit is contained in:
parent
31c5935577
commit
dc1cf6d604
3 changed files with 171 additions and 9 deletions
|
|
@ -229,7 +229,3 @@ def app():
|
|||
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/southend EPC Data pull - 14 Nov "
|
||||
"2024.xlsx")
|
||||
asset_list.to_excel(filename, index=False)
|
||||
|
||||
asset_list["% of the Roof with PV"].value_counts()
|
||||
|
||||
asset_list[asset_list["% of the Roof with PV"] == "50.0"][["Address", "Postcode"]]
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ def extract_summary_report(pdf_path):
|
|||
- Fuel Bill
|
||||
- Address
|
||||
"""
|
||||
|
||||
|
||||
data = {
|
||||
"Address": None,
|
||||
"Postcode": None,
|
||||
|
|
@ -1618,5 +1618,136 @@ def append_stonewater_id():
|
|||
index=False
|
||||
)
|
||||
|
||||
|
||||
def propsed_wave_3_sample():
|
||||
"""
|
||||
Stonewater want to ensure that, when selecting properties for wave 3, they choose properties
|
||||
such that most of the properties within a geographical area are treatable within the bid.
|
||||
Namely, if we take a geographical area (which could be a postal region), they want most, and ideally all, of the
|
||||
properties within that geographical area to be included within the bid
|
||||
:return:
|
||||
"""
|
||||
|
||||
asset_list = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
|
||||
"- Archetyped V3.1.xlsx",
|
||||
header=4
|
||||
)
|
||||
# Clean address ids
|
||||
asset_list = asset_list[~pd.isnull(asset_list["Address ID"])]
|
||||
asset_list = asset_list[asset_list["Address ID"] != "Address ID"]
|
||||
asset_list["Address ID"] = asset_list["Address ID"].astype(int)
|
||||
|
||||
# Create the postal region, taking the first part of the postcode
|
||||
asset_list["Postal Region"] = asset_list["Postcode"].str.split(" ").str[0]
|
||||
unique_postal_regions = asset_list["Postal Region"].unique()
|
||||
|
||||
# Keep just the columns we need
|
||||
asset_list = asset_list[
|
||||
["Address ID", "Archetype ID", "Postal Region", "Postcode", "Property Type", "Wall Type", "Roof Type",
|
||||
"Heating"]
|
||||
]
|
||||
|
||||
survey_results = pd.read_excel(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.24.xlsx"),
|
||||
header=13,
|
||||
sheet_name="Modelled Packages"
|
||||
)
|
||||
|
||||
# TODO: We probably want the actual surveyed wall, roof, heating type
|
||||
survey_results = survey_results[
|
||||
["Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode"]
|
||||
]
|
||||
survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0]
|
||||
|
||||
survey_results_with_original_features = survey_results.merge(
|
||||
asset_list[["Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]],
|
||||
on="Address ID",
|
||||
how="left"
|
||||
)
|
||||
|
||||
if survey_results_with_original_features.shape[0] != survey_results.shape[0]:
|
||||
raise ValueError("Something went wrong")
|
||||
|
||||
# Tier definitions
|
||||
# Tier 1: We have a property in the same postal region and same archetype that was surveyed and is EPC D or below
|
||||
# Tier 2: We have a property in the same archetype that was surveyed and is EPC D or below
|
||||
#
|
||||
|
||||
for region in unique_postal_regions:
|
||||
# Take all of the properties in that region
|
||||
region_assets = asset_list[asset_list["Postal Region"] == region].copy()
|
||||
archetypes = region_assets["Archetype ID"].unique()
|
||||
# We get the properties that have been surveyed
|
||||
region_surveyed = survey_results[
|
||||
survey_results["Archetype ID"].isin(archetypes) &
|
||||
(survey_results["Postal Region"] == region)
|
||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||
|
||||
if region_surveyed["Archetype ID"].duplicated().sum():
|
||||
raise NotImplementedError("Fix me")
|
||||
|
||||
region_assets = region_assets.merge(
|
||||
region_surveyed,
|
||||
on="Archetype ID",
|
||||
how="left"
|
||||
)
|
||||
|
||||
# Label the tier 1 properties
|
||||
region_assets["Confidence Tier"] = None
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band"].isin(["D", "E", "F", "G"]),
|
||||
"1", region_assets["Confidence Tier"]
|
||||
)
|
||||
# TODO: Turn into a function
|
||||
missed_archetypes = set(archetypes) - set(region_surveyed["Archetype ID"])
|
||||
|
||||
region_surveyed = survey_results[
|
||||
survey_results["Archetype ID"].isin(missed_archetypes)
|
||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||
|
||||
if region_surveyed["Archetype ID"].duplicated().sum():
|
||||
raise NotImplementedError("Fix me 2")
|
||||
|
||||
region_assets = region_assets.merge(
|
||||
region_surveyed,
|
||||
on="Archetype ID",
|
||||
how="left",
|
||||
suffixes=("", "_method2")
|
||||
)
|
||||
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]),
|
||||
"2 - same archetype", region_assets["Confidence Tier"]
|
||||
)
|
||||
|
||||
region_assets["Current EPC Band"] = region_assets["Current EPC Band"].fillna(
|
||||
region_assets["Current EPC Band_method2"])
|
||||
|
||||
region_assets = region_assets.drop(columns=["Current EPC Band_method2"])
|
||||
|
||||
missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist()
|
||||
|
||||
# This means that this archetype was never surveyed and so we need to find a sufficiently similar property
|
||||
for a_id in missed_addressids:
|
||||
property = asset_list[asset_list["Address ID"] == a_id].squeeze()
|
||||
|
||||
surveyed_same_postcode = survey_results_with_original_features[
|
||||
(survey_results_with_original_features["Postcode"] == property["Postcode"]) &
|
||||
(survey_results_with_original_features["Property Type"] == property["Property Type"])
|
||||
]
|
||||
|
||||
surveyed_same_region = survey_results_with_original_features[
|
||||
(survey_results_with_original_features["Postal Region"] == property["Postal Region"]) &
|
||||
(survey_results_with_original_features["Property Type"] == property["Property Type"])
|
||||
]
|
||||
|
||||
same_postcode = survey_results[
|
||||
survey_results["Archetype ID"].isin(missed_archetypes) &
|
||||
(survey_results["Postal Region"] == region)
|
||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||
|
||||
pd.isnull(region_assets["Current EPC Band"]).sum()
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
|
|
@ -206,6 +206,14 @@ def app():
|
|||
# Drop the column that is ""
|
||||
transformed_df = transformed_df.drop(columns=[""])
|
||||
|
||||
# Get the find my epc data
|
||||
find_my_epc_data = epc_df[["row_id", "find_my_epc_data"]].drop(columns=["find_my_epc_data"]).join(
|
||||
pd.json_normalize(epc_df["find_my_epc_data"])
|
||||
)
|
||||
# We check if we get the solar pv column:
|
||||
if "Solar photovoltaics" not in find_my_epc_data.columns:
|
||||
find_my_epc_data["Solar photovoltaics"] = False
|
||||
|
||||
# Retrieve just the data we need
|
||||
epc_df = epc_df[
|
||||
[
|
||||
|
|
@ -228,6 +236,7 @@ def app():
|
|||
"mainheat-description",
|
||||
#
|
||||
"energy-consumption-current", # kwh/m2
|
||||
"photo-supply",
|
||||
]
|
||||
]
|
||||
|
||||
|
|
@ -236,12 +245,25 @@ def app():
|
|||
how="left",
|
||||
on="row_id"
|
||||
).merge(
|
||||
transformed_df,
|
||||
find_my_epc_data[
|
||||
[
|
||||
"row_id", "heating_text", "hot_water_text", 'Assessor’s name',
|
||||
"Assessor's Telephone", "Assessor's Email", "Accreditation scheme",
|
||||
"Assessor’s ID", "Solar photovoltaics"
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"Solar photovoltaics": "Has Solar PV",
|
||||
"heating_text": "Heating Estimated kWh",
|
||||
"hot_water_text": "Hot Water Estimated kWh",
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
on="row_id"
|
||||
)
|
||||
|
||||
asset_list = asset_list.drop(columns=["row_id"])
|
||||
asset_list["Has Solar PV"] = asset_list["Has Solar PV"] | ~asset_list["photo-supply"].isin(["0.0", 0, None, ""])
|
||||
asset_list = asset_list.drop(columns=["photo-supply"])
|
||||
|
||||
# Rename the columns
|
||||
asset_list = asset_list.rename(columns={
|
||||
|
|
@ -259,7 +281,7 @@ def app():
|
|||
"mainheat-description": "Heating Type",
|
||||
"secondheat-description": "Secondary Heating",
|
||||
"transaction-type": "Reason for last EPC",
|
||||
"energy-consumption-current": "Heat Demand (kWh/m2)"
|
||||
"energy-consumption-current": "Heat Demand (kWh/m2)",
|
||||
})
|
||||
|
||||
asset_list["Estimated Number of Floors"] = asset_list.apply(
|
||||
|
|
@ -295,6 +317,19 @@ def app():
|
|||
axis=1
|
||||
)
|
||||
|
||||
# For all of the columns in transformed_df, prefix with "Recommendation: "
|
||||
for col in transformed_df.columns:
|
||||
if col == "row_id":
|
||||
continue
|
||||
transformed_df = transformed_df.rename(columns={col: f"Recommendation: {col}"})
|
||||
|
||||
asset_list = asset_list.merge(
|
||||
transformed_df,
|
||||
how="left",
|
||||
on="row_id"
|
||||
)
|
||||
asset_list = asset_list.drop(columns=["row_id"])
|
||||
|
||||
# Store as an excel
|
||||
filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/Settle EPC Data pull - 08 Nov 2024.xlsx"
|
||||
filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " EPC Data Pull.xlsx"
|
||||
asset_list.to_excel(filename, index=False)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue