Added the rest of the column remapping

This commit is contained in:
Khalim Conn-Kowlessar 2023-09-19 13:25:28 +01:00
parent 22949f81cf
commit 9f636645e3

View file

@ -53,6 +53,8 @@ def process_and_prune_desriptions(df, cleaned_lookup):
# TODO: If we integrate U-values, we can probably remove insulation thickness
# TODO: Add in main fuel
cols_to_drop = {
"walls": [
'original_description', 'clean_description', 'thermal_transmittance_unit',
@ -84,21 +86,45 @@ def process_and_prune_desriptions(df, cleaned_lookup):
"hotwater": [
"original_description", "clean_description", "assumed", "original_description_ENDING",
"clean_description_ENDING", "assumed_ENDING"
]
],
"mainheat": [
"original_description", "clean_description", "original_description_ENDING",
"has_assumed", "original_description_ENDING", "clean_description_ENDING",
"has_assumed_ENDING",
],
"mainheatcont": [
"original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING"
],
"windows": [
"original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING",
# We don't need many of the glazing coverage features because we have the multi_glaze_proportion feature
"has_glazing", "glazing_coverage", "no_data", "has_glazing_ENDING", "glazing_coverage_ENDING",
"no_data_ENDING"
],
"main-fuel": [
"original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING"
],
}
for component in ["walls", "floor", "roof", "hotwater"]:
for component in ["walls", "floor", "roof", "hotwater", "mainheat", "mainheatcont", "windows", "main-fuel"]:
component_upper = component.upper()
if component == "main-fuel":
component_upper = component_upper.replace("-", "_")
cleaned_key = "main-fuel" if component == "main-fuel" else f"{component}-description"
left_on = (
f"{component_upper}_STARTING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_STARTING"
)
df = df.merge(
pd.DataFrame(cleaned_lookup[f"{component}-description"]),
pd.DataFrame(cleaned_lookup[cleaned_key]),
how="left",
left_on=f"{component_upper}_DESCRIPTION_STARTING",
left_on=left_on,
right_on="original_description",
).merge(
pd.DataFrame(cleaned_lookup[f"{component}-description"]),
pd.DataFrame(cleaned_lookup[cleaned_key]),
how="left",
left_on=f"{component_upper}_DESCRIPTION_ENDING",
left_on=left_on,
right_on="original_description",
suffixes=("", "_ENDING")
)
@ -160,6 +186,20 @@ def process_and_prune_desriptions(df, cleaned_lookup):
}
)
# If the lookup has a tariff type column, rename it so it is prefixed with the component name
if "tariff_type" in cleaned_lookup[f"{component}-description"][0]:
df = df.rename(
columns={
"tariff_type": f"{component}_tariff_type",
"tariff_type_ENDING": f"{component}_tariff_type_ENDING",
}
)
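# We don't need any lighting-specific cleaning; we just drop the original description, as we use
# LOW_ENERGY_LIGHTING_STARTING and LOW_ENERGY_LIGHTING_ENDING instead.
# NOTE(review): the drop below removes the LOW_ENERGY_LIGHTING_* columns themselves, which appears to
# contradict this comment - confirm which columns are meant to be dropped.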
df = df.drop(columns=["LOW_ENERGY_LIGHTING_STARTING", "LOW_ENERGY_LIGHTING_ENDING"])
return df