mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
getting ready to work on the colchester data
This commit is contained in:
parent
7b4218299f
commit
99a0948e2b
2 changed files with 78 additions and 87 deletions
|
|
@ -343,6 +343,7 @@ class AssetList:
|
|||
self.standardised_asset_list = self.raw_asset_list.copy()
|
||||
# Will be used to store aggregated figures against the various work types
|
||||
self.work_type_figures = {}
|
||||
self.flat_data = None
|
||||
|
||||
# We detect the presence of the non-intrusive columns
|
||||
self.non_intrusives_present = True if "CIGA Check Required" in self.raw_asset_list.columns else False
|
||||
|
|
@ -649,6 +650,9 @@ class AssetList:
|
|||
logger.info("Applying standardisation to asset list")
|
||||
|
||||
for variable, mapping in self.variable_mappings.items():
|
||||
self.standardised_asset_list[variable + "_original_from_landlord"] = (
|
||||
self.standardised_asset_list[variable].copy()
|
||||
)
|
||||
self.standardised_asset_list[variable] = self.standardised_asset_list[variable].map(mapping)
|
||||
|
||||
if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum():
|
||||
|
|
@ -663,6 +667,12 @@ class AssetList:
|
|||
|
||||
# Apply renames to our standard names
|
||||
# Perform final variable selection and renaming:
|
||||
|
||||
# We add the original columns to the keep variables
|
||||
self.keep_variables += [
|
||||
k + "_original_from_landlord" for k in self.variable_mappings.keys()
|
||||
]
|
||||
|
||||
self.standardised_asset_list = self.standardised_asset_list[self.keep_variables].rename(
|
||||
columns=self.rename_map
|
||||
)
|
||||
|
|
@ -912,18 +922,6 @@ class AssetList:
|
|||
self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= self.EMPTY_CAVITY_SAP_THRESHOLD
|
||||
)
|
||||
)
|
||||
|
||||
self.standardised_asset_list["empty_cavity"] = (
|
||||
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] |
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"]
|
||||
)
|
||||
# We add a reason
|
||||
self.standardised_asset_list["empty_cavity_reason"] = np.where(
|
||||
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"],
|
||||
"Non-Intrusive Data",
|
||||
"EPC Data"
|
||||
)
|
||||
|
||||
######################################################
|
||||
# Extraction
|
||||
######################################################
|
||||
|
|
@ -933,7 +931,7 @@ class AssetList:
|
|||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
|
||||
(self.standardised_asset_list["non-intrusives: Construction"] == "CAVITY") &
|
||||
(self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) &
|
||||
(~self.standardised_asset_list['non-intrusives: Material'].isin(["GREY LOOSE BEAD", "FORMALDEHYDE"])
|
||||
(~self.standardised_asset_list['non-intrusives: Material'].isin(["GREY LOOSE BEAD", "COMPACTED BEAD"])
|
||||
) & (
|
||||
self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW]
|
||||
)
|
||||
|
|
@ -996,6 +994,12 @@ class AssetList:
|
|||
)
|
||||
)
|
||||
|
||||
self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = (
|
||||
self.standardised_asset_list["non-intrusives: Insulated"].isin(
|
||||
["EWI", "RETRO DRILLED", "FILLED AT BUILD"]
|
||||
)
|
||||
)
|
||||
|
||||
# TODO: We don't have information about the roof from this landlord
|
||||
|
||||
# We merge on the u-value for average thermal transmittance
|
||||
|
|
@ -1146,7 +1150,8 @@ class AssetList:
|
|||
# The walls are insulated
|
||||
(
|
||||
self.standardised_asset_list["solar_landlord_walls_insulated"] |
|
||||
self.standardised_asset_list["solar_epc_walls_insulated"]
|
||||
self.standardised_asset_list["solar_epc_walls_insulated"] |
|
||||
self.standardised_asset_list["solar_non_intrusives_walls_insulated"]
|
||||
) &
|
||||
# Roof is insulated
|
||||
self.standardised_asset_list["solar_epc_roof_insulated"] &
|
||||
|
|
@ -1165,7 +1170,8 @@ class AssetList:
|
|||
# The walls are insulated
|
||||
(
|
||||
self.standardised_asset_list["solar_landlord_walls_insulated"] |
|
||||
self.standardised_asset_list["solar_epc_walls_insulated"]
|
||||
self.standardised_asset_list["solar_epc_walls_insulated"] |
|
||||
self.standardised_asset_list["solar_non_intrusives_walls_insulated"]
|
||||
) &
|
||||
# Loft insulation needs a top-up
|
||||
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
|
||||
|
|
@ -1216,6 +1222,15 @@ class AssetList:
|
|||
columns=["walls_u_value", "roof_u_value", "floor_u_value"]
|
||||
)
|
||||
|
||||
# Adjust flagged extraction jobs to remove anything for solar
|
||||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
|
||||
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] &
|
||||
~self.standardised_asset_list["solar_eligible_solid_floor"] &
|
||||
~self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"]
|
||||
# ~self.standardised_asset_list["solar_eligible_other_floor"] &
|
||||
# ~self.standardised_asset_list["solar_eligible_other_floor_needs_loft"]
|
||||
)
|
||||
|
||||
# Produce some aggregate figures
|
||||
self.work_type_figures = {
|
||||
# Empty cavity from non-intrusives
|
||||
|
|
@ -1296,3 +1311,40 @@ class AssetList:
|
|||
"Other Floor, Insulated, Needs Loft",
|
||||
self.standardised_asset_list["solar_reason"]
|
||||
)
|
||||
|
||||
def flat_analysis(self):
    """Summarise EPC scores for flats, one row per postcode.

    Rows of ``self.standardised_asset_list`` whose standard property type is
    exactly ``"flat"`` are grouped by postcode. For each postcode the summary
    records the number of rows, how many have a current energy efficiency
    score strictly below ``self.FILLED_CAVITY_SAP_THRESHOLD`` (reported under
    the "below C75" labels), that count as a rounded percentage of the rows,
    and how many score strictly below 69.

    The summary is stored on ``self.flat_data``; nothing is returned. The
    frame always carries the full set of columns, even when the asset list
    contains no flats, so anything written from it keeps its headers.
    """
    columns = [
        "Postcode",
        "Property Type",
        "Number of Flats with EPC",
        "Number of Flats below C75",
        "Proportion of Flat EPCs below C75",
        "Number of Flats Below C69",
    ]

    sap_column = self.EPC_API_DATA_NAMES["current-energy-efficiency"]
    assets = self.standardised_asset_list

    # Filtering first is equivalent to grouping by (postcode, property type)
    # and then discarding every group that is not a flat group.
    flats = assets[assets[self.STANDARD_PROPERTY_TYPE] == "flat"]

    # TODO: deduce the building name (strip out the house number) so that
    # flats can be grouped per building rather than per postcode.
    rows = []
    for postcode, group in flats.groupby(self.STANDARD_POSTCODE):
        # Categorical postcodes can yield empty groups; skip them so that the
        # proportion below never divides by zero.
        if group.empty:
            continue

        # NOTE(review): this counts every flat in the postcode, including any
        # row whose EPC score is missing (missing scores never compare as
        # "below"). Confirm that is the intended denominator for the
        # "with EPC" figures.
        num_flats = group.shape[0]
        scores = group[sap_column]
        num_below_c75 = scores.lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum()
        num_flats_below_c69 = scores.lt(69).sum()

        rows.append(
            {
                "Postcode": postcode,
                "Property Type": "Flat",
                "Number of Flats with EPC": num_flats,
                "Number of Flats below C75": num_below_c75,
                "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats),
                "Number of Flats Below C69": num_flats_below_c69,
            }
        )

    # Passing the columns explicitly keeps the schema stable when rows is empty
    self.flat_data = pd.DataFrame(rows, columns=columns)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import json
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from pprint import pprint
|
||||
import msgpack
|
||||
from utils.s3 import read_from_s3
|
||||
from asset_list.AssetList import AssetList
|
||||
|
|
@ -239,23 +240,18 @@ def app():
|
|||
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
|
||||
# - Or the insulation required is loft/cavity (floors should be solid)
|
||||
|
||||
# For Westward
|
||||
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
|
||||
DATA_FILENAME = "WESTWARD - completed list..xlsx"
|
||||
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
||||
DATA_FILENAME = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
||||
SHEET_NAME = "Sheet1"
|
||||
|
||||
POSTCODE_COLUMN = "WFT EDIT Postcode"
|
||||
FULLADDRESS_COLUMN = "Address"
|
||||
POSTCODE_COLUMN = 'Full Address.1'
|
||||
FULLADDRESS_COLUMN = "Full Address"
|
||||
ADDRESS1_COLUMN = None
|
||||
ADDRESS1_METHOD = "house_number_extraction"
|
||||
|
||||
ADDRESS1_METHOD = "first_word"
|
||||
ADDRESS_COLS_TO_CONCAT = []
|
||||
MISSING_POSTCODES_METHOD = None
|
||||
PROPERTY_YEAR_BUILT = "Build date"
|
||||
UPRN_COLUMN = "UPRN"
|
||||
# If we have the non-intrusives data, this should be true
|
||||
HAS_NON_INTRUSIVES = True
|
||||
PROPERTY_TYPE_COLUMN = "Location type" # This will be used to identify and remove bedsits
|
||||
PROPERTY_YEAR_BUILT = "Build Date"
|
||||
UPRN_COLUMN = None
|
||||
PROPERTY_TYPE_COLUMN = None
|
||||
|
||||
# Maps addresses to uprn in problematic cases
|
||||
MANUAL_UPRN_MAP = {}
|
||||
|
|
@ -297,20 +293,6 @@ def app():
|
|||
|
||||
asset_list.apply_standardiation()
|
||||
|
||||
# DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
||||
# DATA_FILENAME = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
||||
# SHEET_NAME = "Sheet1"
|
||||
# POSTCODE_COLUMN = 'Full Address.1'
|
||||
# FULLADDRESS_COLUMN = "Full Address"
|
||||
# ADDRESS1_COLUMN = None
|
||||
# ADDRESS1_METHOD = "first_word"
|
||||
# ADDRESS_COLS_TO_CONCAT = []
|
||||
# MISSING_POSTCODES_METHOD = None
|
||||
# PROPERTY_YEAR_BUILT = "Build Date"
|
||||
# UPRN_COLUMN = None
|
||||
# # If we have the non-intrusives data, this should be true
|
||||
# HAS_NON_INTRUSIVES = True
|
||||
|
||||
### We retrieve the EPC data
|
||||
|
||||
# We chunk up this data into 5000 rows at a time
|
||||
|
|
@ -455,48 +437,9 @@ def app():
|
|||
|
||||
asset_list.identify_worktypes(cleaned)
|
||||
|
||||
from pprint import pprint
|
||||
pprint(asset_list.work_type_figures)
|
||||
|
||||
# TODO: We should do this breakdown for flats
|
||||
def flat_analysis(asset_list):
    """Summarise EPC scores for flats, one row per postcode.

    Rows of ``asset_list.standardised_asset_list`` whose standard property
    type is exactly ``"flat"`` are grouped by postcode. For each postcode the
    summary records the number of rows, how many have a current energy
    efficiency score strictly below ``asset_list.FILLED_CAVITY_SAP_THRESHOLD``
    (reported under the "below C75" labels), that count as a rounded
    percentage of the rows, and how many score strictly below 69.

    Args:
        asset_list: object exposing ``standardised_asset_list``,
            ``STANDARD_POSTCODE``, ``STANDARD_PROPERTY_TYPE``,
            ``EPC_API_DATA_NAMES`` and ``FILLED_CAVITY_SAP_THRESHOLD``.

    Returns:
        A DataFrame with one row per postcode containing flats. The full set
        of columns is present even when there are no flats.
    """
    columns = [
        "Postcode",
        "Property Type",
        "Number of Flats with EPC",
        "Number of Flats below C75",
        "Proportion of Flat EPCs below C75",
        "Number of Flats Below C69",
    ]

    sap_column = asset_list.EPC_API_DATA_NAMES["current-energy-efficiency"]
    assets = asset_list.standardised_asset_list

    # Filtering first is equivalent to grouping by (postcode, property type)
    # and then discarding every group that is not a flat group.
    flats = assets[assets[asset_list.STANDARD_PROPERTY_TYPE] == "flat"]

    # TODO: deduce the building name (strip out the house number) so that
    # flats can be grouped per building rather than per postcode.
    rows = []
    for postcode, group in flats.groupby(asset_list.STANDARD_POSTCODE):
        # Categorical postcodes can yield empty groups; skip them so that the
        # proportion below never divides by zero.
        if group.empty:
            continue

        # NOTE(review): this counts every flat in the postcode, including any
        # row whose EPC score is missing (missing scores never compare as
        # "below"). Confirm that is the intended denominator for the
        # "with EPC" figures.
        num_flats = group.shape[0]
        scores = group[sap_column]
        num_below_c75 = scores.lt(asset_list.FILLED_CAVITY_SAP_THRESHOLD).sum()
        num_flats_below_c69 = scores.lt(69).sum()

        rows.append(
            {
                "Postcode": postcode,
                "Property Type": "Flat",
                "Number of Flats with EPC": num_flats,
                "Number of Flats below C75": num_below_c75,
                "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats),
                # Labelled like the other columns rather than with a raw
                # variable name, since this becomes a spreadsheet header.
                "Number of Flats Below C69": num_flats_below_c69,
            }
        )

    # Passing the columns explicitly keeps the schema stable when rows is empty
    return pd.DataFrame(rows, columns=columns)
|
||||
|
||||
flat_data = flat_analysis(asset_list)
|
||||
asset_list.flat_analysis()
|
||||
|
||||
# Store as an excel
|
||||
filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " - Standardised.xlsx"
|
||||
|
|
@ -504,8 +447,4 @@ def app():
|
|||
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||||
flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
|
||||
|
||||
matches_review = asset_list[
|
||||
[FULLADDRESS_COLUMN, ADDRESS1_COLUMN, POSTCODE_COLUMN, "Address on EPC", "Postcode on EPC"]
|
||||
]
|
||||
asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue