mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
created block splitting code for calico asset list
This commit is contained in:
parent
c22179f1a5
commit
383a4852e2
1 changed files with 120 additions and 77 deletions
|
|
@ -1025,6 +1025,15 @@ class AssetList:
|
|||
self.standardised_asset_list[self.STANDARD_SAP]
|
||||
)
|
||||
|
||||
has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum()
|
||||
|
||||
# Perform block splitting, ahead of fetching the EPC data
|
||||
# If we have blocks of flats without a landlord block reference, we create one
|
||||
self.fill_landlord_block_reference(has_blocks_of_flats)
|
||||
|
||||
# If we have blocks of flats, we split these out into individual units.
|
||||
self.split_blocks()
|
||||
|
||||
def merge_data(self, df: pd.DataFrame):
|
||||
"""
|
||||
Used to insert data into the standardised asset list, based on the domna property id
|
||||
|
|
@ -1270,6 +1279,12 @@ class AssetList:
|
|||
)
|
||||
)
|
||||
|
||||
self.standardised_asset_list["SAP Category"] = np.where(
|
||||
pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]),
|
||||
"SAP Unknown",
|
||||
self.standardised_asset_list["SAP Category"]
|
||||
)
|
||||
|
||||
else:
|
||||
# We add a SAP category for all work type identification
|
||||
# We break into 4 categories (54 or less, 55-68, 69-74, 75 or more)
|
||||
|
|
@ -1290,6 +1305,11 @@ class AssetList:
|
|||
),
|
||||
)
|
||||
)
|
||||
self.standardised_asset_list["SAP Category"] = np.where(
|
||||
pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]),
|
||||
"SAP Unknown",
|
||||
self.standardised_asset_list["SAP Category"]
|
||||
)
|
||||
|
||||
# Before we begin, we identify if a property has solar already as we use this
|
||||
# for identifying cavity jobs
|
||||
|
|
@ -2040,6 +2060,100 @@ class AssetList:
|
|||
self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]
|
||||
)
|
||||
|
||||
def split_blocks(self):
    """
    Where we have a single row that is a block of flats, we split this into multiple rows,
    one for each unit. The data that we have will be copied across rows.

    The unit numbers are read from the address 1 column, which is recognised in three shapes:
      1. A numeric range ("1-7 High Street")       -> one row per number in the range
      2. An explicit list ("1, 2, 5 Park Court")   -> one row per listed number
      3. A single number, or no number at all      -> the row is kept exactly as it is

    Every generated unit row:
      - has address 1 rewritten for that unit, and the full address too when it is a string
      - has its property type set to "flat"
      - gets a property id of the form "<block property id>-<unit address 1>"
      - keeps the block's original values in "block_address1" / "block_full_address" and is
        flagged with "is_expended_block" = True

    The block rows are then replaced in self.standardised_asset_list by the expanded rows, and
    any block reference that covers at most one expanded row is cleared (set to None).

    :raises ValueError: if a range runs backwards or spans more than 100 numbers
    :raises NotImplementedError: if the address holds several numbers but is neither a range
        nor a recognisable list
    :return: None, self.standardised_asset_list is updated in place
    """

    blocks = self.standardised_asset_list[
        self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
    ].copy()

    if blocks.empty:
        return

    RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b')
    NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b')  # captures 12, 12A, etc.
    # A run of two or more numbers separated by commas (optionally "&" / "and"), e.g. "1, 2 and 5"
    LIST_RE = re.compile(r'\b\d+[A-Za-z]?(?:\s*(?:,|&|and)\s*\d+[A-Za-z]?)+\b')

    def make_unit(block_row, block_addr, unit_addr):
        # Build the row for one unit of a block. Shared by the range and the list branch so
        # that both produce identically shaped rows.
        unit = block_row.copy()
        block_full_address = unit[self.STANDARD_FULL_ADDRESS]
        unit[self.STANDARD_ADDRESS_1] = unit_addr
        # A missing full address (None / NaN) has no .replace, so it is left untouched
        if isinstance(block_full_address, str):
            unit[self.STANDARD_FULL_ADDRESS] = block_full_address.replace(block_addr, unit_addr)
        unit[self.STANDARD_PROPERTY_TYPE] = "flat"
        # Keep a record of the block the unit came from
        unit["block_address1"] = block_addr
        unit["block_full_address"] = block_full_address
        unit["is_expended_block"] = True
        unit[self.DOMNA_PROPERTY_ID] = f"{block_row[self.DOMNA_PROPERTY_ID]}-{unit_addr}"
        return unit

    expanded_rows = []
    for _, row in blocks.iterrows():
        addr = str(row[self.STANDARD_ADDRESS_1])

        # 1 ─ Range (e.g. 1-7)
        m_range = RANGE_RE.search(addr)
        if m_range:
            # Only the leading digits of each bound are used, a letter suffix (12A) is ignored
            start, end = (int(re.match(r'\d+', bound)[0]) for bound in m_range.groups())
            if start > end or (end - start) > 100:
                raise ValueError(f"Suspicious range '{addr}'")
            for n in range(start, end + 1):
                expanded_rows.append(make_unit(row, addr, RANGE_RE.sub(str(n), addr, count=1)))
            continue

        # 2 ─ Explicit list (e.g. 1, 2, 5 Block)
        nums = NUM_RE.findall(addr)
        m_list = LIST_RE.search(addr) if (len(nums) > 1 and ',' in addr) else None
        if m_list:
            # The whole list is swapped for one unit number, so "1, 2, 5 Block" gives "1 Block",
            # "2 Block" and "5 Block" rather than leaving the other numbers in the address
            before, after = addr[:m_list.start()], addr[m_list.end():]
            for n in NUM_RE.findall(m_list.group(0)):
                expanded_rows.append(make_unit(row, addr, f"{before}{n}{after}"))
            continue

        # 3 ─ Single number or no number, treat as individual dwelling
        if len(nums) <= 1:
            expanded_rows.append(row)
            continue

        # Anything else with digits is unrecognised
        raise NotImplementedError(f"Unhandled block format: '{addr}'")

    expanded_blocks = pd.DataFrame(expanded_rows)

    # We drop the blocks from the standardised asset list and append on the expanded blocks
    self.standardised_asset_list = self.standardised_asset_list[
        self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
    ]

    self.standardised_asset_list = pd.concat(
        [self.standardised_asset_list, expanded_blocks],
        ignore_index=True
    )

    # As a final clean up, for any blocks that are size 1, we don't include a project code
    units_per_block = (
        expanded_blocks
        .groupby(self.STANDARD_BLOCK_REFERENCE)[self.DOMNA_PROPERTY_ID]
        .nunique()
    )
    single_unit_references = units_per_block.index[units_per_block <= 1]

    # Remove the size 1 blocks from the standardised asset list
    # NOTE(review): this clears the reference on every row that shares it, not only on the
    # expanded rows - confirm that is intended
    self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where(
        self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(single_unit_references),
        None,
        self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]
    )
|
||||
|
||||
def label_property_status(self):
|
||||
"""
|
||||
This function is designed to be run after identify_worktypes() has been run, and will create a "property_status"
|
||||
|
|
@ -2081,85 +2195,14 @@ class AssetList:
|
|||
# These blocks may be referenced via the landlord_block_reference field, or by property types being
|
||||
# blocks of flats
|
||||
has_landlord_block_reference = self.landlord_block_reference is not None
|
||||
has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum()
|
||||
|
||||
if has_landlord_block_reference or has_blocks_of_flats:
|
||||
if has_landlord_block_reference:
|
||||
|
||||
# If we have blocks of flats without a landlord block reference, we create one
|
||||
self.fill_landlord_block_reference(has_blocks_of_flats)
|
||||
|
||||
self.split_blocks(has_blocks_of_flats)
|
||||
|
||||
def split_blocks(self, has_blocks_of_flats):
    """
    Where we have a single row that is a block of flats, we split this into multiple rows,
    one for each unit. The data that we have will be copied across rows.

    The unit numbers are read from the address 1 column:
      1. A numeric range ("1-7 High Street")       -> one row per number in the range
      2. An explicit list ("1, 2, 5 Park Court")   -> one row per listed number
      3. A single number, or no number at all      -> the row is kept exactly as it is

    Each generated row has address 1 rewritten for its unit and a property id of the form
    "<block property id>-<unit address 1>". All other columns are copied from the block row.

    :param has_blocks_of_flats: not read by this method, kept so existing callers still work
    :raises ValueError: if a range runs backwards or spans more than 100 numbers
    :raises NotImplementedError: if the address holds several numbers but is neither a range
        nor a recognisable list
    :return: None, self.standardised_asset_list is updated in place
    """

    blocks = self.standardised_asset_list[
        self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
    ].copy()

    RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b')
    NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b')  # captures 12, 12A, etc.
    # A run of two or more numbers separated by commas (optionally "&" / "and"), e.g. "1, 2 and 5"
    LIST_RE = re.compile(r'\b\d+[A-Za-z]?(?:\s*(?:,|&|and)\s*\d+[A-Za-z]?)+\b')

    def make_unit(block_row, unit_addr):
        # Copy the block row and re-address it for a single unit
        unit = block_row.copy()
        unit[self.STANDARD_ADDRESS_1] = unit_addr
        unit[self.DOMNA_PROPERTY_ID] = f"{block_row[self.DOMNA_PROPERTY_ID]}-{unit_addr}"
        return unit

    expanded_rows = []
    for _, row in blocks.iterrows():
        addr = str(row[self.STANDARD_ADDRESS_1])

        # 1 ─ Range (e.g. 1-7)
        m_range = RANGE_RE.search(addr)
        if m_range:
            # Only the leading digits of each bound are used, a letter suffix (12A) is ignored
            start, end = (int(re.match(r'\d+', bound)[0]) for bound in m_range.groups())
            if start > end or (end - start) > 100:
                raise ValueError(f"Suspicious range '{addr}'")
            for n in range(start, end + 1):
                expanded_rows.append(make_unit(row, RANGE_RE.sub(str(n), addr, count=1)))
            continue

        # 2 ─ Explicit list (e.g. 1, 2, 5 Block)
        nums = NUM_RE.findall(addr)
        m_list = LIST_RE.search(addr) if (len(nums) > 1 and ',' in addr) else None
        if m_list:
            # The whole list is swapped for one unit number, so "1, 2, 5 Block" gives "1 Block",
            # "2 Block" and "5 Block" rather than leaving the other numbers in the address
            before, after = addr[:m_list.start()], addr[m_list.end():]
            for n in NUM_RE.findall(m_list.group(0)):
                expanded_rows.append(make_unit(row, f"{before}{n}{after}"))
            continue

        # 3 ─ Single number or no number → keep as-is
        if len(nums) <= 1:
            expanded_rows.append(row)
            continue

        # Anything else with digits is unrecognised
        raise NotImplementedError(f"Unhandled block format: '{addr}'")

    expanded_blocks = pd.DataFrame(expanded_rows)

    # We drop the blocks from the standardised asset list and append on the expanded blocks
    self.standardised_asset_list = self.standardised_asset_list[
        self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
    ]

    self.standardised_asset_list = pd.concat(
        [self.standardised_asset_list, expanded_blocks],
        ignore_index=True
    )
|
||||
# # If we blocks of flats, without a landlord block reference, we create this
|
||||
# self.fill_landlord_block_reference(has_blocks_of_flats)
|
||||
#
|
||||
# # If we have blocks of flats, we split these out into individual units
|
||||
# self.split_blocks()
|
||||
|
||||
# For blocks that have a 50% allocation, we create project codes
|
||||
self.block_analysis()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue