completed merge of ha15 and ha32

This commit is contained in:
Khalim Conn-Kowlessar 2023-12-06 16:33:22 +00:00
parent a6c8ca0e1d
commit fa5148228b
3 changed files with 43 additions and 48 deletions

32
backend/SearchEpc.py Normal file
View file

@ -0,0 +1,32 @@
from epc_api.client import EpcClient
class SearchEpc:
"""
Given address information about a home, this class is responsible for retrieving the EPC data associated
with the property.
For a home, we might have address lines 1, 2, 3 and 4, as well as a postcode.
Often, simply searching the EPC database with address line 1 and postcode will be enough to find
the property, but there are some cases where this is not true and we might need to use other
combinations of the address fields to find the property.
"""
def __init__(self, address1, postcode, address2=None, address3=None, address4=None):
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
but can be used to find the epc for the home, if address1 and postcode are insufficient
:param address1: string, propery's address line 1
:param postcode: string, propery's postcode
:param address2: string, optional, propery's address line 2
:param address3: string, optional, propery's address line 3
:param address4: string, optional, propery's address line 4
"""
self.address1 = address1
self.postcode = postcode
self.address2 = address2
self.address3 = address3
self.address4 = address4
def search(self):

View file

@ -155,7 +155,9 @@ def marge_ha_32(asset_list, identified_addresses):
if missed.shape[0] != 36:
raise ValueError("We have a problem here, investigate me, missings beyond the Hessle Road addresses")
# TODO: Finish me
# Finally, we return the data we need
return merged_data, dropped_identified_merge_keys
def merge_ha_15(asset_list, identified_addresses):
@ -303,55 +305,14 @@ def merge_ha_15(asset_list, identified_addresses):
~identified_addresses["merge_key"].isin(merged_data["merge_key4"])
]
if ha == "ha32":
if missed.shape[0] != 36:
raise ValueError("We have a problem here, investigate me, missings beyond the Hessle Road addresses")
if missed.shape[0]:
raise ValueError("We have a problem here, investigate me, should not have any missings for ha15")
missed.shape
missed["Address"].unique()
len([m for m in missed["Address"].unique() if "Mary Mac" in m])
a = identified_addresses[
identified_addresses["Address"].str.contains("103 Priory Crescent")
]
b = asset_list[
asset_list["Address Line 1"].str.contains("103 Priory Crescent")
]
a["merge_key"]
b["merge_key"]
b["merge_key2"]
b["merge_key3"]
identified_addresses["merge_key"].isin(merged_data["merge_key"])
identified_addresses["merge_key"].isin(asset_list["merge_key"]).sum()
identified_addresses["merge_key2"].isin(asset_list["merge_key2"]).sum()
# Find merge keys not in the identified addresses
missed = identified_addresses[
~identified_addresses["merge_key"].isin(merged_data["merge_key"])
]
identified_addresses[
(identified_addresses["Address"].str.contains("Ashbury Court"))
]
asset_list[
(asset_list["Street"].str.contains("Ashbury Court"))
]
# Drop the UPRN column from the merged data
merged_data = merged_data.drop(columns=["UPRN"])
return merged_data
return merged_data, dropped_identified_merge_keys
def app():
ha32_asset_list, ha15_asset_list, ha32_identified_addresses, ha15_identified_addresses = load_data()
ha32 = marge_ha_32(asset_list=ha32_asset_list, identified_addresses=ha32_identified_addresses)
ha15 = merge_ha_15(asset_list=ha15_asset_list, identified_addresses=ha15_identified_addresses)
ha32, _ = marge_ha_32(asset_list=ha32_asset_list, identified_addresses=ha32_identified_addresses)
ha15, _ = merge_ha_15(asset_list=ha15_asset_list, identified_addresses=ha15_identified_addresses)

View file

@ -1 +1,3 @@
pandas
pandas
pydantic==1.10.11
epc-api-python==1.0.2