Working on Plus Dane matching

This commit is contained in:
Khalim Conn-Kowlessar 2025-03-08 17:30:54 +00:00
parent 831abc884f
commit c4eb72fb92
5 changed files with 147 additions and 34 deletions

View file

@ -397,6 +397,13 @@ class AssetList:
# Update the reference to landlord UPRN
self.landlord_uprn = self.STANDARD_UPRN
# Handle the case when full address and address 1 are the same
if self.full_address_colname == self.address1_colname:
self.full_address_colname = self.STANDARD_FULL_ADDRESS
self.standardised_asset_list[self.full_address_colname] = (
self.standardised_asset_list[self.address1_colname].copy()
)
def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"):
if method not in self.ADDRESS_1_CLEANING_METHODS:
@ -632,7 +639,8 @@ class AssetList:
known_errors = [
"#MULTIVALUE",
"This cell has an external reference that can't be shown or edited. Editing this cell will "
"remove the external reference."
"remove the external reference.",
"ND"
]
if pd.isnull(date_str) or date_str in known_errors:
@ -642,6 +650,9 @@ class AssetList:
match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str)
if match:
return int(match.group(1)) # Extract the year and convert to integer
if "-" in date_str:
# We probably have a range
return int(date_str.split("-")[1].strip())
if isinstance(date_str, datetime):
return date_str.year
@ -1853,7 +1864,7 @@ class AssetList:
self.outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname)
self.outcomes["row_id"] = self.outcomes.index
logger.info("Matching outcomes to ")
logger.info("Matching outcomes to asset list")
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
lookup = []
nomatch = []
@ -1866,7 +1877,7 @@ class AssetList:
].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean)
]
if not matched.empty and matched.shape[0] == 1:
if matched.shape[0] == 1:
lookup.append(
{
"row_id": x["row_id"],
@ -1875,6 +1886,42 @@ class AssetList:
)
continue
if "UPRN" in x:
matched = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == x["UPRN"]
]
if matched.shape[0] == 1:
lookup.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
)
continue
matched = self.standardised_asset_list[
(self.standardised_asset_list[self.STANDARD_POSTCODE] == x["Post Code"])
].copy()
if not matched.empty:
matched["houseno"] = matched.apply(
lambda x: SearchEpc.get_house_number(x[self.STANDARD_ADDRESS_1], x[self.STANDARD_POSTCODE]),
axis=1
)
matched = matched[
matched["houseno"].astype(str) == str(x["Numb."])
]
if matched.shape[0] == 1:
lookup.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
)
continue
elif not matched.empty:
raise NotImplementedError("Implement me - multiple matches on house number")
nomatch.append(x["row_id"])
self.outcomes_no_match = self.outcomes[self.outcomes["row_id"].isin(nomatch)]

View file

@ -125,21 +125,22 @@ def get_data(
no_epc.append(home[row_id_name])
continue
if epc_api_only:
epc = {
row_id_name: home[row_id_name],
**searcher.newest_epc.copy()
}
epc_data.append(epc)
continue
# Look for EPC recommendations
try:
property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
except:
property_recommendations = {"rows": []}
if epc_api_only:
epc = {
row_id_name: home[row_id_name],
**searcher.newest_epc.copy(),
"recommendations": property_recommendations["rows"]
}
epc_data.append(epc)
continue
# Retrieve data from FindMyEPC
try:
find_epc_searcher = RetrieveFindMyEpc(
@ -283,25 +284,46 @@ def app():
# landlord_property_id = "Place ref"
# For ACIS - programme re-build
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
data_filename = "ACIS asset list.xlsx"
sheet_name = "Assets"
address1_column = "House No"
postcode_column = "Postcode"
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
# data_filename = "ACIS asset list.xlsx"
# sheet_name = "Assets"
# address1_column = "House No"
# postcode_column = "Postcode"
# landlord_property_id = "UPRN"
# fulladdress_column = None
# address_cols_to_concat = ["House No", "Street", "Town"]
# missing_postcodes_method = None
# address1_method = None
# landlord_year_built = "YEAR BUILT"
# landlord_os_uprn = None
# landlord_property_type = "Property type"
# landlord_wall_construction = "Wall Constuction"
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
# master_filename_eco3 = "ECO 3 -Table 1.csv"
# master_filename_eco4 = "ECO 4 -Table 1.csv"
# For plus dane
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
data_filename = "PLUS DANE Asset List - for analysis.xlsx"
sheet_name = "Asset List"
address1_column = " Address"
postcode_column = " Postcode"
landlord_property_id = "UPRN"
fulladdress_column = None
address_cols_to_concat = ["House No", "Street", "Town"]
fulladdress_column = " Address"
address_cols_to_concat = []
missing_postcodes_method = None
address1_method = None
landlord_year_built = "YEAR BUILT"
landlord_year_built = "Property Age"
landlord_os_uprn = None
landlord_property_type = "Property type"
landlord_wall_construction = "Wall Constuction"
landlord_heating_system = "Heating"
landlord_property_type = "Property Type"
landlord_wall_construction = "Landlord Wall Full"
landlord_heating_system = "Landlord Heating"
landlord_existing_pv = None
outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
master_filename_eco3 = "ECO 3 -Table 1.csv"
master_filename_eco4 = "ECO 4 -Table 1.csv"
outcomes_filename = "plus dane outcomes.xlsx"
outcomes_sheetname = "EVERYTHING"
master_filepaths = ["JJC Rolling Master.csv", "SCIS Rolling Master.csv"]
# Maps addresses to UPRNs in problematic cases
manual_uprn_map = {}
@ -360,19 +382,18 @@ def app():
# We now flag properties that have been treated under existing programmes
asset_list.flag_outcomes(
outcomes_filepath=os.path.join(data_folder, outcomes_filename),
outcomes_sheetname="Feedback"
outcomes_sheetname=outcomes_sheetname
)
asset_list.flag_survey_master(
master_filepaths=[
os.path.join(data_folder, f) for f in [master_filename_eco3, master_filename_eco4] if f is not None
],
master_filepaths=master_filepaths
)
### We retrieve the EPC data
# We chunk up this data into 5000 rows at a time
# Create the chunks directory
epc_api_only = False
force_retrieve_data = False
skip = None # Used to skip already completed chunks
chunk_size = 5000
@ -400,6 +421,7 @@ def app():
df=chunk,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
manual_uprn_map=manual_uprn_map,
epc_api_only=epc_api_only
)
# We now retrieve any failed properties
@ -408,7 +430,7 @@ def app():
df=chunk_failed,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
manual_uprn_map=manual_uprn_map,
epc_api_only=False
epc_api_only=epc_api_only
)
epc_data_chunk.extend(epc_data_failed)

View file

@ -16,6 +16,7 @@ STANDARD_HEATING_SYSTEMS = {
"unknown",
"communal gas boiler",
"high heat retention storage heaters",
"room heaters"
}
HEATING_MAPPINGS = {
@ -69,5 +70,30 @@ HEATING_MAPPINGS = {
'Electric': 'electric storage heaters',
'Solid fuel': 'other',
'No Heat': 'unknown',
'GSHP': 'ground source heat pump'
'GSHP': 'ground source heat pump',
'Boiler Oil': 'oil boiler',
'Boiler Electricity': 'electric boiler',
'Boiler ND': 'unknown',
'ND Mains gas': 'unknown',
'Room heaters Mains gas': "room heaters",
'Heat pump (air) Electricity': 'air source heat pump',
'Room heaters Electricity': 'electric radiators',
'Room heaters Oil': 'room heaters',
'No heating system ND': 'unknown',
'Heat pump (wet) Electricity': 'ground source heat pump',
'Room heaters Biomass': 'room heaters',
'ND Solid fuel': 'unknown',
'Boiler Mains gas': 'gas combi boiler',
'Boiler LPG': 'boiler - other fuel',
'Room heaters Solid fuel': 'room heaters',
'ND ND': 'unknown',
'Storage heating Electricity': 'electric storage heaters',
'ND Electricity': 'unknown',
'Community heating Community (non-gas)': 'district heating',
'No heating system N/A': 'unknown',
'Boiler Solid fuel': 'boiler - other fuel',
'Community heating Community (mains gas)': 'communal gas boiler',
'Boiler Biomass': 'boiler - other fuel',
'No heating system Mains gas': 'unknown'
}

View file

@ -62,5 +62,6 @@ PROPERTY_MAPPING = {
'3 Bed First Floor Maisonette': 'maisonette',
'2 Bed 1st Floor Sheltered Flat': 'flat',
'1 Bed First Floor Flat': 'flat',
'3 Bed First Floor Flat': 'flat'
'3 Bed First Floor Flat': 'flat',
'ND': 'unknown'
}

View file

@ -100,5 +100,22 @@ WALL_CONSTRUCTION_MAPPINGS = {
'BRICK/BLOCK CAVITY': 'cavity unknown insulation',
'STONE SOLID': 'sandstone or limestone',
'EXT CLADDING SYSTEM': 'system built',
'BRICK/BLOCK SOLID': 'solid brick unknown insulation'
'BRICK/BLOCK SOLID': 'solid brick unknown insulation',
'Cavity Filled cavity (with internal/external)': 'filled cavity',
'ND (inferred) Filled cavity': 'filled cavity',
'Cavity Filled cavity': 'filled cavity',
'Cavity Unknown insulation': 'cavity unknown insulation',
'Timber frame As-built': 'timber frame',
'System build Unknown insulation': 'system built',
'Cavity As-built': 'unknown',
'System build External': 'system built',
'ND (inferred) ND (inferred)': 'unknown',
'Solid brick External': 'insulated solid brick',
'Cavity External': 'filled cavity',
'System build As-built': 'system built',
'Solid brick Internal': 'insulated solid brick',
'Cavity Internal': 'filled cavity',
'System build Internal': 'system built',
'Solid brick As-built': 'solid brick unknown insulation'
}