mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
working on plusdane matching
This commit is contained in:
parent
831abc884f
commit
c4eb72fb92
5 changed files with 147 additions and 34 deletions
|
|
@ -397,6 +397,13 @@ class AssetList:
|
|||
# Update the reference to the landlord UPRN
|
||||
self.landlord_uprn = self.STANDARD_UPRN
|
||||
|
||||
# Handle the case where the full address and address 1 refer to the same column
|
||||
if self.full_address_colname == self.address1_colname:
|
||||
self.full_address_colname = self.STANDARD_FULL_ADDRESS
|
||||
self.standardised_asset_list[self.full_address_colname] = (
|
||||
self.standardised_asset_list[self.address1_colname].copy()
|
||||
)
|
||||
|
||||
def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"):
|
||||
|
||||
if method not in self.ADDRESS_1_CLEANING_METHODS:
|
||||
|
|
@ -632,7 +639,8 @@ class AssetList:
|
|||
known_errors = [
|
||||
"#MULTIVALUE",
|
||||
"This cell has an external reference that can't be shown or edited. Editing this cell will "
|
||||
"remove the external reference."
|
||||
"remove the external reference.",
|
||||
"ND"
|
||||
]
|
||||
|
||||
if pd.isnull(date_str) or date_str in known_errors:
|
||||
|
|
@ -642,6 +650,9 @@ class AssetList:
|
|||
match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str)
|
||||
if match:
|
||||
return int(match.group(1)) # Extract the year and convert to integer
|
||||
if "-" in date_str:
|
||||
# We probably have a range
|
||||
return int(date_str.split("-")[1].strip())
|
||||
|
||||
if isinstance(date_str, datetime):
|
||||
return date_str.year
|
||||
|
|
@ -1853,7 +1864,7 @@ class AssetList:
|
|||
self.outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname)
|
||||
self.outcomes["row_id"] = self.outcomes.index
|
||||
|
||||
logger.info("Matching outcomes to ")
|
||||
logger.info("Matching outcomes to asset list")
|
||||
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
|
||||
lookup = []
|
||||
nomatch = []
|
||||
|
|
@ -1866,7 +1877,7 @@ class AssetList:
|
|||
].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean)
|
||||
]
|
||||
|
||||
if not matched.empty and matched.shape[0] == 1:
|
||||
if matched.shape[0] == 1:
|
||||
lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
|
|
@ -1875,6 +1886,42 @@ class AssetList:
|
|||
)
|
||||
continue
|
||||
|
||||
if "UPRN" in x:
|
||||
matched = self.standardised_asset_list[
|
||||
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == x["UPRN"]
|
||||
]
|
||||
|
||||
if matched.shape[0] == 1:
|
||||
lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
matched = self.standardised_asset_list[
|
||||
(self.standardised_asset_list[self.STANDARD_POSTCODE] == x["Post Code"])
|
||||
].copy()
|
||||
if not matched.empty:
|
||||
matched["houseno"] = matched.apply(
|
||||
lambda x: SearchEpc.get_house_number(x[self.STANDARD_ADDRESS_1], x[self.STANDARD_POSTCODE]),
|
||||
axis=1
|
||||
)
|
||||
matched = matched[
|
||||
matched["houseno"].astype(str) == str(x["Numb."])
|
||||
]
|
||||
if matched.shape[0] == 1:
|
||||
lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
elif not matched.empty:
|
||||
raise NotImplementedError("Implement me - multiple matches on house number")
|
||||
|
||||
nomatch.append(x["row_id"])
|
||||
|
||||
self.outcomes_no_match = self.outcomes[self.outcomes["row_id"].isin(nomatch)]
|
||||
|
|
|
|||
|
|
@ -125,21 +125,22 @@ def get_data(
|
|||
no_epc.append(home[row_id_name])
|
||||
continue
|
||||
|
||||
if epc_api_only:
|
||||
epc = {
|
||||
row_id_name: home[row_id_name],
|
||||
**searcher.newest_epc.copy()
|
||||
}
|
||||
|
||||
epc_data.append(epc)
|
||||
continue
|
||||
|
||||
# Look for EPC recommendations
|
||||
try:
|
||||
property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
|
||||
except:
|
||||
property_recommendations = {"rows": []}
|
||||
|
||||
if epc_api_only:
|
||||
epc = {
|
||||
row_id_name: home[row_id_name],
|
||||
**searcher.newest_epc.copy(),
|
||||
"recommendations": property_recommendations["rows"]
|
||||
}
|
||||
|
||||
epc_data.append(epc)
|
||||
continue
|
||||
|
||||
# Retrieve data from FindMyEPC
|
||||
try:
|
||||
find_epc_searcher = RetrieveFindMyEpc(
|
||||
|
|
@ -283,25 +284,46 @@ def app():
|
|||
# landlord_property_id = "Place ref"
|
||||
|
||||
# For ACIS - programme re-build
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
|
||||
data_filename = "ACIS asset list.xlsx"
|
||||
sheet_name = "Assets"
|
||||
address1_column = "House No"
|
||||
postcode_column = "Postcode"
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
|
||||
# data_filename = "ACIS asset list.xlsx"
|
||||
# sheet_name = "Assets"
|
||||
# address1_column = "House No"
|
||||
# postcode_column = "Postcode"
|
||||
# landlord_property_id = "UPRN"
|
||||
# fulladdress_column = None
|
||||
# address_cols_to_concat = ["House No", "Street", "Town"]
|
||||
# missing_postcodes_method = None
|
||||
# address1_method = None
|
||||
# landlord_year_built = "YEAR BUILT"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property type"
|
||||
# landlord_wall_construction = "Wall Constuction"
|
||||
# landlord_heating_system = "Heating"
|
||||
# landlord_existing_pv = None
|
||||
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
|
||||
# master_filename_eco3 = "ECO 3 -Table 1.csv"
|
||||
# master_filename_eco4 = "ECO 4 -Table 1.csv"
|
||||
|
||||
# For plus dane
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
|
||||
data_filename = "PLUS DANE Asset List - for analysis.xlsx"
|
||||
sheet_name = "Asset List"
|
||||
address1_column = " Address"
|
||||
postcode_column = " Postcode"
|
||||
landlord_property_id = "UPRN"
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["House No", "Street", "Town"]
|
||||
fulladdress_column = " Address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
address1_method = None
|
||||
landlord_year_built = "YEAR BUILT"
|
||||
landlord_year_built = "Property Age"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property type"
|
||||
landlord_wall_construction = "Wall Constuction"
|
||||
landlord_heating_system = "Heating"
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_wall_construction = "Landlord Wall Full"
|
||||
landlord_heating_system = "Landlord Heating"
|
||||
landlord_existing_pv = None
|
||||
outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
|
||||
master_filename_eco3 = "ECO 3 -Table 1.csv"
|
||||
master_filename_eco4 = "ECO 4 -Table 1.csv"
|
||||
outcomes_filename = "plus dane outcomes.xlsx"
|
||||
outcomes_sheetname = "EVERYTHING"
|
||||
master_filepaths = ["JJC Rolling Master.csv", "SCIS Rolling Master.csv"]
|
||||
|
||||
# Maps addresses to UPRNs in problematic cases
|
||||
manual_uprn_map = {}
|
||||
|
|
@ -360,19 +382,18 @@ def app():
|
|||
# We now flag properties that have been treated under existing programmes
|
||||
asset_list.flag_outcomes(
|
||||
outcomes_filepath=os.path.join(data_folder, outcomes_filename),
|
||||
outcomes_sheetname="Feedback"
|
||||
outcomes_sheetname=outcomes_sheetname
|
||||
)
|
||||
|
||||
asset_list.flag_survey_master(
|
||||
master_filepaths=[
|
||||
os.path.join(data_folder, f) for f in [master_filename_eco3, master_filename_eco4] if f is not None
|
||||
],
|
||||
master_filepaths=master_filepaths
|
||||
)
|
||||
|
||||
### We retrieve the EPC data
|
||||
|
||||
# We chunk up this data into 5000 rows at a time
|
||||
# Create the chunks directory
|
||||
epc_api_only = False
|
||||
force_retrieve_data = False
|
||||
skip = None # Used to skip already completed chunks
|
||||
chunk_size = 5000
|
||||
|
|
@ -400,6 +421,7 @@ def app():
|
|||
df=chunk,
|
||||
row_id_name=asset_list.DOMNA_PROPERTY_ID,
|
||||
manual_uprn_map=manual_uprn_map,
|
||||
epc_api_only=epc_api_only
|
||||
)
|
||||
|
||||
# We now retrieve any failed properties
|
||||
|
|
@ -408,7 +430,7 @@ def app():
|
|||
df=chunk_failed,
|
||||
row_id_name=asset_list.DOMNA_PROPERTY_ID,
|
||||
manual_uprn_map=manual_uprn_map,
|
||||
epc_api_only=False
|
||||
epc_api_only=epc_api_only
|
||||
)
|
||||
|
||||
epc_data_chunk.extend(epc_data_failed)
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ STANDARD_HEATING_SYSTEMS = {
|
|||
"unknown",
|
||||
"communal gas boiler",
|
||||
"high heat retention storage heaters",
|
||||
"room heaters"
|
||||
}
|
||||
|
||||
HEATING_MAPPINGS = {
|
||||
|
|
@ -69,5 +70,30 @@ HEATING_MAPPINGS = {
|
|||
'Electric': 'electric storage heaters',
|
||||
'Solid fuel': 'other',
|
||||
'No Heat': 'unknown',
|
||||
'GSHP': 'ground source heat pump'
|
||||
'GSHP': 'ground source heat pump',
|
||||
|
||||
'Boiler Oil': 'oil boiler',
|
||||
'Boiler Electricity': 'electric boiler',
|
||||
'Boiler ND': 'unknown',
|
||||
'ND Mains gas': 'unknown',
|
||||
'Room heaters Mains gas': "room heaters",
|
||||
'Heat pump (air) Electricity': 'air source heat pump',
|
||||
'Room heaters Electricity': 'electric radiators',
|
||||
'Room heaters Oil': 'room heaters',
|
||||
'No heating system ND': 'unknown',
|
||||
'Heat pump (wet) Electricity': 'ground source heat pump',
|
||||
'Room heaters Biomass': 'room heaters',
|
||||
'ND Solid fuel': 'unknown',
|
||||
'Boiler Mains gas': 'gas combi boiler',
|
||||
'Boiler LPG': 'boiler - other fuel',
|
||||
'Room heaters Solid fuel': 'room heaters',
|
||||
'ND ND': 'unknown',
|
||||
'Storage heating Electricity': 'electric storage heaters',
|
||||
'ND Electricity': 'unknown',
|
||||
'Community heating Community (non-gas)': 'district heating',
|
||||
'No heating system N/A': 'unknown',
|
||||
'Boiler Solid fuel': 'boiler - other fuel',
|
||||
'Community heating Community (mains gas)': 'communal gas boiler',
|
||||
'Boiler Biomass': 'boiler - other fuel',
|
||||
'No heating system Mains gas': 'unknown'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,5 +62,6 @@ PROPERTY_MAPPING = {
|
|||
'3 Bed First Floor Maisonette': 'maisonette',
|
||||
'2 Bed 1st Floor Sheltered Flat': 'flat',
|
||||
'1 Bed First Floor Flat': 'flat',
|
||||
'3 Bed First Floor Flat': 'flat'
|
||||
'3 Bed First Floor Flat': 'flat',
|
||||
'ND': 'unknown'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,5 +100,22 @@ WALL_CONSTRUCTION_MAPPINGS = {
|
|||
'BRICK/BLOCK CAVITY': 'cavity unknown insulation',
|
||||
'STONE SOLID': 'sandstone or limestone',
|
||||
'EXT CLADDING SYSTEM': 'system built',
|
||||
'BRICK/BLOCK SOLID': 'solid brick unknown insulation'
|
||||
'BRICK/BLOCK SOLID': 'solid brick unknown insulation',
|
||||
|
||||
'Cavity Filled cavity (with internal/external)': 'filled cavity',
|
||||
'ND (inferred) Filled cavity': 'filled cavity',
|
||||
'Cavity Filled cavity': 'filled cavity',
|
||||
'Cavity Unknown insulation': 'cavity unknown insulation',
|
||||
'Timber frame As-built': 'timber frame',
|
||||
'System build Unknown insulation': 'system built',
|
||||
'Cavity As-built': 'unknown',
|
||||
'System build External': 'system built',
|
||||
'ND (inferred) ND (inferred)': 'unknown',
|
||||
'Solid brick External': 'insulated solid brick',
|
||||
'Cavity External': 'filled cavity',
|
||||
'System build As-built': 'system built',
|
||||
'Solid brick Internal': 'insulated solid brick',
|
||||
'Cavity Internal': 'filled cavity',
|
||||
'System build Internal': 'system built',
|
||||
'Solid brick As-built': 'solid brick unknown insulation'
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue