debugging remapper

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-19 22:32:05 +00:00
parent ecf8e46c65
commit 978deb286b
6 changed files with 63 additions and 18 deletions

View file

@ -27,8 +27,8 @@ class DataRemapper:
:param standard_values: Set of allowed standardized values.
:param standard_map: Dictionary of common remappings {raw_value: standard_value}.
"""
self.standard_values = {v.lower() for v in standard_values} # Normalize to lowercase
self.standard_map = {k.lower(): v.lower() for k, v in (standard_map or {}).items()} # Predefined mappings
self.standard_values = standard_values
self.standard_map = standard_map
self.fuzzy_threshold = 90 # Adjust fuzzy matching sensitivity
self.ai_model = "gpt-4-turbo" # Use gpt-3.5-turbo for cheaper processing
@ -39,7 +39,7 @@ class DataRemapper:
self.total_tokens_used = 0
self.total_cost = 0
self.remap_dict = {} # {original_value: standardized_value}
self.max_tokens = 1000 # Limit for OpenAI API
self.max_tokens = max_tokens # Limit for OpenAI API
# Memoization for AI calls
self.ai_cache = {} # {tuple(unmapped_values): {original_value: standardized_value}}
@ -61,6 +61,8 @@ class DataRemapper:
return None
text = text.strip().lower()
text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
# Collapse runs of whitespace into a single space
text = re.sub(r'\s+', ' ', text)
return text
def fuzzy_match(self, text):
@ -106,6 +108,7 @@ class DataRemapper:
if input_tokens > self.max_tokens:
raise ValueError("Input tokens exceed the maximum limit.")
logger.info("Calling OpenAI API for standardization...")
response = self.openai_client.chat.completions.create(
model=self.ai_model,
messages=[{"role": "user", "content": prompt}],
@ -156,8 +159,14 @@ class DataRemapper:
cleaned_value = self.clean_string(value)
# Rule-Based Check (Predefined Mapping)
if cleaned_value in self.standard_map:
self.remap_dict[value] = self.standard_map[cleaned_value]
if cleaned_value in self.standard_map or value in self.standard_map:
self.remap_dict[value] = (
self.standard_map[cleaned_value] if cleaned_value in self.standard_map else self.standard_map[value]
)
continue
if value.lower() in self.standard_map:
self.remap_dict[value] = self.standard_map[value.lower()]
continue
# Exact Match in Standard Values

View file

@ -5,4 +5,8 @@ STANDARD_EXISTING_PV = {
# Lookup table from raw "existing PV" flags to the two labels used below
# ("no PV" / "already has PV"). Presumably supplied to the remapper as its
# predefined mapping -- confirm against the caller.
EXISTING_PV_MAPPINGS = {
    # Upper-case spellings.
    "NO": "no PV",
    "YES": "already has PV",
    # Lower-case spellings.
    "no": "no PV",
    "yes": "already has PV",
    # Boolean flags. NOTE: in Python True == 1 and False == 0 (same hash),
    # so integer 1/0 lookups resolve to these entries as well.
    True: "already has PV",
    False: "no PV",
}

View file

@ -34,13 +34,12 @@ HEATING_MAPPINGS = {
"Eco Electric Radiators": "electric radiators",
"Gas fire": "other",
"Backboiler - Solid fuel": "other",
'combi - gas': 'gas combi boiler', 'e7 storage heaters': 'electric storage heaters',
'district heating system': 'district heating', 'condensing boiler - gas': 'gas condensing boiler',
'boiler oil/other': 'oil boiler', 'condensing combi - gas': 'gas condensing combi',
'air source source heat pump': 'air source heat pump', 'biomass boiler': 'boiler - other fuel',
'ground source heat pump': 'ground source heat pump', 'electric oil filled radiators': 'electric radiators',
'solid fuel': 'other', 'lpg boiler': 'boiler - other fuel', 'electric boiler': 'electric boiler',
'no data': 'unknown', 'boiler communal/commercial - gas': 'communal gas boiler',
'eco electric radiators': 'electric radiators', 'gas fire': 'other', 'backboiler - solid fuel': 'other',
}
# array(['Combi - GAS', 'E7 Storage Heaters', 'District heating system',
# 'Condensing Boiler - GAS', 'Boiler Oil/other',
# 'Condensing Combi - Gas', 'Air Source Source Heat Pump',
# 'Biomass Boiler', 'Ground Source Heat Pump',
# 'Electric Oil filled radiators', 'Solid Fuel', 'LPG Boiler',
# 'Electric Boiler', 'No data', 'Boiler Communal/Commercial - GAS',
# 'Eco Electric Radiators', 'Gas fire', 'Backboiler - Solid fuel'],
# dtype=object)

View file

@ -11,6 +11,8 @@ PROPERTY_MAPPING = {
"MAISONET": "maisonette",
"BUNGALOW": "bungalow",
"BLKHOUS": "block house",
"blkhous": "block house",
"BEDSIT": "bedsit",
"COACHSE": "coach house",
"coachse": "coach house",
}

View file

@ -1,3 +1,5 @@
from asset_list.AssetList import DataRemapper
STANDARD_WALL_CONSTRUCTIONS = {
"uninsulated cavity", "filled cavity", "partial insulated cavity", "timber frame", "solid brick",
"system built", "granite or whinstone", "other", "unknown", "sandstone or limestone", "cob",
@ -18,6 +20,7 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown',
'Average thermal transmittance 0.18 W/m?K': 'unknown',
'Granite or whin, with internal insulation': 'granite or whinstone',
"Granite or whinstone, as built, insulated (assumed)": "granite or whinstone",
'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown',
'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown',
'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown',
@ -34,5 +37,34 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown',
'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown',
'Cavity wall, with internal insulation': 'filled cavity',
'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown'
'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown',
'new build - average thermal transmittance': 'new build - average thermal transmittance',
'average thermal transmittance 0.25 w/m?k': 'unknown',
'cavity wall, as built, insulated (assumed)': 'filled cavity',
'average thermal transmittance 0.31 w/m?k': 'unknown',
'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown',
'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown',
'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown',
'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m²k': 'unknown',
'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown',
'average thermal transmittance 0.18 w/m?k': 'unknown',
'granite or whin, with internal insulation': 'granite or whinstone',
'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown',
'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown',
'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown',
'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown',
'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': 'unknown',
'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown',
'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown',
'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown',
'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown',
'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown',
'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown',
'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown',
'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown',
'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown',
'average thermal transmittance 0.28 w/m?k': 'unknown',
}

View file

@ -346,7 +346,7 @@ def app():
invalid_property_types_dictionary = ["bedsit", "bed-sit", "bed sit"]
self = AssetList(
asset_list = AssetList(
local_filepath=os.path.join(DATA_FOLDER, DATA_FILENAME),
header=0,
sheet_name=SHEET_NAME,
@ -364,8 +364,7 @@ def app():
landlord_heating_system="Heat Source",
landlord_existing_pv="PV (Y/N)"
)
self.init_standardise(
)
asset_list.init_standardise()
self.apply_transformations()