mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
debugging remapper
This commit is contained in:
parent
ecf8e46c65
commit
978deb286b
6 changed files with 63 additions and 18 deletions
|
|
@ -27,8 +27,8 @@ class DataRemapper:
|
|||
:param standard_values: Set of allowed standardized values.
|
||||
:param standard_map: Dictionary of common remappings {raw_value: standard_value}.
|
||||
"""
|
||||
self.standard_values = {v.lower() for v in standard_values} # Normalize to lowercase
|
||||
self.standard_map = {k.lower(): v.lower() for k, v in (standard_map or {}).items()} # Predefined mappings
|
||||
self.standard_values = standard_values
|
||||
self.standard_map = standard_map
|
||||
self.fuzzy_threshold = 90 # Adjust fuzzy matching sensitivity
|
||||
self.ai_model = "gpt-4-turbo" # Use gpt-3.5-turbo for cheaper processing
|
||||
|
||||
|
|
@ -39,7 +39,7 @@ class DataRemapper:
|
|||
self.total_tokens_used = 0
|
||||
self.total_cost = 0
|
||||
self.remap_dict = {} # {original_value: standardized_value}
|
||||
self.max_tokens = 1000 # Limit for OpenAI API
|
||||
self.max_tokens = max_tokens # Limit for OpenAI API
|
||||
|
||||
# Memoization for AI calls
|
||||
self.ai_cache = {} # {tuple(unmapped_values): {original_value: standardized_value}}
|
||||
|
|
@ -61,6 +61,8 @@ class DataRemapper:
|
|||
return None
|
||||
text = text.strip().lower()
|
||||
text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
|
||||
# Collapse runs of whitespace into a single space
|
||||
text = re.sub(r'\s+', ' ', text)
|
||||
return text
|
||||
|
||||
def fuzzy_match(self, text):
|
||||
|
|
@ -106,6 +108,7 @@ class DataRemapper:
|
|||
if input_tokens > self.max_tokens:
|
||||
raise ValueError("Input tokens exceed the maximum limit.")
|
||||
|
||||
logger.info("Calling OpenAI API for standardization...")
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model=self.ai_model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
|
|
@ -156,8 +159,14 @@ class DataRemapper:
|
|||
cleaned_value = self.clean_string(value)
|
||||
|
||||
# Rule-Based Check (Predefined Mapping)
|
||||
if cleaned_value in self.standard_map:
|
||||
self.remap_dict[value] = self.standard_map[cleaned_value]
|
||||
if cleaned_value in self.standard_map or value in self.standard_map:
|
||||
self.remap_dict[value] = (
|
||||
self.standard_map[cleaned_value] if cleaned_value in self.standard_map else self.standard_map[value]
|
||||
)
|
||||
continue
|
||||
|
||||
if value.lower() in self.standard_map:
|
||||
self.remap_dict[value] = self.standard_map[value.lower()]
|
||||
continue
|
||||
|
||||
# Exact Match in Standard Values
|
||||
|
|
|
|||
|
|
@ -5,4 +5,8 @@ STANDARD_EXISTING_PV = {
|
|||
EXISTING_PV_MAPPINGS = {
|
||||
"NO": "no PV",
|
||||
"YES": "already has PV",
|
||||
"no": "no PV",
|
||||
"yes": "already has PV",
|
||||
True: "already has PV",
|
||||
False: "no PV",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,13 +34,12 @@ HEATING_MAPPINGS = {
|
|||
"Eco Electric Radiators": "electric radiators",
|
||||
"Gas fire": "other",
|
||||
"Backboiler - Solid fuel": "other",
|
||||
'combi - gas': 'gas combi boiler', 'e7 storage heaters': 'electric storage heaters',
|
||||
'district heating system': 'district heating', 'condensing boiler - gas': 'gas condensing boiler',
|
||||
'boiler oil/other': 'oil boiler', 'condensing combi - gas': 'gas condensing combi',
|
||||
'air source source heat pump': 'air source heat pump', 'biomass boiler': 'boiler - other fuel',
|
||||
'ground source heat pump': 'ground source heat pump', 'electric oil filled radiators': 'electric radiators',
|
||||
'solid fuel': 'other', 'lpg boiler': 'boiler - other fuel', 'electric boiler': 'electric boiler',
|
||||
'no data': 'unknown', 'boiler communal/commercial - gas': 'communal gas boiler',
|
||||
'eco electric radiators': 'electric radiators', 'gas fire': 'other', 'backboiler - solid fuel': 'other',
|
||||
}
|
||||
|
||||
# array(['Combi - GAS', 'E7 Storage Heaters', 'District heating system',
|
||||
# 'Condensing Boiler - GAS', 'Boiler Oil/other',
|
||||
# 'Condensing Combi - Gas', 'Air Source Source Heat Pump',
|
||||
# 'Biomass Boiler', 'Ground Source Heat Pump',
|
||||
# 'Electric Oil filled radiators', 'Solid Fuel', 'LPG Boiler',
|
||||
# 'Electric Boiler', 'No data', 'Boiler Communal/Commercial - GAS',
|
||||
# 'Eco Electric Radiators', 'Gas fire', 'Backboiler - Solid fuel'],
|
||||
# dtype=object)
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ PROPERTY_MAPPING = {
|
|||
"MAISONET": "maisonette",
|
||||
"BUNGALOW": "bungalow",
|
||||
"BLKHOUS": "block house",
|
||||
"blkhous": "block house",
|
||||
"BEDSIT": "bedsit",
|
||||
"COACHSE": "coach house",
|
||||
"coachse": "coach house",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
from asset_list.AssetList import DataRemapper
|
||||
|
||||
STANDARD_WALL_CONSTRUCTIONS = {
|
||||
"uninsulated cavity", "filled cavity", "partial insulated cavity", "timber frame", "solid brick",
|
||||
"system built", "granite or whinstone", "other", "unknown", "sandstone or limestone", "cob",
|
||||
|
|
@ -18,6 +20,7 @@ WALL_CONSTRUCTION_MAPPINGS = {
|
|||
'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown',
|
||||
'Average thermal transmittance 0.18 W/m?K': 'unknown',
|
||||
'Granite or whin, with internal insulation': 'granite or whinstone',
|
||||
"Granite or whinstone, as built, insulated (assumed)": "granite or whinstone",
|
||||
'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown',
|
||||
'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown',
|
||||
'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown',
|
||||
|
|
@ -34,5 +37,34 @@ WALL_CONSTRUCTION_MAPPINGS = {
|
|||
'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown',
|
||||
'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown',
|
||||
'Cavity wall, with internal insulation': 'filled cavity',
|
||||
'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown'
|
||||
'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown',
|
||||
'new build - average thermal transmittance': 'new build - average thermal transmittance',
|
||||
'average thermal transmittance 0.25 w/m?k': 'unknown',
|
||||
'cavity wall, as built, insulated (assumed)': 'filled cavity',
|
||||
'average thermal transmittance 0.31 w/m?k': 'unknown',
|
||||
'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
|
||||
'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown',
|
||||
'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown',
|
||||
'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown',
|
||||
'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m²k': 'unknown',
|
||||
'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown',
|
||||
'average thermal transmittance 0.18 w/m?k': 'unknown',
|
||||
'granite or whin, with internal insulation': 'granite or whinstone',
|
||||
'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown',
|
||||
'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown',
|
||||
'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown',
|
||||
'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown',
|
||||
'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
|
||||
'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': 'unknown',
|
||||
'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
|
||||
'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown',
|
||||
'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown',
|
||||
'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown',
|
||||
'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown',
|
||||
'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown',
|
||||
'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown',
|
||||
'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown',
|
||||
'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown',
|
||||
'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown',
|
||||
'average thermal transmittance 0.28 w/m?k': 'unknown',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -346,7 +346,7 @@ def app():
|
|||
|
||||
invalid_property_types_dictionary = ["bedsit", "bed-sit", "bed sit"]
|
||||
|
||||
self = AssetList(
|
||||
asset_list = AssetList(
|
||||
local_filepath=os.path.join(DATA_FOLDER, DATA_FILENAME),
|
||||
header=0,
|
||||
sheet_name=SHEET_NAME,
|
||||
|
|
@ -364,8 +364,7 @@ def app():
|
|||
landlord_heating_system="Heat Source",
|
||||
landlord_existing_pv="PV (Y/N)"
|
||||
)
|
||||
self.init_standardise(
|
||||
)
|
||||
asset_list.init_standardise()
|
||||
|
||||
self.apply_transformations()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue