mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
attributes welsh translations and covering some missing cases
This commit is contained in:
parent
8f021cb266
commit
ad3ee6dde5
8 changed files with 83 additions and 8 deletions
|
|
@ -36,7 +36,7 @@ def app():
|
||||||
cleaner = EpcClean([])
|
cleaner = EpcClean([])
|
||||||
|
|
||||||
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
|
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
|
||||||
for directory in tqdm(epc_directories[140:]):
|
for directory in tqdm(epc_directories):
|
||||||
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||||
# Rename the columns to the same format as the api returns
|
# Rename the columns to the same format as the api returns
|
||||||
data.columns = [c.replace("_", "-").lower() for c in data.columns]
|
data.columns = [c.replace("_", "-").lower() for c in data.columns]
|
||||||
|
|
|
||||||
|
|
@ -108,7 +108,9 @@ class HotWaterAttributes(Definitions):
|
||||||
"popty estynedig olew, dim thermostat ar y silindr": "oil range cooker, no cylinder thermostat",
|
"popty estynedig olew, dim thermostat ar y silindr": "oil range cooker, no cylinder thermostat",
|
||||||
"cynllun cymunedol": "community scheme",
|
"cynllun cymunedol": "community scheme",
|
||||||
"nwy wrth fwy nag un pwynt": "gas multipoint",
|
"nwy wrth fwy nag un pwynt": "gas multipoint",
|
||||||
"popty estynedig olew": "oil range cooker"
|
"popty estynedig olew": "oil range cooker",
|
||||||
|
"dim system ar gael rhagdybir bod twymwr tanddwr trydan": "no system present electric immersion assumed",
|
||||||
|
"o'r brif system, dim thermostat ar y silindr": "from main system, no cylinder thermostat"
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, description: str):
|
def __init__(self, description: str):
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ class MainHeatAttributes(Definitions):
|
||||||
"dim system ar gael, rhagdybir bod gwresogyddion trydan": "no system present, electric heaters assumed",
|
"dim system ar gael, rhagdybir bod gwresogyddion trydan": "no system present, electric heaters assumed",
|
||||||
"gwresogyddion ystafell, glo carreg": "room heaters, coal",
|
"gwresogyddion ystafell, glo carreg": "room heaters, coal",
|
||||||
"pwmp gwres sygçön tarddu yn yr awyr, rheiddiaduron, trydan": "air source heat pump, radiators, electric",
|
"pwmp gwres sygçön tarddu yn yr awyr, rheiddiaduron, trydan": "air source heat pump, radiators, electric",
|
||||||
|
"gwresogyddion ystafell, nwy prif gyflenwad": "room heaters, mains gas",
|
||||||
}
|
}
|
||||||
|
|
||||||
REMAP = {
|
REMAP = {
|
||||||
|
|
@ -81,6 +82,22 @@ class MainHeatAttributes(Definitions):
|
||||||
):
|
):
|
||||||
raise ValueError('Invalid description')
|
raise ValueError('Invalid description')
|
||||||
|
|
||||||
|
def process_edge_cases(self, result) -> (dict, bool):
|
||||||
|
"""
|
||||||
|
We handle some edge cases that will cause issues, for example descriptions that are missing a
|
||||||
|
heating system
|
||||||
|
:return: tuple containing dictionary result, and boolean is_edge_case
|
||||||
|
"""
|
||||||
|
|
||||||
|
edge_cases = [", underfloor, electric"]
|
||||||
|
if self.description not in edge_cases:
|
||||||
|
return result, False
|
||||||
|
|
||||||
|
if self.description == ", underfloor, electric":
|
||||||
|
result["has_electric"] = True
|
||||||
|
result['has_underfloor_heating'] = True
|
||||||
|
return result, True
|
||||||
|
|
||||||
def process(self) -> Dict[str, Union[str, bool]]:
|
def process(self) -> Dict[str, Union[str, bool]]:
|
||||||
|
|
||||||
result: Dict[str, Union[str, bool]] = {f'has_{ds.replace(" ", "_")}': False for ds in self.DISTRIBUTION_SYSTEMS}
|
result: Dict[str, Union[str, bool]] = {f'has_{ds.replace(" ", "_")}': False for ds in self.DISTRIBUTION_SYSTEMS}
|
||||||
|
|
@ -92,11 +109,17 @@ class MainHeatAttributes(Definitions):
|
||||||
if self.nodata:
|
if self.nodata:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
result, is_edge_case = self.process_edge_cases(result)
|
||||||
|
if is_edge_case:
|
||||||
|
return result
|
||||||
|
|
||||||
description = self.description.split(',')
|
description = self.description.split(',')
|
||||||
|
|
||||||
# Process each part separately
|
# Process each part separately
|
||||||
for part in description:
|
for part in description:
|
||||||
part = part.strip() # remove leading/trailing white spaces
|
part = part.strip() # remove leading/trailing white spaces
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
|
||||||
# Heating Systems
|
# Heating Systems
|
||||||
process_part(result, part, self.HEAT_SYSTEMS, 'has_')
|
process_part(result, part, self.HEAT_SYSTEMS, 'has_')
|
||||||
|
|
|
||||||
|
|
@ -53,17 +53,24 @@ class RoofAttributes(Definitions):
|
||||||
search for regular expressions and translate
|
search for regular expressions and translate
|
||||||
"""
|
"""
|
||||||
|
|
||||||
insulation_thickness_match = re.search(r"ar oleddf, (\d+ mm) o inswleiddio yn y llofft", self.description)
|
loft_insulation_thickness_match = re.search(r"ar oleddf, (\d+ mm) o inswleiddio yn y llofft", self.description)
|
||||||
insulation_thickness_match2 = re.search(r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof", self.description)
|
loft_insulation_thickness_match2 = re.search(r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof", self.description)
|
||||||
|
loft_insulation_thickness_match3 = re.search(r"ar oleddf, (\d+\+ mm) lo inswleiddio yn y llof",
|
||||||
|
self.description)
|
||||||
|
|
||||||
uvalue_search = re.search(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", self.description)
|
uvalue_search = re.search(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", self.description)
|
||||||
|
|
||||||
# Step 2: Generalized translation with placeholder
|
# Step 2: Generalized translation with placeholder
|
||||||
if (insulation_thickness_match is not None) | (insulation_thickness_match2 is not None):
|
if (loft_insulation_thickness_match is not None) | \
|
||||||
if insulation_thickness_match is not None:
|
(loft_insulation_thickness_match2 is not None) | \
|
||||||
insulation_thickness = insulation_thickness_match.group(1)
|
(loft_insulation_thickness_match3 is not None):
|
||||||
|
if loft_insulation_thickness_match is not None:
|
||||||
|
insulation_thickness = loft_insulation_thickness_match.group(1)
|
||||||
|
elif loft_insulation_thickness_match2 is not None:
|
||||||
|
insulation_thickness = loft_insulation_thickness_match2.group(1)
|
||||||
else:
|
else:
|
||||||
insulation_thickness = insulation_thickness_match2.group(1)
|
insulation_thickness = loft_insulation_thickness_match3.group(1)
|
||||||
|
|
||||||
self.description = f"pitched, {insulation_thickness} loft insulation"
|
self.description = f"pitched, {insulation_thickness} loft insulation"
|
||||||
elif uvalue_search:
|
elif uvalue_search:
|
||||||
uvalue = uvalue_search.group(1)
|
uvalue = uvalue_search.group(1)
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,9 @@ class WindowAttributes(Definitions):
|
||||||
"rhai gwydrau dwbl": "some double glazing",
|
"rhai gwydrau dwbl": "some double glazing",
|
||||||
"gwydrau sengl": "single glazed",
|
"gwydrau sengl": "single glazed",
|
||||||
"ffenestri perfformiad uchel": "high performance glazing",
|
"ffenestri perfformiad uchel": "high performance glazing",
|
||||||
|
"gwydrau triphlyg llawn": "fully triple glazed",
|
||||||
|
"gwydrau triphlyg rhannol": "partial triple glazed",
|
||||||
|
"gwydrau triphlyg mwyaf": "mostly triple glazed",
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, description: str):
|
def __init__(self, description: str):
|
||||||
|
|
|
||||||
|
|
@ -206,4 +206,13 @@ hotwater_cases = [
|
||||||
'tariff_type': None,
|
'tariff_type': None,
|
||||||
'extra_features': None, 'chp_systems': None, 'distribution_system': None, 'no_system_present': None,
|
'extra_features': None, 'chp_systems': None, 'distribution_system': None, 'no_system_present': None,
|
||||||
'assumed': False, "appliance": "oil range cooker"},
|
'assumed': False, "appliance": "oil range cooker"},
|
||||||
|
{'original_description': 'Dim system ar gael: rhagdybir bod twymwr tanddwr trydan',
|
||||||
|
'heater_type': 'electric immersion',
|
||||||
|
'system_type': None, 'thermostat_characteristics': None, 'heating_scope': None, 'energy_recovery': None,
|
||||||
|
'tariff_type': None, 'extra_features': None, 'chp_systems': None, 'distribution_system': None,
|
||||||
|
'no_system_present': 'no system present', 'assumed': True, "appliance": None},
|
||||||
|
{'original_description': "O'r brif system, dim thermostat ar y silindr", 'heater_type': None,
|
||||||
|
'system_type': 'from main system', 'thermostat_characteristics': 'no cylinder thermostat', 'heating_scope': None,
|
||||||
|
'energy_recovery': None, 'tariff_type': None, 'extra_features': None, 'chp_systems': None,
|
||||||
|
'distribution_system': None, 'no_system_present': None, 'assumed': False, "appliance": None},
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -1407,4 +1407,29 @@ mainheat_cases = [
|
||||||
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
|
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
|
||||||
"has_electric_heat_pumps": False,
|
"has_electric_heat_pumps": False,
|
||||||
"has_micro-cogeneration": False},
|
"has_micro-cogeneration": False},
|
||||||
|
{'original_description': ', underfloor, electric', 'has_radiators': False, 'has_fan_coil_units': False,
|
||||||
|
'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
|
||||||
|
'has_pipes_in_concrete_slab': False, 'has_boiler': False, 'has_air_source_heat_pump': False,
|
||||||
|
'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
|
||||||
|
'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, 'has_community_scheme': False,
|
||||||
|
'has_ground_source_heat_pump': False, 'has_no_system_present': False, 'has_portable_electric_heaters': False,
|
||||||
|
'has_water_source_heat_pump': False, 'has_electric': True, 'has_mains_gas': False, 'has_wood_logs': False,
|
||||||
|
'has_LPG': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite': False,
|
||||||
|
'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_assumed': False,
|
||||||
|
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': True,
|
||||||
|
"has_electric_heat_pumps": False,
|
||||||
|
"has_micro-cogeneration": False},
|
||||||
|
{'original_description': 'Gwresogyddion ystafell, nwy prif gyflenwad', 'has_radiators': False,
|
||||||
|
'has_fan_coil_units': False,
|
||||||
|
'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
|
||||||
|
'has_pipes_in_concrete_slab': False, 'has_boiler': False, 'has_air_source_heat_pump': False,
|
||||||
|
'has_room_heaters': True, 'has_electric_storage_heaters': False, 'has_warm_air': False,
|
||||||
|
'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, 'has_community_scheme': False,
|
||||||
|
'has_ground_source_heat_pump': False, 'has_no_system_present': False, 'has_portable_electric_heaters': False,
|
||||||
|
'has_water_source_heat_pump': False, 'has_electric': False, 'has_mains_gas': True, 'has_wood_logs': False,
|
||||||
|
'has_LPG': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite': False,
|
||||||
|
'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_assumed': False,
|
||||||
|
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
|
||||||
|
"has_electric_heat_pumps": False,
|
||||||
|
"has_micro-cogeneration": False},
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,8 @@ windows_cases = [
|
||||||
'glazing_type': 'secondary', 'no_data': False},
|
'glazing_type': 'secondary', 'no_data': False},
|
||||||
{'original_description': 'Mostly triple glazing', 'has_glazing': True, 'glazing_coverage': 'most',
|
{'original_description': 'Mostly triple glazing', 'has_glazing': True, 'glazing_coverage': 'most',
|
||||||
'glazing_type': 'triple', 'no_data': False},
|
'glazing_type': 'triple', 'no_data': False},
|
||||||
|
{'original_description': 'Gwydrau triphlyg mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
|
||||||
|
'glazing_type': 'triple', 'no_data': False},
|
||||||
{'original_description': 'Multiple glazing throughout', 'has_glazing': True, 'glazing_coverage': 'full',
|
{'original_description': 'Multiple glazing throughout', 'has_glazing': True, 'glazing_coverage': 'full',
|
||||||
'glazing_type': 'multiple', 'no_data': False},
|
'glazing_type': 'multiple', 'no_data': False},
|
||||||
{'original_description': 'Partial double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
|
{'original_description': 'Partial double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||||
|
|
@ -26,6 +28,8 @@ windows_cases = [
|
||||||
'glazing_type': 'secondary', 'no_data': False},
|
'glazing_type': 'secondary', 'no_data': False},
|
||||||
{'original_description': 'Partial triple glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
|
{'original_description': 'Partial triple glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||||
'glazing_type': 'triple', 'no_data': False},
|
'glazing_type': 'triple', 'no_data': False},
|
||||||
|
{'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||||
|
'glazing_type': 'triple', 'no_data': False},
|
||||||
{'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
|
{'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
|
||||||
'no_data': False},
|
'no_data': False},
|
||||||
{'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
|
{'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||||
|
|
@ -48,4 +52,6 @@ windows_cases = [
|
||||||
'glazing_type': 'high performance', 'no_data': False},
|
'glazing_type': 'high performance', 'no_data': False},
|
||||||
{'original_description': 'Rhai gwydrau dwbl', 'has_glazing': True, 'glazing_coverage': 'partial',
|
{'original_description': 'Rhai gwydrau dwbl', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||||
'glazing_type': 'double', 'no_data': False},
|
'glazing_type': 'double', 'no_data': False},
|
||||||
|
{'original_description': 'Gwydrau triphlyg llawn', 'has_glazing': True, 'glazing_coverage': 'full',
|
||||||
|
'glazing_type': 'triple', 'no_data': False},
|
||||||
]
|
]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue