mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
add more type hints
This commit is contained in:
parent
b72d5fbf42
commit
e06ead55d0
2 changed files with 150 additions and 64 deletions
|
|
@ -24,6 +24,7 @@ from datatypes.epc.domain.historic_epc_matching import (
|
||||||
match_addresses_for_postcode,
|
match_addresses_for_postcode,
|
||||||
)
|
)
|
||||||
from backend.epc_client.client import EpcClientService
|
from backend.epc_client.client import EpcClientService
|
||||||
|
from datatypes.epc.domain.historic_epc_matching import ScoredHistoricEpc
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
@ -64,7 +65,7 @@ def get_uprn_from_historic_epc(
|
||||||
if not uprn or uprn == "nan":
|
if not uprn or uprn == "nan":
|
||||||
return None
|
return None
|
||||||
|
|
||||||
top = result.top()
|
top: Optional[ScoredHistoricEpc] = result.top()
|
||||||
if top is None:
|
if top is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
from typing import Optional
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
@ -13,40 +14,103 @@ from datatypes.epc.domain.historic_epc_matching import (
|
||||||
match_addresses_for_postcode,
|
match_addresses_for_postcode,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Columns required by the HistoricEpc dataclass (lower-cased CSV columns).
|
# Columns required by the HistoricEpc dataclass (lower-cased CSV columns).
|
||||||
# The matcher only reads ADDRESS + UPRN to score; everything else is filled
|
# The matcher only reads ADDRESS + UPRN to score; everything else is filled
|
||||||
# with "" but must be present for HistoricEpc(**kwargs) to construct.
|
# with "" but must be present for HistoricEpc(**kwargs) to construct.
|
||||||
_FULL_COLUMN_FIELDS = [
|
_FULL_COLUMN_FIELDS = [
|
||||||
"LMK_KEY", "ADDRESS1", "ADDRESS2", "ADDRESS3", "POSTCODE",
|
"LMK_KEY",
|
||||||
"BUILDING_REFERENCE_NUMBER", "CURRENT_ENERGY_RATING", "POTENTIAL_ENERGY_RATING",
|
"ADDRESS1",
|
||||||
"CURRENT_ENERGY_EFFICIENCY", "POTENTIAL_ENERGY_EFFICIENCY", "PROPERTY_TYPE",
|
"ADDRESS2",
|
||||||
"BUILT_FORM", "INSPECTION_DATE", "LOCAL_AUTHORITY", "CONSTITUENCY", "COUNTY",
|
"ADDRESS3",
|
||||||
"LODGEMENT_DATE", "TRANSACTION_TYPE", "ENVIRONMENT_IMPACT_CURRENT",
|
"POSTCODE",
|
||||||
"ENVIRONMENT_IMPACT_POTENTIAL", "ENERGY_CONSUMPTION_CURRENT",
|
"BUILDING_REFERENCE_NUMBER",
|
||||||
"ENERGY_CONSUMPTION_POTENTIAL", "CO2_EMISSIONS_CURRENT",
|
"CURRENT_ENERGY_RATING",
|
||||||
"CO2_EMISS_CURR_PER_FLOOR_AREA", "CO2_EMISSIONS_POTENTIAL",
|
"POTENTIAL_ENERGY_RATING",
|
||||||
"LIGHTING_COST_CURRENT", "LIGHTING_COST_POTENTIAL", "HEATING_COST_CURRENT",
|
"CURRENT_ENERGY_EFFICIENCY",
|
||||||
"HEATING_COST_POTENTIAL", "HOT_WATER_COST_CURRENT", "HOT_WATER_COST_POTENTIAL",
|
"POTENTIAL_ENERGY_EFFICIENCY",
|
||||||
"TOTAL_FLOOR_AREA", "ENERGY_TARIFF", "MAINS_GAS_FLAG", "FLOOR_LEVEL",
|
"PROPERTY_TYPE",
|
||||||
"FLAT_TOP_STOREY", "FLAT_STOREY_COUNT", "MAIN_HEATING_CONTROLS",
|
"BUILT_FORM",
|
||||||
"MULTI_GLAZE_PROPORTION", "GLAZED_TYPE", "GLAZED_AREA", "EXTENSION_COUNT",
|
"INSPECTION_DATE",
|
||||||
"NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "LOW_ENERGY_LIGHTING",
|
"LOCAL_AUTHORITY",
|
||||||
"NUMBER_OPEN_FIREPLACES", "HOTWATER_DESCRIPTION", "HOT_WATER_ENERGY_EFF",
|
"CONSTITUENCY",
|
||||||
"HOT_WATER_ENV_EFF", "FLOOR_DESCRIPTION", "FLOOR_ENERGY_EFF", "FLOOR_ENV_EFF",
|
"COUNTY",
|
||||||
"WINDOWS_DESCRIPTION", "WINDOWS_ENERGY_EFF", "WINDOWS_ENV_EFF",
|
"LODGEMENT_DATE",
|
||||||
"WALLS_DESCRIPTION", "WALLS_ENERGY_EFF", "WALLS_ENV_EFF",
|
"TRANSACTION_TYPE",
|
||||||
"SECONDHEAT_DESCRIPTION", "SHEATING_ENERGY_EFF", "SHEATING_ENV_EFF",
|
"ENVIRONMENT_IMPACT_CURRENT",
|
||||||
"ROOF_DESCRIPTION", "ROOF_ENERGY_EFF", "ROOF_ENV_EFF", "MAINHEAT_DESCRIPTION",
|
"ENVIRONMENT_IMPACT_POTENTIAL",
|
||||||
"MAINHEAT_ENERGY_EFF", "MAINHEAT_ENV_EFF", "MAINHEATCONT_DESCRIPTION",
|
"ENERGY_CONSUMPTION_CURRENT",
|
||||||
"MAINHEATC_ENERGY_EFF", "MAINHEATC_ENV_EFF", "LIGHTING_DESCRIPTION",
|
"ENERGY_CONSUMPTION_POTENTIAL",
|
||||||
"LIGHTING_ENERGY_EFF", "LIGHTING_ENV_EFF", "MAIN_FUEL", "WIND_TURBINE_COUNT",
|
"CO2_EMISSIONS_CURRENT",
|
||||||
"HEAT_LOSS_CORRIDOR", "UNHEATED_CORRIDOR_LENGTH", "FLOOR_HEIGHT",
|
"CO2_EMISS_CURR_PER_FLOOR_AREA",
|
||||||
"PHOTO_SUPPLY", "SOLAR_WATER_HEATING_FLAG", "MECHANICAL_VENTILATION",
|
"CO2_EMISSIONS_POTENTIAL",
|
||||||
"ADDRESS", "LOCAL_AUTHORITY_LABEL", "CONSTITUENCY_LABEL", "POSTTOWN",
|
"LIGHTING_COST_CURRENT",
|
||||||
"CONSTRUCTION_AGE_BAND", "LODGEMENT_DATETIME", "TENURE",
|
"LIGHTING_COST_POTENTIAL",
|
||||||
"FIXED_LIGHTING_OUTLETS_COUNT", "LOW_ENERGY_FIXED_LIGHT_COUNT", "UPRN",
|
"HEATING_COST_CURRENT",
|
||||||
"UPRN_SOURCE", "REPORT_TYPE",
|
"HEATING_COST_POTENTIAL",
|
||||||
|
"HOT_WATER_COST_CURRENT",
|
||||||
|
"HOT_WATER_COST_POTENTIAL",
|
||||||
|
"TOTAL_FLOOR_AREA",
|
||||||
|
"ENERGY_TARIFF",
|
||||||
|
"MAINS_GAS_FLAG",
|
||||||
|
"FLOOR_LEVEL",
|
||||||
|
"FLAT_TOP_STOREY",
|
||||||
|
"FLAT_STOREY_COUNT",
|
||||||
|
"MAIN_HEATING_CONTROLS",
|
||||||
|
"MULTI_GLAZE_PROPORTION",
|
||||||
|
"GLAZED_TYPE",
|
||||||
|
"GLAZED_AREA",
|
||||||
|
"EXTENSION_COUNT",
|
||||||
|
"NUMBER_HABITABLE_ROOMS",
|
||||||
|
"NUMBER_HEATED_ROOMS",
|
||||||
|
"LOW_ENERGY_LIGHTING",
|
||||||
|
"NUMBER_OPEN_FIREPLACES",
|
||||||
|
"HOTWATER_DESCRIPTION",
|
||||||
|
"HOT_WATER_ENERGY_EFF",
|
||||||
|
"HOT_WATER_ENV_EFF",
|
||||||
|
"FLOOR_DESCRIPTION",
|
||||||
|
"FLOOR_ENERGY_EFF",
|
||||||
|
"FLOOR_ENV_EFF",
|
||||||
|
"WINDOWS_DESCRIPTION",
|
||||||
|
"WINDOWS_ENERGY_EFF",
|
||||||
|
"WINDOWS_ENV_EFF",
|
||||||
|
"WALLS_DESCRIPTION",
|
||||||
|
"WALLS_ENERGY_EFF",
|
||||||
|
"WALLS_ENV_EFF",
|
||||||
|
"SECONDHEAT_DESCRIPTION",
|
||||||
|
"SHEATING_ENERGY_EFF",
|
||||||
|
"SHEATING_ENV_EFF",
|
||||||
|
"ROOF_DESCRIPTION",
|
||||||
|
"ROOF_ENERGY_EFF",
|
||||||
|
"ROOF_ENV_EFF",
|
||||||
|
"MAINHEAT_DESCRIPTION",
|
||||||
|
"MAINHEAT_ENERGY_EFF",
|
||||||
|
"MAINHEAT_ENV_EFF",
|
||||||
|
"MAINHEATCONT_DESCRIPTION",
|
||||||
|
"MAINHEATC_ENERGY_EFF",
|
||||||
|
"MAINHEATC_ENV_EFF",
|
||||||
|
"LIGHTING_DESCRIPTION",
|
||||||
|
"LIGHTING_ENERGY_EFF",
|
||||||
|
"LIGHTING_ENV_EFF",
|
||||||
|
"MAIN_FUEL",
|
||||||
|
"WIND_TURBINE_COUNT",
|
||||||
|
"HEAT_LOSS_CORRIDOR",
|
||||||
|
"UNHEATED_CORRIDOR_LENGTH",
|
||||||
|
"FLOOR_HEIGHT",
|
||||||
|
"PHOTO_SUPPLY",
|
||||||
|
"SOLAR_WATER_HEATING_FLAG",
|
||||||
|
"MECHANICAL_VENTILATION",
|
||||||
|
"ADDRESS",
|
||||||
|
"LOCAL_AUTHORITY_LABEL",
|
||||||
|
"CONSTITUENCY_LABEL",
|
||||||
|
"POSTTOWN",
|
||||||
|
"CONSTRUCTION_AGE_BAND",
|
||||||
|
"LODGEMENT_DATETIME",
|
||||||
|
"TENURE",
|
||||||
|
"FIXED_LIGHTING_OUTLETS_COUNT",
|
||||||
|
"LOW_ENERGY_FIXED_LIGHT_COUNT",
|
||||||
|
"UPRN",
|
||||||
|
"UPRN_SOURCE",
|
||||||
|
"REPORT_TYPE",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -63,7 +127,9 @@ def _build_df(rows: list[dict]) -> pd.DataFrame:
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def patch_postcode_valid():
|
def patch_postcode_valid():
|
||||||
with patch.object(matcher_mod.AddressMatch, "is_valid_postcode", return_value=True) as m:
|
with patch.object(
|
||||||
|
matcher_mod.AddressMatch, "is_valid_postcode", return_value=True
|
||||||
|
) as m:
|
||||||
yield m
|
yield m
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -106,10 +172,12 @@ class TestMatchAddressesForPostcode:
|
||||||
self, patch_read, patch_postcode_valid
|
self, patch_read, patch_postcode_valid
|
||||||
):
|
):
|
||||||
# Disjoint number sets => hard zero. Still kept in matches.
|
# Disjoint number sets => hard zero. Still kept in matches.
|
||||||
patch_read.return_value = _build_df([
|
patch_read.return_value = _build_df(
|
||||||
_row("47 GORDON ROAD", "100"),
|
[
|
||||||
_row("999 SOMEWHERE ELSE", "200"),
|
_row("47 GORDON ROAD", "100"),
|
||||||
])
|
_row("999 SOMEWHERE ELSE", "200"),
|
||||||
|
]
|
||||||
|
)
|
||||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
||||||
assert isinstance(result, HistoricEpcMatches)
|
assert isinstance(result, HistoricEpcMatches)
|
||||||
assert len(result.matches) == 2
|
assert len(result.matches) == 2
|
||||||
|
|
@ -117,10 +185,12 @@ class TestMatchAddressesForPostcode:
|
||||||
def test_top_has_lexirank_one_and_lexiscore_monotone(
|
def test_top_has_lexirank_one_and_lexiscore_monotone(
|
||||||
self, patch_read, patch_postcode_valid
|
self, patch_read, patch_postcode_valid
|
||||||
):
|
):
|
||||||
patch_read.return_value = _build_df([
|
patch_read.return_value = _build_df(
|
||||||
_row("48 GORDON ROAD", "200"), # near miss
|
[
|
||||||
_row("47 GORDON ROAD", "100"), # exact (after normalisation)
|
_row("48 GORDON ROAD", "200"), # near miss
|
||||||
])
|
_row("47 GORDON ROAD", "100"), # exact (after normalisation)
|
||||||
|
]
|
||||||
|
)
|
||||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
||||||
assert result.top().lexirank == 1
|
assert result.top().lexirank == 1
|
||||||
scores = [m.lexiscore for m in result.matches]
|
scores = [m.lexiscore for m in result.matches]
|
||||||
|
|
@ -173,19 +243,23 @@ class TestMatchAddressesForPostcode:
|
||||||
class TestUnambiguousUprn:
|
class TestUnambiguousUprn:
|
||||||
|
|
||||||
def test_exact_match_returns_uprn(self, patch_read, patch_postcode_valid):
|
def test_exact_match_returns_uprn(self, patch_read, patch_postcode_valid):
|
||||||
patch_read.return_value = _build_df([
|
patch_read.return_value = _build_df(
|
||||||
_row("47 GORDON ROAD", "100"),
|
[
|
||||||
_row("48 GORDON ROAD", "200"),
|
_row("47 GORDON ROAD", "100"),
|
||||||
])
|
_row("48 GORDON ROAD", "200"),
|
||||||
|
]
|
||||||
|
)
|
||||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
||||||
assert result.unambiguous_uprn() == "100"
|
assert result.unambiguous_uprn() == "100"
|
||||||
|
|
||||||
def test_ambiguous_tie_returns_none(self, patch_read, patch_postcode_valid):
|
def test_ambiguous_tie_returns_none(self, patch_read, patch_postcode_valid):
|
||||||
# Two duplicate addresses with different UPRNs share rank-1.
|
# Two duplicate addresses with different UPRNs share rank-1.
|
||||||
patch_read.return_value = _build_df([
|
patch_read.return_value = _build_df(
|
||||||
_row("47 GORDON ROAD", "100"),
|
[
|
||||||
_row("47 GORDON ROAD", "200"),
|
_row("47 GORDON ROAD", "100"),
|
||||||
])
|
_row("47 GORDON ROAD", "200"),
|
||||||
|
]
|
||||||
|
)
|
||||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
||||||
assert result.unambiguous_uprn() is None
|
assert result.unambiguous_uprn() is None
|
||||||
|
|
||||||
|
|
@ -193,10 +267,12 @@ class TestUnambiguousUprn:
|
||||||
self, patch_read, patch_postcode_valid
|
self, patch_read, patch_postcode_valid
|
||||||
):
|
):
|
||||||
# User address has building number 47; no row has 47 -> all hard-zero.
|
# User address has building number 47; no row has 47 -> all hard-zero.
|
||||||
patch_read.return_value = _build_df([
|
patch_read.return_value = _build_df(
|
||||||
_row("999 ELSEWHERE", "100"),
|
[
|
||||||
_row("888 ELSEWHERE", "200"),
|
_row("999 ELSEWHERE", "100"),
|
||||||
])
|
_row("888 ELSEWHERE", "200"),
|
||||||
|
]
|
||||||
|
)
|
||||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
||||||
assert all(m.lexiscore == 0.0 for m in result.matches)
|
assert all(m.lexiscore == 0.0 for m in result.matches)
|
||||||
assert result.unambiguous_uprn() is None
|
assert result.unambiguous_uprn() is None
|
||||||
|
|
@ -205,15 +281,22 @@ class TestUnambiguousUprn:
|
||||||
self, patch_read, patch_postcode_valid
|
self, patch_read, patch_postcode_valid
|
||||||
):
|
):
|
||||||
# Use a real NaN in the UPRN cell.
|
# Use a real NaN in the UPRN cell.
|
||||||
patch_read.return_value = _build_df([
|
patch_read.return_value = _build_df(
|
||||||
_row("47 GORDON ROAD", np.nan),
|
[
|
||||||
_row("48 GORDON ROAD", "200"),
|
_row("47 GORDON ROAD", np.nan),
|
||||||
])
|
_row("48 GORDON ROAD", "200"),
|
||||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
]
|
||||||
top = result.top()
|
)
|
||||||
|
result: HistoricEpcMatches = match_addresses_for_postcode(
|
||||||
|
"47 Gordon Road", "AB33 8AL"
|
||||||
|
)
|
||||||
|
top: Optional[ScoredHistoricEpc] = result.top()
|
||||||
# pandas_cell_to_str must turn NaN/"nan" into "" (not the literal string "nan"),
|
# pandas_cell_to_str must turn NaN/"nan" into "" (not the literal string "nan"),
|
||||||
# so unambiguous_uprn's truthiness check correctly drops the row.
|
# so unambiguous_uprn's truthiness check correctly drops the row.
|
||||||
assert top.record.uprn == ""
|
if top:
|
||||||
|
assert top.record.uprn == ""
|
||||||
|
else:
|
||||||
|
pytest.fail("should have an epc score, no results found :(")
|
||||||
|
|
||||||
|
|
||||||
# ---------- top / top_n ----------
|
# ---------- top / top_n ----------
|
||||||
|
|
@ -222,11 +305,13 @@ class TestUnambiguousUprn:
|
||||||
class TestTopHelpers:
|
class TestTopHelpers:
|
||||||
|
|
||||||
def test_top_n_returns_first_k(self, patch_read, patch_postcode_valid):
|
def test_top_n_returns_first_k(self, patch_read, patch_postcode_valid):
|
||||||
patch_read.return_value = _build_df([
|
patch_read.return_value = _build_df(
|
||||||
_row("47 GORDON ROAD", "100"),
|
[
|
||||||
_row("48 GORDON ROAD", "200"),
|
_row("47 GORDON ROAD", "100"),
|
||||||
_row("49 GORDON ROAD", "300"),
|
_row("48 GORDON ROAD", "200"),
|
||||||
])
|
_row("49 GORDON ROAD", "300"),
|
||||||
|
]
|
||||||
|
)
|
||||||
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
|
||||||
top2 = result.top_n(2)
|
top2 = result.top_n(2)
|
||||||
assert len(top2) == 2
|
assert len(top2) == 2
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue