mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #1086 from Hestia-Homes/feature/pashub-additional-files
Fetch coordination and design documents from pashub
This commit is contained in:
commit
c98fc8452f
7 changed files with 381 additions and 16 deletions
|
|
@ -18,6 +18,9 @@ class FileTypeEnum(enum.Enum):
|
|||
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
|
||||
ECMK_SURVEY_XML = "ecmk_survey_xml"
|
||||
MAGIC_PLAN_JSON = "magic_plan_json"
|
||||
IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation"
|
||||
MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan"
|
||||
RETROFIT_DESIGN_DOC = "retrofit_design_doc"
|
||||
|
||||
|
||||
class FileSourceEnum(enum.Enum):
|
||||
|
|
|
|||
|
|
@ -14,9 +14,12 @@ class CoreFiles(Enum):
|
|||
PAR_PHOTOPACK = "PAR Photo Pack"
|
||||
PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
|
||||
PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
|
||||
IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation"
|
||||
MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan"
|
||||
RETROFIT_DESIGN_DOC = "Retrofit Design Doc"
|
||||
|
||||
|
||||
CORE_TO_FILETYPE_MAP = {
|
||||
_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
|
||||
CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value,
|
||||
CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value,
|
||||
CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value,
|
||||
|
|
@ -26,11 +29,49 @@ CORE_TO_FILETYPE_MAP = {
|
|||
CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value,
|
||||
CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value,
|
||||
CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value,
|
||||
CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value,
|
||||
CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value,
|
||||
CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value,
|
||||
}
|
||||
|
||||
|
||||
def infer_file_type(filename: str) -> Optional[str]:
|
||||
for core_file, file_type in CORE_TO_FILETYPE_MAP.items():
|
||||
def get_core_file_type(
    filename: str, evidence_category: Optional[str] = None
) -> Optional[CoreFiles]:
    """Classify an evidence file as one of the known core file kinds.

    Rules are tried in order and the first match wins:

    1. ``evidence_category`` equals "retrofit design" (case-insensitive,
       surrounding whitespace ignored) -> ``RETROFIT_DESIGN_DOC``.
    2. The Improvement Option Evaluation label occurs anywhere in ``filename``.
    3. The Medium Term Improvement Plan label occurs anywhere in ``filename``.
    4. No evidence category was supplied and ``filename`` contains both
       "-OSM-" and "DR-N-A" -> ``RETROFIT_DESIGN_DOC``.
    5. ``filename`` starts with the label of any remaining ``CoreFiles`` member.

    Args:
        filename: Name of the evidence file.
        evidence_category: Category reported alongside the file, if any.
            ``None``, empty and whitespace-only values all count as
            "not supplied".

    Returns:
        The matching ``CoreFiles`` member, or ``None`` when no rule matches.
    """
    # Normalise once so that a blank or padded category behaves predictably:
    # "" / "   " are treated like None, and " Retrofit Design " still matches.
    category = (evidence_category or "").strip().lower()

    # Identify retrofit design doc using evidence category as the name is possibly unreliable.
    # We might change to always use evidence category, but needs more investigation
    if category == "retrofit design":
        return CoreFiles.RETROFIT_DESIGN_DOC

    if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename:
        return CoreFiles.IMPROVEMENT_OPTION_EVALUATION

    if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename:
        return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN

    # Filename fallback is only used when no category was supplied; a file
    # carrying some other category is deliberately not classified this way.
    if not category and "-OSM-" in filename and "DR-N-A" in filename:
        return CoreFiles.RETROFIT_DESIGN_DOC

    # These kinds are handled by the rules above and must not be prefix-matched.
    prefix_skip = {
        CoreFiles.RETROFIT_DESIGN_DOC,
        CoreFiles.IMPROVEMENT_OPTION_EVALUATION,
        CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN,
    }

    for core_file in CoreFiles:
        if core_file in prefix_skip:
            continue

        if filename.startswith(core_file.value):
            return core_file

    return None
|
||||
|
||||
|
||||
def get_file_type_string(
    filename: str, evidence_category: Optional[str] = None
) -> Optional[str]:
    """Return the stored file-type string for an evidence file.

    The file is first classified with ``get_core_file_type`` and the result
    is then translated through ``_CORE_FILE_TO_FILE_TYPE``.

    Args:
        filename: Name of the evidence file.
        evidence_category: Optional category forwarded to
            ``get_core_file_type`` so that callers which know the category
            get the same classification here as they do there. Defaults to
            ``None``, which keeps the previous filename-only behaviour.

    Returns:
        The file-type string, or ``None`` when the file is not a core file.

    Raises:
        KeyError: If the detected core file has no entry in
            ``_CORE_FILE_TO_FILE_TYPE``.
    """
    core_file: Optional[CoreFiles] = get_core_file_type(filename, evidence_category)

    if core_file is None:
        return None

    return _CORE_FILE_TO_FILE_TYPE[core_file]
|
||||
|
|
|
|||
|
|
@ -5,12 +5,11 @@ from datetime import datetime
|
|||
|
||||
import requests
|
||||
|
||||
from backend.pashub_fetcher.core_files import CoreFiles
|
||||
from backend.pashub_fetcher.core_files import CoreFiles, get_core_file_type
|
||||
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
|
||||
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
|
||||
from utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
|
|
@ -86,12 +85,6 @@ class PashubClient:
|
|||
except Exception:
|
||||
return None
|
||||
|
||||
def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]:
|
||||
for core_file in CoreFiles:
|
||||
if file.file_name.startswith(core_file.value):
|
||||
return core_file
|
||||
return None
|
||||
|
||||
def _select_latest_core_files(
|
||||
self,
|
||||
files: List[EvidenceFileData],
|
||||
|
|
@ -99,7 +92,9 @@ class PashubClient:
|
|||
grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
|
||||
|
||||
for file in files:
|
||||
core_type = self._get_core_file_type(file)
|
||||
core_type: Optional[CoreFiles] = get_core_file_type(
|
||||
file.file_name, file.evidence_category
|
||||
)
|
||||
if not core_type:
|
||||
continue
|
||||
grouped[core_type].append(file)
|
||||
|
|
@ -107,6 +102,9 @@ class PashubClient:
|
|||
latest_files: Dict[CoreFiles, EvidenceFileData] = {}
|
||||
|
||||
for core_type, group in grouped.items():
|
||||
if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1:
|
||||
osm_candidates = [f for f in group if "-OSM-" in f.file_name]
|
||||
group = osm_candidates if osm_candidates else group
|
||||
latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc))
|
||||
latest_files[core_type] = latest
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from backend.app.db.models.uploaded_file import (
|
|||
)
|
||||
from backend.documents_parser.db_writer import save_epc_property_data
|
||||
from backend.documents_parser.parser import parse_site_notes_pdf
|
||||
from backend.pashub_fetcher.core_files import infer_file_type
|
||||
from backend.pashub_fetcher.core_files import get_file_type_string
|
||||
from backend.pashub_fetcher.pashub_client import PashubClient
|
||||
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
|
||||
PashubToAraTriggerRequest,
|
||||
|
|
@ -109,7 +109,7 @@ class PashubService:
|
|||
uprn=int(uprn) if uprn else None,
|
||||
hubspot_deal_id=hubspot_deal_id,
|
||||
file_source=FileSourceEnum.PAS_HUB.value,
|
||||
file_type=infer_file_type(filename),
|
||||
file_type=get_file_type_string(filename),
|
||||
)
|
||||
file_paths.append(file_path)
|
||||
uploaded_files.append(uploaded_file)
|
||||
|
|
|
|||
185
backend/pashub_fetcher/tests/test_core_files.py
Normal file
185
backend/pashub_fetcher/tests/test_core_files.py
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
from backend.pashub_fetcher.core_files import (
|
||||
CoreFiles,
|
||||
get_core_file_type,
|
||||
get_file_type_string,
|
||||
)
|
||||
|
||||
|
||||
def test_file_type_for_photopack():
    """A "Photopack_..." filename is expected to map to "photo_pack"."""
    resolved = get_file_type_string("Photopack_123456_V1.pdf")

    assert resolved == "photo_pack"
|
||||
|
||||
|
||||
def test_file_type_for_sitenote():
    """A "SiteNote_..." filename is expected to map to "site_note"."""
    resolved = get_file_type_string("SiteNote_123456_V1.pdf")

    assert resolved == "site_note"
|
||||
|
||||
|
||||
def test_file_type_for_rdsap_sitenote():
    """A "RdSAP_SiteNote_..." filename is expected to map to "rd_sap_site_note"."""
    sample_name = "RdSAP_SiteNote_9510890_V1_Assessmet.pdf"

    assert get_file_type_string(sample_name) == "rd_sap_site_note"
|
||||
|
||||
|
||||
def test_file_type_for_pas2023_ventilation():
    """A PAS 2023 ventilation report filename maps to "pas_2023_ventilation"."""
    sample_name = "PAS 2023 Ventilation Assessment Report_123456.pdf"

    assert get_file_type_string(sample_name) == "pas_2023_ventilation"
|
||||
|
||||
|
||||
def test_file_type_for_pas2023_condition():
    """A PAS 2023 condition report filename maps to "pas_2023_condition"."""
    sample_name = "PAS 2023 Condition Report_123456.pdf"

    assert get_file_type_string(sample_name) == "pas_2023_condition"
|
||||
|
||||
|
||||
def test_file_type_for_pas_significance():
    """A "PAS Significance_..." filename maps to "pas_significance"."""
    resolved = get_file_type_string("PAS Significance_123456.pdf")

    assert resolved == "pas_significance"
|
||||
|
||||
|
||||
def test_file_type_for_par_photopack():
    """A "PAR Photo Pack_..." filename maps to "par_photo_pack"."""
    sample_name = "PAR Photo Pack_95101890_V2_Assessment.pdf"

    assert get_file_type_string(sample_name) == "par_photo_pack"
|
||||
|
||||
|
||||
def test_file_type_for_pas2023_property():
    """A PAS 2023 property assessment filename maps to "pas_2023_property"."""
    sample_name = "PAS 2023 Property Assessment Report_123456.pdf"

    assert get_file_type_string(sample_name) == "pas_2023_property"
|
||||
|
||||
|
||||
def test_file_type_for_pas2023_occupancy():
    """A PAS 2023 occupancy assessment filename maps to "pas_2023_occupancy"."""
    sample_name = "PAS 2023 Occupancy Assessment Report_123456.pdf"

    assert get_file_type_string(sample_name) == "pas_2023_occupancy"
|
||||
|
||||
|
||||
def test_file_type_for_improvement_option_evaluation():
    """The IOE label in the middle of a filename maps to "improvement_option_evaluation"."""
    # Filename shape: "{job_id} - {postcode} - Improvement Option Evaluation.pdf"
    sample_name = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf"

    assert get_file_type_string(sample_name) == "improvement_option_evaluation"
|
||||
|
||||
|
||||
def test_file_type_for_medium_term_improvement_plan():
    """The MTIP label in the middle of a filename maps to "medium_term_improvement_plan"."""
    # Filename shape: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf"
    sample_name = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"

    assert get_file_type_string(sample_name) == "medium_term_improvement_plan"
|
||||
|
||||
|
||||
def test_file_type_for_retrofit_design_doc():
    """Filenames containing "-OSM-" and "DR-N-A" map to "retrofit_design_doc"."""
    design_filenames = (
        "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
        "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf",
    )

    # Checked in the same order as the two original standalone assertions.
    for design_filename in design_filenames:
        assert get_file_type_string(design_filename) == "retrofit_design_doc"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_core_file_type
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_core_file_for_evidence_category_match_is_case_insensitive() -> None:
    """A mixed-case "Retrofit Design" category still yields RETROFIT_DESIGN_DOC."""
    detected = get_core_file_type(
        "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
        evidence_category="Retrofit Design",
    )

    assert detected == CoreFiles.RETROFIT_DESIGN_DOC
|
||||
|
||||
|
||||
def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None:
    """A lower-case "retrofit design" category yields RETROFIT_DESIGN_DOC."""
    detected = get_core_file_type(
        "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
        evidence_category="retrofit design",
    )

    assert detected == CoreFiles.RETROFIT_DESIGN_DOC
|
||||
|
||||
|
||||
def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() -> None:
    """The IOE label inside the filename yields IMPROVEMENT_OPTION_EVALUATION."""
    detected = get_core_file_type(
        "6000802 - NG4 4HD - Improvement Option Evaluation.pdf"
    )

    assert detected == CoreFiles.IMPROVEMENT_OPTION_EVALUATION
|
||||
|
||||
|
||||
def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() -> None:
    """The MTIP label inside the filename yields MEDIUM_TERM_IMPROVEMENT_PLAN."""
    detected = get_core_file_type(
        "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
    )

    assert detected == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN
|
||||
|
||||
|
||||
def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> None:
    """With no category given, an "-OSM-" / "DR-N-A" filename yields RETROFIT_DESIGN_DOC."""
    detected = get_core_file_type(
        "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
    )

    assert detected == CoreFiles.RETROFIT_DESIGN_DOC
|
||||
|
||||
|
||||
def test_core_file_for_prefix_returns_photopack() -> None:
    """The filename "Photopack_123456_V1.pdf" is expected to yield CoreFiles.PHOTOPACK."""
    detected = get_core_file_type("Photopack_123456_V1.pdf")

    assert detected == CoreFiles.PHOTOPACK
|
||||
|
||||
|
||||
def test_core_file_for_unknown_filename_returns_none() -> None:
    """A filename matching no rule is expected to yield None."""
    detected = get_core_file_type("unknown_document_123.pdf")

    assert detected is None
|
||||
|
||||
|
||||
def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> None:
    """An OSM-style filename is not classified when a different category is supplied."""
    # The filename matches the "-OSM-" / "DR-N-A" pattern, but the category is
    # something other than retrofit design, so the filename fallback must stay off.
    detected = get_core_file_type(
        "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
        evidence_category="some other category",
    )

    assert detected is None
|
||||
117
backend/pashub_fetcher/tests/test_pashub_client.py
Normal file
117
backend/pashub_fetcher/tests/test_pashub_client.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
# pyright: reportPrivateUsage=false
|
||||
from typing import Optional
|
||||
|
||||
from backend.pashub_fetcher.core_files import CoreFiles
|
||||
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
|
||||
from backend.pashub_fetcher.pashub_client import PashubClient
|
||||
|
||||
|
||||
def make_client() -> PashubClient:
    """Return a PashubClient constructed with a placeholder token for tests."""
    client = PashubClient(token="test-token")
    return client
|
||||
|
||||
|
||||
def make_file(
    file_name: str = "unknown.pdf",
    evidence_category: Optional[str] = None,
    created_utc: str = "2024-01-01T00:00:00",
) -> EvidenceFileData:
    """Build an EvidenceFileData fixture.

    Only the name, evidence category and creation timestamp vary between
    tests; the id, size and extension are fixed placeholder values.
    """
    fixed_fields = {
        "file_id": "id-1",
        "file_size": 1024,
        "file_extension": "pdf",
    }
    return EvidenceFileData(
        file_name=file_name,
        created_utc=created_utc,
        evidence_category=evidence_category,
        **fixed_fields,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _select_latest_core_files
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
    """A lone retrofit design file is returned under RETROFIT_DESIGN_DOC."""
    design_name = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
    only_file = make_file(
        file_name=design_name,
        evidence_category="retrofit design",
        created_utc="2024-06-01T00:00:00",
    )

    selected = make_client()._select_latest_core_files([only_file])

    assert selected[CoreFiles.RETROFIT_DESIGN_DOC].file_name == design_name
|
||||
|
||||
|
||||
def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
    """An "-OSM-" retrofit design file is chosen even though a non-OSM one is newer."""
    osm_name = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
    older_osm = make_file(
        file_name=osm_name,
        evidence_category="retrofit design",
        created_utc="2024-01-01T00:00:00",
    )
    # Newer by timestamp, but lacks "-OSM-" in its name, so it should lose.
    newer_non_osm = make_file(
        file_name="Retrofit Design Doc non-osm variant.pdf",
        evidence_category="retrofit design",
        created_utc="2024-06-01T00:00:00",
    )

    selected = make_client()._select_latest_core_files([older_osm, newer_non_osm])

    assert selected[CoreFiles.RETROFIT_DESIGN_DOC].file_name == osm_name
|
||||
|
||||
|
||||
def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None:
    """With two "-OSM-" candidates, the one with the later created_utc is chosen."""
    newer_name = "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
    older_osm = make_file(
        file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
        evidence_category="retrofit design",
        created_utc="2024-01-01T00:00:00",
    )
    newer_osm = make_file(
        file_name=newer_name,
        evidence_category="retrofit design",
        created_utc="2024-06-01T00:00:00",
    )

    selected = make_client()._select_latest_core_files([older_osm, newer_osm])

    assert selected[CoreFiles.RETROFIT_DESIGN_DOC].file_name == newer_name
|
||||
|
||||
|
||||
def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None:
    """Without any "-OSM-" candidate, the file with the later created_utc is chosen."""
    first_version = make_file(
        file_name="retrofit_design_v1.pdf",
        evidence_category="retrofit design",
        created_utc="2024-01-01T00:00:00",
    )
    second_version = make_file(
        file_name="retrofit_design_v2.pdf",
        evidence_category="retrofit design",
        created_utc="2024-06-01T00:00:00",
    )

    selected = make_client()._select_latest_core_files(
        [first_version, second_version]
    )

    assert selected[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"
|
||||
23
pytest.ini
23
pytest.ini
|
|
@ -3,6 +3,27 @@ pythonpath = .
|
|||
log_cli = true
|
||||
log_cli_level = INFO
|
||||
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/pashub_fetcher/tests backend/documents_parser/tests backend/magic_plan/tests datatypes/magicplan/api/tests datatypes/magicplan/domain/tests backend/app/db/functions/tests
|
||||
testpaths =
|
||||
recommendations/tests
|
||||
backend/tests
|
||||
backend/address2UPRN/tests
|
||||
backend/app/db/functions/tests
|
||||
backend/categorisation/tests
|
||||
backend/condition/tests
|
||||
backend/documents_parser/tests
|
||||
backend/ecmk_fetcher/tests
|
||||
backend/export/tests
|
||||
backend/magic_plan/tests
|
||||
backend/onboarders/tests
|
||||
backend/pashub_fetcher/tests
|
||||
datatypes/epc/domain/tests
|
||||
datatypes/epc/schema/tests
|
||||
datatypes/epc/surveys/tests
|
||||
datatypes/magicplan/api/tests
|
||||
datatypes/magicplan/domain/tests
|
||||
etl/epc/tests
|
||||
etl/epc_clean/tests
|
||||
etl/hubspot/tests
|
||||
etl/spatial/tests
|
||||
markers =
|
||||
integration: mark a test as an integration test
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue