diff --git a/backend/documents_parser/extractor.py b/backend/documents_parser/extractor.py index 822f7907..71555395 100644 --- a/backend/documents_parser/extractor.py +++ b/backend/documents_parser/extractor.py @@ -83,10 +83,17 @@ class PasHubRdSapSiteNotesExtractor: def extract_inspection_metadata(self) -> InspectionMetadata: try: addr_start = self.text_list.index("Property Address:") + 1 - addr_end = self.text_list.index("Property Photo", addr_start) - property_address = ", ".join( - t.rstrip(",") for t in self.text_list[addr_start:addr_end] - ) + try: + addr_end = self.text_list.index("Property Photo", addr_start) + address_tokens = self.text_list[addr_start:addr_end] + except ValueError: + addr_end = self.text_list.index("RdSAP Assessment", addr_start) + address_tokens = [] + for t in self.text_list[addr_start:addr_end]: + if not t or t.startswith("Page "): + break + address_tokens.append(t) + property_address = ", ".join(t.rstrip(",") for t in address_tokens) except ValueError: property_address = "" diff --git a/backend/documents_parser/tests/fixtures/PasHubSiteNotes_7.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_7.pdf new file mode 100644 index 00000000..29d083c6 Binary files /dev/null and b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_7.pdf differ diff --git a/backend/documents_parser/tests/fixtures/pashub_site_notes_7_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_7_text.json new file mode 100644 index 00000000..933535cf --- /dev/null +++ b/backend/documents_parser/tests/fixtures/pashub_site_notes_7_text.json @@ -0,0 +1,670 @@ +[ + "SMART EPC: Record of", + "Inspection & Site Notes", + "Inspection Surveyor:", + "Dave Elliott", + "E-Mail Address:", + "davejohns36@icloud.com", + "Report Reference:", + "Not Applicable", + "Created On:", + "12 September 2025", + "Date of Inspection:", + "08 September 2025", + "Property Address:", + "Flat 3,", + "29 Watcombe Circus,", + "NOTTINGHAM,", + "NG5 2DU", + "Page 1", + "", + "Photo of electricity meter:", + "Single Smart Meter", + "RdSAP Assessment", + "General", + "Confirm you have checked for the existence of an", + "EPC before carrying out another energy assessment.", + "Yes", + "Does an EPC exist at the point of carrying out this", + "energy assessment?", + "No", + "Inspection Date:", + "08/09/2025", + "Transaction Type:", + "None of the Above", + "Tenure:", + "Rented Social", + "Type of Property:", + "Maisonette", + "Detachment Type:", + "Semi-Detached", + "Flat Type:", + "Mid-floor", + "Flat Location:", + "3", + "Corridor Type:", + "Unheated Corridor", + "Unheated corridor wall length:", + "6.59 m", + "Number of storeys:", + "2 Storeys", + "Terrain Type:", + "Suburban", + "Number of Extensions:", + "2 Extensions", + "Is an electricity smart meter present?", + "Yes", + "Electric meter type:", + "Single", + "Is the dwelling export-capable?", + "No", + "Is mains gas available?", + "Yes", + "Is there a gas smart meter?", + "No", + "Is the gas meter accessible?", + "Yes", + "Page 2", + "", + "Photo of Gas Meter:", + "Gas Meter", + "External indicators of Solid Brick construction:", + "Brick Pattern", + "Select Measurements Location:", + "Internal", + "Building Construction", + "Main Building", + "Age Range:", + "1900-1929", + "Record indicators of property age:", + "Property checker", + "Walls - Construction Type:", + "Solid brick", + "Record external indicators of Solid Brick", + "Construction:", + "consistent with build age", + "Walls - Insulation Type:", + "As built", + "Thermal conductivity of wall insulation:", + "Unknown", + "Wall U-Value known?", + "Not Known", + "Wall thickness:", + "280 mm", + "Page 3", + "", + "Photo wall thickness:", + "Wall Measurements", + "Wall Dry-Lined?", + "No", + "Party wall construction type:", + "Solid Masonry, Timber Frame, or System Built", + "Floor type:", + "Other dwelling below", + "Extension 1", + "Age Range:", + "1900-1929", + "Record indicators of property age:", + "Property checker", + "Walls - Construction Type:", + "Solid brick", + "Record external indicators of Solid Brick", + "Construction:", + "headers and stretchers in brick bond", + "Walls - Insulation Type:", + "As built", + "Thermal conductivity of wall insulation:", + "Unknown", + "Wall U-Value known?", + "Not Known", + "Wall thickness:", + "280 mm", + "Wall Dry-Lined?", + "Yes", + "Party wall construction type:", + "Solid Masonry, Timber Frame, or System Built", + "Floor type:", + "Other dwelling below", + "Extension 2", + "Age Range:", + "1900-1929", + "Record indicators of property age:", + "Property checker", + "Walls - Construction Type:", + "Solid brick", + "Record external indicators of Solid Brick", + "Construction:", + "headers and stretchers in brick bond", + "Walls - Insulation Type:", + "As built", + "Thermal conductivity of wall insulation:", + "Unknown", + "Page 4", + "", + "Loft insulation:", + "Loft", + "Loft insulation:", + "Loft", + "Wall U-Value known?", + "Not Known", + "Wall thickness:", + "280 mm", + "Wall Dry-Lined?", + "Yes", + "Party wall construction type:", + "Solid Masonry, Timber Frame, or System Built", + "Floor type:", + "Other dwelling below", + "Building Measurements", + "Area (m2)", + "Height (m)", + "Heat Loss Perimeter (m)", + "PWL (m)", + "Main Building", + "Floor 1", + "39.5", + "3.58", + "11.02", + "15.21", + "Floor 0", + "23.06", + "2.87", + "11.72", + "10.8", + "Extension 1", + "Floor 1", + "3.43", + "3.58", + "4.97", + "1", + "Floor 0", + "3.43", + "2.87", + "4.97", + "1", + "Extension 2", + "Floor 0", + "1.81", + "3.58", + "4.96", + "1", + "Roof Space", + "Main Building", + "Roofs - Construction Type:", + "Pitched roof (Slates or tiles), Access to loft", + "Roofs - Insulation At:", + "Joists", + "Roof U-Value:", + "Not Known", + "Roofs - Insulation Thickness:", + "225 mm", + "Page 5", + "", + "Loft insulation:", + "Loft", + "Loft insulation:", + "Loft", + "Loft insulation:", + "Loft", + "Loft insulation:", + "Loft", + "Loft insulation:", + "Loft", + "Loft insulation:", + "Loft", + "Loft insulation:", + "Loft", + "Loft insulation:", + "Loft", + "Page 6", + "", + "Loft insulation:", + "Loft", + "Indicators of Solid Brick Wall Construction in roof space:", + "solid wall construction visible to gables", + "Record indicators of Solid Brick Wall Construction in", + "roof space:", + "solid wall construction visible to gables", + "Extension 1", + "Roofs - Construction Type:", + "Flat", + "Roofs - Insulation At:", + "Unknown", + "Record indicators of Solid Brick Wall Construction in", + "roof space:", + "solid wall construction visible at eaves", + "Extension 2", + "Roofs - Construction Type:", + "Flat", + "Roofs - Insulation At:", + "Unknown", + "Record indicators of Solid Brick Wall Construction in", + "roof space:", + "Couldn\u2019t enter", + "Page 7", + "", + "Alternative Wall", + "Main Building", + "Alternative Wall 1", + "Construction type:", + "Solid brick", + "Record external indicators of Solid Brick", + "Construction:", + "consistent with building age, no visible cavity trays", + "Insulation Type:", + "As Built", + "Sheltered wall?", + "Yes", + "Thermal conductivity of wall insulation:", + "Unknown", + "Wall thickness:", + "280 mm", + "Wall Dry-Lined?", + "Yes", + "Windows", + "Window 1", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.2 m", + "Window width:", + "0.8 m", + "Orientation:", + "South West", + "Window 2", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.65 m", + "Window width:", + "0.52 m", + "Orientation:", + "East", + "Page 8", + "", + "Window 3", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.95 m", + "Window width:", + "0.86 m", + "Orientation:", + "East", + "Window 4", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.76 m", + "Window width:", + "0.65 m", + "Orientation:", + "North", + "Window 5", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.68 m", + "Window width:", + "0.68 m", + "Orientation:", + "East", + "Page 9", + "", + "Window 6", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.84 m", + "Window width:", + "1.18 m", + "Orientation:", + "North East", + "Window 7", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.76 m", + "Window width:", + "0.65 m", + "Orientation:", + "North", + "Window 8", + "Window location:", + "Extension 2", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.82 m", + "Window width:", + "0.84 m", + "Orientation:", + "South East", + "Page 10", + "", + "Window 9", + "Window location:", + "Extension 2", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.65 m", + "Window width:", + "0.5 m", + "Orientation:", + "South", + "Window 10", + "Window location:", + "Extension 2", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.71 m", + "Window width:", + "0.47 m", + "Orientation:", + "East", + "Window 11", + "Window location:", + "Extension 2", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.2 m", + "Window width:", + "0.8 m", + "Orientation:", + "South West", + "Page 11", + "", + "Heating & Hot Water", + "Main Heating Systems", + "Main Heating 1", + "How would you like to select the Heating System?", + "PCDF Search", + "System type:", + "Boiler with radiators or underfloor heating", + "Product Id", + "15030", + "Manufacturer", + "Baxi", + "Model", + "Duo-tec Combi", + "Orig Manuf", + "Baxi Heating", + "Fuel", + "Mains gas", + "S. Efficiency", + "0", + "Type", + "Combi", + "Condensing", + "Yes", + "Year", + "2006 - 2008", + "Mount", + "Wall", + "Open Flue", + "Room-sealed", + "Fan Assist", + "Yes", + "Status", + "Normal status for an actual product", + "Central heating pump age:", + "Unknown", + "Controls:", + "Programmer, room thermostat and TRVs", + "Does the boiler have a Flue Gas Heat Recover", + "System (FGHRS)?", + "No", + "Is there a weather compensator?", + "No", + "Emitter:", + "Radiators", + "Emitter Temperature:", + "Unknown", + "Secondary Heating System", + "Secondary Fuel", + "No Secondary Heating", + "Water Heating & Cylinder", + "Water Heating Type:", + "Regular", + "Water Heating System:", + "From main heating 1", + "Cylinder Size:", + "No Cylinder", + "Ventilation", + "Ventilation type:", + "Natural", + "Has fixed air conditioning?", + "No", + "Number of open flues:", + "0", + "Number of closed flues:", + "0", + "Number of boiler flues:", + "1", + "Page 12", + "", + "Number of other flues:", + "0", + "Number of extract fans:", + "2", + "Number of passive vents:", + "1", + "Number of flueless gas fires:", + "0", + "Pressure test:", + "No test", + "Is there a draught lobby?", + "Yes", + "Conservatories", + "Is there conservatory?", + "No conservatory", + "Renewables", + "Wind Turbines", + "Has wind turbines?", + "No", + "Solar hot water", + "Has solar hot water?", + "No", + "Photovoltaics", + "Has photovoltaic array?", + "No", + "Number of PV batteries:", + "None", + "Hydro", + "Is the dwelling connected to Hydro?", + "No", + "Room Count Elements", + "Number of habitable rooms?", + "3", + "Are any of these rooms unheated?", + "No", + "Number of external doors?", + "1", + "Number of insulated external doors?", + "0", + "Number of draughtproofed external doors?", + "1", + "Number of open chimneys?", + "0", + "Number of blocked chimneys?", + "0", + "Number of fixed incandescent bulbs:", + "7", + "Is the exact number of LED and CFL bulbs known?", + "Yes", + "Number of fixed LED bulbs:", + "7", + "Number of fixed CFL bulbs:", + "0", + "Are there any waste water heat recovery systems?", + "None", + "Number of baths:", + "1", + "How many special features are there at the", + "property?", + "0", + "Customer Response", + "Customer present?", + "Yes", + "Page 13", + "", + "Customer willing to answer satisfaction survey?", + "No", + "Addendum + Related Party Disclosure", + "Addendum", + "None", + "Related party disclosure", + "No related party", + "Photographs Required", + "Page 14", + "" +] \ No newline at end of file diff --git a/backend/documents_parser/tests/test_extractor.py b/backend/documents_parser/tests/test_extractor.py index 65ec8c23..9e7eaffd 100644 --- a/backend/documents_parser/tests/test_extractor.py +++ b/backend/documents_parser/tests/test_extractor.py @@ -66,6 +66,11 @@ def load_text_fixture_6() -> list[str]: return json.load(f) +def load_text_fixture_7() -> list[str]: + with open(os.path.join(FIXTURES, "pashub_site_notes_7_text.json")) as f: + return json.load(f) + + class TestInspectionMetadata: def test_full_inspection_metadata(self) -> None: result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_inspection_metadata() @@ -785,6 +790,18 @@ class TestElectricShowerExtraction: assert wu.showers[0].outlet_type == "Electric Shower" +# --- fixture 7: maisonette, 2 extensions, no property photo --- + + +class TestExtractNoPropertyPhoto: + def test_address_extracted_when_no_property_photo(self) -> None: + result = PasHubRdSapSiteNotesExtractor(load_text_fixture_7()).extract() + assert result.inspection_metadata.property_address == "Flat 3, 29 Watcombe Circus, NOTTINGHAM, NG5 2DU" + assert result.inspection_metadata.property_photo is False + assert result.general.property_type == "Maisonette" + assert result.general.number_of_extensions == 2 + + class TestSolidMasonryPartyWall: @pytest.fixture def bc(self) -> BuildingConstruction: