managed to get material

This commit is contained in:
Jun-te Kim 2025-03-21 12:27:15 +00:00
parent 3f22a4b855
commit 604c340f6c
4 changed files with 341 additions and 35 deletions

View file

@ -4,7 +4,295 @@ from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.surveyedData.surveryedData import surveyedDataProcessor
import pandas as pd
file_paths = [{'119 CUTNOOK LANE, M44 6LU': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/CSR - 119 Cutnook.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/UB.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/XML.xml']},
{'22 LINNET DRIVE, M44 6LW': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/22 LINNET DRIVE, M44 6LW/CSR - 22 Linnet.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/22 LINNET DRIVE, M44 6LW/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/22 LINNET DRIVE, M44 6LW/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/22 LINNET DRIVE, M44 6LW/XML.xml']},
{'50 PARKSTONE ROAD, M44 6LN': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/50 PARKSTONE ROAD, M44 6LN/CSR - 50 Parkstone.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/50 PARKSTONE ROAD, M44 6LN/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/50 PARKSTONE ROAD, M44 6LN/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/50 PARKSTONE ROAD, M44 6LN/XML.xml']},
{'89 PARKSTONE ROAD, M44 6LJ': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/89 PARKSTONE ROAD, M44 6LJ/CSR - 89 Parkstone.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/89 PARKSTONE ROAD, M44 6LJ/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/89 PARKSTONE ROAD, M44 6LJ/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/89 PARKSTONE ROAD, M44 6LJ/XML.xml']},
{'153 HEATHCOTE STREET, ST3 1AD': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/153 HEATHCOTE STREET, ST3 1AD/CSR - 153 Heathcote.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/153 HEATHCOTE STREET, ST3 1AD/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/153 HEATHCOTE STREET, ST3 1AD/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/153 HEATHCOTE STREET, ST3 1AD/XML.xml']},
{'16 IONA PLACE, ST3 2DY': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/16 IONA PLACE, ST3 2DY/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/16 IONA PLACE, ST3 2DY/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/16 IONA PLACE, ST3 2DY/XML.xml']},
{'24 SEEDFIELDS ROAD, ST3 2BZ': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/24 SEEDFIELDS ROAD, ST3 2BZ/CSR - 24 Seedfields.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/24 SEEDFIELDS ROAD, ST3 2BZ/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/24 SEEDFIELDS ROAD, ST3 2BZ/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/24 SEEDFIELDS ROAD, ST3 2BZ/XML.xml']},
{'27 ROUNDWAY, ST3 2BH': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/27 ROUNDWAY, ST3 2BH/CSR - 27 Roundway.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/27 ROUNDWAY, ST3 2BH/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/27 ROUNDWAY, ST3 2BH/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/27 ROUNDWAY, ST3 2BH/XML.xml']},
{'30 ANGLESEY DRIVE, ST3 2SS': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/30 ANGLESEY DRIVE, ST3 2SS/CSR - 30 Anglesey.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/30 ANGLESEY DRIVE, ST3 2SS/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/30 ANGLESEY DRIVE, ST3 2SS/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/30 ANGLESEY DRIVE, ST3 2SS/XML.xml']},
{'37 MEAFORD DRIVE, ST3 2BY': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/CSR - 37 Meaford.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/UB.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/XML.xml']},
{'50 LOUISE DRIVE, ST3 2DT': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/CSR - 50 Louise.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/UB.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/XML.xml']},
{'90 MEAFORD DRIVE, ST3 2BB': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/90 MEAFORD DRIVE, ST3 2BB/CSR - 90 Meaford.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/90 MEAFORD DRIVE, ST3 2BB/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/90 MEAFORD DRIVE, ST3 2BB/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/90 MEAFORD DRIVE, ST3 2BB/XML.xml']},
{'99 MEAFORD DRIVE, ST3 2BG': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/99 MEAFORD DRIVE, ST3 2BG/CSR - 99 Meaford.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/99 MEAFORD DRIVE, ST3 2BG/EPR.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/99 MEAFORD DRIVE, ST3 2BG/INPUTS.pdf',
'/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/99 MEAFORD DRIVE, ST3 2BG/XML.xml']},
{'1 Till Walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/1 Till Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/1 Till Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/1 Till Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/1 Till Walk ST3 5DF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/Dec - 1 Till Walk ST3 5DF.pdf']},
{'10 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/10 Lomond Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/10 Lomond Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/10 Lomond Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/10 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/Dec - 10 Lomond Walk ST3 3HW.pdf']},
{'10 Pembridge road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/10 Pembridge Road PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/10 Pembridge Road PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/10 Pembridge Road PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/10 Pembridge Road ST3 3BX.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/Dec - 10 Pembridge Road ST3 3BX.pdf']},
{'10A Pembridge road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/10A Pembridge Road PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/10A Pembridge Road PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/10A Pembridge Road PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/10A Pembridge Road ST3 3BX.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/Dec - 10A Pembridge Road ST3 3BX.pdf']},
{'12 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/12 Lomond Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/12 Lomond Walk PRE ERP PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/12 Lomond Walk PRE ERP SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/12 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/Dec - 12 Lomond Walk ST3 3HW.pdf']},
{'12A Pembridge road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12A Pembridge road/12A Pembridge Road PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12A Pembridge road/12A Pembridge Road PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12A Pembridge road/12A Pembridge Road PRE EPR SITE NOTES.pdf']},
{'15 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/15 Lomond Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/15 Lomond Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/15 Lomond Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/15 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/Dec - 15 Lomond Walk ST3 3HW.pdf']},
{'18 Bilston street': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/18 Bilston Street PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/18 Bilston Street PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/18 Bilston Street PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/18 Bilston Street ST4 5DA.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/Dec - 18 Bilston Street ST4 5DA.pdf']},
{'18 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/18 Lomond Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/18 Lomond Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/18 Lomond Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/18 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/Dec - 18 Lomond Walk ST3 3HW.pdf']},
{'19 sedgley walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/19 Sedgley Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/19 Sedgley Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/19 Sedgley Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/19 Sedgley Walk ST3 1RY.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/Dec - 19 Sedgley Walk ST3 1RY.pdf']},
{'2 Gregson close': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/2 Gregson close PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/2 Gregson close PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/2 Gregson close PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/2 Gregson close ST3 2SJ.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/Dec - 2 Gregson close ST3 2SJ.pdf']},
{'2 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/2 Lomond Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/2 Lomond Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/2 Lomond Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/2 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/Dec - 2 Lomond Walk ST3 3HW.pdf']},
{'20 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/20 Lomond Walk PRE ERR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/20 Lomond Walk PRE ERR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/20 Lomond Walk PRE ERR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/20 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/Dec - 20 Lomond Walk ST3 3HW.pdf']},
{'3 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/3 Lomond Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/3 Lomond Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/3 Lomond Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/3 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/Dec - 3 Lomond Walk ST3 3HW.pdf']},
{'32 Anglesey drive': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/32 Anglesey Drive PRER EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/32 Anglesey Drive PRER EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/32 Anglesey Drive PRER EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/32 Anglesey Drive ST3 2SS.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/Dec - 32 Anglesey Drive ST3 2SS.pdf']},
{'36 Woodville road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/36 Woodville Road PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/36 Woodville Road PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/36 Woodville Road PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/36 Woodville Road ST3 6BX.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/Dec - 36 Woodville Road ST3 6BX.pdf']},
{'379 lightwood road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/379 Lightwood Road PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/379 Lightwood Road PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/379 Lightwood Road PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/379 Lightwood Road ST3 4JT.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/Dec - 379 Lightwood Road ST3 4JT.pdf']},
{'387 Lightwood road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/387 Lightwood Road PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/387 Lightwood Road PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/387 Lightwood Road PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/387 Lightwood Road ST3 4JT.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/Dec - 387 Lightwood Road ST3 4JT.pdf']},
{'4 Pembridge road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/4 Pembridge Road PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/4 Pembridge Road PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/4 Pembridge Road PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/4 Pembridge Road ST3 3BX.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/Dec - 4 Pembridge Road ST3 3BX.pdf']},
{'49 bridle path': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/49 Bridle Path PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/49 Bridle Path PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/49 Bridle Path PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/49 Bridle Path ST3 4SB.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/Dec - 49 Bridle Path ST3 4SB.pdf']},
{'5 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/5 Lomond Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/5 Lomond Walk PRE EPR RDSASP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/5 Lomond Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/5 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/Dec - 5 Lomond Walk ST3 3HW.pdf']},
{'5 Poolside': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/5 Poolside PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/5 Poolside PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/5 Poolside PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/5 Poolside ST3 3NY.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/Dec - 5 Poolside ST3 3NY.pdf']},
{'52 Bridle path': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/52 Bridle Path PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/52 Bridle Path PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/52 Bridle Path PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/52 Bridle Path ST3 4SB.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/Dec - 52 Bridle Path ST3 4SB.pdf']},
{'58 Wise street': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/58 Wise Street PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/58 Wise Street PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/58 Wise Street PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/58 Wise Street ST3 4PQ.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/Dec - 58 Wise Street ST3 4PQ.pdf']},
{'7 ingleby road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/7 Ingleby Road PRE ERP PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/7 Ingleby Road PRE ERP RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/7 Ingleby Road PRE ERP SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/7 Ingleby Road ST3 3JY.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/Dec - 7 Ingleby Road ST3 3JY.pdf']},
{'7 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/7 Lomond Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/7 Lomond Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/7 Lomond Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/7 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/Dec - 7 Lomond Walk ST3 3HW.pdf']},
{'7 Peel Street': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/7 Peel Street PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/7 Peel Street PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/7 Peel Street PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/7 Peel Street ST3 4PE.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/Dec - 7 Peel Street ST3 4PE.pdf']},
{'83 Wimbourne avenue': []},
{'85 Wimbourne avenue': []},
{'87 Wimbourne avenue': []},
{'9 Farrington close': []},
{'9 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/9 Lomond Walk PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/9 Lomond Walk PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/9 Lomond Walk ST3 3HW.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/Dec - 9 Lomond Walk PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/Dec - 9 Lomond Walk ST3 3HW.pdf']},
{'9 Melbourne street': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/9 Melbourne Street PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/9 Melbourne Street PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/9 Melbourne Street PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/9 Melbourne Street ST3 5AA.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/Dec - 9 Melbourne Street ST3 5AA.pdf']},
{'91 Wimbourne avenue': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/91 Wimbourne Avenue PRE EPR PDF.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/91 Wimbourne Avenue PRE EPR RDSAP XML.xml',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/91 Wimbourne Avenue PRE EPR SITE NOTES.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/91 Wimbourne Avenue ST3 3LS.pdf',
'/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/Dec - 91 Wimbourne Avenue ST3 3LS.pdf']},
{'101 Edgefield Road ST3 1AF': []},
{'103 Edgefield Road ST3 1AF': []},
{'105 Edgefield Road ST3 1AF': []},
{'111 Edgefield Road ST3 1AF': []},
{'113 Edgefield Road ST3 1AF': []},
{'114 Foley Road ST3 2LH': ['/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/114 Foley Road ST3 2LH/5209-8505-3432-0097-5106 epr.pdf',
'/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/114 Foley Road ST3 2LH/5209-8505-3432-0097-5106.pdf',
'/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/114 Foley Road ST3 2LH/5209-8505-3432-0097-5106.xml']},
{'114 Longley Road ST3 1AN': []},
{'15 Sedgley Walk ST3 1RY': ['/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/15 Sedgley Walk ST3 1RY/0976-0245-5835-8209-5804 epr.pdf',
'/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/15 Sedgley Walk ST3 1RY/0976-0245-5835-8209-5804.pdf',
'/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/15 Sedgley Walk ST3 1RY/0976-0245-5835-8209-5804.xml']},
{'151 Edgefield Road ST3 1AF': []},
{'155 Edgefield Road ST3 1AF': []},
{'17 Conewood Place ST3 2NW': []},
{'17 Sedgley Walk ST3 1RY': ['/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/17 Sedgley Walk ST3 1RY/2080-2607-5453-9500-2595 epr.pdf',
'/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/17 Sedgley Walk ST3 1RY/2080-2607-5453-9500-2595.pdf',
'/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/17 Sedgley Walk ST3 1RY/2080-2607-5453-9500-2595.xml']},
{'21 Conewood Place ST3 2NW': []},
{'27 Conewood Place ST3 2NW': []},
{'30 Caroline Street ST3 1DE': []},
{'32 Caroline Street ST3 1DE': []},
{'34 Caroline Street ST3 1DE': []},
{'35 Heathcote Street ST3 1AA': []},
{'4 Milverton Place ST3 2SH': []},
{'47 Hurst Road ST3 2LT': []},
{'48 Lundy Road ST3 2DZ': []},
{'5 Conewood Place ST3 2NW': []},
{'54 Kirkup Walk ST3 2RR': []},
{'56 Heathcote Road ST3 2LX': []},
{'59 Caroline Street ST3 1DE': []},
{'96 Edgefield Road ST3 1AF': []},
{'96 Longley Road ST3 1AN': []},
{'97 Edgefield Road ST3 1AF': ['/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/97 Edgefield Road ST3 1AF/CS reports 16032025.pdf']},
{'112 Chiltern Drive WD3 8JZ': []},
{'13 Hubbards Road WD3 5JL': []},
{'13 Orchard Way WD3 8HZ': []},
{'14 Orchard Way WD3 8HZ': []},
{'15 Hubbards Road WD3 5JL': []},
{'19 Orchard Way WD3 8HZ': []},
{'20 Edinburgh Avenue WD3 8LB': []},
{'21 Clitheroe Gardens WD19 6RP': []},
{'26 Clitheroe Gardens WD19 6RP': []},
{'29 The Queens Drive WD3 8LN': []},
{'31 The Queens Drive WD3 8LN': []},
{'34 Hubbards Road WD3 5JJ': []},
{'36 Orchard Way WD3 8HZ': []},
{'42 The Queens Drive WD3 8LT': []},
{'45 Orchard Way WD3 8HZ': []},
{'5 Hubbards Road WD3 5JJ': []},
{'56 The Queens Drive WD3 8LT': []},
{'7 Hubbards Road WD3 5JL': []},
{'8 The Greenway WD3 8HU': []},
{'80 The Queens Drive WD3 8LX': []},
{'82 The Queens Drive WD3 8LX': []},
{'9 Orchard Way WD3 8HZ': []},
{'90 Chiltern Drive WD3 8JZ': []},
{'96 Chiltern Drive WD3 8JZ': []}]
def work_out_total_floor_area(pre_site_note):
    """Return the combined floor area of the main property and its extensions.

    A section (the main property or one of extensions 1-4) contributes only
    when its ``no_of_*`` counter on ``pre_site_note.property_description`` is
    greater than zero. Its contribution is the sum of ``floor_area_m2`` over
    the entries of that section's ``dimensions``.

    Args:
        pre_site_note: Parsed pre-site note exposing ``property_description``.

    Returns:
        The summed floor area, or ``0`` when no section is present.
    """
    description = pre_site_note.property_description

    # (counter attribute, section attribute) for every part of the dwelling.
    # NOTE(review): "ex4_proprerty" is spelled exactly as the original code
    # spelled it; it looks like a typo for "ex4_property" -- confirm against
    # the PropertyDescription model before renaming.
    sections = (
        ("no_of_main_property", "main_property"),
        ("no_of_extension_1", "ex1_property"),
        ("no_of_extension_2", "ex2_property"),
        ("no_of_extension_3", "ex3_property"),
        ("no_of_extension_4", "ex4_proprerty"),
    )

    total = 0
    for count_attr, section_attr in sections:
        # The section attribute is only read when its counter is positive,
        # so absent extensions are never dereferenced.
        if getattr(description, count_attr) > 0:
            section = getattr(description, section_attr)
            total += sum(floor.floor_area_m2 for floor in section.dimensions)
    return total
def main():
@ -12,11 +300,12 @@ def main():
"Address": [],
"Surveyor's Name": [],
"Type of Work": [],
"Price": []
"Price": [],
"Total Floor Area": [],
}
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION)
file_paths = south_coast_scraper.download_file_for_each_address()
jjc = SharePointScraper(SharePointInstaller.JJC)
# file_paths = jjc.download_file_for_each_address()
list_of_surveys = []
for eachAddress in file_paths:
for address, files in eachAddress.items():
@ -25,26 +314,24 @@ def main():
for survey in list_of_surveys:
if survey.pre_site_note:
if survey.pre_site_note.property_description.main_property.wall.insulation.lower() == 'as built' \
and survey.pre_site_note.property_description.main_property.wall.construction.lower() == "cavity wall":
if survey.csr:
data["Price"].append(500)
data["Type of Work"].append("REMIDIAL CWI ONLY")
else:
data["Price"].append(1000)
data["Type of Work"].append("CAVITY ONLY")
else:
# Solar
data["Price"].append(1608)
data["Type of Work"].append("SOLAR")
total_floor_area = work_out_total_floor_area(survey.pre_site_note)
data["Address"].append(survey.address)
data["Surveyor's Name"].append(survey.pre_site_note.assessor_information.name)
data["Type of Work"].append("Caluclating...")
data["Price"].append("Caluclating...")
data["Total Floor Area"].append(total_floor_area)
if survey.csr:
if survey.csr.insulation_info:
print(survey.csr.insulation_info.type)
df = pd.DataFrame(data)
# Save to an Excel file
df.to_excel("survey_data.xlsx", index=False)
df.to_csv("survery_data.csv", index=False)
print(f"WEEK COMMENCING {WEEK_COMMENCING}")
print("Excel file 'survey_data.xlsx' created successfully!")

View file

@ -50,5 +50,5 @@ class pdfReaderToText():
if self.type.name == ReportType.QUIDOS_PRESITE_NOTE.name:
return QuidosSiteNotesExtractor(self.text_list)
elif self.type == ReportType.CHARTED_SURVEYOR_REPORT:
return CSR(self.text_list)
return CSR(self.text_list)

View file

@ -6,20 +6,35 @@ from transform.types import (
Lighting, WaterHeating, HotWaterCylinder, SolarWaterHeating,
ShowerAndBaths, FlueGasHeatRecoverySystem, PhotovoltaicPanel,
WindTurbine, OtherDetails, Windows, Heating, HeatingSystemControls,
HeatingType
HeatingType, Insulation
)
from datetime import datetime
class SiteNotesExtractor():
    """Base helper for reading values out of a flat list of extracted report items."""

    def __init__(self, data_list):
        """Keep ``data_list`` as ``raw_data`` for the lookup helpers below."""
        self.raw_data = data_list

    def get_x_occurance(self, lst, value, x=1):
        """Return the index in ``lst`` of the ``x``-th occurrence of ``value``.

        ``x`` is 0-based, so the default ``x=1`` yields the position of the
        *second* occurrence. Returns ``None`` when ``value`` does not occur
        often enough for position ``x`` to exist.
        """
        positions = [i for i, v in enumerate(lst) if v == value]
        try:
            return positions[x]
        except IndexError:
            return None

    def two_columns_processor(self, data, sub_titles_to_gather, avoid, indexAdd=1):
        """Collect ``heading -> value`` pairs from a flattened two-column layout.

        Args:
            data: Flat sequence in which a heading is followed by its value.
            sub_titles_to_gather: Headings whose values should be collected.
            avoid: Items that are never treated as a heading or as a value.
            indexAdd: Offset from a heading to its value (default: next item).

        Returns:
            A dict keyed by the lower-cased heading with ``-`` and spaces
            replaced by ``_``. A value is ``None`` when the item at the offset
            is listed in ``avoid`` or lies past the end of ``data``.
        """
        gathered = {}
        for item in data:
            if item in avoid or item not in sub_titles_to_gather:
                continue
            key = item.lower().replace('-', '_').replace(' ', '_')
            try:
                # The offset is applied to the FIRST occurrence of the
                # heading, even if the heading appears several times.
                value = data[data.index(item) + indexAdd]
            except (ValueError, IndexError):
                value = None
            gathered[key] = None if value in avoid else value
        return gathered

    def get_data_between(self, a, b):
        """Return the slice of ``raw_data`` from ``a`` (inclusive) up to ``b`` (exclusive).

        Both markers are located with ``list.index``, i.e. at their first
        occurrence; a missing marker raises ``ValueError``.
        """
        start = self.raw_data.index(a)
        end = self.raw_data.index(b)
        return self.raw_data[start:end]
@ -27,6 +42,22 @@ class CSR(SiteNotesExtractor):
def __init__(self, data_list):
    """Store the report items, tag the report type and run the extraction."""
    super().__init__(data_list)
    # Stays None until get_materials() (invoked through setup()) assigns it.
    self.insulation_info = None
    self.type = ReportType.CHARTED_SURVEYOR_REPORT
    self.setup()
def setup(self):
self.get_materials()
def get_materials(self):
    """Populate ``insulation_info`` from the existing cavity-wall-insulation section.

    The section is the part of ``raw_data`` that starts at the "existing
    Cavity Wall Insulation" heading and ends before the "Defects in existing
    Cavity Wall Insulation" heading; the item following the first heading is
    taken as the description.

    Raises:
        ValueError: Propagated from ``get_data_between`` when either heading
            is missing from ``raw_data``.
    """
    # The trailing space is part of the heading as it appears in the data.
    start_heading = "Detailed description of existing Cavity Wall Insulation "
    end_heading = "Detailed description of Defects in existing Cavity Wall Insulation"

    section = self.get_data_between(start_heading, end_heading)
    fields = self.two_columns_processor(section, [start_heading], [end_heading])

    # two_columns_processor stores None when the heading has no usable value
    # (e.g. it is the last item of the section). Insulation.type is declared
    # as ``str``, so fall back to "" -- the same default used when the key is
    # absent -- instead of passing None through.
    description = fields.get('detailed_description_of_existing_cavity_wall_insulation_') or ""
    self.insulation_info = Insulation(type=description)
@ -563,21 +594,6 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor):
u_value_w_m2_k=dict_.get("u_value_(w/m²k)", "") if dict_.get("u_value_(w/m²k), '')") is not None else "",
)
def two_columns_processor(self, data, sub_titles_to_gather, avoid, indexAdd=1):
    """Collect ``heading -> value`` pairs from a flattened two-column layout.

    Args:
        data: Flat sequence in which a heading is followed by its value.
        sub_titles_to_gather: Headings whose values should be collected.
        avoid: Items that are never treated as a heading or as a value.
        indexAdd: Offset from a heading to its value (default: next item).

    Returns:
        A dict keyed by the lower-cased heading with ``-`` and spaces
        replaced by ``_``. A value is ``None`` when the item at the offset
        is listed in ``avoid`` or lies past the end of ``data``.
    """
    gathered = {}
    for item in data:
        if item in avoid or item not in sub_titles_to_gather:
            continue
        key = item.lower().replace('-', '_').replace(' ', '_')
        try:
            # The offset is applied to the FIRST occurrence of the heading,
            # even if the heading appears several times.
            value = data[data.index(item) + indexAdd]
        except (ValueError, IndexError):
            value = None
        gathered[key] = None if value in avoid else value
    return gathered
def get_windows(self):
data = self.get_data_between("11.0 Windows", "12.0 Ventilation & Cooling")

View file

@ -192,3 +192,6 @@ class PropertyDescription(BaseModel):
mainHeating: Optional[Heating]
mainHeating2: Optional[Heating]
secondaryHeatingType: Optional[HeatingType]
class Insulation(BaseModel):
    """Insulation details taken from a report."""

    # Description of the insulation as a plain string.
    type: str