diff --git a/etl/jjc_invoice.py b/etl/jjc_invoice.py index fc9d481..ca1538b 100644 --- a/etl/jjc_invoice.py +++ b/etl/jjc_invoice.py @@ -4,7 +4,295 @@ from etl.pdfReader.pdfReaderToText import pdfReaderToText from etl.surveyedData.surveryedData import surveyedDataProcessor import pandas as pd +file_paths = [{'119 CUTNOOK LANE, M44 6LU': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/CSR - 119 Cutnook.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/UB.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/119 CUTNOOK LANE, M44 6LU/XML.xml']}, + {'22 LINNET DRIVE, M44 6LW': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/22 LINNET DRIVE, M44 6LW/CSR - 22 Linnet.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/22 LINNET DRIVE, M44 6LW/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/22 LINNET DRIVE, M44 6LW/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/22 LINNET DRIVE, M44 6LW/XML.xml']}, + {'50 PARKSTONE ROAD, M44 6LN': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/50 PARKSTONE ROAD, M44 6LN/CSR - 50 Parkstone.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/50 PARKSTONE ROAD, M44 6LN/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/50 PARKSTONE ROAD, M44 6LN/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/50 PARKSTONE ROAD, M44 6LN/XML.xml']}, + {'89 PARKSTONE ROAD, M44 6LJ': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/89 PARKSTONE ROAD, M44 6LJ/CSR - 89 Parkstone.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/89 PARKSTONE ROAD, M44 6LJ/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 
10.03.2025/FOR HOUSING/89 PARKSTONE ROAD, M44 6LJ/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/FOR HOUSING/89 PARKSTONE ROAD, M44 6LJ/XML.xml']}, + {'153 HEATHCOTE STREET, ST3 1AD': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/153 HEATHCOTE STREET, ST3 1AD/CSR - 153 Heathcote.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/153 HEATHCOTE STREET, ST3 1AD/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/153 HEATHCOTE STREET, ST3 1AD/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/153 HEATHCOTE STREET, ST3 1AD/XML.xml']}, + {'16 IONA PLACE, ST3 2DY': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/16 IONA PLACE, ST3 2DY/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/16 IONA PLACE, ST3 2DY/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/16 IONA PLACE, ST3 2DY/XML.xml']}, + {'24 SEEDFIELDS ROAD, ST3 2BZ': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/24 SEEDFIELDS ROAD, ST3 2BZ/CSR - 24 Seedfields.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/24 SEEDFIELDS ROAD, ST3 2BZ/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/24 SEEDFIELDS ROAD, ST3 2BZ/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/24 SEEDFIELDS ROAD, ST3 2BZ/XML.xml']}, + {'27 ROUNDWAY, ST3 2BH': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/27 ROUNDWAY, ST3 2BH/CSR - 27 Roundway.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/27 ROUNDWAY, ST3 2BH/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/27 ROUNDWAY, ST3 2BH/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/27 ROUNDWAY, ST3 2BH/XML.xml']}, + {'30 ANGLESEY DRIVE, ST3 2SS': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/30 ANGLESEY DRIVE, ST3 2SS/CSR - 30 Anglesey.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/30 ANGLESEY DRIVE, ST3 2SS/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 
10.03.2025/UNITAS/30 ANGLESEY DRIVE, ST3 2SS/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/30 ANGLESEY DRIVE, ST3 2SS/XML.xml']}, + {'37 MEAFORD DRIVE, ST3 2BY': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/CSR - 37 Meaford.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/UB.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/37 MEAFORD DRIVE, ST3 2BY/XML.xml']}, + {'50 LOUISE DRIVE, ST3 2DT': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/CSR - 50 Louise.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/UB.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/50 LOUISE DRIVE, ST3 2DT/XML.xml']}, + {'90 MEAFORD DRIVE, ST3 2BB': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/90 MEAFORD DRIVE, ST3 2BB/CSR - 90 Meaford.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/90 MEAFORD DRIVE, ST3 2BB/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/90 MEAFORD DRIVE, ST3 2BB/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/90 MEAFORD DRIVE, ST3 2BB/XML.xml']}, + {'99 MEAFORD DRIVE, ST3 2BG': ['/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/99 MEAFORD DRIVE, ST3 2BG/CSR - 99 Meaford.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/99 MEAFORD DRIVE, ST3 2BG/EPR.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 10.03.2025/UNITAS/99 MEAFORD DRIVE, ST3 2BG/INPUTS.pdf', + '/tmp/sharepoint/Andy Rumfitt/W.C. 
10.03.2025/UNITAS/99 MEAFORD DRIVE, ST3 2BG/XML.xml']}, + {'1 Till Walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/1 Till Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/1 Till Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/1 Till Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/1 Till Walk ST3 5DF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/1 Till Walk/Dec - 1 Till Walk ST3 5DF.pdf']}, + {'10 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/10 Lomond Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/10 Lomond Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/10 Lomond Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/10 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Lomond walk/Dec - 10 Lomond Walk ST3 3HW.pdf']}, + {'10 Pembridge road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/10 Pembridge Road PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/10 Pembridge Road PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/10 Pembridge Road PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/10 Pembridge Road ST3 3BX.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10 Pembridge road/Dec - 10 Pembridge Road ST3 3BX.pdf']}, + {'10A Pembridge road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/10A Pembridge Road PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/10A Pembridge Road PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 
10.03.2025/UNITAS/10A Pembridge road/10A Pembridge Road PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/10A Pembridge Road ST3 3BX.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/10A Pembridge road/Dec - 10A Pembridge Road ST3 3BX.pdf']}, + {'12 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/12 Lomond Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/12 Lomond Walk PRE ERP PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/12 Lomond Walk PRE ERP SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/12 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12 Lomond walk/Dec - 12 Lomond Walk ST3 3HW.pdf']}, + {'12A Pembridge road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12A Pembridge road/12A Pembridge Road PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12A Pembridge road/12A Pembridge Road PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/12A Pembridge road/12A Pembridge Road PRE EPR SITE NOTES.pdf']}, + {'15 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/15 Lomond Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/15 Lomond Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/15 Lomond Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/15 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/15 Lomond walk/Dec - 15 Lomond Walk ST3 3HW.pdf']}, + {'18 Bilston street': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/18 Bilston Street PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 
10.03.2025/UNITAS/18 Bilston street/18 Bilston Street PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/18 Bilston Street PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/18 Bilston Street ST4 5DA.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Bilston street/Dec - 18 Bilston Street ST4 5DA.pdf']}, + {'18 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/18 Lomond Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/18 Lomond Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/18 Lomond Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/18 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/18 Lomond walk/Dec - 18 Lomond Walk ST3 3HW.pdf']}, + {'19 sedgley walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/19 Sedgley Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/19 Sedgley Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/19 Sedgley Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/19 Sedgley Walk ST3 1RY.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/19 sedgley walk/Dec - 19 Sedgley Walk ST3 1RY.pdf']}, + {'2 Gregson close': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/2 Gregson close PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/2 Gregson close PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/2 Gregson close PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 
10.03.2025/UNITAS/2 Gregson close/2 Gregson close ST3 2SJ.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Gregson close/Dec - 2 Gregson close ST3 2SJ.pdf']}, + {'2 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/2 Lomond Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/2 Lomond Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/2 Lomond Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/2 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/2 Lomond walk/Dec - 2 Lomond Walk ST3 3HW.pdf']}, + {'20 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/20 Lomond Walk PRE ERR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/20 Lomond Walk PRE ERR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/20 Lomond Walk PRE ERR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/20 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/20 Lomond walk/Dec - 20 Lomond Walk ST3 3HW.pdf']}, + {'3 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/3 Lomond Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/3 Lomond Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/3 Lomond Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/3 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/3 Lomond walk/Dec - 3 Lomond Walk ST3 3HW.pdf']}, + {'32 Anglesey drive': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/32 Anglesey Drive PRER EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 
10.03.2025/UNITAS/32 Anglesey drive/32 Anglesey Drive PRER EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/32 Anglesey Drive PRER EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/32 Anglesey Drive ST3 2SS.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/32 Anglesey drive/Dec - 32 Anglesey Drive ST3 2SS.pdf']}, + {'36 Woodville road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/36 Woodville Road PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/36 Woodville Road PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/36 Woodville Road PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/36 Woodville Road ST3 6BX.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/36 Woodville road/Dec - 36 Woodville Road ST3 6BX.pdf']}, + {'379 lightwood road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/379 Lightwood Road PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/379 Lightwood Road PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/379 Lightwood Road PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/379 Lightwood Road ST3 4JT.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/379 lightwood road/Dec - 379 Lightwood Road ST3 4JT.pdf']}, + {'387 Lightwood road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/387 Lightwood Road PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/387 Lightwood Road PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/387 Lightwood Road PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 
10.03.2025/UNITAS/387 Lightwood road/387 Lightwood Road ST3 4JT.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/387 Lightwood road/Dec - 387 Lightwood Road ST3 4JT.pdf']}, + {'4 Pembridge road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/4 Pembridge Road PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/4 Pembridge Road PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/4 Pembridge Road PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/4 Pembridge Road ST3 3BX.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/4 Pembridge road/Dec - 4 Pembridge Road ST3 3BX.pdf']}, + {'49 bridle path': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/49 Bridle Path PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/49 Bridle Path PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/49 Bridle Path PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/49 Bridle Path ST3 4SB.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/49 bridle path/Dec - 49 Bridle Path ST3 4SB.pdf']}, + {'5 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/5 Lomond Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/5 Lomond Walk PRE EPR RDSASP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/5 Lomond Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/5 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Lomond walk/Dec - 5 Lomond Walk ST3 3HW.pdf']}, + {'5 Poolside': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/5 Poolside PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 
10.03.2025/UNITAS/5 Poolside/5 Poolside PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/5 Poolside PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/5 Poolside ST3 3NY.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/5 Poolside/Dec - 5 Poolside ST3 3NY.pdf']}, + {'52 Bridle path': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/52 Bridle Path PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/52 Bridle Path PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/52 Bridle Path PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/52 Bridle Path ST3 4SB.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/52 Bridle path/Dec - 52 Bridle Path ST3 4SB.pdf']}, + {'58 Wise street': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/58 Wise Street PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/58 Wise Street PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/58 Wise Street PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/58 Wise Street ST3 4PQ.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/58 Wise street/Dec - 58 Wise Street ST3 4PQ.pdf']}, + {'7 ingleby road': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/7 Ingleby Road PRE ERP PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/7 Ingleby Road PRE ERP RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/7 Ingleby Road PRE ERP SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 ingleby road/7 Ingleby Road ST3 3JY.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 
10.03.2025/UNITAS/7 ingleby road/Dec - 7 Ingleby Road ST3 3JY.pdf']}, + {'7 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/7 Lomond Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/7 Lomond Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/7 Lomond Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/7 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Lomond walk/Dec - 7 Lomond Walk ST3 3HW.pdf']}, + {'7 Peel Street': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/7 Peel Street PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/7 Peel Street PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/7 Peel Street PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/7 Peel Street ST3 4PE.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/7 Peel Street/Dec - 7 Peel Street ST3 4PE.pdf']}, + {'83 Wimbourne avenue': []}, + {'85 Wimbourne avenue': []}, + {'87 Wimbourne avenue': []}, + {'9 Farrington close': []}, + {'9 Lomond walk': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/9 Lomond Walk PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/9 Lomond Walk PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/9 Lomond Walk ST3 3HW.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/Dec - 9 Lomond Walk PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Lomond walk/Dec - 9 Lomond Walk ST3 3HW.pdf']}, + {'9 Melbourne street': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/9 Melbourne Street PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 
10.03.2025/UNITAS/9 Melbourne street/9 Melbourne Street PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/9 Melbourne Street PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/9 Melbourne Street ST3 5AA.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/9 Melbourne street/Dec - 9 Melbourne Street ST3 5AA.pdf']}, + {'91 Wimbourne avenue': ['/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/91 Wimbourne Avenue PRE EPR PDF.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/91 Wimbourne Avenue PRE EPR RDSAP XML.xml', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/91 Wimbourne Avenue PRE EPR SITE NOTES.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/91 Wimbourne Avenue ST3 3LS.pdf', + '/tmp/sharepoint/Ben Langslow/W.C. 10.03.2025/UNITAS/91 Wimbourne avenue/Dec - 91 Wimbourne Avenue ST3 3LS.pdf']}, + {'101 Edgefield Road ST3 1AF': []}, + {'103 Edgefield Road ST3 1AF': []}, + {'105 Edgefield Road ST3 1AF': []}, + {'111 Edgefield Road ST3 1AF': []}, + {'113 Edgefield Road ST3 1AF': []}, + {'114 Foley Road ST3 2LH': ['/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/114 Foley Road ST3 2LH/5209-8505-3432-0097-5106 epr.pdf', + '/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/114 Foley Road ST3 2LH/5209-8505-3432-0097-5106.pdf', + '/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/114 Foley Road ST3 2LH/5209-8505-3432-0097-5106.xml']}, + {'114 Longley Road ST3 1AN': []}, + {'15 Sedgley Walk ST3 1RY': ['/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/15 Sedgley Walk ST3 1RY/0976-0245-5835-8209-5804 epr.pdf', + '/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/15 Sedgley Walk ST3 1RY/0976-0245-5835-8209-5804.pdf', + '/tmp/sharepoint/Mev Malik/W.C. 
10.03.2025/UNITAS/15 Sedgley Walk ST3 1RY/0976-0245-5835-8209-5804.xml']}, + {'151 Edgefield Road ST3 1AF': []}, + {'155 Edgefield Road ST3 1AF': []}, + {'17 Conewood Place ST3 2NW': []}, + {'17 Sedgley Walk ST3 1RY': ['/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/17 Sedgley Walk ST3 1RY/2080-2607-5453-9500-2595 epr.pdf', + '/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/17 Sedgley Walk ST3 1RY/2080-2607-5453-9500-2595.pdf', + '/tmp/sharepoint/Mev Malik/W.C. 10.03.2025/UNITAS/17 Sedgley Walk ST3 1RY/2080-2607-5453-9500-2595.xml']}, + {'21 Conewood Place ST3 2NW': []}, + {'27 Conewood Place ST3 2NW': []}, + {'30 Caroline Street ST3 1DE': []}, + {'32 Caroline Street ST3 1DE': []}, + {'34 Caroline Street ST3 1DE': []}, + {'35 Heathcote Street ST3 1AA': []}, + {'4 Milverton Place ST3 2SH': []}, + {'47 Hurst Road ST3 2LT': []}, + {'48 Lundy Road ST3 2DZ': []}, + {'5 Conewood Place ST3 2NW': []}, + {'54 Kirkup Walk ST3 2RR': []}, + {'56 Heathcote Road ST3 2LX': []}, + {'59 Caroline Street ST3 1DE': []}, + {'96 Edgefield Road ST3 1AF': []}, + {'96 Longley Road ST3 1AN': []}, + {'97 Edgefield Road ST3 1AF': ['/tmp/sharepoint/Mev Malik/W.C. 
def work_out_total_floor_area(pre_site_note):
    """Sum ``floor_area_m2`` over every floor of the main property and its extensions.

    A section (main property, extensions 1-4) only contributes when its
    matching ``no_of_*`` counter on ``pre_site_note.property_description`` is
    greater than zero. A counter of ``None`` is treated as zero.

    Args:
        pre_site_note: Object exposing ``property_description``, which carries
            the ``no_of_*`` counters and one object per section; each section
            object has a ``dimensions`` iterable whose items expose
            ``floor_area_m2``.

    Returns:
        The total of ``floor_area_m2`` across all counted sections, or ``0``
        when no section is counted.

    Raises:
        AttributeError: If a counter says a section exists but the section
            object is missing (or ``None``) on the description.
    """
    description = pre_site_note.property_description

    # (counter attribute, candidate section attributes in lookup order).
    # NOTE(review): the previous code read ``ex4_proprerty`` while every
    # sibling is spelled ``exN_property``; presumably a typo. The misspelling
    # is kept as a fallback until the model definition is confirmed.
    sections = (
        ("no_of_main_property", ("main_property",)),
        ("no_of_extension_1", ("ex1_property",)),
        ("no_of_extension_2", ("ex2_property",)),
        ("no_of_extension_3", ("ex3_property",)),
        ("no_of_extension_4", ("ex4_property", "ex4_proprerty")),
    )

    total = 0
    for counter_name, section_names in sections:
        if (getattr(description, counter_name) or 0) <= 0:
            continue

        section = None
        for section_name in section_names:
            section = getattr(description, section_name, None)
            if section is not None:
                break
        if section is None:
            raise AttributeError(
                f"property description has {counter_name} > 0 "
                f"but no {section_names[0]}"
            )

        total += sum(floor.floor_area_m2 for floor in section.dimensions)

    return total
class SiteNotesExtractor():
    """Shared helpers for reading values out of a report's flat list of text cells."""

    def __init__(self, data_list):
        # Kept exactly as passed in; every helper below indexes into this list.
        self.raw_data = data_list

    def get_x_occurance(self, lst, value, x=1):
        """Return the index in ``lst`` of the ``x``-th match of ``value``.

        ``x`` is zero-based, so the default ``x=1`` selects the *second*
        occurrence. Returns ``None`` when ``lst`` has too few matches.
        """
        matches = [i for i, v in enumerate(lst) if v == value]
        try:
            return matches[x]
        except IndexError:
            return None

    def two_columns_processor(self, data, sub_titles_to_gather, avoid, indexAdd=1):
        """Pair each wanted label in ``data`` with the cell ``indexAdd`` places after it.

        Args:
            data: Flat sequence of text cells (labels and values interleaved).
            sub_titles_to_gather: Labels whose values should be collected.
            avoid: Cells that are never treated as a label and never accepted
                as a value.
            indexAdd: Offset from a label to its value cell.

        Returns:
            A dict keyed by the lower-cased label with ``-`` and spaces turned
            into ``_``. The value is ``None`` when the target cell is out of
            range or is listed in ``avoid``. A label that appears more than
            once always uses the cell relative to its first occurrence.
        """
        first_seen = {}
        gathered = {}
        size = len(data)
        for position, label in enumerate(data):
            if label in avoid or label not in sub_titles_to_gather:
                continue
            # Remember the first position instead of re-searching the list
            # for every occurrence.
            anchor = first_seen.setdefault(label, position)
            target = anchor + indexAdd
            # Reject negative targets explicitly: plain indexing would wrap
            # around to the end of the list and return an unrelated cell.
            if 0 <= target < size and data[target] not in avoid:
                value = data[target]
            else:
                value = None
            key = label.lower().replace('-', '_').replace(' ', '_')
            gathered[key] = value
        return gathered

    def get_data_between(self, a, b):
        """Return ``raw_data`` from the first ``a`` up to (excluding) the first ``b``.

        Raises:
            ValueError: If either marker is absent from ``raw_data``.
        """
        return self.raw_data[self.raw_data.index(a):self.raw_data.index(b)]


class CSR(SiteNotesExtractor):
    """Extractor for reports typed ``ReportType.CHARTED_SURVEYOR_REPORT``."""

    def __init__(self, data_list):
        super().__init__(data_list)
        self.type = ReportType.CHARTED_SURVEYOR_REPORT
        # Stays None when the report has no cavity-wall insulation section.
        self.insulation_info = None
        self.setup()

    def setup(self):
        """Run every extraction step for this report."""
        self.get_materials()

    def get_materials(self):
        """Populate ``insulation_info`` from the existing-insulation section.

        Leaves ``insulation_info`` as ``None`` when either section heading is
        missing from the text, instead of letting ``ValueError`` escape from
        the constructor. When the heading is present but carries no value the
        description is stored as an empty string.
        """
        heading = "Detailed description of existing Cavity Wall Insulation "
        next_heading = "Detailed description of Defects in existing Cavity Wall Insulation"

        try:
            section = self.get_data_between(heading, next_heading)
        except ValueError:
            self.insulation_info = None
            return

        fields = self.two_columns_processor(section, [heading], [next_heading])

        # The key can be present with a None value, so ``.get(key, "")`` is
        # not enough to guarantee a string for the ``type`` field.
        self.insulation_info = Insulation(
            type=fields.get('detailed_description_of_existing_cavity_wall_insulation_') or ""
        )