diff --git a/etl/development.py b/etl/development.py index d6a1b02..58bfd6b 100644 --- a/etl/development.py +++ b/etl/development.py @@ -35,5 +35,8 @@ if __name__ == "__main__": # - [x] Finish off scraping for section that I need to finish # - [in progress] Pydantic format for deemed report # - [] Generate deemed report -> scopre of automations thats possible +# - [] Ensure Lewis has sent me this week's deemed score (invoice price for each job) +# - [] Ask Lewis to put his rate card in SharePoint (ask Nick where the most logical place for it would be) +# - [] Ask Kieran how some information is received from survey to submissions # - [] Docker compose to an sql database in docker compose (2 hours, then work on sql) # - [] Deploy via terraform to aws (1 day) \ No newline at end of file diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index 78ec346..a4d315f 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -4,7 +4,8 @@ from transform.types import ( PropertyDescription, PropertyDetail, Dimension, Walls, Roofs, Floors, Door, VentilationAndCooling, Lighting, WaterHeating, HotWaterCylinder, SolarWaterHeating, - ShowerAndBaths, FlueGasHeatRecoverySystem, + ShowerAndBaths, FlueGasHeatRecoverySystem, PhotovoltaicPanel, + WindTurbine, OtherDetails ) from datetime import datetime @@ -49,7 +50,6 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): # These one are quick fixes can be done on train or one at a time - # self.get_section_19() # self.get_section_20() # self.get_section_21() # self.get_section_22() @@ -231,6 +231,15 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): # Section 19.0 fghrs = self.get_fghrs() + # Section 20.0 + photovoltaicPanel = self.get_photovoltaic_panel() + + # Section 21.0 + windTurbine = self.get_wind_turbine() + + # Section 22.0 + otherDetails = self.get_other_details() + self.property_description = PropertyDescription( built_form = get_value("Built Form"), detachment_or_position = 
get_value("Detachment/Position"), @@ -282,6 +291,9 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): solarWaterHeating=solarWaterHeating, showerAndBaths=showerAndBaths, flueGasHeatRecoverySystem=fghrs, + photovoltaicPanel=photovoltaicPanel, + windTurbine=windTurbine, + otherDetails=otherDetails, ) @@ -813,7 +825,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): ) - def get_section_20(self): + def get_photovoltaic_panel(self): data = self.get_data_between("20.0 Photovoltaic Panel","21.0 Wind Turbine") sub_titles = [ "Percentage of External Roof Area with PVs" @@ -822,14 +834,18 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "20.0 Photovoltaic Panel", "21.0 Wind Turbine", ] - self.two_columns_processor(data, sub_titles, avoid, 20) + dict_1 = self.two_columns_processor(data, sub_titles, avoid) sub_titles = [ "PVs are connected to dwelling electricity" # PVs are connected to dwelling electricity meter ] - self.two_columns_processor(data, sub_titles, avoid, 20, 2) + dict_2 = self.two_columns_processor(data, sub_titles, avoid, 2) + return PhotovoltaicPanel( + pvs_are_connected_to_dwelling_electricity_meter=True if dict_2.get("pvs_are_connected_to_dwelling_electricity", "NO").upper() == "YES" else False, + percentage_of_external_roof_area_with_pvs=dict_1.get("percentage_of_external_roof_area_with_pvs") + ) - def get_section_21(self): + def get_wind_turbine(self): data = self.get_data_between("21.0 Wind Turbine","22.0 Other Details") sub_titles = [ "Wind Turbine", @@ -838,9 +854,12 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "21.0 Wind Turbine", "22.0 Other Details", ] - self.two_columns_processor(data, sub_titles, avoid, 21) + dict_ = self.two_columns_processor(data, sub_titles, avoid) + return WindTurbine( + wind_turbine=True if dict_.get("wind_turbine", "NO").upper()=="YES" else False + ) - def get_section_22(self): + def get_other_details(self): data = self.get_data_between("22.0 Other Details","Recommendations (Carbon Saving Figures Are 
For Guidance Only)") sub_titles = [ "Electricity Meter Type", @@ -851,10 +870,11 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): "Recommendations (Carbon Saving Figures Are For Guidance Only)", ] - self.two_columns_processor(data, sub_titles, avoid, 22) + dict_ = self.two_columns_processor(data, sub_titles, avoid) + return OtherDetails( + electricity_meter_type=dict_.get("electricity_meter_type", ""), + main_gas_avalible=True if dict_.get("main_gas_available", "NO").upper() == "YES" else False, + ) -# Section and 11 -# Extract -# Transform ( wiht validation pydantnic) -# Load \ No newline at end of file + \ No newline at end of file diff --git a/etl/transform/types.py b/etl/transform/types.py index c48db71..e6d2a91 100644 --- a/etl/transform/types.py +++ b/etl/transform/types.py @@ -131,6 +131,16 @@ class ShowerAndBaths(BaseModel): class FlueGasHeatRecoverySystem(BaseModel): fghrs_present: bool +class PhotovoltaicPanel(BaseModel): + pvs_are_connected_to_dwelling_electricity_meter: bool + percentage_of_external_roof_area_with_pvs: str + +class WindTurbine(BaseModel): + wind_turbine: bool + +class OtherDetails(BaseModel): + electricity_meter_type: str + main_gas_avalible: bool class PropertyDetail(BaseModel): age_band: str @@ -167,3 +177,6 @@ class PropertyDescription(BaseModel): solarWaterHeating: Optional[SolarWaterHeating] showerAndBaths: Optional[ShowerAndBaths] flueGasHeatRecoverySystem: Optional[FlueGasHeatRecoverySystem] + photovoltaicPanel: Optional[PhotovoltaicPanel] + windTurbine: Optional[WindTurbine] + otherDetails: Optional[OtherDetails]