From 13c1e501269c565201ac68e086aa27c329d7016a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 6 Oct 2023 10:23:19 +0100 Subject: [PATCH] stripping out assumed tag from cleaned descriptions in EPC clean --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/Property.py | 2 +- backend/app/plan/router.py | 3 ++- etl/epc_clean/EpcClean.py | 2 +- etl/epc_clean/app.py | 3 +-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 3b05c6ac..ca0e1cd9 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,6 @@ - + diff --git a/backend/Property.py b/backend/Property.py index 57e653a7..036ab87e 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -301,7 +301,7 @@ class Property(Definitions): self.is_listed = spatial["is_listed_building"].values[0] self.is_heritage = spatial["is_heritage_building"].values[0] - if self.in_conservation_area | self.is_listed | self.is_heritage: + if self.in_conservation_area is True | self.is_listed is True | self.is_heritage is True: self.restricted_measures = True def set_year_built(self): diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 24dfebda..0749803b 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -59,7 +59,7 @@ async def trigger_plan(body: PlanTriggerRequest): epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN) plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path) uprn_filenames = read_dataframe_from_s3_parquet( - bucket_name=get_settings().PLAN_TRIGGER_BUCKET, file_key="spatial/filename_meta.parquet" + bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet" ) input_properties = [] @@ -151,6 +151,7 @@ async def trigger_plan(body: PlanTriggerRequest): if not property_recommendations: continue + fewf recommendations[p.id] = property_recommendations diff --git a/etl/epc_clean/EpcClean.py b/etl/epc_clean/EpcClean.py index 90de132c..10b5095d 100644 --- a/etl/epc_clean/EpcClean.py +++ b/etl/epc_clean/EpcClean.py @@ -130,7 +130,7 @@ class EpcClean: self.cleaned[field].append( { "original_description": description, - "clean_description": cln.description.capitalize(), + "clean_description": cln.description.replace("(assumed)", "").rstrip().capitalize(), **cln.process() } ) diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index d23e3f84..593559e0 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -19,7 +19,7 @@ LAND_REGISTRY_PATHS = [ os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2017-part2.csv", ] -EPC_DIRECTORY = Path(__file__).parent / "model_data" / "simulation_system" / "data" / "all-domestic-certificates" +EPC_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates" ENVIRONMENT = os.getenv("ENVIRONMENT", "dev") @@ -36,7 +36,6 @@ def app(): cleaned_data = {} epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()] for directory in tqdm(epc_directories): - directory_destructured = str(directory).split("/")[-1].split("-") data = pd.read_csv(directory / "certificates.csv", low_memory=False) # Rename the columns to the same format as the api returns