Strip the "(assumed)" tag from cleaned descriptions in EPC clean

This commit is contained in:
Khalim Conn-Kowlessar 2023-10-06 10:23:19 +01:00
parent 2d85e79bd4
commit 13c1e50126
6 changed files with 7 additions and 7 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -301,7 +301,7 @@ class Property(Definitions):
self.is_listed = spatial["is_listed_building"].values[0]
self.is_heritage = spatial["is_heritage_building"].values[0]
if self.in_conservation_area | self.is_listed | self.is_heritage:
if self.in_conservation_area is True | self.is_listed is True | self.is_heritage is True:
self.restricted_measures = True
def set_year_built(self):

View file

@ -59,7 +59,7 @@ async def trigger_plan(body: PlanTriggerRequest):
epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, file_key="spatial/filename_meta.parquet"
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
input_properties = []
@ -151,6 +151,7 @@ async def trigger_plan(body: PlanTriggerRequest):
if not property_recommendations:
continue
fewf
recommendations[p.id] = property_recommendations

View file

@ -130,7 +130,7 @@ class EpcClean:
self.cleaned[field].append(
{
"original_description": description,
"clean_description": cln.description.capitalize(),
"clean_description": cln.description.replace("(assumed)", "").rstrip().capitalize(),
**cln.process()
}
)

View file

@ -19,7 +19,7 @@ LAND_REGISTRY_PATHS = [
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2017-part2.csv",
]
EPC_DIRECTORY = Path(__file__).parent / "model_data" / "simulation_system" / "data" / "all-domestic-certificates"
EPC_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
@ -36,7 +36,6 @@ def app():
cleaned_data = {}
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
for directory in tqdm(epc_directories):
directory_destructured = str(directory).split("/")[-1].split("-")
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
# Rename the columns to the same format as the api returns