Merge pull request #96 from Hestia-Homes/main

broken out land registry code
This commit is contained in:
KhalimCK 2023-07-20 14:48:36 +01:00 committed by GitHub
commit 1ee2425026
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 10074 additions and 13 deletions

1
.gitignore vendored
View file

@ -251,4 +251,5 @@ node_modules/
backend/.idea
open_uprn/.idea/
conservation_areas/.idea/
model_data/.idea/

View file

@ -6,6 +6,10 @@ from backend.app.config import get_settings
from model_data.Property import Property
from epc_api.client import EpcClient
from utils.logger import setup_logger
# TODO: This is a placeholder until the data is stored in the DB
from backend.app.plan.temp_cleaned_data import cleaned
from backend.app.plan.uvalue_estimates_walls import uvalue_estimates_walls
from backend.app.plan.uvalue_estimates_floors import uvalue_estimates_floors
logger = setup_logger()
@ -81,6 +85,7 @@ async def trigger_plan(body: PlanTriggerRequest):
)
p.set_is_in_conservation_area(in_conservation_area)
logger.info()
for p in input_properties:
p.get_components(cleaned)
return {"message": "Plan complete"}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

0
land_registry/app.py Normal file
View file

Binary file not shown.

View file

@ -75,28 +75,28 @@ class Property(BaseUtility):
"""
self.coordinates = {key.lower(): value for key, value in coordinates.items()}
def get_components(self, cleaner):
def get_components(self, cleaned):
"""
Given the cleaning that has been performed, we'll use this to identify the property
components, from roof to walls to windows, heating and hot water
:param cleaner:
:param cleaned: This is the dictionary of components found in cleaner.cleaned
:return:
"""
if not cleaner.cleaned:
if not cleaned:
raise ValueError("Cleaner does not contain cleaned data")
if not self.data:
raise ValueError("Property does not contain data")
for description, attribute in cleaner.cleaned.items():
for description, attribute in cleaned.items():
if self.data[description] in self.DATA_ANOMALY_MATCHES:
setattr(self, self.ATTRIBUTE_MAP[description], {"original_description": self.data[description]})
continue
attributes = [
x for x in cleaner.cleaned[description] if x["original_description"] == self.data[description]
x for x in cleaned[description] if x["original_description"] == self.data[description]
]
if len(attributes) != 1:
raise ValueError("Either No attributes or multiple found for %s" % description)

View file

@ -111,6 +111,10 @@ def handler():
} for x in data
]
import pickle
with open("sample_addresses.pkl", "wb") as f:
pickle.dump(address_meta, f)
# Land registry
land_registry_client = LandRegistryClient(
paths=LAND_REGISTRY_PATHS,
@ -308,6 +312,11 @@ def app():
:return:
"""
epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
constituencies = {'E14000555', 'E14000726', 'E14000720', 'E14000721', 'E14000553', 'E14000752'}
property_types = ["bungalow", "flat", "house", "maisonette", "park home"]
# We pull properties from local authorities, by property type. This will allow us to build
# a dataset of up to 10k properties per local authority/property type combination
# For particularly old EPC data, we have inconsistent records so we'll only include EPCs that were
@ -331,6 +340,28 @@ def app():
)
)
# Production of sample data for land registry
address_meta = [
{
"postcode": x["postcode"].upper(),
"address1": x["address1"].upper(),
"address2": x["address2"].upper(),
"address3": x["address3"].upper(),
"address": x["address"],
"uprn": x["uprn"]
} for x in data
]
import pickle
with open("sample_addresses.pkl", "wb") as f:
pickle.dump(address_meta, f)
# Incorporate input data into cleaning
cleaner = EpcClean(data + [p.data for p in input_properties])
cleaner = EpcClean(data)
cleaner.clean()
# TODO: Store cleaner.cleaned datasets to a db
# TODO: Add property age band into this
uvalue_estimates = UvalueEstimations(data=data)
uvalue_estimates.get_estimates(cleaner=cleaner)
# TODO: Store these to a db

View file

@ -1,17 +1,21 @@
pydantic==1.10.11
epc-api-python==1.0.2
pandas==2.0.3
numpy==1.25.1
pytz==2023.3
tzdata==2023.3
urllib3<2
epc-api-python==1.0.2
tqdm
mypy
fuzzywuzzy
python-Levenshtein
dbfread
pyproj
pint
geopandas
mip
seaborn
statsmodels
scikit-learn
pyspellchecker
textblob
pandas==2.0.3
numpy==1.25.1
python-dateutil==2.8.2
six==1.16.0
xgboost

View file

@ -40,6 +40,7 @@ package:
- node_modules/**
- conservation_areas/**
- open_uprn/**
- land_registry/**
plugins:
- serverless-python-requirements