save local

This commit is contained in:
Jun-te Kim 2026-01-28 10:54:54 +00:00
parent 1468150a49
commit aa4bc69e3d
3 changed files with 25 additions and 62 deletions

View file

@ -11,14 +11,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
# # 2) Build and install libpostal from source
# RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
# && cd /tmp/libpostal \
# && ./bootstrap.sh \
# && ./configure --datadir=/usr/local/share/libpostal \
# && make -j"$(nproc)" \
# && make install \
# && ldconfig \
# && rm -rf /tmp/libpostal
RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
&& cd /tmp/libpostal \
&& ./bootstrap.sh \
&& ./configure --datadir=/usr/local/share/libpostal \
&& make -j"$(nproc)" \
&& make install \
&& ldconfig \
&& rm -rf /tmp/libpostal
# 3) Create the user and grant sudo privileges
RUN useradd -m -s /usr/bin/bash ${USER} \
@ -26,17 +26,17 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
&& chmod 0440 /etc/sudoers.d/${USER}
# # 4) Python deps - if you want to run asset list
# ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# ADD asset_list/requirements.txt requirements.txt
# RUN pip install -r requirements.txt
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD asset_list/requirements.txt requirements.txt
RUN pip install -r requirements.txt
#
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD backend/engine/requirements.txt requirements1.txt
ADD backend/app/requirements/requirements.txt requirements2.txt
# ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# ADD backend/engine/requirements.txt requirements1.txt
# ADD backend/app/requirements/requirements.txt requirements2.txt
# ADD .devcontainer/requirements.txt requirements3.txt
# RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt
RUN cat requirements1.txt requirements2.txt > requirements.txt
# RUN cat requirements1.txt requirements2.txt > requirements.txt
RUN pip install -r requirements.txt
# 5) Workdir

View file

@ -57,27 +57,24 @@ def app():
EPC recommendations
Property UPRN
"""
# Peabody data for cleaning
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation")
data_filename = "to_standardise_uprns.xlsx"
data_folder = ("/workspaces/model/asset_list")
data_filename = "assets.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
address1_column = None
address1_method = 'house_number_extraction'
fulladdress_column = 'Address'
address_cols_to_concat = None
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Type"
landlord_built_form = "Attachment"
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Org Ref"
landlord_property_id = "LLUPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -93,40 +90,6 @@ def app():
asset_list_header = 0
landlord_block_reference = None
# Lambeth:
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
# data_filename = "lambeth_sw2_leigham court estate.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# address1_column = "Address"
# address1_method = None
# fulladdress_column = None
# address_cols_to_concat = ["Address"]
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "row_id"
# landlord_sap = None
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_id_colnames = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = None
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}

View file

@ -10,4 +10,4 @@ tiktoken
msgpack
beautifulsoup4
typing-extensions>=4.5.0
requests>=2.28.2
requests>=2.28.2