mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
submit this to main
This commit is contained in:
parent
b3b6a22ca6
commit
ae7a7e1bd5
2 changed files with 33 additions and 25 deletions
|
|
@ -5,7 +5,7 @@ ARG DEBIAN_FRONTEND=noninteractive
|
|||
|
||||
# Install system dependencies in a single layer
|
||||
RUN apt update && apt install -y --no-install-recommends \
|
||||
sudo jq vim \
|
||||
sudo jq vim curl\
|
||||
&& apt autoremove -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
|
@ -17,6 +17,22 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
|
|||
# Install Poetry
|
||||
RUN pip install --no-cache-dir poetry
|
||||
|
||||
# Download and install nvm:
|
||||
# RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash
|
||||
|
||||
# # in lieu of restarting the shell
|
||||
# RUN \. "$HOME/.nvm/nvm.sh"
|
||||
|
||||
# # Download and install Node.js:
|
||||
# RUN nvm install 22
|
||||
|
||||
# # Verify the Node.js version:
|
||||
# RUN node -v # Should print "v22.16.0".
|
||||
# RUN nvm current # Should print "v22.16.0".
|
||||
|
||||
# # Verify npm version:
|
||||
# RUN npm -v # Should print "10.9.2".
|
||||
|
||||
|
||||
# Install aws
|
||||
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
|
||||
|
|
@ -37,4 +53,4 @@ RUN terraform -install-autocomplete
|
|||
|
||||
|
||||
# Set the working directory
|
||||
WORKDIR /workspaces/survey-extractor
|
||||
WORKDIR /workspaces/survey-extractor:q
|
||||
|
|
@ -254,22 +254,22 @@ class SurveyPrice():
|
|||
def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data):
|
||||
|
||||
# Standardise address
|
||||
def extract_start_and_postcode(addr):
|
||||
if not isinstance(addr, str) or addr.strip() == "":
|
||||
return "", ""
|
||||
parts = addr.lower().replace(",", "").strip().split()
|
||||
start = ' '.join(parts[:2]) # Number + street
|
||||
postcode = ' '.join(parts[-2:]) # Postcode
|
||||
return start, postcode
|
||||
# def extract_start_and_postcode(addr):
|
||||
# if not isinstance(addr, str) or addr.strip() == "":
|
||||
# return "", ""
|
||||
# parts = addr.lower().replace(",", "").strip().split()
|
||||
# start = ' '.join(parts[:2]) # Number + street
|
||||
# postcode = ' '.join(parts[-2:]) # Postcode
|
||||
# return start, postcode
|
||||
|
||||
# Extract start + postcode from both datasets
|
||||
survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
|
||||
lambda x: pd.Series(extract_start_and_postcode(x))
|
||||
)
|
||||
|
||||
hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
|
||||
lambda x: pd.Series(extract_start_and_postcode(x))
|
||||
)
|
||||
# survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
|
||||
# lambda x: pd.Series(extract_start_and_postcode(x))
|
||||
# )
|
||||
#
|
||||
# hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
|
||||
# lambda x: pd.Series(extract_start_and_postcode(x))
|
||||
# )
|
||||
|
||||
|
||||
# Rename to installer
|
||||
|
|
@ -286,16 +286,8 @@ class SurveyPrice():
|
|||
}
|
||||
)
|
||||
|
||||
merged_df = pd.merge(
|
||||
survey_data,
|
||||
hubspot_data,
|
||||
on=['address_start', 'postcode'],
|
||||
how='inner'
|
||||
)
|
||||
merged_df = pd.concat([hubspot_data, survey_data], axis=1)
|
||||
|
||||
# if hubspot detects
|
||||
|
||||
merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
|
||||
def compute_energy_grant(row):
|
||||
pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
|
||||
post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue