submit this to main

2026-06-08 11:17:29 +00:00 · 2025-05-23 14:07:11 +00:00 · 2025-05-23 14:07:11 +00:00 · ae7a7e1bd5
commit ae7a7e1bd5
parent b3b6a22ca6
2 changed files with 33 additions and 25 deletions
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@ -5,7 +5,7 @@ ARG DEBIAN_FRONTEND=noninteractive

 # Install system dependencies in a single layer
 RUN apt update && apt install -y --no-install-recommends \
-    sudo jq vim \
+    sudo jq vim curl \
    && apt autoremove -y \
    && rm -rf /var/lib/apt/lists/*

@ -17,6 +17,22 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
 # Install Poetry
 RUN pip install --no-cache-dir poetry

+# Download and install nvm:
+# RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash
+
+# # in lieu of restarting the shell
+# RUN \. "$HOME/.nvm/nvm.sh"
+
+# # Download and install Node.js:
+# RUN nvm install 22
+
+# # Verify the Node.js version:
+# RUN node -v # Should print "v22.16.0".
+# RUN nvm current # Should print "v22.16.0".
+
+# # Verify npm version:
+# RUN npm -v # Should print "10.9.2".
+

 # Install aws
 RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
@ -37,4 +53,4 @@ RUN terraform -install-autocomplete


 # Set the working directory
-WORKDIR /workspaces/survey-extractor
+WORKDIR /workspaces/survey-extractor
--- a/etl/surveyPrice/surveyPrice.py
+++ b/etl/surveyPrice/surveyPrice.py
@ -254,22 +254,22 @@ class SurveyPrice():
    def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data):

        # Standardise address
-        def extract_start_and_postcode(addr):
-            if not isinstance(addr, str) or addr.strip() == "":
-                return "", ""
-            parts = addr.lower().replace(",", "").strip().split()
-            start = ' '.join(parts[:2])  # Number + street
-            postcode = ' '.join(parts[-2:])  # Postcode
-            return start, postcode
+        # def extract_start_and_postcode(addr):
+            # if not isinstance(addr, str) or addr.strip() == "":
+                # return "", ""
+            # parts = addr.lower().replace(",", "").strip().split()
+            # start = ' '.join(parts[:2])  # Number + street
+            # postcode = ' '.join(parts[-2:])  # Postcode
+            # return start, postcode

        # Extract start + postcode from both datasets
-        survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
-            lambda x: pd.Series(extract_start_and_postcode(x))
-        )
-
-        hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
-            lambda x: pd.Series(extract_start_and_postcode(x))
-        )
+        # survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
+            # lambda x: pd.Series(extract_start_and_postcode(x))
+        # )
+# 
+        # hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
+            # lambda x: pd.Series(extract_start_and_postcode(x))
+        # )


        # re-name to installer
@ -286,16 +286,8 @@ class SurveyPrice():
            }
        )

-        merged_df = pd.merge(
-            survey_data,
-            hubspot_data,
-            on=['address_start', 'postcode'],
-            how='inner'
-        )
+        merged_df = pd.concat([hubspot_data, survey_data], axis=1)

-        # if hubspot detects 
-
-        merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
        def compute_energy_grant(row):
            pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
            post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1]