diff --git a/.idea/Model.iml b/.idea/Model.iml
index b03b31b1..44faa37d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (area_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index ca0e1cd9..f0144d5b 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (area_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/model_data/simulation_system/area_data.py b/model_data/simulation_system/area_data.py
index 603b71c9..ee74012b 100644
--- a/model_data/simulation_system/area_data.py
+++ b/model_data/simulation_system/area_data.py
@@ -2,3 +2,83 @@
 This script produces the dataset used to model the wall area of properties, which is used to estimate the cost
 of insulation measures within homes
 """
+import boto3
+import PyPDF2
+import tempfile
+
+bucket = "retrofit-datalake-dev"
+
+
+def list_files_in_s3_folder(bucket_name, folder_name):
+    """
+    Return the key of every object stored beneath a folder of an S3 bucket.
+
+    Parameters:
+    - bucket_name: Name of the S3 bucket to query.
+    - folder_name: Folder (key prefix) inside the bucket; a trailing '/' is
+      appended when it is missing.
+
+    Returns:
+    - A list with the full key of each object listed under the prefix;
+      pages without a 'Contents' entry contribute nothing.
+    """
+
+    # Normalise the prefix so that it always ends with a '/'
+    prefix = folder_name if folder_name.endswith('/') else folder_name + '/'
+
+    s3_client = boto3.client('s3')
+
+    # list_objects_v2 is consumed through a paginator, one page at a time
+    pages = s3_client.get_paginator('list_objects_v2').paginate(
+        Bucket=bucket_name,
+        Prefix=prefix,
+    )
+
+    # Gather the 'Key' of each listed object, page by page
+    keys = []
+    for listing in pages:
+        for entry in listing.get('Contents', []):
+            keys.append(entry['Key'])
+
+    return keys
+
+
+def fetch_pdf_from_s3(bucket_name, pdf_key, local_path):
+    """
+    Download one PDF object from S3 and write its bytes to a local file.
+
+    Parameters:
+    - bucket_name: Name of the S3 bucket holding the PDF.
+    - pdf_key: Key (path) of the PDF object inside the bucket.
+    - local_path: Local file path the PDF bytes are written to.
+    """
+
+    # Request the object first, so a failed lookup never creates a local file
+    s3_object = boto3.client('s3').get_object(Bucket=bucket_name, Key=pdf_key)
+
+    # Stream body is read in one go and written out in binary mode
+    with open(local_path, 'wb') as pdf_out:
+        pdf_out.write(s3_object['Body'].read())
+
+
+# Usage (illustrative only; kept as a comment so that importing this module
+# never issues an S3 request with placeholder values):
+#     fetch_pdf_from_s3(
+#         'YOUR_BUCKET_NAME', 'path/to/your/pdf_file.pdf', 'local_file_name.pdf'
+#     )
+
+
+def handler():
+    """Download each SAP calculation PDF under "full_sap_calculations" in turn."""
+    files = list_files_in_s3_folder(bucket, "full_sap_calculations")
+
+    # Keep only the PDF objects from the listing
+    sap_calculation_pdfs = [file for file in files if file.endswith(".pdf")]
+
+    for sap_calculation_file in sap_calculation_pdfs:
+        # A private temporary directory gives each download a path that no
+        # other process can claim, and removes the file once the block exits
+        with tempfile.TemporaryDirectory() as temp_dir:
+            fetch_pdf_from_s3(bucket, sap_calculation_file, f"{temp_dir}/sap.pdf")
+            # TODO: pull out the net & gross wall areas here, while the
+            # downloaded PDF still exists on disk
diff --git a/model_data/simulation_system/requirements/area_data.txt b/model_data/simulation_system/requirements/area_data.txt
new file mode 100644
index 00000000..f6bff53c
--- /dev/null
+++ b/model_data/simulation_system/requirements/area_data.txt
@@ -0,0 +1,2 @@
+boto3==1.28.38
+PyPDF2==3.0.1
\ No newline at end of file