mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
wip on area data handler
This commit is contained in:
parent
58f476f59f
commit
20ba7149c1
4 changed files with 84 additions and 2 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (area_data)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (area_data)" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -2,3 +2,83 @@
|
|||
This script produces the dataset used to model the wall area of properties, which is used to estimate the cost
|
||||
of insulation measures within homes
|
||||
"""
|
||||
import boto3
|
||||
import PyPDF2
|
||||
import tempfile
|
||||
|
||||
# S3 bucket that handler() lists and downloads the SAP calculation PDFs from.
bucket = "retrofit-datalake-dev"
|
||||
|
||||
|
||||
def list_files_in_s3_folder(bucket_name, folder_name):
    """
    Collect the keys of every object stored under a folder of an S3 bucket.

    Parameters:
    - bucket_name: Name of the S3 bucket to query.
    - folder_name: Folder (key prefix) to list; a trailing '/' is appended
      when it is missing.

    Returns:
    - A list holding the full object key of each entry found under the prefix.
    """
    prefix = folder_name if folder_name.endswith('/') else folder_name + '/'

    # Walk the listing page by page through a paginator.
    pages = boto3.client('s3').get_paginator('list_objects_v2').paginate(
        Bucket=bucket_name, Prefix=prefix
    )

    keys = []
    for page in pages:
        # A page may lack a 'Contents' entry, hence the empty-list default.
        for entry in page.get('Contents', []):
            keys.append(entry['Key'])

    return keys
|
||||
|
||||
|
||||
def fetch_pdf_from_s3(bucket_name, pdf_key, local_path):
    """
    Fetch a PDF from an S3 bucket and save it locally.

    Parameters:
    - bucket_name: Name of the S3 bucket.
    - pdf_key: Path (key) of the PDF file within the bucket.
    - local_path: Local path where the PDF should be saved.

    Returns:
    - local_path, so the caller can open the downloaded file directly.
    """
    s3_client = boto3.client('s3')

    # Let boto3 stream the object straight to disk instead of reading the
    # whole body into memory and leaving the response stream unclosed.
    s3_client.download_file(bucket_name, pdf_key, local_path)

    return local_path
|
||||
|
||||
|
||||
# Example usage (kept as a comment: running it at module level would attempt a
# download from the placeholder bucket every time this module is imported or run):
#
#     fetch_pdf_from_s3('YOUR_BUCKET_NAME', 'path/to/your/pdf_file.pdf', 'local_file_name.pdf')
|
||||
|
||||
|
||||
def handler():
    """
    Download every SAP calculation PDF so its wall areas can be extracted.

    Lists the objects under the "full_sap_calculations" folder of `bucket`,
    keeps the keys ending in ".pdf" and downloads each one into a temporary
    directory that is removed when the function finishes.
    """
    import os  # local import: only needed to build the temporary file path

    files = list_files_in_s3_folder(bucket, "full_sap_calculations")

    # get pdfs
    sap_calculation_pdfs = [file for file in files if file.endswith(".pdf")]

    # One temporary directory owns the download and is deleted on exit.
    # Taking NamedTemporaryFile(...).name discarded the file object at once,
    # so the name was not reserved and the file re-created there was never removed.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_filename = os.path.join(temp_dir, "sap_calculation.pdf")

        for sap_calculation_file in sap_calculation_pdfs:
            # Each download overwrites the previous one, so only a single PDF
            # is held on disk at any time.
            fetch_pdf_from_s3(bucket, sap_calculation_file, temp_filename)

            # TODO: for each pdf, pull out the net & gross wall areas from
            # temp_filename and collect them into the output dataset.
|
||||
|
|
|
|||
2
model_data/simulation_system/requirements/area_data.txt
Normal file
2
model_data/simulation_system/requirements/area_data.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
boto3==1.28.38
|
||||
PyPDF2==3.0.1
|
||||
Loading…
Add table
Reference in a new issue