Need to upload an image; otherwise nothing will work

This commit is contained in:
Jun-te Kim 2025-07-14 15:00:32 +00:00
parent c3a24085be
commit 549d7265cb
4 changed files with 128 additions and 16 deletions

View file

@ -0,0 +1,43 @@
# Builds the repository's Docker image and pushes it to Amazon ECR whenever
# one of the listed branches receives a push.
name: Build and Push Docker to ECR

on:
  push:
    branches: [feature/energy_report_etl, main]

env:
  AWS_REGION: eu-west-2
  ECR_REPOSITORY: survey_extractor

jobs:
  deploy:
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # needed so the job can request an OIDC token for role assumption
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          # TODO: replace both placeholders with the real account ID and role name.
          role-to-assume: arn:aws:iam::<account-id>:role/<your-github-oidc-role>
          aws-region: ${{ env.AWS_REGION }}

      - name: Log in to Amazon ECR
        # The id is required: the build step below reads
        # steps.login-ecr.outputs.registry, which is empty without it.
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      - name: Build, tag, and push Docker image to ECR
        env:
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          IMAGE_TAG: latest
        run: |
          # ECR_REPOSITORY comes from the workflow-level env block above.
          IMAGE_URI="$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
          echo "Building Docker image..."
          docker build -t "$IMAGE_URI" .
          echo "Pushing Docker image to ECR..."
          docker push "$IMAGE_URI"

View file

@ -3,16 +3,21 @@ resource "aws_sqs_queue" "my_queue" {
name = "my-lambda-queue"
}
# IAM role that the Lambda function will assume to get permissions
# ECR repository that holds the Docker image referenced by the Lambda function
resource "aws_ecr_repository" "lambda_repo" {
  name = "survey_extractor"
}
# IAM role that the Lambda function will assume
resource "aws_iam_role" "lambda_exec_role" {
name = "lambda-exec-role"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Version = "2012-10-17",
Statement = [
{
Action = "sts:AssumeRole"
Effect = "Allow"
Action = "sts:AssumeRole",
Effect = "Allow",
Principal = {
Service = "lambda.amazonaws.com"
}
@ -21,22 +26,55 @@ resource "aws_iam_role" "lambda_exec_role" {
})
}
# Attach the basic execution policy (writes logs to CloudWatch) to the Lambda role
# Give the Lambda role the AWS-managed basic execution policy (CloudWatch logging)
resource "aws_iam_role_policy_attachment" "lambda_basic_execution" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
  role       = aws_iam_role.lambda_exec_role.name
}
# Give Lambda permission to poll and process SQS messages
resource "aws_iam_role_policy_attachment" "sqs_access" {
role = aws_iam_role.lambda_exec_role.name
policy_arn = "arn:aws:iam::aws:policy/AWSLambdaSQSQueueExecutionRole"
# Customer-managed policy combining the SQS consumer permissions and the ECR
# image-pull permissions; it is scoped to this module's queue and repository.
resource "aws_iam_policy" "lambda_custom_policy" {
  name = "lambda-sqs-ecr-policy"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # SQS: receive, delete and inspect messages on my_queue only
      {
        Effect = "Allow"
        Action = [
          "sqs:ReceiveMessage",
          "sqs:DeleteMessage",
          "sqs:GetQueueAttributes",
        ]
        Resource = aws_sqs_queue.my_queue.arn
      },
      # ECR: read image layers and manifests from lambda_repo only
      {
        Effect = "Allow"
        Action = [
          "ecr:GetDownloadUrlForLayer",
          "ecr:BatchGetImage",
          "ecr:BatchCheckLayerAvailability",
        ]
        Resource = aws_ecr_repository.lambda_repo.arn
      },
      # ECR: authorization token used when pulling the image; granted on "*"
      {
        Effect   = "Allow"
        Action   = ["ecr:GetAuthorizationToken"]
        Resource = "*"
      },
    ]
  })
}
# Create an ECR repository to store the Docker image for the Lambda function
resource "aws_ecr_repository" "lambda_repo" {
name = "lambda-hello-world"
# Bind the custom SQS + ECR policy to the Lambda execution role
resource "aws_iam_role_policy_attachment" "lambda_custom_policy_attach" {
  policy_arn = aws_iam_policy.lambda_custom_policy.arn
  role       = aws_iam_role.lambda_exec_role.name
}
# Define the Lambda function using a Docker image from ECR
@ -45,8 +83,7 @@ resource "aws_lambda_function" "lambda_docker" {
role = aws_iam_role.lambda_exec_role.arn
package_type = "Image"
image_uri = "${aws_ecr_repository.lambda_repo.repository_url}:latest"
timeout = 10
timeout = 10
}
# Connect the SQS queue to the Lambda so it gets triggered by incoming messages
@ -55,3 +92,26 @@ resource "aws_lambda_event_source_mapping" "sqs_trigger" {
function_name = aws_lambda_function.lambda_docker.arn
batch_size = 1
}
# Repository policy on lambda_repo that allows the Lambda service principal
# to pull image layers and manifests.
resource "aws_ecr_repository_policy" "lambda_ecr_access" {
  repository = aws_ecr_repository.lambda_repo.name

  policy = jsonencode({
    Version = "2008-10-17"
    Statement = [
      {
        Sid    = "AllowLambdaPull"
        Effect = "Allow"
        Principal = {
          Service = "lambda.amazonaws.com"
        }
        Action = [
          "ecr:GetDownloadUrlForLayer",
          "ecr:BatchGetImage",
          "ecr:BatchCheckLayerAvailability",
        ]
      },
    ]
  })
}

View file

@ -0,0 +1,8 @@
# Ad-hoc script: instantiates surveyedDataProcessor with one local survey PDF.
from etl.surveyedData.surveryedData import surveyedDataProcessor

# PDF paths passed to the processor.
files = [
    "/tmp/sharepoint/Sandwell/SANDWELL-001/26 Willow close B64 6EG/Content (13).pdf",
]

# NOTE(review): UUID is not referenced in the visible part of this script.
from sqlalchemy.dialects.postgresql import UUID

# NOTE(review): "fake address" looks like a placeholder first argument; confirm
# what surveyedDataProcessor expects there.
sdp = surveyedDataProcessor("fake address", files)

View file

@ -3,6 +3,7 @@ import logging
import pymupdf
from etl.fileReader.sitenotes import QuidosSiteNotesExtractor, CSR, WarmHomesConditionReport, ECOConditionReport, RDSAPEnergyReport
from etl.fileReader.reportType import ReportType
from pprint import pprint
class pdfReaderToText():
@ -24,7 +25,7 @@ class pdfReaderToText():
self.all_text += text
self.text_list = self.all_text.split('\n')
print(self.text_list)
pprint(self.text_list)
def get_list_of_text(self):
return self.text_list