From 2fbc330c7ccc3a67a889f2ca62d17c646c22add2 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte.kim@mealcraft.com>
Date: Wed, 12 Mar 2025 13:20:19 +0000
Subject: [PATCH] Parse section 7 with the shared two-column extension processor

---
 etl/pdfReader/sitenotes.py | 45 +++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py
index 9e424bf..b7cbfc5 100644
--- a/etl/pdfReader/sitenotes.py
+++ b/etl/pdfReader/sitenotes.py
@@ -238,7 +238,7 @@ class QuidosSiteNotes(SiteNotes):
 
     def get_section_7(self):
         data = self.raw_data[self.raw_data.index('7.0 Walls'): self.raw_data.index('8.0 Roofs')]
-        avoid = [
+        sub_titles = [
             "Construction",
             "Insulation",
             "Insulation Thickness(mm)",
@@ -253,24 +253,18 @@ class QuidosSiteNotes(SiteNotes):
             "Alternative Wall Present",
         ]
         
-        get_value = lambda key: None if self.raw_data[self.raw_data.index(key) + 1] in avoid else self.raw_data[self.raw_data.index(key) + 1]
-        
-        # Main property
-        main_info = data[data.index("Main Property"):data.index("Extension 1")]
-        for i,item in enumerate(main_info):
-            if item in avoid:
-                setattr(self, f"main_property_{item.lower().replace(' ', '_').replace('-', '_')}", get_value(item))
-
-        for j in range(1, 5):
-            main_data = data[data.index(f"Extension {j}"):]
-            get_value = lambda key: None if main_data[main_data.index(key) + 1] in avoid else main_data[main_data.index(key) + 1]
-            for i,item in enumerate(main_data):
-                if item in avoid:
-                    setattr(self, f"extensions_{j}_{item.lower().replace(' ', '_').replace('-', '_')}", get_value(item))
+        main_titles = [
+            "Main Property",
+            "Extension 1",
+            "Extension 2",
+            "Extension 3",
+            "Extension 4",
+        ]
+        self.two_column_with_extension_processor(data, sub_titles, main_titles, 7)
 
     def get_section_8(self):
         data = self.raw_data[self.raw_data.index('8.0 Roofs'): self.raw_data.index('9.0 Floors')]
-        avoid = [
+        sub_titles = [
             "Construction",
             "Insulation Type",
             "Insulation Thickness",
@@ -285,25 +279,26 @@ class QuidosSiteNotes(SiteNotes):
             "Extension 4",
         ]
 
-        self.two_column_with_extension_processor(data, avoid, titles, 8)
+        self.two_column_with_extension_processor(data, sub_titles, titles, 8)
+
+    def two_column_with_extension_processor(self, data, sub_titles, main_titles, section):
 
-    def two_column_with_extension_processor(self, data, avoid, titles, section):
         title = None
         proc_data = data
         for items in data: 
-            if items in titles:
+            if items in main_titles:
                 title = items.lower().replace(" ", "_").replace("-", "_")
-                index = titles.index(items)
-                if titles[index] in data:
-                    print(titles[index])
-                    proc_data = data[data.index(titles[index]):]
+                index = main_titles.index(items)
+                if main_titles[index] in data:
+                    print(main_titles[index])
+                    proc_data = data[data.index(main_titles[index]):]
                     continue
                 else:
                     break
             if title is None:
                 continue
-            get_value = lambda key: None if proc_data[proc_data.index(key) + 1] in avoid else proc_data[proc_data.index(key) + 1]
-            if items in avoid:
+            get_value = lambda key: None if proc_data[proc_data.index(key) + 1] in sub_titles else proc_data[proc_data.index(key) + 1]
+            if items in sub_titles:
                 setattr(self, f"section_{section}_{title}_{items.lower().replace(' ', '_').replace('-','_')}", get_value(items))
 
     def get_section_9(self):