mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
post code and address are now separate
This commit is contained in:
parent
4421b6bdf0
commit
26c7fa019b
7 changed files with 68 additions and 191 deletions
|
|
@ -6,8 +6,10 @@ from tqdm import tqdm
|
|||
import os
|
||||
from etl.scraper.scraper import SharePointInstaller
|
||||
from etl.scraper.scraper import SharePointScraper
|
||||
|
||||
board_id = "5185076280"
|
||||
# wates
|
||||
# board_id = "4965130190"
|
||||
# liv green
|
||||
board_id = "6097500103"
|
||||
# The Monday.com API token is read from the MONDAY_API_KEY environment
# variable so that it is never committed to source control. A missing
# variable raises KeyError immediately instead of failing later on the
# first API call.
# NOTE(review): the token that was previously hard-coded on this line has
# been published in the repository history and should be revoked/rotated.
monday_key = os.environ["MONDAY_API_KEY"]
|
||||
monday = MondayClient(monday_key)
|
||||
|
||||
|
|
@ -20,7 +22,23 @@ osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_WAVE_2)
|
|||
parent_folder = "/Osmosis ACD/Osmosis ACD Projects/"
|
||||
|
||||
# Change this per installer
|
||||
parent_folder += "Installer Documentation/Platform Housing Group/Cocuun"
|
||||
# parent_folder += "Stonewater/Wates/REDO"
|
||||
parent_folder += "Installer Documentation/Platform Housing Group/Liv Green"
|
||||
|
||||
import re
|
||||
|
||||
def sanitize_name(name: str, ignore_dot = False) -> str:
    """Return ``name`` reduced to characters that are safe in a SharePoint name.

    Only alphanumeric characters, whitespace, parentheses and (unless
    ``ignore_dot`` is true) full stops are kept. The result has leading and
    trailing whitespace removed and every internal run of whitespace
    collapsed to a single space.

    Args:
        name: Raw item or file name.
        ignore_dot: When true, full stops are dropped as well.

    Returns:
        The sanitised name; may be empty if nothing in ``name`` is allowed.
    """
    # Punctuation that survives the filter; "." is kept only on request so
    # that file extensions are preserved for file names.
    allowed_punctuation = "()" if ignore_dot else "()."
    kept_chars = [
        ch
        for ch in name
        if ch.isalnum() or ch.isspace() or ch in allowed_punctuation
    ]
    cleaned = ''.join(kept_chars).strip()

    # Collapse any run of whitespace (including tabs/newlines) to one space.
    cleaned = re.sub(r'\s+', ' ', cleaned)

    # Strip characters SharePoint rejects. After the allow-list filter above
    # none of these can remain, so this pass is purely a safeguard.
    invalid_chars = r'[\"*:<>?/\\|#%~{}]'
    return re.sub(invalid_chars, '', cleaned)
|
||||
|
||||
|
||||
def extract_asset_ids(item, file_column_id):
|
||||
|
|
@ -102,8 +120,10 @@ def get_all_items(board_id, monday):
|
|||
def upload_to_sharepoint(to_upload, master_folder_name):
    """Upload downloaded files into a per-item folder on SharePoint.

    Creates ``master_folder_name`` under the module-level ``parent_folder``
    and uploads each path in ``to_upload`` into it exactly once, under a
    sanitised file name.

    Args:
        to_upload: Iterable of local file paths to upload.
        master_folder_name: Name of the folder to create for this item.
    """
    osmosis.create_dir(master_folder_name, parent_folder)
    target_folder = parent_folder + f"/{master_folder_name}"

    for file_path in to_upload:
        # The first five characters of the local path are dropped to obtain
        # the remote file name.
        # NOTE(review): presumably a fixed download-directory prefix —
        # confirm against download_file_from_public_url.
        file_name = sanitize_name(file_path[5:])
        print(f"Uploading {file_name} to sharepoint")
        osmosis.upload_file(file_path, target_folder, file_name)
|
||||
|
||||
# Step 1: Fetch column IDs
|
||||
board_data = monday.boards.fetch_boards_by_id(board_id)
|
||||
|
|
@ -117,20 +137,26 @@ if not name_id or not files_id:
|
|||
raise Exception("Could not find 'name' or 'file(s)' columns")
|
||||
|
||||
items = get_all_items(board_id, monday)
|
||||
for item in tqdm(items):
|
||||
item_name = item["name"]
|
||||
item_name = ''.join(char for char in item_name if char.isalnum() or char.isspace())
|
||||
asset_ids = extract_asset_ids(item, files_id)
|
||||
for i,item in enumerate(tqdm(items)):
|
||||
if i>123:
|
||||
item_name = item["name"]
|
||||
item_name = sanitize_name(item_name, ignore_dot=True)
|
||||
print(f"Item name is {item_name}")
|
||||
asset_ids = extract_asset_ids(item, files_id)
|
||||
|
||||
to_upload = []
|
||||
for asset_id in asset_ids:
|
||||
try:
|
||||
public_url, file_name = get_public_url(asset_id)
|
||||
print(f"Downloading {file_name} from {public_url}")
|
||||
file_path = download_file_from_public_url(public_url, file_name)
|
||||
to_upload.append(file_path)
|
||||
except Exception as e:
|
||||
print(f"Failed to download/upload asset {asset_id}: {e}")
|
||||
to_upload = []
|
||||
for asset_id in asset_ids:
|
||||
try:
|
||||
public_url, file_name = get_public_url(asset_id)
|
||||
print(f"Downloading {file_name}")
|
||||
file_path = download_file_from_public_url(public_url, file_name)
|
||||
to_upload.append(file_path)
|
||||
except Exception as e:
|
||||
print(f"Failed to download/upload asset {asset_id}: {e}")
|
||||
|
||||
if to_upload:
|
||||
upload_to_sharepoint(to_upload, item_name)
|
||||
if to_upload:
|
||||
upload_to_sharepoint(to_upload, item_name)
|
||||
|
||||
|
||||
|
||||
# Liv green # Cocuun # Wates
|
||||
|
|
@ -128,7 +128,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor):
|
|||
uprn = get_value('UPRN'),
|
||||
postcode = get_value('Postcode'),
|
||||
region = get_value('Region'),
|
||||
address = get_value('Address'),
|
||||
address = ','.join(get_value('Address').split(',')[:-1]).strip(),
|
||||
town = get_value('Town'),
|
||||
county = get_value('County'),
|
||||
property_tenure = get_value('Property Tenure'),
|
||||
|
|
|
|||
|
|
@ -25,4 +25,8 @@ def test_floor_area_calculator(local_survey):
|
|||
# Floor area is important to work out invoice, make a test to work out invoice correctly
|
||||
area = work_out_total_floor_area(local_survey.pre_site_note)
|
||||
assert area == ('73-97m', 91)
|
||||
|
||||
|
||||
|
||||
def test_address_and_post_code(local_survey):
    """The survey information exposes the address and the postcode as separate fields."""
    survey_info = local_survey.pre_site_note.survey_information

    # The address must not carry the trailing postcode component.
    assert survey_info.address == "10 Turnberry Close, ST. LEONARDS-ON-SEA"
    assert survey_info.postcode == "TN38 0WL"
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
import os
|
||||
# WarmFront SharePoint credentials must be supplied through the environment
# (e.g. CI secrets or a git-ignored .env file) and never committed.
# NOTE(review): the client id/secret/tenant id previously hard-coded here are
# exposed in the repository history and should be rotated.
_REQUIRED_SHAREPOINT_VARS = (
    "SHAREPOINT_CLIENT_ID",
    "SHAREPOINT_CLIENT_SECRET",
    "SHAREPOINT_TENANT_ID",
)
_missing_sharepoint_vars = [
    var_name for var_name in _REQUIRED_SHAREPOINT_VARS if not os.environ.get(var_name)
]
if _missing_sharepoint_vars:
    # Fail fast with a clear message rather than an obscure auth error later.
    raise RuntimeError(
        "Missing SharePoint environment variables: "
        + ", ".join(_missing_sharepoint_vars)
    )
|
||||
|
||||
from etl.surveyPrice.surveyPrice import SurveyPrice
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def sp():
    """Provide a single SurveyPrice instance shared by the tests in this module."""
    survey_price = SurveyPrice()
    return survey_price
|
||||
|
||||
|
||||
def cavity_price_dataframe_sanity_check(df):
    """Assert that a cavity pricing table has 160 rows and the five expected columns.

    Args:
        df: Object exposing ``shape`` and ``columns.tolist()`` (a DataFrame).

    Raises:
        AssertionError: If the shape or the column headers differ.
    """
    expected_shape = (160, 5)
    expected_columns = ['WORK TYPE', 'Floor Area Group', 'Trickle Vent', 'Wetrooms', 'PRICE']

    assert df.shape == expected_shape
    assert df.columns.tolist() == expected_columns
|
||||
|
||||
def test_get_price_matrix_jjc_empties(sp):
    """The "JJC - EMPTIES" cavity pricing table passes the sanity check."""
    cavity_price_dataframe_sanity_check(
        sp.get_cavity_pricing_table("JJC - EMPTIES")
    )
|
||||
|
||||
def test_get_price_matrix_jjc_general_extraction(sp):
    """The "JJC - GENERAL EXTRACTIONS" cavity pricing table passes the sanity check."""
    # Use the module-scoped ``sp`` fixture as injected; rebinding it to a new
    # SurveyPrice() here would discard the shared instance.
    general_extraction_price_table = sp.get_cavity_pricing_table("JJC - GENERAL EXTRACTIONS")
    cavity_price_dataframe_sanity_check(general_extraction_price_table)
|
||||
|
||||
def test_get_price_matrix_jjc_foam(sp):
    """The "JJC - FORMALDEHYDE EXTRACTION" cavity pricing table passes the sanity check."""
    # Use the module-scoped ``sp`` fixture as injected; rebinding it to a new
    # SurveyPrice() here would discard the shared instance.
    foam_extraction_price_table = sp.get_cavity_pricing_table("JJC - FORMALDEHYDE EXTRACTION")
    cavity_price_dataframe_sanity_check(foam_extraction_price_table)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -226,7 +226,7 @@ class SharePointClient:
|
|||
'Authorization': f"Bearer {self.access_token['access_token']}"
|
||||
}
|
||||
|
||||
logger.debug("Access token retrieved successfully.")
|
||||
# logger.debug("Access token retrieved successfully.")
|
||||
|
||||
@api_call_decorator
|
||||
def get_documents_drive(self):
|
||||
|
|
@ -235,7 +235,7 @@ class SharePointClient:
|
|||
:return: Tuple containing HTTP method, URL, and None for data.
|
||||
"""
|
||||
url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drive"
|
||||
logger.debug(f"Getting document drive from URL: {url}")
|
||||
# logger.debug(f"Getting document drive from URL: {url}")
|
||||
return 'GET', url, None
|
||||
|
||||
@api_call_decorator
|
||||
|
|
@ -250,7 +250,7 @@ class SharePointClient:
|
|||
:return: Tuple containing HTTP method, URL, and None for data.
|
||||
"""
|
||||
url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{folder_path}:/children?$top={page_size}"
|
||||
logger.debug(f"Listing folder contents from URL: {url}")
|
||||
# logger.debug(f"Listing folder contents from URL: {url}")
|
||||
return 'GET', url, None
|
||||
|
||||
|
||||
|
|
@ -286,12 +286,12 @@ class SharePointClient:
|
|||
:return: Response JSON from the API
|
||||
"""
|
||||
url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{sharepoint_parent_id}/{file_name}:/content"
|
||||
logger.debug(f"Uploading file to URL: {url}")
|
||||
# logger.debug(f"Uploading file to URL: {url}")
|
||||
|
||||
response = requests.put(url, headers=self.headers, data=file_stream)
|
||||
|
||||
if response.status_code in (200, 201):
|
||||
logger.info(f"File '{file_name}' uploaded successfully.")
|
||||
# logger.info(f"File '{file_name}' uploaded successfully.")
|
||||
return response.json()
|
||||
else:
|
||||
retry = handle_error(response)
|
||||
|
|
|
|||
|
|
@ -1,34 +1,15 @@
|
|||
import type { Metadata } from "next";
|
||||
import { Geist, Geist_Mono } from "next/font/google";
|
||||
import "./globals.css";
|
||||
|
||||
const geistSans = Geist({
|
||||
variable: "--font-geist-sans",
|
||||
subsets: ["latin"],
|
||||
});
|
||||
|
||||
const geistMono = Geist_Mono({
|
||||
variable: "--font-geist-mono",
|
||||
subsets: ["latin"],
|
||||
});
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Create Next App",
|
||||
description: "Generated by create next app",
|
||||
};
|
||||
|
||||
export default function RootLayout({
|
||||
export default function DashboardLayout({
|
||||
children,
|
||||
}: Readonly<{
|
||||
children: React.ReactNode;
|
||||
}>) {
|
||||
}: {
|
||||
children: React.ReactNode
|
||||
}) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<body
|
||||
className={`${geistSans.variable} ${geistMono.variable} antialiased`}
|
||||
>
|
||||
{children}
|
||||
<body>
|
||||
{/* Layout UI */}
|
||||
{/* Place children where you want to render a page or nested layout */}
|
||||
<main>{children}</main>
|
||||
</body>
|
||||
</html>
|
||||
);
|
||||
}
|
||||
)
|
||||
}
|
||||
|
|
@ -1,103 +1,6 @@
|
|||
import Image from "next/image";
|
||||
|
||||
export default function Home() {
|
||||
return (
|
||||
<div className="grid grid-rows-[20px_1fr_20px] items-center justify-items-center min-h-screen p-8 pb-20 gap-16 sm:p-20 font-[family-name:var(--font-geist-sans)]">
|
||||
<main className="flex flex-col gap-[32px] row-start-2 items-center sm:items-start">
|
||||
<Image
|
||||
className="dark:invert"
|
||||
src="/next.svg"
|
||||
alt="Next.js logo"
|
||||
width={180}
|
||||
height={38}
|
||||
priority
|
||||
/>
|
||||
<ol className="list-inside list-decimal text-sm/6 text-center sm:text-left font-[family-name:var(--font-geist-mono)]">
|
||||
<li className="mb-2 tracking-[-.01em]">
|
||||
Get started by editing{" "}
|
||||
<code className="bg-black/[.05] dark:bg-white/[.06] px-1 py-0.5 rounded font-[family-name:var(--font-geist-mono)] font-semibold">
|
||||
src/app/page.tsx
|
||||
</code>
|
||||
.
|
||||
</li>
|
||||
<li className="tracking-[-.01em]">
|
||||
Save and see your changes instantly.
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
<div className="flex gap-4 items-center flex-col sm:flex-row">
|
||||
<a
|
||||
className="rounded-full border border-solid border-transparent transition-colors flex items-center justify-center bg-foreground text-background gap-2 hover:bg-[#383838] dark:hover:bg-[#ccc] font-medium text-sm sm:text-base h-10 sm:h-12 px-4 sm:px-5 sm:w-auto"
|
||||
href="https://vercel.com/new?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
<Image
|
||||
className="dark:invert"
|
||||
src="/vercel.svg"
|
||||
alt="Vercel logomark"
|
||||
width={20}
|
||||
height={20}
|
||||
/>
|
||||
Deploy now
|
||||
</a>
|
||||
<a
|
||||
className="rounded-full border border-solid border-black/[.08] dark:border-white/[.145] transition-colors flex items-center justify-center hover:bg-[#f2f2f2] dark:hover:bg-[#1a1a1a] hover:border-transparent font-medium text-sm sm:text-base h-10 sm:h-12 px-4 sm:px-5 w-full sm:w-auto md:w-[158px]"
|
||||
href="https://nextjs.org/docs?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
Read our docs
|
||||
</a>
|
||||
</div>
|
||||
</main>
|
||||
<footer className="row-start-3 flex gap-[24px] flex-wrap items-center justify-center">
|
||||
<a
|
||||
className="flex items-center gap-2 hover:underline hover:underline-offset-4"
|
||||
href="https://nextjs.org/learn?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
<Image
|
||||
aria-hidden
|
||||
src="/file.svg"
|
||||
alt="File icon"
|
||||
width={16}
|
||||
height={16}
|
||||
/>
|
||||
Learn
|
||||
</a>
|
||||
<a
|
||||
className="flex items-center gap-2 hover:underline hover:underline-offset-4"
|
||||
href="https://vercel.com/templates?framework=next.js&utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
<Image
|
||||
aria-hidden
|
||||
src="/window.svg"
|
||||
alt="Window icon"
|
||||
width={16}
|
||||
height={16}
|
||||
/>
|
||||
Examples
|
||||
</a>
|
||||
<a
|
||||
className="flex items-center gap-2 hover:underline hover:underline-offset-4"
|
||||
href="https://nextjs.org?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
<Image
|
||||
aria-hidden
|
||||
src="/globe.svg"
|
||||
alt="Globe icon"
|
||||
width={16}
|
||||
height={16}
|
||||
/>
|
||||
Go to nextjs.org →
|
||||
</a>
|
||||
</footer>
|
||||
</div>
|
||||
<h1>Hello Next.js!</h1>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue