mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
86 lines
2.8 KiB
Python
86 lines
2.8 KiB
Python
import requests
|
|
import json
|
|
from data_collection.config import ADZUNA_API_KEY, ADZUNA_APP_ID
|
|
|
|
import pandas as pd
|
|
import os
|
|
import time
|
|
from tqdm import tqdm
|
|
|
|
"""
|
|
Table of constituencies and their codes can be downloaded from the Office for National Statistics, found here:
https://geoportal.statistics.gov.uk/datasets/ons::westminster-parliamentary-constituencies-december-2022-names-and-codes-in-the-united-kingdom/explore
|
|
"""
|
|
|
|
# Load the constituency names-and-codes CSV from the data_collection/data
# directory located beside this script, and tag every row with a constant
# "location_type" column in the same expression.
constituencies = pd.read_csv(
    os.path.abspath(os.path.dirname(__file__))
    + "/data_collection/data/Westminster_Parliamentary_Constituencies_("
    "December_2022)_Names_and_Codes_in_the_United_Kingdom.csv"
).assign(location_type="constituency")
|
|
|
|
|
|
def retry_api_call(job_title, location, max_retries=10, retry_delay=2):
    """Call ``get_adzuna_jobs`` and retry when the request fails.

    Args:
        job_title: Search phrase forwarded to ``get_adzuna_jobs``.
        location: Location string forwarded to ``get_adzuna_jobs``.
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait between two consecutive attempts.

    Returns:
        The value returned by ``get_adzuna_jobs`` on the first successful
        attempt, or ``None`` when every attempt failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return get_adzuna_jobs(job_title, location)
        except (requests.HTTPError, requests.ConnectionError, requests.Timeout):
            if attempt == max_retries:
                # No further attempt follows, so do not announce a retry or
                # waste time sleeping before reporting the failure.
                print(f"Attempt {attempt} failed.")
                break
            print(f"Attempt {attempt} failed. Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
    print(f"Failed after {max_retries} attempts.")
    return None
|
|
|
|
|
|
def get_adzuna_jobs(job_title, location, timeout=30):
    """Query the Adzuna job-search endpoint for Great Britain.

    Args:
        job_title: Text sent as the ``what`` query parameter.
        location: Text sent as the ``where`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole crawl.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.ConnectionError: If the connection cannot be established.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    base_url = "https://api.adzuna.com/v1/api/jobs"
    country_code = "gb"

    # NOTE(review): the trailing "1" is presumably the results page number,
    # so only the first page is requested -- confirm against the Adzuna docs.
    url = f"{base_url}/{country_code}/search/1"

    params = {
        "app_id": ADZUNA_APP_ID,
        "app_key": ADZUNA_API_KEY,
        "results_per_page": 25,
        "what": job_title,
        "where": location,
        "content-type": "application/json",
        # Search radius around `where`; presumably kilometres -- TODO confirm.
        "distance": 10,
    }

    response = requests.get(url, params=params, timeout=timeout)
    # Turn 4xx/5xx answers into exceptions so callers can decide to retry.
    response.raise_for_status()

    return json.loads(response.text)
|
|
|
|
|
|
# Search phrases sent to the job API, one request per phrase and location.
JOB_TITLES = [
    # Generic and long-form installer titles.
    "insulation installer",
    "internal wall insulation installer",
    "external wall insulation installer",
    "cavity wall insulation installer",
    "loft insulation installer",
    "roof insulation installer",
    "spray foam insulation installer",
    # Other role names.
    "insulation technician",
    "insulation engineer",
    # Abbreviated variants (iwi / ewi / cwi).
    "iwi insulation installer",
    "iwi installer",
    "ewi insulation installer",
    "ewi installer",
    "cwi insulation installer",
    "cwi installer",
]
|
|
|
|
# Crawl every (job title, constituency) pair and flatten the matches into
# one row per returned job advert.
results = []
for i, job_title in enumerate(JOB_TITLES):
    print(f"Pulling job title {i + 1} of {len(JOB_TITLES)}")
    for _, location_config in tqdm(constituencies.iterrows(), total=constituencies.shape[0]):
        location = location_config["PCON22NM"]
        jobs = retry_api_call(job_title, location)
        # Pause after every search (presumably to stay within the API's
        # rate limit -- confirm).
        time.sleep(0.5)

        if not jobs:
            # retry_api_call returns None once every attempt has failed;
            # skip this search rather than crash the whole crawl.
            continue

        # A response without a "results" key is treated as "no matches".
        for job in jobs.get("results") or []:
            results.append({
                "job_title": job_title,
                "search_location": location,
                "search_location_code": location_config["PCON22CD"],
                # Unpacked last, so a field of the same name in the API
                # record overrides the three search keys above.
                **job,
            })

results_df = pd.DataFrame(results)
|