Added retry methodology

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-29 16:54:27 +01:00
parent 28f021d098
commit f21d2778af
2 changed files with 40 additions and 17 deletions

View file

@ -4,6 +4,8 @@ from data_collection.config import ADZUNA_API_KEY, ADZUNA_APP_ID
import pandas as pd
import os
import time
from tqdm import tqdm
"""
A table of constituencies and their codes can be downloaded from the Office for National Statistics, found here:
@ -18,6 +20,20 @@ constituencies = pd.read_csv(
"December_2022)_Names_and_Codes_in_the_United_Kingdom.csv"
)
constituencies["location_type"] = "constituency"
def retry_api_call(job_title, location, max_retries=10, delay=2):
    """Call ``get_adzuna_jobs`` until it succeeds or the attempts run out.

    Only ``requests.HTTPError`` and ``requests.ConnectionError`` trigger a
    retry; any other exception propagates to the caller.

    Args:
        job_title: Search term forwarded unchanged to ``get_adzuna_jobs``.
        location: Location forwarded unchanged to ``get_adzuna_jobs``.
        max_retries: Maximum number of attempts to make.
        delay: Seconds to wait between a failed attempt and the next one.

    Returns:
        Whatever ``get_adzuna_jobs`` returns on the first successful attempt,
        or ``None`` if every attempt failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return get_adzuna_jobs(job_title, location)
        except (requests.HTTPError, requests.ConnectionError):
            if attempt == max_retries:
                # No attempt follows this one, so do not announce a retry
                # or waste time sleeping before reporting the failure.
                print(f"Attempt {attempt} failed.")
                break
            print(f"Attempt {attempt} failed. Retrying in {delay} seconds...")
            time.sleep(delay)

    print(f"Failed after {max_retries} attempts.")
    return None
def get_adzuna_jobs(job_title, location):
base_url = "https://api.adzuna.com/v1/api/jobs"
@ -28,20 +44,18 @@ def get_adzuna_jobs(job_title, location):
params = {
"app_id": ADZUNA_APP_ID,
"app_key": ADZUNA_API_KEY,
"results_per_page": 10, # change as needed
"results_per_page": 25,
"what": job_title,
"where": location,
"content-type": "application/json"
"content-type": "application/json",
"distance": 10
}
response = requests.get(url, params=params)
response.raise_for_status()
if response.status_code == 200:
jobs = json.loads(response.text)
return jobs
else:
print(f"Error: {response.status_code}")
return None
jobs = json.loads(response.text)
return jobs
JOB_TITLES = [
@ -51,12 +65,20 @@ JOB_TITLES = [
"iwi installer", "ewi insulation installer", "ewi installer", "cwi insulation installer", "cwi installer",
]
for job_title in JOB_TITLES:
for _, location in constituencies.iterrows():
jobs = get_adzuna_jobs(job_title, location)
if jobs is not None:
results = []
for i, job_title in enumerate(JOB_TITLES):
print("Pulling job title %s of %s" % (str(i + 1), str(len(JOB_TITLES))))
for _, location_config in tqdm(constituencies.iterrows(), total=constituencies.shape[0]):
location = location_config["PCON22NM"]
time.sleep(0.5)
if jobs["results"]:
for job in jobs['results']:
print(job['title'])
print(job['salary_min'])
print(job['salary_max'])
print()
to_append = {
"job_title": job_title,
"location": location,
"location_code": location_config["PCON22CD"],
**job
}
results.append(to_append)

View file

@ -1,3 +1,4 @@
requests
python-dotenv
pandas
pandas
tqdm