mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Added retry methodology
This commit is contained in:
parent
28f021d098
commit
f21d2778af
2 changed files with 40 additions and 17 deletions
|
|
@ -4,6 +4,8 @@ from data_collection.config import ADZUNA_API_KEY, ADZUNA_APP_ID
|
|||
|
||||
import pandas as pd
|
||||
import os
|
||||
import time
|
||||
from tqdm import tqdm
|
||||
|
||||
"""
|
||||
Table of constituencies and their codes can be downloaded from the Office of National Statistics, found here:
|
||||
|
|
@ -18,6 +20,20 @@ constituencies = pd.read_csv(
|
|||
"December_2022)_Names_and_Codes_in_the_United_Kingdom.csv"
|
||||
)
|
||||
|
||||
constituencies["location_type"] = "constituency"
|
||||
|
||||
|
||||
def retry_api_call(job_title, location, max_retries=10, delay=2):
    """Call ``get_adzuna_jobs``, retrying on HTTP or connection errors.

    Args:
        job_title: Search term forwarded unchanged to ``get_adzuna_jobs``.
        location: Location forwarded unchanged to ``get_adzuna_jobs``.
        max_retries: Maximum number of attempts before giving up.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        The value returned by ``get_adzuna_jobs`` on the first successful
        attempt, or ``None`` when every attempt raised
        ``requests.HTTPError`` or ``requests.ConnectionError``.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return get_adzuna_jobs(job_title, location)
        except (requests.HTTPError, requests.ConnectionError):
            if attempt == max_retries:
                # No retry follows the last attempt, so do not announce one
                # and do not waste time sleeping before giving up.
                print(f"Attempt {attempt} failed.")
                break
            print(f"Attempt {attempt} failed. Retrying in {delay} seconds...")
            time.sleep(delay)

    print(f"Failed after {max_retries} attempts.")
    return None
|
||||
|
||||
|
||||
def get_adzuna_jobs(job_title, location):
|
||||
base_url = "https://api.adzuna.com/v1/api/jobs"
|
||||
|
|
@ -28,20 +44,18 @@ def get_adzuna_jobs(job_title, location):
|
|||
params = {
|
||||
"app_id": ADZUNA_APP_ID,
|
||||
"app_key": ADZUNA_API_KEY,
|
||||
"results_per_page": 10, # change as needed
|
||||
"results_per_page": 25,
|
||||
"what": job_title,
|
||||
"where": location,
|
||||
"content-type": "application/json"
|
||||
"content-type": "application/json",
|
||||
"distance": 10
|
||||
}
|
||||
|
||||
response = requests.get(url, params=params)
|
||||
response.raise_for_status()
|
||||
|
||||
if response.status_code == 200:
|
||||
jobs = json.loads(response.text)
|
||||
return jobs
|
||||
else:
|
||||
print(f"Error: {response.status_code}")
|
||||
return None
|
||||
jobs = json.loads(response.text)
|
||||
return jobs
|
||||
|
||||
|
||||
JOB_TITLES = [
|
||||
|
|
@ -51,12 +65,20 @@ JOB_TITLES = [
|
|||
"iwi installer", "ewi insulation installer", "ewi installer", "cwi insulation installer", "cwi installer",
|
||||
]
|
||||
|
||||
for job_title in JOB_TITLES:
|
||||
for _, location in constituencies.iterrows():
|
||||
jobs = get_adzuna_jobs(job_title, location)
|
||||
if jobs is not None:
|
||||
results = []
|
||||
for i, job_title in enumerate(JOB_TITLES):
|
||||
print("Pulling job title %s of %s" % (str(i + 1), str(len(JOB_TITLES))))
|
||||
for _, location_config in tqdm(constituencies.iterrows(), total=constituencies.shape[0]):
|
||||
|
||||
location = location_config["PCON22NM"]
|
||||
|
||||
time.sleep(0.5)
|
||||
if jobs["results"]:
|
||||
for job in jobs['results']:
|
||||
print(job['title'])
|
||||
print(job['salary_min'])
|
||||
print(job['salary_max'])
|
||||
print()
|
||||
to_append = {
|
||||
"job_title": job_title,
|
||||
"location": location,
|
||||
"location_code": location_config["PCON22CD"],
|
||||
**job
|
||||
}
|
||||
results.append(to_append)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
requests
|
||||
python-dotenv
|
||||
pandas
|
||||
pandas
|
||||
tqdm
|
||||
Loading…
Add table
Reference in a new issue