# src/linkedIn_job_manager.py
import os
import random
import time
import traceback
from itertools import product
from pathlib import Path
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
import src.utils as utils
from src.job import Job
from src.linkedIn_easy_applier import LinkedInEasyApplier
import json
class EnvironmentKeys:
    """Snapshot of the feature-flag environment variables, read at construction.

    Flags are opt-in: a flag is True only when its environment variable is
    the exact string "True".
    """

    def __init__(self):
        read_flag = self._read_env_key_bool
        # Skip the actual submission step when SKIP_APPLY is set.
        self.skip_apply = read_flag("SKIP_APPLY")
        # Disable filtering on the job description when set.
        self.disable_description_filter = read_flag("DISABLE_DESCRIPTION_FILTER")

    @staticmethod
    def _read_env_key(key: str) -> str:
        """Return the variable's value, or "" when it is unset."""
        return os.environ.get(key, "")

    @staticmethod
    def _read_env_key_bool(key: str) -> bool:
        """Return True only for the exact (case-sensitive) string "True"."""
        return os.environ.get(key) == "True"
class LinkedInJobManager:
    """Orchestrates the LinkedIn job search and Easy Apply workflow.

    Collaborators (GPT answerer, resume generator) are injected via setters
    after construction; search configuration comes in via ``set_parameters``.
    The sleep/pacing logic throttles page loads to look less bot-like.
    """

    def __init__(self, driver):
        """Store the Selenium WebDriver; everything else is injected later."""
        self.driver = driver
        self.set_old_answers = set()  # answers reused by the easy-applier component
        self.easy_applier_component = None

    def set_parameters(self, parameters):
        """Load search configuration: blacklists, searches, resume, output dir.

        ``parameters`` is the parsed user config dict (keys like
        'companyBlacklist', 'positions', 'uploads', 'outputFileDirectory').
        """
        # `or []` guards against explicit null values in the config file.
        self.company_blacklist = parameters.get('companyBlacklist', []) or []
        self.title_blacklist = parameters.get('titleBlacklist', []) or []
        self.positions = parameters.get('positions', [])
        self.locations = parameters.get('locations', [])
        self.base_search_url = self.get_base_search_url(parameters)
        self.seen_jobs = []  # links already processed; consulted by is_blacklisted
        resume_path = parameters.get('uploads', {}).get('resume', None)
        # Only keep the resume path when the file actually exists on disk.
        if resume_path is not None and Path(resume_path).exists():
            self.resume_path = Path(resume_path)
        else:
            self.resume_path = None
        self.output_file_directory = Path(parameters['outputFileDirectory'])
        self.env_config = EnvironmentKeys()

    def set_gpt_answerer(self, gpt_answerer):
        """Inject the GPT-backed form answerer used during applications."""
        self.gpt_answerer = gpt_answerer

    def set_resume_generator_manager(self, resume_generator_manager):
        """Inject the resume generator used to produce tailored PDFs."""
        self.resume_generator_manager = resume_generator_manager

    def start_applying(self):
        """Iterate over every (position, location) pair, applying page by page.

        Runs until a search raises (e.g. no more result pages), then moves on
        to the next search after honoring the minimum per-cycle delay.
        """
        self.easy_applier_component = LinkedInEasyApplier(
            self.driver, self.resume_path, self.set_old_answers,
            self.gpt_answerer, self.resume_generator_manager)
        searches = list(product(self.positions, self.locations))
        random.shuffle(searches)
        page_sleep = 0
        minimum_time = 60 * 15  # spend at least 15 minutes per page cycle
        minimum_page_time = time.time() + minimum_time
        for position, location in searches:
            location_url = "&location=" + location
            job_page_number = -1
            utils.printyellow(f"Starting the search for {position} in {location}.")
            try:
                while True:
                    page_sleep += 1
                    job_page_number += 1
                    utils.printyellow(f"Going to job page {job_page_number}")
                    self.next_job_page(position, location_url, job_page_number)
                    time.sleep(random.uniform(1.5, 3.5))
                    utils.printyellow("Starting the application process for this page...")
                    self.apply_jobs()
                    utils.printyellow("Applying to jobs on this page has been completed!")
                    time_left = minimum_page_time - time.time()
                    if time_left > 0:
                        utils.printyellow(f"Sleeping for {time_left} seconds.")
                        time.sleep(time_left)
                        minimum_page_time = time.time() + minimum_time
                    if page_sleep % 5 == 0:
                        sleep_time = random.randint(5, 34)
                        utils.printyellow(f"Sleeping for {sleep_time / 60} minutes.")
                        time.sleep(sleep_time)
                        page_sleep += 1
            except Exception:
                # BUG FIX: traceback.format_exc() was called but its result was
                # discarded, silently swallowing every failure. Surface it.
                utils.printred(traceback.format_exc())
            time_left = minimum_page_time - time.time()
            if time_left > 0:
                utils.printyellow(f"Sleeping for {time_left} seconds.")
                time.sleep(time_left)
                minimum_page_time = time.time() + minimum_time
            if page_sleep % 5 == 0:
                sleep_time = random.randint(50, 90)
                utils.printyellow(f"Sleeping for {sleep_time / 60} minutes.")
                time.sleep(sleep_time)
                page_sleep += 1

    def apply_jobs(self):
        """Apply to every eligible job tile on the current results page.

        Raises when the page reports no results or no tiles are found, which
        start_applying() uses as the signal to end the current search.
        """
        try:
            no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
            if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower():
                raise Exception("No more jobs on this page")
        except NoSuchElementException:
            pass
        job_results = self.driver.find_element(By.CLASS_NAME, "jobs-search-results-list")
        # Scroll down then back up so lazily-loaded tiles get rendered.
        utils.scroll_slow(self.driver, job_results)
        utils.scroll_slow(self.driver, job_results, step=300, reverse=True)
        job_list_elements = self.driver.find_elements(By.CLASS_NAME, 'scaffold-layout__list-container')[0].find_elements(By.CLASS_NAME, 'jobs-search-results__list-item')
        if not job_list_elements:
            raise Exception("No job class elements found on page")
        job_list = [Job(*self.extract_job_information_from_tile(job_element)) for job_element in job_list_elements]
        for job in job_list:
            if self.is_blacklisted(job.title, job.company, job.link):
                utils.printyellow(f"Blacklisted {job.title} at {job.company}, skipping...")
                self.write_to_file(job, "skipped")
                continue
            # BUG FIX: is_blacklisted checks seen_jobs but nothing ever added
            # to it, leaving duplicate-link protection inert. Record the link
            # so this job is skipped on later pages/searches.
            self.seen_jobs.append(job.link)
            try:
                # Tiles already marked Continue/Applied/Apply are not re-applied,
                # but are still logged as "success".
                if job.apply_method not in {"Continue", "Applied", "Apply"}:
                    self.easy_applier_component.job_apply(job)
                self.write_to_file(job, "success")
            except Exception:
                utils.printred(traceback.format_exc())
                self.write_to_file(job, "failed")
                continue

    def write_to_file(self, job, file_name):
        """Append this job's record to <outputFileDirectory>/<file_name>.json.

        The target file holds a JSON array; a corrupt/empty file is reset
        rather than crashing the run.
        """
        # NOTE(review): assumes job.pdf_path is always a valid path string,
        # even for skipped/failed jobs — confirm against Job's defaults.
        pdf_path = Path(job.pdf_path).resolve()
        pdf_path = pdf_path.as_uri()
        data = {
            "company": job.company,
            "job_title": job.title,
            "link": job.link,
            "job_recruiter": job.recruiter_link,
            "job_location": job.location,
            "pdf_path": pdf_path
        }
        file_path = self.output_file_directory / f"{file_name}.json"
        if not file_path.exists():
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump([data], f, indent=4)
        else:
            with open(file_path, 'r+', encoding='utf-8') as f:
                try:
                    existing_data = json.load(f)
                except json.JSONDecodeError:
                    existing_data = []
                existing_data.append(data)
                f.seek(0)
                json.dump(existing_data, f, indent=4)
                f.truncate()  # shrink the file if the new JSON is shorter

    def get_base_search_url(self, parameters):
        """Build the query-string fragment encoding the configured filters."""
        url_parts = []
        if parameters['remote']:
            url_parts.append("f_CF=f_WRA")
        # LinkedIn encodes experience levels as 1-based indices (f_E),
        # so the config dict's key order is significant here.
        experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experienceLevel', {}).items()) if v]
        if experience_levels:
            url_parts.append(f"f_E={','.join(experience_levels)}")
        url_parts.append(f"distance={parameters['distance']}")
        # Job types map to their first letter, upper-cased ("full-time" -> "F").
        job_types = [key[0].upper() for key, value in parameters.get('jobTypes', {}).items() if value]
        if job_types:
            url_parts.append(f"f_JT={','.join(job_types)}")
        date_mapping = {
            "all time": "",
            "month": "&f_TPR=r2592000",
            "week": "&f_TPR=r604800",
            "24 hours": "&f_TPR=r86400"
        }
        # First enabled date option wins; "all time" adds no filter.
        date_param = next((v for k, v in date_mapping.items() if parameters.get('date', {}).get(k)), "")
        url_parts.append("f_LF=f_AL")  # Easy Apply only
        base_url = "&".join(url_parts)
        return f"?{base_url}{date_param}"

    def next_job_page(self, position, location, job_page):
        """Navigate to results page `job_page` (25 results per page)."""
        self.driver.get(f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={position}{location}&start={job_page * 25}")

    def extract_job_information_from_tile(self, job_tile):
        """Scrape (title, company, location, link, apply_method) from one tile.

        Missing fields degrade to "" (or "Applied" for apply_method, so tiles
        without an apply button are treated as already handled).
        """
        job_title, company, job_location, apply_method, link = "", "", "", "", ""
        try:
            job_title = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').text
            link = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').get_attribute('href').split('?')[0]
            company = job_tile.find_element(By.CLASS_NAME, 'job-card-container__primary-description').text
        except Exception:
            # BUG FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
            pass
        try:
            job_location = job_tile.find_element(By.CLASS_NAME, 'job-card-container__metadata-item').text
        except Exception:
            pass
        try:
            apply_method = job_tile.find_element(By.CLASS_NAME, 'job-card-container__apply-method').text
        except Exception:
            apply_method = "Applied"
        return job_title, company, job_location, link, apply_method

    def is_blacklisted(self, job_title, company, link):
        """Return True if the job should be skipped.

        Title matching is per-word (exact lowercase word must appear in the
        title); company matching is trimmed, case-insensitive; links already
        in seen_jobs are also rejected.
        """
        job_title_words = job_title.lower().split(' ')
        title_blacklisted = any(word in job_title_words for word in self.title_blacklist)
        company_blacklisted = company.strip().lower() in (word.strip().lower() for word in self.company_blacklist)
        link_seen = link in self.seen_jobs
        return title_blacklisted or company_blacklisted or link_seen