# linkedIn_auto_jobs_applier_.../main.py (Python, 221 lines, 10 KiB)

2024-08-22 09:42:49 +00:00
import os
2024-08-04 12:14:56 +00:00
import re
2024-08-22 09:42:49 +00:00
import sys
2024-08-04 12:14:56 +00:00
from pathlib import Path
import yaml
2024-08-22 09:42:49 +00:00
import click
2024-08-04 12:14:56 +00:00
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
2024-08-22 09:42:49 +00:00
from selenium.common.exceptions import WebDriverException, TimeoutException
from lib_resume_builder_AIHawk import Resume,StyleManager,FacadeManager,ResumeGenerator
2024-08-23 19:02:45 +00:00
from src.utils import chromeBrowserOptions
from src.gpt import GPTAnswerer
from src.linkedIn_authenticator import LinkedInAuthenticator
from src.linkedIn_bot_facade import LinkedInBotFacade
from src.linkedIn_job_manager import LinkedInJobManager
from src.job_application_profile import JobApplicationProfile
2024-08-22 09:42:49 +00:00
# Suppress stderr: from import time onwards, everything written through
# Python's sys.stderr (warnings, uncaught tracebacks, library diagnostics)
# is discarded. Only the Python-level stream object is replaced here.
# NOTE(review): the devnull handle is never closed and the original stream
# is not restored (it remains reachable as sys.__stderr__) — presumably
# intended to hide noisy driver/library output; confirm.
sys.stderr = open(os.devnull, 'w')
2024-08-04 12:14:56 +00:00
class ConfigError(Exception):
    """Signal that a configuration or secrets file is missing, unreadable or invalid."""
class ConfigValidator:
    """Validate the YAML configuration and secrets files.

    The class is a plain namespace: every method is static and is called as
    ``ConfigValidator.<method>(...)``. Validation failures are reported by
    raising ``ConfigError``.
    """

    @staticmethod
    def validate_email(email: str) -> bool:
        """Return True when *email* has the shape ``local@domain.tld``.

        Non-string values (for example ``None`` produced by an empty YAML
        field) are rejected with ``False`` instead of letting ``re.match``
        raise ``TypeError``.
        """
        if not isinstance(email, str):
            return False
        return re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', email) is not None

    @staticmethod
    def validate_yaml_file(yaml_path: Path) -> dict:
        """Load *yaml_path* with ``yaml.safe_load`` and return its top-level mapping.

        Raises:
            ConfigError: if the file does not exist, is not valid YAML, or
                its top-level content is not a mapping (an empty file loads
                as ``None``, which callers cannot index).
        """
        try:
            # Read as UTF-8, matching how the plain-text resume YAML is
            # opened elsewhere in this module.
            with open(yaml_path, 'r', encoding='utf-8') as stream:
                content = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            raise ConfigError(f"Error reading file {yaml_path}: {exc}") from exc
        except FileNotFoundError as exc:
            raise ConfigError(f"File not found: {yaml_path}") from exc

        if not isinstance(content, dict):
            raise ConfigError(
                f"Error reading file {yaml_path}: top-level content must be a mapping, "
                f"got {type(content).__name__}"
            )
        return content

    @staticmethod
    def validate_config(config_yaml_path: Path) -> dict:
        """Load and validate the job-search configuration file.

        Checks that every required key is present with the expected type,
        that each filter flag is a boolean, that ``positions`` and
        ``locations`` contain only strings, and that ``distance`` is one of
        the approved values. ``companyBlacklist`` and ``titleBlacklist`` are
        optional: when missing or ``None`` they are replaced by ``[]``.

        Returns:
            The validated (and normalised) configuration dictionary.

        Raises:
            ConfigError: on the first validation failure encountered.
        """
        parameters = ConfigValidator.validate_yaml_file(config_yaml_path)
        required_keys = {
            'remote': bool,
            'experienceLevel': dict,
            'jobTypes': dict,
            'date': dict,
            'positions': list,
            'locations': list,
            'distance': int,
            'companyBlacklist': list,
            'titleBlacklist': list,
        }
        optional_list_keys = ('companyBlacklist', 'titleBlacklist')

        for key, expected_type in required_keys.items():
            if key in optional_list_keys and parameters.get(key) is None:
                # Missing or empty blacklist: normalise to an empty list.
                parameters[key] = []
            elif key not in parameters:
                raise ConfigError(f"Missing or invalid key '{key}' in config file {config_yaml_path}")
            elif not isinstance(parameters[key], expected_type):
                raise ConfigError(f"Invalid type for key '{key}' in config file {config_yaml_path}. Expected {expected_type}.")

        # (section key, label used in the error message, flags that must be bool)
        flag_groups = (
            ('experienceLevel', 'Experience level',
             ['internship', 'entry', 'associate', 'mid-senior level', 'director', 'executive']),
            ('jobTypes', 'Job type',
             ['full-time', 'contract', 'part-time', 'temporary', 'internship', 'other', 'volunteer']),
            ('date', 'Date filter',
             ['all time', 'month', 'week', '24 hours']),
        )
        for section, label, flag_names in flag_groups:
            for flag_name in flag_names:
                if not isinstance(parameters[section].get(flag_name), bool):
                    raise ConfigError(f"{label} '{flag_name}' must be a boolean in config file {config_yaml_path}")

        if not all(isinstance(pos, str) for pos in parameters['positions']):
            raise ConfigError(f"'positions' must be a list of strings in config file {config_yaml_path}")
        if not all(isinstance(loc, str) for loc in parameters['locations']):
            raise ConfigError(f"'locations' must be a list of strings in config file {config_yaml_path}")

        approved_distances = {0, 5, 10, 25, 50, 100}
        distance = parameters['distance']
        # bool is a subclass of int and False == 0, so `distance: false`
        # would otherwise slip through both the type and membership checks.
        if isinstance(distance, bool) or distance not in approved_distances:
            raise ConfigError(f"Invalid distance value in config file {config_yaml_path}. Must be one of: {approved_distances}")

        # Both blacklists are guaranteed to be lists at this point (see the
        # first loop), so no further blacklist check is needed.
        return parameters

    @staticmethod
    def validate_secrets(secrets_yaml_path: Path) -> tuple:
        """Load the secrets file and return ``(email, password, llm_api_key)``.

        The password is converted with ``str`` so purely numeric passwords
        parsed by YAML as numbers are still returned as text.

        Raises:
            ConfigError: if a mandatory secret is missing, the e-mail is
                malformed, or the password is empty.
        """
        secrets = ConfigValidator.validate_yaml_file(secrets_yaml_path)
        # 'llm_api_key' is returned below, so it must be checked here too;
        # otherwise a missing key surfaces as a bare KeyError.
        mandatory_secrets = ['email', 'password', 'llm_api_key']

        for secret in mandatory_secrets:
            if secret not in secrets:
                raise ConfigError(f"Missing secret '{secret}' in file {secrets_yaml_path}")

        if not ConfigValidator.validate_email(secrets['email']):
            raise ConfigError(f"Invalid email format in secrets file {secrets_yaml_path}.")
        if not secrets['password']:
            raise ConfigError(f"Password cannot be empty in secrets file {secrets_yaml_path}.")
        return secrets['email'], str(secrets['password']), secrets['llm_api_key']
2024-08-04 12:14:56 +00:00
class FileManager:
@staticmethod
def find_file(name_containing: str, with_extension: str, at_path: Path) -> Path:
2024-08-22 09:42:49 +00:00
return next((file for file in at_path.iterdir() if name_containing.lower() in file.name.lower() and file.suffix.lower() == with_extension.lower()), None)
2024-08-04 12:14:56 +00:00
@staticmethod
def validate_data_folder(app_data_folder: Path) -> tuple:
if not app_data_folder.exists() or not app_data_folder.is_dir():
raise FileNotFoundError(f"Data folder not found: {app_data_folder}")
2024-08-22 09:42:49 +00:00
required_files = ['secrets.yaml', 'config.yaml', 'plain_text_resume.yaml']
missing_files = [file for file in required_files if not (app_data_folder / file).exists()]
2024-08-04 12:14:56 +00:00
if missing_files:
raise FileNotFoundError(f"Missing files in the data folder: {', '.join(missing_files)}")
2024-08-22 09:42:49 +00:00
2024-08-04 12:14:56 +00:00
output_folder = app_data_folder / 'output'
output_folder.mkdir(exist_ok=True)
2024-08-22 09:42:49 +00:00
return (app_data_folder / 'secrets.yaml', app_data_folder / 'config.yaml', app_data_folder / 'plain_text_resume.yaml', output_folder)
2024-08-04 12:14:56 +00:00
@staticmethod
2024-08-04 12:40:32 +00:00
def file_paths_to_dict(resume_file: Path | None, plain_text_resume_file: Path) -> dict:
2024-08-04 12:14:56 +00:00
if not plain_text_resume_file.exists():
raise FileNotFoundError(f"Plain text resume file not found: {plain_text_resume_file}")
2024-08-22 09:42:49 +00:00
2024-08-04 12:40:32 +00:00
result = {'plainTextResume': plain_text_resume_file}
2024-08-22 09:42:49 +00:00
if resume_file:
2024-08-04 12:40:32 +00:00
if not resume_file.exists():
raise FileNotFoundError(f"Resume file not found: {resume_file}")
result['resume'] = resume_file
2024-08-22 09:42:49 +00:00
2024-08-04 12:40:32 +00:00
return result
2024-08-04 12:14:56 +00:00
2024-08-22 09:42:49 +00:00
def init_browser() -> webdriver.Chrome:
    """Create and return a Chrome WebDriver instance.

    Browser options come from ``chromeBrowserOptions()`` and the driver
    binary path from ``ChromeDriverManager().install()``.

    Raises:
        RuntimeError: if any step fails. The original exception is attached
            as ``__cause__`` so the root failure stays visible in tracebacks.
    """
    try:
        options = chromeBrowserOptions()
        service = ChromeService(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=options)
    except Exception as e:
        # Explicit chaining marks the driver/setup error as the direct cause.
        raise RuntimeError(f"Failed to initialize browser: {str(e)}") from e
def create_and_run_bot(email, password, parameters, llm_api_key):
    """Assemble the resume tooling and the LinkedIn bot, then log in and apply.

    Args:
        email: Account e-mail handed to the bot through ``set_secrets``.
        password: Account password handed to the bot through ``set_secrets``.
        parameters: Configuration dictionary. Must contain
            ``parameters['uploads']['plainTextResume']`` (path of the
            plain-text resume, read as UTF-8); the whole dictionary is also
            passed to ``GPTAnswerer`` and ``bot.set_parameters``.
        llm_api_key: API key passed to ``FacadeManager`` and ``GPTAnswerer``.

    Raises:
        RuntimeError: wraps any exception other than ``WebDriverException``;
            the original exception is attached as ``__cause__``.

    A ``WebDriverException`` is reported with ``print`` and swallowed, so the
    function then returns ``None`` normally.
    """
    try:
        style_manager = StyleManager()
        resume_generator = ResumeGenerator()
        with open(parameters['uploads']['plainTextResume'], "r", encoding='utf-8') as file:
            plain_text_resume = file.read()
        resume_object = Resume(plain_text_resume)
        resume_generator_manager = FacadeManager(llm_api_key, style_manager, resume_generator, resume_object, Path("data_folder/output"))

        # Clear the terminal before and after the style selection step.
        os.system('cls' if os.name == 'nt' else 'clear')
        resume_generator_manager.choose_style()
        os.system('cls' if os.name == 'nt' else 'clear')

        job_application_profile_object = JobApplicationProfile(plain_text_resume)

        # NOTE(review): the browser is never explicitly quit in this function,
        # on success or on failure — confirm whether that is intentional.
        browser = init_browser()
        login_component = LinkedInAuthenticator(browser)
        apply_component = LinkedInJobManager(browser)
        gpt_answerer_component = GPTAnswerer(parameters, llm_api_key)

        bot = LinkedInBotFacade(login_component, apply_component)
        bot.set_secrets(email, password)
        bot.set_job_application_profile_and_resume(job_application_profile_object, resume_object)
        bot.set_gpt_answerer_and_resume_generator(gpt_answerer_component, resume_generator_manager)
        bot.set_parameters(parameters)
        bot.start_login()
        bot.start_apply()
    except WebDriverException as e:
        print(f"WebDriver error occurred: {e}")
    except Exception as e:
        # Explicit chaining keeps the underlying failure attached as the cause.
        raise RuntimeError(f"Error running the bot: {str(e)}") from e
2024-08-22 09:42:49 +00:00
2024-08-04 12:14:56 +00:00
# Command-line entry point: validates the data folder, configuration and
# secrets, then hands everything to create_and_run_bot. Every failure is
# reported with print() together with a link to the configuration guide;
# nothing is re-raised.
# Documented with comments on purpose: click would expose a docstring as the
# command's --help text.
@click.command()
@click.option('--resume', type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), help="Path to the resume PDF file")
def main(resume: Path = None):
    try:
        base_dir = Path("data_folder")
        validated_paths = FileManager.validate_data_folder(base_dir)
        secrets_path, config_path, plain_resume_path, output_dir = validated_paths

        parameters = ConfigValidator.validate_config(config_path)
        email, password, llm_api_key = ConfigValidator.validate_secrets(secrets_path)

        # Attach the file locations the bot needs to the validated config.
        parameters['uploads'] = FileManager.file_paths_to_dict(resume, plain_resume_path)
        parameters['outputFileDirectory'] = output_dir

        create_and_run_bot(email, password, parameters, llm_api_key)
    except ConfigError as config_err:
        print(f"Configuration error: {str(config_err)}")
        print("Refer to the configuration guide for troubleshooting: https://github.com/feder-cr/LinkedIn_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
    except FileNotFoundError as missing_err:
        print(f"File not found: {str(missing_err)}")
        print("Ensure all required files are present in the data folder.")
        print("Refer to the file setup guide: https://github.com/feder-cr/LinkedIn_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
    except RuntimeError as runtime_err:
        # Named runtime_err so the module-level `re` import is not shadowed.
        print(f"Runtime error: {str(runtime_err)}")
        print("Refer to the configuration and troubleshooting guide: https://github.com/feder-cr/LinkedIn_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
    except Exception as unexpected_err:
        print(f"An unexpected error occurred: {str(unexpected_err)}")
        print("Refer to the general troubleshooting guide: https://github.com/feder-cr/LinkedIn_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
2024-08-04 12:14:56 +00:00
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()