2024-08-22 09:42:49 +00:00
import os
2024-08-04 12:14:56 +00:00
import re
2024-08-22 09:42:49 +00:00
import sys
2024-08-04 12:14:56 +00:00
from pathlib import Path
import yaml
2024-08-22 09:42:49 +00:00
import click
2024-08-04 12:14:56 +00:00
from selenium import webdriver
from selenium . webdriver . chrome . service import Service as ChromeService
from webdriver_manager . chrome import ChromeDriverManager
2024-08-22 09:42:49 +00:00
from selenium . common . exceptions import WebDriverException , TimeoutException
from lib_resume_builder_AIHawk import Resume , StyleManager , FacadeManager , ResumeGenerator
2024-08-23 19:02:45 +00:00
from src . utils import chromeBrowserOptions
from src . gpt import GPTAnswerer
from src . linkedIn_authenticator import LinkedInAuthenticator
from src . linkedIn_bot_facade import LinkedInBotFacade
from src . linkedIn_job_manager import LinkedInJobManager
from src . job_application_profile import JobApplicationProfile
2024-08-22 09:42:49 +00:00
# Suppress stderr for the whole process: everything written to sys.stderr
# after this point (including uncaught tracebacks) is discarded.
# The devnull handle is deliberately never closed; it must outlive every writer.
sys.stderr = open(os.devnull, 'w')
2024-08-04 12:14:56 +00:00
class ConfigError(Exception):
    """Raised when a configuration or secrets file is missing, unreadable or invalid."""
class ConfigValidator:
    """Static validators for the YAML configuration and secrets files.

    Validation failures are reported by raising ``ConfigError``.
    """

    # Applied with fullmatch(), so the pattern carries no ^/$ anchors.
    _EMAIL_PATTERN = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    # Keys that may be omitted or left empty in the YAML; both mean "no entries".
    _OPTIONAL_LISTS = ('companyBlacklist', 'titleBlacklist')

    @staticmethod
    def validate_email(email: str) -> bool:
        """Return True when *email* is a string shaped like ``local@domain.tld``.

        Non-string values (for example ``None`` from an empty YAML field) are
        reported as invalid instead of raising ``TypeError``.
        """
        if not isinstance(email, str):
            return False
        # fullmatch() rejects a trailing newline, which `$` with match() accepts.
        return ConfigValidator._EMAIL_PATTERN.fullmatch(email) is not None

    @staticmethod
    def validate_yaml_file(yaml_path: Path) -> dict:
        """Load *yaml_path* and return its top-level mapping.

        Raises:
            ConfigError: if the file is missing, is not valid UTF-8, is not
                valid YAML, or its top-level content is not a mapping (an
                empty file loads as ``None``).
        """
        try:
            with open(yaml_path, 'r', encoding='utf-8') as stream:
                content = yaml.safe_load(stream)
        except (yaml.YAMLError, UnicodeDecodeError) as exc:
            raise ConfigError(f"Error reading file {yaml_path}: {exc}") from exc
        except FileNotFoundError as exc:
            raise ConfigError(f"File not found: {yaml_path}") from exc
        if not isinstance(content, dict):
            raise ConfigError(f"Error reading file {yaml_path}: top-level content must be a mapping")
        return content

    @staticmethod
    def validate_config(config_yaml_path: Path) -> dict:
        """Load the search configuration and check every required key.

        Missing or empty blacklists are normalised to ``[]``.

        Returns:
            The validated parameters dictionary.

        Raises:
            ConfigError: if a key is missing, has the wrong type, or holds a
                value outside the accepted set.
        """
        parameters = ConfigValidator.validate_yaml_file(config_yaml_path)
        required_keys = {
            'remote': bool,
            'experienceLevel': dict,
            'jobTypes': dict,
            'date': dict,
            'positions': list,
            'locations': list,
            'distance': int,
            'companyBlacklist': list,
            'titleBlacklist': list,
        }

        for key, expected_type in required_keys.items():
            if key in ConfigValidator._OPTIONAL_LISTS and parameters.get(key) is None:
                # Absent key or explicit null: treat as an empty blacklist.
                parameters[key] = []
            elif key not in parameters:
                raise ConfigError(f"Missing or invalid key '{key}' in config file {config_yaml_path}")
            elif not isinstance(parameters[key], expected_type):
                raise ConfigError(f"Invalid type for key '{key}' in config file {config_yaml_path}. Expected {expected_type}.")

        # Each section must map every listed option to an explicit boolean.
        boolean_sections = (
            ('experienceLevel', 'Experience level',
             ['internship', 'entry', 'associate', 'mid-senior level', 'director', 'executive']),
            ('jobTypes', 'Job type',
             ['full-time', 'contract', 'part-time', 'temporary', 'internship', 'other', 'volunteer']),
            ('date', 'Date filter',
             ['all time', 'month', 'week', '24 hours']),
        )
        for section, label, options in boolean_sections:
            for option in options:
                if not isinstance(parameters[section].get(option), bool):
                    raise ConfigError(f"{label} '{option}' must be a boolean in config file {config_yaml_path}")

        for key in ('positions', 'locations'):
            if not all(isinstance(item, str) for item in parameters[key]):
                raise ConfigError(f"'{key}' must be a list of strings in config file {config_yaml_path}")

        approved_distances = {0, 5, 10, 25, 50, 100}
        distance = parameters['distance']
        # bool is a subclass of int, so `distance: false` would otherwise pass as 0.
        if isinstance(distance, bool) or distance not in approved_distances:
            raise ConfigError(f"Invalid distance value in config file {config_yaml_path}. Must be one of: {approved_distances}")

        return parameters

    @staticmethod
    def validate_secrets(secrets_yaml_path: Path) -> tuple:
        """Load the secrets file and return ``(email, password, llm_api_key)``.

        The password is converted to ``str`` before it is returned.

        Raises:
            ConfigError: if a mandatory secret is missing, the email is
                malformed, or the password is empty.
        """
        secrets = ConfigValidator.validate_yaml_file(secrets_yaml_path)
        # llm_api_key is returned below, so it has to be checked here as well;
        # otherwise a missing key escapes as a bare KeyError.
        mandatory_secrets = ['email', 'password', 'llm_api_key']

        for secret in mandatory_secrets:
            if secret not in secrets:
                raise ConfigError(f"Missing secret '{secret}' in file {secrets_yaml_path}")

        if not ConfigValidator.validate_email(secrets['email']):
            raise ConfigError(f"Invalid email format in secrets file {secrets_yaml_path}.")
        if not secrets['password']:
            raise ConfigError(f"Password cannot be empty in secrets file {secrets_yaml_path}.")

        return secrets['email'], str(secrets['password']), secrets['llm_api_key']
2024-08-04 12:14:56 +00:00
class FileManager :
@staticmethod
def find_file ( name_containing : str , with_extension : str , at_path : Path ) - > Path :
2024-08-22 09:42:49 +00:00
return next ( ( file for file in at_path . iterdir ( ) if name_containing . lower ( ) in file . name . lower ( ) and file . suffix . lower ( ) == with_extension . lower ( ) ) , None )
2024-08-04 12:14:56 +00:00
@staticmethod
def validate_data_folder ( app_data_folder : Path ) - > tuple :
if not app_data_folder . exists ( ) or not app_data_folder . is_dir ( ) :
raise FileNotFoundError ( f " Data folder not found: { app_data_folder } " )
2024-08-22 09:42:49 +00:00
required_files = [ ' secrets.yaml ' , ' config.yaml ' , ' plain_text_resume.yaml ' ]
missing_files = [ file for file in required_files if not ( app_data_folder / file ) . exists ( ) ]
2024-08-04 12:14:56 +00:00
if missing_files :
raise FileNotFoundError ( f " Missing files in the data folder: { ' , ' . join ( missing_files ) } " )
2024-08-22 09:42:49 +00:00
2024-08-04 12:14:56 +00:00
output_folder = app_data_folder / ' output '
output_folder . mkdir ( exist_ok = True )
2024-08-22 09:42:49 +00:00
return ( app_data_folder / ' secrets.yaml ' , app_data_folder / ' config.yaml ' , app_data_folder / ' plain_text_resume.yaml ' , output_folder )
2024-08-04 12:14:56 +00:00
@staticmethod
2024-08-04 12:40:32 +00:00
def file_paths_to_dict ( resume_file : Path | None , plain_text_resume_file : Path ) - > dict :
2024-08-04 12:14:56 +00:00
if not plain_text_resume_file . exists ( ) :
raise FileNotFoundError ( f " Plain text resume file not found: { plain_text_resume_file } " )
2024-08-22 09:42:49 +00:00
2024-08-04 12:40:32 +00:00
result = { ' plainTextResume ' : plain_text_resume_file }
2024-08-22 09:42:49 +00:00
if resume_file :
2024-08-04 12:40:32 +00:00
if not resume_file . exists ( ) :
raise FileNotFoundError ( f " Resume file not found: { resume_file } " )
result [ ' resume ' ] = resume_file
2024-08-22 09:42:49 +00:00
2024-08-04 12:40:32 +00:00
return result
2024-08-04 12:14:56 +00:00
2024-08-22 09:42:49 +00:00
def init_browser() -> webdriver.Chrome:
    """Create a Chrome WebDriver using the project's browser options.

    The driver service is built from the path returned by
    ``ChromeDriverManager().install()``.

    Raises:
        RuntimeError: if any step of the setup fails; the original exception
            is attached as the cause.
    """
    try:
        options = chromeBrowserOptions()
        service = ChromeService(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=options)
    except Exception as e:
        raise RuntimeError(f"Failed to initialize browser: {e}") from e
2024-08-31 20:58:40 +00:00
def create_and_run_bot(email, password, parameters, llm_api_key):
    """Build the resume generator, browser and bot components, then log in and apply.

    Args:
        email: Account email handed to the bot via ``set_secrets``.
        password: Account password handed to the bot via ``set_secrets``.
        parameters: Validated configuration; must contain
            ``parameters['uploads']['plainTextResume']``. When present,
            ``parameters['outputFileDirectory']`` is used as the resume
            output folder, otherwise ``data_folder/output``.
        llm_api_key: API key passed to ``FacadeManager`` and ``GPTAnswerer``.

    Raises:
        RuntimeError: for any failure other than a ``WebDriverException``,
            which is only printed.
    """
    try:
        style_manager = StyleManager()
        resume_generator = ResumeGenerator()
        with open(parameters['uploads']['plainTextResume'], "r", encoding='utf-8') as file:
            plain_text_resume = file.read()
        resume_object = Resume(plain_text_resume)

        # Honour the output folder chosen by the caller; fall back to the
        # historical hard-coded location when none was supplied.
        output_directory = parameters.get('outputFileDirectory', Path("data_folder/output"))
        resume_generator_manager = FacadeManager(llm_api_key, style_manager, resume_generator, resume_object, output_directory)

        # Clear the terminal before and after the style selection step.
        os.system('cls' if os.name == 'nt' else 'clear')
        resume_generator_manager.choose_style()
        os.system('cls' if os.name == 'nt' else 'clear')

        job_application_profile_object = JobApplicationProfile(plain_text_resume)

        # NOTE(review): the browser is never quit in this function; confirm
        # whether it should be closed when the run fails or finishes.
        browser = init_browser()
        login_component = LinkedInAuthenticator(browser)
        apply_component = LinkedInJobManager(browser)
        gpt_answerer_component = GPTAnswerer(parameters, llm_api_key)

        bot = LinkedInBotFacade(login_component, apply_component)
        bot.set_secrets(email, password)
        bot.set_job_application_profile_and_resume(job_application_profile_object, resume_object)
        bot.set_gpt_answerer_and_resume_generator(gpt_answerer_component, resume_generator_manager)
        bot.set_parameters(parameters)
        bot.start_login()
        bot.start_apply()
    except WebDriverException as e:
        # WebDriver failures are reported on stdout and do not propagate.
        print(f"WebDriver error occurred: {e}")
    except Exception as e:
        raise RuntimeError(f"Error running the bot: {e}") from e
2024-08-22 09:42:49 +00:00
2024-08-04 12:14:56 +00:00
@click.command()
@click.option('--resume', type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), help="Path to the resume PDF file")
def main(resume: Path | None = None):
    """Validate the data folder, configuration and secrets, then run the bot."""
    # Every failure is reported on stdout together with a pointer to the
    # relevant guide; nothing is re-raised from here.
    try:
        data_folder = Path("data_folder")
        secrets_file, config_file, plain_text_resume_file, output_folder = FileManager.validate_data_folder(data_folder)

        parameters = ConfigValidator.validate_config(config_file)
        email, password, llm_api_key = ConfigValidator.validate_secrets(secrets_file)

        parameters['uploads'] = FileManager.file_paths_to_dict(resume, plain_text_resume_file)
        parameters['outputFileDirectory'] = output_folder

        create_and_run_bot(email, password, parameters, llm_api_key)
    except ConfigError as config_error:
        print(f"Configuration error: {config_error}")
        print("Refer to the configuration guide for troubleshooting: https://github.com/feder-cr/LinkedIn_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
    except FileNotFoundError as missing_file_error:
        print(f"File not found: {missing_file_error}")
        print("Ensure all required files are present in the data folder.")
        print("Refer to the file setup guide: https://github.com/feder-cr/LinkedIn_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
    except RuntimeError as runtime_error:
        # Named runtime_error rather than `re` so the imported regex module is not shadowed.
        print(f"Runtime error: {runtime_error}")
        print("Refer to the configuration and troubleshooting guide: https://github.com/feder-cr/LinkedIn_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
    except Exception as unexpected_error:
        print(f"An unexpected error occurred: {unexpected_error}")
        print("Refer to the general troubleshooting guide: https://github.com/feder-cr/LinkedIn_AIHawk_automatic_job_application/blob/main/readme.md#configuration")
2024-08-04 12:14:56 +00:00
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()