2024-08-04 12:14:56 +00:00
import json
import os
import re
import textwrap
from datetime import datetime
from typing import Dict , List
2024-08-22 09:42:49 +00:00
from pathlib import Path
2024-08-04 12:14:56 +00:00
from dotenv import load_dotenv
from langchain_core . messages . ai import AIMessage
from langchain_core . output_parsers import StrOutputParser
from langchain_core . prompt_values import StringPromptValue
from langchain_core . prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from Levenshtein import distance
import strings
load_dotenv ( )
class LLMLogger:
    """Append one JSON record per LLM call (prompt, reply, token usage, cost) to a log file."""

    # Per-token prices used for the cost estimate.
    # NOTE(review): presumably the gpt-4o-mini rates in USD -- confirm before
    # relying on the logged cost.
    PROMPT_PRICE_PER_TOKEN = 0.00000015
    COMPLETION_PRICE_PER_TOKEN = 0.0000006

    # Destination of the call log; entries are appended, never rewritten.
    CALLS_LOG = Path("data_folder/output") / "open_ai_calls.json"

    def __init__(self, llm: "ChatOpenAI"):
        self.llm = llm

    @staticmethod
    def log_request(prompts, parsed_reply: Dict[str, Dict]):
        """Append a log entry describing one LLM request/response pair.

        Args:
            prompts: The input sent to the model. A ``StringPromptValue`` is
                logged as its text; any object exposing ``.messages`` is
                logged as ``{"prompt_1": ..., "prompt_2": ...}``; anything
                else (e.g. a plain dict) is logged unchanged.
            parsed_reply: The structure produced by
                ``LoggerChatModel.parse_llmresult`` (keys ``content``,
                ``response_metadata`` and ``usage_metadata``).

        Raises:
            KeyError: If ``parsed_reply`` lacks one of the expected keys.
        """
        if isinstance(prompts, StringPromptValue):
            prompts = prompts.text
        elif hasattr(prompts, "messages"):
            prompts = {
                f"prompt_{index}": message.content
                for index, message in enumerate(prompts.messages, start=1)
            }
        # A plain dict has no ``.messages``; it is written to the log as given
        # instead of failing with AttributeError.

        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Token usage details from the response.
        token_usage = parsed_reply["usage_metadata"]
        output_tokens = token_usage["output_tokens"]
        input_tokens = token_usage["input_tokens"]
        total_tokens = token_usage["total_tokens"]

        # Model details from the response.
        model_name = parsed_reply["response_metadata"]["model_name"]

        # Estimated cost of this call.
        total_cost = (
            input_tokens * LLMLogger.PROMPT_PRICE_PER_TOKEN
            + output_tokens * LLMLogger.COMPLETION_PRICE_PER_TOKEN
        )

        log_entry = {
            "model": model_name,
            "time": current_time,
            "prompts": prompts,
            "replies": parsed_reply["content"],  # Response content
            "total_tokens": total_tokens,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "total_cost": total_cost,
        }

        # Make sure the output folder exists so the first call does not fail.
        calls_log = LLMLogger.CALLS_LOG
        calls_log.parent.mkdir(parents=True, exist_ok=True)
        with open(calls_log, "a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry, ensure_ascii=False, indent=4) + "\n")


class LoggerChatModel:
    """Callable wrapper around a chat model that logs every call via ``LLMLogger``."""

    def __init__(self, llm: "ChatOpenAI"):
        self.llm = llm

    def __call__(self, messages: List[Dict[str, str]]) -> "AIMessage":
        """Send ``messages`` to the wrapped model, log the exchange, return the reply.

        The reply is returned unparsed (the model's message object), so the
        wrapper can sit in a chain in front of an output parser.
        """
        # ``invoke`` is the supported entry point; calling the model object
        # directly is deprecated in LangChain.
        reply = self.llm.invoke(messages)
        parsed_reply = self.parse_llmresult(reply)
        LLMLogger.log_request(prompts=messages, parsed_reply=parsed_reply)
        return reply

    def parse_llmresult(self, llmresult: "AIMessage") -> Dict[str, Dict]:
        """Flatten a model reply into plain dicts suitable for logging.

        Missing metadata fields fall back to ``""`` / ``None`` / ``0``; a
        reply whose ``response_metadata`` or ``usage_metadata`` is ``None``
        is treated as having no metadata at all.
        """
        response_metadata = llmresult.response_metadata or {}
        usage_metadata = llmresult.usage_metadata or {}
        return {
            "content": llmresult.content,
            "response_metadata": {
                "model_name": response_metadata.get("model_name", ""),
                "system_fingerprint": response_metadata.get("system_fingerprint", ""),
                "finish_reason": response_metadata.get("finish_reason", ""),
                "logprobs": response_metadata.get("logprobs", None),
            },
            "id": llmresult.id,
            "usage_metadata": {
                "input_tokens": usage_metadata.get("input_tokens", 0),
                "output_tokens": usage_metadata.get("output_tokens", 0),
                "total_tokens": usage_metadata.get("total_tokens", 0),
            },
        }
class GPTAnswerer:
    """Answer job-application questions by prompting an LLM with resume data.

    A resume (``set_resume``), a job (``set_job``) and optionally a job
    application profile (``set_job_application_profile``) must be attached
    before the ``answer_*`` methods are used.
    """

    # Prompt that asks the model which resume section a question refers to.
    _SECTION_PROMPT = textwrap.dedent(
        """
        You are assisting a bot designed to automatically apply for jobs on LinkedIn. The bot receives various questions about job applications and needs to determine the most relevant section of the resume to provide an accurate response.

        For the following question: '{question}', determine which section of the resume is most relevant.
        Respond with exactly one of the following options:
        - Personal information
        - Self Identification
        - Legal Authorization
        - Work Preferences
        - Education Details
        - Experience Details
        - Projects
        - Availability
        - Salary Expectations
        - Certifications
        - Languages
        - Interests
        - Cover letter

        Here are detailed guidelines to help you choose the correct section:

        1. **Personal Information**:
           - **Purpose**: Contains your basic contact details and online profiles.
           - **Use When**: The question is about how to contact you or requests links to your professional online presence.
           - **Examples**: Email address, phone number, LinkedIn profile, GitHub repository, personal website.
        2. **Self Identification**:
           - **Purpose**: Covers personal identifiers and demographic information.
           - **Use When**: The question pertains to your gender, pronouns, veteran status, disability status, or ethnicity.
           - **Examples**: Gender, pronouns, veteran status, disability status, ethnicity.
        3. **Legal Authorization**:
           - **Purpose**: Details your work authorization status and visa requirements.
           - **Use When**: The question asks about your ability to work in specific countries or if you need sponsorship or visas.
           - **Examples**: Work authorization in EU and US, visa requirements, legally allowed to work.
        4. **Work Preferences**:
           - **Purpose**: Specifies your preferences regarding work conditions and job roles.
           - **Use When**: The question is about your preferences for remote work, in-person work, relocation, and willingness to undergo assessments or background checks.
           - **Examples**: Remote work, in-person work, open to relocation, willingness to complete assessments.
        5. **Education Details**:
           - **Purpose**: Contains information about your academic qualifications.
           - **Use When**: The question concerns your degrees, universities attended, GPA, and relevant coursework.
           - **Examples**: Degree, university, GPA, field of study, exams.
        6. **Experience Details**:
           - **Purpose**: Details your professional work history and key responsibilities.
           - **Use When**: The question pertains to your job roles, responsibilities, and achievements in previous positions.
           - **Examples**: Job positions, company names, key responsibilities, skills acquired.
        7. **Projects**:
           - **Purpose**: Highlights specific projects you have worked on.
           - **Use When**: The question asks about particular projects, their descriptions, or links to project repositories.
           - **Examples**: Project names, descriptions, links to project repositories.
        8. **Availability**:
           - **Purpose**: Provides information on your availability for new roles.
           - **Use When**: The question is about how soon you can start a new job or your notice period.
           - **Examples**: Notice period, availability to start.
        9. **Salary Expectations**:
           - **Purpose**: Covers your expected salary range.
           - **Use When**: The question pertains to your salary expectations or compensation requirements.
           - **Examples**: Desired salary range.
        10. **Certifications**:
           - **Purpose**: Lists your professional certifications or licenses.
           - **Use When**: The question involves your certifications or qualifications from recognized organizations.
           - **Examples**: Certification names, issuing bodies, dates of validity.
        11. **Languages**:
           - **Purpose**: Describes the languages you can speak and your proficiency levels.
           - **Use When**: The question asks about your language skills or proficiency in specific languages.
           - **Examples**: Languages spoken, proficiency levels.
        12. **Interests**:
           - **Purpose**: Details your personal or professional interests.
           - **Use When**: The question is about your hobbies, interests, or activities outside of work.
           - **Examples**: Personal hobbies, professional interests.
        13. **Cover Letter**:
           - **Purpose**: Contains your personalized cover letter or statement.
           - **Use When**: The question involves your cover letter or specific written content intended for the job application.
           - **Examples**: Cover letter content, personalized statements.

        Provide only the exact name of the section from the list above with no additional text.
        """
    )

    def __init__(self, openai_api_key):
        self.llm_cheap = LoggerChatModel(
            ChatOpenAI(
                model_name="gpt-4o-mini",
                openai_api_key=openai_api_key,
                temperature=0.8,
            )
        )

    @property
    def job_description(self):
        """Description of the job currently attached via ``set_job``."""
        return self.job.description

    @staticmethod
    def find_best_match(text: str, options: list[str]) -> str:
        """Return the option with the smallest case-insensitive edit distance to ``text``.

        Raises:
            ValueError: If ``options`` is empty.
        """
        if not options:
            raise ValueError("No options provided to match against")
        lowered = text.lower()
        return min(options, key=lambda option: distance(lowered, option.lower()))

    @staticmethod
    def _remove_placeholders(text: str) -> str:
        """Drop every ``PLACEHOLDER`` marker and trim surrounding whitespace."""
        return text.replace("PLACEHOLDER", "").strip()

    @staticmethod
    def _preprocess_template_string(template: str) -> str:
        """Remove the common leading indentation from a template string."""
        return textwrap.dedent(template)

    @staticmethod
    def _normalize_section_name(raw: str) -> str:
        """Turn a model reply such as ``"- Cover letter\\n"`` into ``"cover_letter"``.

        Surrounding whitespace, list bullets, markdown emphasis, quotes and a
        trailing period are stripped; inner whitespace runs become a single
        underscore.
        """
        cleaned = raw.strip(" \t\r\n-*.`'\"")
        return "_".join(cleaned.lower().split())

    @staticmethod
    def _section_templates() -> dict:
        """Map each normalized section name to its answer prompt template."""
        return {
            "personal_information": strings.personal_information_template,
            "self_identification": strings.self_identification_template,
            "legal_authorization": strings.legal_authorization_template,
            "work_preferences": strings.work_preferences_template,
            "education_details": strings.education_details_template,
            "experience_details": strings.experience_details_template,
            "projects": strings.projects_template,
            "availability": strings.availability_template,
            "salary_expectations": strings.salary_expectations_template,
            "certifications": strings.certifications_template,
            "languages": strings.languages_template,
            "interests": strings.interests_template,
            "cover_letter": strings.coverletter_template,
        }

    def set_resume(self, resume):
        self.resume = resume

    def set_job(self, job):
        """Attach ``job`` and store an LLM-generated summary of its description on it."""
        self.job = job
        self.job.set_summarize_job_description(
            self.summarize_job_description(self.job.description)
        )

    def set_job_application_profile(self, job_application_profile):
        self.job_application_profile = job_application_profile

    def summarize_job_description(self, text: str) -> str:
        """Return the model's summary of the job description ``text``."""
        # Dedent into a local: the shared ``strings`` module is left untouched.
        template = self._preprocess_template_string(strings.summarize_prompt_template)
        return self._create_chain(template).invoke({"text": text})

    def _create_chain(self, template: str):
        """Build a ``prompt | llm | string parser`` chain for ``template``."""
        prompt = ChatPromptTemplate.from_template(template)
        return prompt | self.llm_cheap | StrOutputParser()

    def answer_question_textual_wide_range(self, question: str) -> str:
        """Answer a free-text question from the most relevant resume section.

        The model is first asked which section the question concerns; the
        matching section (taken from the resume, falling back to the job
        application profile) is then passed to that section's prompt. Cover
        letter questions receive the whole resume and the job description.

        Raises:
            ValueError: If the chosen section exists in neither the resume
                nor the job application profile, or has no prompt template.
        """
        templates = self._section_templates()

        reply = self._create_chain(self._SECTION_PROMPT).invoke({"question": question})
        section_name = self._normalize_section_name(reply)

        if section_name == "cover_letter":
            chain = self._create_chain(templates[section_name])
            return chain.invoke(
                {"resume": self.resume, "job_description": self.job_description}
            )

        # The profile is optional; treat it as absent if it was never set.
        profile = getattr(self, "job_application_profile", None)
        resume_section = getattr(self.resume, section_name, None) or getattr(
            profile, section_name, None
        )
        if resume_section is None:
            raise ValueError(
                f"Section '{section_name}' not found in either resume or job_application_profile."
            )

        template = templates.get(section_name)
        if template is None:
            raise ValueError(f"Chain not defined for section '{section_name}'")

        # Only the chain that is actually needed gets built.
        return self._create_chain(template).invoke(
            {"resume_section": resume_section, "question": question}
        )

    def answer_question_numeric(self, question: str, default_experience: int = 3) -> int:
        """Answer a numeric question; fall back to ``default_experience`` if the reply has no number."""
        template = self._preprocess_template_string(strings.numeric_question_template)
        output_str = self._create_chain(template).invoke(
            {
                "resume_educations": self.resume.education_details,
                "resume_jobs": self.resume.experience_details,
                "resume_projects": self.resume.projects,
                "question": question,
            }
        )
        try:
            return self.extract_number_from_string(output_str)
        except ValueError:
            return default_experience

    def extract_number_from_string(self, output_str):
        """Return the first run of digits in ``output_str`` as an int.

        Raises:
            ValueError: If ``output_str`` contains no digits.
        """
        match = re.search(r"\d+", output_str)
        if match is None:
            raise ValueError("No numbers found in the string")
        return int(match.group())

    def answer_question_from_options(self, question: str, options: list[str]) -> str:
        """Pick the option closest to the model's answer to ``question``."""
        template = self._preprocess_template_string(strings.options_template)
        output_str = self._create_chain(template).invoke(
            {"resume": self.resume, "question": question, "options": options}
        )
        return self.find_best_match(output_str, options)

    def resume_or_cover(self, phrase: str) -> str:
        """Classify ``phrase`` as ``"resume"`` or ``"cover"``; defaults to ``"resume"``."""
        prompt_template = """
        Given the following phrase, respond with only 'resume' if the phrase is about a resume, or 'cover' if it's about a cover letter. Do not provide any additional information or explanations.

        phrase: {phrase}
        """
        reply = self._create_chain(prompt_template).invoke({"phrase": phrase})
        # Compare case-insensitively: the model may capitalize its answer.
        normalized = reply.lower()
        if "resume" in normalized:
            return "resume"
        if "cover" in normalized:
            return "cover"
        return "resume"