# linkedIn_auto_jobs_applier_.../gpt.py
# 2024-08-22 10:42:49 +01:00
#
# 331 lines
# 15 KiB
# Python

import json
import os
import re
import textwrap
from datetime import datetime
from typing import Dict, List
from pathlib import Path
from dotenv import load_dotenv
from langchain_core.messages.ai import AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompt_values import StringPromptValue
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from Levenshtein import distance
import strings
# Read a local .env file (if one is found) into the process environment at
# import time, before any of the classes below are instantiated.
load_dotenv()
class LLMLogger:
    """Append a record of every LLM call (prompts, reply, token usage, cost) to a log file.

    Records are written to ``data_folder/output/open_ai_calls.json`` relative
    to the current working directory. Each record is a pretty-printed JSON
    object appended after the previous one, so the file as a whole is a
    sequence of JSON documents rather than one JSON value.
    """

    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    @staticmethod
    def log_request(prompts, parsed_reply: Dict[str, Dict]):
        """Append one log entry describing a single LLM request/response pair.

        Args:
            prompts: What was sent to the model. A ``StringPromptValue`` is
                logged as its text; a ``dict`` is logged unchanged; anything
                else must expose ``.messages`` (objects with ``.content``) and
                is logged as ``{"prompt_1": ..., "prompt_2": ...}``.
            parsed_reply: Mapping with the keys ``"content"``,
                ``"response_metadata"`` (containing ``"model_name"``) and
                ``"usage_metadata"`` (containing ``"input_tokens"``,
                ``"output_tokens"`` and ``"total_tokens"``).

        Raises:
            KeyError: If ``parsed_reply`` lacks one of the keys listed above.
            AttributeError: If ``prompts`` is none of the supported shapes.
        """
        log_dir = Path("data_folder/output")
        # The directory may not exist on a fresh checkout; opening the file in
        # append mode would otherwise fail with FileNotFoundError.
        log_dir.mkdir(parents=True, exist_ok=True)
        calls_log = log_dir / "open_ai_calls.json"

        if isinstance(prompts, StringPromptValue):
            prompts = prompts.text
        elif not isinstance(prompts, dict):
            # Prompt values carrying a list of messages: number them from 1.
            prompts = {
                f"prompt_{index}": message.content
                for index, message in enumerate(prompts.messages, start=1)
            }
        # A plain dict has no ``.messages`` attribute, so it is logged as-is.

        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Token usage reported with the response.
        token_usage = parsed_reply["usage_metadata"]
        output_tokens = token_usage["output_tokens"]
        input_tokens = token_usage["input_tokens"]
        total_tokens = token_usage["total_tokens"]

        model_name = parsed_reply["response_metadata"]["model_name"]

        # Hard-coded per-token prices used for the cost estimate, applied
        # regardless of model_name.
        # NOTE(review): presumably USD list prices for gpt-4o-mini -- confirm.
        prompt_price_per_token = 0.00000015
        completion_price_per_token = 0.0000006
        total_cost = (input_tokens * prompt_price_per_token) + (
            output_tokens * completion_price_per_token
        )

        log_entry = {
            "model": model_name,
            "time": current_time,
            "prompts": prompts,
            "replies": parsed_reply["content"],  # Response content
            "total_tokens": total_tokens,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "total_cost": total_cost,
        }

        with open(calls_log, "a", encoding="utf-8") as f:
            json_string = json.dumps(log_entry, ensure_ascii=False, indent=4)
            f.write(json_string + "\n")
class LoggerChatModel:
    """Callable wrapper around a chat model that logs every call via ``LLMLogger``.

    Instances are used as a step in the ``prompt | model | parser`` chains
    built in this file, so ``__call__`` receives the formatted prompt and must
    return the model's reply message.
    """

    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    def __call__(self, messages: List[Dict[str, str]]) -> AIMessage:
        """Send ``messages`` to the wrapped model, log the exchange, return the reply.

        Args:
            messages: The prompt to send; it is forwarded unchanged to the
                model and to ``LLMLogger.log_request``.

        Returns:
            The reply message produced by the model (not a plain string).
        """
        # ``invoke`` is the supported entry point; calling the model object
        # directly is deprecated in LangChain and ``invoke`` explicitly
        # accepts prompt values as well as message lists.
        reply = self.llm.invoke(messages)
        parsed_reply = self.parse_llmresult(reply)
        LLMLogger.log_request(prompts=messages, parsed_reply=parsed_reply)
        return reply

    def parse_llmresult(self, llmresult: AIMessage) -> Dict[str, Dict]:
        """Flatten a reply message into the plain dict that ``LLMLogger`` expects.

        Args:
            llmresult: Reply object exposing ``content``, ``response_metadata``,
                ``id`` and ``usage_metadata``.

        Returns:
            A dict with the keys ``"content"``, ``"response_metadata"``,
            ``"id"`` and ``"usage_metadata"``. Missing metadata fields default
            to ``""`` (strings), ``None`` (logprobs) or ``0`` (token counts).
        """
        # Either metadata mapping may be None (e.g. when the provider reports
        # no usage); treat that the same as "no fields present".
        response_metadata = llmresult.response_metadata or {}
        usage_metadata = llmresult.usage_metadata or {}

        return {
            "content": llmresult.content,
            "response_metadata": {
                "model_name": response_metadata.get("model_name", ""),
                "system_fingerprint": response_metadata.get("system_fingerprint", ""),
                "finish_reason": response_metadata.get("finish_reason", ""),
                "logprobs": response_metadata.get("logprobs", None),
            },
            "id": llmresult.id,
            "usage_metadata": {
                "input_tokens": usage_metadata.get("input_tokens", 0),
                "output_tokens": usage_metadata.get("output_tokens", 0),
                "total_tokens": usage_metadata.get("total_tokens", 0),
            },
        }
class GPTAnswerer:
    """Answer job-application form questions with an LLM, based on a resume.

    Before asking questions, callers are expected to provide the data the
    methods read: ``set_resume``, ``set_job`` and
    ``set_job_application_profile``.
    """

    def __init__(self, openai_api_key):
        self.llm_cheap = LoggerChatModel(
            ChatOpenAI(model_name="gpt-4o-mini", openai_api_key=openai_api_key, temperature=0.8)
        )

    @property
    def job_description(self):
        """Description of the job set with ``set_job``."""
        return self.job.description

    @staticmethod
    def find_best_match(text: str, options: list[str]) -> str:
        """Return the option with the smallest case-insensitive edit distance to ``text``.

        Ties go to the earliest option. Raises ``ValueError`` if ``options``
        is empty.
        """
        needle = text.lower()
        return min(options, key=lambda option: distance(needle, option.lower()))

    @staticmethod
    def _remove_placeholders(text: str) -> str:
        """Drop every literal ``PLACEHOLDER`` marker and trim surrounding whitespace."""
        text = text.replace("PLACEHOLDER", "")
        return text.strip()

    @staticmethod
    def _preprocess_template_string(template: str) -> str:
        """Remove the common leading indentation from a multi-line template."""
        return textwrap.dedent(template)

    @staticmethod
    def _normalize_section_name(raw: str) -> str:
        """Turn the model's section answer into a snake_case attribute name.

        Every run of non-letter characters becomes one underscore and
        leading/trailing underscores are dropped, so list bullets, markdown
        emphasis, numbering, punctuation and stray whitespace around the
        answer do not prevent a match (``"- Cover letter\\n"`` ->
        ``"cover_letter"``).
        """
        return re.sub(r"[^a-z]+", "_", raw.lower()).strip("_")

    @staticmethod
    def _classify_resume_or_cover(response: str) -> str:
        """Map the model's reply to ``"resume"`` or ``"cover"``.

        Matching is case-insensitive. ``"resume"`` wins when both words
        appear and is also the fallback when neither does.
        """
        lowered = response.lower()
        if "resume" not in lowered and "cover" in lowered:
            return "cover"
        return "resume"

    def set_resume(self, resume):
        self.resume = resume

    def set_job(self, job):
        """Store ``job`` and attach an LLM-generated summary of its description to it."""
        self.job = job
        self.job.set_summarize_job_description(self.summarize_job_description(self.job.description))

    def set_job_application_profile(self, job_application_profile):
        self.job_application_profile = job_application_profile

    def summarize_job_description(self, text: str) -> str:
        """Return the model's summary of the job description ``text``."""
        # Dedent into a local instead of writing the result back into the
        # shared ``strings`` module.
        template = self._preprocess_template_string(strings.summarize_prompt_template)
        return self._create_chain(template).invoke({"text": text})

    def _create_chain(self, template: str):
        """Build a ``prompt -> logged model -> plain string`` chain for ``template``."""
        prompt = ChatPromptTemplate.from_template(template)
        return prompt | self.llm_cheap | StrOutputParser()

    def answer_question_textual_wide_range(self, question: str) -> str:
        """Answer a free-text question using the most relevant resume section.

        The model is first asked which section the question is about; the
        answer is then generated from that section with the section-specific
        template. Cover-letter questions are answered from the whole resume
        and the job description instead.

        Raises:
            ValueError: If the chosen section exists on neither the resume nor
                the job application profile, or has no template.
        """
        # Template for each section, keyed by the attribute name of the section.
        section_templates = {
            "personal_information": strings.personal_information_template,
            "self_identification": strings.self_identification_template,
            "legal_authorization": strings.legal_authorization_template,
            "work_preferences": strings.work_preferences_template,
            "education_details": strings.education_details_template,
            "experience_details": strings.experience_details_template,
            "projects": strings.projects_template,
            "availability": strings.availability_template,
            "salary_expectations": strings.salary_expectations_template,
            "certifications": strings.certifications_template,
            "languages": strings.languages_template,
            "interests": strings.interests_template,
            "cover_letter": strings.coverletter_template,
        }
        section_prompt = self._preprocess_template_string("""
        You are assisting a bot designed to automatically apply for jobs on LinkedIn. The bot receives various questions about job applications and needs to determine the most relevant section of the resume to provide an accurate response.
        For the following question: '{question}', determine which section of the resume is most relevant.
        Respond with exactly one of the following options:
        - Personal information
        - Self Identification
        - Legal Authorization
        - Work Preferences
        - Education Details
        - Experience Details
        - Projects
        - Availability
        - Salary Expectations
        - Certifications
        - Languages
        - Interests
        - Cover letter
        Here are detailed guidelines to help you choose the correct section:
        1. **Personal Information**:
        - **Purpose**: Contains your basic contact details and online profiles.
        - **Use When**: The question is about how to contact you or requests links to your professional online presence.
        - **Examples**: Email address, phone number, LinkedIn profile, GitHub repository, personal website.
        2. **Self Identification**:
        - **Purpose**: Covers personal identifiers and demographic information.
        - **Use When**: The question pertains to your gender, pronouns, veteran status, disability status, or ethnicity.
        - **Examples**: Gender, pronouns, veteran status, disability status, ethnicity.
        3. **Legal Authorization**:
        - **Purpose**: Details your work authorization status and visa requirements.
        - **Use When**: The question asks about your ability to work in specific countries or if you need sponsorship or visas.
        - **Examples**: Work authorization in EU and US, visa requirements, legally allowed to work.
        4. **Work Preferences**:
        - **Purpose**: Specifies your preferences regarding work conditions and job roles.
        - **Use When**: The question is about your preferences for remote work, in-person work, relocation, and willingness to undergo assessments or background checks.
        - **Examples**: Remote work, in-person work, open to relocation, willingness to complete assessments.
        5. **Education Details**:
        - **Purpose**: Contains information about your academic qualifications.
        - **Use When**: The question concerns your degrees, universities attended, GPA, and relevant coursework.
        - **Examples**: Degree, university, GPA, field of study, exams.
        6. **Experience Details**:
        - **Purpose**: Details your professional work history and key responsibilities.
        - **Use When**: The question pertains to your job roles, responsibilities, and achievements in previous positions.
        - **Examples**: Job positions, company names, key responsibilities, skills acquired.
        7. **Projects**:
        - **Purpose**: Highlights specific projects you have worked on.
        - **Use When**: The question asks about particular projects, their descriptions, or links to project repositories.
        - **Examples**: Project names, descriptions, links to project repositories.
        8. **Availability**:
        - **Purpose**: Provides information on your availability for new roles.
        - **Use When**: The question is about how soon you can start a new job or your notice period.
        - **Examples**: Notice period, availability to start.
        9. **Salary Expectations**:
        - **Purpose**: Covers your expected salary range.
        - **Use When**: The question pertains to your salary expectations or compensation requirements.
        - **Examples**: Desired salary range.
        10. **Certifications**:
        - **Purpose**: Lists your professional certifications or licenses.
        - **Use When**: The question involves your certifications or qualifications from recognized organizations.
        - **Examples**: Certification names, issuing bodies, dates of validity.
        11. **Languages**:
        - **Purpose**: Describes the languages you can speak and your proficiency levels.
        - **Use When**: The question asks about your language skills or proficiency in specific languages.
        - **Examples**: Languages spoken, proficiency levels.
        12. **Interests**:
        - **Purpose**: Details your personal or professional interests.
        - **Use When**: The question is about your hobbies, interests, or activities outside of work.
        - **Examples**: Personal hobbies, professional interests.
        13. **Cover Letter**:
        - **Purpose**: Contains your personalized cover letter or statement.
        - **Use When**: The question involves your cover letter or specific written content intended for the job application.
        - **Examples**: Cover letter content, personalized statements.
        Provide only the exact name of the section from the list above with no additional text.
        """)
        output = self._create_chain(section_prompt).invoke({"question": question})
        section_name = self._normalize_section_name(output)

        if section_name == "cover_letter":
            # Cover letters are written from the full resume and the job
            # description rather than from a single resume section.
            chain = self._create_chain(section_templates[section_name])
            return chain.invoke({"resume": self.resume, "job_description": self.job_description})

        # Look the section up on the resume first, then on the application
        # profile; a falsy value on the resume also falls through to the profile.
        resume_section = getattr(self.resume, section_name, None) or getattr(
            self.job_application_profile, section_name, None
        )
        if resume_section is None:
            raise ValueError(f"Section '{section_name}' not found in either resume or job_application_profile.")

        template = section_templates.get(section_name)
        if template is None:
            raise ValueError(f"Chain not defined for section '{section_name}'")
        # Only the chain for the selected section is built.
        return self._create_chain(template).invoke({"resume_section": resume_section, "question": question})

    def answer_question_numeric(self, question: str, default_experience: int = 3) -> int:
        """Answer a question that expects a whole number.

        The model sees the education, experience and project sections of the
        resume. The first integer in its reply is returned; if the reply
        contains no digits, ``default_experience`` is returned instead.
        """
        func_template = self._preprocess_template_string(strings.numeric_question_template)
        output_str = self._create_chain(func_template).invoke(
            {
                "resume_educations": self.resume.education_details,
                "resume_jobs": self.resume.experience_details,
                "resume_projects": self.resume.projects,
                "question": question,
            }
        )
        try:
            return self.extract_number_from_string(output_str)
        except ValueError:
            return default_experience

    def extract_number_from_string(self, output_str):
        """Return the first run of digits in ``output_str`` as an ``int``.

        Raises:
            ValueError: If ``output_str`` contains no digits.
        """
        match = re.search(r"\d+", output_str)
        if match is None:
            raise ValueError("No numbers found in the string")
        return int(match.group())

    def answer_question_from_options(self, question: str, options: list[str]) -> str:
        """Pick one of ``options`` as the answer to ``question``.

        The model's free-text reply is mapped to the closest option by edit
        distance, so the return value is always an element of ``options``.
        """
        func_template = self._preprocess_template_string(strings.options_template)
        output_str = self._create_chain(func_template).invoke(
            {"resume": self.resume, "question": question, "options": options}
        )
        return self.find_best_match(output_str, options)

    def resume_or_cover(self, phrase: str) -> str:
        """Decide whether ``phrase`` refers to a resume or to a cover letter.

        Returns ``"resume"`` or ``"cover"``; ``"resume"`` is the fallback when
        the model's reply mentions neither.
        """
        prompt_template = self._preprocess_template_string("""
        Given the following phrase, respond with only 'resume' if the phrase is about a resume, or 'cover' if it's about a cover letter. Do not provide any additional information or explanations.
        phrase: {phrase}
        """)
        response = self._create_chain(prompt_template).invoke({"phrase": phrase})
        # Compare case-insensitively: a reply such as "Cover" must not fall
        # through to the "resume" default.
        return self._classify_resume_or_cover(response)