# Patch summary: switch PDF text extraction from PyPDF2 to pdfminer.six.
#
# requirements.txt
#   - The last entry changes from `PyPDF2==3.0.1` to `pdfminer.six==20221105`.
#   - All 16 lines are listed as removed and re-added even though only the
#     last one differs textually; presumably a line-ending or whitespace
#     normalization -- TODO confirm against the two blobs.
#   - Neither the old nor the new file ends with a trailing newline.
#
# resume_yaml_generator.py
#   - `import PyPDF2` is replaced by
#     `from pdfminer.high_level import extract_text`.
#   - `pdf_to_text` no longer opens the file and concatenates
#     `page.extract_text()` for every page of a `PyPDF2.PdfReader`; it now
#     returns `extract_text(pdf_path)` directly.
#
# NOTE(review): the text produced by the two libraries may differ in spacing
# and layout -- verify that downstream consumers of `pdf_to_text` tolerate it.
# NOTE(review): leading whitespace of the Python lines below assumes 4-space
# indentation -- confirm against the target file before applying.
diff --git a/requirements.txt b/requirements.txt
index 5139ade..03290b7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,16 +1,16 @@
-langchain==0.2.11
-langchain-community==0.2.10
-langchain-core==0.2.24
-langchain-openai==0.1.17
-langchain-text-splitters==0.2.2
-langsmith==0.1.93
-Levenshtein==0.25.1
-openai==1.37.1
-regex==2024.7.24
-reportlab==4.2.2
-selenium==4.9.1
-webdriver-manager==4.0.2
-click
-git+https://github.com/feder-cr/lib_resume_builder_AIHawk.git
-linkedin-api
-PyPDF2==3.0.1
\ No newline at end of file
+langchain==0.2.11
+langchain-community==0.2.10
+langchain-core==0.2.24
+langchain-openai==0.1.17
+langchain-text-splitters==0.2.2
+langsmith==0.1.93
+Levenshtein==0.25.1
+openai==1.37.1
+regex==2024.7.24
+reportlab==4.2.2
+selenium==4.9.1
+webdriver-manager==4.0.2
+click
+git+https://github.com/feder-cr/lib_resume_builder_AIHawk.git
+linkedin-api
+pdfminer.six==20221105
\ No newline at end of file
diff --git a/resume_yaml_generator.py b/resume_yaml_generator.py
index acf36f5..46982c2 100644
--- a/resume_yaml_generator.py
+++ b/resume_yaml_generator.py
@@ -5,7 +5,7 @@ import os
 from typing import Dict, Any
 import re
 from jsonschema import validate, ValidationError
-import PyPDF2
+from pdfminer.high_level import extract_text
 
 def load_yaml(file_path: str) -> Dict[str, Any]:
     with open(file_path, 'r') as file:
@@ -118,12 +118,7 @@ def generate_report(validation_result: Dict[str, Any], output_file: str):
     print(report)
 
 def pdf_to_text(pdf_path: str) -> str:
-    text = ""
-    with open(pdf_path, 'rb') as file:
-        reader = PyPDF2.PdfReader(file)
-        for page in reader.pages:
-            text += page.extract_text()
-    return text
+    return extract_text(pdf_path)
 
 def main():
     parser = argparse.ArgumentParser(description="Generate a resume YAML file from a PDF or text resume using OpenAI API")