Replace the PyPDF2 library with pdfminer.six

This commit is contained in:
Maurice McCabe 2024-09-02 01:44:54 -07:00
parent 23567ee7c4
commit 685da0f9fc
2 changed files with 18 additions and 23 deletions

View File

@ -1,16 +1,16 @@
langchain==0.2.11
langchain-community==0.2.10
langchain-core==0.2.24
langchain-openai==0.1.17
langchain-text-splitters==0.2.2
langsmith==0.1.93
Levenshtein==0.25.1
openai==1.37.1
regex==2024.7.24
reportlab==4.2.2
selenium==4.9.1
webdriver-manager==4.0.2
click
git+https://github.com/feder-cr/lib_resume_builder_AIHawk.git
linkedin-api
PyPDF2==3.0.1
langchain==0.2.11
langchain-community==0.2.10
langchain-core==0.2.24
langchain-openai==0.1.17
langchain-text-splitters==0.2.2
langsmith==0.1.93
Levenshtein==0.25.1
openai==1.37.1
regex==2024.7.24
reportlab==4.2.2
selenium==4.9.1
webdriver-manager==4.0.2
click
git+https://github.com/feder-cr/lib_resume_builder_AIHawk.git
linkedin-api
pdfminer.six==20221105

View File

@ -5,7 +5,7 @@ import os
from typing import Dict, Any
import re
from jsonschema import validate, ValidationError
import PyPDF2
from pdfminer.high_level import extract_text
def load_yaml(file_path: str) -> Dict[str, Any]:
with open(file_path, 'r') as file:
@ -118,12 +118,7 @@ def generate_report(validation_result: Dict[str, Any], output_file: str):
print(report)
def pdf_to_text(pdf_path: str) -> str:
    """Return the text content of the PDF file at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF document to read.

    Returns:
        The text that pdfminer.six's ``extract_text`` produces for the
        document.
    """
    # pdfminer.six's high-level helper accepts a path, opens the file and
    # processes the pages itself, so the manual PyPDF2 file handle and
    # per-page string concatenation are no longer needed.
    return extract_text(pdf_path)
def main():
parser = argparse.ArgumentParser(description="Generate a resume YAML file from a PDF or text resume using OpenAI API")