Replace the PyPDF2 library with pdfminer.six

This commit is contained in:
Maurice McCabe 2024-09-02 01:44:54 -07:00
parent 23567ee7c4
commit 685da0f9fc
2 changed files with 18 additions and 23 deletions

View File

@ -13,4 +13,4 @@ webdriver-manager==4.0.2
click
git+https://github.com/feder-cr/lib_resume_builder_AIHawk.git
linkedin-api
PyPDF2==3.0.1
pdfminer.six==20221105

View File

@ -5,7 +5,7 @@ import os
from typing import Dict, Any
import re
from jsonschema import validate, ValidationError
import PyPDF2
from pdfminer.high_level import extract_text
def load_yaml(file_path: str) -> Dict[str, Any]:
with open(file_path, 'r') as file:
@ -118,12 +118,7 @@ def generate_report(validation_result: Dict[str, Any], output_file: str):
print(report)
def pdf_to_text(pdf_path: str) -> str:
text = ""
with open(pdf_path, 'rb') as file:
reader = PyPDF2.PdfReader(file)
for page in reader.pages:
text += page.extract_text()
return text
return extract_text(pdf_path)
def main():
parser = argparse.ArgumentParser(description="Generate a resume YAML file from a PDF or text resume using OpenAI API")