diff --git a/.gitignore b/.gitignore index 6e8e0d9..d613771 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ _* data_folder* .venv generated_cv -resume.html \ No newline at end of file +resume.html +.vscode \ No newline at end of file diff --git a/gpt.py b/gpt.py index fa1edaa..74e6804 100644 --- a/gpt.py +++ b/gpt.py @@ -171,13 +171,13 @@ class GPTAnswerer: resume_markdown_chain = resume_markdown_prompt | self.llm_cheap | StrOutputParser() fusion_job_description_resume_chain = fusion_job_description_resume_prompt | self.llm_cheap | StrOutputParser() - html_template = strings.html_template.format(email_address=self.resume.personal_information.email, phone_number=self.resume.personal_information.phonePrefix + self.resume.personal_information.phone , github_link=self.resume.personal_information.github, linkedin_link=self.resume.personal_information.linkedin,city=self.resume.personal_information.city,country=self.resume.personal_information.country) + #html_template = strings.html_template.format(email_address=self.resume.personal_information.email, phone_number=self.resume.personal_information.phonePrefix + self.resume.personal_information.phone , github_link=self.resume.personal_information.github, linkedin_link=self.resume.personal_information.linkedin,city=self.resume.personal_information.city,country=self.resume.personal_information.country) composed_chain = ( resume_markdown_chain | (lambda output: {"job_description": self.job.summarize_job_description, "formatted_resume": output}) | fusion_job_description_resume_chain - | (lambda formatted_resume: html_template + formatted_resume) + | (lambda formatted_resume: strings.html_template + formatted_resume) ) try: diff --git a/linkedIn_easy_applier.py b/linkedIn_easy_applier.py index 7201297..f0dfb1b 100644 --- a/linkedIn_easy_applier.py +++ b/linkedIn_easy_applier.py @@ -1,4 +1,4 @@ -import io +import base64 import os import random import tempfile @@ -23,12 +23,10 @@ import io import time from reportlab.lib.pagesizes import letter from reportlab.pdfgen import canvas -from reportlab.lib.styles import getSampleStyleSheet -from reportlab.platypus import SimpleDocTemplate, Paragraph from reportlab.lib.pagesizes import letter -from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer -from reportlab.lib.styles import getSampleStyleSheet -from xhtml2pdf import pisa +from xhtml2pdf import pisa + +import utils class LinkedInEasyApplier: def __init__(self, driver: Any, resume_dir: Optional[str], set_old_answers: List[Tuple[str, str, str]], gpt_answerer: Any): @@ -181,45 +179,28 @@ class LinkedInEasyApplier: def _create_and_upload_resume(self, element): max_retries = 3 - retry_delay = 1 # seconds + retry_delay = 1 folder_path = 'generated_cv' - # Create the directory if it doesn't exist if not os.path.exists(folder_path): os.makedirs(folder_path) - for attempt in range(max_retries): try: html_string = self.gpt_answerer.get_resume_html() - file_name = 'resume.html' - with open(file_name, 'w', encoding='utf-8') as file: - file.write(html_string) - file_path = os.path.abspath(file_name) - self.driver.execute_script("window.open('');") - self.driver.switch_to.window(self.driver.window_handles[1]) - self.driver.get(f"file:///{file_path}") - time.sleep(1) - page_source = self.driver.page_source - self.driver.close() - self.driver.switch_to.window(self.driver.window_handles[0]) + with tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8') as temp_html_file: + temp_html_file.write(html_string) + file_name_HTML = temp_html_file.name file_name_pdf = f"resume_{uuid.uuid4().hex}.pdf" file_path_pdf = os.path.join(folder_path, file_name_pdf) - - # Convert HTML to PDF and save it to the specified folder - with open(file_path_pdf, 'wb') as pdf_file: - pisa_status = pisa.CreatePDF(page_source, dest=pdf_file) + + with open(file_path_pdf, "wb") as f: + f.write(base64.b64decode(utils.HTML_to_PDF(file_name_HTML,True))) - file_path_pdf = os.path.abspath(file_path_pdf) - # Upload the file element.send_keys(file_path_pdf) - - if pisa_status.err: - raise Exception(f"PDF generation failed with error: {pisa_status.err}") - time.sleep(2) # Give some time for the upload process + os.remove(file_name_HTML) return True - except Exception: if attempt < max_retries - 1: time.sleep(retry_delay) diff --git a/resume_template/casual-markdown.js b/resume_template/casual-markdown.js new file mode 100644 index 0000000..7c2b05e --- /dev/null +++ b/resume_template/casual-markdown.js @@ -0,0 +1,202 @@ +/***************************************************************************** + * casual-markdown - a lightweight regexp-base markdown parser with TOC support + * 2022/07/31, v0.90, refine frontmatter (simple yaml) + * 2023/04/12, v0.92, addCopyButton for code-block + * + * Copyright (c) 2022-2023, Casualwriter (MIT Licensed) + * https://github.com/casualwriter/casual-markdown +*****************************************************************************/ +;(function(){ + + // define md object, and extent function (which is a dummy function) + var md = { yaml:{}, before: function (str) {return str}, after: function (str) {return str} } + + // function for REGEXP to convert html tag. ie. => <TAG*gt; + md.formatTag = function (html) { return html.replace(//g,'>'); } + + // frontmatter for simple YAML (support multi-level, but string value only) + md.formatYAML = function (front, matter) { + var level = {}, latest = md.yaml; + matter.replace( /^\s*#(.*)$/gm, '' ).replace( /^( *)([^:^\n]+):(.*)$/gm, function(m, sp, key,val) { + level[sp] = level[sp] || latest + latest = level[sp][key.trim()] = val.trim() || {} + for (e in level) if(e>sp) level[e]=null; + } ); + return '' + } + + //===== format code-block, highlight remarks/keywords for code/sql + md.formatCode = function (match, title, block) { + // convert tag <> to < > tab to 3 space, support marker using ^^^ + block = block.replace(//g,'>') + block = block.replace(/\t/g,' ').replace(/\^\^\^(.+?)\^\^\^/g, '$1') + + // highlight comment and keyword based on title := none | sql | code + if (title.toLowerCase(title) == 'sql') { + block = block.replace(/^\-\-(.*)/gm,'--$1').replace(/\s\-\-(.*)/gm,' --$1') + block = block.replace(/(\s?)(function|procedure|return|if|then|else|end|loop|while|or|and|case|when)(\s)/gim,'$1$2$3') + block = block.replace(/(\s?)(select|update|delete|insert|create|from|where|group by|having|set)(\s)/gim,'$1$2$3') + } else if ((title||'none')!=='none') { + block = block.replace(/^\/\/(.*)/gm,'//$1').replace(/\s\/\/(.*)/gm,' //$1') + block = block.replace(/(\s?)(function|procedure|return|exit|if|then|else|end|loop|while|or|and|case|when)(\s)/gim,'$1$2$3') + block = block.replace(/(\s?)(var|let|const|=>|for|next|do|while|loop|continue|break|switch|try|catch|finally)(\s)/gim,'$1$2$3') + } + + return '
'  + block + '
' + } + + // copy to clipboard for code-block + md.clipboard = function (e) { + navigator.clipboard.writeText( e.parentNode.innerText.replace('copy\n','') ) + e.innerText = 'copied' + } + + //===== parse markdown string into HTML string (exclude code-block) + md.parser = function( mdstr ) { + + // apply yaml variables + for (var name in this.yaml) mdstr = mdstr.replace( new RegExp('\{\{\\s*'+name+'\\s*\}\}', 'gm'), this.yaml[name] ) + + // table syntax + mdstr = mdstr.replace(/\n(.+?)\n.*?\-\-\s?\|\s?\-\-.*?\n([\s\S]*?)\n\s*?\n/g, function (m,p1,p2) { + var thead = p1.replace(/^\|(.+)/gm,'$1').replace(/(.+)\|$/gm,'$1').replace(/\|/g,'') + var tbody = p2.replace(/^\|(.+)/gm,'$1').replace(/(.+)\|$/gm,'$1') + tbody = tbody.replace(/(.+)/gm,'$1').replace(/\|/g,'') + return '\n\n\n\n' + tbody + '\n
' + thead + '\n
\n\n' + } ) + + // horizontal rule =>
+ mdstr = mdstr.replace(/^-{3,}|^\_{3,}|^\*{3,}$/gm, '
').replace(/\n\n/g, '\n

') + + // header =>

..

+ mdstr = mdstr.replace(/^##### (.*?)\s*#*$/gm, '
$1
') + .replace(/^#### (.*?)\s*#*$/gm, '

$1

') + .replace(/^### (.*?)\s*#*$/gm, '

$1

') + .replace(/^## (.*?)\s*#*$/gm, '

$1

') + .replace(/^# (.*?)\s*#*$/gm, '

$1

') + .replace(/^(.*?)\s*{(.*)}\s*<\/h\d\>$/gm, '$2') + + // inline code-block: `code-block` => code-block + mdstr = mdstr.replace(/``(.*?)``/gm, function(m,p){ return '' + md.formatTag(p).replace(/`/g,'`') + ''} ) + mdstr = mdstr.replace(/`(.*?)`/gm, '$1' ) + + // blockquote, max 2 levels =>
{text}
+ mdstr = mdstr.replace(/^\>\> (.*$)/gm, '
$1
') + mdstr = mdstr.replace(/^\> (.*$)/gm, '
$1
') + mdstr = mdstr.replace(/<\/blockquote\>\n/g, '\n
' ) + mdstr = mdstr.replace(/<\/blockquote\>\n/g, '\n
' ) + + // image syntax: ![title](url) => title + mdstr = mdstr.replace(/!\[(.*?)\]\((.*?) "(.*?)"\)/gm, '$1') + mdstr = mdstr.replace(/!\[(.*?)\]\((.*?)\)/gm, '$1') + + // links syntax: [title "title"](url) => text + mdstr = mdstr.replace(/\[(.*?)\]\((.*?) "new"\)/gm, '$1') + mdstr = mdstr.replace(/\[(.*?)\]\((.*?) "(.*?)"\)/gm, '$1') + mdstr = mdstr.replace(/([<\s])(https?\:\/\/.*?)([\s\>])/gm, '$1$2$3') + mdstr = mdstr.replace(/\[(.*?)\]\(\)/gm, '$1') + mdstr = mdstr.replace(/\[(.*?)\]\((.*?)\)/gm, '$1') + + // unordered/ordered list, max 2 levels =>
  • ..
,
  1. ..
+ mdstr = mdstr.replace(/^[\*+-][ .](.*)/gm, '
  • $1
' ) + mdstr = mdstr.replace(/^\d\d?[ .](.*)/gm, '
  1. $1
' ) + mdstr = mdstr.replace(/^\s{2,6}[\*+-][ .](.*)/gm, '
    • $1
' ) + mdstr = mdstr.replace(/^\s{2,6}\d[ .](.*)/gm, '
    1. $1
' ) + mdstr = mdstr.replace(/<\/[ou]l\>\n\n?<[ou]l\>/g, '\n' ) + mdstr = mdstr.replace(/<\/[ou]l\>\n<[ou]l\>/g, '\n' ) + + // text decoration: bold, italic, underline, strikethrough, highlight + mdstr = mdstr.replace(/\*\*\*(\w.*?[^\\])\*\*\*/gm, '$1') + mdstr = mdstr.replace(/\*\*(\w.*?[^\\])\*\*/gm, '$1') + mdstr = mdstr.replace(/\*(\w.*?[^\\])\*/gm, '$1') + mdstr = mdstr.replace(/___(\w.*?[^\\])___/gm, '$1') + mdstr = mdstr.replace(/__(\w.*?[^\\])__/gm, '$1') + // mdstr = mdstr.replace(/_(\w.*?[^\\])_/gm, '$1') // NOT support!! + mdstr = mdstr.replace(/\^\^\^(.+?)\^\^\^/gm, '$1') + mdstr = mdstr.replace(/\^\^(\w.*?)\^\^/gm, '$1') + mdstr = mdstr.replace(/~~(\w.*?)~~/gm, '$1') + + // line break and paragraph =>

+ mdstr = mdstr.replace(/ \n/g, '\n
').replace(/\n\s*\n/g, '\n

\n') + + // indent as code-block + mdstr = mdstr.replace(/^ {4,10}(.*)/gm, function(m,p) { return '

' + md.formatTag(p) + '
'} ) + mdstr = mdstr.replace(/^\t(.*)/gm, function(m,p) { return '
' + md.formatTag(p) + '
'} ) + mdstr = mdstr.replace(/<\/code\><\/pre\>\n/g, '\n' ) + + // Escaping Characters + return mdstr.replace(/\\([`_~\*\+\-\.\^\\\<\>\(\)\[\]])/gm, '$1' ) + } + + //===== parse markdown string into HTML content (cater code-block) + md.html = function (mdText) { + // replace \r\n to \n, and handle front matter for simple YAML + mdText = mdText.replace(/\r\n/g, '\n').replace( /^---+\s*\n([\s\S]*?)\n---+\s*\n/, md.formatYAML ) + // handle code-block. + mdText = mdText.replace(/\n~~~/g,'\n```').replace(/\n``` *(.*?)\n([\s\S]*?)\n``` *\n/g, md.formatCode) + + // split by "", skip for code-block and process normal text + var pos1=0, pos2=0, mdHTML = '' + while ( (pos1 = mdText.indexOf('')) >= 0 ) { + pos2 = mdText.indexOf('', pos1 ) + mdHTML += md.after( md.parser( md.before( mdText.substr(0,pos1) ) ) ) + mdHTML += mdText.substr(pos1, (pos2>0? pos2-pos1+7 : mdtext.length) ) + mdText = mdText.substr( pos2 + 7 ) + } + + return '
' + mdHTML + md.after( md.parser( md.before(mdText) ) ) + '
' + } + + //===== TOC support + md.toc = function (srcDiv, tocDiv, options ) { + + // select elements, set title + var tocSelector = (options&&options.css) || 'h1,h2,h3,h4' + var tocTitle = (options&&options.title) || 'Table of Contents' + var toc = document.getElementById(srcDiv).querySelectorAll( tocSelector ) + var html = '
    ' + (tocTitle=='none'? '' : '

    ' + tocTitle + '

    '); + + // loop for each element,add
  • element with class in TAG name. + for (var i=0; i' + html += toc[i].textContent + '
  • '; + } + + document.getElementById(tocDiv).innerHTML = html + "
"; + + //===== scrollspy support (ps: add to document.body if element(scrollspy) not found) + if ( options && options.scrollspy ) { + + (document.getElementById(options.scrollspy)||document).onscroll = function () { + + // get TOC elements, and viewport position + var list = document.getElementById(tocDiv).querySelectorAll('li') + var divScroll = document.getElementById(options.scrollspy) || document.documentElement + var divHeight = divScroll.clientHeight || divScroll.offsetHeight + + // loop for each TOC element, add/remove scrollspy class + for (var i=0; i0 && pos { + headerInfoDiv.appendChild(el); + }); + const contactInfoDiv = document.createElement('div'); + contactInfoDiv.className = 'contact-info'; + contactInfoElements.forEach(el => { + contactInfoDiv.appendChild(el); + }); + newHeader.appendChild(headerInfoDiv); + newHeader.appendChild(contactInfoDiv); + const h2 = document.querySelector('h2'); + if (h2) { + h2.parentNode.insertBefore(newHeader, h2); + } +} + + setTimeout(function() { + reorganizeHeader(); + }, 100); diff --git a/resume_template/resume.css b/resume_template/resume.css new file mode 100644 index 0000000..aff67eb --- /dev/null +++ b/resume_template/resume.css @@ -0,0 +1,156 @@ +@import url('https://fonts.googleapis.com/css2?family=Montserrat:wght@700&family=Roboto:wght@400&display=swap'); + +/* Reset generale per uniformità */ +body, h2, h3, h4, p, ul, ol { + margin: 0; + padding: 0; + font-family: 'Roboto', sans-serif; +} + +body { + line-height: 1.6; + margin: auto; + padding: 20px; + max-width: 1024px; + color: #333; + background-color: #f8f9fa; +} + +/* Header Style */ +header { + background-color: #e9ecef; /* Sfondo leggermente più scuro */ + padding: 15px 20px 0 20px; /* Padding: 15px sopra, 20px a destra e a sinistra, 0px sotto */ + border-bottom: 2px solid #d1d1d1; /* Bordo sottile per separare il header dal resto del contenuto */ + font-family: 'Roboto', sans-serif; /* Font per il testo generico */ + display: flex; + justify-content: space-between; + align-items: center; +} + +.header-info { + flex: 1; +} + +.header-info h1 { + margin: 0; + font-size: 30px; /* Dimensione del font aumentata per il nome */ + color: #000; /* Colore nero per il testo del nome */ + font-family: 'Montserrat', sans-serif; /* Font più accattivante per il nome */ +} + +.header-info a { + color: #0056b3; /* Colore blu intenso per i link */ + text-decoration: none; + font-weight: bold; +} + +.header-info a:hover { + text-decoration: underline; +} + +.contact-info { + flex: 0; + text-align: center; /* Centratura del testo */ +} + +.contact-info a { + display: block; + color: #0056b3; /* Colore blu intenso per i link */ + text-decoration: none; + margin-bottom: 5px; /* Spaziatura tra i contatti */ + font-size: 14px; /* Dimensione del font per i dettagli di contatto */ + font-family: 'Roboto', sans-serif; /* Font moderno per i dettagli di contatto */ + font-weight: bold; +} + +.contact-info a:hover { + color: #ff5722; /* Colore arancione per i link al passaggio del mouse */ +} + +.contact-info a:visited { + color: #666; /* Colore grigio scuro per i link visitati */ +} + +/* Stile per i titoli delle sezioni */ +h2 { + font-size: 24px; + color: #0056b3; + border-bottom: 1px solid #d1d1d1; + margin-bottom: 10px; + padding-bottom: 5px; + font-family: 'Montserrat', sans-serif; +} + +/* Stile per le esperienze professionali */ +h3 { + font-size: 20px; + color: #212529; + margin-top: 15px; + margin-bottom: 5px; +} + +em { + color: #555; +} + +ol, ul { + margin-left: 20px; + margin-bottom: 15px; +} + +li { + margin-bottom: 8px; + line-height: 1.5; +} + +/* Stile per le sezioni secondarie */ +p { + margin-bottom: 15px; +} + +b { + color: #212529; +} + +/* Stile per i link */ +a { + color: #0056b3; +} + +a:hover { + color: #ff5722; + text-decoration: underline; +} + +/* Responsività per schermi più piccoli */ +@media (max-width: 768px) { + header { + flex-direction: column; + text-align: center; + } + + .contact-info { + text-align: center; + margin-top: 10px; + } +} + +.markdown code { background:#f0f0f0; color:navy; border-radius:6px; padding:2px; } +.markdown pre { background:#f0f0f0; margin:12px; border:1px solid #ddd; padding:20px 12px; border-radius:6px; } +.markdown pre:hover button { display:block; } +.markdown pre button { display:none; position:relative; float:right; top:-16px } +.markdown blockquote { background:#f0f0f0; border-left:6px solid grey; padding:8px } +.markdown table { margin:12px; border-collapse: collapse; } +.markdown th { border:1px solid grey; background:lightgrey; padding:6px; } +.markdown td { border:1px solid grey; padding:6px; } +.markdown tr:nth-child(even) { background:#f0f0f0; } +.markdown ins { color:#890604 } +.markdown rem { color:#198964 } +.toc ul { padding: 0 12px; } +.toc h3 { color:#0057b7; border-bottom:1px dotted grey } +.toc .H1 { list-style-type:none; font-weight:600; margin:4px; background:#eee } +.toc .H2 { list-style-type:none; font-weight:600; margin:4px; } +.toc .H3 { margin-left:2em } +.toc .H4 { margin-left:4em } +.toc .active { color:#0057b7 } +.toc li:hover { background:#f0f0f0 } \ No newline at end of file diff --git a/utils.py b/utils.py index 1f2deb4..8ac6b44 100644 --- a/utils.py +++ b/utils.py @@ -1,6 +1,13 @@ +import json +import os import random import time +from selenium.common.exceptions import WebDriverException +from selenium.webdriver.chrome.service import Service as ChromeService from selenium import webdriver +import time +import glob +from webdriver_manager.chrome import ChromeDriverManager headless = False chromeProfilePath = r"/home/.config/google-chrome/linkedin_profile" @@ -43,6 +50,58 @@ def scroll_slow(driver, scrollable_element, start=0, end=3600, step=100, reverse except Exception as e: print(f"Exception occurred: {e}") + +def HTML_to_PDF(FilePath, Hide_Window=True): + # Validate and prepare file paths + if not os.path.isfile(FilePath): + raise FileNotFoundError(f"The specified file does not exist: {FilePath}") + + FilePath = f"file:///{os.path.abspath(FilePath).replace(os.sep, '/')}" + + # Set up Chrome options + chrome_options = webdriver.ChromeOptions() + if Hide_Window: + chrome_options.add_argument("--headless") # Run Chrome in headless mode + + # Initialize Chrome driver + service = ChromeService(ChromeDriverManager().install()) + driver = webdriver.Chrome(service=service, options=chrome_options) + + try: + # Load the HTML file + driver.get(FilePath) + + start_time = time.time() + pdf_base64 = driver.execute_cdp_cmd("Page.printToPDF", { + "printBackground": True, # Incluir los fondos en el PDF + "landscape": False, # Orientación vertical + "paperWidth": 10, # Ancho en pulgadas (Carta: 8.5) + "paperHeight": 11, # Alto en pulgadas (Carta: 11) + "marginTop": 0, # Márgenes en pulgadas + "marginBottom": 0, + "marginLeft": 0, + "marginRight": 0, + "displayHeaderFooter": False, # No mostrar encabezado y pie de página + "preferCSSPageSize": True, # Preferir el tamaño de página definido por CSS + "generateDocumentOutline": False, # No generar un índice en el PDF + "generateTaggedPDF": False, # No generar PDF accesible + "transferMode": "ReturnAsBase64" # Retornar el PDF como base64 + }) + + + # Check if PDF generation was successful + if time.time() - start_time > 120: + raise TimeoutError("PDF generation exceeded the specified timeout limit.") + # Return the base64-encoded PDF + return pdf_base64['data'] + + except WebDriverException as e: + raise RuntimeError(f"WebDriver exception occurred: {e}") + + finally: + # Ensure the driver is closed + driver.quit() + def chromeBrowserOptions(): options = webdriver.ChromeOptions() options.add_argument('--no-sandbox')