"""Gradio app: parse PDF resumes with an instruction-tuned LLM and export
the extracted fields (name, email, phone, skills) to an Excel workbook."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pdfplumber
import re
import openpyxl
import os
from huggingface_hub import login


def authenticate_hf(token):
    """Log in to the Hugging Face Hub with *token*.

    Returns a human-readable status string for display in the UI instead of
    raising, so a bad token does not crash the app.
    """
    try:
        login(token)
        return "Authentication Successful"
    except Exception as e:
        return f"Error: {e}"


# Load the model and tokenizer once at startup so every request reuses them.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # Replace with the actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def extract_text_from_pdf(pdf_path):
    """Return the concatenated extractable text of every page of *pdf_path*.

    BUG FIX: ``page.extract_text()`` returns ``None`` for pages with no text
    layer (e.g. scanned images); the original ``text += ...`` raised
    ``TypeError`` on such pages. Those pages now contribute an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        return "".join(page.extract_text() or "" for page in pdf.pages)


def parse_resume(text):
    """Extract name, email, phone, and skills from resume *text* via the LLM.

    Returns a dict with keys ``name``, ``email``, ``phone``, ``skills``.
    ``email``/``phone`` are post-validated with regexes and may be ``None``
    when no match is found in the model's answer.
    """
    prompts = {
        "name": "Extract the name from this resume:\n",
        "email": "Extract the email address from this resume:\n",
        "phone": "Extract the phone number from this resume:\n",
        "skills": "Extract the technical skills from this resume:\n",
    }

    results = {}
    for key, prompt in prompts.items():
        inputs = tokenizer(prompt + text, return_tensors="pt")
        # BUG FIX: the original used ``max_length=50000`` — ``max_length``
        # counts the prompt tokens too, and 50000 permits runaway generation.
        # Bound only the *new* tokens; a short answer is all we need here.
        outputs = model.generate(**inputs, max_new_tokens=64)
        # BUG FIX: decode only the generated continuation. The original
        # decoded the whole sequence, so each field contained the prompt plus
        # the entire resume text rather than the model's answer.
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(generated, skip_special_tokens=True)

        if key == "email":
            # Validate with a loose email pattern; keep the first match.
            email = re.findall(r"\S+@\S+", response)
            results[key] = email[0] if email else None
        elif key == "phone":
            # Validate as a 10–15 digit run (international numbers).
            phone = re.findall(r"\b\d{10,15}\b", response)
            results[key] = phone[0] if phone else None
        else:
            # "name" and "skills" are taken verbatim from the model's answer.
            results[key] = response
    return results


def save_to_excel(parsed_data, output_file):
    """Write *parsed_data* (list of dicts from ``parse_resume``) to
    *output_file* as an .xlsx workbook with a header row."""
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(["Name", "Email", "Phone", "Skills"])
    for data in parsed_data:
        ws.append([data["name"], data["email"], data["phone"], data["skills"]])
    wb.save(output_file)


def process_pdfs(pdfs):
    """Parse each uploaded PDF resume and return the path of the Excel file
    containing the extracted fields (one row per resume)."""
    parsed_data = []
    for pdf in pdfs:
        text = extract_text_from_pdf(pdf.name)
        parsed_data.append(parse_resume(text))
    output_file = "parsed_resumes.xlsx"
    save_to_excel(parsed_data, output_file)
    return output_file


# Gradio interface: Hugging Face authentication + resume upload/processing.
with gr.Blocks() as app:
    gr.Image(
        "https://huggingface.co/front/assets/huggingface_logo.svg",
        label="Hugging Face Logo",
        width=150,
    )
    gr.Markdown("### Hugging Face Authentication")
    hf_token = gr.Textbox(
        label="Hugging Face API Token",
        placeholder="Enter your Hugging Face token here",
        type="password",
        value="",
    )
    login_button = gr.Button("Authenticate")
    auth_status = gr.Textbox(label="Authentication Status", interactive=False)
    login_button.click(authenticate_hf, inputs=hf_token, outputs=auth_status)

    gr.Markdown("### Upload PDF Resumes")
    # "filepath" makes Gradio hand us objects whose .name is a local path.
    pdfs_input = gr.File(file_count="multiple", type="filepath")
    output_file = gr.File()
    process_button = gr.Button("Process Resumes")
    process_button.click(process_pdfs, inputs=pdfs_input, outputs=output_file)


# BUG FIX: guard the launch so importing this module (e.g. for testing) does
# not start the web server; behavior when run as a script is unchanged.
if __name__ == "__main__":
    app.launch()