"""Gradio app: parse PDF resumes with an instruction-tuned LLM and export
the extracted fields (name, email, phone, skills) to an Excel workbook."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pdfplumber
import re
import openpyxl
import os
from huggingface_hub import login


def authenticate_hf(token):
    """Log in to the Hugging Face Hub with *token*.

    Returns a human-readable status string for display in the UI instead of
    raising, so a bad token does not crash the app.
    """
    try:
        login(token)
        return "Authentication Successful"
    except Exception as e:
        return f"Error: {e}"


# Load the model and tokenizer once at startup so every request reuses them.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # Replace with the actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def extract_text_from_pdf(pdf_path):
    """Return the concatenated extractable text of every page of *pdf_path*.

    BUG FIX: ``page.extract_text()`` returns ``None`` for pages with no text
    layer (e.g. scanned images); the original ``text += ...`` raised
    ``TypeError`` on such pages. Those pages now contribute an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        return "".join(page.extract_text() or "" for page in pdf.pages)


def parse_resume(text):
    """Extract name, email, phone, and skills from resume *text* via the LLM.

    Returns a dict with keys ``name``, ``email``, ``phone``, ``skills``.
    ``email``/``phone`` are post-validated with regexes and may be ``None``
    when no match is found in the model's answer.
    """
    prompts = {
        "name": "Extract the name from this resume:\n",
        "email": "Extract the email address from this resume:\n",
        "phone": "Extract the phone number from this resume:\n",
        "skills": "Extract the technical skills from this resume:\n",
    }

    results = {}
    for key, prompt in prompts.items():
        inputs = tokenizer(prompt + text, return_tensors="pt")
        # BUG FIX: the original used ``max_length=50000`` — ``max_length``
        # counts the prompt tokens too, and 50000 permits runaway generation.
        # Bound only the *new* tokens; a short answer is all we need here.
        outputs = model.generate(**inputs, max_new_tokens=64)
        # BUG FIX: decode only the generated continuation. The original
        # decoded the whole sequence, so each field contained the prompt plus
        # the entire resume text rather than the model's answer.
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(generated, skip_special_tokens=True)

        if key == "email":
            # Validate with a loose email pattern; keep the first match.
            email = re.findall(r"\S+@\S+", response)
            results[key] = email[0] if email else None
        elif key == "phone":
            # Validate as a 10–15 digit run (international numbers).
            phone = re.findall(r"\b\d{10,15}\b", response)
            results[key] = phone[0] if phone else None
        else:
            # "name" and "skills" are taken verbatim from the model's answer.
            results[key] = response
    return results


def save_to_excel(parsed_data, output_file):
    """Write *parsed_data* (list of dicts from ``parse_resume``) to
    *output_file* as an .xlsx workbook with a header row."""
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(["Name", "Email", "Phone", "Skills"])
    for data in parsed_data:
        ws.append([data["name"], data["email"], data["phone"], data["skills"]])
    wb.save(output_file)


def process_pdfs(pdfs):
    """Parse each uploaded PDF resume and return the path of the Excel file
    containing the extracted fields (one row per resume)."""
    parsed_data = []
    for pdf in pdfs:
        text = extract_text_from_pdf(pdf.name)
        parsed_data.append(parse_resume(text))
    output_file = "parsed_resumes.xlsx"
    save_to_excel(parsed_data, output_file)
    return output_file


# Gradio interface: Hugging Face authentication + resume upload/processing.
with gr.Blocks() as app:
    gr.Image(
        "https://huggingface.co/front/assets/huggingface_logo.svg",
        label="Hugging Face Logo",
        width=150,
    )
    gr.Markdown("### Hugging Face Authentication")
    hf_token = gr.Textbox(
        label="Hugging Face API Token",
        placeholder="Enter your Hugging Face token here",
        type="password",
        value="",
    )
    login_button = gr.Button("Authenticate")
    auth_status = gr.Textbox(label="Authentication Status", interactive=False)
    login_button.click(authenticate_hf, inputs=hf_token, outputs=auth_status)

    gr.Markdown("### Upload PDF Resumes")
    # "filepath" makes Gradio hand us objects whose .name is a local path.
    pdfs_input = gr.File(file_count="multiple", type="filepath")
    output_file = gr.File()
    process_button = gr.Button("Process Resumes")
    process_button.click(process_pdfs, inputs=pdfs_input, outputs=output_file)


# BUG FIX: guard the launch so importing this module (e.g. for testing) does
# not start the web server; behavior when run as a script is unchanged.
if __name__ == "__main__":
    app.launch()