import gradio as gr
import spacy
import requests
from bs4 import BeautifulSoup
import PyPDF2
import subprocess
import sys
import time

# Ensure the language model is downloaded
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")

# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text() or ""
    return text

# Extract skills and location from text
def extract_skills_and_location(text):
    doc = nlp(text)
    skills = []
    location = None
    skill_keywords = ['Python', 'Data Analysis', 'Machine Learning', 'SQL', 'Java', 'Project Management']
    # Case-insensitive substring search so multi-word skills such as
    # 'Machine Learning' are found; a token-by-token comparison would miss them.
    lowered = text.lower()
    for keyword in skill_keywords:
        if keyword.lower() in lowered and keyword not in skills:
            skills.append(keyword)
    for ent in doc.ents:
        if ent.label_ == 'GPE':  # GPE = Geopolitical Entity (e.g., city, country)
            location = ent.text
            break
    return skills, location

# Web scraper function for job listings
def fetch_job_listings(job_title, location, retries=3):
    base_url = f"https://www.examplejobboard.com/jobs?query={job_title}&location={location}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
    }
    for attempt in range(retries):
        try:
            response = requests.get(base_url, headers=headers, timeout=10)
            response.raise_for_status()
            break  # Exit loop if request is successful
        except requests.RequestException as e:
            if attempt < retries - 1:  # If not the last attempt
                time.sleep(2)  # Wait before retrying
                continue  # Retry the request
            return f"Failed to retrieve job listings. Error: {str(e)}"

    soup = BeautifulSoup(response.text, 'html.parser')
    job_cards = soup.find_all('div', class_='job-card')[:5]  # Adjust to match the website's structure

    if not job_cards:
        return "No job listings found."

    job_listings = []
    for job in job_cards:
        title_tag = job.find('h2', class_='job-title')
        company_tag = job.find('div', class_='company')
        location_tag = job.find('span', class_='job-location')
        description_tag = job.find('p', class_='job-description')
        date_posted_tag = job.find('time', class_='date-posted')

        job_info = {
            'Title': title_tag.text.strip() if title_tag else 'N/A',
            'Company': company_tag.text.strip() if company_tag else 'N/A',
            'Location': location_tag.text.strip() if location_tag else 'N/A',
            'Description': description_tag.text.strip() if description_tag else 'N/A',
            'Date Posted': date_posted_tag.text.strip() if date_posted_tag else 'N/A',
            'URL': title_tag.a['href'] if title_tag and title_tag.a else ''
        }
        job_listings.append(job_info)

    return job_listings

# Main function to handle resume processing
def process_resume(file):
    resume_text = extract_text_from_pdf(file)
    skills, location = extract_skills_and_location(resume_text)

    job_title = skills[0] if skills else "Software Engineer"
    job_listings = fetch_job_listings(job_title, location if location else "India")
    # Build the HTML report displayed in the app's output pane
    html_content = f"<p><strong>Skills:</strong> {', '.join(skills)}</p>" if skills else "<p>No specific skills found.</p>"
    html_content += f"<p><strong>Location:</strong> {location}</p>" if location else "<p>Location not identified.</p>"
    if isinstance(job_listings, str):  # fetch_job_listings returned an error or "not found" message
        html_content += f"<p>{job_listings}</p>"
    else:
        for job in job_listings:
            html_content += f"""
            <div class="job-card">
                <h4><a href="{job['URL']}">{job['Title']}</a></h4>
                <p>Company: {job['Company']}</p>
                <p>Location: {job['Location']}</p>
                <p>Date Posted: {job['Date Posted']}</p>
                <p>{job['Description']}</p>
            </div>
            """

    return html_content
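
# The original snippet stops before the UI is wired up, even though gradio is
# imported and process_resume returns HTML. Below is a minimal sketch of that
# wiring (an assumption, not the author's original code): component choices,
# labels, and titles are illustrative.
demo = gr.Interface(
    fn=process_resume,
    inputs=gr.File(label="Upload Resume (PDF)", file_types=[".pdf"]),
    outputs=gr.HTML(label="Results"),
    title="Resume Job Matcher",
    description="Extracts skills and location from a resume, then fetches matching job listings."
)

if __name__ == "__main__":
    demo.launch()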