DreamStream-1 committed on
Commit
f96439f
·
verified ·
1 Parent(s): c4c5b60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -120
app.py CHANGED
@@ -1,127 +1,134 @@
1
  import gradio as gr
2
- import requests
3
  import os
4
- import csv
5
- from sentence_transformers import util
6
-
7
# Gemini REST configuration. The key comes from the environment so it is never
# stored in the repository.
api_key = os.getenv("GOOGLE_API_KEY")

# Endpoint the embedding helper posts to (Gemini 1.5 Flash, embedText).
api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1-5-flash:embedText"

# NOTE(review): the key is sent as a Bearer token - confirm the endpoint
# accepts API keys this way rather than via `x-goog-api-key` or `?key=`.
headers = dict(
    [
        ("Authorization", f"Bearer {api_key}"),
        ("Content-Type", "application/json"),
    ]
)
15
-
16
def get_gemini_embeddings(text):
    """Request an embedding for ``text`` from the configured Gemini endpoint.

    Args:
        text: The text to embed.

    Returns:
        The value stored under the ``"embeddings"`` key of the JSON response,
        or an empty list when the text is blank, the request fails, the server
        answers with a non-200 status, or the body is not valid JSON.
    """
    # Nothing to embed: skip the network round trip entirely.
    if not text or not text.strip():
        return []

    data = {
        "model": "gemini-1-5-flash",  # Use the Gemini 1.5 Flash model
        "text": text,
    }

    try:
        # A timeout keeps a stalled connection from hanging the UI forever.
        response = requests.post(api_url, headers=headers, json=data, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: request to Gemini API failed - {exc}")
        return []

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return []

    try:
        return response.json().get("embeddings", [])
    except ValueError as exc:
        # A 200 response with a non-JSON body is treated like any other failure.
        print(f"Error: invalid JSON in Gemini API response - {exc}")
        return []
31
-
32
def extract_text_from_resume(resume_file):
    """Return the text of an uploaded resume.

    Placeholder: the argument is ignored and a fixed sample string is
    returned. Real extraction (for example PyPDF2 or textract for PDFs) still
    has to be wired in here.
    """
    placeholder_text = "Sample resume text"
    return placeholder_text
36
-
37
def extract_leadership_experience(resume_text):
    """Return the leadership experience found in ``resume_text``.

    Placeholder: the text is not inspected and a fixed example is returned.
    """
    example = "Leadership Experience Example"
    return example
40
-
41
def extract_entities_via_gemini(resume_text):
    """Return the candidate's name, email and contact number as a dict.

    Placeholder: no Gemini call is made yet; a fixed sample record with the
    keys ``name``, ``email`` and ``contact`` is returned.
    """
    sample_entities = dict(
        name="John Doe",
        email="john.doe@example.com",
        contact="123-456-7890",
    )
    return sample_entities
44
-
45
def save_results_to_csv(results, csv_file_path="/tmp/results.csv"):
    """Write the result rows to a CSV file and return the file's path.

    Args:
        results: Iterable of rows, each ordered like the header row below.
        csv_file_path: Destination file. Defaults to the original fixed
            location so existing callers are unaffected.

    Returns:
        The path the CSV was written to.
    """
    header = [
        "Resume Name",
        "Similarity Score (%)",
        "Eligibility",
        "Name",
        "Leadership Experience",
        "Email",
        "Contact",
    ]
    # Explicit UTF-8: file and candidate names may contain non-ASCII
    # characters that the platform's default encoding cannot represent.
    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(results)
    return csv_file_path
53
-
54
def check_similarity(job_description, resume_files):
    """Score every uploaded resume against the job description.

    Args:
        job_description: Free-text job description.
        resume_files: Uploaded resume files (objects exposing ``.name``), or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(rows, csv_path)`` tuple. Each row holds resume name, similarity
        percentage, eligibility, candidate name, leadership experience, email
        and contact. When the job description cannot be embedded, a single
        error row and ``None`` are returned instead.
    """
    # Get embeddings for the job description using Gemini 1.5 Flash
    job_emb = get_gemini_embeddings(job_description)
    if not job_emb:
        return [["Error in embedding job description using Gemini 1.5 Flash API."]], None

    results = []
    # The upload widget passes None when no file was selected.
    for resume_file in resume_files or []:
        resume_text = extract_text_from_resume(resume_file)
        resume_emb = get_gemini_embeddings(resume_text) if resume_text else []
        if not resume_emb:
            # No text or no embedding: the resume cannot be scored.
            results.append([
                resume_file.name,
                0,
                "Not Eligible",
                None,
                "No leadership experience",
                "No Email",
                "No Contact",
            ])
            continue

        # Calculate similarity score between job description and resume
        similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()
        eligible = similarity_score >= 0.50

        leadership_experience = extract_leadership_experience(resume_text)
        contact_info = extract_entities_via_gemini(resume_text)

        # The candidate name is only reported for eligible resumes.
        results.append([
            resume_file.name,
            similarity_score * 100,
            "Eligible" if eligible else "Not Eligible",
            contact_info.get('name', 'Unknown Candidate') if eligible else None,
            leadership_experience,
            contact_info.get('email', 'No Email'),
            contact_info.get('contact', 'No Contact'),
        ])

    # Save results to CSV and return them
    csv_file_path = save_results_to_csv(results)
    return results, csv_file_path
109
-
110
# Gradio UI: job description and resume uploads on one row, then the results
# table, the trigger button and the CSV download.
RESULT_COLUMNS = ["Resume Name", "Similarity Score (%)", "Eligibility", "Name", "Leadership Experience", "Email", "Contact"]

with gr.Blocks() as demo:
    with gr.Row():
        job_desc_input = gr.Textbox(label="Job Description", lines=3)
        resume_input = gr.Files(label="Upload Resumes", file_count="multiple", file_types=[".pdf", ".txt"])

    results_output = gr.Dataframe(headers=RESULT_COLUMNS)

    check_button = gr.Button("Check Similarity")

    # Created after the button so it keeps its place at the end of the layout.
    csv_output = gr.File(label="Download CSV")

    # Wire the button to the scoring function.
    check_button.click(
        check_similarity,
        inputs=[job_desc_input, resume_input],
        outputs=[results_output, csv_output],
    )

demo.launch()
 
1
  import gradio as gr
2
+ from sentence_transformers import SentenceTransformer, util
3
  import os
4
+ from PyPDF2 import PdfReader
5
+ import docx
6
+ import re
7
+ import google.generativeai as genai
8
+ from sklearn.feature_extraction.text import TfidfVectorizer
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+
11
# Sentence-embedding model used for the local similarity score; created once
# when the module is imported.
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# The Gemini 1.5 Flash client needs an API key from the environment; refuse to
# start without one.
api_key = os.getenv('GOOGLE_API_KEY')
if api_key:
    genai.configure(api_key=api_key)
else:
    raise ValueError("Google API key not found. Please set GOOGLE_API_KEY.")

# Upper bound on the number of resumes accepted per request.
MAX_RESUMES = 10
22
+
23
+ # Helper Functions
24
def extract_text_from_file(file_path):
    """Return the plain text of a ``.txt``, ``.pdf`` or ``.docx`` file.

    Args:
        file_path: Path of the file to read. The format is chosen from the
            file extension, case-insensitively.

    Returns:
        The extracted text, or ``""`` when the path is empty or the extension
        is not one of the three supported formats.
    """
    if not file_path:
        return ""

    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".txt":
        # Uploads come from arbitrary editors: replace undecodable bytes
        # instead of failing the whole request on a non-UTF-8 file.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()
    if ext == ".pdf":
        reader = PdfReader(file_path)
        # A page without a text layer can yield no text (possibly None), so
        # fall back to "". Pages are newline-separated so the last word of
        # one page is not fused with the first word of the next.
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    if ext == ".docx":
        doc = docx.Document(file_path)
        return " ".join(para.text for para in doc.paragraphs)
    return ""
37
+
38
def calculate_similarity(resume_text, job_desc):
    """Return the embedding cosine similarity of the two texts as a percentage.

    Both texts are encoded with the module-level ``sentence_model``; the
    cosine similarity is multiplied by 100 and rounded to two decimals.
    """
    resume_vec, job_vec = (
        sentence_model.encode(text, convert_to_tensor=True)
        for text in (resume_text, job_desc)
    )
    score = util.pytorch_cos_sim(resume_vec, job_vec)[0][0].item()
    return round(score * 100, 2)
43
+
44
def calculate_match_percentage(resume_text, job_desc):
    """Return the TF-IDF cosine similarity of the two texts as a percentage.

    Args:
        resume_text: Text extracted from the resume.
        job_desc: Text of the job description.

    Returns:
        A float rounded to two decimals. ``0.0`` is returned when either text
        is blank or when no usable vocabulary remains after removing English
        stop words.
    """
    # A blank document has nothing in common with anything.
    if not resume_text or not job_desc:
        return 0.0
    if not resume_text.strip() or not job_desc.strip():
        return 0.0

    vectorizer = TfidfVectorizer(stop_words="english")
    try:
        tfidf_matrix = vectorizer.fit_transform([resume_text, job_desc])
    except ValueError:
        # Raised when the vocabulary is empty, e.g. both texts contain only
        # stop words; report "no match" rather than failing the request.
        return 0.0

    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    # Convert the numpy scalar to a plain float for display.
    return round(float(cosine_sim[0][0]) * 100, 2)
50
+
51
def analyze_with_gemini(resume_text, job_desc):
    """Ask Gemini 1.5 Flash to analyse a resume against a job description.

    The prompt embeds both texts and asks for the candidate's name, skills,
    education, leadership and management experience, a match percentage and a
    five-bullet summary. The model's reply is returned with surrounding
    whitespace stripped.
    """
    prompt = f"""
    Analyze the resume with respect to the job description.
    Resume: {resume_text}
    Job Description: {job_desc}
    Extract:
    1. Candidate Name
    2. Relevant Skills
    3. Educational Background
    4. Team Leadership Experience (years)
    5. Management Experience (years)
    6. Match Percentage
    Provide a summary of qualifications in 5 bullet points.
    """
    model = genai.GenerativeModel('gemini-1.5-flash')
    reply = model.generate_content(prompt)
    analysis = reply.text
    return analysis.strip()
67
+
68
def _upload_path(upload):
    """Return the filesystem path of an upload given as a path string or as an object exposing ``.name``."""
    return upload if isinstance(upload, str) else upload.name


def process_resumes(job_desc_file, resumes):
    """Score each resume against the job description and format a report.

    Args:
        job_desc_file: Uploaded job description, as a path string or an
            object exposing the path through ``.name``.
        resumes: Uploaded resumes, in either of the same two forms.

    Returns:
        A text report with one section per resume (embedding similarity,
        TF-IDF match and Gemini analysis), or a short message when the inputs
        are missing, too numerous, or the job description is unreadable.
    """
    if not job_desc_file or not resumes:
        return "Please upload a job description and resumes for analysis."

    if len(resumes) > MAX_RESUMES:
        return f"Please upload no more than {MAX_RESUMES} resumes."

    # Load job description text
    try:
        job_desc = extract_text_from_file(_upload_path(job_desc_file))
    except Exception as e:
        return f"Failed to read the job description: {str(e)}"
    if not job_desc or not job_desc.strip():
        # Scoring against an empty description would only yield meaningless zeros.
        return "Could not extract any text from the job description file."

    results = []
    for resume in resumes:
        resume_path = _upload_path(resume)
        # Show the bare file name; the upload directory is a temp-dir detail.
        display_name = os.path.basename(resume_path)

        try:
            resume_text = extract_text_from_file(resume_path)
        except Exception:
            # One unreadable upload must not abort the rest of the batch.
            resume_text = ""

        if not resume_text or not resume_text.strip():
            results.append({
                "Resume": display_name,
                "Similarity (Embed)": 0.0,
                "Match Percentage (TF-IDF)": 0.0,
                "Gemini Analysis": "Failed to extract text from resume."
            })
            continue

        # Calculate similarity using embeddings
        embed_similarity = calculate_similarity(resume_text, job_desc)

        # Calculate match percentage using TF-IDF
        tfidf_match = calculate_match_percentage(resume_text, job_desc)

        # Detailed analysis with Gemini API; a failure is reported in place
        # of the analysis so the local scores are still shown.
        try:
            gemini_analysis = analyze_with_gemini(resume_text, job_desc)
        except Exception as e:
            gemini_analysis = f"Gemini analysis failed: {str(e)}"

        results.append({
            "Resume": display_name,
            "Similarity (Embed)": embed_similarity,
            "Match Percentage (TF-IDF)": tfidf_match,
            "Gemini Analysis": gemini_analysis
        })

    # Format results for display
    output = "\n\n".join(
        f"**{res['Resume']}**\n"
        f"Similarity (Embed): {res['Similarity (Embed)']}%\n"
        f"Match Percentage (TF-IDF): {res['Match Percentage (TF-IDF)']}%\n"
        f"Gemini Analysis:\n{res['Gemini Analysis']}\n"
        for res in results
    )
    return output
119
+
120
# Gradio Interface
# Uploads are limited to the three formats the text extractor understands, so
# unsupported files are rejected by the widget instead of failing later.
# NOTE(review): the two inputs request different `type` values ("filepath" vs
# "file"); each looks valid only in some Gradio major versions - confirm
# against the pinned Gradio release before changing either.
SUPPORTED_UPLOAD_TYPES = [".txt", ".pdf", ".docx"]

job_desc_input = gr.File(
    label="Upload Job Description (TXT, PDF, DOCX)",
    type="filepath",
    file_types=SUPPORTED_UPLOAD_TYPES,
)
resumes_input = gr.Files(
    label="Upload Resumes (TXT, PDF, DOCX)",
    type="file",
    file_types=SUPPORTED_UPLOAD_TYPES,
)

results_output = gr.Textbox(label="Analysis Results", lines=30)

interface = gr.Interface(
    fn=process_resumes,
    inputs=[job_desc_input, resumes_input],
    outputs=[results_output],
    title="Resume Analysis with Gemini API",
    description="Upload a job description and resumes to evaluate candidates' match."
)

interface.launch()