Spaces:

Prernas19
/

resume_parser

Sleeping

App Files Files Community

Prernas19 committed on Aug 10, 2024

Commit

51593d8

verified ·

1 Parent(s): 7eb897b

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -28

app.py CHANGED Viewed

@@ -12,7 +12,6 @@ download("en_core_web_sm")
 # Load the spaCy model
 nlp = spacy.load("en_core_web_sm")
 # Set of English words
 nltk.download('words', quiet=True)
 english_words = set(words.words())
@@ -40,37 +39,32 @@ def extract_text_from_docx(file):
     return "\n".join([para.text for para in doc.paragraphs])
 def extract_companies(text):
-    # Process the text with the spaCy model
     doc = nlp(text)
     companies = []
-    # Define a regex pattern for common company name suffixes
     company_pattern = re.compile(
         r'\b(?:Inc|Corp|LLC|Ltd|Co|Company|Group|Services|Technologies|Pvt|Solutions|Consulting)\b', re.IGNORECASE)
-    # Iterate over the identified entities in the text
     for ent in doc.ents:
-        if ent.label_ == "ORG":
-            # Apply the regex pattern to filter out company names
-            if company_pattern.search(ent.text):
-                companies.append(ent.text)
-    return companies
 def extract_colleges(text):
     doc = nlp(text)
     colleges = []
-    # Extended list of education-related keywords
     edu_keywords = ["university", "college", "institute", "school", "academy", "polytechnic", "faculty", "department", "center", "centre", "campus", "educational", "institute of technology"]
     for sent in doc.sents:
-        # Extract entities labeled as ORG and check if they contain education-related keywords
         edu_ents = [ent for ent in sent.ents if ent.label_ == "ORG" and any(keyword in ent.text.lower() for keyword in edu_keywords)]
         for edu in edu_ents:
             colleges.append(edu.text)
-    return colleges
 def extract_years_of_experience(text):
     years = re.findall(r'(\d+)\s+year[s]*', text, re.IGNORECASE)
@@ -131,28 +125,27 @@ def parse_resume(file):
         summary = extract_summary(doc)
         linkedin = extract_linkedin(text)
-        result = {
-            "Companies Worked For": companies,
-            "Colleges Attended": colleges,
-            "Years of Experience": years_of_experience,
-            "Phone Number": phone,
-            "Email ID": email,
-            "Summary": summary,
-            "LinkedIn ID": linkedin
-        }
-        return result
     except Exception as e:
         import traceback
-        return {"Error": f"An error occurred while parsing the resume: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"}
-# Create Gradio interface
 iface = gr.Interface(
     fn=parse_resume,
     inputs=gr.File(label="Upload Resume (PDF or DOCX)"),
-    outputs=gr.JSON(label="Extracted Information"),
     title="Advanced Resume Parser",
     description="Upload a resume in PDF or DOCX format to extract key information."
 )
-iface.launch(share=True)

 # Load the spaCy model
 nlp = spacy.load("en_core_web_sm")
 # Set of English words
 nltk.download('words', quiet=True)
 english_words = set(words.words())
     return "\n".join([para.text for para in doc.paragraphs])
 def extract_companies(text):
     doc = nlp(text)
     companies = []
     company_pattern = re.compile(
         r'\b(?:Inc|Corp|LLC|Ltd|Co|Company|Group|Services|Technologies|Pvt|Solutions|Consulting)\b', re.IGNORECASE)
     for ent in doc.ents:
+        if ent.label_ == "ORG" and company_pattern.search(ent.text):
+            companies.append(ent.text)
+    # Join companies with new lines
+    return "\n".join(companies)
 def extract_colleges(text):
     doc = nlp(text)
     colleges = []
     edu_keywords = ["university", "college", "institute", "school", "academy", "polytechnic", "faculty", "department", "center", "centre", "campus", "educational", "institute of technology"]
     for sent in doc.sents:
         edu_ents = [ent for ent in sent.ents if ent.label_ == "ORG" and any(keyword in ent.text.lower() for keyword in edu_keywords)]
         for edu in edu_ents:
             colleges.append(edu.text)
+    # Join colleges with new lines
+    return "\n".join(colleges)
 def extract_years_of_experience(text):
     years = re.findall(r'(\d+)\s+year[s]*', text, re.IGNORECASE)
         summary = extract_summary(doc)
         linkedin = extract_linkedin(text)
+        return companies, colleges, years_of_experience, phone, email, summary, linkedin
     except Exception as e:
         import traceback
+        return f"An error occurred while parsing the resume: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
+# Create Gradio interface with separate output components
 iface = gr.Interface(
     fn=parse_resume,
     inputs=gr.File(label="Upload Resume (PDF or DOCX)"),
+    outputs=[
+        gr.Textbox(label="Companies Worked For", lines=10),
+        gr.Textbox(label="Colleges Attended", lines=10),
+        gr.Textbox(label="Years of Experience"),
+        gr.Textbox(label="Phone Number"),
+        gr.Textbox(label="Email ID"),
+        gr.Textbox(label="Summary", lines=3),
+        gr.Textbox(label="LinkedIn ID")
+    ],
     title="Advanced Resume Parser",
     description="Upload a resume in PDF or DOCX format to extract key information."
 )
+iface.launch(share=True)