Ahmad-Moiz committed on
Commit
1460310
1 Parent(s): 915e8b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -18
app.py CHANGED
@@ -1,17 +1,15 @@
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  import os
4
- from pathlib import Path
5
- from typing import Any, Dict, List, Optional
6
  from llama_index.llama_pack.base import BaseLlamaPack
7
- from llama_index.readers import PDFReader
8
  from llama_index.llms.base import LLM
9
  from llama_index.llms import OpenAI
10
  from llama_index import ServiceContext
11
  from llama_index.schema import NodeWithScore
12
  from llama_index.response_synthesizers import TreeSummarize
13
  from pydantic import BaseModel
14
- import pdfplumber
15
  import io
16
 
17
  # Load environment variables from .env file
@@ -55,7 +53,6 @@ class ResumeScreenerPack(BaseLlamaPack):
55
  criteria: List[str] = [],
56
  llm: Optional[LLM] = None
57
  ) -> None:
58
- self.reader = PDFReader()
59
  llm = llm or OpenAI(model="gpt-4", api_key=openai_api_key)
60
  service_context = ServiceContext.from_defaults(llm=llm)
61
  criteria_str = _format_criteria_str(criteria)
@@ -66,16 +63,15 @@ class ResumeScreenerPack(BaseLlamaPack):
66
  output_cls=ResumeScreenerDecision, service_context=service_context
67
  )
68
 
69
- def get_modules(self) -> Dict[str, Any]:
70
  """Get modules."""
71
- return {"reader": self.reader, "synthesizer": self.synthesizer}
72
 
73
- def run(self, resume_path: str, *args: Any, **kwargs: Any) -> Any:
74
  """Run pack."""
75
- docs = self.reader.load_data(Path(resume_path))
76
  output = self.synthesizer.synthesize(
77
  query=self.query,
78
- nodes=[NodeWithScore(node=doc, score=1.0) for doc in docs],
79
  )
80
  return output.response
81
 
@@ -103,14 +99,13 @@ def main():
103
  def extract_text_from_pdf(uploaded_file):
104
  if uploaded_file is not None:
105
  try:
106
- # Read PDF content from BytesIO
107
- uploaded_content = io.BytesIO(uploaded_file.read())
108
-
109
- with pdfplumber.open(uploaded_content) as pdf:
110
- text = ""
111
- for page in pdf.pages:
112
- text += page.extract_text()
113
- return text
114
  except Exception as e:
115
  st.error(f"Error extracting text from PDF: {str(e)}")
116
  return ""
 
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  import os
4
+ from typing import Any, List, Optional
 
5
  from llama_index.llama_pack.base import BaseLlamaPack
 
6
  from llama_index.llms.base import LLM
7
  from llama_index.llms import OpenAI
8
  from llama_index import ServiceContext
9
  from llama_index.schema import NodeWithScore
10
  from llama_index.response_synthesizers import TreeSummarize
11
  from pydantic import BaseModel
12
+ import PyPDF2
13
  import io
14
 
15
  # Load environment variables from .env file
 
53
  criteria: List[str] = [],
54
  llm: Optional[LLM] = None
55
  ) -> None:
 
56
  llm = llm or OpenAI(model="gpt-4", api_key=openai_api_key)
57
  service_context = ServiceContext.from_defaults(llm=llm)
58
  criteria_str = _format_criteria_str(criteria)
 
63
  output_cls=ResumeScreenerDecision, service_context=service_context
64
  )
65
 
66
+ def get_modules(self) -> dict:
67
  """Get modules."""
68
+ return {"synthesizer": self.synthesizer}
69
 
70
+ def run(self, resume_text: str) -> Any:
71
  """Run pack."""
 
72
  output = self.synthesizer.synthesize(
73
  query=self.query,
74
+ nodes=[NodeWithScore(node=resume_text, score=1.0)],
75
  )
76
  return output.response
77
 
 
99
  def extract_text_from_pdf(uploaded_file):
100
  if uploaded_file is not None:
101
  try:
102
+ # Read PDF content using PyPDF2
103
+ pdf_reader = PyPDF2.PdfFileReader(uploaded_file)
104
+ text = ""
105
+ for page_num in range(pdf_reader.numPages):
106
+ page = pdf_reader.getPage(page_num)
107
+ text += page.extractText()
108
+ return text
 
109
  except Exception as e:
110
  st.error(f"Error extracting text from PDF: {str(e)}")
111
  return ""