Hammad712 committed on
Commit
2cc093f
·
verified ·
1 Parent(s): 05847c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -71
app.py CHANGED
@@ -1,78 +1,31 @@
1
  import streamlit as st
2
- import os
3
- import json
4
- import tempfile
5
- from typing import List
6
- from pydantic import BaseModel
7
- from langchain_groq import ChatGroq
8
- from langchain.document_loaders import PyPDFLoader
9
 
10
- # Define the response schema using Pydantic
11
- class ExtractionResult(BaseModel):
12
- answers: List[str]
13
 
14
- # Function to initialize the LLM
15
- def get_llm(api_key: str):
16
- return ChatGroq(
17
- model="llama-3.3-70b-versatile",
18
- temperature=0,
19
- max_tokens=1024,
20
- api_key=api_key
21
- )
22
 
23
- # Function to process the uploaded PDF and extract text
24
- def process_pdf(file) -> str:
25
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
26
- tmp_file.write(file.read())
27
- tmp_path = tmp_file.name
28
-
29
- loader = PyPDFLoader(tmp_path)
30
- pages = loader.load_and_split()
31
- os.remove(tmp_path)
32
- all_page_content = "\n".join(page.page_content for page in pages)
33
- return all_page_content
34
-
35
- # Build the prompt using the JSON schema from ExtractionResult
36
- def build_prompt(all_page_content: str) -> str:
37
- schema_dict = ExtractionResult.model_json_schema()
38
- schema = json.dumps(schema_dict, indent=2)
39
- system_message = (
40
- "You are a document analysis tool that extracts the options and correct answers from the provided document content. "
41
- "The output must be a JSON object that strictly follows the schema: " + schema
42
- )
43
- user_message = (
44
- "Please extract the correct answers and options (A, B, C, D, E) from the following document content:\n\n"
45
- + all_page_content
46
- )
47
- return system_message + "\n\n" + user_message
48
-
49
- def main():
50
- st.title("PDF Answer Extraction App")
51
- st.write("Upload a PDF document to extract the correct answers and options.")
52
-
53
- # Retrieve API key from Streamlit secrets or environment variables
54
- api_key = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
55
- if not api_key:
56
- st.error("GROQ API key not found! Please set it in your environment or Streamlit secrets.")
57
- st.stop()
58
-
59
- # Initialize the language model
60
- llm = get_llm(api_key)
61
-
62
- uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
63
-
64
- if uploaded_file is not None:
65
- with st.spinner("Processing the PDF..."):
66
  try:
67
- all_page_content = process_pdf(uploaded_file)
68
- prompt = build_prompt(all_page_content)
69
- response = llm.invoke(prompt, response_format={"type": "json_object"})
70
- result = ExtractionResult.model_validate_json(response.content)
71
-
72
- st.success("Extraction complete!")
73
- st.json(result.model_dump())
 
 
 
 
 
 
 
 
 
74
  except Exception as e:
75
  st.error(f"An error occurred: {e}")
76
-
77
- if __name__ == "__main__":
78
- main()
 
1
  import streamlit as st
2
+ import requests
 
 
 
 
 
 
3
 
4
+ st.title("PDF Extraction App")
5
+ st.write("Upload a PDF file to extract correct answers and options using the backend service.")
 
6
 
7
+ # File uploader widget for PDF files
8
+ uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
 
 
 
 
 
 
9
 
10
+ if uploaded_file is not None:
11
+ if st.button("Extract Answers"):
12
+ with st.spinner("Processing the file, please wait..."):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  try:
14
+ # Prepare the file payload
15
+ files = {
16
+ "file": (uploaded_file.name, uploaded_file.read(), "application/pdf")
17
+ }
18
+ # Make a POST request to the FastAPI endpoint
19
+ response = requests.post(
20
+ "https://hammad712-grading.hf.space/extract-answers/",
21
+ files=files
22
+ )
23
+ # Check for successful response
24
+ if response.status_code == 200:
25
+ result = response.json()
26
+ st.success("Extraction successful!")
27
+ st.json(result)
28
+ else:
29
+ st.error(f"Error: {response.text}")
30
  except Exception as e:
31
  st.error(f"An error occurred: {e}")