Create app.py
app.py
ADDED
import streamlit as st
import os
import json
import tempfile
from typing import List
from pydantic import BaseModel
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader  # PyPDFLoader now lives in langchain-community

# Define the response schema using Pydantic
class ExtractionResult(BaseModel):
    answers: List[str]
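# For illustration, a response that satisfies this schema might look like:
#   {"answers": ["B", "D", "A"]}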

# Function to initialize the LLM
def get_llm(api_key: str):
    return ChatGroq(
        model="llama-3.3-70b-versatile",
        temperature=0,
        max_tokens=1024,
        api_key=api_key,
    )
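# Note: langchain chat models also expose with_structured_output(), e.g.
# get_llm(key).with_structured_output(ExtractionResult), which would delegate
# JSON parsing to the library; this app builds the schema prompt by hand instead.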

# Function to process the uploaded PDF and extract text
def process_pdf(file) -> str:
    # PyPDFLoader expects a file path, so persist the upload to a temp file;
    # delete=False keeps it on disk until we remove it explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(file.read())
        tmp_path = tmp_file.name

    loader = PyPDFLoader(tmp_path)
    try:
        pages = loader.load_and_split()
    finally:
        os.remove(tmp_path)  # clean up the temp file even if loading fails
    all_page_content = "\n".join(page.page_content for page in pages)
    return all_page_content
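# load_and_split() runs pages through langchain's default text splitter; plain
# loader.load() would return one Document per page if chunking is not needed.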

# Build the prompt using the JSON schema from ExtractionResult
def build_prompt(all_page_content: str) -> str:
    schema_dict = ExtractionResult.model_json_schema()
    schema = json.dumps(schema_dict, indent=2)
    system_message = (
        "You are a document analysis tool that extracts the options and correct answers "
        "from the provided document content. "
        "The output must be a JSON object that strictly follows the schema: " + schema
    )
    user_message = (
        "Please extract the correct answers and options (A, B, C, D, E) "
        "from the following document content:\n\n"
        + all_page_content
    )
    return system_message + "\n\n" + user_message
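# For reference, ExtractionResult.model_json_schema() serializes to roughly:
#   {"properties": {"answers": {"items": {"type": "string"}, "title": "Answers",
#    "type": "array"}}, "required": ["answers"], "title": "ExtractionResult",
#    "type": "object"}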

def main():
    st.title("PDF Answer Extraction App")
    st.write("Upload a PDF document to extract the correct answers and options.")

    # Retrieve API key from Streamlit secrets or environment variables
    api_key = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
    if not api_key:
        st.error("GROQ API key not found! Please set it in your environment or Streamlit secrets.")
        st.stop()

    # Initialize the language model
    llm = get_llm(api_key)

    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])

    if uploaded_file is not None:
        with st.spinner("Processing the PDF..."):
            try:
                all_page_content = process_pdf(uploaded_file)
                prompt = build_prompt(all_page_content)
                # Request JSON-mode output from Groq so the response parses cleanly
                response = llm.invoke(prompt, response_format={"type": "json_object"})
                result = ExtractionResult.model_validate_json(response.content)

                st.success("Extraction complete!")
                st.json(result.model_dump())
            except Exception as e:
                st.error(f"An error occurred: {e}")

if __name__ == "__main__":
    main()
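
# To run locally (assuming streamlit, pydantic, langchain-groq,
# langchain-community, and pypdf are installed):
#   streamlit run app.py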