Spaces:
Runtime error
Runtime error
Commit
•
a5162e0
1
Parent(s):
ea667dd
Upload 4 files
Browse files- .env-sample +1 -0
- app.py +182 -0
- prompt_generation.py +117 -0
- utils.py +18 -0
.env-sample
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
OPENAI_API_KEY=""
|
app.py
ADDED
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from streamlit_chat import message
|
3 |
+
from utils import PAGE, read_pdf
|
4 |
+
from prompt_generation import OpenAILLM
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
|
7 |
+
# Read environment variables from a local .env file into the process
# environment (the repo's .env-sample shows OPENAI_API_KEY is expected)
# so the OpenAI-backed LLM client can authenticate.
load_dotenv()
10 |
+
def init():
    """Initialise per-session state on the first run of the script.

    Streamlit re-executes the whole script on every interaction, so all
    state that must survive reruns lives in ``st.session_state`` and is
    created exactly once, guarded by the ``current_page`` sentinel key.
    """
    if 'current_page' not in st.session_state:
        st.session_state.current_page = PAGE.MAIN
        # Fixed number of questions per generated quiz.
        st.session_state.mcq_question_number = 3
        st.session_state.llm = OpenAILLM(mcq_question_number=st.session_state.mcq_question_number)
        st.session_state.chat_start = False
        st.session_state.chat_messages = []
        # Bug fix (robustness): mcq_page() reads this key, but previously only
        # main_page()'s quiz button ever set it. Initialising it here prevents
        # a KeyError if the MCQ page is reached through any other path.
        st.session_state.current_question = 0
|
17 |
+
|
18 |
+
# Setting page title and header
# NOTE(review): st.set_page_config is expected to be the first Streamlit
# call of a script run — keep it ahead of any other st.* usage.
st.set_page_config(page_title="AILearningBuddy", page_icon=":book:")
# Centered page title (raw HTML is needed for the centering style).
st.markdown("<h1 style='text-align: center;'>AI Learning Buddy</h1>", unsafe_allow_html=True)
|
21 |
+
|
22 |
+
|
23 |
+
def main_page():
    """Landing page: document upload plus navigation into the LEARN/TEST flows."""
    # Header
    st.header("Main Page")

    # Upload docs
    file = st.file_uploader("Upload documents", type=["pdf", "txt"])
    MAX_FILE_SIZE = 2 * 1024 * 1024  # 2 MB
    if file is not None:
        # Check the file size
        file_size = file.size
        if file_size > MAX_FILE_SIZE:
            st.error(f"File size should not exceed {MAX_FILE_SIZE / (1024 * 1024)} MB. Please upload a smaller file.")
        # Bug fix: success used to be reported *before* the type check, so an
        # unsupported type showed "File uploaded successfully!" followed by an
        # error. Success is now reported only after the file is ingested.
        elif file.type == "application/pdf":
            st.session_state.llm.upload_text(read_pdf(file))
            st.success("File uploaded successfully!")
        elif file.type == "text/plain":
            st.session_state.llm.upload_text(file.read().decode("utf-8"))
            st.success("File uploaded successfully!")
        else:
            # Defensive: the uploader is already restricted to pdf/txt.
            st.error("Unsupported file type.")

    # Display buttons if file is uploaded
    if st.session_state.llm.is_text_uploaded():
        col1, col2 = st.columns([1, 1])

        with col1:
            st.markdown("<h4>LEARN</h4>", unsafe_allow_html=True)

            if st.button("Create summary", key="summary_button"):
                st.session_state.current_page = PAGE.SUMMARY
                st.rerun()
            if st.button("Chat about the file", key="chat_button"):
                st.session_state.current_page = PAGE.CHAT
                st.session_state.chat_start = True
                st.rerun()
        with col2:
            st.markdown("<h4>TEST</h4>", unsafe_allow_html=True)

            if st.button("Create quiz", key="mcq_button"):
                st.session_state.current_page = PAGE.MCQ
                st.session_state.current_question = 0
                st.rerun()
|
69 |
+
|
70 |
+
|
71 |
+
def summary_page():
    """Render the document-summary page with a back-navigation button."""
    # Back button: return to the main page and drop the uploaded document.
    go_back = st.button(":back: Main Page")
    if go_back:
        st.session_state.current_page = PAGE.MAIN
        st.session_state.llm.empty_text()
        st.rerun()

    st.header("Summary")
    # Ask the LLM for a summary of the uploaded document and display it.
    st.write(st.session_state.llm.get_text_summary())
|
84 |
+
|
85 |
+
|
86 |
+
def chat_page():
    """Chat-with-the-document page.

    ``st.session_state.chat_messages`` is a flat list: each turn appends the
    user input followed by the model response, so entries alternate
    user / AI. Index 0 holds the document dump returned by ``start_chat()``
    and is deliberately never displayed.
    """
    # Header
    if st.button(":back: Main Page"):
        st.session_state.current_page = PAGE.MAIN
        st.session_state.chat_start = False
        st.session_state.chat_messages = []
        st.session_state.llm.empty_text()
        st.rerun()
    st.header("Chat About the Document")

    # Response and user container
    response_container = st.container()
    user_container = st.container()

    with user_container:
        with st.form(key='my_form', clear_on_submit=True):
            user_input = st.text_area("Type here:", key='input', height=100)
            send_button = st.form_submit_button(label='Send')

    if send_button or st.session_state.chat_start:
        # Get the model response, and save it
        if st.session_state.chat_start:
            # First visit: start_chat() feeds the document into the chat
            # memory and returns (memory dump, greeting); the dump becomes
            # chat_messages[0], which the display loop below skips.
            user_input, model_response = st.session_state.llm.start_chat()
            st.session_state.chat_start = False
        else:
            model_response = st.session_state.llm.get_chat_response(user_input)
        st.session_state.chat_messages += [user_input, model_response]

    # Display chat messages
    with response_container:
        # Start at 1 to skip the hidden document dump at index 0; odd
        # indices are AI responses, even indices are user messages.
        for i in range(1, len(st.session_state.chat_messages)):
            if i % 2:
                message(st.session_state.chat_messages[i], key=f'{str(i)}_AI', avatar_style="pixel-art")
            else:
                message(st.session_state.chat_messages[i], is_user=True, key=f'{str(i)}_user',
                        avatar_style="adventurer-neutral")
|
122 |
+
|
123 |
+
|
124 |
+
def mcq_page():
    """Quiz page: renders one multiple-choice question per pass, then results.

    Uses ``st.session_state.current_question`` as the cursor; the results
    branch is reached once it passes ``mcq_question_number``.
    """
    # Header
    if st.button(":back: Main Page"):
        st.session_state.current_page = PAGE.MAIN
        st.session_state.current_question = 0
        st.session_state.llm.empty_text()
        st.rerun()

    # Setup MCQ
    if st.session_state.current_question == 0:
        st.session_state.llm.start_mcq()

    # For every MCQ question
    if st.session_state.current_question < st.session_state.mcq_question_number:
        # QA header
        st.header(f"Question {st.session_state.current_question + 1} / {st.session_state.mcq_question_number}")

        # Generate the QA text
        # NOTE(review): this fires a fresh LLM generation on *every* rerun of
        # the page — including the rerun triggered by the form submission
        # below — so mcq_record_answer() may attach the user's choice to a
        # newer question than the one they actually saw. Verify this rerun
        # flow; an st.rerun() after the increment (matching the other
        # handlers) is probably intended.
        question, answers = st.session_state.llm.get_mcq_question()

        # QA form
        with st.form(key='my_form', clear_on_submit=True):
            selected_answer = st.radio(f"{question}:", answers)
            send_button = st.form_submit_button(label="Next")
            if send_button:
                print("SELECTED ANSWER: ", selected_answer)  # NOTE(review): leftover debug print
                st.session_state.llm.mcq_record_answer(selected_answer)
                st.session_state.current_question += 1
    else:
        # Results header
        st.header("Results")

        # For the last QA, show score
        st.session_state.current_question += 1
        score, score_perc = st.session_state.llm.get_mcq_score()
        st.markdown("<h4>" + f"Score: {score} / {st.session_state.mcq_question_number} ({score_perc} %)" + "</h4>", unsafe_allow_html=True)

        # List your answers and the correct ones
        for i, qa in enumerate(st.session_state.llm.mcq_answer_sheet):
            question, answer, user_answer = qa['question'], qa['answer'], qa['user_answer']
            st.write("---")
            st.write(f"**Question {i+1}/{st.session_state.mcq_question_number}:** {question}")
            st.write(f"**Correct answer:** {answer}")
            st.write(f"**User answer:** {user_answer}")
|
168 |
+
|
169 |
+
|
170 |
+
# Main structure: set up session state, then render whichever page is active.
init()

# Page selector — dispatch table mapping each PAGE value to its renderer.
_PAGE_RENDERERS = {
    PAGE.MAIN: main_page,
    PAGE.SUMMARY: summary_page,
    PAGE.CHAT: chat_page,
    PAGE.MCQ: mcq_page,
}
# .get with a no-op default mirrors the silent fallthrough of a match
# statement with no matching case.
_PAGE_RENDERERS.get(st.session_state.current_page, lambda: None)()
|
prompt_generation.py
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_openai import ChatOpenAI
|
2 |
+
from langchain.chains import ConversationChain
|
3 |
+
from langchain.chains.conversation.memory import ConversationBufferMemory, ConversationSummaryMemory
|
4 |
+
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
|
5 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
6 |
+
from langchain.docstore.document import Document
|
7 |
+
from langchain.chains.summarize import load_summarize_chain
|
8 |
+
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
|
9 |
+
import json
|
10 |
+
import random
|
11 |
+
|
12 |
+
|
13 |
+
class OpenAILLM:
    """Wrapper around an OpenAI chat model backing the app's three features:
    document summarisation, document Q&A chat, and MCQ quiz generation.
    """

    def __init__(self, temperature: float = 1.,
                 model_name: str = 'gpt-4o',
                 mcq_question_number: int = 10):
        """Build the LLM, the summarisation and conversation chains, and the
        quiz bookkeeping.

        :param temperature: sampling temperature passed to ChatOpenAI
        :param model_name: OpenAI model identifier
        :param mcq_question_number: number of questions per generated quiz
        """
        # Model-related instantiations
        self.llm = ChatOpenAI(temperature=temperature, model_name=model_name)
        self.Memory = ConversationBufferMemory
        self.chain_summary = load_summarize_chain(self.llm, chain_type="map_reduce", verbose=True)
        self.chain_chat = ConversationChain(llm=self.llm, verbose=False, memory=self.Memory())

        # Other utils instantiation
        self.docs = []
        self.text_splitter = RecursiveCharacterTextSplitter()
        self.chat_document_intro = "Read the following document: "
        self.chat_message_begin = "What would you like to know about the uploaded document?"
        self.mcq_question_number = mcq_question_number
        self.mcq_intro = """
        Generate a question, correct answer and 3 possible false answers from the inputted document.
        Make sure that it is unique from the ones you have generated before!
        Only create 3 possible false answers and a correct answers!
        """
        self.mcq_answer_sheet = []

    def upload_text(self, text):
        """Split raw text into chunks and store them as langchain Documents."""
        chunks = self.text_splitter.split_text(text)
        # Keyword argument makes the Document construction explicit.
        self.docs = [Document(page_content=chunk) for chunk in chunks]

    def is_text_uploaded(self):
        """Return True when a document has been uploaded and split."""
        return bool(self.docs)

    def empty_text(self):
        """Drop the stored document and reset the conversation memory."""
        self.docs = []
        self.chain_chat.memory = self.Memory()

    def get_text_summary(self):
        """Return a map-reduce summary of the uploaded document."""
        return self.chain_summary.run(self.docs)

    def start_chat(self):
        """Feed the document into the chat memory and open the conversation.

        :return: (string dump of the memory, canned opening message) — the
            memory dump stands in for the "user message" of the first turn.
        """
        # Add document to the system's context
        self.chain_chat.memory.save_context({"input": self.chat_document_intro}, {"output": ""})
        for doc in self.docs:
            self.chain_chat.memory.save_context({"input": doc.page_content}, {"output": ""})

        return str(self.chain_chat.memory), self.chat_message_begin

    def get_chat_response(self, user_input: str):
        """Return the model's reply to user_input within the running chat."""
        return self.chain_chat.predict(input=user_input)

    def start_mcq(self):
        """Prepare a quiz: build the JSON-structured question prompt and
        upload the document into the chat memory."""
        # Instantiate response schema to define JSON output
        response_schemas = [
            ResponseSchema(name="question", description="Question generated from provided document."),
            ResponseSchema(name="answer", description="One correct answer for the asked question."),
            ResponseSchema(name="choices",
                           description="3 available false options for a multiple-choice question in comma separated."),
        ]
        output_format_instructions = StructuredOutputParser.from_response_schemas(
            response_schemas).get_format_instructions()

        # Define the prompt that will be used for MCQ questions
        prompt = PromptTemplate(
            template="{task_instructions}\n {output_format_instructions}",
            input_variables=["task_instructions", "output_format_instructions"]
        )

        # Get the MCQ query based on the prompt (by filling in the prompt values)
        self.mcq_query = prompt.format(task_instructions=self.mcq_intro,
                                       output_format_instructions=output_format_instructions)

        # Bug fix: reset any previous quiz — the sheet was never cleared, so a
        # second quiz inherited old answers and get_mcq_score() over-counted.
        self.mcq_answer_sheet = []

        # Upload the document to the model
        self.start_chat()

    @staticmethod
    def _strip_json_fence(text: str) -> str:
        """Strip a Markdown ```json ... ``` fence if (and only if) present.

        The original sliced fixed offsets off both ends unconditionally,
        which corrupted responses the model returned without a fence.
        """
        text = text.strip()
        if text.startswith("```"):
            # Drop the opening fence line (``` or ```json).
            text = text.split("\n", 1)[1] if "\n" in text else text[3:]
            if text.endswith("```"):
                text = text[:-3]
        return text.strip()

    def get_mcq_question(self, max_attempts: int = 5):
        """Generate one multiple-choice question from the document.

        Retries on malformed model output, but — unlike the original, which
        looped forever — gives up after max_attempts tries.

        :param max_attempts: number of generation attempts before giving up
        :return: (question text, answers shuffled so the correct one is not
            always first); the unshuffled entry is appended to
            ``mcq_answer_sheet`` with the correct answer at ``answers[0]``.
        :raises RuntimeError: when no attempt yields parseable JSON
        """
        last_error = None
        for _ in range(max_attempts):
            try:
                response = self.chain_chat.predict(input=self.mcq_query)
                response_parsed = json.loads(self._strip_json_fence(response))

                question = response_parsed["question"]
                answers = [response_parsed["answer"]] + [false_answer.strip() for false_answer in
                                                         response_parsed["choices"].split(',')]
                break
            except Exception as e:  # malformed JSON / missing keys: retry
                last_error = e
        else:
            raise RuntimeError("Failed to generate a parseable MCQ question") from last_error

        self.mcq_answer_sheet.append({
            "question": question,
            "answer": answers[0],
            "user_answer": None,
            "choices": answers
        })
        return question, random.sample(answers, len(answers))

    def mcq_record_answer(self, answer):
        """Record the user's choice for the most recently generated question."""
        self.mcq_answer_sheet[-1]["user_answer"] = answer

    def get_mcq_score(self):
        """Return (number of correct answers, percentage score).

        The percentage is rounded *after* scaling to 100 to avoid float
        artifacts such as 66.67000000000001 from the original
        ``round(x, 4) * 100``.
        """
        score = sum(sheet['answer'] == sheet['user_answer'] for sheet in self.mcq_answer_sheet)
        score_perc = round(score / self.mcq_question_number * 100, 2)
        return score, score_perc
|
utils.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import Enum
|
2 |
+
import PyPDF2
|
3 |
+
|
4 |
+
# Identifiers for the app's navigable pages, declared via the functional
# Enum API; member names and values match the original class-based
# declaration exactly (MAIN=1, SUMMARY=2, MCQ=3, CHAT=4).
PAGE = Enum("PAGE", [("MAIN", 1), ("SUMMARY", 2), ("MCQ", 3), ("CHAT", 4)])
|
9 |
+
|
10 |
+
|
11 |
+
# Function to read PDF content
def read_pdf(file):
    """Extract and concatenate the text of every page in a PDF.

    :param file: path or binary file-like object accepted by PyPDF2.PdfReader
    :return: concatenated page text (may be "" for image-only PDFs)
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Iterate pages directly rather than indexing range(len(...)); join once
    # instead of quadratic string +=. Guard with `or ""` because
    # extract_text() can yield None/"" for pages without a text layer.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|