Upload 6 files
- Pic.png +0 -0
- app.py +101 -0
- deep-learning.png +0 -0
- prompts.py +88 -0
- requirements.txt +7 -0
- utils.py +48 -0
Pic.png
ADDED
app.py
ADDED
@@ -0,0 +1,101 @@
from langchain.document_loaders import PyPDFLoader
from utils import *
import os
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import streamlit as st

st.set_page_config(layout="wide", page_title="QA Pair Generation from Documents", page_icon='deep-learning.png')

temperature = 0.3
pages = []
numPairs = 2
option = ''
optionCategory = ("Long QA Pairs", "MCQs", "Short QA Pairs")

load_dotenv()
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))


def LongQAPairs():
    with st.spinner('Generating Long Question Answer Pairs...'):
        # Note: the slice below drops the last page split of the document.
        response = getLongQAPairs(pages[0:len(pages) - 1], numPairs, model)

    for qaPair in response:
        with st.chat_message("user"):
            st.write("Question: {}".format(qaPair['question']))
            st.write("Answer: {}".format(qaPair['answer']))


def ShortQAPairs():
    with st.spinner('Generating Short Question Answer Pairs...'):
        response = getShortQAPairs(pages[0:len(pages) - 1], numPairs, model)

    for qaPair in response:
        with st.chat_message("user"):
            st.write("Question: {}".format(qaPair['question']))
            st.write("Answer: {}".format(qaPair['answer']))


def McqQAPairs():
    with st.spinner('Generating MCQ Question Answer Pairs...'):
        response = getMcqQAPairs(pages[0:len(pages) - 1], numPairs, model)

    for qaPair in response:
        with st.chat_message("user"):
            # Render each MCQ as a disabled radio group with the correct option pre-selected.
            st.radio(label=qaPair['question'], options=qaPair["options"], disabled=True, index=qaPair['correct_option_index'])


with st.sidebar:
    st.image('Pic.png')
    st.title("Final Year Project")
    st.divider()
    with st.container(border=True):
        st.text('Model: Gemini Pro', help='Developed by Google')
        temperature = st.slider('Temperature:', 0.0, 1.0, 0.3, 0.1)

    code = '''Team Members CSE(20-37):
\nAmbuj Raj BT20CSE054 \nSrishti Pandey BT20CSE068 \nPrateek Niket BT20CSE211 \nSmriti Singh BT20CSE156'''
    st.code(code, language='java')
    code = '''Mentored By: \nDr. Amol Bhopale'''
    st.code(code, language='java')

# Create the model after the slider has been read, so the chosen temperature
# actually takes effect on each Streamlit rerun.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=temperature)

st.title('Question Answer Pair Generation From Documents')

with st.container(border=True):
    col1, col2 = st.columns(2)
    with col1:
        st.write("Please Upload Your File")
        uploaded_file = st.file_uploader("Choose a file", type='.pdf', accept_multiple_files=False)
        if uploaded_file is not None:
            # Persist the upload to disk so PyPDFLoader can read it from a path.
            with open("temp.pdf", "wb") as f:
                f.write(uploaded_file.getbuffer())

            file_path = "temp.pdf"

            pdf_loader = PyPDFLoader(file_path)
            pages = pdf_loader.load_and_split()
            print(len(pages))

    with col2:
        st.write('Please Choose Your Configuration')
        option = st.selectbox(
            "In which category would you like to generate question answer pairs?",
            optionCategory,
            index=None,
            placeholder="Select Category of Question Answer Pairs",
        )
        numPairs = st.number_input('Number of QA Pairs', min_value=1, max_value=20, step=2, value=2)

if st.button("Generate", type="primary"):
    # The option == ... checks already guarantee option is in optionCategory.
    if option == "Long QA Pairs" and len(pages):
        LongQAPairs()
    elif option == "MCQs" and len(pages):
        McqQAPairs()
    elif option == "Short QA Pairs" and len(pages):
        ShortQAPairs()
    else:
        st.error('Required Fields are Missing!', icon="🚨")
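For reference, a minimal sketch of the .env file that load_dotenv() reads in app.py. The key name matches the os.environ.get("GOOGLE_API_KEY") call above; the value is a placeholder:

# .env (hypothetical contents; substitute a real key)
GOOGLE_API_KEY=your-api-key-here

With that in place, the app starts with `streamlit run app.py`.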
deep-learning.png
ADDED
prompts.py
ADDED
@@ -0,0 +1,88 @@
# updated Gemini prompts; for the initial prompts, use the .ipynb file
# use the Gemini-Pro model

def getMcqQAPrompt():
    prompt = """
Imagine leading a stimulating debate among renowned experts based on the following text:

{context}

To spark insightful discussion, design {numPairs} challenging multiple-choice questions, each with four plausible yet distinct options. Only one will be the accurate answer, revealed alongside the shuffled options as a bonus point for the sharpest minds!

Craft your questions thoughtfully, employing a variety of types (factual recall, inferential reasoning, critical analysis) to test the depth and agility of the experts' understanding. Remember, the more nuanced and insightful your questions, the richer and more engaging the intellectual exchange will become.

Then output only a JSON array describing each question and its answer, where every element has this format. Generate a valid JSON array.
Please ground each and every question-answer pair in the context.
{{
    "question": <string>,
    "answer": <string>,
    "options": [string],
    "correct_option_index": <number>
}}

Never output the instructions given for the output.
Do not include ```json in the output; only give the output as an array.
"""
    return prompt


def getLongQAPrompt():
    prompt = """
Carefully read and comprehend the following paragraph:

{context}

Now, create {numPairs} thought-provoking questions that delve into the key points, details, and implications of the paragraph. Provide concise and informative answers to each question, ensuring factual accuracy and clarity.

Strive to generate a diverse range of question types (who, what, when, where, why, how) to explore various aspects of the text. Prioritize questions that encourage deeper understanding and critical thinking.

Then output only a JSON array describing each question and its answer, where every element has this format. Generate a valid JSON array.
Please ground each and every question-answer pair in the context.
{{
    "question": <string>,
    "answer": <string>
}}

Never output the instructions given for the output.
Do not include ```json in the output; only give the output as an array.
"""
    return prompt


def getShortQAPrompt():
    prompt = """
Carefully read and comprehend the following paragraph:

{context}

Now, craft {numPairs} intriguing questions that pierce through the heart of the paragraph, demanding concise answers. Aim for single-word or two-word responses that capture the essence.
Diversify your question types (who, what, when, where, why, how) to illuminate various facets of the text. Prioritize questions that spark reflection and ignite critical thinking.

Then output only a JSON array describing each question and its answer, where every element has this format. Generate a valid JSON array.
Please ground each and every question-answer pair in the context.
{{
    "question": <string>,
    "answer": <string>
}}

Never output the instructions given for the output.
Do not include ```json in the output; only give the output as an array.
Remember, brevity is key! One or two words should suffice to convey the point.
"""
    return prompt


def getRagChainPrompt():
    prompt = """
Answer the question in as much detail as possible from the provided context, making sure to include all relevant details. If the answer is not in the
provided context, just say "Answer is not available in the given Context"; don't provide a wrong answer.\n\n
Context:\n {context}\n
Question: \n{question}\n

Answer: __answer__

Always return the response in JSON format.
The response should not contain ***.
"""
    return prompt
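To make the MCQ output contract concrete, here is a hypothetical parsed array (the question text is invented) matching what json.loads in utils.py would produce from a well-behaved model response:

# Illustrative parsed output for numPairs=1; contents are made up.
[
    {
        "question": "Which technique does the passage describe?",
        "answer": "Backpropagation",
        "options": ["Backpropagation", "Bagging", "Beam search", "Boosting"],
        "correct_option_index": 0
    }
]

This is the shape app.py relies on when it passes qaPair["options"] and qaPair['correct_option_index'] to st.radio.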
requirements.txt
ADDED
@@ -0,0 +1,7 @@
langchain==0.1.4
google-generativeai==0.3.2
streamlit==1.30.0
# Also imported by app.py/utils.py but missing above; versions left unpinned here.
langchain-google-genai
python-dotenv
pypdf
utils.py
ADDED
@@ -0,0 +1,48 @@
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import json
from prompts import *

# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from PyPDF2 import PdfReader
# from langchain_google_genai import GoogleGenerativeAIEmbeddings
# from langchain.vectorstores import FAISS
# import PyPDF2


# Shared helper: run a "stuff" QA chain over the documents and parse the model's JSON output.
def util(context, numPairs, inputPrompt, model):
    stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=inputPrompt)
    stuff_answer = stuff_chain(
        {"input_documents": context, "numPairs": numPairs}, return_only_outputs=True
    )
    output_text = stuff_answer['output_text']
    # Assumes the model returns a bare JSON array, as the prompts request;
    # json.loads will raise if the model wraps the array in extra text.
    output_json = json.loads(output_text)
    return output_json


### Generating Q-A pairs - full-length QA pairs
def getLongQAPairs(context, numPairs, model):
    prompt_template = getLongQAPrompt()
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "numPairs"]
    )
    return util(context, numPairs, prompt, model)


### Generating Q-A pairs - one-word answer type pairs
def getShortQAPairs(context, numPairs, model):
    prompt_template = getShortQAPrompt()
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "numPairs"]
    )
    return util(context, numPairs, prompt, model)


### Generating Q-A pairs - MCQs
def getMcqQAPairs(context, numPairs, model):
    prompt_template = getMcqQAPrompt()
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "numPairs"]
    )
    return util(context, numPairs, prompt, model)
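For reference, a minimal sketch (not part of the commit) of calling these helpers outside the Streamlit UI, assuming GOOGLE_API_KEY is set in the environment and temp.pdf exists on disk:

from langchain.document_loaders import PyPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
from utils import getLongQAPairs

# Build the same model app.py uses and split a PDF into pages.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
pages = PyPDFLoader("temp.pdf").load_and_split()

# Generate two long-form QA pairs and print them.
for pair in getLongQAPairs(pages, 2, model):
    print(pair["question"], "->", pair["answer"])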