# QnA / app.py
# Sasidhar's picture
# Update app.py
# 6da1bda
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import streamlit as st
#from streamlit_chat import message
from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx
from datetime import datetime
import pandas as pd
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
# from services.qa_chat_mode import *
# from services.qna import *
# from services.utils import *
# from services.transcripts import *
# from langchain import HuggingFaceHub
# from services.smart_prompt import PDSCoverageChain
@st.cache_data
def get_text_chunks(pdf_docs, chunk_size=2000, overlap=0):
    """Extract the text of every page of the given PDFs and split it into chunks.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to ``PdfReader``
            (the caller in this file passes Streamlit uploaded files).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``.
        overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for the
        concatenated text of all pages of all documents.
    """
    # Gather the page texts and join once rather than growing a string with
    # ``+=`` inside the nested loops.
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        # ``or ""`` keeps the concatenation safe should a page yield no text
        # (e.g. None for an image-only page).
        page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)

    # Pages are concatenated without a separator, as before, so the resulting
    # chunks stay identical for PDFs that already worked.
    documents = "".join(page_texts)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
    )
    return text_splitter.split_text(documents)
def build_experimental_ui():
    """Render the experimental Streamlit UI.

    Sidebar: task selector, model/embedding selectors (their option lists are
    still empty placeholders), a multi-file PDF uploader and a "Process"
    button that chunks the uploaded PDFs via ``get_text_chunks``.

    Main area: a disclaimer, the selected task as a heading and, for the
    "Question & Answer" task, a prompt box, a CSV uploader for questions and a
    "Submit" button. Every other task shows an "Under Development" notice.

    The function only drives Streamlit widgets and returns ``None``.
    """
    qa_task = "Question & Answer"
    qa_chat_task = "Question & Answer (Chat Mode)"
    transcript_task = "Transcript Intelligence"

    with st.sidebar:
        tabs = st.selectbox('SELECT TASK', [
            qa_task,
            qa_chat_task,
            transcript_task,
        ])
        st.markdown('---')

        # Filters. Every task offers a model selector; only the two Q&A tasks
        # also offer an embeddings selector.
        selected_model = st.selectbox("Select Model:", options=[], index=0)
        if tabs in (qa_task, qa_chat_task):
            selected_embeddings = st.selectbox("Select Embeddings:", options=[], index=0)
        # strategy = st.selectbox("Select Strategy:", options=STRATEGY_OPTIONS, index=1, disabled=True)
        # claimnumber = st.selectbox("Select Claim:", options=CLAIM_OPTIONS, index=0, disabled=True)

        pdf_docs = st.file_uploader('Upload a PDF file', type=['pdf'], accept_multiple_files=True)
        st.session_state['pdf_file'] = pdf_docs

        # ``not pdf_docs`` covers both an empty list and None.
        process_clicked = st.button("Process", disabled=not pdf_docs)
        if process_clicked and pdf_docs:
            # The former ``pdf_docs != st.session_state['pdf_file']`` check was
            # always False because the session value is assigned just above,
            # so it has been dropped. ``get_text_chunks`` is wrapped in
            # ``st.cache_data``, which is what avoids redoing the work for an
            # unchanged upload.
            with st.spinner('Creating embeddings...'):
                texts = get_text_chunks(pdf_docs=pdf_docs)
                # st.write([len(x) for x in texts])
                # retriever = get_retriever_from_text(texts, embeddings[selected_embeddings])
                # st.session_state['retriever'] = retriever

    st.error("Disclaimer: All data processed in this application will be sent to OpenAI API based in the United States.")
    st.markdown('## '+tabs)

    if tabs != qa_task:
        st.info("Under Development")
        return

    st.markdown('---')
    # # Question & Answer
    # if st.session_state['retriever'] is None:
    #     disable_query = True
    # else:
    #     disable_query = False
    prompt = st.text_input('Input your prompt', disabled=False, key="text")
    questions_file = st.file_uploader('Upload a CSV file with questions', type=['csv'], accept_multiple_files=False)
    # Bind the name on every run so the Submit handler can tell "no file
    # uploaded" apart from a loaded frame instead of hitting an unbound local.
    questions_df = pd.read_csv(questions_file) if questions_file else None

    # if strategy=='Without Chain-of-Thought':
    #     instruction = st.text_area('Input your instruction (optional)', value=st.session_state['qa_instruction'], disabled=disable_query)
    #     with st.expander("Sample instruction"):
    #         sample_instruction = "Answer the question based on the context provided. Explain with reason in bullet points. Let's think step by step."
    #         button_sample_instruction = st.button(sample_instruction, key='instruction1', disabled=disable_query, on_click=set_qa_instruction, args=(sample_instruction,))
    # if st.session_state.query is None:
    #     disable = True
    # else:
    #     disable = False
    button_query = st.button('Submit', disabled=False)
    if not button_query:
        return

    if questions_df is None:
        st.warning("Upload a CSV file with questions before submitting.")
        return
    if 'question' not in questions_df.columns:
        st.error("The questions CSV must contain a 'question' column.")
        return

    for question in questions_df['question']:
        # TODO: the combined instruction is built but not yet sent to a model.
        instruction = f'{prompt}.Question:{question}'
# Script entry point: render the UI whenever this file is executed.
build_experimental_ui()