import os

# NOTE(review): presumably set ahead of the heavy imports below so that any
# OpenMP runtime they load sees it -- keep this line before those imports.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import streamlit as st
#from streamlit_chat import message
from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx
from datetime import datetime
import pandas as pd
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
# from services.qa_chat_mode import *
# from services.qna import *
# from services.utils import *
# from services.transcripts import *
# from langchain import HuggingFaceHub
# from services.smart_prompt import PDSCoverageChain

# Labels of the tasks offered in the sidebar; also used as the page heading.
_TASK_QA = "Question & Answer"
_TASK_QA_CHAT = "Question & Answer (Chat Mode)"
_TASK_TRANSCRIPTS = "Transcript Intelligence"


@st.cache_data
def get_text_chunks(pdf_docs,chunk_size=2000,overlap=0):
    """Extract the text of every page of the given PDFs and split it into chunks.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (the
            objects returned by ``st.file_uploader`` in this script).
        chunk_size: Value passed as ``chunk_size`` to the text splitter.
        overlap: Value passed as ``chunk_overlap`` to the text splitter.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_text`` for
        the concatenated text of all pages of all documents.
    """
    # Collect the page texts and join once instead of growing a string with
    # ``+=``; ``or ""`` keeps a page that yields no text from breaking the join.
    page_texts = [
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    ]
    documents = "".join(page_texts)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
    )
    return text_splitter.split_text(documents)


def _build_instructions(prompt, questions):
    """Return one instruction string per question, each prefixed by the shared prompt."""
    return [f'{prompt}.Question:{question}' for question in questions]


def build_experimental_ui():
    """Render the experimental Streamlit page: sidebar controls plus the task view.

    The sidebar offers the task selector, the (currently empty) model and
    embedding selectors, a PDF uploader and a "Process" button. The main area
    shows a data-handling disclaimer, the selected task as a heading, and, for
    "Question & Answer", a prompt input with an optional CSV of questions.
    The other tasks only display an "Under Development" notice.

    Returns:
        None.
    """
    with st.sidebar:
        tabs = st.selectbox('SELECT TASK', [
            _TASK_QA,
            _TASK_QA_CHAT,
            _TASK_TRANSCRIPTS,
        ])
        st.markdown('---')

        # Filters: every task shows the model selector; only the two Q&A
        # tasks additionally show the embeddings selector.
        selected_model = st.selectbox("Select Model:", options=[], index=0)
        if tabs in (_TASK_QA, _TASK_QA_CHAT):
            selected_embeddings = st.selectbox("Select Embeddings:", options=[], index=0)

        pdf_docs = st.file_uploader('Upload a PDF file', type=['pdf'],accept_multiple_files=True)
        st.session_state['pdf_file'] = pdf_docs

        process_clicked = st.button("Process", disabled=not pdf_docs)
        if process_clicked and pdf_docs:
            # The previous version compared ``pdf_docs`` against the session
            # value that had just been overwritten with ``pdf_docs`` itself,
            # so this branch could never run. Repeated clicks on the same
            # files are already served from the ``st.cache_data`` cache.
            with st.spinner('Creating embeddings...'):
                texts = get_text_chunks(pdf_docs=pdf_docs)
                # TODO: build the retriever once the services module is wired in:
                # retriever = get_retriever_from_text(texts, embeddings[selected_embeddings])
                # st.session_state['retriever'] = retriever

    st.error("Disclaimer: All data processed in this application will be sent to OpenAI API based in the United States.")
    st.markdown('## '+tabs)

    if tabs != _TASK_QA:
        st.info("Under Development")
        return

    st.markdown('---')
    prompt = st.text_input('Input your prompt', disabled=False, key="text")
    questions_file = st.file_uploader('Upload a CSV file with questions', type=['csv'],accept_multiple_files=False)

    # Bind the name up front so that pressing Submit without a CSV cannot
    # raise NameError.
    questions_df = None
    if questions_file:
        try:
            questions_df = pd.read_csv(questions_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            st.error(f"Could not read the questions CSV: {exc}")

    button_query = st.button('Submit', disabled=False)
    if not button_query:
        return

    if questions_df is None:
        st.warning("Upload a CSV file with questions before submitting.")
    elif 'question' not in questions_df.columns:
        st.error("The questions CSV must contain a 'question' column.")
    else:
        # TODO: the instructions are built but not yet sent to a QA chain.
        instructions = _build_instructions(prompt, questions_df['question'])


build_experimental_ui()