# Kelvinhjk's picture
# Update app.py
# 5b9ca5f
import os
import streamlit as st
from streamlit_option_menu import option_menu
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline
from PIL import Image
# SECURITY: an OpenAI API key used to be hard-coded on this line. Secrets must
# never be committed to source control; the previously committed key should be
# treated as compromised and revoked. The key is now expected to be supplied
# through the OPENAI_API_KEY environment variable (e.g. a deployment secret).
if not os.environ.get("OPENAI_API_KEY", "").strip():
    st.error(
        "OPENAI_API_KEY is not set. Configure it as an environment variable "
        "or deployment secret before starting the app."
    )
    # Halt this script run here so the missing key is reported clearly instead
    # of surfacing later as an error from the embeddings client.
    st.stop()
# Read the full reference text that questions are answered from.
# The encoding is given explicitly so decoding does not depend on the host's
# default locale encoding.
with open("./data/full_context.txt", "r", encoding="utf-8") as file1:
    doc = file1.read()
# Break the document into overlapping, newline-delimited chunks for indexing.
_splitter_options = dict(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,  # neighbouring chunks overlap (lengths measured by len)
    length_function=len,
)
text_splitter = CharacterTextSplitter(**_splitter_options)
texts = text_splitter.split_text(doc)
# Streamlit re-executes this script from the top on every widget interaction.
# Without caching, each question would re-embed the whole corpus through the
# OpenAI API and reload both TensorFlow models. The helpers below are cached so
# that work is done once and reused across reruns.
# `st.cache_resource` is the current API; older Streamlit releases expose the
# same behaviour as `st.experimental_singleton`.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


@_cache_resource
def _build_docsearch(chunks, _embedder):
    """Embed ``chunks`` and return a FAISS index over them.

    ``chunks`` is part of the cache key, so a changed corpus rebuilds the
    index. ``_embedder`` is prefixed with an underscore so Streamlit does not
    try to hash it.
    """
    return FAISS.from_texts(chunks, _embedder)


@_cache_resource
def _load_qa_pipeline(model_dir, tokenizer_name):
    """Load a QA model and tokenizer and wrap them in a pipeline.

    Returns a ``(model, tokenizer, pipeline)`` tuple. The result is cached per
    ``(model_dir, tokenizer_name)`` pair, so replacing the model files on disk
    requires restarting the app (or clearing the cache) to take effect.
    """
    model = TFAutoModelForQuestionAnswering.from_pretrained(model_dir)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
    return model, tokenizer, qa_pipeline


# Embed the chunks with OpenAI and index them for similarity search
embeddings = OpenAIEmbeddings()
docsearch = _build_docsearch(texts, embeddings)

# Load roberta model and build its question-answering pipeline
model_path0 = "./models/roberta_model"
model0, tokenizer0, question_answerer0 = _load_qa_pipeline(
    model_path0, 'deepset/roberta-base-squad2'
)

# Load bert base model and build its question-answering pipeline
model_path1 = "./models/bert_finetuned_model"
model1, tokenizer1, question_answerer1 = _load_qa_pipeline(
    model_path1, 'huggingface-course/bert-finetuned-squad'
)
NO_ANSWER_MESSAGE = "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne."


def QnAfunction(question, QnAmodel, min_score=0.01, retriever=None):
    """Answer ``question`` from the most relevant indexed passages.

    The retriever is asked for passages similar to the question, the QA model
    is run on each passage, and the highest-scoring answer is kept.

    Args:
        question: The user's question text.
        QnAmodel: Callable invoked as ``QnAmodel(question=..., context=...)``
            that returns a mapping with ``'score'`` and ``'answer'`` keys.
        min_score: A candidate is only considered when its score is strictly
            greater than this value. Defaults to 0.01.
        retriever: Object exposing ``similarity_search(question)`` that returns
            items with a ``page_content`` attribute. Defaults to the
            module-level ``docsearch`` index.

    Returns:
        ``(answer, score)`` for the best candidate, or
        ``(NO_ANSWER_MESSAGE, 0)`` when the question is blank or no candidate
        qualifies.
    """
    # A blank question cannot be answered; skip the retrieval and model calls.
    if not question or not question.strip():
        return NO_ANSWER_MESSAGE, 0

    if retriever is None:
        retriever = docsearch

    best_answer = ''
    best_score = min_score
    for passage in retriever.similarity_search(question):
        result = QnAmodel(question=question, context=passage.page_content)
        if result['score'] > best_score:
            best_score = result['score']
            best_answer = result['answer']

    if best_answer == '':
        return NO_ANSWER_MESSAGE, 0
    return best_answer, best_score
# GUI with Streamlit
# Inject the CSS class used for the page title further down the script.
_BIG_FONT_CSS = """
<style>
.big-font {
margin: 15px 0 10px 0 !important;
font-size:25px !important;
font-weight: bold !important;
}
</style>
"""
st.markdown(_BIG_FONT_CSS, unsafe_allow_html=True)
# Sidebar menu for choosing which question-answering model to use.
_model_choices = ["Roberta base squad2", "Bert finetuned squad"]
_model_icons = ['box-fill', 'box-fill']
with st.sidebar:
    selected = option_menu(
        "Model selection",
        _model_choices,
        icons=_model_icons,
        menu_icon="cast",
        default_index=0,
    )
# Page header: logo, title and the currently selected model.
# NOTE(review): the logo is rendered on the main page here; if it was meant to
# live in the sidebar, move these two lines under the `with st.sidebar:` block.
image = Image.open('Swinburne_Logo.png')
st.image(image)
# Title uses the "big-font" CSS class (typo "progrom" corrected to "program").
st.markdown(
    '<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>',
    unsafe_allow_html=True,
)
st.write("- ", selected)
# Question form for the selected model. Each menu entry maps to the text-area
# label it shows and the pipeline that answers the question.
_question_forms = {
    "Roberta base squad2": (
        "Type question (Eg. What is the duration of the Bachelor of Computer Science program?):",
        question_answerer0,
    ),
    "Bert finetuned squad": (
        "Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ",
        question_answerer1,
    ),
}

_form = _question_forms.get(selected)
if _form is not None:
    _prompt, _qa_model = _form
    _question = st.text_area(_prompt, max_chars=350)
    if _question:
        # Feed the question to the chosen model.
        _answer, _confidence = QnAfunction(_question, _qa_model)
        if _confidence > 0.5:
            st.write("Answer: ", _answer)
            st.write("Score: ", _confidence)
        else:
            # Scores of 0.5 or below (including the no-answer message, which
            # comes back with score 0) show only the returned text.
            st.write(_answer)