Kelvinhjk committed on
Commit
640fbe7
·
1 Parent(s): 2c2ca73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -34
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import os
2
  import streamlit as st
 
3
  from langchain.embeddings.openai import OpenAIEmbeddings
4
  from langchain.text_splitter import CharacterTextSplitter
5
  from langchain.vectorstores import FAISS
6
  from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline
7
 
8
- os.environ["OPENAI_API_KEY"] = "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"
9
 
10
  # Read data
11
  with open("./data/full_context.txt", "r") as file1:
@@ -20,52 +21,81 @@ text_splitter = CharacterTextSplitter(
20
  )
21
  texts = text_splitter.split_text(doc)
22
 
23
-
24
  # Download embeddings from OpenAI
25
  embeddings = OpenAIEmbeddings()
26
  docsearch = FAISS.from_texts(texts, embeddings)
27
 
28
- # Load model
29
- model_path = "./models/roberta_model"
30
-
31
- model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
32
- tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')
 
33
 
 
 
 
 
34
  # Initialize Transformer pipeline with our own model and tokenizer
35
- question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)
36
 
37
- def findHighestScore(question):
38
- docs_found = docsearch.similarity_search(question)
39
- doc_score = 0.01
40
- doc_answer = ''
41
 
 
 
 
 
42
  for doc in docs_found:
43
- doc_result = question_answerer(question=question, context = doc.page_content)
44
- if doc_result['score'] > doc_score:
45
- doc_score = doc_result['score']
46
- doc_answer = doc_result['answer']
47
-
48
- return doc_answer, doc_score
49
 
50
-
51
- def QnAfunction(question):
52
- answer1, score1 = findHighestScore(question)
53
- if answer1 != '':
54
- return answer1, score1
55
  # print("Answer: ", answer1)
56
  # print("Score: ", score1)
57
-
58
  else:
59
  return "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.", 0
60
  # print("No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.")
61
 
62
-
63
- text = st.text_area("Ask any question about the Bachelor of Computer Science program at Swinburne: ")
64
- if text:
65
- ans, score = QnAfunction(text)
66
- if score > 0.5:
67
- st.write("Answer: ", ans)
68
- st.write("Score: ", score)
69
- else:
70
- st.write(ans)
71
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import streamlit as st
3
+ from streamlit_option_menu import option_menu
4
  from langchain.embeddings.openai import OpenAIEmbeddings
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain.vectorstores import FAISS
7
  from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline
8
 
9
# SECURITY: never hard-code the OpenAI API key in source control. Any key that
# was committed here must be treated as leaked and revoked. Supply the key
# through the environment instead (for example a deployment secret, or
# `export OPENAI_API_KEY=...` when running locally).
if not os.environ.get("OPENAI_API_KEY"):
    # Fail early with a readable message instead of a stack trace from the
    # embeddings client further down the script.
    st.error("OPENAI_API_KEY is not set. Configure it as an environment variable or deployment secret.")
    st.stop()
10
 
11
  # Read data
12
  with open("./data/full_context.txt", "r") as file1:
 
21
  )
22
  texts = text_splitter.split_text(doc)
23
 
 
24
  # Download embeddings from OpenAI
25
  embeddings = OpenAIEmbeddings()
26
  docsearch = FAISS.from_texts(texts, embeddings)
27
 
28
@st.cache_resource
def _load_qa_pipeline(model_path, tokenizer_name):
    """Build a question-answering pipeline from a saved model and a tokenizer.

    Streamlit re-executes this script on every user interaction; caching the
    pipeline as a resource means each (model_path, tokenizer_name) pair is
    loaded once and reused on later reruns. `st.cache_resource` needs a
    Streamlit release that provides it (1.18 or newer).

    Args:
        model_path: Location passed to
            `TFAutoModelForQuestionAnswering.from_pretrained`.
        tokenizer_name: Identifier passed to `AutoTokenizer.from_pretrained`.

    Returns:
        The object returned by `pipeline("question-answering", ...)`.
    """
    model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    # Initialize Transformer pipeline with our own model and tokenizer
    return pipeline("question-answering", model=model, tokenizer=tokenizer)


# Load roberta model
model_path0 = "./models/roberta_model"
question_answerer0 = _load_qa_pipeline(model_path0, 'deepset/roberta-base-squad2')

# Load bert base model
model_path1 = "./models/bert_finetuned_model"
question_answerer1 = _load_qa_pipeline(model_path1, 'huggingface-course/bert-finetuned-squad')
41
 
 
 
 
 
42
 
43
def QnAfunction(question, QnAmodel, min_score=0.5):
    """Return the best extractive answer for *question* and its score.

    The question is looked up in the module-level ``docsearch`` index; each
    retrieved chunk is passed to ``QnAmodel`` as context and the
    highest-scoring answer is kept.

    Args:
        question: The user's question text.
        QnAmodel: A callable invoked as ``QnAmodel(question=..., context=...)``
            that returns a mapping with ``'score'`` and ``'answer'`` keys.
        min_score: An answer is accepted only when its score is strictly
            greater than this value. Defaults to 0.5, the threshold that was
            previously hard-coded.

    Returns:
        ``(answer, score)`` for the best accepted answer, or
        ``(fallback message, 0)`` when the question is blank or no retrieved
        chunk yields an answer scoring above ``min_score``.
    """
    no_answer = (
        "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.",
        0,
    )

    # A blank question cannot be answered; skip the similarity search entirely.
    if not question or not question.strip():
        return no_answer

    best_answer = ''
    best_score = min_score
    for found in docsearch.similarity_search(question):
        result = QnAmodel(question=question, context=found.page_content)
        if result['score'] > best_score:
            best_score = result['score']
            best_answer = result['answer']

    # An empty answer string means nothing usable cleared the threshold.
    if best_answer != '':
        return best_answer, best_score
    return no_answer
60
 
61
+ # GUI with Streamlit
62
+ st.markdown("""
63
+ <style>
64
+ .big-font {
65
+ margin: 50px 0 10px 0 !important;
66
+ font-size:25px !important;
67
+ font-weight: bold !important;
68
+ }
69
+ </style>
70
+ """, unsafe_allow_html=True)
71
+
72
+ with st.sidebar:
73
+ selected = option_menu("Model selection", ["Roberta base squad2", "Bert finetuned squad"],
74
+ icons=['box-fill', 'box-fill'], menu_icon="cast", default_index=0)
75
+
76
# Map each sidebar option to the pipeline that answers for it, so both
# selections share one rendering path instead of two copied branches.
qa_pipelines = {
    "Roberta base squad2": question_answerer0,
    "Bert finetuned squad": question_answerer1,
}

if selected in qa_pipelines:
    st.markdown('<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>', unsafe_allow_html=True)
    st.write("- ", selected)
    text = st.text_area("Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ")
    if text:
        # Feed the question to the model chosen in the sidebar.
        ans, score = QnAfunction(text, qa_pipelines[selected])
        if score > 0.5:
            st.write("Answer: ", ans)
            st.write("Score: ", score)
        else:
            # QnAfunction returns its fallback message with a score of 0;
            # show the message on its own, without a score line.
            st.write(ans)