Spaces:
Runtime error
Runtime error
import os | |
import streamlit as st | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.vectorstores import FAISS | |
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline | |
# --- Configuration -----------------------------------------------------------
# The OpenAI API key must be supplied through the environment (for example a
# deployment secret). It is deliberately NOT hard-coded here: a key committed
# to source is exposed to everyone who can read the file.
if not os.environ.get("OPENAI_API_KEY"):
    st.error(
        "OPENAI_API_KEY is not set. Configure it as an environment variable "
        "or application secret before starting the app."
    )
    st.stop()

# --- Build the retrieval index -----------------------------------------------
# Read data
with open("./data/full_context.txt", "r", encoding="utf-8") as file1:
    doc = file1.read()

# Splitting up the text into smaller chunks for indexing
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,  # striding over the text
    length_function=len,
)
texts = text_splitter.split_text(doc)

# Download embeddings from OpenAI and index the chunks for similarity search
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)

# --- Load the question-answering model ---------------------------------------
# The default is the original absolute path (it appears to be a Colab / Google
# Drive mount — confirm). Set QA_MODEL_PATH to point at the model directory
# when running anywhere that path does not exist.
model_path = os.environ.get(
    "QA_MODEL_PATH",
    "/content/drive/MyDrive/Colab_Notebooks/COS30081_NLP/D_HD_Task/models/roberta_model",
)
model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')

# Initialize Transformer pipeline with our own model and tokenizer
question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)
def findHighestScore(question, min_score=0.5):
    """Return the highest-scoring answer to *question* among the retrieved chunks.

    The question is used to fetch similar text chunks from ``docsearch``; each
    chunk is then passed as context to ``question_answerer`` and the answer
    with the highest score is kept.

    Args:
        question: The question text to search for and answer.
        min_score: Score an answer must strictly exceed to be accepted.
            Defaults to 0.5, the threshold that was previously hard-coded.

    Returns:
        A ``(answer, score)`` tuple. When no chunk yields an answer scoring
        above ``min_score``, ``answer`` is ``''`` and ``score`` is
        ``min_score`` itself.
    """
    best_answer = ''
    best_score = min_score
    for candidate in docsearch.similarity_search(question):
        result = question_answerer(question=question, context=candidate.page_content)
        # Strict comparison: a later chunk only wins if it beats the current best.
        if result['score'] > best_score:
            best_score = result['score']
            best_answer = result['answer']
    return best_answer, best_score
def QnAfunction(question):
    """Answer *question*, falling back to a fixed message when nothing is found.

    Delegates to ``findHighestScore``. An empty answer string from that call
    means no acceptable answer was found.

    Returns:
        ``(answer, score)`` from ``findHighestScore`` when an answer exists,
        otherwise the fallback message paired with a score of ``0``.
    """
    best_answer, best_score = findHighestScore(question)
    if best_answer == '':
        return "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.", 0
    return best_answer, best_score
# --- User interface ----------------------------------------------------------
text = st.text_area("Ask any question about the Bachelor of Computer Science program at Swinburne: ")
if text:
    ans, score = QnAfunction(text)
    # st.json treats a str argument as already-serialized JSON, so a free-text
    # answer must be wrapped in an object. The score is included so the
    # confidence of the answer is visible; float() keeps it JSON-serializable.
    st.json({"answer": ans, "score": float(score)})