"""Streamlit front end for the Team 4 RoBERTa question-answering demo.

The page offers four ways of supplying a context for a question:

* ``Manual entry`` - the context is typed into a text area,
* ``Upload PDF``   - the context is the text extracted from a PDF,
* ``Upload TXT``   - the context is the decoded content of a text file,
* ``JBNU``         - the context is retrieved from an Elasticsearch index
  through a Haystack retriever/reader pipeline.

Streamlit re-executes this script from top to bottom on every widget
interaction, so the expensive model objects are built lazily and cached
instead of being re-created on each run.
"""
import html
import io
import os

import fitz
import streamlit as st
from haystack import Pipeline
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import BM25Retriever, FARMReader
from transformers import pipeline

MODEL_NAME = "deepset/roberta-base-squad2"
# Default Elasticsearch host; override with the ELASTICSEARCH_HOST env variable.
DEFAULT_ES_HOST = "121.186.58.11"
EXAMPLE_IMAGE = "https://raw.githubusercontent.com/cyberspyde/jbnu/master/image2.jpg"
GITHUB_LINK = "https://github.com/cyberspyde/jbnu"
TELEGRAM_LINK = "https://t.me/cyberspyde"

# st.cache_resource arrived in Streamlit 1.18; older releases expose the same
# behaviour under the name st.experimental_singleton.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


@_cache_resource
def load_qa_pipeline():
    """Build the extractive question-answering pipeline once per server process."""
    return pipeline("question-answering", model=MODEL_NAME, tokenizer=MODEL_NAME)


@_cache_resource
def load_querying_pipeline():
    """Build the Retriever -> Reader Haystack pipeline once per server process.

    The Elasticsearch connection is opened here rather than at import time so
    that the other input methods keep working when the server is unreachable.
    """
    document_store = ElasticsearchDocumentStore(
        host=os.environ.get("ELASTICSEARCH_HOST", DEFAULT_ES_HOST),
        username="",
        password="",
        index="document",
    )
    retriever = BM25Retriever(document_store=document_store)
    reader = FARMReader(model_name_or_path=MODEL_NAME, use_gpu=True)

    querying_pipeline = Pipeline()
    querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
    querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])
    return querying_pipeline


def _show_field(label, value):
    """Render ``label: value`` as its own markdown paragraph.

    The value originates from model output over user-supplied text, and the
    paragraph is rendered with ``unsafe_allow_html=True``, so it is escaped
    first to keep uploaded content from injecting markup into the page.
    """
    st.markdown(
        "\n\n" + label + ": " + html.escape(str(value)) + "\n\n",
        unsafe_allow_html=True,
    )


def _read_pdf(uploaded_file):
    """Return the concatenated text of every page of an uploaded PDF."""
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def _read_txt(uploaded_file):
    """Return the uploaded file decoded as UTF-8, or ``None`` if it is not UTF-8."""
    try:
        return uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return None


def _answer_from_context(context, question):
    """Run the extractive QA model on ``context`` and display the answer.

    Empty questions or contexts are reported to the user instead of being
    passed to the model.
    """
    if not question.strip():
        st.warning("Please enter a question.")
        return
    if not context.strip():
        st.warning("The context is empty, so there is nothing to search for an answer.")
        return
    answer = load_qa_pipeline()({"context": context, "question": question})
    _show_field("Answer", answer["answer"])


def _answer_from_index(question):
    """Query the Elasticsearch-backed pipeline and display every returned answer."""
    if not question.strip():
        st.warning("Please enter a question.")
        return
    prediction = load_querying_pipeline().run(
        query=str(question),
        params={
            "Retriever": {"top_k": 10},
            "Reader": {"top_k": 5},
        },
    )
    answers = prediction["answers"]
    if not answers:
        st.info("No answers were found.")
        return
    for answer in answers:
        # NOTE(review): answer/context may be None for "no answer" predictions;
        # fall back to an empty string rather than failing on concatenation.
        _show_field("Answer", answer.answer or "")
        _show_field("Context", answer.context or "")
        _show_field("Score", answer.score)


def main():
    """Render the page and dispatch on the selected input method."""
    # set_page_config has to be the first Streamlit command of the script run.
    st.set_page_config(
        page_title="Team 4 - Roberta Question Answering System",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    st.markdown(""" """, unsafe_allow_html=True)
    st.markdown(
        "\n\nTeam 4 - RoBERTa Question-Answering System\n\n",
        unsafe_allow_html=True,
    )
    st.markdown(
        "\n\nFine-Tuned and Optimized version with JBNU Dataset\n\n",
        unsafe_allow_html=True,
    )
    st.image(
        EXAMPLE_IMAGE,
        caption="Freedom, Justice and Creativity",
        use_column_width=True,
    )
    st.markdown("\n\n", unsafe_allow_html=True)

    # Three equal columns; only the middle one is used, to centre the selector.
    _, centre, _ = st.columns(3)
    with centre:
        method = st.radio(
            "Select the appropriate method",
            ['Upload PDF', 'Upload TXT', 'Manual entry', 'JBNU'],
            key='visibility',
            horizontal=True,
        )

    if method == 'Manual entry':
        question = st.text_input("Question:")
        context = st.text_area("Context:")
        if st.button("Answer"):
            _answer_from_context(context, question)
    elif method == 'Upload PDF':
        pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
        if pdf_file is not None:
            text = _read_pdf(pdf_file)
            question = st.text_input("Question:")
            if st.button("Answer"):
                _answer_from_context(text, question)
    elif method == 'Upload TXT':
        txt_file = st.file_uploader("Upload TXT", type=["txt"])
        if txt_file is not None:
            contents = _read_txt(txt_file)
            if contents is not None:
                question = st.text_input("Question:")
                if st.button("Answer"):
                    _answer_from_context(contents, question)
    elif method == 'JBNU':
        question = st.text_input("Question:")
        if st.button("Answer"):
            _answer_from_index(question)

    st.markdown("---")
    # Footer: project links. The original footer string was empty and left
    # both link variables unused.
    st.markdown(
        f"[GitHub]({GITHUB_LINK}) | [Telegram]({TELEGRAM_LINK})",
        unsafe_allow_html=True,
    )


# Streamlit executes the script on every interaction, so the page is rendered
# unconditionally, exactly as the original flat script did.
main()