Spaces:

Nighter
/

Question_Answering_with_Long_Short_Models

Runtime error

App Files Community

Nighter committed on Dec 5, 2023

Commit

38bc3de

•

1 Parent(s): c8770f9

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -43

app.py CHANGED Viewed

@@ -7,9 +7,9 @@ import re
 from tensorflow.keras.models import load_model
 # Load long model
-with open('lstm-qa-long-answers-model/tokenizer.pickle', 'rb') as handle:
-    tokenizer = pickle.load(handle)
-long_answer_model = load_model('lstm-qa-long-answers-model/model.h5')
 def clean_text(text):
     text = re.sub(r'<.*?>', '', text)
@@ -21,49 +21,42 @@ def remove_parentheses(text):
     pattern = r'\([^)]*\)'
     return re.sub(pattern, '', text)
-def split_into_sentences(text):
-    sentences = re.split(r'\.\s*', text)
-    return [s.strip() for s in sentences if s]
-def predict_answer(context, question):
-    sentences = split_into_sentences(context)
-    best_sentence = None
-    best_score = 0
-    for sentence in sentences:
-        clean_sentence = clean_text(sentence)
-        question_seq = tokenizer.texts_to_sequences([question])
-        sentence_seq = tokenizer.texts_to_sequences([clean_sentence])
-        max_sentence_length = 300
-        padded_question = pad_sequences(question_seq, padding='post')
-        padded_sentence = pad_sequences(sentence_seq, maxlen=max_sentence_length, padding='post', truncating='post')
-        score = long_answer_model.predict([padded_sentence, padded_question])[0]
-        if score > best_score:
-            best_score = score
-            best_sentence = clean_sentence
-    return best_score, best_sentence
 # Load short model
-short_answer_model = pipeline(model="Nighter/QA_wiki_data_short_answer", from_tf=True)
 # Function to answer on all models
 def answer_questions(context, question):
-    long_score, long_answer = predict_answer(context, question)
-    # # Check if the original context is longer than 512 tokens
-    # if len(tokenizer.texts_to_sequences([context])[0]) > 512:
-    #     # If yes, use the long answer as the context for the short answer model
-    #     short_context = long_answer
-    # else:
-    #     # If no, use the original context
-    #     short_context = remove_parentheses(context)
-    # short_answer_result = short_answer_model(question=question, context=short_context)
-    short_answer_result = short_answer_model(question=question, context=remove_parentheses(context))
-    return short_answer_result['answer'], short_answer_result['score'], long_answer, long_score
 # App Interface
 with gr.Blocks() as app:
@@ -76,15 +69,15 @@ with gr.Blocks() as app:
             gr.ClearButton([context_input,question_input])
         with gr.Column():
             with gr.Row():
-                with gr.Column(scale=4):
-                    short_answer_output = gr.Textbox(lines=5, label="Distil BERT Short Answer")
-                with gr.Column(scale=1):
-                    short_score_output = gr.Number(label="Short Answer Score")
             with gr.Row():
-                with gr.Column(scale=4):
-                    long_answer_output = gr.Textbox(lines=5, label="LSTM Long Answer")
-                with gr.Column(scale=1):
-                    long_score_output = gr.Number(label="Long Answer Score")
     submit_btn.click(fn=answer_questions, inputs=[context_input, question_input], outputs=[short_answer_output, short_score_output, long_answer_output, long_score_output])
     examples='examples'

 from tensorflow.keras.models import load_model
 # Load long model
+# with open('lstm-qa-long-answers-model/tokenizer.pickle', 'rb') as handle:
+#     tokenizer = pickle.load(handle)
+# long_answer_model = load_model('lstm-qa-long-answers-model/model.h5')
 def clean_text(text):
     text = re.sub(r'<.*?>', '', text)
     pattern = r'\([^)]*\)'
     return re.sub(pattern, '', text)
+# def split_into_sentences(text):
+#     sentences = re.split(r'\.\s*', text)
+#     return [s.strip() for s in sentences if s]
+# def predict_answer(context, question):
+#     sentences = split_into_sentences(context)
+#     best_sentence = None
+#     best_score = 0
+#     for sentence in sentences:
+#         clean_sentence = clean_text(sentence)
+#         question_seq = tokenizer.texts_to_sequences([question])
+#         sentence_seq = tokenizer.texts_to_sequences([clean_sentence])
+#         max_sentence_length = 300
+#         padded_question = pad_sequences(question_seq, padding='post')
+#         padded_sentence = pad_sequences(sentence_seq, maxlen=max_sentence_length, padding='post', truncating='post')
+#         score = long_answer_model.predict([padded_sentence, padded_question])[0]
+#         if score > best_score:
+#             best_score = score
+#             best_sentence = clean_sentence
+#     return best_score, best_sentence
 # Load short model
+distilbert_base_uncased = pipeline(model="Nighter/QA_wiki_data_short_answer", from_tf=True)
+bert_base_uncased  = pipeline(model="Nighter/QA_bert_base_uncased_wiki_data_short_answer", from_tf=True)
 # Function to answer on all models
 def answer_questions(context, question):
+    # long_score, long_answer = predict_answer(context, question)
+    distilbert_base_uncased_result = distilbert_base_uncased(question=question, context=remove_parentheses(context))
+    bert_base_uncased_result =bert_base_uncased(question=question, context=remove_parentheses(context))
+    return distilbert_base_uncased_result['answer'], distilbert_base_uncased_result['score'], bert_base_uncased_result['answer'], bert_base_uncased_result['score'] #, long_answer, long_score
 # App Interface
 with gr.Blocks() as app:
             gr.ClearButton([context_input,question_input])
         with gr.Column():
             with gr.Row():
+                with gr.Column(scale=6):
+                    short_answer_output = gr.Textbox(lines=5, label="Distil BERT Base Uncased")
+                with gr.Column(scale=2):
+                    short_score_output = gr.Number(label="Distil BERT Base Uncased Score")
             with gr.Row():
+                with gr.Column(scale=6):
+                    long_answer_output = gr.Textbox(lines=5, label="BERT Base Uncased")
+                with gr.Column(scale=2):
+                    long_score_output = gr.Number(label="BERT Base Uncased Score")
     submit_btn.click(fn=answer_questions, inputs=[context_input, question_input], outputs=[short_answer_output, short_score_output, long_answer_output, long_score_output])
     examples='examples'