jrocha committed
Commit 5499fd6
1 Parent(s): b942bef

Rename gradio_sindi.py to gradio_bert.py

Files changed (1)
  1. gradio_sindi.py → gradio_bert.py +52 -11
gradio_sindi.py → gradio_bert.py RENAMED
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-"""gradio_sindi.ipynb
+"""gradio_bert.ipynb
 
 Automatically generated by Colab.
 
@@ -9,11 +9,6 @@ Original file is located at
 # libraries
 """
 
-!pip install gradio>=4.13.0
-
-!pip install accelerate
-
-!pip install transformers>=4.34
 
 import gradio as gr
 import torch
@@ -31,7 +26,22 @@ splitted_df = pd.read_csv('/content/splitted_df_jo.csv')
 
 """# getting context"""
 
-def remove_symbols(text):
+def remove_symbols(text: str) -> str:
+    """
+    Removes specified symbols and non-ASCII characters from the input text.
+
+    Args:
+        text (str): The input text to be cleaned.
+
+    Returns:
+        str: The cleaned text with specified symbols and non-ASCII characters removed.
+
+    Example:
+        >>> text = "This is a test string / with (some) symbols.\nAnd some non-ASCII characters like é and ñ."
+        >>> clean_text = remove_symbols(text)
+        >>> print(clean_text)
+        This is a test string with some symbolsAnd some non-ASCII characters like and
+    """
     remove_list = ['/', '(', ')', '\n', '.']
     remove_chars = "".join(remove_list)
     cleaned_text = "".join([char for char in text if char not in remove_chars])
@@ -42,7 +52,23 @@ def remove_symbols(text):
 
     return filtered_text
 
-def context_func(message):
+
+def context_func(message: str) -> str:
+    """
+    Finds the most similar context from a collection of texts based on TF-IDF vectorization and cosine similarity.
+
+    Args:
+        message (str): The input message or question.
+
+    Returns:
+        str: The most similar context to the input message from the collection of texts.
+
+    Example:
+        >>> message = "What are the symptoms of breast cancer?"
+        >>> similar_context = context_func(message)
+        >>> print(similar_context)
+        Breast cancer is the most common cancer among women worldwide...
+    """
     # Create a TF-IDF vectorizer
     vectorizer = TfidfVectorizer()
 
@@ -69,11 +95,26 @@ def context_func(message):
 tokenizer = AutoTokenizer.from_pretrained("nlp-group/sindi-bert-final")
 model = AutoModelForQuestionAnswering.from_pretrained("nlp-group/sindi-bert-final")
 
-def answer_question(question):
+def answer_question(question: str) -> tuple[str, str]:
+    """
+    Generates an answer to the input question based on the retrieved context.
+
+    Args:
+        question (str): The input question.
+
+    Returns:
+        tuple: A tuple containing the generated answer and the context used for answering.
+
+    Example:
+        >>> question = "What is the capital of France?"
+        >>> answer, context = answer_question(question)
+        >>> print("Answer:", answer)
+        >>> print("Context:", context)
+    """
     context = context_func(question)
     # Tokenize the inputs
     inputs = tokenizer(question, context, return_tensors="pt", max_length=512, truncation=True)
-
+
     # Get the answer from the model
     outputs = model(**inputs)
     answer_start_scores = outputs.start_logits
@@ -81,7 +122,7 @@ def answer_question(question):
     answer_start = torch.argmax(answer_start_scores)
     answer_end = torch.argmax(answer_end_scores) + 1
     answer = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
-
+
     return answer, context
 
 iface = gr.Interface(fn=answer_question,
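
The final hunk cuts off before the remaining gr.Interface arguments, so the interface wiring is not fully visible in this diff. A minimal sketch of how the call might be completed and the app launched, assuming one textbox input and two textbox outputs to match the (answer, context) pair returned by answer_question; the labels, title, and launch settings below are illustrative, not taken from the commit:

# Hypothetical completion of the truncated gr.Interface(...) call above.
# Labels, title, and launch settings are assumptions, not part of this commit.
iface = gr.Interface(
    fn=answer_question,                  # returns (answer, context)
    inputs=gr.Textbox(label="Question"),
    outputs=[
        gr.Textbox(label="Answer"),      # span decoded by the QA model
        gr.Textbox(label="Context"),     # passage retrieved via TF-IDF
    ],
    title="Sindi BERT Q&A",
)

if __name__ == "__main__":
    iface.launch()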