bconsolvo committed on
Commit
abf3d53
1 Parent(s): a6ed751

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import time
4
- import neural_compressor
5
- from optimum.intel.neural_compressor import IncQuantizedModelForQuestionAnswering
6
 
7
  # model_id = "Intel/bert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa"
8
  # model_id = "Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa-int8"
@@ -40,14 +40,13 @@ def predict(context,question):
40
 
41
  return sparse_answer,sparse_duration #,dense_answer,dense_duration
42
 
43
- md = """
44
- If you came looking for chatGPT, sorry to disappoint, but this is different. This prediction model is designed to answer a question about a given input text. It is designed to do reading comprehension. The model does not just answer questions in general -- it only works from the text that you provide. However, automated reading comprehension can be a valuable task.
45
 
46
  The model is based on the Zafrir et al. (2021) paper: [Prune Once for All: Sparse Pre-Trained Language Models](https://arxiv.org/abs/2111.05754). The model can be found [here](https://huggingface.co/Intel/bert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa). It has had weight pruning and model distillation applied to create a sparse weight pattern that is maintained even after fine-tuning has been applied. According to Zafrir et al. (2021), their "results show the best compression-to-accuracy ratio for BERT-Base". This model is still in FP32, but can be quantized to INT8 with the [Intel® Neural Compressor](https://github.com/intel/neural-compressor) for further compression.
47
 
48
  The model was pre-trained on the English Wikipedia dataset (2500M words) and then fine-tuned on the SQuADv1.1 dataset, which contains 89K training examples and was compiled by Rajpurkar et al. (2016): [SQuAD: 100,000+ Questions for Machine Comprehension of Text](https://arxiv.org/abs/1606.05250).
49
 
50
- Author of Hugging Face Space: Benjamin Consolvo, AI Solutions Engineer Manager at Intel | Date last updated: 01/05/2023
51
  """
52
  # The main idea of this BERT-Base model is that it is much faster and more efficient in deployment than its dense counterpart (https://huggingface.co/csarron/bert-base-uncased-squad-v1).
53
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import time
4
+ # import neural_compressor
5
+ # from optimum.intel.neural_compressor import IncQuantizedModelForQuestionAnswering
6
 
7
  # model_id = "Intel/bert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa"
8
  # model_id = "Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa-int8"
 
40
 
41
  return sparse_answer,sparse_duration #,dense_answer,dense_duration
42
 
43
+ md = """This prediction model is designed to answer a question about a given input text--reading comprehension. The model does not just answer questions in general -- it only works from the text that you provide. However, automated reading comprehension can be a valuable task.
 
44
 
45
  The model is based on the Zafrir et al. (2021) paper: [Prune Once for All: Sparse Pre-Trained Language Models](https://arxiv.org/abs/2111.05754). The model can be found [here](https://huggingface.co/Intel/bert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa). It has had weight pruning and model distillation applied to create a sparse weight pattern that is maintained even after fine-tuning has been applied. According to Zafrir et al. (2021), their "results show the best compression-to-accuracy ratio for BERT-Base". This model is still in FP32, but can be quantized to INT8 with the [Intel® Neural Compressor](https://github.com/intel/neural-compressor) for further compression.
46
 
47
  The model was pre-trained on the English Wikipedia dataset (2500M words) and then fine-tuned on the SQuADv1.1 dataset, which contains 89K training examples and was compiled by Rajpurkar et al. (2016): [SQuAD: 100,000+ Questions for Machine Comprehension of Text](https://arxiv.org/abs/1606.05250).
48
 
49
+ Author of Hugging Face Space: Benjamin Consolvo, AI Solutions Engineer Manager at Intel | Date last updated: 03/28/2023
50
  """
51
  # The main idea of this BERT-Base model is that it is much faster and more efficient in deployment than its dense counterpart (https://huggingface.co/csarron/bert-base-uncased-squad-v1).
52