Anvil-ML committed
Commit b04dab8 · 1 Parent(s): ccf66b3

added padding and truncation to tokenizer

Files changed (1)
  1. app.py +6 -8
app.py CHANGED
@@ -1,5 +1,9 @@
 import gradio as gr
-
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
+barthez_tokenizer = AutoTokenizer.from_pretrained("moussaKam/barthez")
+model = AutoModelForSequenceClassification.from_pretrained("Anvil-ML/detecteur-ia")
 
 def interpret_pred(pred):
     low_bond = -6.748472
@@ -30,15 +34,9 @@ def interpret_pred_with_sensibility(pred):
 
 
 def main(text_sentence):
-    import torch
-    from transformers import AutoTokenizer, AutoModelForSequenceClassification
-    from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
-
-    barthez_tokenizer = AutoTokenizer.from_pretrained("moussaKam/barthez")
-    model = AutoModelForSequenceClassification.from_pretrained("Anvil-ML/detecteur-ia")
 
     input_ids = torch.tensor(
-        [barthez_tokenizer.encode(text_sentence, add_special_tokens=True)]
+        [barthez_tokenizer.encode(text_sentence, truncation=True, padding=True, add_special_tokens=True)]
     )
 
     predict = model.forward(input_ids)[0]
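For reference, the two new arguments map to standard Hugging Face tokenizer behaviour: truncation=True clips the encoded sequence to the tokenizer's model_max_length, while padding=True pads to the longest sequence in the batch, which changes nothing when a single sentence is encoded as it is here. A minimal sketch of the resulting call, using the same checkpoints as app.py and a hypothetical input sentence:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Same checkpoints as app.py.
barthez_tokenizer = AutoTokenizer.from_pretrained("moussaKam/barthez")
model = AutoModelForSequenceClassification.from_pretrained("Anvil-ML/detecteur-ia")

# Hypothetical input, used only for illustration.
text_sentence = "Ce texte a peut-être été écrit par un modèle de langue."

# truncation=True clips the ids to barthez_tokenizer.model_max_length;
# padding=True pads to the longest sequence in the batch, so it is a
# no-op for a single sentence.
input_ids = torch.tensor(
    [barthez_tokenizer.encode(text_sentence, truncation=True, padding=True, add_special_tokens=True)]
)

# Classification logits for the encoded sentence.
with torch.no_grad():
    predict = model(input_ids)[0]
print(predict)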