lkurakht committed
Commit 4c51699
1 Parent(s): 24f55e3
Files changed (3)
  1. app.py +50 -0
  2. requirements.txt +4 -1
  3. test_inference.py +35 -0
app.py ADDED
@@ -0,0 +1,50 @@
+ import streamlit as st
+ import torch
+ import numpy as np
+ import json
+ import typing as tp
+
+ import torch.nn.functional as F
+ from torch import Tensor
+
+ from datasets import ClassLabel
+ import transformers
+ from transformers import BertForSequenceClassification
+ from transformers import BertForSequenceClassification, AutoTokenizer
+
+ st.markdown("## Portuguese European and Brazilian dialect classifier")
+ st.markdown("[You can see the difference between dialects here](https://en.wikipedia.org/wiki/Portuguese_language#Writing_system)")
+ text = st.text_input('## Text:')
+
+ tokenizer = AutoTokenizer.from_pretrained('adalbertojunior/distilbert-portuguese-cased', do_lower_case=False)
+
+ classes = ['pt', 'pt_br']
+
+ class_label = ClassLabel(names=classes)
+
+
+ @st.cache_data
+ def get_model():
+     return BertForSequenceClassification.from_pretrained(
+         './pt_br_model',
+         num_labels = 2,
+         output_attentions = False,
+         output_hidden_states = False,
+     )
+
+
+ model = get_model()
+
+
+ @torch.inference_mode()
+ def print_results():
+     input_tensor = tokenizer(text, padding=True, truncation=True, max_length=256, add_special_tokens=True, return_tensors="pt")
+     logits = model(**input_tensor).logits
+     probabilities = F.softmax(logits, dim=1).flatten().tolist()
+     maxidx = np.argmax(probabilities)
+     results = f"### {classes[maxidx]} score: {probabilities[maxidx]*100}%"
+     st.markdown('## Results:')
+     st.markdown(results)
+
+ if text:
+     print_results()
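
For reference (not part of the commit): a minimal sketch of running the same checkpoint over several sentences at once outside Streamlit. The model directory ./pt_br_model, the tokenizer name, and the class order come from app.py above; the example sentences and everything else are assumptions.

import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, BertForSequenceClassification

classes = ['pt', 'pt_br']
tokenizer = AutoTokenizer.from_pretrained('adalbertojunior/distilbert-portuguese-cased', do_lower_case=False)
model = BertForSequenceClassification.from_pretrained('./pt_br_model', num_labels=2)
model.eval()

# Example sentences (assumed): European vs. Brazilian phrasing of the same idea.
texts = ['Estou a preparar o almoço.', 'Estou preparando o almoço.']

with torch.inference_mode():
    batch = tokenizer(texts, padding=True, truncation=True, max_length=256, return_tensors='pt')
    probs = F.softmax(model(**batch).logits, dim=1)   # one probability row per sentence
    preds = probs.argmax(dim=1)                       # most likely dialect per sentence

for sentence, pred, row in zip(texts, preds.tolist(), probs.tolist()):
    print(f'{sentence!r} -> {classes[pred]} ({row[pred] * 100:.1f}%)')

Because padding=True pads the batch to its longest sentence, both inputs go through the model in a single forward pass.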
requirements.txt CHANGED
@@ -1,2 +1,5 @@
  torch
- transformers
+ transformers
+ streamlit
+ numpy
+ datasets
test_inference.py ADDED
@@ -0,0 +1,35 @@
+ import torch
+
+ import json
+ import typing as tp
+
+ import torch.nn.functional as F
+ from torch import Tensor
+
+ from datasets import ClassLabel
+ import transformers
+ from transformers import BertForSequenceClassification
+ from transformers import BertForSequenceClassification, AutoTokenizer
+ import numpy as np
+
+ tokenizer = AutoTokenizer.from_pretrained('adalbertojunior/distilbert-portuguese-cased', do_lower_case=False)
+
+ classes = ['pt','pt_br']
+ class_label = ClassLabel(names=classes)
+
+ def get_model():
+     return BertForSequenceClassification.from_pretrained(
+         './pt_br_model',
+         num_labels = 2,
+         output_attentions = False,
+         output_hidden_states = False,
+     )
+
+ model = get_model()
+ text = 'hello'
+ input_tensor = tokenizer(text, padding=True, truncation=True, max_length=256, add_special_tokens=True, return_tensors="pt")
+
+ logits = model(**input_tensor).logits
+ probabilities = F.softmax(logits, dim=1).flatten().tolist()
+ maxidx = np.argmax(probabilities)
+ print(classes[maxidx], probabilities[maxidx])
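
Note that test_inference.py is a plain smoke-test script rather than an automated test. Below is a hedged sketch of how the same forward pass could be wrapped as a pytest-style check; the function name and the assertions are assumptions, not part of the commit.

import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, BertForSequenceClassification

def test_model_returns_valid_distribution():
    # Same tokenizer and model as the script above; ./pt_br_model must exist locally.
    tokenizer = AutoTokenizer.from_pretrained('adalbertojunior/distilbert-portuguese-cased', do_lower_case=False)
    model = BertForSequenceClassification.from_pretrained('./pt_br_model', num_labels=2)
    model.eval()
    with torch.inference_mode():
        inputs = tokenizer('hello', truncation=True, max_length=256, return_tensors='pt')
        probabilities = F.softmax(model(**inputs).logits, dim=1).flatten().tolist()
    assert len(probabilities) == 2                   # one score per class: 'pt' and 'pt_br'
    assert abs(sum(probabilities) - 1.0) < 1e-5      # softmax scores sum to 1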