AnnaPalatkina committed on
Commit 34051fc
1 Parent(s): c0ac237

Upload 6 files

Files changed (6)
  1. README.md +16 -4
  2. app.py +36 -0
  3. config.py +10 -0
  4. model_nobert_norec.bin +3 -0
  5. requirements.txt +5 -0
  6. sentiment_wrapper.py +100 -0
README.md CHANGED
@@ -1,12 +1,24 @@
  ---
- title: Fine Grained SA
- emoji: 🐠
- colorFrom: red
+ title: Norec Norbert2 TEST
+ emoji: 🏃
+ colorFrom: indigo
  colorTo: indigo
  sdk: gradio
  sdk_version: 3.13.0
  app_file: app.py
  pinned: false
  ---
- 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ <br>
+ <br>
+ 
+ This space provides a Gradio demo and an easy-to-run wrapper of a model for fine-grained sentiment analysis in Norwegian, pre-trained on the [NoReC dataset](https://github.com/ltgoslo/norec).
+ 
+ Information about the project can be found on the website of the [University of Oslo](https://www.mn.uio.no/ifi/english/research/projects/sant/).
+ 
+ The model can easily be used to predict sentiment as follows:
+ ```python
+ >>> from sentiment_wrapper import PredictionModel
+ >>> model = PredictionModel()
+ >>> model.predict(['vi liker svart kaffe', 'jeg elsker virkelig røde roser!'])
+ [5, 5]
+ ```
app.py ADDED
@@ -0,0 +1,36 @@
+ from sentiment_wrapper import PredictionModel
+ import gradio as gr
+
+ model = PredictionModel()
+
+
+ def predict(text: str):
+     result = model.predict([text])[0]
+     return f'class: {result}'
+
+ markdown_text = '''
+ <br>
+ <br>
+ This space provides a Gradio demo and an easy-to-run wrapper of a model for fine-grained sentiment analysis in Norwegian, pre-trained on the [NoReC dataset](https://huggingface.co/datasets/norec).
+
+ The model can easily be used to predict sentiment as follows:
+ ```python
+ >>> from sentiment_wrapper import PredictionModel
+ >>> model = PredictionModel()
+ >>> model.predict(['vi liker svart kaffe'])
+ [2]
+ ```
+ '''
+
+ with gr.Blocks() as demo:
+     with gr.Row(equal_height=False) as row:
+         text_input = gr.Textbox(label="input")
+         text_output = gr.Textbox(label="output")
+     with gr.Row(scale=4) as row:
+         text_button = gr.Button("submit").style(full_width=True)
+
+     text_button.click(fn=predict, inputs=text_input, outputs=text_output)
+     gr.Markdown(markdown_text)
+
+
+ demo.launch()
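The Blocks layout above is only needed for the custom row arrangement and the full-width button; a hedged sketch (not part of this commit) of how the same wiring could be expressed with `gr.Interface`:

```python
# Hypothetical alternative to app.py (not in this commit): the same predict()
# wiring built with gr.Interface instead of gr.Blocks, trading layout control
# for brevity.
import gradio as gr
from sentiment_wrapper import PredictionModel

model = PredictionModel()


def predict(text: str):
    # Same helper the Blocks demo uses: classify one text and format the class id.
    return f'class: {model.predict([text])[0]}'


gr.Interface(fn=predict,
             inputs=gr.Textbox(label="input"),
             outputs=gr.Textbox(label="output")).launch()
```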
config.py ADDED
@@ -0,0 +1,10 @@
+ params = {
+     'pretrained_model_name': 'ltgoslo/norbert2',
+     'path_to_model_bin': 'model_nobert_norec.bin',
+     'LR': 1e-05,
+     'dropout': 0.4,
+     'warmup': 2,
+     'epochs': 10,
+     'max_length': 512,
+     'batch_size': 4,
+ }
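Only `pretrained_model_name`, `path_to_model_bin`, `dropout`, `max_length`, and `batch_size` are read by `sentiment_wrapper.py`; the remaining entries (`LR`, `warmup`, `epochs`) belong to the fine-tuning run that produced `model_nobert_norec.bin`, which is not included in this commit. A hedged sketch of how they would typically feed an optimizer and scheduler (the `build_optimizer` helper, `steps_per_epoch`, and the reading of `warmup` as warmup epochs are assumptions):

```python
# Hypothetical sketch (not in this commit): plugging the training-only entries
# of config.params into the usual transformers fine-tuning setup.
from transformers import AdamW, get_linear_schedule_with_warmup
from config import params


def build_optimizer(model, steps_per_epoch):
    # AdamW over all model parameters with the configured learning rate.
    optimizer = AdamW(model.parameters(), lr=params['LR'])
    total_steps = steps_per_epoch * params['epochs']
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        # Assumption: 'warmup' counts warmup epochs, converted here to steps.
        num_warmup_steps=params['warmup'] * steps_per_epoch,
        num_training_steps=total_steps,
    )
    return optimizer, scheduler
```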
model_nobert_norec.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:470395ae27da50eb2291c61cb7d6518aaa2f50fb92279d24fb85ca2f373fc503
+ size 498185517
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers
+ torch
+ scikit-learn
+ pandas
+ numpy
sentiment_wrapper.py ADDED
@@ -0,0 +1,100 @@
+ from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
+ from sklearn.metrics import classification_report, f1_score
+ from torch.utils.data import Dataset, DataLoader
+ from tqdm.auto import tqdm
+ from config import params
+ from torch import nn
+ import pandas as pd
+ import numpy as np
+ import warnings
+ import random
+ import torch
+ import os
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+
+ class Dataset(Dataset):
+     def __init__(self, texts, max_len):
+         self.texts = texts
+         self.tokenizer = BertTokenizer.from_pretrained(params['pretrained_model_name'])
+         self.max_len = max_len
+
+     def __len__(self):
+         return len(self.texts)
+
+     def __getitem__(self, item):
+         text = str(self.texts[item])
+         encoding = self.tokenizer.encode_plus(
+             text,
+             add_special_tokens=True,
+             max_length=self.max_len,
+             return_token_type_ids=False,
+             pad_to_max_length=True,
+             return_attention_mask=True,
+             truncation=True,
+             return_tensors='pt',
+         )
+
+         return {
+             'text': text,
+             'input_ids': encoding['input_ids'].flatten(),
+             'attention_mask': encoding['attention_mask'].flatten(),
+         }
+
+
+ class SentimentClassifier(nn.Module):
+
+     def __init__(self, n_classes):
+         super(SentimentClassifier, self).__init__()
+         self.bert = BertModel.from_pretrained(params['pretrained_model_name'])
+         self.drop = nn.Dropout(params['dropout'])
+         self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
+
+     def forward(self, input_ids, attention_mask):
+         bert_output = self.bert(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             return_dict=False
+         )
+         last_hidden_state, pooled_output = bert_output
+         output = self.drop(pooled_output)
+         return self.out(output)
+
+
+ class PredictionModel:
+
+     def __init__(self):
+         self.model = SentimentClassifier(n_classes=6).to(device)
+         self.loss_fn = nn.CrossEntropyLoss().to(device)
+
+     def create_data_loader(self, X_test, max_len, batch_size):
+         ds = Dataset(
+             texts=np.array(X_test),
+             max_len=max_len
+         )
+         return DataLoader(
+             ds,
+             batch_size=batch_size
+         )
+
+     def predict(self, X_test: list):
+         data_loader = self.create_data_loader(X_test, params['max_length'], params['batch_size'])
+         self.model.load_state_dict(torch.load(params['path_to_model_bin'], map_location=device))
+         self.model.eval()
+         losses = []
+         y_pred = []
+         with torch.no_grad():
+             for d in data_loader:
+                 input_ids = d["input_ids"].to(device)
+                 attention_mask = d["attention_mask"].to(device)
+                 outputs = self.model(
+                     input_ids=input_ids,
+                     attention_mask=attention_mask
+                 )
+                 _, preds = torch.max(outputs, dim=1)
+                 y_pred += preds.tolist()
+         return y_pred
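`sentiment_wrapper.py` imports `classification_report` and `f1_score` but never calls them; when gold labels are available, the output of `predict` can be scored with them directly. A hedged sketch (the texts and labels below are placeholders, not real data):

```python
# Hypothetical evaluation sketch (not in this commit): score PredictionModel.predict
# against gold labels using the metrics sentiment_wrapper.py already imports.
from sklearn.metrics import classification_report, f1_score
from sentiment_wrapper import PredictionModel

texts = ['vi liker svart kaffe', 'jeg elsker virkelig røde roser!']  # placeholder inputs
gold = [3, 5]                                                        # placeholder labels in 0-5

model = PredictionModel()
pred = model.predict(texts)

print(f1_score(gold, pred, average='macro'))
print(classification_report(gold, pred, zero_division=0))
```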