AnnaPalatkina committed on
Commit 34051fc
1 Parent(s): c0ac237

Upload 6 files

Files changed (6)
  1. README.md +16 -4
  2. app.py +36 -0
  3. config.py +10 -0
  4. model_nobert_norec.bin +3 -0
  5. requirements.txt +5 -0
  6. sentiment_wrapper.py +100 -0
README.md CHANGED
@@ -1,12 +1,24 @@
  ---
- title: Fine Grained SA
- emoji: 🐠
- colorFrom: red
+ title: Norec Norbert2 TEST
+ emoji: 🏃
+ colorFrom: indigo
  colorTo: indigo
  sdk: gradio
  sdk_version: 3.13.0
  app_file: app.py
  pinned: false
  ---
- 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ <br>
+ <br>
+ 
+ This space provides a Gradio demo and an easy-to-run wrapper of a model for fine-grained sentiment analysis in Norwegian, pre-trained on the [NoReC dataset](https://github.com/ltgoslo/norec).
+ 
+ Information about the project can be found on the website of the [University of Oslo](https://www.mn.uio.no/ifi/english/research/projects/sant/).
+ 
+ The model can easily be used to predict sentiment as follows:
+ ```python
+ >>> from sentiment_wrapper import PredictionModel
+ >>> model = PredictionModel()
+ >>> model.predict(['vi liker svart kaffe', 'jeg elsker virkelig røde roser!'])
+ [5, 5]
+ ```
app.py ADDED
@@ -0,0 +1,36 @@
+ from sentiment_wrapper import PredictionModel
+ import gradio as gr
+
+ model = PredictionModel()
+
+
+ def predict(text: str):
+     result = model.predict([text])[0]
+     return f'class: {result}'
+
+ markdown_text = '''
+ <br>
+ <br>
+ This space provides a Gradio demo and an easy-to-run wrapper of a model for fine-grained sentiment analysis in Norwegian, pre-trained on the [NoReC dataset](https://huggingface.co/datasets/norec).
+
+ The model can easily be used to predict sentiment as follows:
+ ```python
+ >>> from sentiment_wrapper import PredictionModel
+ >>> model = PredictionModel()
+ >>> model.predict(['vi liker svart kaffe'])
+ [2]
+ ```
+ '''
+
+ with gr.Blocks() as demo:
+     with gr.Row(equal_height=False) as row:
+         text_input = gr.Textbox(label="input")
+         text_output = gr.Textbox(label="output")
+     with gr.Row(scale=4) as row:
+         text_button = gr.Button("submit").style(full_width=True)
+
+     text_button.click(fn=predict, inputs=text_input, outputs=text_output)
+     gr.Markdown(markdown_text)
+
+
+ demo.launch()
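The Blocks layout above is only needed for the custom row arrangement and the full-width button; a hedged sketch (not part of this commit) of how the same wiring could be expressed with `gr.Interface`:

```python
# Hypothetical alternative to app.py (not in this commit): the same predict()
# wiring built with gr.Interface instead of gr.Blocks, trading layout control
# for brevity.
import gradio as gr
from sentiment_wrapper import PredictionModel

model = PredictionModel()


def predict(text: str):
    # Same helper the Blocks demo uses: classify one text and format the class id.
    return f'class: {model.predict([text])[0]}'


gr.Interface(fn=predict,
             inputs=gr.Textbox(label="input"),
             outputs=gr.Textbox(label="output")).launch()
```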
config.py ADDED
@@ -0,0 +1,10 @@
+ params = {
+     'pretrained_model_name': 'ltgoslo/norbert2',
+     'path_to_model_bin': 'model_nobert_norec.bin',
+     'LR': 1e-05,
+     'dropout': 0.4,
+     'warmup': 2,
+     'epochs': 10,
+     'max_length': 512,
+     'batch_size': 4,
+ }
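Only `pretrained_model_name`, `path_to_model_bin`, `dropout`, `max_length`, and `batch_size` are read by `sentiment_wrapper.py`; the remaining entries (`LR`, `warmup`, `epochs`) belong to the fine-tuning run that produced `model_nobert_norec.bin`, which is not included in this commit. A hedged sketch of how they would typically feed an optimizer and scheduler (the `build_optimizer` helper, `steps_per_epoch`, and the reading of `warmup` as warmup epochs are assumptions):

```python
# Hypothetical sketch (not in this commit): plugging the training-only entries
# of config.params into the usual transformers fine-tuning setup.
from transformers import AdamW, get_linear_schedule_with_warmup
from config import params


def build_optimizer(model, steps_per_epoch):
    # AdamW over all model parameters with the configured learning rate.
    optimizer = AdamW(model.parameters(), lr=params['LR'])
    total_steps = steps_per_epoch * params['epochs']
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        # Assumption: 'warmup' counts warmup epochs, converted here to steps.
        num_warmup_steps=params['warmup'] * steps_per_epoch,
        num_training_steps=total_steps,
    )
    return optimizer, scheduler
```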
model_nobert_norec.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:470395ae27da50eb2291c61cb7d6518aaa2f50fb92279d24fb85ca2f373fc503
+ size 498185517
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers
+ torch
+ scikit-learn
+ pandas
+ numpy
sentiment_wrapper.py ADDED
@@ -0,0 +1,100 @@
+ from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
+ from sklearn.metrics import classification_report, f1_score
+ from torch.utils.data import Dataset, DataLoader
+ from tqdm.auto import tqdm
+ from config import params
+ from torch import nn
+ import pandas as pd
+ import numpy as np
+ import warnings
+ import random
+ import torch
+ import os
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+
+ class Dataset(Dataset):
+     def __init__(self, texts, max_len):
+         self.texts = texts
+         self.tokenizer = BertTokenizer.from_pretrained(params['pretrained_model_name'])
+         self.max_len = max_len
+
+     def __len__(self):
+         return len(self.texts)
+
+     def __getitem__(self, item):
+         text = str(self.texts[item])
+         encoding = self.tokenizer.encode_plus(
+             text,
+             add_special_tokens=True,
+             max_length=self.max_len,
+             return_token_type_ids=False,
+             pad_to_max_length=True,
+             return_attention_mask=True,
+             truncation=True,
+             return_tensors='pt',
+         )
+
+         return {
+             'text': text,
+             'input_ids': encoding['input_ids'].flatten(),
+             'attention_mask': encoding['attention_mask'].flatten(),
+         }
+
+
+ class SentimentClassifier(nn.Module):
+
+     def __init__(self, n_classes):
+         super(SentimentClassifier, self).__init__()
+         self.bert = BertModel.from_pretrained(params['pretrained_model_name'])
+         self.drop = nn.Dropout(params['dropout'])
+         self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
+
+     def forward(self, input_ids, attention_mask):
+         bert_output = self.bert(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             return_dict=False
+         )
+         last_hidden_state, pooled_output = bert_output
+         output = self.drop(pooled_output)
+         return self.out(output)
+
+
+ class PredictionModel:
+
+     def __init__(self):
+         self.model = SentimentClassifier(n_classes=6).to(device)
+         self.loss_fn = nn.CrossEntropyLoss().to(device)
+
+     def create_data_loader(self, X_test, max_len, batch_size):
+         ds = Dataset(
+             texts=np.array(X_test),
+             max_len=max_len
+         )
+         return DataLoader(
+             ds,
+             batch_size=batch_size
+         )
+
+     def predict(self, X_test: list):
+         data_loader = self.create_data_loader(X_test, params['max_length'], params['batch_size'])
+         self.model.load_state_dict(torch.load(params['path_to_model_bin'], map_location=device))
+         self.model.eval()
+         losses = []
+         y_pred = []
+         with torch.no_grad():
+             for d in data_loader:
+                 input_ids = d["input_ids"].to(device)
+                 attention_mask = d["attention_mask"].to(device)
+                 outputs = self.model(
+                     input_ids=input_ids,
+                     attention_mask=attention_mask
+                 )
+                 _, preds = torch.max(outputs, dim=1)
+                 y_pred += preds.tolist()
+         return y_pred
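`sentiment_wrapper.py` imports `classification_report` and `f1_score` but never calls them; when gold labels are available, the output of `predict` can be scored with them directly. A hedged sketch (the texts and labels below are placeholders, not real data):

```python
# Hypothetical evaluation sketch (not in this commit): score PredictionModel.predict
# against gold labels using the metrics sentiment_wrapper.py already imports.
from sklearn.metrics import classification_report, f1_score
from sentiment_wrapper import PredictionModel

texts = ['vi liker svart kaffe', 'jeg elsker virkelig røde roser!']  # placeholder inputs
gold = [3, 5]                                                        # placeholder labels in 0-5

model = PredictionModel()
pred = model.predict(texts)

print(f1_score(gold, pred, average='macro'))
print(classification_report(gold, pred, zero_division=0))
```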