cardiffnlp/twitter-xlm-roberta-base-sentiment

@@ -1,19 +1,9 @@
 # twitter-XLM-roBERTa-base for Sentiment Analysis
-TODO: create model card
-This is a roBERTa-base model trained on ~58M tweets and finetuned for sentiment analysis with the TweetEval benchmark.
-- Paper: [_TweetEval_ benchmark (Findings of EMNLP 2020)](https://arxiv.org/pdf/2010.12421.pdf).
-- Git Repo: [Tweeteval official repository](https://github.com/cardiffnlp/tweeteval).
 ## Example of classification
@@ -37,22 +27,17 @@ def preprocess(text):
         new_text.append(t)
     return " ".join(new_text)
-# Tasks:
-# emoji, emotion, hate, irony, offensive, sentiment
-# stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
-task='sentiment'
-MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 # download label mapping
 labels=[]
-mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
 with urllib.request.urlopen(mapping_link) as f:
-    html = f.read().decode('utf-8').split("\n")
-    csvreader = csv.reader(html, delimiter='\t')
 labels = [row[1] for row in csvreader if len(row) > 1]
 # PT
@@ -88,8 +73,8 @@ for i in range(scores.shape[0]):
 Output:
 ```
-1) positive 0.8466
-2) neutral 0.1458
-3) negative 0.0076
 ```

 # twitter-XLM-roBERTa-base for Sentiment Analysis
+This is an XLM-roBERTa-base model trained on ~198M tweets and finetuned for multilingual sentiment analysis.
+- Paper: [XLM-T: A Multilingual Language Model Toolkit for Twitter](https://arxiv.org/abs/2104.12250).
+- Git Repo: [XLM-T official repository](https://github.com/cardiffnlp/xlm-t).
 ## Example of classification
         new_text.append(t)
     return " ".join(new_text)
+MODEL = f"cardiffnlp/twitter-xlm-roberta-base-sentiment"
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 # download label mapping
 labels=[]
+mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt"
 with urllib.request.urlopen(mapping_link) as f:
+    html = f.read().decode('utf-8').split("\n")
+    csvreader = csv.reader(html, delimiter='\t')
 labels = [row[1] for row in csvreader if len(row) > 1]
 # PT
 Output:
 ```
+1) positive 0.76726073
+2) neutral 0.201
+3) negative 0.0312
 ```