nicholasKluge committed
Commit: 4a6ab46
Parent: c738f51

Update README.md

Files changed (1):
  1. README.md (+15, -7)
README.md CHANGED
@@ -16,11 +16,11 @@ widget:
   - text: "<s>Uma mulher está misturando ovos.<s>A mulher está bebendo.</s>"
     example_title: Exemplo
 ---
-# TeenyTinyLlama-460m-Assin2
+# TeenyTinyLlama-160m-Assin2
 
 TeenyTinyLlama is a pair of small foundational models trained in Brazilian Portuguese.
 
-This repository contains a version of [TeenyTinyLlama-460m](https://huggingface.co/nicholasKluge/TeenyTinyLlama-460m) (`TeenyTinyLlama-460m-Assin2`) fine-tuned on the [Assin2 dataset](https://huggingface.co/datasets/assin2).
+This repository contains a version of [TeenyTinyLlama-160m](https://huggingface.co/nicholasKluge/TeenyTinyLlama-160m) (`TeenyTinyLlama-160m-Assin2`) fine-tuned on the [Assin2 dataset](https://huggingface.co/datasets/assin2).
 
 ## Details
 
@@ -38,7 +38,7 @@ from transformers import pipeline
 
 text = "<s>Qual a capital do Brasil?<s>A capital do Brasil é Brasília!</s>"
 
-classifier = pipeline("text-classification", model="nicholasKluge/TeenyTinyLlama-460m-Assin2")
+classifier = pipeline("text-classification", model="nicholasKluge/TeenyTinyLlama-160m-Assin2")
 classifier(text)
 
 # >>> [{'label': 'ENTAILED', 'score': 0.9392824769020081}]
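For reference, a self-contained version of the inference snippet in the hunk above — a minimal sketch assuming only that `transformers` with a PyTorch backend is installed. The premise string is taken from the README's widget example; the hypothesis is an illustrative sentence, not from the dataset, and the input follows the same `<s>premise<s>hypothesis</s>` format used throughout the card:

```python
from transformers import pipeline

# Fine-tuned entailment classifier referenced in the updated README.
classifier = pipeline("text-classification", model="nicholasKluge/TeenyTinyLlama-160m-Assin2")

# Assin2-style input: premise and hypothesis joined with the model's special tokens.
premise = "Uma mulher está misturando ovos."     # "A woman is mixing eggs." (from the widget example)
hypothesis = "A mulher está preparando comida."  # illustrative hypothesis, not from the dataset
text = f"<s>{premise}<s>{hypothesis}</s>"

print(classifier(text))
# e.g. [{'label': 'ENTAILED', 'score': ...}] or [{'label': 'UNENTAILED', 'score': ...}]
```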
@@ -63,13 +63,13 @@ dataset = load_dataset("assin2")
 
 # Create a `ModelForSequenceClassification`
 model = AutoModelForSequenceClassification.from_pretrained(
-    "nicholasKluge/TeenyTinyLlama-460m",
+    "nicholasKluge/TeenyTinyLlama-160m",
     num_labels=2,
     id2label={0: "UNENTAILED", 1: "ENTAILED"},
     label2id={"UNENTAILED": 0, "ENTAILED": 1}
 )
 
-tokenizer = AutoTokenizer.from_pretrained("nicholasKluge/TeenyTinyLlama-460m")
+tokenizer = AutoTokenizer.from_pretrained("nicholasKluge/TeenyTinyLlama-160m")
 
 # Format the dataset
 train = dataset['train'].to_pandas()
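The hunk ends just as the dataset formatting begins; the rest of the recipe lies outside this diff. Below is a hedged sketch of one way that step could continue, assuming the Assin2 columns published on the Hub (`premise`, `hypothesis`, `entailment_judgment`) and reusing `train` and `tokenizer` from the snippet above; the 256-token limit is an arbitrary choice, and this is not necessarily the exact code in the full README:

```python
# Hypothetical continuation of the "Format the dataset" step (not the README's verbatim code).
from datasets import Dataset

# Join each pair with the same special-token format used for inference.
train["text"] = "<s>" + train["premise"] + "<s>" + train["hypothesis"] + "</s>"
# Assumes Assin2 encodes non-entailment as 0 and entailment as 1, matching id2label above.
train["label"] = train["entailment_judgment"]
train = train[["text", "label"]]

# Back to a `datasets.Dataset`, then tokenize for use with `transformers.Trainer`.
train_ds = Dataset.from_pandas(train)
train_ds = train_ds.map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=256, padding="max_length"),
    batched=True,
)
```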
@@ -158,7 +158,6 @@ All the shown results are the higher accuracy scores achieved on the respective
 ## Cite as 🤗
 
 ```latex
-
 @misc{correa24ttllama,
   title = {TeenyTinyLlama: open-source tiny language models trained in Brazilian Portuguese},
   author = {Corr{\^e}a, Nicholas Kluge and Falk, Sophia and Fatimah, Shiza and Sen, Aniket and De Oliveira, Nythamar},
@@ -166,6 +165,15 @@ All the shown results are the higher accuracy scores achieved on the respective
   year = {2024}
 }
 
+@misc{correa24ttllama,
+  doi = {10.1016/j.mlwa.2024.100558},
+  url = {https://www.sciencedirect.com/science/article/pii/S2666827024000343},
+  title = {TeenyTinyLlama: open-source tiny language models trained in Brazilian Portuguese},
+  author = {Corr{\^e}a, Nicholas Kluge and Falk, Sophia and Fatimah, Shiza and Sen, Aniket and De Oliveira, Nythamar},
+  journal = {Machine Learning with Applications},
+  publisher = {Elsevier},
+  year = {2024}
+}
 ```
 
 ## Funding
@@ -174,4 +182,4 @@ This repository was built as part of the RAIES ([Rede de Inteligência Artificia
 
 ## License
 
-TeenyTinyLlama-460m-Assin2 is licensed under the Apache License, Version 2.0. See the [LICENSE](LICENSE) file for more details.
+TeenyTinyLlama-160m-Assin2 is licensed under the Apache License, Version 2.0. See the [LICENSE](LICENSE) file for more details.
 