Spaces:
Running
on
Zero
Running
on
Zero
File size: 20,169 Bytes
8f59407 f9b8716 8f59407 ee7626a 8f59407 ba2a3ae 7d6f3d4 3255b79 ba2a3ae 3255b79 ba2a3ae 9d25f83 ba2a3ae 8f59407 6c19101 8f59407 38c66c7 123980d 38dd92a ba01e7c 09834f8 ddb7329 38c66c7 0325d36 ddb7329 453f168 ca9e330 c629a2b ddb7329 38c66c7 ca9e330 09834f8 c629a2b f9b8716 0325d36 f9b8716 ca9e330 6c19101 f9b8716 ca68a3c 09834f8 c629a2b ca68a3c 09834f8 c629a2b ca68a3c f9b8716 1469e49 09834f8 c629a2b 1469e49 f9b8716 c629a2b 1469e49 f9b8716 496bd7a f9b8716 0325d36 496bd7a f9b8716 0325d36 496bd7a f9b8716 c93dad0 6805c60 8f59407 cce4feb 123980d 8f59407 1469e49 8f59407 a26ce70 b42a3ee 95a7d48 0598d8e a26ce70 26bcd5f aab3062 a26ce70 123980d 5daac23 7d6f3d4 f08b866 ba2a3ae 8c5ac11 5daac23 ba2a3ae 0325d36 5c594a5 a26ce70 123980d 272ce33 978d584 123980d 0325d36 38c66c7 0325d36 38c66c7 0325d36 38c66c7 0325d36 09834f8 ecffc39 0325d36 ca68a3c ecffc39 0325d36 ca68a3c 09834f8 ecffc39 0325d36 1469e49 ecffc39 c629a2b 1469e49 09834f8 ecffc39 0325d36 496bd7a e3def2b 0325d36 c629a2b 496bd7a f9b8716 09834f8 f9b8716 ecffc39 0325d36 6805c60 ecffc39 c629a2b 6805c60 3255b79 cce4feb 5daac23 95a7d48 5daac23 95a7d48 5daac23 95a7d48 5daac23 3255b79 8f59407 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
import gradio as gr
import spaces
from transformers import pipeline
import torch
# ---------------------------------------------------------------------------
# Static UI text and example inputs used by the Gradio demo below.
# ---------------------------------------------------------------------------

# Markdown shown on the landing tab of the demo.
DESCRIPTION="""
### a Turkish encoder-decoder language model
Welcome to our Huggingface space, where you can explore the capabilities of TURNA.
**Key Features of TURNA:**
- **Powerful Architecture:** TURNA contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains.
- **Diverse Training Data:** Our model is trained on a varied dataset of 43 billion tokens, covering a wide array of domains.
- **Broad Applications:** TURNA is fine-tuned for a variety of generation and understanding tasks, including:
- Summarization
- Paraphrasing
- News title generation
- Sentiment classification
- Text categorization
- Named entity recognition
- Part-of-speech tagging
- Semantic textual similarity
- Natural language inference
**Note:** First inference might take time as the models are downloaded on-the-go.
*TURNA can generate toxic content or provide erroneous information. Double-check before usage.*
"""
# Markdown with the paper reference and BibTeX entry, rendered at the bottom
# of the page.
CITATION = """
Refer to our [paper](https://arxiv.org/abs/2401.14373) for more details.
### Citation
```bibtex
@misc{uludogan2024turna,
title={TURNA: A Turkish Encoder-Decoder Language Model for Enhanced Understanding and Generation},
author={Gökçe Uludoğan and Zeynep Yirmibeşoğlu Balal and Furkan Akkurt and Melikşah Türker and Onur Güngör and Susan Üsküdarlı},
year={2024},
eprint={2401.14373},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```
"""
# Example rows wired into gr.Examples on each tab. Each inner list is one
# example row whose elements map onto that tab's input components.
sentiment_example = [["Bu üründen çok memnun kaldım."]]
# Long Turkish passages, reused as examples for summarization, title
# generation and paraphrasing.
long_text = [["Eyfel Kulesi (Fransızca: La tour Eiffel [la tuʀ ɛˈfɛl]), Paris'teki demir kule. Kule, aynı zamanda tüm dünyada Fransa'nın sembolü halini almıştır. İsmini, inşa ettiren Fransız inşaat mühendisi Gustave Eiffel'den alır.[1] En büyük turizm cazibelerinden biri olan Eyfel Kulesi, yılda 6 milyon turist çeker. 2002 yılında toplam ziyaretçi sayısı 200 milyona ulaşmıştır."], ["Kalp krizi geçirenlerin yaklaşık üçte birinin kısa bir süre önce grip atlattığı düşünülüyor. Peki grip virüsü ne yapıyor da kalp krizine yol açıyor? Karpuz şöyle açıkladı: Grip virüsü kanın yapışkanlığını veya pıhtılaşmasını artırıyor."]]
# Shared by the NER and POS tabs.
ner_example = [["Benim adım Turna."]]
# NOTE(review): t2t_example appears unused by the UI below — verify before removing.
t2t_example = [["Paraphrase: Bu üründen çok memnun kaldım."]]
nli_example = [["Bunu çok beğendim.", "Bunu çok sevdim."]]
text_category_example = [[" anadolu_efes e 18 lik star ! beko_basketbol_ligi nde iddialı bir kadroyla sezona giren anadolu_efes transfer harekatına devam ediyor"]]
@spaces.GPU
def nli(first_input, second_input, model_choice="turna_nli_nli_tr"):
    """Run natural language inference or semantic textual similarity.

    Args:
        first_input: First sentence (the hypothesis for NLI).
        second_input: Second sentence (the premise for NLI).
        model_choice: "turna_nli_nli_tr" runs the NLI checkpoint; any other
            value falls back to the STSb semantic-similarity checkpoint.

    Returns:
        The model's generated label/score text.
    """
    if model_choice == "turna_nli_nli_tr":
        # NLI prompt format: hypothesis followed by premise (Turkish prefixes).
        prompt = f"hipotez: {first_input} önerme: {second_input}"
        checkpoint = "boun-tabi-LMG/turna_nli_nli_tr"
    else:
        # STS prompt format: first sentence followed by second sentence.
        prompt = f"ilk cümle: {first_input} ikinci cümle: {second_input}"
        checkpoint = "boun-tabi-LMG/turna_semantic_similarity_stsb_tr"
    # Pipeline is built per call: models are downloaded on demand in this
    # Space, and device=0 places the model on the GPU granted by @spaces.GPU.
    model = pipeline(model=checkpoint, device=0)
    return model(prompt)[0]["generated_text"]
@spaces.GPU
def sentiment_analysis(input, model_choice="turna_classification_17bintweet_sentiment"):
    """Classify the sentiment of *input* with the chosen TURNA checkpoint.

    Args:
        input: Text to classify.
        model_choice: Checkpoint suffix under the boun-tabi-LMG namespace
            (tweets or product reviews model).

    Returns:
        The generated sentiment label.
    """
    # device=0 targets the GPU allocated to this call by @spaces.GPU.
    classifier = pipeline(model=f"boun-tabi-LMG/{model_choice}", device=0)
    # Labels are short, so a tight generation cap suffices.
    outputs = classifier(input, max_new_tokens=4)
    return outputs[0]["generated_text"]
@spaces.GPU
def pos(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    """Part-of-speech tag *input* with the selected TURNA checkpoint.

    Args:
        input: Sentence to tag.
        model_choice: "turna_pos_imst" selects the IMST checkpoint; any other
            value falls back to the BOUN checkpoint.
        max_new_tokens: Generation length cap.
        length_penalty: Beam-search length penalty forwarded to the pipeline.
        no_repeat_ngram_size: N-gram repetition blocker forwarded to the pipeline.

    Returns:
        The generated tag sequence.
    """
    # The two original branches were identical except for the checkpoint name,
    # so the choice is reduced to picking the checkpoint once.
    suffix = "turna_pos_imst" if model_choice == "turna_pos_imst" else "turna_pos_boun"
    tagger = pipeline(model=f"boun-tabi-LMG/{suffix}", device=0)
    return tagger(input, max_new_tokens=max_new_tokens, length_penalty=length_penalty,
                  no_repeat_ngram_size=no_repeat_ngram_size)[0]["generated_text"]
@spaces.GPU
def ner(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    """Run named entity recognition over *input*.

    Args:
        input: Sentence to tag.
        model_choice: "turna_ner_wikiann" selects the WikiANN checkpoint; any
            other value falls back to the Milliyet checkpoint.
        max_new_tokens: Generation length cap.
        length_penalty: Beam-search length penalty forwarded to the pipeline.
        no_repeat_ngram_size: N-gram repetition blocker forwarded to the pipeline.

    Returns:
        The generated entity annotation text.
    """
    # Both original branches differed only in the checkpoint name.
    checkpoint = ("boun-tabi-LMG/turna_ner_wikiann"
                  if model_choice == "turna_ner_wikiann"
                  else "boun-tabi-LMG/turna_ner_milliyet")
    tagger = pipeline(model=checkpoint, device=0)
    return tagger(input, max_new_tokens=max_new_tokens, length_penalty=length_penalty,
                  no_repeat_ngram_size=no_repeat_ngram_size)[0]["generated_text"]
@spaces.GPU
def paraphrase(input, model_choice, max_new_tokens):
    """Paraphrase *input* with the selected TURNA paraphrasing checkpoint.

    Args:
        input: Text to paraphrase.
        model_choice: "turna_paraphrasing_tatoeba" selects the Tatoeba
            checkpoint; any other value falls back to OpenSubtitles.
        max_new_tokens: Generation length cap.

    Returns:
        The generated paraphrase.
    """
    # Collapse the two copy-pasted branches that differed only in checkpoint.
    if model_choice == "turna_paraphrasing_tatoeba":
        checkpoint = "boun-tabi-LMG/turna_paraphrasing_tatoeba"
    else:
        checkpoint = "boun-tabi-LMG/turna_paraphrasing_opensubtitles"
    paraphraser = pipeline(model=checkpoint, device=0)
    return paraphraser(input, max_new_tokens=max_new_tokens)[0]["generated_text"]
@spaces.GPU
def summarize(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    """Summarize *input* with the selected TURNA summarization checkpoint.

    Args:
        input: Text to summarize.
        model_choice: Key selecting the tr_news or mlsum checkpoint.
        max_new_tokens: Generation length cap.
        length_penalty: Beam-search length penalty forwarded to the pipeline.
        no_repeat_ngram_size: N-gram repetition blocker forwarded to the pipeline.

    Returns:
        The generated summary.
    """
    checkpoints = {
        "turna_summarization_tr_news": "boun-tabi-LMG/turna_summarization_tr_news",
        "turna_summarization_mlsum": "boun-tabi-LMG/turna_summarization_mlsum",
    }
    summarizer = pipeline(model=checkpoints[model_choice], device=0)
    result = summarizer(
        input,
        max_new_tokens=max_new_tokens,
        length_penalty=length_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
    )
    return result[0]["generated_text"]
@spaces.GPU
def generate_title(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    """Generate a news title for *input* with the selected TURNA checkpoint.

    Args:
        input: News text to title.
        model_choice: Key selecting the tr_news or mlsum title checkpoint.
        max_new_tokens: Generation length cap.
        length_penalty: Beam-search length penalty forwarded to the pipeline.
        no_repeat_ngram_size: N-gram repetition blocker forwarded to the pipeline.

    Returns:
        The generated title.
    """
    checkpoints = {"turna_title_generation_tr_news": "boun-tabi-LMG/turna_title_generation_tr_news",
                   "turna_title_generation_mlsum": "boun-tabi-LMG/turna_title_generation_mlsum"}
    # Renamed local (was the copy-pasted `summarization_model`) to reflect
    # that this pipeline generates titles, not summaries.
    title_model = pipeline(model=checkpoints[model_choice], device=0)
    return title_model(input, max_new_tokens=max_new_tokens, length_penalty=length_penalty,
                       no_repeat_ngram_size=no_repeat_ngram_size)[0]["generated_text"]
@spaces.GPU
def categorize(input):
    """Assign *input* to a news topic via the TTC4900 classification model.

    Returns:
        The generated category label.
    """
    classifier = pipeline(model="boun-tabi-LMG/turna_classification_ttc4900", device=0)
    # Category names are short; a small generation cap is enough.
    result = classifier(input, max_new_tokens=8)
    return result[0]["generated_text"]
@spaces.GPU
def turna(input, max_new_tokens, length_penalty,
          top_k, top_p, temp, num_beams,
          do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version):
    """Free-form text generation with a pre-trained TURNA checkpoint.

    Args:
        input: Prompt text; wrapped in the pre-training S2S format below.
        max_new_tokens: Generation length cap.
        length_penalty: Beam-search length penalty.
        top_k / top_p / temp: Sampling parameters (temp maps to temperature).
        num_beams: Number of beams for beam search.
        do_sample: Whether to sample instead of greedy/beam decode.
        no_repeat_ngram_size: N-gram repetition blocker.
        repetition_penalty: Token repetition penalty.
        turna_model_version: Checkpoint suffix (e.g. "TURNA", "TURNA-2850K").

    Returns:
        The generated continuation.
    """
    # Renamed local (was `turna`) so it no longer shadows this function.
    generator = pipeline(model=f"boun-tabi-LMG/{turna_model_version}", device=0)
    # Pre-training prompt format: sequence-to-sequence prefix plus EOS marker.
    prompt = f"[S2S] {input}<EOS>"
    return generator(prompt, max_new_tokens=max_new_tokens, length_penalty=length_penalty,
                     top_k=top_k, top_p=top_p, temperature=temp, num_beams=num_beams,
                     do_sample=do_sample, no_repeat_ngram_size=no_repeat_ngram_size,
                     repetition_penalty=repetition_penalty)[0]["generated_text"]
# ---------------------------------------------------------------------------
# Gradio UI: one tab per fine-tuned task plus a free-form generation tab.
# NOTE: several tabs rebind `max_new_tokens`, `length_penalty` and
# `no_repeat_ngram_size`; each .click()/Examples call captures the component
# created in its own tab before the next tab rebinds the name, so the reuse
# is harmless.
with gr.Blocks(theme="abidlabs/Lime") as demo:
    gr.Markdown("# TURNA")
    gr.Image("images/turna-logo.png", width=100, show_label=False, show_download_button=False, show_share_button=False)
    # Landing tab: static model description.
    with gr.Tab("TURNA"):
        gr.Markdown(DESCRIPTION)
    # Sentiment analysis (tweets / product reviews checkpoints).
    with gr.Tab("Sentiment Analysis"):
        gr.Markdown("TURNA fine-tuned on sentiment analysis. Enter text to analyse sentiment and pick the model (tweets or product reviews).")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    sentiment_choice = gr.Radio(choices = ["turna_classification_17bintweet_sentiment", "turna_classification_tr_product_reviews"], label ="Model", value="turna_classification_17bintweet_sentiment")
                    sentiment_input = gr.Textbox(label="Sentiment Analysis Input")
                    sentiment_submit = gr.Button()
                sentiment_output = gr.Textbox(label="Sentiment Analysis Output")
            sentiment_submit.click(sentiment_analysis, inputs=[sentiment_input, sentiment_choice], outputs=sentiment_output)
            sentiment_examples = gr.Examples(examples = sentiment_example, inputs = [sentiment_input, sentiment_choice], outputs=sentiment_output, fn=sentiment_analysis)
    # Text categorization (single TTC4900 checkpoint, no model picker).
    with gr.Tab("Text Categorization"):
        gr.Markdown("TURNA fine-tuned on text categorization. Enter text to categorize text or try the example.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(label="Text Categorization Input")
                    text_submit = gr.Button()
                text_output = gr.Textbox(label="Text Categorization Output")
            text_submit.click(categorize, inputs=[text_input], outputs=text_output)
            text_examples = gr.Examples(examples = text_category_example,inputs=[text_input], outputs=text_output, fn=categorize)
    # NLI and semantic textual similarity share the two-sentence layout.
    with gr.Tab("NLI & STS"):
        gr.Markdown("TURNA fine-tuned on natural language inference or semantic textual similarity. Enter text to infer entailment or measure semantic similarity. ")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    nli_choice = gr.Radio(choices = ["turna_nli_nli_tr", "turna_semantic_similarity_stsb_tr"], label ="Model", value="turna_nli_nli_tr")
                    nli_first_input = gr.Textbox(label="First Sentence")
                    nli_second_input = gr.Textbox(label="Second Sentence")
                    nli_submit = gr.Button()
                nli_output = gr.Textbox(label="NLI Output")
            nli_submit.click(nli, inputs=[nli_first_input, nli_second_input, nli_choice], outputs=nli_output)
            # NOTE(review): nli_example rows carry 2 values for 3 inputs; the
            # model choice falls back to the component default — confirm intended.
            nli_examples = gr.Examples(examples = nli_example, inputs = [nli_first_input, nli_second_input, nli_choice], outputs=nli_output, fn=nli)
    # Part-of-speech tagging with advanced generation controls.
    with gr.Tab("POS"):
        gr.Markdown("TURNA fine-tuned on part-of-speech-tagging. Enter text to parse parts of speech and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    pos_choice = gr.Radio(choices = ["turna_pos_imst", "turna_pos_boun"], label ="Model", value="turna_pos_imst")
                    with gr.Accordion("Advanced Generation Parameters"):
                        max_new_tokens = gr.Slider(label = "Maximum length",
                                                   minimum = 0,
                                                   maximum = 64,
                                                   value = 64)
                        length_penalty = gr.Slider(label = "Length penalty",
                                                   minimum = -10,
                                                   maximum = 10,
                                                   value=2.0)
                        no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
                with gr.Column():
                    pos_input = gr.Textbox(label="POS Input")
                    pos_submit = gr.Button()
                pos_output = gr.Textbox(label="POS Output")
            pos_submit.click(pos, inputs=[pos_input, pos_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=pos_output)
            # POS reuses the short NER example sentence.
            pos_examples = gr.Examples(examples = ner_example, inputs = [pos_input, pos_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=pos_output, fn=pos)
    # Named entity recognition with advanced generation controls.
    with gr.Tab("NER"):
        gr.Markdown("TURNA fine-tuned on named entity recognition. Enter text to parse named entities and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    ner_choice = gr.Radio(choices = ["turna_ner_wikiann", "turna_ner_milliyet"], label ="Model", value="turna_ner_wikiann")
                    with gr.Accordion("Advanced Generation Parameters"):
                        max_new_tokens = gr.Slider(label = "Maximum length",
                                                   minimum = 0,
                                                   maximum = 64,
                                                   value = 64)
                        length_penalty = gr.Slider(label = "Length penalty",
                                                   minimum = -10,
                                                   maximum = 10,
                                                   value=2.0)
                        no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
                with gr.Column():
                    ner_input = gr.Textbox(label="NER Input")
                    ner_submit = gr.Button()
                ner_output = gr.Textbox(label="NER Output")
            ner_submit.click(ner, inputs=[ner_input, ner_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=ner_output)
            ner_examples = gr.Examples(examples = ner_example, inputs = [ner_input, ner_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=ner_output, fn=ner)
    # Paraphrasing (only a length cap is exposed).
    with gr.Tab("Paraphrase"):
        gr.Markdown("TURNA fine-tuned on paraphrasing. Enter text to paraphrase and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    paraphrasing_choice = gr.Radio(choices = ["turna_paraphrasing_tatoeba", "turna_paraphrasing_opensubtitles"], label ="Model", value="turna_paraphrasing_tatoeba")
                    with gr.Accordion("Advanced Generation Parameters"):
                        max_new_tokens = gr.Slider(label = "Maximum length",
                                                   minimum = 0,
                                                   maximum = 20,
                                                   value = 20)
                with gr.Column():
                    paraphrasing_input = gr.Textbox(label = "Paraphrasing Input")
                    paraphrasing_submit = gr.Button()
                paraphrasing_output = gr.Text(label="Paraphrasing Output")
            paraphrasing_submit.click(paraphrase, inputs=[paraphrasing_input, paraphrasing_choice, max_new_tokens], outputs=paraphrasing_output)
            paraphrase_examples = gr.Examples(examples = long_text, inputs = [paraphrasing_input, paraphrasing_choice, max_new_tokens], outputs=paraphrasing_output, fn=paraphrase)
    # Summarization with advanced generation controls.
    with gr.Tab("Summarization"):
        gr.Markdown("TURNA fine-tuned on summarization. Enter text to summarize and pick the model.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    sum_choice = gr.Radio(choices = ["turna_summarization_mlsum", "turna_summarization_tr_news"], label ="Model", value="turna_summarization_mlsum")
                    with gr.Accordion("Advanced Generation Parameters"):
                        max_new_tokens = gr.Slider(label = "Maximum length",
                                                   minimum = 0,
                                                   maximum = 512,
                                                   value = 128)
                        length_penalty = gr.Slider(label = "Length penalty",
                                                   minimum = -10,
                                                   maximum = 10,
                                                   value=2.0)
                        no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
                with gr.Column():
                    sum_input = gr.Textbox(label = "Summarization Input")
                    sum_submit = gr.Button()
                sum_output = gr.Textbox(label = "Summarization Output")
            sum_submit.click(summarize, inputs=[sum_input, sum_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=sum_output)
            sum_examples = gr.Examples(examples = long_text, inputs = [sum_input, sum_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=sum_output, fn=summarize)
    # News title generation with advanced generation controls.
    with gr.Tab("Title Generation"):
        gr.Markdown("TURNA fine-tuned on news title generation. Enter news text to generate a title.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    title_choice = gr.Radio(choices = ["turna_title_generation_tr_news", "turna_title_generation_mlsum"], label ="Model", value="turna_title_generation_tr_news")
                    with gr.Accordion("Advanced Generation Parameters"):
                        max_new_tokens = gr.Slider(label = "Maximum length",
                                                   minimum = 0,
                                                   maximum = 64,
                                                   value = 64)
                        length_penalty = gr.Slider(label = "Length penalty",
                                                   minimum = -10,
                                                   maximum = 10,
                                                   value=2.0)
                        no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
                with gr.Column():
                    title_input = gr.Textbox(label = "News Title Generation Input")
                    title_submit = gr.Button()
                title_output = gr.Textbox(label = "News Title Generation Output")
            title_submit.click(generate_title, inputs=[title_input, title_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=title_output)
            title_examples = gr.Examples(examples = long_text, inputs = [title_input, title_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=title_output, fn=generate_title)
    # Free-form generation with the pre-trained model; exposes the full set
    # of sampling/decoding knobs plus the checkpoint version.
    with gr.Tab("Text Generation"):
        gr.Markdown("Pre-trained TURNA. Enter text to start generating.")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Accordion("Advanced Generation Parameters"):
                        max_new_tokens = gr.Slider(label = "Maximum length",
                                                   minimum = 0,
                                                   maximum = 512,
                                                   value = 128)
                        length_penalty = gr.Slider(label = "Length penalty",
                                                   value=1.0)
                        top_k = gr.Slider(label = "Top-k", value=10)
                        top_p = gr.Slider(label = "Top-p", value=0.95)
                        temp = gr.Slider(label = "Temperature", value=1.0, minimum=0.1, maximum=100.0)
                        no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
                        repetition_penalty = gr.Slider(label = "Repetition Penalty", minimum=0.0, value=3.1, step=0.1)
                        num_beams = gr.Slider(label = "Number of beams", minimum=1,
                                              maximum=10, value=3)
                        do_sample = gr.Radio(choices = [True, False], value = True, label = "Sampling")
                        turna_model_version = gr.Radio(choices = ["TURNA", "TURNA-2850K", "TURNA-4350K"], value = "TURNA", label = "Choose TURNA model version")
                with gr.Column():
                    text_gen_input = gr.Textbox(label="Text Generation Input")
                    text_gen_submit = gr.Button()
                text_gen_output = gr.Textbox(label="Text Generation Output")
            text_gen_submit.click(turna, inputs=[text_gen_input, max_new_tokens, length_penalty,
                                                 top_k, top_p, temp, num_beams,
                                                 do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version], outputs=text_gen_output)
            text_gen_example = [["Bir varmış, bir yokmuş, evvel zaman içinde, kalbur saman içinde, uzak diyarların birinde bir turna"]]
            text_gen_examples = gr.Examples(examples = text_gen_example, inputs = [text_gen_input, max_new_tokens, length_penalty,
                                            top_k, top_p, temp, num_beams, do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version], outputs=text_gen_output, fn=turna)
    # Paper reference and BibTeX at the bottom of every tab.
    gr.Markdown(CITATION)
demo.launch()