import os

import requests
import streamlit as st

from .utils import query
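
# query() is defined in .utils and is not shown in this file. For reference, a
# minimal sketch of what it presumably does, assuming the standard Hugging Face
# Inference API payload shape ({"inputs": ..., "parameters": ...}); the name
# _query_sketch and the HF_TOKEN environment variable are illustrative
# assumptions, not the actual .utils implementation.
def _query_sketch(text, url, params):
	headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
	payload = {"inputs": text, "parameters": params, "options": {"wait_for_model": True}}
	response = requests.post(url, headers=headers, json=payload)
	response.raise_for_status()
	# The hosted text2text-generation pipeline returns [{"generated_text": ...}]
	return response.json()[0]["generated_text"]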

def write():
	st.markdown(
			"""
			<h1 style="text-align:left;">TURNA</h1>
			""",
			unsafe_allow_html=True,
		)

	st.write("#")

	col = st.columns(2)

	col[0].image("images/turna-logo.png", width=100)

	st.markdown(
			"""

			<h3 style="text-align:right;">TURNA is a Turkish encoder-decoder language model.</h3>
			
			<p style="text-align:right;"></p>
			<p style="text-align:right;">Use the generation parameters on the sidebar to adjust generation quality.</p>
			<p style="text-align:right;"></p>
			""",
			unsafe_allow_html=True,
		)

	# Sidebar

	# Taken from https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
	st.sidebar.subheader("Configurable parameters")

	max_new_tokens = st.sidebar.number_input(
		"Maximum length",
		min_value=0,
		max_value=512,
		value=128,
		help="The maximum length of the sequence to be generated.",
	)
	length_penalty = st.sidebar.number_input(
		"Length penalty",
		value=1.0,
		help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
	)
	do_sample = st.sidebar.selectbox(
		"Sampling?",
		(True, False),
		help="Whether or not to use sampling; use greedy decoding otherwise.",
	)
	num_beams = st.sidebar.number_input(
		"Number of beams",
		min_value=1,
		max_value=10,
		value=3,
		help="The number of beams to use for beam search.",
	)
	repetition_penalty = st.sidebar.number_input(
		"Repetition Penalty",
		min_value=0.0,
		value=3.0,
		step=0.1,
		help="The parameter for repetition penalty. 1.0 means no penalty",
	)
	no_repeat_ngram_size = st.sidebar.number_input(
		"No Repeat N-Gram Size",
		min_value=0,
		value=3,
		help="If set to int > 0, all ngrams of that size can only occur once.",
	)
	temp = st.sidebar.slider(
		"Temperature",
		value=1.0,
		min_value=0.1,
		max_value=100.0,
		help="The value used to module the next token probabilities.",
	)
	top_k = st.sidebar.number_input(
		"Top k",
		value=10,
		help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
	)
	top_p = st.sidebar.number_input(
		"Top p",
		value=0.95,
		help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
	)
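	# Note: when "Sampling?" is False, decoding is greedy/beam search and the
	# temperature, top_k and top_p values above are ignored by generation.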

	# Default prompt: "Türkiye'nin başkenti neresidir?" ("What is the capital of Turkey?")
	input_text = st.text_area(label='Enter text:', height=100,
		value="Türkiye'nin başkenti neresidir?")
	url = "https://api-inference.huggingface.co/models/boun-tabi-LMG/TURNA"
	params = {"length_penalty": length_penalty, "no_repeat_ngram_size": no_repeat_ngram_size,
		"max_new_tokens": max_new_tokens, "do_sample": do_sample, "num_beams": num_beams,
		"repetition_penalty": repetition_penalty, "top_p": top_p, "top_k": top_k,
		"temperature": temp, "early_stopping": True}
	if st.button("Generate"):
		with st.spinner('Generating...'):
			output = query(input_text, url, params)
			st.success(output)
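
# write() is presumably called by a multi-page launcher rather than run
# directly; a hypothetical entry point (the module name is an assumption):
#
#   from apps import turna
#   turna.write()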