File size: 2,138 Bytes
dbd0cd3
f7c5310
59c9f9b
529d1c8
 
 
 
f7c5310
529d1c8
3a028ad
529d1c8
 
 
 
 
 
 
 
 
 
 
3a028ad
60a7c28
63507dd
3a028ad
811019f
3a028ad
 
811019f
3a028ad
 
811019f
f7c5310
 
 
a656e2f
6c82ba0
45ad4b4
907d7ea
f7c5310
 
63507dd
3a028ad
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hub ids of the three checkpoints, in the order they are bound to
# model1, model2 and model3 below.
_CHECKPOINTS = (
    "TwentyNine/byt5-small-ainu-latinizer-cos_w_restarts",
    "TwentyNine/byt5-small-ainu-latinizer-polynomial",
    "TwentyNine/byt5-small-ainu-latinizer-linear",
)

# A single tokenizer, loaded from the first checkpoint, is used with every model.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINTS[0])

# Unpacking consumes the map in order, so the models load in the listed sequence.
model1, model2, model3 = map(AutoModelForSeq2SeqLM.from_pretrained, _CHECKPOINTS)

def transcribe(input_str, model_index):
    output_str = ''
    model = None
    
    match model_index:
        case 1:
            model = model1
        case 2:
            model = model2
        case 3:
            model = model3
        case _:
            model = model1

    for input in input_str.split('\n'):
        input_enc  = tokenizer.encode(input.strip(), return_tensors='pt')
        output_enc = model.generate(input_enc, max_length=256)
        
        if len(output_str) > 0:
            output_str = output_str + '\n'
            
        output_str = output_str + tokenizer.decode(output_enc[0], skip_special_tokens=True)
        
    return output_str

# Sample sentence shown both as the initial value and as the placeholder
# of the input box.
_EXAMPLE_SENTENCE = 'トゥイマ ヒ ワ エエㇰ ワ ヒオーイオイ。ピㇼカノ ヌカㇻ ヤン!'

# Web UI wiring: the text box and the radio selection are handed to
# transcribe() as (input_str, model_index); its return value fills the
# output text box.
gradio_app = gr.Interface(
    transcribe,
    inputs=[
        gr.Textbox(
            label='Input (kana)',
            value=_EXAMPLE_SENTENCE,
            placeholder=_EXAMPLE_SENTENCE,
            info='Ainu text written in Japanese katakana (input).',
            interactive=True,
            autofocus=True,
        ),
        # Each choice is a (display label, value) pair; no initial selection
        # is configured here.
        gr.Radio(
            label="Training scheduler type",
            choices=[
                ("Cosine with Restarts", 1),
                ("Polynomial", 2),
                ("Linear", 3),
            ],
        ),
    ],
    outputs=gr.Textbox(
        label='Output (alphabet)',
        info='Ainu text written in the Latin alphabet (output).',
    ),
    title='KIT/TIP ByT5 Ainu Kana-Latin Converter',
    article='<p>Example sentence borrowed from <a href="https://www.hakusuisha.co.jp/book/b584600.html">New Express Ainu-go</a> by <a href="https://researchmap.jp/read0064265/?lang=english">Professor NAKAGAWA Hiroshi</a> of Chiba University.</p>',
)

# Start the server only when executed as a script, not when imported.
if __name__ == '__main__':
    gradio_app.launch()