File size: 1,773 Bytes
3294ddc
 
 
 
81d612f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6242a6
28e6409
b7f7313
 
 
3294ddc
 
 
 
 
 
81d612f
3294ddc
c1329a8
3294ddc
 
 
 
 
 
ac55441
422fe7b
f014f01
3294ddc
 
c2b95a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from transformers import pipeline
import gradio as gr
import pyewts
# Shared pyewts converter, created once at import time; transcribe() below
# calls its toUnicode() on the recognised text.
# NOTE(review): presumably the model emits Wylie (EWTS) transliteration that
# is turned into Tibetan script here — confirm against the model card.
converter = pyewts.pyewts()
# def remove_repeated_words(text):
#     # Tokenize the input text into words
#     words = text.split()

#     # Create a dictionary to count word occurrences
#     word_count = {}

#     # Create a list to store the final words
#     new_words = []

#     for word in words:
#         # Check if the word is in the dictionary
#         if word in word_count:
#             # If it has occurred once before, add it to the list with a count of 2
#             if word_count[word] == 1:
#                 new_words.append(word)
#                 word_count[word] = 2
#         else:
#             # If it has not occurred before, add it to the dictionary with a count of 1
#             word_count[word] = 1
#             new_words.append(word)

#     result = ' '.join(new_words)
#     return result

# Model pipeline, loaded once at import time and reused by every transcribe()
# call. Previously used checkpoints are kept commented out for reference.
# NOTE(review): device='cuda' is hard-coded, so start-up presumably fails on a
# host without a GPU — confirm the deployment target.
# NOTE(review): the repo id is spelled "whipser" as written; verify on the Hub
# before "correcting" it.
# pipe = pipeline(model="openpecha/whisper-small",device='cuda')
# pipe = pipeline(model="TenzinGayche/whisper-small-3",device='cuda')
pipe = pipeline(model="spsither/whipser-small-r2",device='cuda')

def transcribe(microphone, upload):
    """Transcribe one audio clip and return the text converted to Unicode.

    The microphone recording takes precedence; the uploaded file is used only
    when no recording was supplied.

    Args:
        microphone: File path of the microphone recording, or a falsy value
            (``None`` / ``""``) when nothing was recorded.
        upload: File path of the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        A ``(text, audio)`` tuple: the pipeline's ``"text"`` output passed
        through ``converter.toUnicode``, and the audio path that was
        transcribed.

    Raises:
        ValueError: If neither input holds audio.
    """
    audio = microphone or upload
    if not audio:
        # Fail early with an actionable message instead of handing an empty
        # value to the pipeline and surfacing an obscure error from inside it.
        raise ValueError(
            "No audio provided: record with the microphone or upload a file."
        )
    text = pipe(audio)["text"]
    return converter.toUnicode(text), audio

# Build the demo UI: two audio inputs (microphone recording and file upload),
# both delivered to transcribe() as file paths; the outputs are the
# transcribed text and the audio clip that was used.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs=["text", "audio"],
    title="Whisper Small Tibetan",
    # The description previously said "medium", which contradicted both the
    # title and the "small" checkpoint this app loads.
    description=(
        "Realtime demo for Tibetan speech recognition using a fine-tuned "
        "Whisper small model. "
        "Feedback: https://forms.gle/psbZnXGeBWXptkvs9"
    ),
)

iface.launch()