thak123 committed on
Commit
caaee3e
1 Parent(s): 8a1e498

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -72
app.py CHANGED
@@ -1,55 +1,55 @@
1
- # from transformers import WhisperTokenizer
2
- # import os
3
- # tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small") #, language="marathi", task="transcribe"
4
 
5
- # from transformers import pipeline
6
- # import gradio as gr
7
- # import torch
8
-
9
- # pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
10
- # task="automatic-speech-recognition", tokenizer= tokenizer) # change to "your-username/the-name-you-picked"
11
-
12
- # # pipe.model.config.forced_decoder_ids = (
13
- # # pipe.tokenizer.get_decoder_prompt_ids(
14
- # # language="marathi", task="transcribe"
15
- # # )
16
- # # )
17
-
18
- # def transcribe_speech(filepath):
19
- # output = pipe(
20
- # filepath,
21
- # max_new_tokens=256,
22
- # generate_kwargs={
23
- # "task": "transcribe",
24
- # "language": "konkani",
25
- # }, # update with the language you've fine-tuned on
26
- # chunk_length_s=30,
27
- # batch_size=8,
28
- # padding=True
29
- # )
30
- # return output["text"]
31
 
 
 
32
 
33
- # demo = gr.Blocks()
 
 
 
 
34
 
35
- # mic_transcribe = gr.Interface(
36
- # fn=transcribe_speech,
37
- # inputs=gr.Audio(sources="microphone", type="filepath"),
38
- # outputs=gr.components.Textbox(),
39
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # file_transcribe = gr.Interface(
42
- # fn=transcribe_speech,
43
- # inputs=gr.Audio(sources="upload", type="filepath"),
44
- # outputs=gr.components.Textbox(),
45
- # )
46
- # with demo:
47
- # gr.TabbedInterface(
48
- # [mic_transcribe, file_transcribe],
49
- # ["Transcribe Microphone", "Transcribe Audio File"],
50
- # )
51
 
52
- # demo.launch(debug=True)
53
 
54
  # # def transcribe(audio):
55
  # # # text = pipe(audio)["text"]
@@ -75,31 +75,31 @@
75
  # # iface.launch()
76
 
77
 
78
- from transformers import WhisperTokenizer, pipeline
79
- import gradio as gr
80
- import os
81
 
82
- tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="marathi", task="transcribe")
83
-
84
- pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition", tokenizer=tokenizer)
85
-
86
- def transcribe(audio):
87
- result = pipe(audio)
88
- text = result[0]['text']
89
- print("op", text)
90
- return text
91
-
92
- iface = gr.Interface(
93
- fn=transcribe,
94
- inputs=[gr.Audio(sources=["microphone", "upload"])],
95
- outputs="text",
96
- examples=[
97
- [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
98
- [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
99
- [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
100
- ],
101
- title="Whisper Konkani",
102
- description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
103
- )
104
 
105
- iface.launch()
 
1
+ from transformers import WhisperTokenizer
2
+ import os
3
+ tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small") #, language="marathi", task="transcribe"
4
 
5
+ from transformers import pipeline
6
+ import gradio as gr
7
+ import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
10
+ task="automatic-speech-recognition", tokenizer= tokenizer) # change to "your-username/the-name-you-picked"
11
 
12
+ # pipe.model.config.forced_decoder_ids = (
13
+ # pipe.tokenizer.get_decoder_prompt_ids(
14
+ # language="marathi", task="transcribe"
15
+ # )
16
+ # )
17
 
18
+ def transcribe_speech(filepath):
19
+ output = pipe(
20
+ filepath,
21
+ max_new_tokens=256,
22
+ generate_kwargs={
23
+ "task": "transcribe",
24
+ "language": "konkani",
25
+ }, # update with the language you've fine-tuned on
26
+ chunk_length_s=30,
27
+ batch_size=8,
28
+ # padding=True
29
+ )
30
+ return output["text"]
31
+
32
+
33
+ demo = gr.Blocks()
34
+
35
+ mic_transcribe = gr.Interface(
36
+ fn=transcribe_speech,
37
+ inputs=gr.Audio(sources="microphone", type="filepath"),
38
+ outputs=gr.components.Textbox(),
39
+ )
40
 
41
+ file_transcribe = gr.Interface(
42
+ fn=transcribe_speech,
43
+ inputs=gr.Audio(sources="upload", type="filepath"),
44
+ outputs=gr.components.Textbox(),
45
+ )
46
+ with demo:
47
+ gr.TabbedInterface(
48
+ [mic_transcribe, file_transcribe],
49
+ ["Transcribe Microphone", "Transcribe Audio File"],
50
+ )
51
 
52
+ demo.launch(debug=True)
53
 
54
  # # def transcribe(audio):
55
  # # # text = pipe(audio)["text"]
 
75
  # # iface.launch()
76
 
77
 
78
+ # from transformers import WhisperTokenizer, pipeline
79
+ # import gradio as gr
80
+ # import os
81
 
82
+ # tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="marathi", task="transcribe")
83
+
84
+ # pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition", tokenizer=tokenizer)
85
+
86
+ # def transcribe(audio):
87
+ # result = pipe(audio)
88
+ # text = result[0]['text']
89
+ # print("op", text)
90
+ # return text
91
+
92
+ # iface = gr.Interface(
93
+ # fn=transcribe,
94
+ # inputs=[gr.Audio(sources=["microphone", "upload"])],
95
+ # outputs="text",
96
+ # examples=[
97
+ # [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
98
+ # [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
99
+ # [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
100
+ # ],
101
+ # title="Whisper Konkani",
102
+ # description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
103
+ # )
104
 
105
+ # iface.launch()