vasudevgupta committed on
Commit
d27ed5b
1 Parent(s): b0158c4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -0
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import tensorflow as tf
from wav2vec2 import Wav2Vec2Processor, Wav2Vec2ForCTC


if __name__ == '__main__':
    # The same class is instantiated twice: the `is_tokenizer=False` instance
    # is applied to raw audio before the model, and the `is_tokenizer=True`
    # instance turns predicted token ids back into text.
    processor = Wav2Vec2Processor(is_tokenizer=False)
    tokenizer = Wav2Vec2Processor(is_tokenizer=True)
    model = Wav2Vec2ForCTC.from_pretrained("vasudevgupta/gsoc-wav2vec2-960h")

    def _forward(speech: tf.Tensor):
        """Run the model on one utterance and return the arg-max token ids.

        Args:
            speech: Float tensor holding the audio samples of one utterance.

        Returns:
            Tensor of predicted ids: the arg-max over the model output's last
            axis, with all size-1 dimensions squeezed away.
        """
        # `[None]` adds a leading axis of size 1 so the model sees a batch.
        speech = processor(speech)[None]
        tf_out = model(speech, training=False)
        return tf.squeeze(tf.argmax(tf_out, axis=-1))

    def recognize_text(inputs):
        """Transcribe the audio delivered by the Gradio "audio" input.

        Args:
            inputs: Pair whose second element is the audio sample array, or
                ``None`` when the user submitted without providing audio.

        Returns:
            The decoded transcription, or an empty string when there is no
            audio to transcribe.
        """
        # Without this guard the tuple unpacking below raises TypeError when
        # the form is submitted with no recording/upload.
        if inputs is None:
            return ""

        # NOTE(review): the first element (presumably the sample rate) is
        # discarded and no resampling is done -- confirm the checkpoint's
        # expected sample rate matches what the UI delivers.
        _, speech = inputs
        speech = tf.constant(speech, dtype=tf.float32)
        # `tf.transpose` with no `perm` reverses the axes; a 1-D array is
        # left unchanged.
        # NOTE(review): presumably meant for (samples, channels) input --
        # verify that multi-channel audio is actually handled downstream.
        speech = tf.transpose(speech)
        tf_out = _forward(speech)
        return tokenizer.decode(tf_out.numpy().tolist())

    gr.Interface(fn=recognize_text, inputs="audio", outputs="text").launch()