mobinln committed on
Commit
d1dfef4
·
1 Parent(s): 7c11e96

fix: audio interface

Browse files
Files changed (1) hide show
  1. app.py +23 -15
app.py CHANGED
@@ -7,29 +7,37 @@ model = WhisperForConditionalGeneration.from_pretrained("Neurai/NeuraSpeech_Whis
7
  forced_decoder_ids = processor.get_decoder_prompt_ids(language="fa", task="transcribe")
8
 
9
 
10
- def transcribe(audio, *args):
11
- print(audio, args)
12
  if audio is None:
13
  return "No audio input provided. Please record or upload an audio file."
14
 
15
- # audio is now a file path, not a tuple
16
- try:
17
- array, sample_rate = librosa.load(audio, sr=16000)
18
- except Exception as e:
19
- return f"Error loading audio file: {str(e)}"
20
-
21
- # The rest of the function remains the same
22
  array = librosa.to_mono(array)
23
- input_features = processor(array, sampling_rate=sample_rate, return_tensors="pt").input_features
 
24
 
25
  # generate token ids
26
- predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
27
  # decode token ids to text
28
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
29
- print(transcription)
30
- return transcription[0] # Return the first (and only) transcription
31
-
32
-
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  demo = gr.Interface(
34
  fn=transcribe,
35
  inputs=[gr.Audio(sources=["microphone"], type="filepath")],
 
7
  forced_decoder_ids = processor.get_decoder_prompt_ids(language="fa", task="transcribe")
8
 
9
 
10
+ def transcribe(audio):
 
11
  if audio is None:
12
  return "No audio input provided. Please record or upload an audio file."
13
 
14
+ sample_rate, array = audio
15
+ sr = 16000
 
 
 
 
 
16
  array = librosa.to_mono(array)
17
+ array = librosa.resample(array, orig_sr=sample_rate, target_sr=16000)
18
+ input_features = processor(array, sampling_rate=sr, return_tensors="pt").input_features
19
 
20
  # generate token ids
21
+ predicted_ids = model.generate(input_features)
22
  # decode token ids to text
23
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
24
+ return transcription
25
+
26
+
27
+ # input_audio = gr.Audio(
28
+ # sources=["microphone"],
29
+ # waveform_options=gr.WaveformOptions(
30
+ # waveform_color="#01C6FF",
31
+ # waveform_progress_color="#0066B4",
32
+ # skip_length=2,
33
+ # show_controls=True,
34
+ # ),
35
+ # )
36
+ # demo = gr.Interface(
37
+ # fn=reverse_audio,
38
+ # inputs=input_audio,
39
+ # outputs="text"
40
+ # )
41
  demo = gr.Interface(
42
  fn=transcribe,
43
  inputs=[gr.Audio(sources=["microphone"], type="filepath")],