Spaces:

cdactvm
/

Hindi_ASR

Sleeping

App Files Files Community

cdactvm committed on Oct 17, 2024

Commit

bcc223d

verified ·

1 Parent(s): f656d13

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -1

app.py CHANGED Viewed

@@ -59,7 +59,48 @@ def transcribe_hindi_old(audio):
     replaced_words = replace_words(processd_doubles)
     converted_text=text_to_int(replaced_words)
     return converted_text
 def sel_lng(lng, mic=None, file=None):
     if mic is not None:
         audio = mic
@@ -74,6 +115,9 @@ def sel_lng(lng, mic=None, file=None):
         return transcribe_hindi_new(audio)
     elif lng== "model_3":
         return transcribe_hindi_lm(audio)
 # demo=gr.Interface(
 #     transcribe,
@@ -92,7 +136,7 @@ demo=gr.Interface(
     inputs=[
         gr.Dropdown([
-            "model_1","model_2","model_3"],label="Select Model"),
         gr.Audio(sources=["microphone","upload"], type="filepath"),
     ],
     outputs=[

     replaced_words = replace_words(processd_doubles)
     converted_text=text_to_int(replaced_words)
     return converted_text
+## Noise-reduction preprocessing (high-pass filter, Wiener filter, VAD) followed by transcription.
+###############################################
+def noise_reduction_pipeline(filepath):  # denoise the audio file at `filepath`; returns the path of the processed WAV
+    # Load the audio, run high-pass filter -> Wiener filter -> VAD, then write the result to disk.
+    audio, sr = librosa.load(filepath, sr=None)  # with librosa, sr=None keeps the file's native sample rate
+    audio_hp = high_pass_filter(audio, sr, cutoff=100, order=5)  # helper defined outside this view; cutoff presumably in Hz — confirm
+    audio_wiener = wiener_filter(audio_hp)
+    audio_vad = apply_vad(audio_wiener, sr)  # presumably keeps only speech segments — confirm against apply_vad
+    output_filepath = "processed_output.wav"  # NOTE(review): fixed relative path reused by every call; concurrent requests may overwrite each other — confirm
+    sf.write(output_filepath, audio_vad, sr)
+    return output_filepath
+# Hugging Face ASR function uses the pre-loaded model
+def transcribe_with_huggingface(filepath):  # transcribe the audio at `filepath` and return the post-processed text
+    result = transcriber_hindi_lm(filepath)  # transcriber_hindi_lm is defined outside this view
+    text_value = result['text']
+    cleaned_text = text_value.replace("<s>", "")  # remove every literal "<s>" marker from the raw text
+    converted_to_list = convert_to_list(cleaned_text, text_to_list())
+    processed_doubles = process_doubles(converted_to_list)
+    replaced_words = replace_words(processed_doubles)
+    converted_text = text_to_int(replaced_words)  # presumably turns number words into digits — confirm against text_to_int
+    print("Transcription: ", converted_text)  # echoes the final text to stdout
+    return converted_text
+# Combined function to process and transcribe audio
+def process_audio_and_transcribe(audio):  # denoise `audio`, then transcribe it; handled failures are returned as message strings
+    # Step 1: Preprocess (noise reduction); `audio` is passed to noise_reduction_pipeline as its filepath argument.
+    try:
+        processed_filepath = noise_reduction_pipeline(audio)
+    except webrtcvad.Error as e:  # NOTE(review): only webrtcvad.Error is caught; any other exception from this step propagates
+        return f"Error in processing audio for VAD: {str(e)}"
+    # Step 2: Transcription of the processed file.
+    try:
+        transcription = transcribe_with_huggingface(processed_filepath)
+    except Exception as e:  # any Exception raised while transcribing is reported as text rather than re-raised
+        return f"Transcription failed: {str(e)}"
+    return transcription
+#################################################
 def sel_lng(lng, mic=None, file=None):
     if mic is not None:
         audio = mic
         return transcribe_hindi_new(audio)
     elif lng== "model_3":
         return transcribe_hindi_lm(audio)
+    elif lng== "model_4":
+        return process_audio_and_transcribe(audio)
 # demo=gr.Interface(
 #     transcribe,
     inputs=[
         gr.Dropdown([
+            "model_1","model_2","model_3","model_4"],label="Select Model"),
         gr.Audio(sources=["microphone","upload"], type="filepath"),
     ],
     outputs=[