cdactvm committed on
Commit
bcc223d
1 Parent(s): f656d13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -1
app.py CHANGED
@@ -59,7 +59,48 @@ def transcribe_hindi_old(audio):
59
  replaced_words = replace_words(processd_doubles)
60
  converted_text=text_to_int(replaced_words)
61
  return converted_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
 
 
 
63
  def sel_lng(lng, mic=None, file=None):
64
  if mic is not None:
65
  audio = mic
@@ -74,6 +115,9 @@ def sel_lng(lng, mic=None, file=None):
74
  return transcribe_hindi_new(audio)
75
  elif lng== "model_3":
76
  return transcribe_hindi_lm(audio)
 
 
 
77
 
78
  # demo=gr.Interface(
79
  # transcribe,
@@ -92,7 +136,7 @@ demo=gr.Interface(
92
 
93
  inputs=[
94
  gr.Dropdown([
95
- "model_1","model_2","model_3"],label="Select Model"),
96
  gr.Audio(sources=["microphone","upload"], type="filepath"),
97
  ],
98
  outputs=[
 
59
  replaced_words = replace_words(processd_doubles)
60
  converted_text=text_to_int(replaced_words)
61
  return converted_text
62
+
63
+ ## implementation of noise reduction techniques.
64
+ ###############################################
65
def noise_reduction_pipeline(filepath, output_filepath="processed_output.wav",
                             cutoff=100, order=5):
    """Denoise an audio file and write the cleaned audio to disk.

    Pipeline: high-pass filter -> Wiener filter -> voice activity
    detection (VAD), using the module-level helper functions.

    Args:
        filepath: Path of the input audio file (anything librosa can read).
        output_filepath: Destination WAV path. Defaults to the original
            hard-coded name for backward compatibility.
            NOTE(review): a fixed default means concurrent calls overwrite
            each other's output — callers that may run in parallel (e.g. a
            shared Gradio app) should pass a unique path.
        cutoff: High-pass cutoff frequency in Hz (default matches the
            previous hard-coded 100 Hz).
        order: High-pass filter order (default matches the previous 5).

    Returns:
        The path the processed WAV was written to (``output_filepath``).
    """
    # sr=None keeps the file's native sample rate instead of resampling.
    audio, sr = librosa.load(filepath, sr=None)
    audio_hp = high_pass_filter(audio, sr, cutoff=cutoff, order=order)
    audio_wiener = wiener_filter(audio_hp)
    audio_vad = apply_vad(audio_wiener, sr)
    sf.write(output_filepath, audio_vad, sr)
    return output_filepath
74
+
75
+ # Hugging Face ASR function uses the pre-loaded model
76
def transcribe_with_huggingface(filepath):
    """Transcribe *filepath* with the pre-loaded Hindi LM pipeline.

    Post-processing: strip the decoder's "<s>" markers, tokenize, collapse
    doubled tokens, apply word replacements, then convert number words to
    digits. Prints the final transcription before returning it.
    """
    raw_text = transcriber_hindi_lm(filepath)['text']
    # Remove the sentence-start tokens emitted by the LM decoder.
    stripped = raw_text.replace("<s>", "")
    tokens = convert_to_list(stripped, text_to_list())
    final_text = text_to_int(replace_words(process_doubles(tokens)))
    print("Transcription: ", final_text)
    return final_text
86
+
87
+ # Combined function to process and transcribe audio
88
def process_audio_and_transcribe(audio):
    """Denoise *audio* and transcribe the cleaned file.

    Returns the transcription string on success. On failure returns a
    human-readable error string instead of raising: VAD errors from the
    noise-reduction stage, or any exception from the ASR stage.
    """
    # Stage 1: noise reduction (webrtcvad may reject malformed frames).
    try:
        cleaned_path = noise_reduction_pipeline(audio)
    except webrtcvad.Error as vad_err:
        return f"Error in processing audio for VAD: {str(vad_err)}"

    # Stage 2: ASR on the denoised file.
    try:
        return transcribe_with_huggingface(cleaned_path)
    except Exception as asr_err:
        return f"Transcription failed: {str(asr_err)}"
102
+ #################################################
103
+
104
  def sel_lng(lng, mic=None, file=None):
105
  if mic is not None:
106
  audio = mic
 
115
  return transcribe_hindi_new(audio)
116
  elif lng== "model_3":
117
  return transcribe_hindi_lm(audio)
118
+ elif lng== "model_4":
119
+ return process_audio_and_transcribe(audio)
120
+
121
 
122
  # demo=gr.Interface(
123
  # transcribe,
 
136
 
137
  inputs=[
138
  gr.Dropdown([
139
+ "model_1","model_2","model_3","model_4"],label="Select Model"),
140
  gr.Audio(sources=["microphone","upload"], type="filepath"),
141
  ],
142
  outputs=[