Spaces:

cdactvm
/

Hindi_ASR

Sleeping

App Files Files Community

cdactvm committed on Oct 16, 2024

Commit

bfde6e2

verified ·

1 Parent(s): ed68cbd

Upload 12 files

Browse files

Files changed (10) hide show

NoiseReductionInASR.py +351 -0
Text2List.py +66 -0
applyVad.py +105 -0
convert2list.py +15 -3
highPassFilter.py +35 -0
main.py +51 -0
numberMapping.py +135 -0
processDoubles.py +54 -24
replaceWords.py +157 -144
wienerFilter.py +22 -0

NoiseReductionInASR.py ADDED Viewed

	@@ -0,0 +1,351 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[ ]:
+# Notebook-only shell magic, kept as a comment so that this exported script
+# can run outside IPython (where `get_ipython` is not defined).
+# Install the dependency manually before running:
+# !pip install webrtcvad
+# In[ ]:
+# import librosa
+# import numpy as np
+# import scipy.signal
+# import webrtcvad
+# import soundfile as sf  # New library for saving audio
+# import matplotlib.pyplot as plt
+# # Function to apply a high-pass filter
+# def high_pass_filter(audio, sr, cutoff=100):
+#     # Design a high-pass Butterworth filter
+#     sos = scipy.signal.butter(10, cutoff, btype='highpass', fs=sr, output='sos')
+#     filtered_audio = scipy.signal.sosfilt(sos, audio)
+#     return filtered_audio
+# # Function to apply Wiener filter for noise reduction
+# def wiener_filter(audio):
+#     return scipy.signal.wiener(audio)
+# # Voice Activity Detection using WebRTC VAD
+# def apply_vad(audio, sr, frame_duration=30, aggressiveness=3):
+#     vad = webrtcvad.Vad(aggressiveness)  # aggressiveness: 0 (least aggressive) to 3 (most aggressive)
+#     # Convert audio to 16-bit PCM (required by webrtcvad)
+#     audio_int16 = np.int16(audio * 32767)  # assuming `audio` is in range [-1, 1]
+#     frame_size = int(sr * frame_duration / 1000)  # frame size in samples
+#     frames = [audio_int16[i:i+frame_size] for i in range(0, len(audio_int16), frame_size)]
+#     voiced_audio = np.concatenate([frame for frame in frames if vad.is_speech(frame.tobytes(), sample_rate=sr)])
+#     # Convert back to float32
+#     voiced_audio = np.float32(voiced_audio) / 32767
+#     return voiced_audio
+# # Load the audio file
+# def load_audio(filepath):
+#     # Load with librosa
+#     audio, sr = librosa.load(filepath, sr=None)
+#     return audio, sr
+# # Save the audio file using soundfile
+# def save_audio(filepath, audio, sr):
+#     # Use soundfile.write to save the audio
+#     sf.write(filepath, audio, sr)
+# # Full noise reduction pipeline
+# def noise_reduction_pipeline(filepath):
+#     # Step 1: Load audio
+#     audio, sr = load_audio(filepath)
+#     print(f"Loaded audio with sample rate: {sr}, duration: {len(audio) / sr:.2f} seconds")
+#     # Step 2: Apply high-pass filter
+#     audio_hp = high_pass_filter(audio, sr, cutoff=100)  # Remove low-frequency noise below 100 Hz
+#     # Step 3: Apply Wiener filter (for noise reduction)
+#     audio_wiener = wiener_filter(audio_hp)
+#     # Step 4: Apply Voice Activity Detection (VAD)
+#     audio_vad = apply_vad(audio_wiener, sr)
+#     # Step 5: Save processed audio
+#     output_filepath = "processed_output.wav"
+#     save_audio(output_filepath, audio_vad, sr)
+#     print(f"Processed audio saved to {output_filepath}")
+#     return output_filepath
+# # Optional: Plot the original and processed audio signals
+# def plot_signals(original, processed, sr):
+#     plt.figure(figsize=(14, 6))
+#     plt.subplot(2, 1, 1)
+#     librosa.display.waveshow(original, sr=sr)
+#     plt.title("Original Audio")
+#     plt.subplot(2, 1, 2)
+#     librosa.display.waveshow(processed, sr=sr)
+#     plt.title("Processed Audio")
+#     plt.tight_layout()
+#     plt.show()
+# # Example usage:
+# if __name__ == "__main__":
+#     # Replace 'input.wav' with your audio file path
+#     input_filepath = 'C:/Users/WCHL/Desktop/hindi_dataset/train/hp_sounds/crm/hi/1728268478957.wav'  # input file to process
+#     processed_filepath = noise_reduction_pipeline(input_filepath)
+#     # processed_filepath=
+#     # Load original and processed audio for visualization
+#     original_audio, sr = load_audio(input_filepath)
+#     processed_audio, _ = load_audio(processed_filepath)
+#     # Plot the original and processed signals
+#     plot_signals(original_audio, processed_audio, sr)
+# In[ ]:
+# !pip install speechbrain
+# ##########################
+#
+# In[1]:
+# Load the Hugging Face ASR pipeline
+from transformers import pipeline
+# Hindi checkpoint; `hindi_pipe` is the pipeline the ad-hoc transcription cell further down calls.
+hindi_pipe = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_new")
+# NOTE(review): in the visible part of this file `whisper_pipe` and `eng_pipe` are only
+# referenced from commented-out lines, yet both models are loaded at import time — confirm
+# they are still needed.
+whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
+# NOTE(review): the model path below is a machine-specific local directory.
+eng_pipe = pipeline(task="automatic-speech-recognition", model="C:/Users/WCHL/Desktop/huggingface_english/hf_eng")
+# In[12]:
+import os
+import re
+import librosa
+import nbimporter
+import torchaudio
+import numpy as np
+import scipy.signal
+import webrtcvad
+import soundfile as sf
+import warnings
+warnings.filterwarnings("ignore")
+from transformers import pipeline
+from text2int import text_to_int
+from isNumber import is_number
+from Text2List import text_to_list
+from convert2list import convert_to_list
+from processDoubles import process_doubles
+from replaceWords import replace_words
+from applyVad import apply_vad
+from wienerFilter import wiener_filter
+from highPassFilter import high_pass_filter
+def noise_reduction_pipeline(filepath, target_sr=16000):
+    """Denoise an audio file and write the result to ``processed_output.wav``.
+
+    Steps: load -> high-pass filter (100 Hz, order 5) -> Wiener filter ->
+    voice-activity detection -> save.
+
+    Parameters:
+    filepath (str): Path of the audio file to clean.
+    target_sr (int): Sample rate the audio is resampled to while loading.
+        Default is 16000 Hz.
+
+    Returns:
+    str: Path of the written file (always ``"processed_output.wav"``).
+    """
+    # Load directly at the target rate. The imported apply_vad resamples to
+    # 16 kHz internally whenever it is given another rate, but it only returns
+    # the samples; loading with sr=None and then writing with the file's native
+    # rate therefore produced a WAV whose header rate did not match its samples.
+    audio, sr = librosa.load(filepath, sr=target_sr)
+    print(sr)
+    audio_hp = high_pass_filter(audio, sr, cutoff=100, order=5)
+    audio_wiener = wiener_filter(audio_hp)
+    audio_vad = apply_vad(audio_wiener, sr)
+    output_filepath = "processed_output.wav"
+    sf.write(output_filepath, audio_vad, sr)
+    return output_filepath
+# Hugging Face ASR pipeline integration
+def transcribe_with_huggingface(filepath):
+    """Transcribe ``filepath`` with the Hindi ASR model and normalise number words.
+
+    The raw transcript has ``<s>`` markers removed and is then passed, in order,
+    through convert_to_list, process_doubles, replace_words and text_to_int.
+    The final text is printed and returned.
+    """
+    recognizer = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_new")
+    raw_text = recognizer(filepath)['text']
+    # Each stage consumes the previous stage's output.
+    text = raw_text.replace("<s>", "")
+    text = convert_to_list(text, text_to_list())
+    text = process_doubles(text)
+    text = replace_words(text)
+    converted_text = text_to_int(text)
+    print("Transcription: ", converted_text)
+    return converted_text
+if __name__ == "__main__":
+    # Script entry point: denoise one recording, then transcribe the cleaned file.
+    # Step 1: Input file path
+    # NOTE(review): hard-coded, machine-specific path — adjust before running elsewhere.
+    input_filepath = 'C:/Users/WCHL/Desktop/hp_sounds/101003/crm/hi/1728685442307.wav'
+    # input_file="enhanced.wav"
+    # Step 2: Preprocess (Noise Reduction)
+    processed_filepath = noise_reduction_pipeline(input_filepath)
+    # Step 3: ASR (Automatic Speech Recognition) with Hugging Face pipeline
+    transcription = transcribe_with_huggingface(processed_filepath)
+# In[ ]:
+# Ad-hoc cell: transcribe one raw (not denoised) recording with the preloaded
+# Hindi pipeline and run the same text normalisation chain as
+# transcribe_with_huggingface.
+# result = eng_pipe(filepath)
+result = hindi_pipe("C:/Users/WCHL/Desktop/hp_sounds/101003/crm/hi/1728685502007.wav")
+# result = hindi_pipe("./enhanced/1728268841215.wav")
+# result = whisper_pipe(filepath)
+text_value=result['text']
+cleaned_text=text_value.replace("<s>", "")
+converted_to_list=convert_to_list(cleaned_text,text_to_list())
+processd_doubles=process_doubles(converted_to_list)
+replaced_words = replace_words(processd_doubles)
+converted_text=text_to_int(replaced_words)
+# Output the transcription
+print("Transcription: ", converted_text)
+# Sample transcriptions captured from earlier runs of this cell. They were
+# pasted in as bare text, which is not valid Python, so they are kept here as
+# comments for reference only:
+# नमस्का जी 1 मन 2 पुलिस हेलप्लेन से बात कर रहे बताइए आपकी ाएमर्जेंसी है
+# नमिश्का जी 1 मन 2 पुलिस हेलप्लेन से बात कर रह बताइए आपकी क्या एमर्जेंसी है
+# नमस्का जी 1 मन 2 पुलिस हेलप्लेन से बात कर रह बताइए आपके क्या एमर्जेंसी हैवेल्कम 2 एमर्जनसी
+# वेल्कम 2 एमर्जनसी
+# वेलकम 2 एमर्जेंसी
+# और 9 र मलीख वेल्कम 2 एमर्जंसीनमस्कार जी 1 ्स 2 बारा पुलस हल्प्लाइन में आपका स्वागत ह बताइए आपकी के एमर्जेंसी है
+# नमस्कार जी 1 ्स दौबारा पुलिस हेल्प्लाइ में आपका स्वागत है बताइए आपकी के एमर्जेंसी है
+# नमस्कार जी 1 2 बारा पुलिस हल्प्लाइन में आपका स्वागत है बताइए आपकी क् एमर्जेंसी हैमस्कार जी 1 ्स 2 12 पुलस हल्प्लाइन में आपका स्वागत ह बताइए आपकी के एमर्जेंसी है
+# नमस्कार जी 1 ्स दौबारा पुलिस हेल्प्लाइ में आपका स्वागत है बताइए आपकी के एमर्जेंसी है
+# नमस्कार जी 1 2 12 पुलिस हल्प्लाइन में आपका स्वागत है बताइए आपकी क् एमर्जेंसी हैनमस्कार जी इक्सुबारा में आपका स्वागत हैइनम
+# नमस्कार जी इक्सुबारा में आपका स्वागत है कि इनमें
+# नमस्कार जी 1 ्सुबारा में आपका स्वागत हैइन
+# In[ ]:
+import os
+import numpy as np
+import scipy.signal
+import webrtcvad
+import soundfile as sf
+import librosa
+import logging
+from transformers import pipeline
+from text2int import text_to_int
+from isNumber import is_number
+from Text2List import text_to_list
+from convert2list import convert_to_list
+from processDoubles import process_doubles
+from replaceWords import replace_words
+# Set up logging
+# Configured at import time: INFO level and above, each record formatted as
+# "<timestamp> - <level> - <message>". The helper functions below report
+# failures through this logger instead of raising.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Noise reduction functions
+def high_pass_filter(audio, sr, cutoff=100, order=5):
+    """Apply a Butterworth high-pass filter; fall back to the input on failure.
+
+    Parameters:
+    audio (numpy array): The input audio signal.
+    sr (int): The sample rate of the audio signal.
+    cutoff (float): The cutoff frequency in Hz. Default is 100 Hz.
+    order (int): The order of the filter. Default is 5.
+
+    Returns:
+    numpy array: The filtered signal, or ``audio`` itself if filtering raised
+    (the error is logged, not propagated).
+    """
+    try:
+        sections = scipy.signal.butter(order, cutoff, btype='highpass', fs=sr, output='sos')
+        return scipy.signal.sosfilt(sections, audio)
+    except Exception as e:
+        logging.error(f"High-pass filter failed: {e}")
+    # Only reached when the filter design or application raised.
+    return audio
+def wiener_filter(audio):
+    """Run scipy's Wiener filter over ``audio``; return the input unchanged on failure.
+
+    Any exception raised by the filter is logged and swallowed.
+    """
+    try:
+        denoised = scipy.signal.wiener(audio)
+    except Exception as e:
+        logging.error(f"Wiener filter failed: {e}")
+        return audio
+    return denoised
+def apply_vad(audio, sr, frame_duration=30, aggressiveness=3):
+    """Keep only the frames of ``audio`` that WebRTC VAD classifies as speech.
+
+    Parameters:
+    audio (numpy array): Float audio samples, expected in the range [-1, 1].
+    sr (int): Sample rate in Hz, passed straight to webrtcvad.
+    frame_duration (int): Frame length in milliseconds. Default is 30.
+    aggressiveness (int): webrtcvad aggressiveness mode. Default is 3.
+
+    Returns:
+    numpy array: float32 samples of the voiced frames concatenated in order.
+    If VAD fails or finds no speech, the input ``audio`` is returned unchanged
+    (best-effort behaviour; the problem is logged).
+    """
+    try:
+        vad = webrtcvad.Vad(aggressiveness)
+        # Clip before scaling so out-of-range samples saturate instead of
+        # wrapping around in the int16 conversion.
+        audio_int16 = np.int16(np.clip(audio, -1.0, 1.0) * 32767)
+        frame_size = int(sr * frame_duration / 1000)
+        voiced_frames = []
+        # Stop before the trailing partial frame: webrtcvad rejects frames of
+        # any other length, which previously aborted VAD for almost every input.
+        for start in range(0, len(audio_int16) - frame_size + 1, frame_size):
+            frame = audio_int16[start:start + frame_size]
+            if vad.is_speech(frame.tobytes(), sample_rate=sr):
+                voiced_frames.append(frame)
+        if not voiced_frames:
+            # np.concatenate([]) would raise; report the real reason instead.
+            logging.warning("VAD found no speech frames; returning audio unchanged.")
+            return audio
+        return np.float32(np.concatenate(voiced_frames)) / 32767
+    except Exception as e:
+        logging.error(f"VAD processing failed: {e}")
+        return audio
+def load_audio(filepath):
+    """Load ``filepath`` with librosa at its native sample rate.
+
+    Returns:
+    tuple: ``(audio, sr)`` on success, ``(None, None)`` if loading raised
+    (the error is logged, not propagated).
+    """
+    try:
+        samples, rate = librosa.load(filepath, sr=None)
+    except Exception as e:
+        logging.error(f"Failed to load audio: {e}")
+        return None, None
+    return samples, rate
+def save_audio(filepath, audio, sr):
+    """Write ``audio`` to ``filepath`` with soundfile, logging instead of raising.
+
+    Parameters:
+    filepath (str): Destination path.
+    audio (numpy array): Samples to write.
+    sr (int): Sample rate in Hz.
+
+    Returns:
+    bool: True if the file was written, False if writing raised. Returning a
+    status lets callers detect a failed save, which was previously invisible
+    because the error was only logged.
+    """
+    try:
+        sf.write(filepath, audio, sr)
+    except Exception as e:
+        logging.error(f"Failed to save audio: {e}")
+        return False
+    logging.info(f"Audio saved at {filepath}")
+    return True
+def noise_reduction_pipeline(filepath):
+    """Denoise ``filepath`` and write the result to ``processed_output.wav``.
+
+    Steps: load -> high-pass filter -> Wiener filter -> VAD -> save.
+
+    Returns:
+    str or None: Path of the cleaned file, or None if the audio could not be
+    loaded or the cleaned file could not be written.
+    """
+    # Step 1: Load audio
+    audio, sr = load_audio(filepath)
+    if audio is None:
+        return None
+    # Step 2: Apply high-pass filter
+    audio_hp = high_pass_filter(audio, sr)
+    # Step 3: Apply Wiener filter
+    audio_wiener = wiener_filter(audio_hp)
+    # Step 4: Apply VAD
+    audio_vad = apply_vad(audio_wiener, sr)
+    # Step 5: Save cleaned audio
+    output_filepath = "processed_output.wav"
+    # Remove any file left behind by an earlier run, so that the file's
+    # presence after saving proves that this run wrote it.
+    try:
+        os.remove(output_filepath)
+    except FileNotFoundError:
+        pass
+    except OSError as e:
+        logging.error(f"Could not replace previous output file: {e}")
+        return None
+    save_audio(output_filepath, audio_vad, sr)
+    # The save step logs write errors rather than raising them, so verify the
+    # result here instead of returning a path to a file that does not exist.
+    if not os.path.exists(output_filepath):
+        return None
+    return output_filepath
+# Hugging Face ASR pipeline integration
+def transcribe_with_huggingface(filepath, model_name="cdactvm/w2v-bert-2.0-hindi_new"):
+    """Transcribe ``filepath`` with a Hugging Face ASR model and normalise number words.
+
+    Parameters:
+    filepath (str): Audio file to transcribe.
+    model_name (str): Hugging Face model identifier passed to ``pipeline``.
+
+    Returns:
+    str: The processed transcription, or an empty string if any step raised
+    (the error is logged, not propagated).
+    """
+    try:
+        # The model is loaded on every call.
+        logging.info("Loading ASR model...")
+        recognizer = pipeline("automatic-speech-recognition", model=model_name)
+        raw_text = recognizer(filepath).get('text', '')
+        # Post-processing chain; each stage consumes the previous stage's output.
+        text = raw_text.replace("<s>", "")
+        text = convert_to_list(text, text_to_list())
+        text = process_doubles(text)
+        text = replace_words(text)
+        converted_text = text_to_int(text)
+        logging.info("Transcription completed.")
+        return converted_text
+    except Exception as e:
+        logging.error(f"ASR transcription failed: {e}")
+        return ""
+if __name__ == "__main__":
+    # Script entry point: denoise one recording and, if that produced a file,
+    # transcribe it and print the result.
+    # Input file path
+    # NOTE(review): hard-coded, machine-specific path — adjust before running elsewhere.
+    input_filepath = 'C:/Users/WCHL/Desktop/hp_sounds/101005/crm/hi/1728268817091.wav'
+    # Step 1: Preprocess (Noise Reduction)
+    processed_filepath = noise_reduction_pipeline(input_filepath)
+    # Step 2: Check if noise reduction succeeded
+    if processed_filepath:
+        # Step 3: ASR (Automatic Speech Recognition) with Hugging Face pipeline
+        transcription = transcribe_with_huggingface(processed_filepath)
+        # An empty string means transcription failed or produced no text.
+        if transcription:
+            print("Transcription:", transcription)
+        else:
+            logging.warning("No transcription could be generated.")
+    else:
+        logging.warning("Noise reduction failed; skipping ASR transcription.")
+# In[ ]:

Text2List.py ADDED Viewed

	@@ -0,0 +1,66 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[4]:
+def text_to_list():
+    """Return the vocabulary of number words (Devanagari script) used for matching.
+
+    The list contains Hindi number words and English number words transliterated
+    into Devanagari, for 0-99 and 100, including several spelling variants.
+    Entries are grouped by range; the order of the list is preserved as written.
+
+    Returns:
+    list of str: The number-word vocabulary.
+    """
+    text_list = [
+        # Hindi script for English numbers (11-19)
+        'इलेवन', 'ट्वेल्व', 'थर्टीन', 'फोर्टीन', 'फिफ्टीन', 'सिक्स्टीन', 'सेवन्टीन', 'एटीन', 'नाइन्टीन',
+        # Hindi numbers (11-19)
+        # NOTE(review): 'तेरा ' carries a trailing space — confirm whether that is intentional.
+        # A missing comma after 'सोल्ला' used to fuse it with 'सत्रह' into one entry.
+        'ग्यारह', 'बारह', 'तेरह','तेरा ', 'चौदह', 'पंद्रह', 'सोलह','सोल्ला', 'सत्रह', 'सतरा', 'अठारा', 'उनाइस','अठारह', 'उन्नीस',
+        # Hindi script for English multiples of ten (20, 30, ..., 90)
+        'ट्वेंटी', 'थर्टी', 'फोर्टी', 'फिफ्टी', 'सिक्स्टी', 'सेवेन्टी', 'सेवंटी', 'सत्तर','सेवनटी','सेवेनटी','सेवांटी','एटी', 'नाइंटी',
+        # Hindi multiples of ten (20, 30, ..., 90)
+        'बीस', 'तीस', 'चालीस', 'पचास', 'साठ', 'सत्तर', 'अस्सी', 'नब्बे',
+        # Hindi script for English combinations of 21-29
+        'ट्वेंटी वन', 'ट्वेंटी टू', 'ट्वेंटी थ्री', 'ट्वेंटी फोर', 'ट्वेंटी फाइव', 'ट्वेंटी सिक्स', 'ट्वेंटी सेवन', 'ट्वेंटी एट', 'ट्वेंटी नाइन',
+        # Hindi combinations of 21-29
+        'इक्कीस', 'बाईस', 'तेईस', 'चौबीस', 'पच्चीस', 'छब्बीस', 'सत्ताईस', 'अट्ठाईस', 'उनतीस',
+        # Hindi script for English combinations of 31-39
+        'थर्टी वन', 'थर्टी टू', 'थर्टी थ्री', 'थर्टी फोर', 'थर्टी फाइव', 'थर्टी सिक्स', 'थर्टी सेवन', 'थर्टी एट', 'थर्टी नाइन',
+        # Hindi combinations of 31-39
+        'इकतीस', 'बत्तीस', 'तेतीस', 'चौंतीस', 'पैंतीस', 'छत्तीस', 'सैंतीस', 'अड़तीस', 'उनतालीस',
+        # Hindi script for English combinations of 41-49
+        'फोर्टी वन', 'फोर्टी टू', 'फोर्टी थ्री', 'फोर्टी फोर', 'फोर्टी फाइव', 'फोर्टी सिक्स', 'फोर्टी सेवन', 'फोर्टी एट', 'फोर्टी नाइन',
+        # Hindi combinations of 41-49
+        'इकतालीस', 'बयालीस', 'तैंतालीस', 'चौंतालीस', 'पैंतालीस', 'छयालिस', 'सैंतालीस', 'अड़तालीस', 'उनचास',
+        # Hindi script for English combinations of 51-59
+        'फिफ्टी वन', 'फिफ्टी टू', 'फिफ्टी थ्री', 'फिफ्टी फोर', 'फिफ्टी फाइव', 'फिफ्टी सिक्स', 'फिफ्टी सेवन', 'फिफ्टी एट', 'फिफ्टी नाइन',
+        # Hindi combinations of 51-59
+        'इक्यावन', 'बावन', 'तिरेपन', 'चौवन', 'पचपन', 'छप्पन', 'सत्तावन','संतावन', 'अट्ठावन', 'उनसठ','अंठावन','उंसट',
+        # Hindi script for English combinations of 61-69
+        'सिक्स्टी वन', 'सिक्स्टी टू', 'सिक्स्टी थ्री', 'सिक्स्टी फोर', 'सिक्स्टी फाइव', 'सिक्स्टी सिक्स', 'सिक्स्टी सेवन', 'सिक्स्टी एट', 'सिक्स्टी नाइन',
+        # Hindi combinations of 61-69
+        'इकसठ', 'बासठ', 'तिरसठ', 'चौंसठ', 'पैंसठ', 'छियासठ', 'सड़सठ', 'अड़सठ', 'उनहत्तर',
+        # Hindi script for English combinations of 71-79
+        'सेवेन्टी वन', 'सेवेन्टी टू', 'सेवेन्टी थ्री', 'सेवेन्टी फोर', 'सेवेन्टी फाइव', 'सेवेन्टी सिक्स', 'सेवेन्टी सेवन', 'सेवेन्टी एट', 'सेवेन्टी नाइन',
+        # Hindi combinations of 71-79
+        # A missing comma after the first 'उनासी' used to fuse it with 'चौहत्तर' into one entry.
+        'इकहत्तर', 'बहत्तर', 'तिहत्तर', 'तियत्तर','तीहत्तर','पचत्तर', 'चिहत्तर', 'अटत्तर', 'उनासी', 'चौहत्तर', 'पचहत्तर', 'छिहत्तर', 'सतहत्तर', 'अठहत्तर', 'उन्यासी','उनासी','अठत्तर',
+        # Hindi script for English combinations of 81-89
+        'एटी वन', 'एटी टू', 'एटी थ्री', 'एटी फोर', 'एटी फाइव', 'एटी सिक्स', 'एटी सेवन', 'एटी एट', 'एटी नाइन',
+        # Hindi combinations of 81-89
+        'इक्यासी', 'बयासी', 'तिरासी', 'चौरासी', 'पचासी', 'छियासी', 'सतासी', 'अठासी', 'नवासी',
+        # Hindi script for English combinations of 91-99
+        'नाइंटी वन', 'नाइंटी टू', 'नाइंटी थ्री', 'नाइंटी फोर', 'नाइंटी फाइव', 'नाइंटी सिक्स', 'नाइंटी सेवन', 'नाइंटी एट', 'नाइंटी नाइन',
+        # Hindi combinations of 91-99
+        'इक्यानवे', 'बानवे', 'तिरानवे', 'चौरानवे', 'पचानवे', 'छियानवे', 'सतानवे', 'अठानवे', 'निन्यानवे',
+        # Hindi script for English numbers (0-10)
+        'ज़ीरो', 'वन', 'टू', 'थ्री', 'फोर', 'फाइव', 'सिक्स', 'सेवन', 'एट', 'नाइन', 'टेन',
+        # Hindi numbers (0-10)
+        'जीरो', 'एक', 'दो', 'तीन', 'चार', 'पांच', 'छह', 'सात', 'आठ', 'नौ', 'दस',
+        # Hindi script for 100
+        'हंड्रेड',
+        # Hindi for 100
+        'सौ',
+    ]
+    return text_list
+# In[ ]:

applyVad.py ADDED Viewed

	@@ -0,0 +1,105 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[3]:
+# import webrtcvad
+# import numpy as np
+# import librosa
+# def apply_vad(audio, sr, frame_duration=30, aggressiveness=3):
+#     '''
+#      Voice Activity Detection (VAD): It is a technique used to determine whether a segment of audio contains speech.
+#      This is useful in noisy environments where you want to filter out non-speech parts of the audio.
+#      webrtcvad: This is a Python package based on the VAD from the WebRTC (Web Real-Time Communication) project.
+#      It helps detect speech in small chunks of audio.
+#      '''
+#     vad = webrtcvad.Vad()
+#     audio_int16 = np.int16(audio * 32767)
+#     frame_size = int(sr * frame_duration / 1000)
+#     frames = [audio_int16[i:i + frame_size] for i in range(0, len(audio_int16), frame_size)]
+#     voiced_audio = np.concatenate([frame for frame in frames if vad.is_speech(frame.tobytes(), sample_rate=sr)])
+#     voiced_audio = np.float32(voiced_audio) / 32767
+#     return voiced_audio
+# In[1]:
+# import webrtcvad
+# import librosa
+# import numpy as np
+# def apply_vad(audio, sr, frame_duration_ms=30):
+#     # Initialize WebRTC VAD
+#     vad = webrtcvad.Vad()
+#     vad.set_mode(1)  # Set aggressiveness mode (0-3)
+#     # Convert to 16kHz if not already
+#     if sr != 16000:
+#         audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
+#         sr = 16000
+#     # Convert to 16-bit PCM
+#     audio = (audio * 32768).astype(np.int16)
+#     frame_length = int(sr * (frame_duration_ms / 1000.0))  # Calculate frame length in samples
+#     bytes_per_frame = frame_length * 2  # 16-bit audio has 2 bytes per sample
+#     # Apply VAD to the audio
+#     voiced_frames = []
+#     for i in range(0, len(audio), frame_length):
+#         frame = audio[i:i + frame_length].tobytes()
+#         if len(frame) == bytes_per_frame and vad.is_speech(frame, sr):
+#             voiced_frames.extend(audio[i:i + frame_length])
+#     # Return the VAD-filtered audio
+#     return np.array(voiced_frames)
+# In[4]:
+import webrtcvad
+import numpy as np
+import librosa
+def apply_vad(audio, sr, frame_duration=30, aggressiveness=3):
+    '''
+    Voice Activity Detection (VAD): keeps only the frames detected as speech.
+
+    Parameters:
+    audio (numpy array): Float audio samples, expected in the range [-1, 1].
+    sr (int): Sample rate of ``audio`` in Hz.
+    frame_duration (int): Frame length in milliseconds. Default is 30.
+    aggressiveness (int): webrtcvad aggressiveness mode. Default is 3.
+
+    Returns:
+    numpy array: float32 samples of the voiced frames, concatenated in order and
+    expressed at the caller's sample rate ``sr``. Empty if no speech was found.
+    '''
+    vad = webrtcvad.Vad(aggressiveness)
+    # VAD itself always runs at 16000 Hz; the caller's rate is restored at the end.
+    vad_sr = 16000
+    if sr != vad_sr:
+        vad_input = librosa.resample(audio, orig_sr=sr, target_sr=vad_sr)
+    else:
+        vad_input = audio
+    # Convert to 16-bit PCM format expected by webrtcvad. Clip first so that
+    # out-of-range samples saturate instead of wrapping around.
+    audio_int16 = np.int16(np.clip(vad_input, -1.0, 1.0) * 32767)
+    # The frame length is used exactly as computed: shrinking it by one sample
+    # (as an earlier "make it even" adjustment did) would no longer correspond
+    # to the requested frame duration.
+    frame_size = int(vad_sr * frame_duration / 1000)
+    # Filter out non-speech frames; the trailing partial frame is dropped.
+    voiced_frames = []
+    for start in range(0, len(audio_int16) - frame_size + 1, frame_size):
+        frame = audio_int16[start:start + frame_size]
+        if vad.is_speech(frame.tobytes(), sample_rate=vad_sr):
+            voiced_frames.append(frame)
+    if not voiced_frames:
+        # np.concatenate([]) raises ValueError; "no speech" is a valid result.
+        return np.zeros(0, dtype=np.float32)
+    voiced_audio = np.float32(np.concatenate(voiced_frames)) / 32767
+    # Callers keep using their own `sr` for the returned samples (e.g. when
+    # writing the file), so convert back to that rate before returning.
+    if sr != vad_sr:
+        voiced_audio = librosa.resample(voiced_audio, orig_sr=vad_sr, target_sr=sr)
+    return voiced_audio
+# In[ ]:

convert2list.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # coding: utf-8
-# In[30]:
 # import nbimporter
@@ -41,14 +41,14 @@ def convert_to_list(text, text_list):
     result = ' '.join(matched_words)
     return result
-text = "जीरोएकदोतीनचारपांचछहसातआठनौदसजीरोएकदोतीनचारपांच"
 if __name__=="__main__":
     converted=convert_to_list(text, text_to_list())
     print(converted)
-# In[33]:
 # # import nbimporter
@@ -94,3 +94,15 @@ if __name__=="__main__":
 #     converted=convert_to_list(text, text_to_list())
 #     print(converted)

 #!/usr/bin/env python
 # coding: utf-8
+# In[ ]:
 # import nbimporter
     result = ' '.join(matched_words)
     return result
+# text = "जीरोएकदोतीनचारपांचछहसातआठनौदसजीरोएकदोतीनचारपांच"
 if __name__=="__main__":
     converted=convert_to_list(text, text_to_list())
     print(converted)
+# In[ ]:
 # # import nbimporter
 #     converted=convert_to_list(text, text_to_list())
 #     print(converted)
+# In[ ]:
+# Notebook-only shell command, kept as a comment: at module level it would run
+# every time this module is imported, and `get_ipython` is undefined outside
+# IPython. Run it manually if the model checkout is needed:
+# !git clone https://huggingface.co/StephennFernandes/wav2vec2-XLS-R-300m-konkani
+# In[ ]:

highPassFilter.py ADDED Viewed

	@@ -0,0 +1,35 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[2]:
+import scipy.signal
+def high_pass_filter(audio, sr, cutoff=100, order=5):
+    """
+    High-pass filter an audio signal with a Butterworth design.
+    Parameters:
+    audio (numpy array): The input audio signal.
+    sr (int): The sample rate of the audio signal.
+    cutoff (float): The cutoff frequency in Hz. Default is 100 Hz.
+    order (int): The order of the filter. Default is 5.
+    Returns:
+    numpy array: The filtered audio signal.
+    """
+    # Design the filter as second-order sections and apply it in one pass.
+    butterworth_sos = scipy.signal.butter(
+        N=order, Wn=cutoff, btype='highpass', fs=sr, output='sos'
+    )
+    return scipy.signal.sosfilt(butterworth_sos, audio)
+# In[ ]:

main.py ADDED Viewed

	@@ -0,0 +1,51 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+# Notebook-only shell magic, kept as a comment so that this exported script
+# can run outside IPython (where `get_ipython` is not defined).
+# Install the dependency manually before running:
+# !pip install nbimporter
+# In[2]:
+# Import necessary libraries and filter warnings
+import warnings
+warnings.filterwarnings("ignore")
+import nbimporter
+import os
+import re
+import torchaudio
+from transformers import pipeline
+from text2int import text_to_int
+from isNumber import is_number
+from Text2List import text_to_list
+from convert2list import convert_to_list
+from processDoubles import process_doubles
+from replaceWords import replace_words
+# Load the Hindi ASR pipeline once at module level.
+pipe = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1")
+# In[4]:
+# # Process the audio file
+# NOTE(review): hard-coded, machine-specific path — adjust before running elsewhere.
+transcript = pipe("C:/Users/WCHL/Desktop/hindi_dataset/train/hindi_numbers_test/hindi7.mp3")
+text_value = transcript['text']
+# Post-processing chain: process_doubles -> replace_words -> text_to_int.
+# The convert_to_list stage is disabled in this script (see commented line below).
+processd_doubles=process_doubles(text_value)
+# converted_to_list=convert_to_list(processd_doubles,text_to_list())
+replaced_words = replace_words(processd_doubles)
+converted_text=text_to_int(replaced_words)
+# Print every intermediate stage so the effect of each step can be inspected.
+print(f"generated text    : {text_value}")
+print(f"processed doubles : {processd_doubles}")
+# print(f"converted to list : {converted_to_list}")
+print(f"replaced words    : {replaced_words}")
+print(f"final text        : {converted_text}")
+# In[ ]:

numberMapping.py ADDED Viewed

	@@ -0,0 +1,135 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[ ]:
+replacement_map = {
+    'zero': ['शून्य', 'जेरो', 'शुन्ना', 'जीरो'],
+    'one': ['वन', 'एंक', 'इक', 'एक'],
+    'two': ['टू', 'दौ', 'दो'],
+    'three': ['थ्री', 'तीना', 'तीन', 'त्री'],
+    'four': ['फोर', 'फॉर', 'च्यार', 'चार'],
+    'five': ['फाइव', 'पाँच', 'पांच'],
+    'six': ['सिक्स', 'चह', 'छौ', 'छै', 'छह'],
+    'seven': ['सेवन', 'सात'],
+    'eight': ['एट', 'अट', 'आठ'],
+    'nine': ['नाइन', 'नौ'],
+    'ten': ['टेन', 'दस'],
+    # Numbers from 11 to 19
+    'eleven': ['इलेवन', 'ग्यारह'],
+    'twelve': ['ट्वेल्व', 'बारह'],
+    'thirteen': ['थर्टीन', 'तेरह'],
+    'fourteen': ['फोर्टीन', 'चौदह'],
+    'fifteen': ['फिफ्टीन', 'पंद्रह'],
+    'sixteen': ['सिक्स्टीन', 'सोलह'],
+    'seventeen': ['सेवंटीन', 'सत्रह'],
+    'eighteen': ['एटीन', 'अठारह'],
+    'nineteen': ['नाइनटीन', 'उन्नीस'],
+    # Multiples of ten
+    'twenty': ['ट्वेंटी', 'बीस'],
+    'thirty': ['थर्टी', 'तीस'],
+    'forty': ['फोर्टी', 'चालीस'],
+    'fifty': ['फिफ्टी', 'पचास'],
+    'sixty': ['सिक्स्टी', 'साठ'],
+    'seventy': ['सेवंटी', 'सत्तर'],
+    'eighty': ['एटी', 'अस्सी'],
+    'ninety': ['नाइंटी', 'नब्बे'],
+    # Numbers from 21 to 29
+    'twenty one': ['ट्वेंटी वन', 'इक्कीस'],
+    'twenty two': ['ट्वेंटी टू', 'बाईस'],
+    'twenty three': ['ट्वेंटी थ्री', 'तेईस'],
+    'twenty four': ['ट्वेंटी फोर', 'चौबीस'],
+    'twenty five': ['ट्वेंटी फाइव', 'पच्चीस'],
+    'twenty six': ['ट्वेंटी सिक्स', 'छब्बीस'],
+    'twenty seven': ['ट्वेंटी सेवन', 'सत्ताईस'],
+    'twenty eight': ['ट्वेंटी एट', 'अट्ठाईस'],
+    'twenty nine': ['ट्वेंटी नाइन', 'उनतीस'],
+    # Numbers from 31 to 39
+    'thirty one': ['थर्टी वन', 'इकतीस'],
+    'thirty two': ['थर्टी टू', 'बत्तीस'],
+    'thirty three': ['थर्टी थ्री', 'तेतीस'],
+    'thirty four': ['थर्टी फोर', 'चौंतीस'],
+    'thirty five': ['थर्टी फाइव', 'पैंतीस'],
+    'thirty six': ['थर्टी सिक्स', 'छत्तीस'],
+    'thirty seven': ['थर्टी सेवन', 'सैंतीस'],
+    'thirty eight': ['थर्टी एट', 'अड़तीस'],
+    'thirty nine': ['थर्टी नाइन', 'उनतालीस'],
+    # Numbers from 41 to 49
+    'forty one': ['फोर्टी वन', 'इकतालीस'],
+    'forty two': ['फोर्टी टू', 'बयालीस'],
+    'forty three': ['फोर्टी थ्री', 'तैंतालीस'],
+    'forty four': ['फोर्टी फोर', 'चौंतालीस'],
+    'forty five': ['फोर्टी फाइव', 'पैंतालीस'],
+    'forty six': ['फोर्टी सिक्स', 'छयालिस'],
+    'forty seven': ['फोर्टी सेवन', 'सैंतालीस'],
+    'forty eight': ['फोर्टी एट', 'अड़तालीस'],
+    'forty nine': ['फोर्टी नाइन', 'उनचास'],
+    # Numbers from 51 to 59
+    'fifty one': ['फिफ्टी वन', 'इक्यावन'],
+    'fifty two': ['फिफ्टी टू', 'बावन'],
+    'fifty three': ['फिफ्टी थ्री', 'तिरेपन'],
+    'fifty four': ['फिफ्टी फोर', 'चौवन'],
+    'fifty five': ['फिफ्टी फाइव', 'पचपन'],
+    'fifty six': ['फिफ्टी सिक्स', 'छप्पन'],
+    'fifty seven': ['फिफ्टी सेवन', 'सत्तावन'],
+    'fifty eight': ['फिफ्टी एट', 'अट्ठावन'],
+    'fifty nine': ['फिफ्टी नाइन', 'उनसठ'],
+    # Numbers from 61 to 69
+    'sixty one': ['सिक्स्टी वन', 'इकसठ'],
+    'sixty two': ['सिक्स्टी टू', 'बासठ'],
+    'sixty three': ['सिक्स्टी थ्री', 'तिरसठ'],
+    'sixty four': ['सिक्स्टी फोर', 'चौंसठ'],
+    'sixty five': ['सिक्स्टी फाइव', 'पैंसठ'],
+    'sixty six': ['सिक्स्टी सिक्स', 'छियासठ'],
+    'sixty seven': ['सिक्स्टी सेवन', 'सड़सठ'],
+    'sixty eight': ['सिक्स्टी एट', 'अड़सठ'],
+    'sixty nine': ['सिक्स्टी नाइन', 'उनहत्तर'],
+    # Numbers from 71 to 79
+    'seventy one': ['सेवंटी वन', 'इकहत्तर'],
+    'seventy two': ['सेवंटी टू', 'बहत्तर'],
+    'seventy three': ['सेवंटी थ्री', 'तिहत्तर'],
+    'seventy four': ['सेवंटी फोर', 'चौहत्तर'],
+    'seventy five': ['सेवंटी फाइव', 'पचहत्तर'],
+    'seventy six': ['सेवंटी सिक्स', 'छिहत्तर'],
+    'seventy seven': ['सेवंटी सेवन', 'सतहत्तर'],
+    'seventy eight': ['सेवंटी एट', 'अठहत्तर'],
+    'seventy nine': ['सेवंटी नाइन', 'उन्यासी'],
+    # Numbers from 81 to 89
+    'eighty one': ['एटी वन', 'इक्यासी'],
+    'eighty two': ['एटी टू', 'बयासी'],
+    'eighty three': ['एटी थ्री', 'तिरासी'],
+    'eighty four': ['एटी फोर', 'चौरासी'],
+    'eighty five': ['एटी फाइव', 'पचासी'],
+    'eighty six': ['एटी सिक्स', 'छियासी'],
+    'eighty seven': ['एटी सेवन', 'सतासी'],
+    'eighty eight': ['एटी एट', 'अठासी'],
+    'eighty nine': ['एटी नाइन', 'नवासी'],
+    # Numbers from 91 to 99
+    'ninety one': ['नाइंटी वन', 'इक्यानवे'],
+    'ninety two': ['नाइंटी टू', 'बानवे'],
+    'ninety three': ['नाइंटी थ्री', 'तिरानवे'],
+    'ninety four': ['नाइंटी फोर', 'चौरानवे'],
+    'ninety five': ['नाइंटी फाइव', 'पचानवे'],
+    'ninety six': ['नाइंटी सिक्स', 'छियानवे'],
+    'ninety seven': ['नाइंटी सेवन', 'सतानवे'],
+    'ninety eight': ['नाइंटी एट', 'अठानवे'],
+    'ninety nine': ['नाइंटी नाइन', 'निन्यानवे'],
+    # Hundred
+    'hundred': ['हंड्रेड', 'सौ'],
+    # Special for double digits
+    'डबल': ['दबल', 'डबल', 'दुबाल'],
+}

processDoubles.py CHANGED Viewed

@@ -1,24 +1,54 @@
-import re
-def process_doubles(sentence):
-    # Use regex to split 'डबल' followed by numbers/words without space (e.g., "डबलवन" -> "डबल वन")
-    sentence = re.sub(r'(डबल)(\S+)', r'\1 \2', sentence)
-    tokens = sentence.split()
-    result = []
-    i = 0
-    while i < len(tokens):
-        if tokens[i] == "डबल":
-            if i + 1 < len(tokens):
-                result.append(tokens[i + 1])  # Append the next word/number
-                result.append(tokens[i + 1])  # Append the next word/number again to duplicate
-                i += 2  # Skip over the next word since it's already processed
-            else:
-                result.append(tokens[i])
-                i += 1
-        else:
-            result.append(tokens[i])
-            i += 1
-    return ' '.join(result)

+#!/usr/bin/env python
+# coding: utf-8
+# In[2]:
+# # Function to process "double" followed by a number
+# def process_doubles(sentence):
+#     tokens = sentence.split()
+#     result = []
+#     i = 0
+#     while i < len(tokens):
+#         if tokens[i] == "डबल":
+#             if i + 1 < len(tokens):
+#                 result.append(tokens[i + 1])
+#                 result.append(tokens[i + 1])
+#                 i += 2
+#             else:
+#                 result.append(tokens[i])
+#                 i += 1
+#         else:
+#             result.append(tokens[i])
+#             i += 1
+#     return ' '.join(result)
+# In[ ]:
+import re
+def process_doubles(sentence):
+    """Expand "डबल <word>" into the word repeated twice.
+
+    A "डबल" glued to the following text (e.g. "डबलवन") is first separated with
+    a space. Each "डबल" token is then dropped and the token after it is emitted
+    twice; a trailing "डबल" with nothing after it is kept as is.
+
+    Returns the rebuilt sentence with tokens joined by single spaces.
+    """
+    # Split 'डबल' from whatever follows it without a space ("डबलवन" -> "डबल वन").
+    spaced = re.sub(r'(डबल)(\S+)', r'\1 \2', sentence)
+    output = []
+    pending = iter(spaced.split())
+    for word in pending:
+        if word != "डबल":
+            output.append(word)
+            continue
+        # Consume the token after 'डबल' so it is not processed a second time.
+        following = next(pending, None)
+        if following is None:
+            output.append(word)
+        else:
+            output.extend((following, following))
+    return ' '.join(output)

replaceWords.py CHANGED Viewed

@@ -1,144 +1,157 @@
-import re
-def replace_words(sentence):
-    # Define a dictionary mapping a single word to a list of words or phrases
-    replacement_map = {
-    # Multiples of ten
-    'twenty': ['ट्वेंटी', 'बीस'],
-    'thirty': ['थर्टी', 'तीस'],
-    'forty': ['फोर्टी', 'चालीस'],
-    'fifty': ['फिफ्टी', 'पचास'],
-    'sixty': ['सिक्स्टी', 'साठ'],
-    'seventy': ['सेवंटी', 'सत्तर','सेवनटी','सेवेनटी','सेवांटी'],
-    'eighty': ['एटी', 'अस्सी'],
-    'ninety': ['नाइंटी', 'नब्बे'],
-    # Numbers from 11 to 19
-    'eleven': ['इलेवन', 'ग्यारह','इगारा'],
-    'twelve': ['ट्वेल्व', 'बारह'],
-    'thirteen': ['थर्टीन', 'तेरह','तेरा'],
-    'fourteen': ['फोर्टीन', 'चौदह'],
-    'fifteen': ['फिफ्टीन', 'पंद्रह','पंद्रा'],
-    'sixteen': ['सिक्स्टीन', 'सोलह','सोल्ला'],
-    'seventeen': ['सेवंटीन', 'सत्रह''सतरा'],
-    'eighteen': ['एटीन', 'अठारह''अठारा'],
-    'nineteen': ['नाइनटीन', 'उन्नीस','उन्नईस','उनाइस'],
-    # Numbers from 21 to 29
-    'twenty one': ['ट्वेंटी वन', 'इक्कीस'],
-    'twenty two': ['ट्वेंटी टू', 'बाईस'],
-    'twenty three': ['ट्वेंटी थ्री', 'तेईस'],
-    'twenty four': ['ट्वेंटी फोर', 'चौबीस'],
-    'twenty five': ['ट्वेंटी फाइव', 'पच्चीस'],
-    'twenty six': ['ट्वेंटी सिक्स', 'छब्बीस'],
-    'twenty seven': ['ट्वेंटी सेवन', 'सत्ताईस','सताईस'],
-    'twenty eight': ['ट्वेंटी एट', 'अट्ठाईस','अठ्ठाइस','अठ्ठाईस'],
-    'twenty nine': ['ट्वेंटी नाइन', 'उनतीस'],
-    # Numbers from 31 to 39
-    'thirty one': ['थर्टी वन', 'इकतीस','इक्तीस'],
-    'thirty two': ['थर्टी टू', 'बत्तीस'],
-    'thirty three': ['थर्टी थ्री', 'तेतीस','तैतीस'],
-    'thirty four': ['थर्टी फोर', 'चौंतीस','चौतीस'],
-    'thirty five': ['थर्टी फाइव', 'पैंतीस','पैतीस'],
-    'thirty six': ['थर्टी सिक्स', 'छत्तीस'],
-    'thirty seven': ['थर्टी सेवन', 'सैंतीस'],
-    'thirty eight': ['थर्टी एट', 'अड़तीस'],
-    'thirty nine': ['थर्टी नाइन', 'उनतालीस'],
-    # Numbers from 41 to 49
-    'forty one': ['फोर्टी वन', 'इकतालीस'],
-    'forty two': ['फोर्टी टू', 'बयालीस'],
-    'forty three': ['फोर्टी थ्री', 'तैंतालीस'],
-    'forty four': ['फोर्टी फोर', 'चौंतालीस'],
-    'forty five': ['फोर्टी फाइव', 'पैंतालीस'],
-    'forty six': ['फोर्टी सिक्स', 'छयालिस'],
-    'forty seven': ['फोर्टी सेवन', 'सैंतालीस'],
-    'forty eight': ['फोर्टी एट', 'अड़तालीस'],
-    'forty nine': ['फोर्टी नाइन', 'उनचास'],
-    # Numbers from 51 to 59
-    'fifty one': ['फिफ्टी वन', 'इक्यावन'],
-    'fifty two': ['फिफ्टी टू', 'बावन'],
-    'fifty three': ['फिफ्टी थ्री', 'तिरेपन','तिरपन','तीरपन'],
-    'fifty four': ['फिफ्टी फोर', 'चौवन'],
-    'fifty five': ['फिफ्टी फाइव', 'पचपन'],
-    'fifty six': ['फिफ्टी सिक्स', 'छप्पन','छपपन'],
-    'fifty seven': ['फिफ्टी सेवन', 'सत्तावन','संताबन','संतावन'],
-    'fifty eight': ['फिफ्टी एट', 'अट्ठावन','अंठावन'],
-    'fifty nine': ['फिफ्टी नाइन', 'उनसठ','उंसट','उंसठ'],
-    # Numbers from 61 to 69
-    'sixty one': ['सिक्स्टी वन', 'इकसठ'],
-    'sixty two': ['सिक्स्टी टू', 'बासठ'],
-    'sixty three': ['सिक्स्टी थ्री', 'तिरसठ'],
-    'sixty four': ['सिक्स्टी फोर', 'चौंसठ'],
-    'sixty five': ['सिक्स्टी फाइव', 'पैंसठ'],
-    'sixty six': ['सिक्स्टी सिक्स', 'छियासठ'],
-    'sixty seven': ['सिक्स्टी सेवन', 'सड़सठ'],
-    'sixty eight': ['सिक्स्टी एट', 'अड़सठ'],
-    'sixty nine': ['सिक्स्टी नाइन', 'उनहत्तर'],
-    # Numbers from 71 to 79
-    'seventy one': ['सेवंटी वन', 'इकहत्तर','इखत्तर','इकत्तर'],
-    'seventy two': ['सेवंटी टू', 'बहत्तर'],
-    'seventy three': ['सेवंटी थ्री', 'तिहत्तर','तियत्र','तियत्तर','तीहत्तर','तिहत्थर'],
-    'seventy four': ['सेवंटी फोर', 'चौहत्तर',],
-    'seventy five': ['सेवंटी फाइव', 'पचहत्तर','पछत्तर','पिछत्तर','पचहत्तर','पचत्तर'],
-    'seventy six': ['सेवंटी सिक्स', 'छिहत्तर','छीहत्तर'],
-    'seventy seven': ['सेवंटी सेवन', 'सतहत्तर','सतात्तर','सतत्तर','सतहत्थर'],
-    'seventy eight': ['सेवंटी एट', 'अठहत्तर','अठत्तर'],
-    'seventy nine': ['सेवंटी नाइन', 'उन्यासी','उनासी'],
-    # Numbers from 81 to 89
-    'eighty one': ['एटी वन', 'इक्यासी'],
-    'eighty two': ['एटी टू', 'बयासी'],
-    'eighty three': ['एटी थ्री', 'तिरासी'],
-    'eighty four': ['एटी फोर', 'चौरासी'],
-    'eighty five': ['एटी फाइव', 'पचासी','पिचासी'],
-    'eighty six': ['एटी सिक्स', 'छियासी'],
-    'eighty seven': ['एटी सेवन', 'सतासी'],
-    'eighty eight': ['एटी एट', 'अठासी'],
-    'eighty nine': ['एटी नाइन', 'नवासी'],
-    # Numbers from 91 to 99
-    'ninety one': ['नाइंटी वन', 'इक्यानवे'],
-    'ninety two': ['नाइंटी टू', 'बानवे','बानबे'],
-    'ninety three': ['नाइंटी थ्री', 'तिरानवे'],
-    'ninety four': ['नाइंटी फोर', 'चौरानवे'],
-    'ninety five': ['नाइंटी फाइव', 'पचानवे'],
-    'ninety six': ['नाइंटी सिक्स', 'छियानवे'],
-    'ninety seven': ['नाइंटी सेवन', 'सतानवे'],
-    'ninety eight': ['नाइंटी एट', 'अठानवे'],
-    'ninety nine': ['नाइंटी नाइन', 'निन्यानवे'],
-    # Numbers from one to ten
-    'seven': ['सेवन', 'सात'],
-    'zero': ['शून्य', 'जेरो', 'शुन्ना', 'जीरो'],
-    'one': ['वन', 'एंक', 'इक', 'एक'],
-    'two': ['टू', 'दो'],
-    'three': ['थ्री', 'तीना', 'तीन', 'त्री'],
-    'four': ['फोर','फ़ोर', 'फॉर', 'च्यार', 'चार'],
-    'five': ['फाइव', 'पाँच', 'पांच'],
-    'six': ['सिक्स', 'चह', 'छौ', 'छै', 'छह', 'छे'],
-    'eight': ['एट', 'अट', 'आठ'],
-    'nine': ['नाइन', 'नौ'],
-    'ten': ['टेन', 'दस'],
-    # Hundred
-    'hundred': ['हंड्रेड', 'सौ','सो','साव'],
-    # Thousand
-    'thousand' : ['हजार','थौजनड','थाउजंड','हज़ार'],
-    # Lakhs
-    'lac' : ['लाख'],
-    }
-    words = sentence.split()  # Split the sentence by spaces
-    # Replace words using the mapping
-    for i, word in enumerate(words):
-        for replacement, patterns in replacement_map.items():
-            if word in patterns:
-                words[i] = replacement  # Replace the word if it's fully matched
-    # Join the processed words back into a sentence
-    return ' '.join(words)

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+import re
# English number phrase -> spoken forms (Hindi words and Devanagari
# transliterations of English words) that should be rewritten to it.
_REPLACEMENT_MAP = {
    # Multiples of ten
    'twenty': ['ट्वेंटी', 'बीस'],
    'thirty': ['थर्टी', 'तीस'],
    'forty': ['फोर्टी', 'चालीस'],
    'fifty': ['फिफ्टी', 'पचास'],
    'sixty': ['सिक्स्टी', 'साठ'],
    'seventy': ['सेवंटी', 'सत्तर', 'सेवनटी', 'सेवेनटी', 'सेवांटी'],
    'eighty': ['एटी', 'अस्सी'],
    'ninety': ['नाइंटी', 'नब्बे'],
    # Numbers from 11 to 19
    'eleven': ['इलेवन', 'ग्यारह', 'इगारा'],
    'twelve': ['ट्वेल्व', 'बारह', 'बारा', 'मंटों', 'सौबारह', 'शौबारह'],
    'thirteen': ['थर्टीन', 'तेरह', 'तेरा'],
    'fourteen': ['फोर्टीन', 'चौदह'],
    'fifteen': ['फिफ्टीन', 'पंद्रह', 'पंद्रा'],
    'sixteen': ['सिक्स्टीन', 'सोलह', 'सोल्ला'],
    # The comma between the last two spellings matters: adjacent string
    # literals are concatenated into one (unmatchable) word without it.
    'seventeen': ['सेवंटीन', 'सत्रह', 'सतरा'],
    'eighteen': ['एटीन', 'अठारह', 'अठारा'],
    'nineteen': ['नाइनटीन', 'उन्नीस', 'उन्नईस', 'उनाइस'],
    # Numbers from 21 to 29
    'twenty one': ['ट्वेंटी वन', 'इक्कीस'],
    'twenty two': ['ट्वेंटी टू', 'बाईस'],
    'twenty three': ['ट्वेंटी थ्री', 'तेईस'],
    'twenty four': ['ट्वेंटी फोर', 'चौबीस'],
    'twenty five': ['ट्वेंटी फाइव', 'पच्चीस'],
    'twenty six': ['ट्वेंटी सिक्स', 'छब्बीस'],
    'twenty seven': ['ट्वेंटी सेवन', 'सत्ताईस', 'सताईस'],
    'twenty eight': ['ट्वेंटी एट', 'अट्ठाईस', 'अठ्ठाइस', 'अठ्ठाईस'],
    'twenty nine': ['ट्वेंटी नाइन', 'उनतीस'],
    # Numbers from 31 to 39
    'thirty one': ['थर्टी वन', 'इकतीस'],
    'thirty two': ['थर्टी टू', 'बत्तीस'],
    'thirty three': ['थर्टी थ्री', 'तेतीस'],
    'thirty four': ['थर्टी फोर', 'चौंतीस'],
    'thirty five': ['थर्टी फाइव', 'पैंतीस'],
    'thirty six': ['थर्टी सिक्स', 'छत्तीस'],
    'thirty seven': ['थर्टी सेवन', 'सैंतीस'],
    'thirty eight': ['थर्टी एट', 'अड़तीस'],
    'thirty nine': ['थर्टी नाइन', 'उनतालीस'],
    # Numbers from 41 to 49
    'forty one': ['फोर्टी वन', 'इकतालीस'],
    'forty two': ['फोर्टी टू', 'बयालीस'],
    'forty three': ['फोर्टी थ्री', 'तैंतालीस'],
    'forty four': ['फोर्टी फोर', 'चौंतालीस'],
    'forty five': ['फोर्टी फाइव', 'पैंतालीस'],
    'forty six': ['फोर्टी सिक्स', 'छयालिस'],
    'forty seven': ['फोर्टी सेवन', 'सैंतालीस'],
    'forty eight': ['फोर्टी एट', 'अड़तालीस'],
    'forty nine': ['फोर्टी नाइन', 'उनचास'],
    # Numbers from 51 to 59
    'fifty one': ['फिफ्टी वन', 'इक्यावन'],
    'fifty two': ['फिफ्टी टू', 'बावन'],
    'fifty three': ['फिफ्टी थ्री', 'तिरेपन', 'तिरपन', 'तीरपन'],
    'fifty four': ['फिफ्टी फोर', 'चौवन'],
    'fifty five': ['फिफ्टी फाइव', 'पचपन'],
    'fifty six': ['फिफ्टी सिक्स', 'छप्पन', 'छपपन'],
    'fifty seven': ['फिफ्टी सेवन', 'सत्तावन', 'संताबन', 'संतावन'],
    'fifty eight': ['फिफ्टी एट', 'अट्ठावन', 'अंठावन'],
    'fifty nine': ['फिफ्टी नाइन', 'उनसठ', 'उंसट', 'उंसठ'],
    # Numbers from 61 to 69
    'sixty one': ['सिक्स्टी वन', 'इकसठ'],
    'sixty two': ['सिक्स्टी टू', 'बासठ'],
    'sixty three': ['सिक्स्टी थ्री', 'तिरसठ'],
    'sixty four': ['सिक्स्टी फोर', 'चौंसठ'],
    'sixty five': ['सिक्स्टी फाइव', 'पैंसठ'],
    'sixty six': ['सिक्स्टी सिक्स', 'छियासठ'],
    'sixty seven': ['सिक्स्टी सेवन', 'सड़सठ'],
    'sixty eight': ['सिक्स्टी एट', 'अड़सठ'],
    'sixty nine': ['सिक्स्टी नाइन', 'उनहत्तर'],
    # Numbers from 71 to 79
    'seventy one': ['सेवंटी वन', 'इकहत्तर', 'इखत्तर', 'इकत्तर'],
    'seventy two': ['सेवंटी टू', 'बहत्तर'],
    'seventy three': ['सेवंटी थ्री', 'तिहत्तर', 'तियत्र', 'तियत्तर', 'तीहत्तर', 'तिहत्थर'],
    'seventy four': ['सेवंटी फोर', 'चौहत्तर'],
    'seventy five': ['सेवंटी फाइव', 'पचहत्तर', 'पछत्तर', 'पिछत्तर', 'पचहत्तर', 'पचत्तर'],
    'seventy six': ['सेवंटी सिक्स', 'छिहत्तर', 'छीहत्तर'],
    'seventy seven': ['सेवंटी सेवन', 'सतहत्तर', 'सतात्तर', 'सतत्तर', 'सतहत्थर'],
    'seventy eight': ['सेवंटी एट', 'अठहत्तर', 'अठत्तर'],
    'seventy nine': ['सेवंटी नाइन', 'उन्यासी', 'उनासी'],
    # Numbers from 81 to 89
    'eighty one': ['एटी वन', 'इक्यासी'],
    'eighty two': ['एटी टू', 'बयासी'],
    'eighty three': ['एटी थ्री', 'तिरासी'],
    'eighty four': ['एटी फोर', 'चौरासी'],
    'eighty five': ['एटी फाइव', 'पचासी', 'पिचासी'],
    'eighty six': ['एटी सिक्स', 'छियासी'],
    'eighty seven': ['एटी सेवन', 'सतासी'],
    'eighty eight': ['एटी एट', 'अठासी'],
    'eighty nine': ['एटी नाइन', 'नवासी'],
    # Numbers from 91 to 99
    'ninety one': ['नाइंटी वन', 'इक्यानवे'],
    'ninety two': ['नाइंटी टू', 'बानवे', 'बानबे'],
    'ninety three': ['नाइंटी थ्री', 'तिरानवे'],
    'ninety four': ['नाइंटी फोर', 'चौरानवे'],
    'ninety five': ['नाइंटी फाइव', 'पचानवे'],
    'ninety six': ['नाइंटी सिक्स', 'छियानवे'],
    'ninety seven': ['नाइंटी सेवन', 'सतानवे'],
    'ninety eight': ['नाइंटी एट', 'अठानवे'],
    'ninety nine': ['नाइंटी नाइन', 'निन्यानवे'],
    # Numbers from zero to ten
    'seven': ['सेवन', 'सात'],
    'zero': ['शून्य', 'जेरो', 'शुन्ना', 'जीरो'],
    'one': ['वन', 'एंक', 'इक', 'एक'],
    'two': ['टू', 'दो'],
    'three': ['थ्री', 'तीना', 'तीन', 'त्री'],
    'four': ['फोर', 'फ़ोर', 'फॉर', 'च्यार', 'चार'],
    'five': ['फाइव', 'पाँच', 'पांच'],
    'six': ['सिक्स', 'चह', 'छौ', 'छै', 'छह', 'छे'],
    'eight': ['एट', 'अट', 'आठ'],
    'nine': ['नाइन', 'नौ'],
    'ten': ['टेन', 'दस'],
    # Hundred
    'hundred': ['हंड्रेड', 'सौ', 'सो', 'साव'],
    # Thousand
    'thousand': ['हजार', 'थौजनड', 'थाउजंड', 'हज़ार'],
    # Lakhs
    'lac': ['लाख'],
    # Fused spellings of 112
    'one hundred twelve': ['इक्सुबारा', 'वनमंटों', 'वनवंतु'],
}

# Reverse index (spoken form -> English phrase), built once at import time so
# each word costs a single dict lookup instead of a scan over every list.
# If a spoken form were listed under several keys, the last key would win.
_WORD_LOOKUP = {
    pattern: replacement
    for replacement, patterns in _REPLACEMENT_MAP.items()
    for pattern in patterns
}


def replace_words(sentence):
    """Rewrite Devanagari number words in `sentence` as English number words.

    The sentence is split on whitespace and each token that exactly equals a
    spoken form in `_REPLACEMENT_MAP` is replaced by the English phrase it is
    listed under; every other token is kept as-is.

    Matching is per token, so the multi-word spellings in the table (for
    example "ट्वेंटी वन") never match as a unit. Their words are translated
    one by one through the single-word entries, which yields the same phrase.

    Args:
        sentence: Whitespace-separated text.

    Returns:
        The processed tokens joined by single spaces.
    """
    return ' '.join(_WORD_LOOKUP.get(word, word) for word in sentence.split())
+# In[ ]:

wienerFilter.py ADDED Viewed

	@@ -0,0 +1,22 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[2]:
+import scipy.signal
def wiener_filter(audio):
    '''
    Reduce noise in `audio` with SciPy's adaptive Wiener filter.

    The Wiener filter tries to estimate the original, clean signal from the
    noisy signal and the statistical properties of the noise. It is
    particularly useful for stationary noise (constant background noise,
    like white noise).

    Args:
        audio: Array-like of samples (anything `numpy.asarray` accepts).

    Returns:
        A NumPy array with the same shape as `audio` holding the filtered
        samples. Integer (and boolean) input is filtered as float64.
    '''
    import numpy as np  # local import: the module header only imports scipy.signal

    samples = np.asarray(audio)
    if samples.size == 0:
        # Nothing to filter.
        return samples.astype(np.float64)

    # The filter's local-variance estimate squares the samples; in an integer
    # dtype (e.g. int16 PCM) that square wraps around, so filter in float.
    if samples.dtype.kind in "biu":
        samples = samples.astype(np.float64)

    # Flat stretches have zero local variance, which makes the filter divide
    # by zero internally; silence the warnings and repair the result below.
    with np.errstate(divide="ignore", invalid="ignore"):
        filtered = scipy.signal.wiener(samples)

    # A completely constant signal (e.g. digital silence) comes back as NaN;
    # the input sample is the sensible value wherever the result is unusable.
    unusable = ~np.isfinite(filtered)
    if unusable.any():
        filtered = np.where(unusable, samples, filtered)
    return filtered
+# In[ ]: