# Page header captured with the file (not part of the program):
#   omarelsayeed's picture
#   Duplicate from omarelsayeed/quran_recitation_wav2vec
#   33bc2a9
import speech_recognition as sr
from pydub import AudioSegment
import soundfile
import pyrebase
from pydub import AudioSegment
import soundfile
import os
import pyrebase
import gradio as gr
from transformers import AutoProcessor
from transformers import Wav2Vec2ForCTC
import gradio as gr
import torch
import torchaudio
# Checkpoint identifier passed to both from_pretrained calls below.
_CHECKPOINT = "omarelsayeed/wav2vec2_ar_anz2"
# Directory that receives a saved copy of the processor and the model.
_EXPORT_DIR = "my_model"

# Load the processor and the CTC model once at import time so every request
# reuses the same objects.
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)

# Write both artifacts to the export directory.
processor.save_pretrained(_EXPORT_DIR)
model.save_pretrained(_EXPORT_DIR)
def recite_wav(path_name):
    """Transcribe the audio file at ``path_name`` with the module-level model.

    Args:
        path_name: Path of the audio file to load with ``torchaudio.load``.

    Returns:
        The first decoded transcription produced by ``processor.batch_decode``.
    """
    # Rate that is declared to the processor; the waveform must actually be
    # at this rate, otherwise the model sees time-stretched audio.
    target_rate = 16_000

    # NOTE: an earlier revision re-encoded the file as PCM_16 with soundfile
    # before loading ("fix pcm_16 error"); that step is currently disabled.

    # Load the file the caller asked for (previously "xd.wav" was hard-coded
    # and ``path_name`` was ignored).
    waveform, sample_rate = torchaudio.load(path_name)

    # Resample only when needed so 16 kHz input is passed through untouched.
    if sample_rate != target_rate:
        resampler = torchaudio.transforms.Resample(sample_rate, target_rate)
        waveform = resampler(waveform)

    inputs = processor(
        waveform,
        return_tensors="pt",
        padding="longest",
        sampling_rate=target_rate,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        # The leading axis of input_values is dropped before the forward pass;
        # element 0 of the model output is used as the logits.
        logits = model(inputs.input_values.squeeze(0))[0]

    # NOTE(review): relies on batch_decode returning an object with a ``.text``
    # attribute (presumably an LM-backed processor) - confirm for this checkpoint.
    transcription = processor.batch_decode(logits.numpy()).text
    return transcription[0]
def list_all_files():
    """Print the ``name`` of every entry returned by listing ``wavfiles``."""
    wav_folder = storage.child("wavfiles")
    for entry in wav_folder.list_files():
        print(entry.name)
def download_wav_file(wav_name):
    """Download ``wavfiles/<wav_name>`` from storage into the local ``xd.wav``.

    Args:
        wav_name: Name of the object under the ``wavfiles`` storage folder.
    """
    remote_ref = storage.child("wavfiles").child(wav_name)
    remote_ref.download("xd.wav")
def get_quran_text(wav_file_path):
    """Download a stored recording, transcribe it, and return the text.

    Args:
        wav_file_path: Name of the recording, forwarded to
            ``download_wav_file``.

    Returns:
        The transcription returned by ``recite_wav``.
    """
    local_copy = "xd.wav"  # fixed scratch file written by download_wav_file

    # Download outside the try block: if it fails there is no scratch file to
    # clean up, and the download error propagates unchanged.
    download_wav_file(wav_file_path)
    try:
        return recite_wav(local_copy)
    finally:
        # Remove the scratch file even when transcription raises, so a failed
        # request does not leave a stale recording behind.
        os.remove(local_copy)
# Settings handed to pyrebase.initialize_app below.
# NOTE(review): the API key is committed in source, and "serviceAccount"
# names a credentials JSON file - presumably a path resolved relative to the
# working directory; confirm that file is not published with the app.
firebaseConfig = {
    "apiKey": "AIzaSyDjgBD762KveE8GBO7jqTTkj_mKhUTDwGM",
    "authDomain": "quran-c5cbe.firebaseapp.com",
    # NOTE(review): this URL has no scheme; only storage is used in this file,
    # so verify the value before using the database client.
    "databaseURL": "quran-c5cbe.firebaseio.com/",
    "projectId": "quran-c5cbe",
    "storageBucket": "quran-c5cbe.appspot.com",
    "serviceAccount": "quran-c5cbe-firebase-adminsdk-jvpbe-cebaf5aaa6.json",
}

# Module-level Firebase app and the storage client used by the helpers above.
firebase = pyrebase.initialize_app(firebaseConfig)
storage = firebase.storage()
# Text-in / text-out UI: the typed value is passed to get_quran_text and the
# returned transcription is shown as text.
iface = gr.Interface(
    fn=get_quran_text,
    inputs="text",
    outputs="text",
)
iface.launch()