import os import gradio as gr import torch import pydub import torchaudio from torchaudio.sox_effects import apply_effects_tensor import numpy as np from transformers import AutoFeatureExtractor, AutoModelForAudioXVector device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def load_audio(file_name): audio = pydub.AudioSegment.from_file(file_name) arr = np.array(audio.get_array_of_samples(), dtype=np.float32) arr = arr / (1 << (8 * audio.sample_width - 1)) return arr.astype(np.float32), audio.frame_rate STYLE = """ """ OUTPUT_OK = ( STYLE + """
" "🎙️ Learn more about WavLM | " "📚 WavLM paper | " "📚 X-Vector paper" "
" ) examples = [ ["samples/cate_blanch.mp3", "samples/cate_blanch_2.mp3"], ["samples/cate_blanch.mp3", "samples/cate_blanch_3.mp3"], ["samples/cate_blanch_2.mp3", "samples/cate_blanch_3.mp3"], ["samples/heath_ledger.mp3", "samples/heath_ledger_3.mp3"], ["samples/russel_crowe.mp3", "samples/russel_crowe_2.mp3"], ["samples/cate_blanch.mp3", "samples/kirsten_dunst.wav"], ["samples/russel_crowe.mp3", "samples/kirsten_dunst.wav"], ["samples/russel_crowe_2.mp3", "samples/kirsten_dunst.wav"], ["samples/heath_ledger.mp3", "samples/denzel_washington.mp3"], ["samples/leonardo_dicaprio.mp3", "samples/russel_crowe.mp3"], ["samples/leonardo_dicaprio.mp3", "samples/russel_crowe_2.mp3"], ["samples/naomi_watts.mp3", "samples/denzel_washington.mp3"], ["samples/naomi_watts.mp3", "samples/leonardo_dicaprio.mp3"], ["samples/naomi_watts.mp3", "samples/cate_blanch_2.mp3"], ["samples/naomi_watts.mp3", "samples/kirsten_dunst.wav"], ] interface = gr.Interface( fn=similarity_fn, inputs=inputs, outputs=output, title="Voice Authentication with WavLM + X-Vectors", description=description, article=article, layout="horizontal", theme="huggingface", allow_flagging=False, live=False, examples=examples, ) interface.launch(enable_queue=True)