Spaces:

amanmibra
/

void-emb-demo

Runtime error

App Files Files Community

void-emb-demo / app.py

amanmibra

add app.py

248de36 over 1 year ago

raw

history blame

2.59 kB

	"""
	This code is for testing and demonstration.
	Source code for credit: https://huggingface.co/spaces/nithinraok/titanet-speaker-verification/blob/main/app.py
	"""

	import gradio as gr
	import torch
	from nemo.collections.asr.models import EncDecSpeakerLabelModel

	# Checkpoint identifier handed to NeMo's ``from_pretrained``.
	model_name = "nvidia/speakerverification_en_titanet_large"

	# Run on the GPU when CUDA is available, otherwise on the CPU.
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	# Load the speaker model once at import time and move it to the chosen device.
	model = EncDecSpeakerLabelModel.from_pretrained(model_name).to(device)

	def compare(path1, path2):
	    """Score how similar the speakers of two recordings are.

	    Each recording is embedded with the module-level ``model``; the cosine
	    similarity of the two embeddings is then rescaled from [-1, 1] to [0, 1].

	    Args:
	        path1: Recording of the first speaker, as accepted by
	            ``model.get_embedding`` (falsy when nothing was provided).
	        path2: Recording of the second speaker (falsy when nothing was
	            provided).

	    Returns:
	        The rescaled similarity formatted as a string with four decimals.

	    Raises:
	        gr.Error: If either recording is missing.
	    """
	    if not (path1 and path2):
	        raise gr.Error("Need recordings from both speakers!")

	    embs1 = model.get_embedding(path1).squeeze()
	    embs2 = model.get_embedding(path2).squeeze()

	    # cosine_similarity length-normalises both vectors itself and clamps the
	    # norms with a small eps, so a zero-norm embedding yields 0 rather than
	    # the NaN a manual divide-by-norm would produce.
	    similarity_score = torch.nn.functional.cosine_similarity(embs1, embs2, dim=0)

	    # Shift the cosine range [-1, 1] onto [0, 1] for display.
	    similarity_score = (similarity_score + 1) / 2

	    return "{:.4f}".format(similarity_score.item())


	# Microphone tab: one optional recording widget per speaker, each configured
	# with type="filepath".
	inputs = [
	    gr.inputs.Audio(source="microphone", type="filepath", optional=True, label=speaker_label)
	    for speaker_label in ("Speaker #1", "Speaker #2")
	]

	# Upload tab: one optional file-upload widget per speaker, each configured
	# with type="filepath".
	upload_inputs = [
	    gr.inputs.Audio(source="upload", type="filepath", optional=True, label=speaker_label)
	    for speaker_label in ("Speaker #1", "Speaker #2")
	]

	# Introductory text for the demo page (three sentences, newline-separated).
	description = (
	    "The purpose of this demo is to show how VoID could work with speech embeddings rather than mel spectrograms.\n"
	    "This demonstration will analyze two recordings of speech and ascertain whether they have been spoken by the same individual.\n"
	    "You can attempt this exercise using your own voice."
	)

	# Heading for the demo page.
	title = "VoID with TitaNet Embeddings"

	# Interface for the microphone inputs: both recordings go to ``compare`` and
	# the returned score string is shown as text.
	# NOTE(review): ``layout``, ``theme="huggingface"`` and a boolean
	# ``allow_flagging`` look like legacy Gradio options - confirm against the
	# Gradio version this app is pinned to.
	microphone_interface = gr.Interface(
	    # What the page shows.
	    title=title,
	    description=description,
	    # How the page is wired.
	    fn=compare,
	    inputs=inputs,
	    outputs="text",
	    # Presentation and behaviour switches.
	    layout="horizontal",
	    theme="huggingface",
	    allow_flagging=False,
	    live=False,
	)

	# Interface for the file-upload inputs: both files go to ``compare`` and the
	# returned score string is shown as text.
	# NOTE(review): ``layout``, ``theme="huggingface"`` and a boolean
	# ``allow_flagging`` look like legacy Gradio options - confirm against the
	# Gradio version this app is pinned to.
	upload_interface = gr.Interface(
	    # What the page shows.
	    title=title,
	    description=description,
	    # How the page is wired.
	    fn=compare,
	    inputs=upload_inputs,
	    outputs="text",
	    # Presentation and behaviour switches.
	    layout="horizontal",
	    theme="huggingface",
	    allow_flagging=False,
	    live=False,
	)

	# Combine the two interfaces into one tabbed app; the names pair up with the
	# interfaces by position.
	demo = gr.TabbedInterface(
	    [microphone_interface, upload_interface],
	    ["Microphone", "Upload File"],
	)

	# Start serving the app as soon as this module is executed.
	demo.launch()