Spaces:

openpecha
/

tibetan-stt-whisper-sm

Runtime error

Update app.py

c1329a8 about 1 year ago

1.64 kB

	import gradio as gr
	import pyewts
	import torch
	from transformers import pipeline
	# Shared pyewts converter; `transcribe` calls its toUnicode() on the model output
	# (pyewts handles EWTS/Wylie transliteration <-> Tibetan Unicode).
	converter = pyewts.pyewts()

	# def remove_repeated_words(text):
	# # Tokenize the input text into words
	# words = text.split()

	# # Create a dictionary to count word occurrences
	# word_count = {}

	# # Create a list to store the final words
	# new_words = []

	# for word in words:
	# # Check if the word is in the dictionary
	# if word in word_count:
	# # If it has occurred once before, add it to the list with a count of 2
	# if word_count[word] == 1:
	# new_words.append(word)
	# word_count[word] = 2
	# else:
	# # If it has not occurred before, add it to the dictionary with a count of 1
	# word_count[word] = 1
	# new_words.append(word)

	# result = ' '.join(new_words)
	# return result

	# Load the speech-recognition pipeline once at start-up.
	# Use the GPU when one is available, otherwise fall back to the CPU so the
	# app still starts on CPU-only hosts instead of crashing while loading.
	pipe = pipeline(
	    model="TenzinGayche/whisper-small-3",  # change to "your-username/the-name-you-picked"
	    device="cuda" if torch.cuda.is_available() else "cpu",
	)
	def transcribe(microphone, upload):
	    """Transcribe one audio clip and return the text together with the clip.

	    Args:
	        microphone: File path of the microphone recording, or a falsy value
	            when nothing was recorded. Takes precedence over ``upload``.
	        upload: File path of the uploaded audio file, used only when no
	            microphone recording is present.

	    Returns:
	        A ``(text, audio)`` tuple: the pipeline's transcript after
	        ``converter.toUnicode`` and the path of the audio that was
	        transcribed.

	    Raises:
	        gr.Error: If neither a recording nor an uploaded file was provided.
	    """
	    audio = microphone or upload
	    if not audio:
	        # Without this guard the pipeline is called with None and fails
	        # with an error that tells the user nothing useful.
	        raise gr.Error("Please record or upload an audio file first.")

	    text = pipe(audio)["text"]
	    # text = remove_repeated_words(text)
	    # Presumably the model emits Wylie/EWTS transliteration, which is
	    # converted to Tibetan script here -- confirm against the model card.
	    state = converter.toUnicode(text)
	    return state, audio

	# Build the web UI: two audio inputs (microphone recording and file upload),
	# both passed to `transcribe` as file paths; the outputs are the transcript
	# text and the audio clip that was transcribed.
	iface = gr.Interface(
	    fn=transcribe,
	    inputs=[
	        gr.Audio(source="microphone", type="filepath"),
	        gr.Audio(source="upload", type="filepath"),
	    ],
	    outputs=["text", "audio"],
	    title="Whisper Small Tibetan",
	    # The loaded checkpoint is a Whisper *small* fine-tune, matching the title.
	    description="Realtime demo for Tibetan speech recognition using a fine-tuned Whisper small model.",
	)

	iface.launch()