import torch
import librosa
from transformers import AutoModelForCTC, Wav2Vec2Processor

# Load the fine-tuned CTC model and its matching processor from the Hugging Face Hub
model = AutoModelForCTC.from_pretrained("aoxo/wav2vec2-large-mal")
processor = Wav2Vec2Processor.from_pretrained("aoxo/wav2vec2-large-mal")


def transcribe_audio(audio_path):
    # Load the audio file, resampling to the 16 kHz rate the model expects
    waveform, _ = librosa.load(audio_path, sr=16000)

    # Convert the raw waveform into normalized, model-ready tensors
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")

    # Run inference without tracking gradients
    with torch.no_grad():
        logits = model(inputs.input_values).logits

    # Greedy CTC decoding: take the most likely token at each time step,
    # then let the processor collapse repeats and strip blank tokens
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    return transcription


audio_path = "path/to/your/audio/file.wav"
transcription = transcribe_audio(audio_path)
print("Transcription:", transcription)
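
# Optional: if a CUDA GPU is available, inference runs noticeably faster there.
# A minimal sketch, reusing the model and processor loaded above; the device
# handling is an addition for illustration, not part of the original snippet.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def transcribe_audio_on_device(audio_path):
    waveform, _ = librosa.load(audio_path, sr=16000)
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        # Move the input tensor to the same device as the model
        logits = model(inputs.input_values.to(device)).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]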