"""Offline speech-to-text helper built on the Vosk recognizer."""

import json
import wave

from vosk import KaldiRecognizer, Model


class VoskTranscriber:
    """Transcribe WAV audio with a Vosk model.

    The model is loaded once in the constructor and reused by every
    ``transcribe_audio`` call on the instance.
    """

    def __init__(self, model_path="Vosk/vosk-model-small-en-us-0.15"):
        """Load the Vosk model stored at *model_path*."""
        self.model = Model(model_path)

    def transcribe_audio(self, audio_data):
        """Transcribe a WAV source and return the recognized text.

        Args:
            audio_data: Passed straight to ``wave.open``, so it may be a
                file path or a binary file-like object containing WAV data.

        Returns:
            ``{"success": True, "text": <transcript>}`` on success, or
            ``{"success": False, "error": <message>}`` if the audio could
            not be opened, has an unsupported sample format, or recognition
            raised an exception. This method does not raise.
        """
        segments = []
        try:
            with wave.open(audio_data, "rb") as wf:
                # The recognizer is fed raw frames and is expected to receive
                # mono 16-bit PCM; anything else would be decoded as noise
                # and silently reported as a "successful" transcript.
                if (
                    wf.getnchannels() != 1
                    or wf.getsampwidth() != 2
                    or wf.getcomptype() != "NONE"
                ):
                    return {
                        "success": False,
                        "error": "Audio must be a mono, 16-bit PCM WAV file",
                    }

                recognizer = KaldiRecognizer(self.model, wf.getframerate())
                recognizer.SetWords(True)

                while data := wf.readframes(4000):
                    # AcceptWaveform returns a truthy value once an utterance
                    # has been finalized; only then is Result() consumed.
                    if recognizer.AcceptWaveform(data):
                        segment = json.loads(recognizer.Result())["text"].strip()
                        if segment:
                            segments.append(segment)

                # Flush whatever is still buffered after the last chunk.
                tail = json.loads(recognizer.FinalResult())["text"].strip()
                if tail:
                    segments.append(tail)
        except Exception as e:
            # Boundary handler: callers get an error dict, never an exception.
            return {"success": False, "error": str(e)}

        # Joining only non-empty segments avoids the doubled spaces that
        # empty (silent) utterances would otherwise leave in the transcript.
        return {"success": True, "text": " ".join(segments)}


if __name__ == "__main__":
    transcriber = VoskTranscriber()
    print(transcriber.transcribe_audio("output.wav"))