# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="scb10x/llama-3-typhoon-v1.5-8b-audio-preview",
    trust_remote_code=True,
)

# Or load the model directly for full control over generation
import torch  # needed for torch.float16 below
from transformers import AutoModel

# Initialize from the trained model
model = AutoModel.from_pretrained(
    "scb10x/llama-3-typhoon-v1.5-8b-audio-preview",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
model.to("cuda")
model.eval()

# Run generation
prompt_pattern = (
    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
    "{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
)
response = model.generate(
    wav_path="path_to_your_audio.wav",
    prompt="transcribe this audio",
    prompt_pattern=prompt_pattern,
    do_sample=False,
    max_length=1200,
    repetition_penalty=1.1,
    num_beams=1,
    # temperature=0.4,
    # top_p=0.9,
    # streamer=streamer  # supports TextIteratorStreamer
)
print(response)
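
# The commented-out streamer argument above accepts a TextIteratorStreamer.
# A minimal streaming sketch follows, with two assumptions not confirmed by
# the snippet above: that the model's tokenizer loads via AutoTokenizer, and
# that this custom generate() forwards streamer= as the comment suggests.
from threading import Thread

from transformers import AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained(
    "scb10x/llama-3-typhoon-v1.5-8b-audio-preview", trust_remote_code=True
)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so run it on a background thread; the streamer then
# yields decoded text chunks on the main thread as tokens are produced.
thread = Thread(
    target=model.generate,
    kwargs=dict(
        wav_path="path_to_your_audio.wav",
        prompt="transcribe this audio",
        prompt_pattern=prompt_pattern,
        do_sample=False,
        max_length=1200,
        streamer=streamer,
    ),
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()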