"""Gradio ASR demo: serve NVIDIA Canary-1B speech recognition through a
transformers `automatic-speech-recognition` pipeline wrapped in a Gradio UI.

Running this script downloads the model checkpoint(s) and opens the
interface in a browser tab.
"""

import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer
from nemo.collections.asr.models import EncDecMultiTaskModel

# Load the NeMo Canary-1B multi-task ASR model.
# NOTE(review): this NeMo instance is configured below but never handed to
# the transformers pipeline — `pipeline(...)` loads its own copy of the
# checkpoint, so the decoding tweak does not affect what is served. Confirm
# whether this load is still needed; it roughly doubles startup time/memory.
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')

# Update decode params: force greedy decoding (beam size 1).
decode_cfg = canary_model.cfg.decoding
decode_cfg.beam.beam_size = 1
canary_model.change_decoding_strategy(decode_cfg)

# Build the ASR pipeline (loads its own copy of the checkpoint).
pipe = pipeline(
    "automatic-speech-recognition",
    model="nvidia/canary-1b",
)

# Generate a Gradio UI directly from the pipeline and open it in a browser.
gr.Interface.from_pipeline(
    pipe,
    title="ASR",
    description="Using pipeline with Canary-1B",
).launch(inbrowser=True)