"""Gradio front-end that forwards a prompt and a grammar to an endpoint handler."""

import gradio as gr
import loop

MODEL_ID = "TinyLlama/TinyLlama_v1.1_math_code"

# Built once at import time and shared by every request.
# NOTE(review): presumably this loads the model identified by MODEL_ID --
# confirm against loop.EndpointHandler.
handler = loop.EndpointHandler(MODEL_ID)


def respond(prompt, grammar, max_new_tokens, max_time):
    """Send one request to the module-level ``handler`` and return its first result.

    Args:
        prompt: Value of the first text area; sent under the ``"inputs"`` key.
        grammar: Value of the second text area; sent under ``"grammar"``.
        max_new_tokens: Value of the first number field; sent under
            ``"max-new-tokens"``.
        max_time: Value of the second number field; sent under ``"max-time"``.
            NOTE(review): unit is not visible here (presumably seconds) --
            confirm against the handler.

    Returns:
        Element ``0`` of whatever ``handler`` returns for the request.
        NOTE(review): presumably the generated text -- confirm against
        loop.EndpointHandler.
    """
    # The hyphenated key names are what gets passed to the handler; keep them
    # exactly as spelled here.
    request = {"inputs": prompt, "grammar": grammar}
    request["max-new-tokens"] = max_new_tokens
    request["max-time"] = max_time

    results = handler(request)
    return results[0]


demo = gr.Interface(
    fn=respond,
    # Two text areas, mapped positionally to `prompt` and `grammar`.
    inputs=["textarea", "textarea"],
    outputs=["textarea"],
    # Two integer fields (precision=0), mapped positionally to
    # `max_new_tokens` (default 512) and `max_time` (default 30).
    additional_inputs=[
        gr.Number(value=default, precision=0) for default in (512, 30)
    ],
)


if __name__ == "__main__":
    demo.launch()