import gradio as gr | |
import loop | |
# Model identifier passed to loop.EndpointHandler.
# NOTE(review): looks like a Hugging Face Hub repo id — confirm against loop.py.
MODEL_ID = "TinyLlama/TinyLlama_v1.1_math_code"

# Created once at import time; respond() reuses this single instance for
# every request instead of constructing a handler per call.
handler = loop.EndpointHandler(MODEL_ID)
def respond(prompt, grammar, max_new_tokens, max_time):
    """Send one request to the module-level ``handler`` and return its first result.

    Args:
        prompt: Value forwarded under the ``"inputs"`` key.
        grammar: Value forwarded under the ``"grammar"`` key.
        max_new_tokens: Value forwarded under the ``"max-new-tokens"`` key.
        max_time: Value forwarded under the ``"max-time"`` key.

    Returns:
        Element ``0`` of whatever ``handler`` returns for the request.
    """
    # The keys are hyphenated, so they cannot be spelled as keyword
    # arguments; the payload is assembled as an explicit dict.
    payload = {
        "inputs": prompt,
        "grammar": grammar,
        "max-new-tokens": max_new_tokens,
        "max-time": max_time,
    }
    return handler(payload)[0]
# Gradio UI wired to respond(): two text areas as the main inputs, one text
# area for the output, and two numeric fields (defaults 512 and 30) supplied
# as additional inputs. precision=0 makes the Number components yield integers.
# NOTE(review): the numeric fields presumably map to max_new_tokens and
# max_time in that order — confirm against Gradio's additional_inputs ordering.
demo = gr.Interface(
    respond,
    inputs=["textarea", "textarea"],
    outputs=["textarea"],
    additional_inputs=[
        gr.Number(value=512, precision=0),
        gr.Number(value=30, precision=0),
    ],
)
if __name__ == "__main__":
    # Launch the UI only when executed as a script, not when imported.
    demo.launch()