kjcjohnson's picture
Better performance and config?
de0bfd0
raw
history blame contribute delete
554 Bytes
import gradio as gr
import loop
# Model identifier handed to the endpoint handler (looks like a Hugging Face
# Hub repo id -- confirm against loop.EndpointHandler).
MODEL_ID = "TinyLlama/TinyLlama_v1.1_math_code"
# Created once at import time and shared by every call to respond().
# NOTE(review): presumably this loads the model, so importing this module may
# be slow -- confirm in loop.EndpointHandler.
handler = loop.EndpointHandler(MODEL_ID)
def respond(prompt, grammar, max_new_tokens, max_time):
    """Run a single request through the module-level ``handler``.

    The four arguments are packed into the request dict that the handler is
    called with, under the keys ``"inputs"``, ``"grammar"``,
    ``"max-new-tokens"`` and ``"max-time"`` respectively.

    Returns:
        The first element of whatever the handler returns.
    """
    payload = {
        "inputs": prompt,
        "grammar": grammar,
        "max-new-tokens": max_new_tokens,
        "max-time": max_time,
    }
    result = handler(payload)
    return result[0]
# Gradio UI: two text areas (prompt, grammar) feed respond(); the two numeric
# fields are passed after them, in order, as max_new_tokens and max_time.
demo = gr.Interface(
    fn=respond,
    inputs=["textarea", "textarea"],
    outputs=["textarea"],
    additional_inputs=[
        gr.Number(value=512, precision=0),  # default for max_new_tokens
        gr.Number(value=30, precision=0),  # default for max_time
    ],
)
# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()