Spaces: Running on Zero

import gradio as gr
import os
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Read the access token for the gated Gemma checkpoint from the Space secrets
token = os.environ["HF_TOKEN"]
tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b", token=token)
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", token=token)

def generate(message, history):
    # gr.ChatInterface calls this function with (message, history)
    inputs = tokenizer([message], return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation in a background thread and stream the text back to the UI
    Thread(target=model.generate, kwargs={**inputs, "streamer": streamer, "max_new_tokens": 256}).start()
    response = ""
    for chunk in streamer:
        response += chunk
        yield response

app = gr.ChatInterface(generate)
app.launch(debug=True)
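
The code above runs the model on CPU. On Zero (ZeroGPU) hardware, a GPU is attached per call rather than for the whole process: the model is moved to CUDA at import time and the function that calls model.generate is wrapped with @spaces.GPU from the spaces package. The sketch below is an assumption about how this app could be adapted, reusing the model and tokenizer loaded above; the decorator, the device moves, the generate_on_gpu name, and the max_new_tokens value are illustrative additions, not part of the app shown above.

import spaces

# ZeroGPU attaches the device only while a @spaces.GPU-decorated call is running
model.to("cuda")

@spaces.GPU
def generate_on_gpu(message, history):
    # Inputs must live on the same device as the model
    inputs = tokenizer([message], return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=256)
    # Return only the newly generated tokens as text
    return tokenizer.decode(outputs[0, inputs["input_ids"].shape[-1]:], skip_special_tokens=True)

Passing generate_on_gpu to gr.ChatInterface instead of generate would keep the rest of the app unchanged, at the cost of losing token-by-token streaming in this simplified sketch.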