import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
hf_token = os.environ["hf_token"]  # HF access token, e.g. set as a Space secret
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Using the small-parameter versions of both models for faster inference on HF
b_tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-1b1")
b_model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-1b1", device_map="auto")
g_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b", token=hf_token)
g_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b", token=hf_token, device_map="auto")
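# Note: google/gemma-2-2b is a gated checkpoint on the Hub, so the token above
# must belong to an account that has accepted the Gemma license terms.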
def sentence_completion(model_name, text):
    # Pick the tokenizer/model pair for the selected radio option
    if model_name == "Bloom":
        tokenizer, model = b_tokenizer, b_model
    else:  # "Gemma"
        tokenizer, model = g_tokenizer, g_model
    # Move the encoded prompt to the model's device to avoid a CPU/GPU mismatch
    inputs = tokenizer(text, return_tensors="pt").to(device)
    # Passing the full encoding also supplies the attention mask to generate()
    outputs = model.generate(**inputs, max_new_tokens=32, num_return_sequences=1)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
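# Example (hypothetical prompt): sentence_completion("Bloom", "The sky turned dark and")
# returns the prompt followed by up to 32 newly generated tokens as one string.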
interface = gr.Interface(
    fn=sentence_completion,
    inputs=[
        gr.Radio(["Bloom", "Gemma"], label="Choose model"),
        gr.Textbox(placeholder="Enter sentence"),
    ],
    outputs="text",
    title="Bloom vs Gemma Sentence Completion",
)
interface.launch(share=True, debug=True)
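# Optional smoke test from another machine (a sketch, assuming the gradio_client
# package is installed and "<user>/<space>" is replaced with the actual Space id):
#
#   from gradio_client import Client
#   client = Client("<user>/<space>")
#   print(client.predict("Gemma", "Once upon a time", api_name="/predict"))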