import gradio as gr
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

# 2-bit GPTQ-quantized Vicuna-7B weights; the tokenizer comes from the
# original full-precision lmsys release.
model_path = 'vita-group/vicuna-7b-v1.3_gptq'
tokenizer_path = 'lmsys/vicuna-7b-v1.3'

model = AutoGPTQForCausalLM.from_quantized(
    model_path,
    disable_exllama=True,  # the ExLlama kernel only supports 4-bit weights
    device_map='auto',
    revision='2bit_128g',  # 2-bit quantization, group size 128
)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)

def generate_response(prompt):
    # Tokenize the prompt and move it to whatever device the model was
    # dispatched to (device_map='auto' may place it on CPU or GPU).
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(model.device)
    # Budget generated tokens only, independent of prompt length.
    outputs = model.generate(input_ids=input_ids, max_new_tokens=128)
    # Skip special tokens such as </s> in the decoded text.
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded_output
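
# A minimal sanity check, assuming the quantized model fits on the available
# hardware; the prompt below is illustrative only. Uncomment to try locally:
# print(generate_response("What is the capital of France?"))
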
iface = gr.Interface(fn=generate_response, inputs="text", outputs="text")
iface.launch()
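
# Note: iface.launch(share=True) would additionally create a temporary
# public URL when running outside Hugging Face Spaces.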