import gradio as gr
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

# 2-bit GPTQ-quantized Vicuna-7B weights; the tokenizer comes from the original FP16 repo.
model_path = 'vita-group/vicuna-7b-v1.3_gptq'
tokenizer_path = 'lmsys/vicuna-7b-v1.3'

# Load the quantized checkpoint from the '2bit_128g' branch and place it on available devices.
model = AutoGPTQForCausalLM.from_quantized(
    model_path,
    disable_exllama=True,
    device_map='auto',
    revision='2bit_128g',
)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)

def generate_response(prompt):
    # Tokenize the prompt, move it to the GPU, and generate up to 128 tokens.
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to('cuda')
    outputs = model.generate(input_ids=input_ids, max_length=128)
    decoded_output = tokenizer.decode(outputs[0])
    return decoded_output

# Simple text-in / text-out Gradio UI around the generator.
iface = gr.Interface(fn=generate_response, inputs="text", outputs="text")
iface.launch()
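Once the Space is running, the interface can also be queried programmatically with gradio_client instead of the web UI. A minimal sketch, assuming the gradio_client package is installed; the Space id below is hypothetical (the real id is not shown here) and the prompt is only illustrative:

from gradio_client import Client

# Hypothetical Space id; replace with the actual "<owner>/<space-name>" of this Space.
client = Client("vita-group/vicuna-7b-v1.3-gptq-demo")

# A gr.Interface with a single text input exposes the default '/predict' endpoint.
result = client.predict("What does 2-bit GPTQ quantization trade off?", api_name="/predict")
print(result)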