# Gradio demo: serve a GPTQ-quantized Vicuna-7B model behind a simple text UI.
import gradio as gr
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

# The quantized weights live in a separate Hub repo; the tokenizer comes from
# the original Vicuna release.
model_path = 'vita-group/vicuna-7b-v1.3_gptq'
tokenizer_path = 'lmsys/vicuna-7b-v1.3'

# Load the 2-bit GPTQ checkpoint. device_map='auto' places the weights on the
# available GPU(s); ExLlama kernels are disabled because they only support
# 4-bit GPTQ models.
model = AutoGPTQForCausalLM.from_quantized(
    model_path,
    disable_exllama=True,
    device_map='auto',
    revision='2bit_128g',  # 2-bit weights, quantization group size 128
)
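# NOTE (assumption, not stated in this file): the Hub repo may also publish
# other bit-width revisions (e.g. 3- or 4-bit); '2bit_128g' trades generation
# quality for the smallest memory footprint.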

tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)

def generate_response(prompt):
    # Tokenize the prompt and move it to the GPU the model was loaded onto.
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to('cuda')
    # max_new_tokens bounds the completion itself; the original max_length=128
    # would have counted prompt tokens toward the limit as well.
    outputs = model.generate(input_ids=input_ids, max_new_tokens=128)
    # Strip special tokens (e.g. </s>) from the decoded text.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
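
# Optional sanity check (illustrative, not part of the original app): uncomment
# to run one generation directly before launching the UI.
# print(generate_response('What does 2-bit quantization do to model quality?'))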

# Wire the generator into a minimal text-in/text-out Gradio interface.
iface = gr.Interface(fn=generate_response, inputs='text', outputs='text')
iface.launch()