charanhu committed
Commit 43305aa • 1 Parent(s): 31f279f

Update app.py

Files changed (1)
  1. app.py +17 -10
app.py CHANGED
@@ -1,17 +1,24 @@
 import gradio as gr
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
 
-tokenizer = AutoTokenizer.from_pretrained("kimnt93/chat-llama2-1b-1.0")
-model = AutoModelForCausalLM.from_pretrained("kimnt93/chat-llama2-1b-1.0")
 
 def generate_response(prompt):
-    conversation = [{'role': 'user', 'content': prompt}]
-    prompt = tokenizer.apply_chat_template(conversation, tokenizer=False, add_generation_prompt=True)
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(**inputs, use_cache=True, max_length=4096)
-    outputs_text = tokenizer.decode(outputs[0])
-    return outputs_text
 
 iface = gr.Interface(fn=generate_response, inputs="text", outputs="text")
 iface.launch()
 import gradio as gr
+from transformers import AutoTokenizer
+from auto_gptq import AutoGPTQForCausalLM
 
+model_path = 'TheBloke/SOLAR-10.7B-Instruct-v1.0-uncensored-GPTQ'
+tokenizer_path = 'TheBloke/SOLAR-10.7B-Instruct-v1.0-uncensored-GPTQ'
+
+model = AutoGPTQForCausalLM.from_quantized(
+    model_path,
+    disable_exllama=True,
+    device_map='auto',
+    revision='2bit_128g',
+)
+
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
 
 def generate_response(prompt):
+    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to('cuda')
+    outputs = model.generate(input_ids=input_ids, max_length=128)
+    decoded_output = tokenizer.decode(outputs[0])
+    return decoded_output
 
 iface = gr.Interface(fn=generate_response, inputs="text", outputs="text")
 iface.launch()
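
In short, the commit swaps the full-precision kimnt93/chat-llama2-1b-1.0 pipeline for TheBloke's GPTQ quantization of SOLAR-10.7B-Instruct, loaded with auto_gptq; the revision='2bit_128g' branch apparently selects a 2-bit, group-size-128 variant so the 10.7B model fits in modest GPU memory. Since gr.Interface(fn=generate_response, inputs="text", outputs="text") exposes a single text-to-text endpoint, the running app can also be exercised remotely. A minimal client sketch, assuming the gradio_client package is installed; the Space id charanhu/solar-chat below is a hypothetical placeholder for wherever this app is actually hosted:

from gradio_client import Client

# "charanhu/solar-chat" is a hypothetical Space id; substitute the real one.
client = Client("charanhu/solar-chat")

# A gr.Interface with a single text input and text output serves one
# endpoint, reachable at api_name="/predict" by default.
reply = client.predict("What does GPTQ quantization change?", api_name="/predict")
print(reply)

Note that generate_response decodes outputs[0] in full, so the reply echoes the prompt tokens before the continuation; slicing the prompt off the output before decoding is a common refinement.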