matteosz committed on
Commit
7d403eb
·
1 Parent(s): 77fb7b9

Added draft

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from peft import PeftConfig, AutoPeftModelForCausalLM
4
+ from transformers import AutoTokenizer, BitsAndBytesConfig
5
+
6
# DPO-fine-tuned PEFT adapter checkpoint and the base model it was trained on.
checkpoint = 'ernestoBocini/Phi3-mini-DPO-Tuned'
base_model_id = 'microsoft/Phi-3-mini-4k-instruct'

# 4-bit NF4 quantization with double quantization; compute in bfloat16 to keep
# the memory footprint small at inference time.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Tokenizer comes from the base model; prompts are capped at 512 tokens.
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
tokenizer.model_max_length = 512
tokenizer.pad_token = tokenizer.unk_token  # use unk rather than eos token to prevent endless generation
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'left'  # left-pad so the prompt sits flush against generated tokens

# Load the adapter on top of the 4-bit quantized base model, inference-only.
# NOTE: the original also built `config = PeftConfig.from_pretrained(checkpoint)`,
# which was never used anywhere (a dead extra download) — removed.
model = AutoPeftModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=nf4_config,
    is_trainable=False,
    trust_remote_code=True,
).eval()
29
+
30
def chat(user_input):
    """Generate one reply to *user_input* with the DPO-tuned Phi-3 model.

    The prompt is tokenized (truncated and left-padded to the tokenizer's
    512-token limit), passed through ``model.generate``, and only the newly
    generated tokens are decoded and returned as a string.
    """
    inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding="max_length")
    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        # BUG FIX: the original called model(**inputs, max_new_tokens=..., ...),
        # i.e. a plain forward pass. A forward pass does not accept generation
        # kwargs and returns logits, not token ids, so decoding its output is
        # meaningless. model.generate() is the correct API here.
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            num_return_sequences=1,
            # Use the tokenizer's pad token (unk) rather than eos, consistent
            # with the module-level setup that avoids endless generation.
            pad_token_id=tokenizer.pad_token_id,
        )
    # Inputs are left-padded to a fixed length, so the prompt occupies the
    # first input_ids.shape[1] positions; slice them off to keep only the reply.
    reply = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return reply
35
+
36
# Minimal single-turn chat UI: one text box in, one text box out.
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Chatbot",
)
iface.launch()