TohidA committed on
Commit
4e52062
·
1 Parent(s): 74d20b5

Updated Gradio app with new model

Browse files
Files changed (1) hide show
  1. app.py +67 -6
app.py CHANGED
@@ -1,10 +1,71 @@
1
 
2
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- def greet(name):
5
- return "Hello " + name + "!!"
6
 
7
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- if __name__ == "__main__":
10
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ from peft import PeftModel, PeftConfig
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import torch
5
+ from transformers import AutoTokenizer
6
+ from peft import PeftModel, PeftConfig
7
+
8
# Hub repository ids: the fine-tuned PEFT adapter and the base model it is applied to.
ADAPTER_REPO = "TohidA/LlamaInstructMona"
BASE_MODEL_REPO = "mlabonne/llama-2-7b-miniguanaco"

# Adapter metadata; its ``base_model_name_or_path`` selects the tokenizer below.
config = PeftConfig.from_pretrained(ADAPTER_REPO)

# Load the base causal LM first, then wrap it with the adapter weights.
# NOTE(review): the base model id is hard-coded here while the tokenizer is
# resolved from ``config.base_model_name_or_path`` -- confirm both refer to the
# same checkpoint.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(BASE_MODEL_REPO),
    ADAPTER_REPO,
)

# Move the model to the GPU only when one is available; otherwise it stays on CPU.
if torch.cuda.is_available():
    model = model.cuda()

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
16
+
17
def prompt(instruction, input=''):
    """Build the instruction prompt that is fed to the model.

    Args:
        instruction: Task description placed under ``### Instruction:``.
        input: Optional extra context placed under ``### Input:``. ``None``,
            an empty string, or a whitespace-only string selects the
            instruction-only template.

    Returns:
        The prompt text, ending with the ``### Response:`` header.
    """
    # NOTE: ``input`` shadows the builtin; the name is kept because it is part
    # of the function's signature. The template text (including its trailing
    # spaces) is reproduced verbatim so the model sees an unchanged format.
    if input is None or not input.strip():
        return f"Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{instruction} \n\n### Response:\n"
    return f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
21
 
22
# Reuse the end-of-sequence token for padding (both the token string and its id).
# NOTE(review): presumably the base tokenizer ships without a pad token -- confirm.
tokenizer.pad_token, tokenizer.pad_token_id = (
    tokenizer.eos_token,
    tokenizer.eos_token_id,
)
24
 
25
def instruct(instruction, input='', temperature=0.7, top_p=0.95, top_k=4, max_new_tokens=128, do_sample=False, penalty_alpha=0.6, repetition_penalty=1., stop="\n\n"):
    """Generate the model's response to an instruction.

    Args:
        instruction: Task description passed to ``prompt``.
        input: Optional extra context passed to ``prompt``.
        temperature: Sampling temperature; only used when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; only used when ``do_sample`` is true.
        top_k: Top-k cutoff; only used when ``do_sample`` is true.
        max_new_tokens: Maximum number of tokens to generate.
        do_sample: Sample from the distribution instead of decoding greedily.
        penalty_alpha: Accepted for interface compatibility; it is not
            forwarded to ``model.generate``.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.
        stop: Text at which the response is cut off; an empty value disables
            the cut.

    Returns:
        A ``(response, full_prompt)`` tuple: the decoded text that follows the
        ``### Response:`` header, and the prompt built for this request.
    """
    full_prompt = prompt(instruction, input)

    # Put the inputs on the device that holds the model weights, so the
    # function also works on CPU-only hosts instead of failing in ``.cuda()``.
    device = next(model.parameters()).device
    input_ids = tokenizer(full_prompt.strip(), return_tensors='pt').input_ids.to(device)

    # The UI allows temperature 0, which is not valid for sampling; greedy
    # decoding is the equivalent deterministic behaviour.
    if do_sample and temperature <= 0:
        do_sample = False

    generate_kwargs = dict(
        input_ids=input_ids,
        return_dict_in_generate=True,
        # UI sliders may deliver floats; generate() needs an integer count.
        max_new_tokens=int(max_new_tokens),
        do_sample=do_sample,
        repetition_penalty=repetition_penalty,
    )
    if do_sample:
        # Sampling knobs are irrelevant for greedy decoding, so they are only
        # passed when sampling is actually enabled.
        generate_kwargs.update(temperature=temperature, top_p=top_p, top_k=int(top_k))

    if device.type == "cuda":
        # Mixed precision is only meaningful on the GPU.
        with torch.autocast(device_type="cuda"):
            outputs = model.generate(**generate_kwargs)
    else:
        outputs = model.generate(**generate_kwargs)

    decoded = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
    # The decoded sequence starts with the prompt, so the response is the text
    # that follows the prompt's "### Response:" header.
    response = decoded.split("### Response:")[1].strip()
    if stop:
        response = response.split(stop)[0].strip()
    return response, full_prompt
42
 
43
+
44
import locale

# Force the preferred encoding reported to callers to UTF-8.
# NOTE(review): presumably a workaround for hosts whose locale is not UTF-8 --
# confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

import gradio as gr

# Input widgets, listed in the positional order ``instruct`` expects them.
input_text = gr.Textbox(label="Input")
instruction_text = gr.Textbox(label="Instruction")
temperature = gr.Slider(label="Temperature", minimum=0, maximum=1, value=0.7, step=0.05)
top_p = gr.Slider(label="Top-P", minimum=0, maximum=1, value=0.95, step=0.01)
top_k = gr.Slider(label="Top-K", minimum=0, maximum=128, value=40, step=1)
# Explicit integer step: the value is a token count.
max_new_tokens = gr.Slider(label="Tokens", minimum=1, maximum=256, value=64, step=1)
do_sample = gr.Checkbox(label="Do Sample", value=True)
penalty_alpha = gr.Slider(label="Penalty Alpha", minimum=0, maximum=1, value=0.5)
repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1., maximum=2., value=1., step=0.1)
stop = gr.Textbox(label="Stopping Criteria", value="")

# Output widgets, in the order of the tuple returned by ``instruct``.
output_prompt = gr.Textbox(label="Prompt")
output_text = gr.Textbox(label="Output")

description = """
The [TohidA/InstructLlamaMONA-withMONAdataset](https://hf.co/TohidA/LlamaInstructMona) model: a Llama chat 7B model finetuned on an [instruction dataset](https://huggingface.co/mlabonne/llama-2-7b-miniguanaco), then finetuned with RL/PPO using a [reward model](https://huggingface.co/TohidA/MONAreward), which is a BERT classifier trained on the [MONA dataset](https://huggingface.co/datasets/TohidA/MONA), with [low rank adaptation](https://arxiv.org/abs/2106.09685) for a single epoch.
"""

demo = gr.Interface(
    fn=instruct,
    inputs=[
        instruction_text,
        input_text,
        temperature,
        top_p,
        top_k,
        max_new_tokens,
        do_sample,
        penalty_alpha,
        repetition_penalty,
        stop,
    ],
    outputs=[output_text, output_prompt],
    title="InstructLlamaMONA 7B Gradio Demo",
    description=description,
)
demo.launch(
    debug=True,
    share=True,
)
71
+ )