harpreetsahota committed
Commit 260fe59
1 Parent(s): 1973071

Create app.py

Files changed (1)
app.py +145 -0
app.py ADDED
@@ -0,0 +1,145 @@
+ import os
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
+
+ model_id = 'Deci/DeciLM-6b-instruct'
+
+ SYSTEM_PROMPT_TEMPLATE = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+ ### Instruction:
+
+ {instruction}
+
+ ### Response:
+ """
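+ # For example, SYSTEM_PROMPT_TEMPLATE.format(instruction="Summarize this paragraph.")
+ # yields the Alpaca-style prompt above with the instruction slotted under
+ # "### Instruction:"; the model writes its completion after "### Response:".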
+
+ DESCRIPTION = """
+ # <p style="text-align: center; color: #292b47;"> 🤖 <span style='color: #3264ff;'>DeciLM-6B-Instruct:</span> A Fast Instruction-Tuned Model💨 </p>
+ <span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciLM-6b-instruct" style="color: #3264ff;">DeciLM-6B-Instruct</a>! DeciLM-6B-Instruct is a 6B-parameter instruction-tuned language model released under the Llama license. Because it is instruction-tuned rather than chat-tuned, prompt it with an instruction that describes a task, and it will generate a response that completes the task.</span>
+ <p><span style='color: #292b47;'>Learn more about the base model <a href="https://huggingface.co/Deci/DeciLM-6b" style="color: #3264ff;">DeciLM-6B.</a></span></p>
+ """
+
+ # LICENSE = """
+ # <p/>
+
+ # ---
+ # As a derivative work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-2-7b-chat) by Meta,
+ # this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
+ # """
+
+ if torch.cuda.is_available():
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id,
+         torch_dtype=torch.float16,
+         device_map='auto',
+         trust_remote_code=True,
+         use_auth_token=token
+     )
+ else:
+     model = None
+     DESCRIPTION += '\n\nYou need a GPU for this example. Try using Colab: https://bit.ly/decilm-instruct-nb'
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
+ tokenizer.pad_token = tokenizer.eos_token
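+ # The tokenizer ships without a dedicated pad token (common for Llama-style
+ # vocabularies), so the EOS token is reused for padding.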
+
+ # Build the full prompt from the system prompt template
+ def get_prompt_with_template(message: str) -> str:
+     return SYSTEM_PROMPT_TEMPLATE.format(instruction=message)
+
+ # Generate the model's response to a single instruction
+ def generate_model_response(message: str) -> str:
+     prompt = get_prompt_with_template(message)
+     inputs = tokenizer(prompt, return_tensors='pt')
+     if torch.cuda.is_available():
+         inputs = inputs.to('cuda')
+     # Beam search combined with sampling; cap the output length, stop beams
+     # early at EOS, and block verbatim repetition of any 4-gram
+     output = model.generate(**inputs,
+                             max_new_tokens=3000,
+                             num_beams=5,
+                             no_repeat_ngram_size=4,
+                             early_stopping=True,
+                             do_sample=True
+                             )
+     return tokenizer.decode(output[0], skip_special_tokens=True)
+
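+ # Note: for a decoder-only model, generate() returns the prompt tokens followed
+ # by the completion, so the answer still has to be sliced out after "### Response:".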
+ # Extract only the content after "### Response:"
+ def extract_response_content(full_response: str) -> str:
+     response_start_index = full_response.find("### Response:")
+     if response_start_index != -1:
+         return full_response[response_start_index + len("### Response:"):].strip()
+     else:
+         return full_response
+
+ # End-to-end helper: build the prompt, generate, and extract the response
+ def get_response_with_template(message: str) -> str:
+     full_response = generate_model_response(message)
+     return extract_response_content(full_response)
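+ # Example (hypothetical input): get_response_with_template("How do I flip a pancake?")
+ # returns just the model's answer, with the prompt scaffold stripped.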
+
+ with gr.Blocks(css="/content/style.css") as demo:
+     gr.Markdown(DESCRIPTION)
+     gr.DuplicateButton(value='Duplicate Space for private use',
+                        elem_id='duplicate-button')
+     with gr.Group():
+         chatbot = gr.Textbox(label='DeciLM-6B-Instruct Output:')
+         with gr.Row():
+             textbox = gr.Textbox(
+                 container=False,
+                 show_label=False,
+                 placeholder='Type an instruction...',
+                 scale=10,
+                 elem_id="textbox"
+             )
+             submit_button = gr.Button(
+                 '💬 Submit',
+                 variant='primary',
+                 scale=1,
+                 min_width=0,
+                 elem_id="submit_button"
+             )
+
+     # Clear button to reset both the input box and the output box
+     clear_button = gr.Button(
+         '🗑️ Clear',
+         variant='secondary',
+     )
+
+     clear_button.click(
+         fn=lambda: ('', ''),
+         outputs=[textbox, chatbot],
+         queue=False,
+         api_name=False,
+     )
+
+     submit_button.click(
+         fn=get_response_with_template,
+         inputs=textbox,
+         outputs=chatbot,
+         queue=False,
+         api_name=False,
+     )
+
+     gr.Examples(
+         examples=[
+             'Write detailed instructions for making chocolate chip pancakes.',
+             'Write a 250-word article about your love of pancakes.',
+             'Explain the plot of Back to the Future in three sentences.',
+             'How do I make a trap beat?',
+             'A step-by-step guide to learning Python in one month.',
+         ],
+         inputs=textbox,
+         outputs=chatbot,
+         fn=get_response_with_template,
+         cache_examples=True,
+         elem_id="examples"
+     )
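+     # With cache_examples=True, Gradio runs get_response_with_template on each
+     # example once at startup and then serves the cached outputs.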
+
+
+     gr.HTML(label="Keep in touch", value="<img src='./content/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>")
+
+
+ demo.launch(share=True, debug=True)