LLukas22 committed on
Commit
b7ddbe8
·
1 Parent(s): 9f148ca

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -0
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llm_rs import AutoModel,SessionConfig,GenerationConfig,Precision
3
+
4
+ repo_name = "rustformers/mpt-7b-ggml"
5
+ file_name = "mpt-7b-instruct-q4_0-ggjt.bin"
6
+
7
+ examples = [
8
+ "Write a travel blog about a 3-day trip to Thailand.",
9
+ "Write a short story about a robot that has a nice day.",
10
+ "Convert the following to a single line of JSON:\n\n```name: John\nage: 30\naddress:\n street:123 Main St.\n city: San Francisco\n state: CA\n zip: 94101\n```",
11
+ "Write a quick post to congratulate Rustformers about their launch.",
12
+ "Explain how a candle works to a 6 year old in a few sentences.",
13
+ "What are some of the most common misconceptions about birds?",
14
+ "Explain why the rust programming language is so popular.",
15
+ ]
16
+
17
+ session_config = SessionConfig(threads=2,batch_size=2)
18
+ model = AutoModel.from_pretrained(repo_name, model_file=file_name, session_config=session_config,verbose=True)
19
+
20
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    """Generate a streamed completion for *instruction*.

    Wraps the instruction in the Alpaca-style prompt template and yields the
    cumulative response text after every new token, so the UI can render
    partial output as it arrives.
    """
    formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
Answer:"""
    gen_cfg = GenerationConfig(
        seed=seed,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    pieces = []
    for token_text in model.stream(prompt=formatted_prompt, generation_config=gen_cfg):
        pieces.append(token_text)
        # Yield the full text so far, not just the delta.
        yield "".join(pieces)
35
# Build the Gradio UI: a prompt box, sampling controls, streamed Markdown
# output, canned examples, and disclaimer/privacy footers.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    # Page header. Fix: "utalizes" -> "utilizes" in the user-facing text.
    gr.Markdown(
        """<h1><center>Rustformers: MPT-7B-Instruct</center></h1>

This demo utilizes the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) to execute [MPT-7B-Instruct](https://huggingface.co/mosaicml/mpt-7b-instruct) on a space with 2 CPU cores.
"""
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            # Sampling parameters, collapsed by default.
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )

                    with gr.Column():
                        with gr.Row():
                            # precision=0 forces an integer seed.
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0
                            )
            with gr.Row():
                submit = gr.Button("Submit")
            with gr.Row():
                with gr.Box():
                    gr.Markdown("**MPT-7B-Instruct**")
                    # Streamed model output lands here.
                    output_7b = gr.Markdown()

            with gr.Row():
                gr.Examples(
                    examples=examples,
                    inputs=[instruction],
                    cache_examples=False,
                    fn=process_stream,
                    outputs=output_7b,
                )
            with gr.Row():
                gr.Markdown(
                    "Disclaimer: MPT-7B can produce factually incorrect output, and should not be relied on to produce "
                    "factually accurate information. MPT-7B was trained on various public datasets; while great efforts "
                    "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
                    "biased, or otherwise offensive outputs.",
                    elem_classes=["disclaimer"],
                )
            with gr.Row():
                gr.Markdown(
                    "[Privacy policy](https://gist.github.com/samhavens/c29c68cdcd420a9aa0202d0839876dac)",
                    elem_classes=["disclaimer"],
                )

    # Both the button and pressing Enter in the textbox trigger generation;
    # process_stream is a generator, so output_7b updates incrementally.
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )

# Single worker (concurrency_count=1) — only one generation at a time on 2 CPUs.
demo.queue(max_size=4, concurrency_count=1).launch(debug=True)