sam749 committed on
Commit
56b6398
1 Parent(s): dd9942a

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +42 -20
main.py CHANGED
@@ -1,6 +1,9 @@
1
  from ctransformers import AutoModelForCausalLM
2
- from fastapi import FastAPI, Form
3
- from pydantic import BaseModel
 
 
 
4
 
5
  #Model loading
6
  llm = AutoModelForCausalLM.from_pretrained("dolphin-2.0-mistral-7b.Q4_K_S.gguf",
@@ -8,27 +11,46 @@ model_type='mistral',
8
  max_new_tokens = 1096,
9
  threads = 3,
10
  )
11
-
12
-
13
- #Pydantic object
14
- class validation(BaseModel):
15
- prompt: str
16
-
17
- #Fast API
18
- app = FastAPI()
19
-
20
- #Zephyr completion
21
- @app.post("/llm_on_cpu")
22
- async def stream(item: validation):
23
- system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
24
  start,end = "<|im_start|>", "<|im_end|>"
25
- prompt = f"<|im_start|>system\n{system_prompt}{end}\n{start}user\n{item.prompt.strip()}{end}\n"
26
  return llm(prompt)
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
 
 
 
 
29
 
30
 
31
-
32
-
33
-
34
-
 
1
  from ctransformers import AutoModelForCausalLM
2
+ import gradio as gr
3
+
4
# Credits text, rendered as Markdown at the bottom of the demo page.
# NOTE(review): "greety" looks like a typo for "greeting", but it is referenced
# elsewhere in this module, so the name is kept as-is.
greety = """
A special thanks to [TheBloke](https://huggingface.co/TheBloke) for the quantized model and [Gathnex](https://medium.com/@gathnex) for his excellent tutorial.
"""
7
 
8
  #Model loading
9
  llm = AutoModelForCausalLM.from_pretrained("dolphin-2.0-mistral-7b.Q4_K_S.gguf",
 
11
  max_new_tokens = 1096,
12
  threads = 3,
13
  )
14
+
15
def stream(prompt, UL):
    """Run one chat turn through the local model using the ChatML prompt format.

    Parameters:
        prompt: the user's latest message from the chat interface.
        UL: conversation history supplied by gr.ChatInterface (unused here,
            so each turn is answered without prior context).

    Returns:
        The raw completion text produced by the module-level `llm` model.
    """
    # Fixed typo in the original system prompt: 'hlepful' -> 'helpful'.
    system_prompt = 'You are a helpful AI assistant. Below is an instruction that describes a task. Write a response that appropriately completes the request.'
    start, end = "<|im_start|>", "<|im_end|>"
    # NOTE(review): ChatML prompts conventionally end with
    # "<|im_start|>assistant\n" before generation; this one stops after the
    # user turn — confirm the model still completes as intended.
    prompt = f"{start}system\n{system_prompt}{end}\n{start}user\n{prompt.strip()}{end}\n"
    return llm(prompt)
20
 
21
# Page styling: centered title, themed duplicate button, and a narrower
# centered content column for the chat widget.
css = """
h1{
 text-align: center;
}

#duplicate-button{
 margin: auto;
 color: whitesmoke;
 background: #1565c0;
}

.contain{
 max-width: 900px;
 margin: auto;
 padding-top: 1.5rem;
}
"""
38
+
39
# Chat widget wiring: gr.ChatInterface invokes `stream` with
# (message, history) on every user turn.
chat_interface = gr.ChatInterface(
    fn=stream,
    # Fixed: the original passed the *string* 'None', which Gradio treats as a
    # button label (a button captioned "None"). The intent — hiding the stop
    # button — is expressed by passing the actual None value.
    stop_btn=None,
    examples=[
        "what are 'Large Language Models'?",
        "Explain OCEAN personality types",
    ],
)
47
 
48
# Page layout, top to bottom: title banner, "duplicate space" button,
# the chat widget itself, then the credits footer.
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1><center>Dolphin2.0_x_Mistral Demo</center></h1>")
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_id="duplicate-button",
    )
    chat_interface.render()
    gr.Markdown(greety)
53
 
54
# Script entry point: serve the Gradio app. The bounded queue caps pending
# requests so the CPU-bound model (threads=3) isn't flooded.
if __name__ == "__main__":
    demo.queue(max_size=10).launch()