nchen909 committed
Commit e7cb277
Parent: 7012ad7

Upload 8 files
README.md CHANGED
@@ -1,10 +1,13 @@
  ---
- title: Fastapi T5
- emoji: 🐒
- colorFrom: purple
- colorTo: blue
- sdk: docker
+ title: Mistral 7B Instruct GGUF Run On CPU Basic
+ emoji: πŸ”₯
+ colorFrom: blue
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 4.3.0
+ app_file: app.py
  pinned: false
+ license: mit
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,100 @@
+ import gradio as gr
+ from gpt4all import GPT4All
+ from huggingface_hub import hf_hub_download
+
+ title = "Apollo-7B-GGUF Run On CPU"
+
+ description = """
+ πŸ”Ž [Apollo-7B](https://huggingface.co/FreedomIntelligence/Apollo-7B) in [GGUF format](https://huggingface.co/FreedomIntelligence/Apollo-7B-GGUF), an 8-bit quantization that balances quality and size, running on CPU via [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all).
+
+ πŸ”¨ Running on free CPU-Basic hardware. Consider duplicating this Space to run without a queue.
+
+ Mistral does not currently support a system prompt token (such as `<<SYS>>`); if you need one, include your system prompt in the first message. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
+ """
+
+ """
+ [Model from TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
+ [Mistral-instruct-v0.1 system prompt](https://docs.mistral.ai/usage/guardrailing)
+ """
+
+ model_path = "models"
+ model_name = "Apollo-7B-q8_0.gguf"
+ hf_hub_download(repo_id="FreedomIntelligence/Apollo-7B-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
+
+ print("Start the model init process")
+ model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
+ print("Finish the model init process")
+
+ model.config["promptTemplate"] = "[INST] {0} [/INST]"
+ model.config["systemPrompt"] = ""
+ model._is_chat_session_activated = False
+
+ max_new_tokens = 2048
+
+ def generater(message, history, temperature, top_p, top_k):
+     prompt = "<s>"
+     for user_message, assistant_message in history:
+         prompt += model.config["promptTemplate"].format(user_message)
+         prompt += assistant_message + "</s>"
+     prompt += model.config["promptTemplate"].format(message)
+     outputs = []
+     for token in model.generate(prompt=prompt, temp=temperature, top_k=top_k, top_p=top_p, max_tokens=max_new_tokens, streaming=True):
+         outputs.append(token)
+         yield "".join(outputs)
+
+ def vote(data: gr.LikeData):
+     if data.liked:
+         return
+     else:
+         return
+
+ chatbot = gr.Chatbot(avatar_images=('resourse/user-icon.png', 'resourse/chatbot-icon.png'), bubble_full_width=False)
+
+ additional_inputs = [
+     gr.Slider(
+         label="temperature",
+         value=0.5,
+         minimum=0.0,
+         maximum=2.0,
+         step=0.05,
+         interactive=True,
+         info="Higher values like 0.8 make the output more random; lower values like 0.2 make it more focused and deterministic.",
+     ),
+     gr.Slider(
+         label="top_p",
+         value=1.0,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.01,
+         interactive=True,
+         info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggested: set to 1 and use temperature instead; 1 means 100% and disables it.",
+     ),
+     gr.Slider(
+         label="top_k",
+         value=40,
+         minimum=0,
+         maximum=1000,
+         step=1,
+         interactive=True,
+         info="Limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
+     )
+ ]
+
+ iface = gr.ChatInterface(
+     fn=generater,
+     title=title,
+     description=description,
+     chatbot=chatbot,
+     additional_inputs=additional_inputs,
+     examples=[
+         ["What are the medicinal effects of goji berries?"],
+         ["I've taken several courses of antibiotics for recurring infections, and now they seem less effective. Am I developing antibiotic resistance?"],
+     ]
+ )
+
+ with gr.Blocks(css="resourse/style/custom.css") as demo:
+     chatbot.like(vote, None, None)
+     iface.render()
+
+ if __name__ == "__main__":
+     demo.queue(max_size=3).launch()
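For reference, the prompt string that `generater` assembles follows the `[INST] {0} [/INST]` template configured above. A minimal sketch with hypothetical conversation turns (the history and message values are illustrative, not from the Space):

```python
# Reproduces the prompt assembly in generater() above; the history and
# message values here are hypothetical examples.
prompt_template = "[INST] {0} [/INST]"
history = [
    ("What are the medicinal effects of goji berries?",
     "Goji berries are rich in antioxidants and polysaccharides."),
]
message = "Are there any side effects?"

prompt = "<s>"
for user_message, assistant_message in history:
    prompt += prompt_template.format(user_message)
    prompt += assistant_message + "</s>"
prompt += prompt_template.format(message)

print(prompt)
# <s>[INST] What are the medicinal effects of goji berries? [/INST]Goji berries are rich in antioxidants and polysaccharides.</s>[INST] Are there any side effects? [/INST]
```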
models/blank ADDED
File without changes
requirements.txt CHANGED
@@ -1,7 +1 @@
- fastapi==0.74.*
- requests==2.27.*
- sentencepiece==0.1.*
- torch==1.11.*
- transformers==4.*
- uvicorn[standard]==0.17.*
- gradio
+ gpt4all==2.0.2
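The FastAPI/torch/transformers stack is dropped because inference now runs through gpt4all's bundled llama.cpp backend, while gradio itself is supplied by the Space SDK pinned in the README. A minimal sketch of CPU inference against the single remaining dependency, assuming the GGUF file has already been downloaded to `models/` as app.py does:

```python
# Minimal CPU inference using only the pinned dependency (gpt4all==2.0.2).
# Assumes models/Apollo-7B-q8_0.gguf has been fetched, as app.py does above.
from gpt4all import GPT4All

model = GPT4All("Apollo-7B-q8_0.gguf", "models", allow_download=False, device="cpu")
reply = model.generate("[INST] What are the medicinal effects of goji berries? [/INST]",
                       max_tokens=256)
print(reply)
```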
resourse/chatbot-icon.png ADDED
resourse/style/blank ADDED
File without changes
resourse/style/custom.css ADDED
@@ -0,0 +1,9 @@
+ h1 {
+     text-align: center;
+ }
+
+ .contain {
+     max-width: 1000px;
+     margin: auto;
+     padding-top: 1.5rem;
+ }
resourse/user-icon.png ADDED