Futuresony committed on
Commit
a42f949
·
verified ·
1 Parent(s): 2807a8c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Remote inference client bound to the hosted model; all chat requests in
# `respond` go through this single module-level instance.
client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
8
+
9
+
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k=50,  # fine control over sampling pool size
    repetition_penalty=1.2,  # penalize repetitive patterns
):
    """Stream an assistant reply for *message* given the chat *history*.

    Builds an OpenAI-style message list (system prompt, alternating
    user/assistant turns, then the new user message) and yields the
    accumulated response text after each streamed token, so the Gradio
    ChatInterface can render it incrementally.

    Args:
        message: The new user message.
        history: Prior turns as (user, assistant) string pairs; empty
            entries are skipped.
        system_message: System prompt placed first in the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        repetition_penalty: Multiplicative penalty on repeated tokens.

    Yields:
        The response text accumulated so far (grows with each chunk).
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # NOTE(review): `top_k`, `repetition_penalty`, and `use_cache` are not
    # accepted by `chat_completion` in every huggingface_hub release —
    # verify against the pinned version or they will raise a TypeError.
    # Loop variable renamed from `message` (original shadowed the user
    # message parameter after the first chunk).
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        use_cache=True,  # enables caching for efficiency
    ):
        token = chunk.choices[0].delta.content

        # Final/empty stream chunks can carry None content; skip them to
        # avoid `response += None` raising TypeError.
        if token:
            response += token
        yield response
47
+
48
+
49
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra UI controls forwarded positionally to `respond` after
# (message, history) — their order must match respond's parameter order.
_extra_inputs = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
    gr.Slider(
        minimum=10,
        maximum=100,
        value=50,
        step=1,
        label="Top-k (sampling control)",
    ),
    gr.Slider(
        minimum=1.0,
        maximum=2.0,
        value=1.2,
        step=0.1,
        label="Repetition Penalty",
    ),
]

# Chat UI wired to the streaming `respond` generator above.
demo = gr.ChatInterface(respond, additional_inputs=_extra_inputs)
81
+
82
+
83
# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
85
+