pabloce committed
Commit e976361
1 Parent(s): 3cae82a

Update app.py

Files changed (1)
  1. app.py +27 -9
app.py CHANGED
@@ -25,7 +25,7 @@ css = """
 .dark.user {
     background: #0a1120 !important;
 }
-.dark.assistant, .dark.pending {
+.dark.assistant {
     background: transparent !important;
 }
 """
@@ -37,6 +37,8 @@ def respond(
     max_tokens,
     temperature,
     top_p,
+    top_k,
+    repeat_penalty,
     model,
 ):
     from llama_cpp import Llama
@@ -47,14 +49,13 @@ def respond(
     from llama_cpp_agent.chat_history.messages import Roles
     print(message)
     print(history)
-    print(max_tokens)
-    print(temperature)
-    print(top_p)
-    print(model)

     llm = Llama(
         model_path=f"models/{model}",
+        flash_attn=True,
+        n_threads=40,
         n_gpu_layers=81,
+        n_batch=1024,
         n_ctx=8192,
     )
     provider = LlamaCppPythonProvider(llm)
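The loader keywords added in this hunk (flash_attn, n_threads, n_batch) are ordinary llama-cpp-python constructor arguments. As a standalone reference, a minimal sketch of the same load configuration outside the app; the model path is only an illustration and the completion call at the end is just a smoke test:

from llama_cpp import Llama

llm = Llama(
    model_path="models/dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf",  # any local GGUF file
    flash_attn=True,    # flash-attention kernels (needs a recent llama-cpp-python build)
    n_threads=40,       # CPU threads for the work that stays on the CPU
    n_gpu_layers=81,    # offload up to 81 transformer layers to the GPU
    n_batch=1024,       # prompt-processing batch size
    n_ctx=8192,         # context window in tokens
)

out = llm.create_completion("Say hello.", max_tokens=16)
print(out["choices"][0]["text"])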
@@ -67,7 +68,11 @@ def respond(
     )

     settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
     settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
     settings.stream = True

     messages = BasicChatHistory()
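With these assignments every slider in the UI now reaches the sampler. A hedged sketch of how the settings object is typically handed to the agent afterwards, assuming the streaming get_chat_response(..., llm_sampling_settings=...) pattern from llama-cpp-agent's examples; agent and provider stand for the objects built earlier in respond():

def stream_reply(agent, provider, message, chat_history,
                 max_tokens, temperature, top_p, top_k, repeat_penalty):
    # Copy the UI values onto the provider's default sampling settings,
    # mirroring what respond() does in the hunk above.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Assumed call signature, as used in llama-cpp-agent streaming examples.
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=chat_history,
        returns_streaming_generator=True,
        print_output=False,
    )
    partial = ""
    for token in stream:
        partial += token
        yield partial  # Gradio re-renders the growing reply on every yield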
@@ -81,7 +86,6 @@ def respond(
             'role': Roles.assistant,
             'content': msn[1]
         }
-
         messages.add_message(user)
         messages.add_message(assistant)

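This hunk only drops a blank line, but for orientation it sits at the tail of the loop that replays Gradio's (user, assistant) history pairs into the agent's chat history. A minimal sketch of that whole loop; BasicChatHistory and Roles are the classes this file already uses, and the import paths spelled out here are an assumption:

from llama_cpp_agent.chat_history import BasicChatHistory      # assumed import path
from llama_cpp_agent.chat_history.messages import Roles        # as imported in app.py

def rebuild_history(history):
    # history is Gradio's list of (user_text, assistant_text) pairs
    messages = BasicChatHistory()
    for msn in history:
        user = {'role': Roles.user, 'content': msn[0]}
        assistant = {'role': Roles.assistant, 'content': msn[1]}
        messages.add_message(user)
        messages.add_message(assistant)
    return messages

# Example: two earlier exchanges as Gradio would pass them in.
messages = rebuild_history([("Hi", "Hello!"), ("Name a dolphin.", "Flipper.")])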
@@ -95,14 +99,28 @@ def respond(
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max new tokens"),
+        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
             value=0.95,
             step=0.05,
-            label="Top-p (nucleus sampling)",
+            label="Top-p",
+        ),
+        gr.Slider(
+            minimum=0,
+            maximum=100,
+            value=40,
+            step=1,
+            label="Top-k",
+        ),
+        gr.Slider(
+            minimum=0.0,
+            maximum=2.0,
+            value=1.1,
+            step=0.1,
+            label="Repetition penalty",
         ),
         gr.Dropdown(['dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf', 'dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf'], value="dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf", label="Model"),
     ],
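The new Top-k and Repetition penalty sliders only reach respond() because gr.ChatInterface passes additional_inputs to the callback positionally, after message and history, in the order they are listed, so the slider order here must match the parameter order added in the earlier hunk. A stripped-down sketch of that wiring, with a stand-in callback (echo) in place of respond:

import gradio as gr

def echo(message, history, max_tokens, temperature, top_p, top_k, repeat_penalty, model):
    # Stand-in for respond(): just report the settings it would receive.
    return (f"{model}: max_tokens={max_tokens}, temperature={temperature}, "
            f"top_p={top_p}, top_k={top_k}, repeat_penalty={repeat_penalty}")

demo = gr.ChatInterface(
    echo,
    additional_inputs=[  # order here == parameter order after (message, history)
        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
        gr.Dropdown(['dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf',
                     'dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf'],
                    value="dolphin-2.9.1-llama-3-70b.Q3_K_M.gguf", label="Model"),
    ],
)

if __name__ == "__main__":
    demo.launch()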
@@ -121,7 +139,7 @@ demo = gr.ChatInterface(
     undo_btn="Undo",
     clear_btn="Clear",
     submit_btn="Send",
-    description="Cognitive Computation: 🐬 Chat multi llm"
+    description="Cognitive Computation: Chat Dolphin 🐬 2.9.1-llama-3-70b & 2.9.1-yi-1.5-34b"
 )

 if __name__ == "__main__":