muryshev committed
Commit: 95b3088
1 parent: 00ed39e

Update app.py

Files changed (1):
  1. app.py  +8 -8

app.py CHANGED
@@ -45,9 +45,9 @@ model = Llama(
  n_parts=1,
  #n_batch=100,
  logits_all=True,
- #n_threads=12,
+ n_threads=12,
  verbose=True,
- #n_gpu_layers=35,
+ n_gpu_layers=35,
  n_gqa=8 #must be set for 70b models
  )

@@ -129,12 +129,12 @@ def generate_search_request():
  parameters = data.get("parameters", {})

  # Extract parameters from the request
- temperature = 0.01
+ temperature = parameters.get("temperature", 0.01)
  truncate = parameters.get("truncate", 1000)
  max_new_tokens = parameters.get("max_new_tokens", 1024)
- top_p = 0.8
+ top_p = parameters.get("top_p", 0.85)
  repetition_penalty = parameters.get("repetition_penalty", 1.2)
- top_k = 20
+ top_k = parameters.get("top_k", 30)
  return_full_text = parameters.get("return_full_text", False)

@@ -168,12 +168,12 @@ def generate_response():
  parameters = data.get("parameters", {})

  # Extract parameters from the request
- temperature = 0.02#parameters.get("temperature", 0.01)
+ temperature = parameters.get("temperature", 0.01)
  truncate = parameters.get("truncate", 1000)
  max_new_tokens = parameters.get("max_new_tokens", 1024)
- top_p = 80#parameters.get("top_p", 0.85)
+ top_p = parameters.get("top_p", 0.85)
  repetition_penalty = parameters.get("repetition_penalty", 1.2)
- top_k = 25#parameters.get("top_k", 30)
+ top_k = parameters.get("top_k", 30)
  return_full_text = parameters.get("return_full_text", False)

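For context, the first hunk switches n_threads and n_gpu_layers from commented-out to active in the llama-cpp-python constructor. The sketch below shows how the post-commit settings fit into a Llama() call; model_path and n_ctx are placeholders (they are not part of this diff), and only the arguments visible in the hunk are taken from app.py as it was at the time.

# Minimal sketch of the post-commit Llama() construction (llama-cpp-python, GGML-era API).
# model_path and n_ctx are assumed placeholders; the rest mirrors the hunk above.
from llama_cpp import Llama

model = Llama(
    model_path="model.bin",   # placeholder path, not taken from the diff
    n_ctx=2048,               # assumed context size, not shown in the hunk
    n_parts=1,
    logits_all=True,
    n_threads=12,             # CPU threads, enabled by this commit
    n_gpu_layers=35,          # layers offloaded to GPU, enabled by this commit
    verbose=True,
    n_gqa=8,                  # must be set for 70b models (comment kept from app.py)
)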
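The second and third hunks make both endpoints (generate_search_request and generate_response) read temperature, top_p, and top_k from the request body instead of hard-coded constants. A self-contained sketch of that shared extraction logic follows; the helper name is hypothetical, while the keys and default values come from the new lines in the diff.

# Sketch of the parameter handling both endpoints now share.
# extract_sampling_params is a hypothetical helper, not a function from app.py.
def extract_sampling_params(data: dict) -> dict:
    """Pull sampling settings from a request body, falling back to the commit's defaults."""
    parameters = data.get("parameters", {})
    return {
        "temperature": parameters.get("temperature", 0.01),
        "truncate": parameters.get("truncate", 1000),
        "max_new_tokens": parameters.get("max_new_tokens", 1024),
        "top_p": parameters.get("top_p", 0.85),
        "repetition_penalty": parameters.get("repetition_penalty", 1.2),
        "top_k": parameters.get("top_k", 30),
        "return_full_text": parameters.get("return_full_text", False),
    }

if __name__ == "__main__":
    # A caller overriding temperature; everything else keeps the defaults.
    body = {"inputs": "test prompt", "parameters": {"temperature": 0.7}}
    print(extract_sampling_params(body))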