muryshev committed
Commit 32fc9c0
1 Parent(s): e4466f5

Update app.py

Files changed (1)
  1. app.py +27 -10
app.py CHANGED
@@ -37,16 +37,7 @@ model_name = "ggml-model-q4_1.gguf"
 
 #snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
 
-model = Llama(
-    model_path=model_name,
-    n_ctx=2000,
-    n_parts=1,
-    #n_batch=100,
-    logits_all=True,
-    #n_threads=12,
-    verbose=True,
-    n_gqa=8 #must be set for 70b models
-)
+
 
 
 def get_message_tokens(model, role, content):
@@ -84,6 +75,7 @@ def generate_tokens(model, generator):
     for token in generator:
         if token == model.token_eos() or stop_generation:
             stop_generation = False
+            app.logger.info('Abort generating')
             yield b'' # End of chunk
             break
 
@@ -111,6 +103,9 @@ def generate_unknown_response():
 def generate_search_request():
     global stop_generation
     stop_generation = False
+
+
+
     data = request.get_json()
     app.logger.info(data)
     user_query = data.get("query", "")
@@ -126,6 +121,17 @@ def generate_search_request():
     top_k = 20
     return_full_text = parameters.get("return_full_text", False)
 
+    model = Llama(
+        model_path=model_name,
+        n_ctx=2000,
+        n_parts=1,
+        #n_batch=100,
+        logits_all=True,
+        #n_threads=12,
+        verbose=True,
+        n_gqa=8 #must be set for 70b models
+    )
+
     tokens = get_system_tokens_for_preprompt(model, preprompt)
     tokens.append(LINEBREAK_TOKEN)
 
@@ -146,6 +152,7 @@ def generate_response():
 def generate_response():
     global stop_generation
     stop_generation = False
+
 
     data = request.get_json()
     app.logger.info(data)
@@ -163,6 +170,16 @@ def generate_response():
     return_full_text = parameters.get("return_full_text", False)
 
 
+    model = Llama(
+        model_path=model_name,
+        n_ctx=2000,
+        n_parts=1,
+        #n_batch=100,
+        logits_all=True,
+        #n_threads=12,
+        verbose=True,
+        n_gqa=8 #must be set for 70b models
+    )
 
     # Generate the response
     #system_tokens = get_system_tokens(model)
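Note on the change: after this commit, generate_search_request() and generate_response() each construct their own Llama instance, so the GGUF weights are re-read on every request. A minimal sketch of a shared lazy loader, assuming the same llama-cpp-python version and constructor arguments shown in the diff (get_model, _model, and _model_lock are hypothetical names, not part of this commit):

import threading

from llama_cpp import Llama

model_name = "ggml-model-q4_1.gguf"  # same file as in the diff

_model = None                   # cached instance, built on first use
_model_lock = threading.Lock()  # guards first-time construction under concurrent requests

def get_model():
    # Hypothetical helper: build the Llama instance once, then reuse it,
    # instead of re-loading the weights inside every request handler.
    global _model
    with _model_lock:
        if _model is None:
            _model = Llama(
                model_path=model_name,
                n_ctx=2000,
                n_parts=1,
                logits_all=True,
                verbose=True,
                n_gqa=8,  # the diff notes this must be set for 70b models
            )
    return _model

Each handler would then call model = get_model() in place of the duplicated Llama(...) blocks; the first request pays the load cost once and later requests reuse the cached instance.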