Commit aae8857 by John6666
1 Parent(s): a09e5e0

Upload 3 files

Files changed (2):
  1. app.py +1 -1
  2. llmdolphin.py +195 -184
app.py CHANGED
@@ -221,7 +221,7 @@ with gr.Blocks(fill_width=True, elem_id="container", css=css, delete_cache=(60,
     ).success(
         fn=dolphin_respond_auto,
         inputs=[prompt, chatbot],
-        outputs=[chatbot, result],
+        outputs=[chatbot, result, prompt],
         queue=True,
         show_progress="full",
         show_api=False,
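
Note: `dolphin_respond_auto` now yields three values per step (chatbot pairs, result text, and a prompt update), so the chained `.success()` handler must list three output components. A minimal sketch of the same wiring, using hypothetical stand-in components and a stub handler rather than the Space's real ones:

    import gradio as gr

    def respond_auto_stub(message, chat_history):
        # One yielded value per output component: chatbot, result, prompt.
        chat_history = chat_history + [(message, "(streamed reply...)")]
        yield chat_history, "parsed tags", gr.update(value="")

    with gr.Blocks() as demo:
        prompt = gr.Textbox(label="Prompt")
        chatbot = gr.Chatbot()
        result = gr.Textbox(label="Result")
        send = gr.Button("Send")
        send.click(lambda: None, None, None).success(
            fn=respond_auto_stub,
            inputs=[prompt, chatbot],
            outputs=[chatbot, result, prompt],  # three outputs to match the three yielded values
            queue=True,
            show_progress="full",
            show_api=False,
        )

    # demo.launch()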
llmdolphin.py CHANGED
@@ -976,19 +976,19 @@ def add_dolphin_models(query, format_name):
         if s and "" in s: s.remove("")
         if len(s) == 1:
             repo = s[0]
-            if not api.repo_exists(repo_id = repo): return gr.update(visible=True)
+            if not api.repo_exists(repo_id = repo): return gr.update()
             files = api.list_repo_files(repo_id = repo)
             for file in files:
                 if str(file).endswith(".gguf"): add_models[filename] = [repo, format]
         elif len(s) >= 2:
             repo = s[0]
             filename = s[1]
-            if not api.repo_exists(repo_id = repo) or not api.file_exists(repo_id = repo, filename = filename): return gr.update(visible=True)
+            if not api.repo_exists(repo_id = repo) or not api.file_exists(repo_id = repo, filename = filename): return gr.update()
             add_models[filename] = [repo, format]
-        else: return gr.update(visible=True)
+        else: return gr.update()
     except Exception as e:
         print(e)
-        return gr.update(visible=True)
+        return gr.update()
     llm_models = (llm_models | add_models).copy()
     update_llm_model_tupled_list()
     choices = get_dolphin_models()
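
Note: the repeated change in this hunk is the return value on early exit. `gr.update(visible=True)` forces the target component visible every time validation fails, while a bare `gr.update()` is a no-op update that leaves the component's current properties untouched. A small illustrative handler (hypothetical, not taken from llmdolphin.py):

    import gradio as gr

    def add_model_stub(query: str):
        # Early exit without touching the dropdown's visibility or choices.
        if not query:
            return gr.update()
        # Only update what actually changed.
        return gr.update(choices=[query], value=query)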
@@ -1235,84 +1235,89 @@ def dolphin_respond(
     repeat_penalty: float = 1.1,
     progress=gr.Progress(track_tqdm=True),
 ):
-    progress(0, desc="Processing...")
-
-    if override_llm_format:
-        chat_template = override_llm_format
-    else:
-        chat_template = llm_models[model][1]
-
-    llm = Llama(
-        model_path=str(Path(f"{llm_models_dir}/{model}")),
-        flash_attn=True,
-        n_gpu_layers=81, # 81
-        n_batch=1024,
-        n_ctx=8192, #8192
-    )
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
-        custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
-        debug_output=False
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    messages = BasicChatHistory()
-
-    for msn in history:
-        user = {
-            'role': Roles.user,
-            'content': msn[0]
-        }
-        assistant = {
-            'role': Roles.assistant,
-            'content': msn[1]
-        }
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    stream = agent.get_chat_response(
-        message,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=True,
-        print_output=False
-    )
-
-    progress(0.5, desc="Processing...")
-
-    outputs = ""
-    for output in stream:
-        outputs += output
-        yield [(outputs, None)]
+    try:
+        progress(0, desc="Processing...")
+
+        if override_llm_format:
+            chat_template = override_llm_format
+        else:
+            chat_template = llm_models[model][1]
+
+        llm = Llama(
+            model_path=str(Path(f"{llm_models_dir}/{model}")),
+            flash_attn=True,
+            n_gpu_layers=81, # 81
+            n_batch=1024,
+            n_ctx=8192, #8192
+        )
+        provider = LlamaCppPythonProvider(llm)
+
+        agent = LlamaCppAgent(
+            provider,
+            system_prompt=f"{system_message}",
+            predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
+            custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
+            debug_output=False
+        )
+
+        settings = provider.get_provider_default_settings()
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+        settings.stream = True
+
+        messages = BasicChatHistory()
+
+        for msn in history:
+            user = {
+                'role': Roles.user,
+                'content': msn[0]
+            }
+            assistant = {
+                'role': Roles.assistant,
+                'content': msn[1]
+            }
+            messages.add_message(user)
+            messages.add_message(assistant)
+
+        stream = agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False
+        )
+
+        progress(0.5, desc="Processing...")
+
+        outputs = ""
+        for output in stream:
+            outputs += output
+            yield [(outputs, None)]
+    except Exception as e:
+        print(e)
+        yield [("", None)]


 def dolphin_parse(
     history: list[tuple[str, str]],
 ):
-    if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1:
-        return "", gr.update(visible=True), gr.update(visible=True)
     try:
+        if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1:
+            return "", gr.update(), gr.update()
         msg = history[-1][0]
         raw_prompt = get_raw_prompt(msg)
-    except Exception:
-        return "", gr.update(visible=True), gr.update(visible=True)
-    prompts = []
-    if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
-        prompts = list_uniq(jatags_to_danbooru_tags(to_list_ja(raw_prompt)) + ["nsfw", "explicit"])
-    else:
-        prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit"])
-    return ", ".join(prompts), gr.update(interactive=True), gr.update(interactive=True)
+        prompts = []
+        if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
+            prompts = list_uniq(jatags_to_danbooru_tags(to_list_ja(raw_prompt)) + ["nsfw", "explicit"])
+        else:
+            prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit"])
+        return ", ".join(prompts), gr.update(interactive=True), gr.update(interactive=True)
+    except Exception as e:
+        print(e)
+        return "", gr.update(), gr.update()


 @torch.inference_mode()
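
Note: both functions in this hunk apply the same pattern: the whole body moves inside try/except, and on failure the handler still produces a value of the expected shape (an empty chatbot pair, or empty text plus neutral gr.update()s) instead of raising into the Gradio queue. A condensed sketch of the generator version, with a hypothetical generate_stream callable standing in for the Llama/LlamaCppAgent setup:

    def stream_reply(message, history, generate_stream):
        try:
            outputs = ""
            for chunk in generate_stream(message, history):
                outputs += chunk
                yield [(outputs, None)]   # incremental chatbot update
        except Exception as e:
            print(e)
            yield [("", None)]            # fallback keeps the chatbot payload shape intact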
@@ -1329,87 +1334,92 @@ def dolphin_respond_auto(
     repeat_penalty: float = 1.1,
     progress=gr.Progress(track_tqdm=True),
 ):
-    #if not is_japanese(message): return [(None, None)]
-    progress(0, desc="Processing...")
-
-    if override_llm_format:
-        chat_template = override_llm_format
-    else:
-        chat_template = llm_models[model][1]
-
-    llm = Llama(
-        model_path=str(Path(f"{llm_models_dir}/{model}")),
-        flash_attn=True,
-        n_gpu_layers=81, # 81
-        n_batch=1024,
-        n_ctx=8192, #8192
-    )
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
-        custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
-        debug_output=False
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    messages = BasicChatHistory()
-
-    for msn in history:
-        user = {
-            'role': Roles.user,
-            'content': msn[0]
-        }
-        assistant = {
-            'role': Roles.assistant,
-            'content': msn[1]
-        }
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    progress(0, desc="Translating...")
-    stream = agent.get_chat_response(
-        message,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=True,
-        print_output=False
-    )
-
-    progress(0.5, desc="Processing...")
-
-    outputs = ""
-    for output in stream:
-        outputs += output
-        yield [(outputs, None)], gr.update()
+    try:
+        #if not is_japanese(message): return [(None, None)]
+        progress(0, desc="Processing...")
+
+        if override_llm_format:
+            chat_template = override_llm_format
+        else:
+            chat_template = llm_models[model][1]
+
+        llm = Llama(
+            model_path=str(Path(f"{llm_models_dir}/{model}")),
+            flash_attn=True,
+            n_gpu_layers=81, # 81
+            n_batch=1024,
+            n_ctx=8192, #8192
+        )
+        provider = LlamaCppPythonProvider(llm)
+
+        agent = LlamaCppAgent(
+            provider,
+            system_prompt=f"{system_message}",
+            predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
+            custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
+            debug_output=False
+        )
+
+        settings = provider.get_provider_default_settings()
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+        settings.stream = True
+
+        messages = BasicChatHistory()
+
+        for msn in history:
+            user = {
+                'role': Roles.user,
+                'content': msn[0]
+            }
+            assistant = {
+                'role': Roles.assistant,
+                'content': msn[1]
+            }
+            messages.add_message(user)
+            messages.add_message(assistant)
+
+        progress(0, desc="Translating...")
+        stream = agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False
+        )
+
+        progress(0.5, desc="Processing...")
+
+        outputs = ""
+        for output in stream:
+            outputs += output
+            yield [(outputs, None)], gr.update(), gr.update()
+    except Exception as e:
+        print(e)
+        yield [("", None)], gr.update(), gr.update()


 def dolphin_parse_simple(
     message: str,
     history: list[tuple[str, str]],
 ):
-    #if not is_japanese(message): return message
-    if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1: return message
     try:
+        #if not is_japanese(message): return message
+        if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1: return message
         msg = history[-1][0]
         raw_prompt = get_raw_prompt(msg)
-    except Exception:
+        prompts = []
+        if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
+            prompts = list_uniq(jatags_to_danbooru_tags(to_list_ja(raw_prompt)) + ["nsfw", "explicit", "rating_explicit"])
+        else:
+            prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit", "rating_explicit"])
+        return ", ".join(prompts)
+    except Exception as e:
+        print(e)
         return ""
-    prompts = []
-    if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
-        prompts = list_uniq(jatags_to_danbooru_tags(to_list_ja(raw_prompt)) + ["nsfw", "explicit", "rating_explicit"])
-    else:
-        prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit", "rating_explicit"])
-    return ", ".join(prompts)


 # https://huggingface.co/spaces/CaioXapelaum/GGUF-Playground
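
Note: `dolphin_respond_auto` now yields three values (chatbot pairs plus two gr.update() placeholders), which is what the new outputs=[chatbot, result, prompt] wiring in app.py expects, and `dolphin_parse_simple` moves its guards inside the try block so any failure degrades to an empty string. A sketch of that parse-side pattern, with a hypothetical extract_tags helper in place of the Space's tag utilities:

    def parse_last_message_stub(message, history, extract_tags):
        try:
            if not history:
                return message                    # nothing to parse yet
            tags = extract_tags(history[-1][0])   # may raise on malformed input
            return ", ".join(tags)
        except Exception as e:
            print(e)
            return ""                             # degrade quietly instead of raising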
@@ -1430,47 +1440,47 @@ def respond_playground(
     top_k,
     repeat_penalty,
 ):
-    if override_llm_format:
-        chat_template = override_llm_format
-    else:
-        chat_template = llm_models[model][1]
-
-    llm = Llama(
-        model_path=str(Path(f"{llm_models_dir}/{model}")),
-        flash_attn=True,
-        n_gpu_layers=81, # 81
-        n_batch=1024,
-        n_ctx=8192, #8192
-    )
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
-        custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
-        debug_output=False
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    messages = BasicChatHistory()
-
-    # Add user and assistant messages to the history
-    for msn in history:
-        user = {'role': Roles.user, 'content': msn[0]}
-        assistant = {'role': Roles.assistant, 'content': msn[1]}
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    # Stream the response
     try:
+        if override_llm_format:
+            chat_template = override_llm_format
+        else:
+            chat_template = llm_models[model][1]
+
+        llm = Llama(
+            model_path=str(Path(f"{llm_models_dir}/{model}")),
+            flash_attn=True,
+            n_gpu_layers=81, # 81
+            n_batch=1024,
+            n_ctx=8192, #8192
+        )
+        provider = LlamaCppPythonProvider(llm)
+
+        agent = LlamaCppAgent(
+            provider,
+            system_prompt=f"{system_message}",
+            predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
+            custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
+            debug_output=False
+        )
+
+        settings = provider.get_provider_default_settings()
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+        settings.stream = True
+
+        messages = BasicChatHistory()
+
+        # Add user and assistant messages to the history
+        for msn in history:
+            user = {'role': Roles.user, 'content': msn[0]}
+            assistant = {'role': Roles.assistant, 'content': msn[1]}
+            messages.add_message(user)
+            messages.add_message(assistant)
+
+        # Stream the response
         stream = agent.get_chat_response(
             message,
             llm_sampling_settings=settings,
@@ -1484,4 +1494,5 @@ def respond_playground(
             outputs += output
             yield outputs
     except Exception as e:
-        yield f"Error during response generation: {str(e)}"
+        print(e)
+        yield ""
 
 