pseudotensor committed
Commit a0e2e84
1 Parent(s): dc1d7fe

Update with h2oGPT hash f121dcf534b7c7da96e22fdfb00a7436503f167e

Files changed (3)
  1. app.py +72 -16
  2. finetune.py +1 -0
  3. utils.py +64 -0
app.py CHANGED
@@ -2,10 +2,11 @@ import functools
 import inspect
 import sys
 import os
+import time
 import traceback
 import typing
-
-from utils import set_seed, flatten_list, clear_torch_cache, system_info_print
+import filelock
+from utils import set_seed, flatten_list, clear_torch_cache, system_info_print, zip_data, save_generate_output

 SEED = 1236
 set_seed(SEED)
@@ -27,10 +28,11 @@ from finetune import get_loaders, example_data_points, generate_prompt, get_gith
     human, bot, prompt_type_to_model_name, inv_prompt_type_to_model_lower
 from stopping import CallbackToGenerator, Stream, StoppingCriteriaSub

-is_hf = os.getenv("HUGGINGFACE_SPACES")
-is_gpth2oai = os.getenv("GPT_H2O_AI")
+is_hf = bool(os.getenv("HUGGINGFACE_SPACES"))
+is_gpth2oai = bool(os.getenv("GPT_H2O_AI"))
 is_public = is_hf or is_gpth2oai  # multi-user case with fixed model and disclaimer
 is_low_mem = is_hf  # assumes run on 24GB consumer GPU
+admin_pass = os.getenv("ADMIN_PASS")


 def main(
@@ -58,6 +60,7 @@ def main(

         llama_type: bool = None,
         debug: bool = False,
+        save_path: str = None,
         share: bool = True,
         local_files_only: bool = False,
         resume_download: bool = True,
@@ -111,6 +114,7 @@ def main(
     if is_hf:
         # must override share if in spaces
         share = False
+        save_path = os.getenv('SAVE_PATH')

     # get defaults
     model_lower = base_model.lower()
@@ -178,7 +182,7 @@ def main(
     if not eval_sharegpt_as_output:
         model, tokenizer, device = get_model(**locals())
         model_state = [model, tokenizer, device, base_model]
-        fun = partial(evaluate, model_state, debug=debug, chat=chat)
+        fun = partial(evaluate, model_state, debug=debug, chat=chat, save_path=save_path)
     else:
         assert eval_sharegpt_prompts_only > 0

@@ -542,8 +546,9 @@ def go_gradio(**kwargs):
     if is_public:
         description += """<p><b> DISCLAIMERS: </b><ul><i><li>The data used to train this model include The Pile and other sources. These may contain objectionable content, so the model may reproduce that material. Use application and responses at own risk.</i></li>"""
         if kwargs['load_8bit']:
-            description += """<i><li> Model is loaded in 8-bit and with other limitations in order to fit on GPUs with lower amounts of VRAM, so UX can be worse than non-hosted version.</i></li>"""
-        description += """<i><li>Model loading and unloading disabled to avoid GPU OOM for multi-user environment.</i></li></ul></p>"""
+            description += """<i><li> Model is loaded in 8-bit, model loading-unloading is disabled, and other limitations exist in order to fit on GPUs with lower amounts of VRAM, so UX can be worse than non-hosted version.</i></li>"""
+        description += """<i><li>Conversations may be used to improve h2oGPT. Do not share sensitive information.</i></li>"""
+        description += """<i><li>By using h2oGPT, you accept our [Terms of Service](https://github.com/h2oai/h2ogpt/blob/main/tos.md).</i></li></ul></p>"""

     if kwargs['verbose']:
         task_info_md = f"""
@@ -551,14 +556,43 @@ def go_gradio(**kwargs):
     else:
         task_info_md = ''

-    css_code = """footer {visibility: hidden}
-body{background-image:url("https://h2o.ai/content/experience-fragments/h2o/us/en/site/header/master/_jcr_content/root/container/header_copy/logo.coreimg.svg/1678976605175/h2o-logo.svg");}}"""
+    css_code = """footer {visibility: hidden;}
+body{background:linear-gradient(#f5f5f5,#e5e5e5);}
+body.dark{background:linear-gradient(#0d0d0d,#333333);}"""

-    from gradio.themes.utils import colors, fonts, sizes
+    from gradio.themes.utils import Color, colors, fonts, sizes
     if kwargs['h2ocolors']:
-        colors_dict = dict(primary_hue=colors.yellow,
-                           secondary_hue=colors.yellow,
-                           neutral_hue=colors.gray,
+        h2o_yellow = Color(
+            name="yellow",
+            c50="#fffef2",
+            c100="#fff9e6",
+            c200="#ffecb3",
+            c300="#ffe28c",
+            c400="#ffd659",
+            c500="#fec925",
+            c600="#e6ac00",
+            c700="#bf8f00",
+            c800="#a67c00",
+            c900="#664d00",
+            c950="#403000",
+        )
+        h2o_gray = Color(
+            name="gray",
+            c50="#f2f2f2",
+            c100="#e5e5e5",
+            c200="#cccccc",
+            c300="#b2b2b2",
+            c400="#999999",
+            c500="#7f7f7f",
+            c600="#666666",
+            c700="#4c4c4c",
+            c800="#333333",
+            c900="#191919",
+            c950="#0d0d0d",
+        )
+        colors_dict = dict(primary_hue=h2o_yellow,
+                           secondary_hue=h2o_yellow,
+                           neutral_hue=h2o_gray,
                            spacing_size=sizes.spacing_md,
                            radius_size=sizes.radius_md,
                            text_size=sizes.text_md,
@@ -635,7 +669,7 @@ body{background-image:url("https://h2o.ai/content/experience-fragments/h2o/us/en

     # go button visible if
     base_wanted = bool(kwargs['base_model']) and kwargs['login_mode_if_model0']
-    go_btn = gr.Button(value="LOGIN", visible=base_wanted, variant="primary")
+    go_btn = gr.Button(value="ENTER", visible=base_wanted, variant="primary")
     normal_block = gr.Row(visible=not base_wanted)
     with normal_block:
         with gr.Tabs():
@@ -770,12 +804,27 @@ body{background-image:url("https://h2o.ai/content/experience-fragments/h2o/us/en
                 add_model_button = gr.Button("Add new model name")
                 add_lora_button = gr.Button("Add new LORA name", visible=kwargs['show_lora'])
             with gr.TabItem("System"):
-                with gr.Row():
+                system_row = gr.Row(visible=not is_public)
+                admin_pass_textbox = gr.Textbox(label="Admin Password", type='password', visible=is_public)
+                admin_btn = gr.Button(value="admin", visible=is_public)
+                with system_row:
                     with gr.Column():
                         system_text = gr.Textbox(label='System Info')
                         system_btn = gr.Button(value='Get System Info')

+                        zip_btn = gr.Button("Zip")
+                        file_output = gr.File()

+        # Get flagged data
+        zip_data1 = functools.partial(zip_data, root_dirs=['flagged_data_points', kwargs['save_path']])
+        zip_btn.click(zip_data1, inputs=None, outputs=file_output)
+
+        def check_admin_pass(x):
+            return gr.update(visible=x == admin_pass)
+
+        admin_btn.click(check_admin_pass, inputs=admin_pass_textbox, outputs=system_row)
+
+        # Get inputs to evaluate()
         inputs_list = get_inputs_list(locals(), kwargs['model_lower'])
         from functools import partial
         all_kwargs = kwargs.copy()
@@ -1094,7 +1143,7 @@ body{background-image:url("https://h2o.ai/content/experience-fragments/h2o/us/en


 input_args_list = ['model_state']
-inputs_kwargs_list = ['debug', 'chat', 'hard_stop_list', 'sanitize_bot_response', 'model_state0']
+inputs_kwargs_list = ['debug', 'chat', 'save_path', 'hard_stop_list', 'sanitize_bot_response', 'model_state0']


 def get_inputs_list(inputs_dict, model_lower):
@@ -1157,6 +1206,7 @@ def evaluate(
         src_lang=None,
         tgt_lang=None,
         debug=False,
+        save_path=None,
         chat=False,
         hard_stop_list=None,
         sanitize_bot_response=True,
@@ -1369,6 +1419,8 @@ def evaluate(
                     raise StopIteration
                 yield prompter.get_response(decoded_output, prompt=inputs_decoded,
                                             sanitize_bot_response=sanitize_bot_response)
+            if save_path:
+                save_generate_output(output=decoded_output, base_model=base_model, json_file_path=save_path)
             return
         else:
             outputs = model.generate(**gen_kwargs)
@@ -1585,5 +1637,9 @@ if __name__ == "__main__":

     python generate.py --base_model='togethercomputer/GPT-NeoXT-Chat-Base-20B' --prompt_type='human_bot' --lora_weights='GPT-NeoXT-Chat-Base-20B.merged.json.8_epochs.57b2892c53df5b8cefac45f84d019cace803ef26.28'

+    must have 4*48GB GPU and run without 8bit in order for sharding to work with infer_devices=False
+    can also pass --prompt_type='human_bot' and model can somewhat handle instructions without being instruct tuned
+    python generate.py --base_model=decapoda-research/llama-65b-hf --load_8bit=False --infer_devices=False --prompt_type='human_bot'
+
     """, flush=True)
     fire.Fire(main)
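
The theme hunk above defines custom h2o hues, but the diff context ends before they are consumed. As a hedged sketch of how such a colors_dict is typically applied, assuming a stock gradio 3.x theme constructor such as gr.themes.Soft (not shown in this diff; only Color, sizes, and the hue values come from the hunk):

```python
# Sketch, not part of the commit: feeding the custom hues into a stock Gradio
# theme. gr.themes.Soft is an assumed choice of constructor.
import gradio as gr
from gradio.themes.utils import Color, sizes

h2o_yellow = Color(name="yellow", c50="#fffef2", c100="#fff9e6", c200="#ffecb3",
                   c300="#ffe28c", c400="#ffd659", c500="#fec925", c600="#e6ac00",
                   c700="#bf8f00", c800="#a67c00", c900="#664d00", c950="#403000")

theme = gr.themes.Soft(primary_hue=h2o_yellow,        # values copied from the hunk above
                       secondary_hue=h2o_yellow,
                       spacing_size=sizes.spacing_md,
                       radius_size=sizes.radius_md,
                       text_size=sizes.text_md)

with gr.Blocks(theme=theme) as demo:
    gr.Button("h2o-styled button")

demo.launch()
```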
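The System-tab hunk gates system tooling behind ADMIN_PASS. Below is a minimal, self-contained sketch of that gating pattern using the component names from the diff; the standalone Blocks wiring is illustrative, not the commit's actual layout:

```python
# Minimal standalone sketch of the admin-gate pattern from the System tab.
import os
import gradio as gr

admin_pass = os.getenv("ADMIN_PASS")

with gr.Blocks() as demo:
    system_row = gr.Row(visible=False)  # hidden until the password matches
    with system_row:
        system_text = gr.Textbox(label='System Info')
    admin_pass_textbox = gr.Textbox(label="Admin Password", type='password')
    admin_btn = gr.Button(value="admin")

    def check_admin_pass(x):
        # reveal system_row only on an exact password match
        return gr.update(visible=x == admin_pass)

    admin_btn.click(check_admin_pass, inputs=admin_pass_textbox, outputs=system_row)

demo.launch()
```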
finetune.py CHANGED
@@ -73,6 +73,7 @@ prompt_type_to_model_name = {
         'decapoda-research/llama-7b-hf',
         'decapoda-research/llama-13b-hf',
         'decapoda-research/llama-30b-hf',
+        'decapoda-research/llama-65b-hf',
         'facebook/mbart-large-50-many-to-many-mmt',
         'philschmid/bart-large-cnn-samsum',
         'philschmid/flan-t5-base-samsum',
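
prompt_type_to_model_name maps each prompt type to the model names that use it, so registering 'decapoda-research/llama-65b-hf' lets the 65B model resolve a prompt type. A sketch of how such a mapping can invert into the inv_prompt_type_to_model_lower lookup that app.py imports (the 'plain' key and the comprehension are assumptions, not shown in this diff):

```python
# Assumed structure: invert the prompt-type mapping so a model name resolves
# to its prompt type, as the inv_prompt_type_to_model_lower import suggests.
prompt_type_to_model_name = {
    'plain': [  # 'plain' is illustrative, not taken from this diff
        'decapoda-research/llama-7b-hf',
        'decapoda-research/llama-13b-hf',
        'decapoda-research/llama-30b-hf',
        'decapoda-research/llama-65b-hf',  # newly added in this commit
    ],
}

inv_prompt_type_to_model_lower = {
    model.lower(): prompt_type
    for prompt_type, models in prompt_type_to_model_name.items()
    for model in models
}

assert inv_prompt_type_to_model_lower['decapoda-research/llama-65b-hf'] == 'plain'
```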
utils.py CHANGED
@@ -1,7 +1,13 @@
+import contextlib
 import os
 import gc
 import random
+import shutil
 import time
+import traceback
+import zipfile
+
+import filelock
 import numpy as np
 import pandas as pd
 import torch
@@ -87,3 +93,61 @@ def system_info_print():
         return df.to_markdown()
     except Exception as e:
         return "Error: %s" % str(e)
+
+
+def zip_data(root_dirs=None, zip_path='data.zip', base_dir='./'):
+    try:
+        return _zip_data(zip_path=zip_path, base_dir=base_dir, root_dirs=root_dirs)
+    except Exception as e:
+        traceback.print_exc()
+        print('Exception in zipping: %s' % str(e))
+
+
+def _zip_data(root_dirs=None, zip_path='data.zip', base_dir='./'):
+    assert root_dirs is not None
+    with zipfile.ZipFile(zip_path, "w") as expt_zip:
+        for root_dir in root_dirs:
+            if root_dir is None:
+                continue
+            for root, d, files in os.walk(root_dir):
+                for file in files:
+                    file_to_archive = os.path.join(root, file)
+                    assert os.path.exists(file_to_archive)
+                    path_to_archive = os.path.relpath(file_to_archive, base_dir)
+                    expt_zip.write(filename=file_to_archive, arcname=path_to_archive)
+        return "data.zip"
+
+
+def save_generate_output(output=None, base_model=None, json_file_path=None):
+    try:
+        return _save_generate_output(output=output, base_model=base_model, json_file_path=json_file_path)
+    except Exception as e:
+        traceback.print_exc()
+        print('Exception in saving: %s' % str(e))
+
+
+def _save_generate_output(output=None, base_model=None, json_file_path=None):
+    """
+    Save conversation to .json, row by row.
+    Appends if file exists.
+    """
+    assert isinstance(json_file_path, str), "must provide save_path"
+    as_file = os.path.normpath(json_file_path)
+    if os.path.isfile(as_file):
+        # protection if had file there before
+        os.remove(as_file)
+    os.makedirs(json_file_path, exist_ok=True)
+    json_file_file = os.path.join(json_file_path, 'save.json')
+    import json
+    if output[-10:] == '\n\n<human>:':
+        # remove trailing <human>:
+        output = output[:-10]
+    with filelock.FileLock("save_path.lock"):
+        # lock logging in case have concurrency
+        with open(json_file_file, "a") as f:
+            # just add [ at start, and ] at end, and have proper JSON dataset
+            f.write(
+                " " + json.dumps(
+                    dict(text=output, time=time.ctime(), base_model=base_model)
+                ) + ",\n"
+            )
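
save_generate_output appends one comma-terminated JSON object per generation, so the file becomes a valid JSON array once wrapped in brackets, as the code comment notes. A hedged sketch of reading it back (load_saved_generations is a hypothetical helper, not part of the commit):

```python
# Hypothetical reader for the save.json produced above: wrap the
# comma-terminated rows in "[ ... ]" to get a proper JSON dataset.
import json
import os


def load_saved_generations(save_path):
    json_file_file = os.path.join(save_path, 'save.json')
    with open(json_file_file) as f:
        body = f.read().rstrip().rstrip(',')  # drop trailing comma of last row
    return json.loads('[' + body + ']')


for row in load_saved_generations('save_dir'):  # 'save_dir' is illustrative
    print(row['time'], row['base_model'], row['text'][:80])
```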