qgyd2021 commited on
Commit
a974b8f
·
1 Parent(s): 18dfcaf

[update]add code

Browse files
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ .git/
3
+ .idea/
4
+
5
+ cache/
6
+ flagged/
7
+ gradio_cached_examples/
8
+ hub_datasets/
9
+
10
+ **/__pycache__/
README.md CHANGED
@@ -1,11 +1,11 @@
1
  ---
2
- title: Reward Model Gpt2 Stack Exchange
3
  emoji: 📚
4
  colorFrom: yellow
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.45.2
8
- app_file: app.py
9
  pinned: false
10
  ---
11
 
 
1
  ---
2
+ title: Reward Model GPT2 Stack Exchange
3
  emoji: 📚
4
  colorFrom: yellow
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.38.0
8
+ app_file: main.py
9
  pinned: false
10
  ---
11
 
examples/reward_model/reward_model_gpt2_stack_exchange/1.prepare_data.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+
5
+ from datasets import load_dataset
6
+
7
+ from project_settings import project_path
8
+
9
+
10
def get_args():
    """Parse the command-line options of the data preparation script.

    Options:
        --dataset_path: dataset identifier handed to ``load_dataset``.
        --dataset_cache_dir: directory used as the ``load_dataset`` cache;
            defaults to ``<project_path>/hub_datasets``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    default_cache_dir = (project_path / "hub_datasets").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--dataset_path",
        default="lvwerra/stack-exchange-paired",
        type=str,
    )
    arg_parser.add_argument("--dataset_cache_dir", default=default_cache_dir, type=str)

    return arg_parser.parse_args()
21
+
22
+
23
def main():
    """Load the reward and evaluation data and print a summary of each.

    Both datasets come from the ``train`` split of ``args.dataset_path``; they
    differ only in the ``data_dir`` they are read from.
    """
    args = get_args()

    # Load both datasets first, then print them in the same order.
    datasets_in_order = [
        load_dataset(
            path=args.dataset_path,
            data_dir=data_dir,
            split="train",
            cache_dir=args.dataset_cache_dir,
        )
        for data_dir in ("data/reward", "data/evaluation")
    ]
    for dataset in datasets_in_order:
        print(dataset)

    return
42
+
43
+
44
+ if __name__ == '__main__':
45
+ main()
examples/reward_model/reward_model_gpt2_stack_exchange/2.train_model.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ reference:
5
+ https://github.com/huggingface/trl
6
+
7
+ https://huggingface.co/docs/trl/main/en/reward_trainer
8
+ https://huggingface.co/docs/trl/index
9
+ https://huggingface.co/blog/trl-peft
10
+
11
+ https://medium.com/towards-generative-ai/reward-model-training-2209d1befb5f
12
+
13
+ https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama/scripts/reward_modeling.py
14
+
15
+
16
+ # Tensorboard View
17
+ tensorboard \
18
+ --logdir=file_dir/serialization_dir/runs/Sep22_09-36-16_nlp \
19
+ --port=8008 \
20
+ --bind_all
21
+
22
+ http://10.75.27.247:8008/
23
+ """
24
+ import argparse
25
+ from dataclasses import dataclass, field
26
+ import os
27
+ from typing import Any, Dict, List, Optional, Union
28
+
29
+ import evaluate
30
+ import numpy as np
31
+ import torch
32
+ import torch.nn as nn
33
+ from datasets import load_dataset
34
+ from peft import LoraConfig, TaskType, get_peft_model
35
+ from transformers import (
36
+ AutoModelForSequenceClassification,
37
+ AutoTokenizer,
38
+ HfArgumentParser,
39
+ PreTrainedTokenizerBase,
40
+ Trainer,
41
+ TrainerCallback,
42
+ TrainingArguments,
43
+ )
44
+ from transformers.utils import PaddingStrategy
45
+ from transformers.models.gpt2.modeling_gpt2 import GPT2ForSequenceClassification
46
+ from transformers.trainer_utils import EvalPrediction, IntervalStrategy
47
+
48
+ from project_settings import project_path
49
+
50
+
51
@dataclass
class ScriptArguments:
    """Command-line options of the reward-model training script.

    The dataclass is parsed by ``HfArgumentParser``; every field becomes a
    ``--<field_name>`` option with the default given here.
    """

    # dataset
    dataset_path: str = "lvwerra/stack-exchange-paired"
    dataset_cache_dir: str = (project_path / "hub_datasets").as_posix()
    # A subset size that is not positive keeps the whole split.
    train_subset: Optional[int] = -1
    eval_subset: Optional[int] = 10000

    # cache (directory for the tokenized dataset cache files)
    cache_dir: str = "cache_dir"

    # model
    model_name: Optional[str] = "gpt2"
    num_labels: Optional[int] = 1
    # Sub-directory of ``output_dir`` that receives the final model.
    last_checkpoint: Optional[str] = "last_checkpoint"

    # tokenizer (falls back to ``model_name`` when left unset)
    tokenizer_name: Optional[str] = None

    # dataset process
    max_length: Optional[int] = 512

    # lora
    lora_rank: int = 64
    lora_alpha: int = 32
    lora_dropout: float = 0.05

    # training_args (forwarded to ``TrainingArguments`` under the same names)
    output_dir: Optional[str] = "output_dir"
    evaluation_strategy: Union[IntervalStrategy, str] = "steps"
    per_device_train_batch_size: Optional[int] = 4
    per_device_eval_batch_size: Optional[int] = 1
    gradient_accumulation_steps: Optional[int] = 1
    learning_rate: Optional[float] = 2e-5
    weight_decay: Optional[float] = 0.001
    num_train_epochs: float = 1.0
    lr_scheduler_type: Optional[str] = "linear"
    logging_strategy: Union[IntervalStrategy, str] = "steps"
    save_strategy: Union[IntervalStrategy, str] = "steps"
    logging_steps: float = 500
    bf16: bool = False
    fp16: bool = False
    local_rank: Optional[int] = field(default=-1, metadata={"help": "Used for multi-gpu"})
    eval_steps: Optional[float] = 5000
    save_steps: float = 500
    save_total_limit: Optional[int] = 5
    remove_unused_columns: Optional[bool] = False
    label_names: Optional[List[str]] = None
    deepspeed: Optional[str] = None
    optim: Optional[str] = "adamw_hf"
    report_to: Optional[List[str]] = None
    resume_from_checkpoint: Optional[bool] = False
    gradient_checkpointing: Optional[bool] = False

    # addition
    eval_first_step: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to run eval after the first step"},
    )
110
+
111
+
112
def get_args():
    """Parse the command line into a ``ScriptArguments`` instance and return it."""
    parsed = HfArgumentParser(ScriptArguments).parse_args_into_dataclasses()
    # The parser returns one object per dataclass; only one was registered.
    return parsed[0]
116
+
117
+
118
@dataclass
class RewardDataCollatorWithPadding:
    """Collate paired examples into one padded batch.

    Each incoming feature carries two tokenized texts, stored under the
    ``*_j`` and ``*_k`` keys. The two sides are padded independently with
    ``tokenizer.pad`` and returned under the same suffixed keys, together
    with a constant ``"return_loss": True`` entry.
    """

    tokenizer: PreTrainedTokenizerBase
    padding: Union[bool, str, PaddingStrategy] = PaddingStrategy.MAX_LENGTH
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    return_tensors: str = "pt"

    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
        # (input_ids key, attention_mask key) for the "j" side, then the "k" side.
        key_pairs = (
            ("input_ids_j", "attention_mask_j"),
            ("input_ids_k", "attention_mask_k"),
        )

        batch = {}
        for ids_key, mask_key in key_pairs:
            one_side = [
                {
                    "input_ids": feature[ids_key],
                    "attention_mask": feature[mask_key],
                }
                for feature in features
            ]
            padded = self.tokenizer.pad(
                one_side,
                padding=self.padding,
                max_length=self.max_length,
                pad_to_multiple_of=self.pad_to_multiple_of,
                return_tensors=self.return_tensors,
            )
            batch[ids_key] = padded["input_ids"]
            batch[mask_key] = padded["attention_mask"]

        batch["return_loss"] = True
        return batch
160
+
161
+
162
class RewardTrainer(Trainer):
    """``Trainer`` whose loss is a pairwise ranking loss over two rewards.

    We use the InstructGPT pairwise logloss: https://arxiv.org/abs/2203.02155
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute ``-logsigmoid(rewards_j - rewards_k)`` averaged over the batch.

        Args:
            model: model called once per side of the pair; the first element
                of its output is used as the reward.
            inputs: batch holding ``input_ids_j`` / ``attention_mask_j`` and
                ``input_ids_k`` / ``attention_mask_k``.
            return_outputs: when true, also return both reward tensors.
            num_items_in_batch: accepted only so the override keeps working
                with ``Trainer`` versions that pass this keyword; not used.

        Returns:
            The loss, or ``(loss, {"rewards_j": ..., "rewards_k": ...})`` when
            ``return_outputs`` is true.
        """
        rewards_j = model(input_ids=inputs["input_ids_j"], attention_mask=inputs["attention_mask_j"])[0]
        rewards_k = model(input_ids=inputs["input_ids_k"], attention_mask=inputs["attention_mask_k"])[0]
        # The loss shrinks as the "j" reward grows relative to the "k" reward.
        loss = -nn.functional.logsigmoid(rewards_j - rewards_k).mean()
        if return_outputs:
            return loss, {"rewards_j": rewards_j, "rewards_k": rewards_k}
        return loss
172
+
173
+
174
class EvaluateFirstStepCallback(TrainerCallback):
    """Callback that requests an evaluation right after the first global step."""

    def on_step_end(self, args, state, control, **kwargs):
        # Every step other than the first leaves ``control`` untouched.
        if state.global_step != 1:
            return
        control.should_evaluate = True
178
+
179
+
180
def main():
    """Train a LoRA-wrapped reward model on paired question/answer data.

    Steps: load the train and evaluation data, build ``TrainingArguments``
    from the script arguments, wrap the sequence-classification model with
    LoRA, tokenize both answers of every pair, train with ``RewardTrainer``
    and save the model under ``<output_dir>/<last_checkpoint>``.
    """
    args = get_args()

    # ``os.cpu_count()`` may return None, and ``// 2`` gives 0 on a single-core
    # machine; always request at least one worker process.
    num_proc = max(1, (os.cpu_count() or 1) // 2)

    # The tokenized datasets are cached inside this directory, so make sure it exists.
    os.makedirs(args.cache_dir, exist_ok=True)

    # dataset
    train_dataset = load_dataset(
        path=args.dataset_path,
        data_dir="data/reward",
        split="train",
        cache_dir=args.dataset_cache_dir
    )
    if args.train_subset > 0:
        train_dataset = train_dataset.select(range(args.train_subset))
    eval_dataset = load_dataset(
        path=args.dataset_path,
        data_dir="data/evaluation",
        split="train",
        cache_dir=args.dataset_cache_dir
    )
    if args.eval_subset > 0:
        eval_dataset = eval_dataset.select(range(args.eval_subset))

    # training_args
    training_args = TrainingArguments(
        output_dir=args.output_dir,
        evaluation_strategy=args.evaluation_strategy,
        per_device_train_batch_size=args.per_device_train_batch_size,
        per_device_eval_batch_size=args.per_device_eval_batch_size,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        learning_rate=args.learning_rate,
        weight_decay=args.weight_decay,
        num_train_epochs=args.num_train_epochs,
        lr_scheduler_type=args.lr_scheduler_type,
        logging_strategy=args.logging_strategy,
        logging_steps=args.logging_steps,
        save_strategy=args.save_strategy,
        bf16=args.bf16,
        fp16=args.fp16,
        local_rank=args.local_rank,
        eval_steps=args.eval_steps,
        save_steps=args.save_steps,
        save_total_limit=args.save_total_limit,
        remove_unused_columns=args.remove_unused_columns,
        label_names=list() if args.label_names is None else args.label_names,
        deepspeed=args.deepspeed,
        optim=args.optim,
        report_to=args.report_to,
        resume_from_checkpoint=args.resume_from_checkpoint,
        gradient_checkpointing=args.gradient_checkpointing,
    )

    # tokenizer (falls back to the model name when no tokenizer is given)
    tokenizer_name = args.tokenizer_name if args.tokenizer_name is not None else args.model_name
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, use_auth_token=True)

    # model
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name,
        num_labels=args.num_labels,
    )

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=args.lora_rank,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
    )

    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    # Need to do this for gpt2, because it doesn't have an official pad token.
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id
    model.config.use_cache = not args.gradient_checkpointing
    original_columns = train_dataset.column_names

    # Turn each row into a pair of "question + answer" texts,
    # where text_j is the preferred question + answer and text_k is the other.
    # Then tokenize both texts.
    def preprocess_function(examples):
        new_examples = {
            "input_ids_j": [],
            "attention_mask_j": [],
            "input_ids_k": [],
            "attention_mask_k": [],
        }
        for question, response_j, response_k in zip(examples["question"], examples["response_j"], examples["response_k"]):
            tokenized_j = tokenizer("Question: " + question + "\n\nAnswer: " + response_j,
                                    max_length=args.max_length, truncation=True)
            tokenized_k = tokenizer("Question: " + question + "\n\nAnswer: " + response_k,
                                    max_length=args.max_length, truncation=True)

            new_examples["input_ids_j"].append(tokenized_j["input_ids"])
            new_examples["attention_mask_j"].append(tokenized_j["attention_mask"])
            new_examples["input_ids_k"].append(tokenized_k["input_ids"])
            new_examples["attention_mask_k"].append(tokenized_k["attention_mask"])

        return new_examples

    def fits_max_length(example):
        # Tokenization above already truncates to max_length, so this check is
        # a safeguard and is not expected to drop rows.
        return len(example["input_ids_j"]) <= args.max_length and len(example["input_ids_k"]) <= args.max_length

    # Preprocess the datasets and keep only pairs that fit into args.max_length.
    train_dataset = train_dataset.map(
        preprocess_function,
        batched=True,
        num_proc=num_proc,
        remove_columns=original_columns,
        cache_file_name=os.path.join(args.cache_dir, 'train.cache')
    )
    train_dataset = train_dataset.filter(
        fits_max_length,
        num_proc=num_proc,
    )

    eval_dataset = eval_dataset.map(
        preprocess_function,
        batched=True,
        num_proc=num_proc,
        remove_columns=original_columns,
        cache_file_name=os.path.join(args.cache_dir, 'eval.cache')
    )
    eval_dataset = eval_dataset.filter(
        fits_max_length,
        num_proc=num_proc,
    )

    # Define the metric that we'll use for validation.
    accuracy = evaluate.load("accuracy")

    def compute_metrics(eval_pred: EvalPrediction) -> Dict[str, Any]:
        predictions, _ = eval_pred
        # Here, predictions is rewards_j and rewards_k.
        # We want to see how much of the time rewards_j > rewards_k.
        # argmax over the first axis is 0 wherever rewards_j is the larger one,
        # so the reference label is 0 everywhere.
        predictions = np.argmax(predictions, axis=0)
        labels = np.zeros(predictions.shape)
        return accuracy.compute(predictions=predictions, references=labels)

    # Train the model.
    trainer = RewardTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        data_collator=RewardDataCollatorWithPadding(tokenizer=tokenizer,
                                                    padding="max_length",
                                                    max_length=args.max_length),
    )

    if args.eval_first_step:
        trainer.add_callback(EvaluateFirstStepCallback())

    trainer.train(args.resume_from_checkpoint)

    print("Saving last checkpoint of the model")
    last_checkpoint = os.path.join(args.output_dir, args.last_checkpoint)
    model.save_pretrained(last_checkpoint)
    return
337
+
338
+
339
+ if __name__ == '__main__':
340
+ main()
examples/reward_model/reward_model_gpt2_stack_exchange/3.merge_lora.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+
5
+ from peft import PeftModel
6
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
8
+ import torch
9
+ """
10
+ Use this script to merge the LoRA weights into the base model.
11
+ """
12
+
13
+
14
def get_args():
    """
    Parse the command-line options of the LoRA merge script.

    Example:

    python3 3.merge_lora.py \
    --pretrained_model_name_or_path /path/to/pretrained_models/huggingface/gpt2 \
    --adapter_name_or_path /path/to/file_dir/serialization_dir/last_checkpoint \
    --save_directory /path/to/trained_models/reward_model_gpt2_stack

    """
    # NOTE(review): the two model defaults name a ChatGLM2 base model and a
    # Baichuan adapter, which do not look like a matching pair; run.sh always
    # passes both options explicitly. Confirm before relying on the defaults.
    string_options = (
        ("--pretrained_model_name_or_path", "YeungNLP/firefly-chatglm2-6b"),
        ("--adapter_name_or_path", "YeungNLP/firefly-baichuan-7b-qlora-sft"),
        ("--save_directory", "save_directory"),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, default_value in string_options:
        arg_parser.add_argument(flag, default=default_value, type=str)
    arg_parser.add_argument("--num_labels", default=1, type=int)

    return arg_parser.parse_args()
40
+
41
+
42
def main():
    """Merge a LoRA adapter into its base model and save model plus tokenizer.

    The base model is loaded as a sequence-classification model with
    ``args.num_labels`` labels, the adapter is loaded on the CPU, merged with
    ``merge_and_unload`` and the result is written to ``args.save_directory``.
    """
    args = get_args()
    base_name = args.pretrained_model_name_or_path

    config = AutoConfig.from_pretrained(base_name, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(
        base_name,
        trust_remote_code=True,
        # llama does not support the fast tokenizer
        use_fast=config.model_type != 'llama'
    )

    base_model = AutoModelForSequenceClassification.from_pretrained(
        base_name,
        num_labels=args.num_labels,
    )
    peft_model = PeftModel.from_pretrained(base_model, args.adapter_name_or_path, device_map={"": "cpu"})
    merged_model = peft_model.merge_and_unload()

    tokenizer.save_pretrained(args.save_directory)
    merged_model.save_pretrained(args.save_directory)
    return
67
+
68
+
69
+ if __name__ == '__main__':
70
+ main()
examples/reward_model/reward_model_gpt2_stack_exchange/4.test_model.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import os
5
+ import sys
6
+
7
+ pwd = os.path.abspath(os.path.dirname(__file__))
8
+ sys.path.append(os.path.join(pwd, '../../../'))
9
+
10
+ from project_settings import project_path
11
+
12
+ hf_hub_cache = (project_path / "cache/huggingface/hub").as_posix()
13
+
14
+ os.environ["HUGGINGFACE_HUB_CACHE"] = hf_hub_cache
15
+
16
+ import torch
17
+ import torch.nn as nn
18
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
19
+ from transformers.models.deberta_v2.modeling_deberta_v2 import DebertaV2ForSequenceClassification
20
+ from transformers.models.deberta_v2.tokenization_deberta_v2 import DebertaV2Tokenizer
21
+
22
+
23
def get_args():
    """
    Parse the command-line options of the reward-model smoke test.

    Example:

    python3 4.test_model.py --pretrained_model_name_or_path /path/to/trained_models/reward_model_gpt2_stack

    The defaults hold one sample question with two candidate answers.
    """
    default_model_dir = (project_path / "trained_models/reward_model_gpt2_stack").as_posix()
    default_device = "cuda" if torch.cuda.is_available() else "cpu"

    default_question = "I know the question has been asked thousands of times, but I'll ask it again: is there a way (even patchy) to write/read a dumb text file with Javascript or Protoype ? This is only for debug purposes, and is not designed for production. The thing is I need it to work with (at least) both Firefox and IE (preferably under Windows). Thanks in advance !"
    default_response_j = "**It *is* possible to read/write to a local file via JavaScript**: take a look at [TiddlyWIki](http://www.tiddlywiki.com/). *(Caveat: only works for local documents.)* I have actually written a [Single Page Application](http://softwareas.com/towards-a-single-page-application-framework) (SPA) using [twFile](http://jquery.tiddlywiki.org/twFile.html), a part of the TiddlyWiki codebase: 1. Works in different browsers: (IE, Firefox, Chrome) 2. This code is a little old now. TiddlyWiki abandoned the jQuery plugin design a while ago. (Look at the [current TiddlyWiki filesystem.js](http://dev.tiddlywiki.org/browser/Trunk/core/js/FileSystem.js) for more a more recent implementation. It's not isolated for you like the twFile plug-in, though). 3. Although written as a jQuery plug-in, I've studied the code and it is almost completely decoupled from jQuery. **Update:** I have uploaded a [proof-of-concept](http://coolcases.com/jeopardy/) that accesses a local file via JavaScript. * Modifying this application to write to a file is trivial. * I have not tried to get this to work as a file served from a web server, but it should be possible since there are [server-side implementations of TiddlyWiki](http://tiddlywiki.org/wiki/Can_I_use_TiddlyWiki_as_a_multi-user/collaborative/server_based_wiki%3F)<>. **Update:** So it looks like the server side implementations of TiddlyWiki use a server \"adapter\" to modify a file stored on the server, similar to [Peter's description](https://stackoverflow.com/questions/3195720/write-a-file-with-prototype-or-plain-javascript/3195752#3195752). The pure JavaScript method will probably not work if the page is served from a web server due to cross-domain security limitations."
    default_response_k = "Javascript in browsers doesn't allow you to write local files, for **security reasons**. This **may change with time**, but as for now you have to **deal with it**."

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--pretrained_model_name_or_path", default=default_model_dir, type=str)
    arg_parser.add_argument("--question", default=default_question, type=str)
    arg_parser.add_argument("--response_j", default=default_response_j, type=str)
    arg_parser.add_argument("--response_k", default=default_response_k, type=str)
    arg_parser.add_argument('--max_length', default=512, type=int)
    arg_parser.add_argument('--device', default=default_device, type=str)

    return arg_parser.parse_args()
51
+
52
+
53
def main():
    """Score two candidate answers to one question and print the scores.

    Both "Question: ...\\n\\nAnswer: ..." texts are encoded as one padded
    batch, run through the sequence-classification model on ``args.device``
    and the sigmoid of the resulting logits is printed (shape first, then
    values).
    """
    args = get_args()

    tokenizer = AutoTokenizer.from_pretrained(args.pretrained_model_name_or_path)
    model = AutoModelForSequenceClassification.from_pretrained(
        args.pretrained_model_name_or_path,
        num_labels=1,
    )

    # The pad token is set to the EOS token so that batches can be padded.
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

    # Honour --device, which was previously parsed but never applied.
    model.to(args.device)
    model.eval()

    text_j = "Question: {}\n\nAnswer: {}".format(args.question, args.response_j)
    text_k = "Question: {}\n\nAnswer: {}".format(args.question, args.response_k)

    text_encoded = tokenizer(
        [text_j, text_k],
        padding="longest",
        max_length=args.max_length,
        truncation=True,
    )

    input_ids = torch.tensor(text_encoded["input_ids"], dtype=torch.long, device=args.device)
    attention_mask = torch.tensor(text_encoded["attention_mask"], dtype=torch.long, device=args.device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    pooled_logits = outputs[0].cpu()
    score = torch.sigmoid(pooled_logits)

    print(score.shape)
    print(score)

    return
89
+
90
+
91
+ if __name__ == '__main__':
92
+ main()
examples/reward_model/reward_model_gpt2_stack_exchange/run.sh ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash

# Pipeline driver for the GPT-2 Stack Exchange reward model.
#   stage 0: download pretrained model
#   stage 1: prepare data
#   stage 2: train model
#   stage 3: merge lora
#
# sh run.sh --stage 0 --stop_stage 0 --system_version centos
# sh run.sh --stage 1 --stop_stage 1 --system_version centos
# sh run.sh --stage 2 --stop_stage 2 --system_version centos
# sh run.sh --stage 3 --stop_stage 3 --system_version centos

# bitsandbytes
export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# params
system_version="windows";
verbose=true;
stage=0 # start from 0 if you need to start from data preparation
stop_stage=5

pretrained_model_supplier=
pretrained_model_name=gpt2

last_checkpoint_dir=last_checkpoint
final_model_name=reward_model_gpt2_stack

# parse options
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Every option takes a value; without this check "shift 2" below would
      # fail and the loop would never terminate.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires a value" 1>&2
        exit 1;
      fi

      # Current value of the variable named by the option (indirect expansion).
      old_value="${!name}";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done

$verbose && echo "system_version: ${system_version}"

work_dir="$(pwd)"
file_dir="${work_dir}/file_dir"
cache_dir="${file_dir}/cache_dir"
serialization_dir="${file_dir}/serialization_dir"

pretrained_models_dir="${work_dir}/../../../pretrained_models/huggingface/${pretrained_model_supplier}"
final_model_dir="${work_dir}/../../../trained_models/${final_model_name}";

mkdir -p "${file_dir}"
mkdir -p "${cache_dir}"
mkdir -p "${serialization_dir}"
mkdir -p "${pretrained_models_dir}"
mkdir -p "${final_model_dir}"

export PYTHONPATH="${work_dir}/../../.."

# Pick the interpreter for the selected system. A shell function is used
# instead of an alias because bash does not expand aliases in non-interactive
# scripts unless expand_aliases is enabled.
python3_bin="python3"
if [ "${system_version}" == "windows" ]; then
  python3_bin='C:/Users/tianx/PycharmProjects/virtualenv/Transformers/Scripts/python.exe'
elif [ "${system_version}" == "centos" ]; then
  # conda activate Transformers
  python3_bin='/usr/local/miniconda3/envs/Transformers/bin/python3'
elif [ "${system_version}" == "ubuntu" ]; then
  python3_bin='/usr/local/miniconda3/envs/Transformers/bin/python3'
elif [ "${system_version}" == "macos" ]; then
  python3_bin='/Users/honey/PycharmProjects/virtualenv/TrainLLM/bin/python'
fi

python3() {
  # "command" bypasses this function, so the default "python3" does not recurse.
  command "${python3_bin}" "$@"
}


if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
  $verbose && echo "stage 0: download pretrained model"
  cd "${pretrained_models_dir}" || exit 1;

  if [ ! -d "${pretrained_model_name}" ]; then
    git clone "https://huggingface.co/${pretrained_model_supplier:+$pretrained_model_supplier/}${pretrained_model_name}/"

    cd "${pretrained_model_name}" || exit 1;

    # Drop the git metadata and the weight formats this pipeline does not load.
    rm -rf onnx/
    rm -rf .git
    rm -rf .gitattributes
    rm -rf 64-8bits.tflite
    rm -rf 64-fp16.tflite
    rm -rf 64.tflite
    rm -rf flax_model.msgpack
    rm -rf model.safetensors
    rm -rf rust_model.ot
    rm -rf tf_model.h5

  fi

fi


if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
  $verbose && echo "stage 1: prepare data"
  cd "${work_dir}" || exit 1;

  python3 1.prepare_data.py

fi


if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
  $verbose && echo "stage 2: train model"
  cd "${work_dir}" || exit 1;

  python3 2.train_model.py \
  --cache_dir "${cache_dir}" \
  --model_name "${pretrained_models_dir}/${pretrained_model_name}" \
  --last_checkpoint "${last_checkpoint_dir}" \
  --output_dir "${serialization_dir}"

fi


if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
  $verbose && echo "stage 3: merge lora"
  cd "${work_dir}" || exit 1;

  python3 3.merge_lora.py \
  --pretrained_model_name_or_path "${pretrained_models_dir}/${pretrained_model_name}" \
  --adapter_name_or_path "${serialization_dir}/${last_checkpoint_dir}" \
  --save_directory "${final_model_dir}"

fi
examples/reward_model/reward_model_gpt2_stack_exchange/stop.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
#!/usr/bin/env bash

# Force-kill every process whose "ps -aef" line matches the given grep pattern.
# Nothing is done when no process matches, so "kill" is never called without
# arguments.
kill_matching() {
  local pids
  pids=$(ps -aef | grep "$1" | grep -v grep | awk '{print $2}')
  if [ -n "${pids}" ]; then
    # Intentionally unquoted: word splitting turns the list into one PID per argument.
    kill -9 ${pids}
  fi
}

kill_matching 'run.sh'

kill_matching 'Transformers/bin/python3'
main.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import io
5
+ import json
6
+ import os
7
+ import re
8
+ from typing import Dict, List
9
+
10
+ from project_settings import project_path
11
+
12
+ os.environ["HUGGINGFACE_HUB_CACHE"] = (project_path / "cache/huggingface/hub").as_posix()
13
+
14
+ import gradio as gr
15
+ import matplotlib.pyplot as plt
16
+ import numpy as np
17
+ from PIL import Image
18
+ import requests
19
+ import torch
20
+ import torch.nn as nn
21
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
22
+
23
+ from project_settings import project_path
24
+
25
+
26
# Loaded (tokenizer, model) pairs keyed by checkpoint name, so that repeated
# calls do not reload the weights every time.
_REWARD_MODEL_CACHE = {}


def _load_reward_model(pretrained_model_name_or_path: str):
    """Return the (tokenizer, model) pair for a checkpoint, loading it on first use."""
    cached = _REWARD_MODEL_CACHE.get(pretrained_model_name_or_path)
    if cached is None:
        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
        model = AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path,
            num_labels=1,
        )
        model.eval()

        # The pad token is set to the EOS token so that batches can be padded.
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = tokenizer.eos_token_id

        cached = (tokenizer, model)
        _REWARD_MODEL_CACHE[pretrained_model_name_or_path] = cached
    return cached


def calc_reward(pretrained_model_name_or_path: str,
                question: str,
                response_j: str,
                response_k: str = None,
                max_length: int = 512
                ):
    """Score one or two answers to a question with the reward model.

    Args:
        pretrained_model_name_or_path: checkpoint loaded for both the
            tokenizer and the sequence-classification model.
        question: question text.
        response_j: first answer.
        response_k: optional second answer. When it is None only
            ``response_j`` is scored (previously the literal text "None" was
            scored as a second answer).
        max_length: encoded texts are truncated to this many tokens.

    Returns:
        The sigmoid of each answer's logit, rounded to 6 decimals and joined
        with ", " (a single number when ``response_k`` is None).
    """
    tokenizer, model = _load_reward_model(pretrained_model_name_or_path)

    responses = [response_j] if response_k is None else [response_j, response_k]
    texts = ["Question: {}\n\nAnswer: {}".format(question, response) for response in responses]

    text_encoded = tokenizer(
        texts,
        padding="longest",
        max_length=max_length,
        truncation=True,
    )

    input_ids = torch.tensor(text_encoded["input_ids"], dtype=torch.long)
    attention_mask = torch.tensor(text_encoded["attention_mask"], dtype=torch.long)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    pooled_logits = outputs[0].cpu()
    scores = torch.sigmoid(pooled_logits).tolist()

    # Each row holds the single logit of one answer.
    result = ", ".join(str(round(score[0], 6)) for score in scores)
    return result
68
+
69
+
70
+ def main():
71
+
72
+ description = """
73
+ The score for response_j and response_k is independent. Set two box for response to facilitate comparison.
74
+ """
75
+
76
+ examples = [
77
+ [
78
+ """There seems to be a lot of software to control (or emulate) mouse input through the keyboard, but what about the opposite? Basically I'm looking for a way to emulate up/down/left/right clicks with mouse movement, at a fast rate (i.e. lots of very short and quick right clicks while I move the mouse to the right) If I have to learn some scripting language to do it, ok, but I don't know if it would even be possible. Note: This is meant to work on fullscreen, and having a way to turn it on/off with an F# key would be awesome! Thanks for your time :)""",
79
+ """If you're on Windows, what about the On-Screen Keyboard? It's found under **All Programs -> Accessories -> Accessibility** on XP (similar for Vista+) ![alt text](https://i.stack.imgur.com/nPFOE.png)""",
80
+ """If you want something where you can type with your mouse, then I suggest you take a look at [Dasher](http://www.inference.phy.cam.ac.uk/dasher/). That is, if I take your question title as the question. As I really don't quite understand your question.""",
81
+ ],
82
+ [
83
+ """I have installed the Java 3D API on PC via the exe installer, which simply created a new directory with `j3dcore.jar`, `vecmath.jar`, `j3dutils.jar` in a lib sub-directory and `j3dcore-ogl.dll` in a bin sub-directory. Netbeans had no issues and my code compiled and executed smoothly, however once I built my project and tried to run it from the command prompt I got an `UnsatisfiedLinkError` saying that `no j3dcore-ogl in java.library.path`. Google came to the rescue and gave me 3 viable solutions: * by copying the dll file into my JRE's bin directory * by adding the path of the dll file to the library path (`java -Djava.library.path=dllpath`) * load the dll in the program with `System.load()` (I couldn't get this one to work, actually) My question is: Is there an elegant solution to this problem, that I missed? It seems tedious that for each different PC someone would like to use this program on, he'd have to either copy the dll or add it to the library path before it can run. (Side question: How come Netbeans didn't have a problem with the dll?)""",
84
+ """*Edit - After re-reading your question, your issue sounds different. However I'm able to get my running like so, by just dropping all dll files in the same directory as the .bat file starting the java process:* *java -classpath ./YourJar.jar;./lib/j3dcore.jar;./lib/vecmath.jar;./lib/j3dutils.jar package.MainClass* *And that works on multiple user's PCs, so I know simply dropping it in the working directory works.* I believe it depends on the version of Java being used - 64 bit or 32 bit. The correct dll file (of the same name) needs to be in the working directory. I think I was getting a similar problem when the wrong dll was being used, and it's not OS-dependent (if your 64 bit OS has 32-bit Java installed, you'd need the 32 bit j3dcore-ogl.dll file). So the question is, which version of Java are you using *(when running outside of your IDE)*, and which version of the dll are you putting (if any) in the working directory? I don't need any dll files in my path settings to get this working on other's PCs, and did not use System.load(), and did NOT copy files into my user's JRE/bin directory - so I know this is possible without the 3 options you mention.""",
85
+ """I guess DLL are searched in all folders in %PATH% on windows. (LD\_LIBRARY\_PATH for UNIX flavors) Could you try by adding the path to dll to %path% variable? It appears that you are trying package a product with many jars as dependencies. You may benefit from [One-Jar](http://one-jar.sourceforge.net/index.php?page=details&file=native). It claims to have native dll support.""",
86
+
87
+ ],
88
+ [
89
+ """``` pt=new Date(2019,11,12,8,2,3) console.log(pt.getFullYear()," ",pt.getMonth()); ``` gives result `2019 " " 11` ``` console.log(pt.getFullYear()+" "+pt.getMonth()); ``` gives the result as `2019 11` What is the difference between using, and + in this example?""",
90
+ """``` console.log(pt.getFullYear()," ",pt.getMonth()); ``` The above example passes three separate arguments to console.log. What it outputs depends on how `console.log` is implemented. It has changed over time and is little bit different between browsers. When invoked with arguments like in the example, it has access to the variables and can display them with some magic depending on type, for example if they are arrays or objects. In your example it is displayed as: ``` 2019 " " 11 ``` where the numbers are in blue text, indicating that it was a variable of type number, and the empty string is shown in red, indicating that is was a string. Compare this to the following example, where it all is converted to a string before being passed to `console.log` in one argument: ``` console.log(pt.getFullYear()+" "+pt.getMonth()); ``` where it is displayed as ``` 2017 5 ``` with black text, indicating that it was passed as a string in the first parameter. The first parameter to `console.log` can be used as a format string, like `printf` in c and other languages. For example ``` console.log( "%d %d", pt.getFullYear(), pt.getMonth() ); ``` where %d is a place holder for a number. The output is in black text and gives the exact same output as your second example. ``` console.log("%d %d", pt.getFullYear(),pt.getMonth(), pt.getDate()); ``` In the example above, the year and month will be shown in black text, but the date will be in blue. This is because the format string only have two placeholders, but there are three arguments. `console.log` show the extra arguments, using the magic. Documentation: * [Standard](https://console.spec.whatwg.org/) * [Google Chrome](https://developers.google.com/web/tools/chrome-devtools/console/console-reference). 
* [Mozilla Firefox](https://developer.mozilla.org/en-US/docs/Web/API/Console) * [Microsoft Edge](https://msdn.microsoft.com/library/hh772169.aspx) * [Apple Safari](https://developer.apple.com/library/content/documentation/AppleApplications/Conceptual/Safari_Developer_Guide/Console/Console.html) * [Opera](http://www.opera.com/dragonfly/documentation/console/)""",
91
+ """console.log is part of the Console API and is accesible in various browsers. You can find its full documentation on [MDN](https://developer.mozilla.org/en-US/docs/Web/API/Console/log). It states that console log has the following parameters: ``` obj1 ... objN ``` > > A list of JavaScript objects to output. The string representations of > each of these objects are appended together in the order listed and > output. > > > So, when you concatenate the parameters you pass only one object to the function and when you pass multiple parameters `console.log` will do the concatenation for you.""",
92
+
93
+ ],
94
+
95
+ [
96
+ """I have a `<div id="content">`. I want to load the content from <http://vietduc24h.com> into my `div`: ``` <html> <head> <script type="text/javascript"> $(document).ready(function() { $("#content").attr("src","http://vietduc24h.com"); }) </script> </head> <body> <div id="content"></div> </body> </html ``` I don't want to use an iframe. How can I do this?""",
97
+ """Try this code with the jQuery `Load` function: ``` $('#content').load('http://vietduc24h.com', function() { alert('Load was performed.'); }); ``` If you encounter in security issues because of the Cross-Origin-Resource-Sharing policy than you have to use a proxy in your server code.""",
98
+ """Try this: ``` $("#content").html('<object data="http://vietduc24h.com">'); ``` Taken from [this answer](https://stackoverflow.com/a/9964050/646668).""",
99
+
100
+ ],
101
+
102
+ ]
103
+
104
+ demo = gr.Interface(
105
+ fn=calc_reward,
106
+ inputs=[
107
+ gr.Dropdown(choices=["qgyd2021/reward_model_gpt2_stack_exchange"],
108
+ value="qgyd2021/reward_model_gpt2_stack_exchange",
109
+ label="model_name",
110
+ ),
111
+ gr.Text(label="question", lines=2, max_lines=200),
112
+ gr.Text(label="response_j", lines=4, max_lines=200),
113
+ gr.Text(label="response_k", lines=4, max_lines=200),
114
+ ],
115
+ outputs=[gr.Text(label="reward score", lines=1, max_lines=1)],
116
+ examples=examples,
117
+ cache_examples=False,
118
+ examples_per_page=6,
119
+ title="Reward Model GPT2 Stack Exchange",
120
+ description=description,
121
+ )
122
+ demo.launch()
123
+
124
+ return
125
+
126
+
127
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
project_settings.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import os
4
+ from pathlib import Path
5
+
6
+
7
# Absolute directory containing this settings module, exposed as a
# pathlib.Path so callers can build sub-paths with the "/" operator.
project_path = Path(os.path.abspath(os.path.dirname(__file__)))


# Nothing to run: this module only exposes `project_path`.
if __name__ == '__main__':
    pass
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==3.38.0
2
+ transformers==4.30.2
3
+ torch==1.13.1