Initial commit.

Browse files

Files changed (10) hide show

README.md +64 -0
config.json +73 -0
merges.txt +0 -0
model_args.json +1 -0
pytorch_model.bin +3 -0
special_tokens_map.json +1 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -0
training_args.bin +3 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,64 @@

+---
+language: en
+license: apache-2.0
+tags:
+- transformers
+- bart
+- paraphrase
+- seq2seq
+datasets:
+- quora
+- paws
+---
+# BART Paraphrase Model (Large)
+A large BART seq2seq (text2text generation) model fine-tuned on 3 paraphrase datasets.
+## Model description
+The BART model was proposed in [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://arxiv.org/abs/1910.13461) by Lewis et al. (2019).
+- Bart uses a standard seq2seq/machine translation architecture with a bidirectional encoder (like BERT) and a left-to-right decoder (like GPT).
+- The pretraining task involves randomly shuffling the order of the original sentences and a novel in-filling scheme, where spans of text are replaced with a single mask token.
+- BART is particularly effective when fine tuned for text generation. This model is fine-tuned on 3 paraphrase datasets (Quora, PAWS and MSR paraphrase corpus).
+The original BART code is from this [repository](https://github.com/pytorch/fairseq/tree/master/examples/bart).
+## Intended uses & limitations
+You can use the pre-trained model for paraphrasing an input sentence.
+### How to use
+```python
+import torch
+from transformers import BartForConditionalGeneration, BartTokenizer
+input_sentence = "They were there to enjoy us and they were there to pray for us."
+model = BartForConditionalGeneration.from_pretrained('eugenesiow/bart-paraphrase')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
+tokenizer = BartTokenizer.from_pretrained('eugenesiow/bart-paraphrase')
+batch = tokenizer(input_sentence, return_tensors='pt')
+generated_ids = model.generate(batch['input_ids'])
+generated_sentence = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+print(generated_sentence)
+```
+### Output
+```
+['They were there to enjoy us and to pray for us.']
+```
+## Training data
+The model was fine-tuned on a pretrained [`facebook/bart-large`](https://huggingface.co/facebook/bart-large), using the [Quora](https://huggingface.co/datasets/quora), [PAWS](https://huggingface.co/datasets/paws) and [MSR paraphrase corpus](https://www.microsoft.com/en-us/download/details.aspx?id=52398).
+## Training procedure
+We follow the training procedure provided in the [simpletransformers](https://github.com/ThilinaRajapakse/simpletransformers) seq2seq [example](https://github.com/ThilinaRajapakse/simpletransformers/blob/master/examples/seq2seq/paraphrasing/train.py).
+## BibTeX entry and citation info
+```bibtex
+@misc{lewis2019bart,
+      title={BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension},
+      author={Mike Lewis and Yinhan Liu and Naman Goyal and Marjan Ghazvininejad and Abdelrahman Mohamed and Omer Levy and Ves Stoyanov and Luke Zettlemoyer},
+      year={2019},
+      eprint={1910.13461},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+```

config.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "_name_or_path": "facebook/bart-large",
+  "activation_dropout": 0.1,
+  "activation_function": "gelu",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForConditionalGeneration"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "classif_dropout": 0.1,
+  "classifier_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "early_stopping": true,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "forced_eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_position_embeddings": 1024,
+  "model_type": "bart",
+  "no_repeat_ngram_size": 3,
+  "normalize_before": false,
+  "num_beams": 4,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "scale_embedding": false,
+  "task_specific_params": {
+    "summarization": {
+      "length_penalty": 1.0,
+      "max_length": 128,
+      "min_length": 12,
+      "num_beams": 4
+    },
+    "summarization_cnn": {
+      "length_penalty": 2.0,
+      "max_length": 142,
+      "min_length": 56,
+      "num_beams": 4
+    },
+    "summarization_xsum": {
+      "length_penalty": 1.0,
+      "max_length": 62,
+      "min_length": 11,
+      "num_beams": 6
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.10.2",
+  "use_cache": true,
+  "vocab_size": 50265
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model_args.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 64, "evaluate_during_training": true, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2500, "evaluate_during_training_verbose": true, "evaluate_each_epoch": true, "fp16": false, "gradient_accumulation_steps": 1, "learning_rate": 5e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "facebook/bart-large", "model_type": "bart", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 2, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 254, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": -1, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": false, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 2047, "weight_decay": 0.0, "model_class": "Seq2SeqModel", "base_marian_model_name": "facebook/bart-large", "dataset_class": null, "dataset_cache_dir": null, "do_sample": true, "early_stopping": true, "evaluate_generated_text": false, "faiss_d": 768, "faiss_m": 128, "include_title_in_knowledge_dataset": true, "length_penalty": 2.0, "max_length": 128, "max_steps": -1, "num_beams": null, "num_return_sequences": 3, "rag_embed_batch_size": 16, "repetition_penalty": 1.0, "save_knowledge_dataset": true, "save_knowledge_dataset_with_checkpoints": false, "split_text_character": " ", "split_text_n": 100, "src_lang": "en_XX", "tgt_lang": "ro_RO", "top_k": 50, "top_p": 0.95, "use_multiprocessed_decoding": false}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89cee0616fbdc3125ce8db1f5883e5693fea564643666a732f250c263f4246ad
+size 1625557313

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "facebook/bart-large", "tokenizer_class": "BartTokenizer"}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48da8f5990832568039f7a6d8b21377674e536aaa771935f7a44c7cc0f3498f5
+size 3375

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff