“[shujaatalishariati]” committed on
Commit
847e3e1
·
1 Parent(s): 9367038

Initial commit for Gradio app with GECToR

Browse files
app.py CHANGED
@@ -7,6 +7,8 @@ import nltk
7
  from nltk.corpus import wordnet
8
  from textblob import TextBlob
9
  from pattern.en import conjugate, lemma, pluralize, singularize
 
 
10
 
11
  # Initialize the English text classification pipeline for AI detection
12
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -84,29 +86,41 @@ def correct_singular_plural_errors(text):
84
 
85
  return ' '.join(corrected_text)
86
 
87
- # Function to check and correct article errors
88
- def correct_article_errors(text):
89
- doc = nlp(text)
90
- corrected_text = []
91
- for token in doc:
92
- if token.text in ['a', 'an']:
93
- next_token = token.nbor(1)
94
- if token.text == "a" and next_token.text[0].lower() in "aeiou":
95
- corrected_text.append("an")
96
- elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
97
- corrected_text.append("a")
98
- else:
99
- corrected_text.append(token.text)
100
- else:
101
- corrected_text.append(token.text)
102
- return ' '.join(corrected_text)
103
-
104
  # Function to correct overall grammar using TextBlob
105
- def correct_grammar(text):
106
  blob = TextBlob(text)
107
  corrected_text = str(blob.correct()) # TextBlob's built-in spelling correction
108
  return corrected_text
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  # Paraphrasing function using SpaCy and NLTK (Humanifier)
111
  def paraphrase_with_spacy_nltk(text):
112
  doc = nlp(text)
@@ -132,28 +146,17 @@ def paraphrase_with_spacy_nltk(text):
132
  else:
133
  paraphrased_words.append(token.text)
134
 
135
- # Join the words back into a sentence
136
- paraphrased_sentence = ' '.join(paraphrased_words)
137
-
138
- return paraphrased_sentence
139
 
140
  # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
141
  def paraphrase_and_correct(text):
142
  # Step 1: Paraphrase the text
143
  paraphrased_text = paraphrase_with_spacy_nltk(text)
144
 
145
- # Step 2: Apply grammatical corrections on the paraphrased text
146
- corrected_text = correct_article_errors(paraphrased_text)
147
- corrected_text = capitalize_sentences_and_nouns(corrected_text)
148
- corrected_text = correct_singular_plural_errors(corrected_text)
149
 
150
- # Step 3: Correct tense errors
151
- corrected_text = correct_tense_errors(corrected_text)
152
-
153
- # Step 4: Correct overall grammar using TextBlob
154
- final_text = correct_grammar(corrected_text)
155
-
156
- return final_text
157
 
158
  # Gradio app setup with two tabs
159
  with gr.Blocks() as demo:
@@ -163,15 +166,13 @@ with gr.Blocks() as demo:
163
  label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
164
  score1 = gr.Textbox(lines=1, label='Prob')
165
 
166
- # Connect the prediction function to the button
167
  button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
168
 
169
  with gr.Tab("Humanifier"):
170
  text_input = gr.Textbox(lines=5, label="Input Text")
171
  paraphrase_button = gr.Button("Paraphrase & Correct")
172
- output_text = gr.Textbox(label="Paraphrased Text")
173
 
174
- # Connect the paraphrasing function to the button
175
  paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
176
 
177
  # Launch the app
 
7
  from nltk.corpus import wordnet
8
  from textblob import TextBlob
9
  from pattern.en import conjugate, lemma, pluralize, singularize
10
+ from gector.gec_model import GecBERTModel # Import GECToR Model
11
+ from utils.helpers import read_lines, normalize # GECToR utilities
12
 
13
  # Initialize the English text classification pipeline for AI detection
14
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
86
 
87
  return ' '.join(corrected_text)
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # Function to correct overall grammar using TextBlob
90
+ def correct_grammar_textblob(text):
91
  blob = TextBlob(text)
92
  corrected_text = str(blob.correct()) # TextBlob's built-in spelling correction
93
  return corrected_text
94
 
95
+ # Initialize GECToR Model for Grammar Correction
96
+ def load_gector_model():
97
+ model_path = ["gector/roberta_1_gector.th"] # Ensure model file is placed correctly
98
+ vocab_path = "output_vocabulary"
99
+ model = GecBERTModel(vocab_path=vocab_path,
100
+ model_paths=model_path,
101
+ max_len=50,
102
+ min_len=3,
103
+ iterations=5,
104
+ min_error_probability=0.0,
105
+ lowercase_tokens=0,
106
+ model_name="roberta",
107
+ special_tokens_fix=1,
108
+ log=False,
109
+ confidence=0,
110
+ del_confidence=0,
111
+ is_ensemble=False,
112
+ weigths=None)
113
+ return model
114
+
115
+ # Load the GECToR model
116
+ gector_model = load_gector_model()
117
+
118
+ # Function to correct grammar using GECToR
119
+ def correct_grammar_gector(text):
120
+ sentences = [text.split()]
121
+ corrected_sentences, _ = gector_model.handle_batch(sentences)
122
+ return " ".join(corrected_sentences[0])
123
+
124
  # Paraphrasing function using SpaCy and NLTK (Humanifier)
125
  def paraphrase_with_spacy_nltk(text):
126
  doc = nlp(text)
 
146
  else:
147
  paraphrased_words.append(token.text)
148
 
149
+ return ' '.join(paraphrased_words)
 
 
 
150
 
151
  # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
152
  def paraphrase_and_correct(text):
153
  # Step 1: Paraphrase the text
154
  paraphrased_text = paraphrase_with_spacy_nltk(text)
155
 
156
+ # Step 2: Apply grammatical corrections using GECToR
157
+ corrected_text = correct_grammar_gector(paraphrased_text)
 
 
158
 
159
+ return corrected_text
 
 
 
 
 
 
160
 
161
  # Gradio app setup with two tabs
162
  with gr.Blocks() as demo:
 
166
  label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
167
  score1 = gr.Textbox(lines=1, label='Prob')
168
 
 
169
  button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
170
 
171
  with gr.Tab("Humanifier"):
172
  text_input = gr.Textbox(lines=5, label="Input Text")
173
  paraphrase_button = gr.Button("Paraphrase & Correct")
174
+ output_text = gr.Textbox(label="Paraphrased and Corrected Text")
175
 
 
176
  paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
177
 
178
  # Launch the app
gector/bert_token_embedder.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tweaked version of corresponding AllenNLP file"""
2
+ import logging
3
+ from copy import deepcopy
4
+ from typing import Dict
5
+
6
+ import torch
7
+ import torch.nn.functional as F
8
+ from allennlp.modules.token_embedders.token_embedder import TokenEmbedder
9
+ from allennlp.nn import util
10
+ from transformers import AutoModel, PreTrainedModel
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class PretrainedBertModel:
16
+ """
17
+ In some instances you may want to load the same BERT model twice
18
+ (e.g. to use as a token embedder and also as a pooling layer).
19
+ This factory provides a cache so that you don't actually have to load the model twice.
20
+ """
21
+
22
+ _cache: Dict[str, PreTrainedModel] = {}
23
+
24
+ @classmethod
25
+ def load(cls, model_name: str, cache_model: bool = True) -> PreTrainedModel:
26
+ if model_name in cls._cache:
27
+ return PretrainedBertModel._cache[model_name]
28
+
29
+ model = AutoModel.from_pretrained(model_name)
30
+ if cache_model:
31
+ cls._cache[model_name] = model
32
+
33
+ return model
34
+
35
+
36
+ class BertEmbedder(TokenEmbedder):
37
+ """
38
+ A ``TokenEmbedder`` that produces BERT embeddings for your tokens.
39
+ Should be paired with a ``BertIndexer``, which produces wordpiece ids.
40
+ Most likely you want to use ``PretrainedBertEmbedder``
41
+ for one of the named pretrained models, not this base class.
42
+ Parameters
43
+ ----------
44
+ bert_model: ``BertModel``
45
+ The BERT model being wrapped.
46
+ top_layer_only: ``bool``, optional (default = ``False``)
47
+ If ``True``, then only return the top layer instead of applying the scalar mix.
48
+ max_pieces : int, optional (default: 512)
49
+ The BERT embedder uses positional embeddings and so has a corresponding
50
+ maximum length for its input ids. Assuming the inputs are windowed
51
+ and padded appropriately by this length, the embedder will split them into a
52
+ large batch, feed them into BERT, and recombine the output as if it was a
53
+ longer sequence.
54
+ num_start_tokens : int, optional (default: 1)
55
+ The number of starting special tokens input to BERT (usually 1, i.e., [CLS])
56
+ num_end_tokens : int, optional (default: 1)
57
+ The number of ending tokens input to BERT (usually 1, i.e., [SEP])
58
+ scalar_mix_parameters: ``List[float]``, optional, (default = None)
59
+ If not ``None``, use these scalar mix parameters to weight the representations
60
+ produced by different layers. These mixing weights are not updated during
61
+ training.
62
+ """
63
+
64
+ def __init__(
65
+ self,
66
+ bert_model: PreTrainedModel,
67
+ top_layer_only: bool = False,
68
+ max_pieces: int = 512,
69
+ num_start_tokens: int = 1,
70
+ num_end_tokens: int = 1
71
+ ) -> None:
72
+ super().__init__()
73
+ self.bert_model = deepcopy(bert_model)
74
+ self.output_dim = bert_model.config.hidden_size
75
+ self.max_pieces = max_pieces
76
+ self.num_start_tokens = num_start_tokens
77
+ self.num_end_tokens = num_end_tokens
78
+ self._scalar_mix = None
79
+
80
+ def set_weights(self, freeze):
81
+ for param in self.bert_model.parameters():
82
+ param.requires_grad = not freeze
83
+ return
84
+
85
+ def get_output_dim(self) -> int:
86
+ return self.output_dim
87
+
88
+ def forward(
89
+ self,
90
+ input_ids: torch.LongTensor,
91
+ offsets: torch.LongTensor = None
92
+ ) -> torch.Tensor:
93
+ """
94
+ Parameters
95
+ ----------
96
+ input_ids : ``torch.LongTensor``
97
+ The (batch_size, ..., max_sequence_length) tensor of wordpiece ids.
98
+ offsets : ``torch.LongTensor``, optional
99
+ The BERT embeddings are one per wordpiece. However it's possible/likely
100
+ you might want one per original token. In that case, ``offsets``
101
+ represents the indices of the desired wordpiece for each original token.
102
+ Depending on how your token indexer is configured, this could be the
103
+ position of the last wordpiece for each token, or it could be the position
104
+ of the first wordpiece for each token.
105
+ For example, if you had the sentence "Definitely not", and if the corresponding
106
+ wordpieces were ["Def", "##in", "##ite", "##ly", "not"], then the input_ids
107
+ would be 5 wordpiece ids, and the "last wordpiece" offsets would be [3, 4].
108
+ If offsets are provided, the returned tensor will contain only the wordpiece
109
+ embeddings at those positions, and (in particular) will contain one embedding
110
+ per token. If offsets are not provided, the entire tensor of wordpiece embeddings
111
+ will be returned.
112
+ """
113
+
114
+ batch_size, full_seq_len = input_ids.size(0), input_ids.size(-1)
115
+ initial_dims = list(input_ids.shape[:-1])
116
+
117
+ # The embedder may receive an input tensor that has a sequence length longer than can
118
+ # be fit. In that case, we should expect the wordpiece indexer to create padded windows
119
+ # of length `self.max_pieces` for us, and have them concatenated into one long sequence.
120
+ # E.g., "[CLS] I went to the [SEP] [CLS] to the store to [SEP] ..."
121
+ # We can then split the sequence into sub-sequences of that length, and concatenate them
122
+ # along the batch dimension so we effectively have one huge batch of partial sentences.
123
+ # This can then be fed into BERT without any sentence length issues. Keep in mind
124
+ # that the memory consumption can dramatically increase for large batches with extremely
125
+ # long sentences.
126
+ needs_split = full_seq_len > self.max_pieces
127
+ last_window_size = 0
128
+ if needs_split:
129
+ # Split the flattened list by the window size, `max_pieces`
130
+ split_input_ids = list(input_ids.split(self.max_pieces, dim=-1))
131
+
132
+ # We want all sequences to be the same length, so pad the last sequence
133
+ last_window_size = split_input_ids[-1].size(-1)
134
+ padding_amount = self.max_pieces - last_window_size
135
+ split_input_ids[-1] = F.pad(split_input_ids[-1], pad=[0, padding_amount], value=0)
136
+
137
+ # Now combine the sequences along the batch dimension
138
+ input_ids = torch.cat(split_input_ids, dim=0)
139
+
140
+ input_mask = (input_ids != 0).long()
141
+ # input_ids may have extra dimensions, so we reshape down to 2-d
142
+ # before calling the BERT model and then reshape back at the end.
143
+ all_encoder_layers = self.bert_model(
144
+ input_ids=util.combine_initial_dims(input_ids),
145
+ attention_mask=util.combine_initial_dims(input_mask),
146
+ )[0]
147
+ if len(all_encoder_layers[0].shape) == 3:
148
+ all_encoder_layers = torch.stack(all_encoder_layers)
149
+ elif len(all_encoder_layers[0].shape) == 2:
150
+ all_encoder_layers = torch.unsqueeze(all_encoder_layers, dim=0)
151
+
152
+ if needs_split:
153
+ # First, unpack the output embeddings into one long sequence again
154
+ unpacked_embeddings = torch.split(all_encoder_layers, batch_size, dim=1)
155
+ unpacked_embeddings = torch.cat(unpacked_embeddings, dim=2)
156
+
157
+ # Next, select indices of the sequence such that it will result in embeddings representing the original
158
+ # sentence. To capture maximal context, the indices will be the middle part of each embedded window
159
+ # sub-sequence (plus any leftover start and final edge windows), e.g.,
160
+ # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
161
+ # "[CLS] I went to the very fine [SEP] [CLS] the very fine store to eat [SEP]"
162
+ # with max_pieces = 8 should produce max context indices [2, 3, 4, 10, 11, 12] with additional start
163
+ # and final windows with indices [0, 1] and [14, 15] respectively.
164
+
165
+ # Find the stride as half the max pieces, ignoring the special start and end tokens
166
+ # Calculate an offset to extract the centermost embeddings of each window
167
+ stride = (self.max_pieces - self.num_start_tokens - self.num_end_tokens) // 2
168
+ stride_offset = stride // 2 + self.num_start_tokens
169
+
170
+ first_window = list(range(stride_offset))
171
+
172
+ max_context_windows = [
173
+ i
174
+ for i in range(full_seq_len)
175
+ if stride_offset - 1 < i % self.max_pieces < stride_offset + stride
176
+ ]
177
+
178
+ # Lookback what's left, unless it's the whole self.max_pieces window
179
+ if full_seq_len % self.max_pieces == 0:
180
+ lookback = self.max_pieces
181
+ else:
182
+ lookback = full_seq_len % self.max_pieces
183
+
184
+ final_window_start = full_seq_len - lookback + stride_offset + stride
185
+ final_window = list(range(final_window_start, full_seq_len))
186
+
187
+ select_indices = first_window + max_context_windows + final_window
188
+
189
+ initial_dims.append(len(select_indices))
190
+
191
+ recombined_embeddings = unpacked_embeddings[:, :, select_indices]
192
+ else:
193
+ recombined_embeddings = all_encoder_layers
194
+
195
+ # Recombine the outputs of all layers
196
+ # (layers, batch_size * d1 * ... * dn, sequence_length, embedding_dim)
197
+ # recombined = torch.cat(combined, dim=2)
198
+ input_mask = (recombined_embeddings != 0).long()
199
+
200
+ if self._scalar_mix is not None:
201
+ mix = self._scalar_mix(recombined_embeddings, input_mask)
202
+ else:
203
+ mix = recombined_embeddings[-1]
204
+
205
+ # At this point, mix is (batch_size * d1 * ... * dn, sequence_length, embedding_dim)
206
+
207
+ if offsets is None:
208
+ # Resize to (batch_size, d1, ..., dn, sequence_length, embedding_dim)
209
+ dims = initial_dims if needs_split else input_ids.size()
210
+ return util.uncombine_initial_dims(mix, dims)
211
+ else:
212
+ # offsets is (batch_size, d1, ..., dn, orig_sequence_length)
213
+ offsets2d = util.combine_initial_dims(offsets)
214
+ # now offsets is (batch_size * d1 * ... * dn, orig_sequence_length)
215
+ range_vector = util.get_range_vector(
216
+ offsets2d.size(0), device=util.get_device_of(mix)
217
+ ).unsqueeze(1)
218
+ # selected embeddings is also (batch_size * d1 * ... * dn, orig_sequence_length)
219
+ selected_embeddings = mix[range_vector, offsets2d]
220
+
221
+ return util.uncombine_initial_dims(selected_embeddings, offsets.size())
222
+
223
+
224
+ # @TokenEmbedder.register("bert-pretrained")
225
+ class PretrainedBertEmbedder(BertEmbedder):
226
+
227
+ """
228
+ Parameters
229
+ ----------
230
+ pretrained_model: ``str``
231
+ Either the name of the pretrained model to use (e.g. 'bert-base-uncased'),
232
+ or the path to the .tar.gz file with the model weights.
233
+ If the name is a key in the list of pretrained models at
234
+ https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/modeling.py#L41
235
+ the corresponding path will be used; otherwise it will be interpreted as a path or URL.
236
+ requires_grad : ``bool``, optional (default = False)
237
+ If True, compute gradient of BERT parameters for fine tuning.
238
+ top_layer_only: ``bool``, optional (default = ``False``)
239
+ If ``True``, then only return the top layer instead of applying the scalar mix.
240
+ scalar_mix_parameters: ``List[float]``, optional, (default = None)
241
+ If not ``None``, use these scalar mix parameters to weight the representations
242
+ produced by different layers. These mixing weights are not updated during
243
+ training.
244
+ """
245
+
246
+ def __init__(
247
+ self,
248
+ pretrained_model: str,
249
+ requires_grad: bool = False,
250
+ top_layer_only: bool = False,
251
+ special_tokens_fix: int = 0,
252
+ ) -> None:
253
+ model = PretrainedBertModel.load(pretrained_model)
254
+
255
+ for param in model.parameters():
256
+ param.requires_grad = requires_grad
257
+
258
+ super().__init__(
259
+ bert_model=model,
260
+ top_layer_only=top_layer_only
261
+ )
262
+
263
+ if special_tokens_fix:
264
+ try:
265
+ vocab_size = self.bert_model.embeddings.word_embeddings.num_embeddings
266
+ except AttributeError:
267
+ # reserve more space
268
+ vocab_size = self.bert_model.word_embedding.num_embeddings + 5
269
+ self.bert_model.resize_token_embeddings(vocab_size + 1)
gector/datareader.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tweaked AllenNLP dataset reader."""
2
+ import logging
3
+ import re
4
+ from random import random
5
+ from typing import Dict, List
6
+
7
+ from allennlp.common.file_utils import cached_path
8
+ from allennlp.data.dataset_readers.dataset_reader import DatasetReader
9
+ from allennlp.data.fields import TextField, SequenceLabelField, MetadataField, Field
10
+ from allennlp.data.instance import Instance
11
+ from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
12
+ from allennlp.data.tokenizers import Token
13
+ from overrides import overrides
14
+
15
+ from utils.helpers import SEQ_DELIMETERS, START_TOKEN
16
+
17
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
18
+
19
+
20
+ @DatasetReader.register("seq2labels_datareader")
21
+ class Seq2LabelsDatasetReader(DatasetReader):
22
+ """
23
+ Reads instances from a pretokenised file where each line is in the following format:
24
+
25
+ WORD###TAG [TAB] WORD###TAG [TAB] ..... \n
26
+
27
+ and converts it into a ``Dataset`` suitable for sequence tagging. You can also specify
28
+ alternative delimiters in the constructor.
29
+
30
+ Parameters
31
+ ----------
32
+ delimiters: ``dict``
33
+ The dictionary with all delimiters.
34
+ token_indexers : ``Dict[str, TokenIndexer]``, optional (default=``{"tokens": SingleIdTokenIndexer()}``)
35
+ We use this to define the input representation for the text. See :class:`TokenIndexer`.
36
+ Note that the `output` tags will always correspond to single token IDs based on how they
37
+ are pre-tokenised in the data file.
38
+ max_len: if set, sentences longer than this are truncated
39
+ """
40
+ # fix broken sentences mostly in Lang8
41
+ BROKEN_SENTENCES_REGEXP = re.compile(r'\.[a-zA-RT-Z]')
42
+
43
+ def __init__(self,
44
+ token_indexers: Dict[str, TokenIndexer] = None,
45
+ delimeters: dict = SEQ_DELIMETERS,
46
+ skip_correct: bool = False,
47
+ skip_complex: int = 0,
48
+ lazy: bool = False,
49
+ max_len: int = None,
50
+ test_mode: bool = False,
51
+ tag_strategy: str = "keep_one",
52
+ tn_prob: float = 0,
53
+ tp_prob: float = 0,
54
+ broken_dot_strategy: str = "keep") -> None:
55
+ super().__init__(lazy)
56
+ self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
57
+ self._delimeters = delimeters
58
+ self._max_len = max_len
59
+ self._skip_correct = skip_correct
60
+ self._skip_complex = skip_complex
61
+ self._tag_strategy = tag_strategy
62
+ self._broken_dot_strategy = broken_dot_strategy
63
+ self._test_mode = test_mode
64
+ self._tn_prob = tn_prob
65
+ self._tp_prob = tp_prob
66
+
67
+ @overrides
68
+ def _read(self, file_path):
69
+ # if `file_path` is a URL, redirect to the cache
70
+ file_path = cached_path(file_path)
71
+ with open(file_path, "r") as data_file:
72
+ logger.info("Reading instances from lines in file at: %s", file_path)
73
+ for line in data_file:
74
+ line = line.strip("\n")
75
+ # skip blank and broken lines
76
+ if not line or (not self._test_mode and self._broken_dot_strategy == 'skip'
77
+ and self.BROKEN_SENTENCES_REGEXP.search(line) is not None):
78
+ continue
79
+
80
+ tokens_and_tags = [pair.rsplit(self._delimeters['labels'], 1)
81
+ for pair in line.split(self._delimeters['tokens'])]
82
+ try:
83
+ tokens = [Token(token) for token, tag in tokens_and_tags]
84
+ tags = [tag for token, tag in tokens_and_tags]
85
+ except ValueError:
86
+ tokens = [Token(token[0]) for token in tokens_and_tags]
87
+ tags = None
88
+
89
+ if tokens and tokens[0] != Token(START_TOKEN):
90
+ tokens = [Token(START_TOKEN)] + tokens
91
+
92
+ words = [x.text for x in tokens]
93
+ if self._max_len is not None:
94
+ tokens = tokens[:self._max_len]
95
+ tags = None if tags is None else tags[:self._max_len]
96
+ instance = self.text_to_instance(tokens, tags, words)
97
+ if instance:
98
+ yield instance
99
+
100
+ def extract_tags(self, tags: List[str]):
101
+ op_del = self._delimeters['operations']
102
+
103
+ labels = [x.split(op_del) for x in tags]
104
+
105
+ comlex_flag_dict = {}
106
+ # get flags
107
+ for i in range(5):
108
+ idx = i + 1
109
+ comlex_flag_dict[idx] = sum([len(x) > idx for x in labels])
110
+
111
+ if self._tag_strategy == "keep_one":
112
+ # get only first candidates for r_tags in right and the last for left
113
+ labels = [x[0] for x in labels]
114
+ elif self._tag_strategy == "merge_all":
115
+ # consider phrases as words
116
+ pass
117
+ else:
118
+ raise Exception("Incorrect tag strategy")
119
+
120
+ detect_tags = ["CORRECT" if label == "$KEEP" else "INCORRECT" for label in labels]
121
+ return labels, detect_tags, comlex_flag_dict
122
+
123
+ def text_to_instance(self, tokens: List[Token], tags: List[str] = None,
124
+ words: List[str] = None) -> Instance: # type: ignore
125
+ """
126
+ We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
127
+ """
128
+ # pylint: disable=arguments-differ
129
+ fields: Dict[str, Field] = {}
130
+ sequence = TextField(tokens, self._token_indexers)
131
+ fields["tokens"] = sequence
132
+ fields["metadata"] = MetadataField({"words": words})
133
+ if tags is not None:
134
+ labels, detect_tags, complex_flag_dict = self.extract_tags(tags)
135
+ if self._skip_complex and complex_flag_dict[self._skip_complex] > 0:
136
+ return None
137
+ rnd = random()
138
+ # skip TN
139
+ if self._skip_correct and all(x == "CORRECT" for x in detect_tags):
140
+ if rnd > self._tn_prob:
141
+ return None
142
+ # skip TP
143
+ else:
144
+ if rnd > self._tp_prob:
145
+ return None
146
+
147
+ fields["labels"] = SequenceLabelField(labels, sequence,
148
+ label_namespace="labels")
149
+ fields["d_tags"] = SequenceLabelField(detect_tags, sequence,
150
+ label_namespace="d_tags")
151
+ return Instance(fields)
gector/gec_model.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Wrapper of AllenNLP model. Fixes errors based on model predictions"""
2
+ import logging
3
+ import os
4
+ import sys
5
+ from time import time
6
+
7
+ import torch
8
+ from allennlp.data.dataset import Batch
9
+ from allennlp.data.fields import TextField
10
+ from allennlp.data.instance import Instance
11
+ from allennlp.data.tokenizers import Token
12
+ from allennlp.data.vocabulary import Vocabulary
13
+ from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
14
+ from allennlp.nn import util
15
+
16
+ from gector.bert_token_embedder import PretrainedBertEmbedder
17
+ from gector.seq2labels_model import Seq2Labels
18
+ from gector.tokenizer_indexer import PretrainedBertIndexer
19
+ from utils.helpers import PAD, UNK, get_target_sent_by_edits, START_TOKEN
20
+ from utils.helpers import get_weights_name
21
+
22
+ logging.getLogger("werkzeug").setLevel(logging.ERROR)
23
+ logger = logging.getLogger(__file__)
24
+
25
+
26
+ class GecBERTModel(object):
27
+ def __init__(self, vocab_path=None, model_paths=None,
28
+ weigths=None,
29
+ max_len=50,
30
+ min_len=3,
31
+ lowercase_tokens=False,
32
+ log=False,
33
+ iterations=3,
34
+ model_name='roberta',
35
+ special_tokens_fix=1,
36
+ is_ensemble=True,
37
+ min_error_probability=0.0,
38
+ confidence=0,
39
+ del_confidence=0,
40
+ resolve_cycles=False,
41
+ ):
42
+ self.model_weights = list(map(float, weigths)) if weigths else [1] * len(model_paths)
43
+ self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
44
+ self.max_len = max_len
45
+ self.min_len = min_len
46
+ self.lowercase_tokens = lowercase_tokens
47
+ self.min_error_probability = min_error_probability
48
+ self.vocab = Vocabulary.from_files(vocab_path)
49
+ self.log = log
50
+ self.iterations = iterations
51
+ self.confidence = confidence
52
+ self.del_conf = del_confidence
53
+ self.resolve_cycles = resolve_cycles
54
+ # set training parameters and operations
55
+
56
+ self.indexers = []
57
+ self.models = []
58
+ for model_path in model_paths:
59
+ if is_ensemble:
60
+ model_name, special_tokens_fix = self._get_model_data(model_path)
61
+ weights_name = get_weights_name(model_name, lowercase_tokens)
62
+ self.indexers.append(self._get_indexer(weights_name, special_tokens_fix))
63
+ model = Seq2Labels(vocab=self.vocab,
64
+ text_field_embedder=self._get_embbeder(weights_name, special_tokens_fix),
65
+ confidence=self.confidence,
66
+ del_confidence=self.del_conf,
67
+ ).to(self.device)
68
+ if torch.cuda.is_available():
69
+ model.load_state_dict(torch.load(model_path), strict=False)
70
+ else:
71
+ model.load_state_dict(torch.load(model_path,
72
+ map_location=torch.device('cpu')),
73
+ strict=False)
74
+ model.eval()
75
+ self.models.append(model)
76
+
77
+ @staticmethod
78
+ def _get_model_data(model_path):
79
+ model_name = model_path.split('/')[-1]
80
+ tr_model, stf = model_name.split('_')[:2]
81
+ return tr_model, int(stf)
82
+
83
+ def _restore_model(self, input_path):
84
+ if os.path.isdir(input_path):
85
+ print("Model could not be restored from directory", file=sys.stderr)
86
+ filenames = []
87
+ else:
88
+ filenames = [input_path]
89
+ for model_path in filenames:
90
+ try:
91
+ if torch.cuda.is_available():
92
+ loaded_model = torch.load(model_path)
93
+ else:
94
+ loaded_model = torch.load(model_path,
95
+ map_location=lambda storage,
96
+ loc: storage)
97
+ except:
98
+ print(f"{model_path} is not valid model", file=sys.stderr)
99
+ own_state = self.model.state_dict()
100
+ for name, weights in loaded_model.items():
101
+ if name not in own_state:
102
+ continue
103
+ try:
104
+ if len(filenames) == 1:
105
+ own_state[name].copy_(weights)
106
+ else:
107
+ own_state[name] += weights
108
+ except RuntimeError:
109
+ continue
110
+ print("Model is restored", file=sys.stderr)
111
+
112
+ def predict(self, batches):
113
+ t11 = time()
114
+ predictions = []
115
+ for batch, model in zip(batches, self.models):
116
+ batch = util.move_to_device(batch.as_tensor_dict(), 0 if torch.cuda.is_available() else -1)
117
+ with torch.no_grad():
118
+ prediction = model.forward(**batch)
119
+ predictions.append(prediction)
120
+
121
+ preds, idx, error_probs = self._convert(predictions)
122
+ t55 = time()
123
+ if self.log:
124
+ print(f"Inference time {t55 - t11}")
125
+ return preds, idx, error_probs
126
+
127
+ def get_token_action(self, token, index, prob, sugg_token):
128
+ """Get the suggested edit action for a token."""
129
+ # cases when we don't need to do anything
130
+ if prob < self.min_error_probability or sugg_token in [UNK, PAD, '$KEEP']:
131
+ return None
132
+
133
+ if sugg_token.startswith('$REPLACE_') or sugg_token.startswith('$TRANSFORM_') or sugg_token == '$DELETE':
134
+ start_pos = index
135
+ end_pos = index + 1
136
+ elif sugg_token.startswith("$APPEND_") or sugg_token.startswith("$MERGE_"):
137
+ start_pos = index + 1
138
+ end_pos = index + 1
139
+
140
+ if sugg_token == "$DELETE":
141
+ sugg_token_clear = ""
142
+ elif sugg_token.startswith('$TRANSFORM_') or sugg_token.startswith("$MERGE_"):
143
+ sugg_token_clear = sugg_token[:]
144
+ else:
145
+ sugg_token_clear = sugg_token[sugg_token.index('_') + 1:]
146
+
147
+ return start_pos - 1, end_pos - 1, sugg_token_clear, prob
148
+
149
+ def _get_embbeder(self, weigths_name, special_tokens_fix):
150
+ embedders = {'bert': PretrainedBertEmbedder(
151
+ pretrained_model=weigths_name,
152
+ requires_grad=False,
153
+ top_layer_only=True,
154
+ special_tokens_fix=special_tokens_fix)
155
+ }
156
+ text_field_embedder = BasicTextFieldEmbedder(
157
+ token_embedders=embedders,
158
+ embedder_to_indexer_map={"bert": ["bert", "bert-offsets"]},
159
+ allow_unmatched_keys=True)
160
+ return text_field_embedder
161
+
162
+ def _get_indexer(self, weights_name, special_tokens_fix):
163
+ bert_token_indexer = PretrainedBertIndexer(
164
+ pretrained_model=weights_name,
165
+ do_lowercase=self.lowercase_tokens,
166
+ max_pieces_per_token=5,
167
+ special_tokens_fix=special_tokens_fix
168
+ )
169
+ return {'bert': bert_token_indexer}
170
+
171
def preprocess(self, token_batch):
    """Turn a batch of token lists into one indexed ``Batch`` per indexer.

    Every sentence is truncated to the shorter of ``self.max_len`` and the
    longest non-empty sentence in the batch, then prefixed with the
    ``'$START'`` token.

    Returns an empty list when the batch holds no non-empty sentence;
    otherwise a list with one indexed ``Batch`` for each entry of
    ``self.indexers``.
    """
    non_empty_lengths = [len(seq) for seq in token_batch if seq]
    if not non_empty_lengths:
        return []
    limit = min(max(non_empty_lengths), self.max_len)

    def to_instance(sequence, indexer):
        # Prepend the start marker after truncation so it is never cut off.
        words = ['$START'] + sequence[:limit]
        return Instance({'tokens': TextField([Token(word) for word in words], indexer)})

    batches = []
    for indexer in self.indexers:
        indexed = Batch([to_instance(seq, indexer) for seq in token_batch])
        indexed.index_instances(self.vocab)
        batches.append(indexed)
    return batches
188
+
189
def _convert(self, data):
    """Combine per-model outputs into one weighted-average prediction.

    Parameters
    ----------
    data : sequence of model output dicts, each holding the tensors
        ``'class_probabilities_labels'`` and ``'max_error_probability'``.
        Entries are paired positionally with ``self.model_weights``.

    Returns
    -------
    ``(probs, idx, error_probs)`` as nested Python lists: the highest
    averaged label probability along the last axis, the index of that
    label, and the averaged error probability.
    """
    # The normaliser does not change inside the loop, so compute it once.
    total_weight = sum(self.model_weights)
    all_class_probs = torch.zeros_like(data[0]['class_probabilities_labels'])
    error_probs = torch.zeros_like(data[0]['max_error_probability'])
    for output, weight in zip(data, self.model_weights):
        all_class_probs += weight * output['class_probabilities_labels'] / total_weight
        error_probs += weight * output['max_error_probability'] / total_weight

    best_probs, best_idx = torch.max(all_class_probs, dim=-1)
    return best_probs.tolist(), best_idx.tolist(), error_probs.tolist()
200
+
201
def update_final_batch(self, final_batch, pred_ids, pred_batch,
                       prev_preds_dict):
    """Merge one iteration's predictions into ``final_batch``.

    ``pred_batch[k]`` is the new prediction for sentence ``pred_ids[k]``.
    A changed sentence always overwrites its slot in ``final_batch``.
    It stays scheduled for the next iteration only if that exact
    prediction has not been produced for this sentence before.

    Returns ``(final_batch, new_pred_ids, total_updated)``. ``final_batch``
    and ``prev_preds_dict`` are modified in place.
    """
    still_active = []
    changed = 0
    for position, sent_id in enumerate(pred_ids):
        current = final_batch[sent_id]
        candidate = pred_batch[position]
        history = prev_preds_dict[sent_id]
        if current == candidate:
            # Nothing changed for this sentence; it drops out of iteration.
            continue

        seen_before = candidate in history
        final_batch[sent_id] = candidate
        changed += 1
        if not seen_before:
            # New prediction: remember it and keep iterating on it.
            still_active.append(sent_id)
            history.append(candidate)
        # A repeated prediction is still applied but not re-scheduled.
    return final_batch, still_active, changed
221
+
222
def postprocess_batch(self, batch, all_probabilities, all_idxs,
                      error_probs):
    """Apply the predicted edit labels to every sentence of a batch.

    Parameters
    ----------
    batch : list of token lists (without the start token).
    all_probabilities : per-sentence list of per-position label probabilities.
    all_idxs : per-sentence list of per-position predicted label indices;
        position 0 belongs to the start token.
    error_probs : per-sentence error probability.

    Returns
    -------
    A list with one token list per input sentence. A sentence is returned
    unchanged when every predicted label is the no-op label, or when its
    error probability is below ``self.min_error_probability``.
    """
    all_results = []
    noop_index = self.vocab.get_token_index("$KEEP", "labels")
    for tokens, probabilities, idxs, error_prob in zip(batch,
                                                       all_probabilities,
                                                       all_idxs,
                                                       error_probs):
        length = min(len(tokens), self.max_len)
        edits = []

        # Skip the whole sentence if no position predicts an edit. Compare
        # against the looked-up no-op index rather than a hard-coded 0 so
        # this shortcut agrees with the per-position check below.
        if all(idx == noop_index for idx in idxs):
            all_results.append(tokens)
            continue

        # Skip the whole sentence if the model is not confident it has errors.
        if error_prob < self.min_error_probability:
            all_results.append(tokens)
            continue

        for i in range(length + 1):
            # Position 0 is the start token; tokens are shifted by one.
            token = START_TOKEN if i == 0 else tokens[i - 1]
            # Skip positions where nothing has to change.
            if idxs[i] == noop_index:
                continue

            sugg_token = self.vocab.get_token_from_index(idxs[i],
                                                         namespace='labels')
            action = self.get_token_action(token, i, probabilities[i],
                                           sugg_token)
            if not action:
                continue

            edits.append(action)
        all_results.append(get_target_sent_by_edits(tokens, edits))
    return all_results
263
+
264
def handle_batch(self, full_batch):
    """Iteratively correct a batch of tokenised sentences.

    Sentences shorter than ``self.min_len`` are never sent to the model.
    The remaining ones are predicted for up to ``self.iterations`` rounds;
    a sentence leaves the loop once its prediction stops changing or
    repeats an earlier prediction.

    Parameters
    ----------
    full_batch : list of token lists.

    Returns
    -------
    ``(final_batch, total_updates)``: the corrected sentences in input
    order, and the number of sentence updates applied over all rounds.
    """
    final_batch = full_batch[:]
    batch_size = len(full_batch)
    prev_preds_dict = {i: [sentence] for i, sentence in enumerate(final_batch)}
    # Select the sentences long enough to predict in a single linear pass.
    pred_ids = [i for i, sentence in enumerate(full_batch)
                if len(sentence) >= self.min_len]
    total_updates = 0

    for n_iter in range(self.iterations):
        orig_batch = [final_batch[i] for i in pred_ids]

        sequences = self.preprocess(orig_batch)

        if not sequences:
            break
        probabilities, idxs, error_probs = self.predict(sequences)

        pred_batch = self.postprocess_batch(orig_batch, probabilities,
                                            idxs, error_probs)
        if self.log:
            print(f"Iteration {n_iter + 1}. Predicted {round(100*len(pred_ids)/batch_size, 1)}% of sentences.")

        final_batch, pred_ids, cnt = \
            self.update_final_batch(final_batch, pred_ids, pred_batch,
                                    prev_preds_dict)
        total_updates += cnt

        if not pred_ids:
            break

    return final_batch, total_updates
gector/seq2labels_model.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Basic model. Predicts tags for every token"""
2
+ from typing import Dict, Optional, List, Any
3
+
4
+ import numpy
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from allennlp.data import Vocabulary
8
+ from allennlp.models.model import Model
9
+ from allennlp.modules import TimeDistributed, TextFieldEmbedder
10
+ from allennlp.nn import InitializerApplicator, RegularizerApplicator
11
+ from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
12
+ from allennlp.training.metrics import CategoricalAccuracy
13
+ from overrides import overrides
14
+ from torch.nn.modules.linear import Linear
15
+
16
+
17
@Model.register("seq2labels")
class Seq2Labels(Model):
    """
    Token-level tagger with two output heads on top of a text field embedder.

    The embedded sequence is fed to two linear projections: one predicts a
    label from the ``labels_namespace`` vocabulary for each token, the other
    predicts a detection tag from the ``detect_namespace`` vocabulary.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for the output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the ``tokens`` ``TextField`` we get as input to the model.
        Its ``'bert'`` token embedder supplies the projection input size.
    predictor_dropout : ``float``, optional (default=``0.0``)
        Dropout probability applied to the embedded text before the labels
        projection. The detection projection receives the embedded text
        without this dropout.
    labels_namespace : ``str``, optional (default=``labels``)
        Vocabulary namespace of the per-token labels.
    detect_namespace : ``str``, optional (default=``d_tags``)
        Vocabulary namespace of the detection tags. The index of its
        ``"INCORRECT"`` token is used to compute the error probability.
    verbose_metrics : ``bool``, optional (default = False)
        Stored on the instance; not read by the code in this class.
    label_smoothing : ``float``, optional (default=``0.0``)
        Label smoothing passed to the cross-entropy loss of the labels head.
    confidence : ``float``, optional (default=``0.0``)
        Constant added to the probability of label index 0
        (presumably ``$KEEP`` -- confirm against the vocabulary).
    del_confidence : ``float``, optional (default=``0.0``)
        Constant added to the probability of label index 1
        (presumably ``$DELETE`` -- confirm against the vocabulary).
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 predictor_dropout=0.0,
                 labels_namespace: str = "labels",
                 detect_namespace: str = "d_tags",
                 verbose_metrics: bool = False,
                 label_smoothing: float = 0.0,
                 confidence: float = 0.0,
                 del_confidence: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(Seq2Labels, self).__init__(vocab, regularizer)

        # Order matters: decode() iterates these namespaces.
        self.label_namespaces = [labels_namespace,
                                 detect_namespace]
        self.text_field_embedder = text_field_embedder
        self.num_labels_classes = self.vocab.get_vocab_size(labels_namespace)
        self.num_detect_classes = self.vocab.get_vocab_size(detect_namespace)
        self.label_smoothing = label_smoothing
        self.confidence = confidence
        self.del_conf = del_confidence
        # Index of the detection tag that marks a token as erroneous.
        self.incorr_index = self.vocab.get_token_index("INCORRECT",
                                                       namespace=detect_namespace)

        self._verbose_metrics = verbose_metrics
        self.predictor_dropout = TimeDistributed(torch.nn.Dropout(predictor_dropout))

        # Both heads read the output size of the embedder registered as 'bert'.
        self.tag_labels_projection_layer = TimeDistributed(
            Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_labels_classes))

        self.tag_detect_projection_layer = TimeDistributed(
            Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_detect_classes))

        self.metrics = {"accuracy": CategoricalAccuracy()}

        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                labels: torch.LongTensor = None,
                d_tags: torch.LongTensor = None,
                metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors. The dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.
        labels : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold labels of shape
            ``(batch_size, num_tokens)``.
        d_tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold detection tags of shape
            ``(batch_size, num_tokens)``.
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            metadata containing the original words in the sentence to be tagged under a 'words' key.

        Returns
        -------
        An output dictionary consisting of:
        logits_labels : torch.FloatTensor
            Output of the labels projection.
        logits_d_tags : torch.FloatTensor
            Output of the detection projection.
        class_probabilities_labels : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, num_labels_classes)``: softmax of
            ``logits_labels`` with ``confidence`` added at label index 0 and
            ``del_confidence`` added at label index 1 (so rows need not sum to 1).
        class_probabilities_d_tags : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, num_detect_classes)``: softmax of
            ``logits_d_tags``.
        max_error_probability : torch.FloatTensor
            Per sequence, the maximum masked probability of the ``INCORRECT`` detection tag.
        loss : torch.FloatTensor, optional
            Sum of the labels loss and the detection loss; present only when both
            ``labels`` and ``d_tags`` are given.
        words : List, optional
            The ``'words'`` entry of each metadata dict; present only when ``metadata`` is given.

        """
        encoded_text = self.text_field_embedder(tokens)
        batch_size, sequence_length, _ = encoded_text.size()
        mask = get_text_field_mask(tokens)
        # Dropout is applied only on the labels branch.
        logits_labels = self.tag_labels_projection_layer(self.predictor_dropout(encoded_text))
        logits_d = self.tag_detect_projection_layer(encoded_text)

        class_probabilities_labels = F.softmax(logits_labels, dim=-1).view(
            [batch_size, sequence_length, self.num_labels_classes])
        class_probabilities_d = F.softmax(logits_d, dim=-1).view(
            [batch_size, sequence_length, self.num_detect_classes])
        # Zero out masked positions before taking the per-sequence maximum.
        error_probs = class_probabilities_d[:, :, self.incorr_index] * mask
        incorr_prob = torch.max(error_probs, dim=-1)[0]

        # Additive bias on label indices 0 and 1 only; every other class gets 0.
        probability_change = [self.confidence, self.del_conf] + [0] * (self.num_labels_classes - 2)
        class_probabilities_labels += torch.FloatTensor(probability_change).repeat(
            (batch_size, sequence_length, 1)).to(class_probabilities_labels.device)

        output_dict = {"logits_labels": logits_labels,
                       "logits_d_tags": logits_d,
                       "class_probabilities_labels": class_probabilities_labels,
                       "class_probabilities_d_tags": class_probabilities_d,
                       "max_error_probability": incorr_prob}
        if labels is not None and d_tags is not None:
            loss_labels = sequence_cross_entropy_with_logits(logits_labels, labels, mask,
                                                             label_smoothing=self.label_smoothing)
            loss_d = sequence_cross_entropy_with_logits(logits_d, d_tags, mask)
            # The same metric object is updated with both heads.
            for metric in self.metrics.values():
                metric(logits_labels, labels, mask.float())
                metric(logits_d, d_tags, mask.float())
            output_dict["loss"] = loss_labels + loss_d

        if metadata is not None:
            output_dict["words"] = [x["words"] for x in metadata]
        return output_dict

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple position-wise argmax over each token, converts indices to string labels, and
        adds one key per label namespace to the dictionary with the result.
        """
        # NOTE(review): the lookup key is built from the namespace name, while forward()
        # writes the fixed keys 'class_probabilities_labels' / 'class_probabilities_d_tags';
        # the two agree only for the default namespaces 'labels' and 'd_tags'.
        for label_namespace in self.label_namespaces:
            all_predictions = output_dict[f'class_probabilities_{label_namespace}']
            all_predictions = all_predictions.cpu().data.numpy()
            # Accept both a batch (3 dims) and a single un-batched sequence.
            if all_predictions.ndim == 3:
                predictions_list = [all_predictions[i] for i in range(all_predictions.shape[0])]
            else:
                predictions_list = [all_predictions]
            all_tags = []

            for predictions in predictions_list:
                argmax_indices = numpy.argmax(predictions, axis=-1)
                tags = [self.vocab.get_token_from_index(x, namespace=label_namespace)
                        for x in argmax_indices]
                all_tags.append(tags)
            output_dict[f'{label_namespace}'] = all_tags
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        """Return the current value of every metric in ``self.metrics``, optionally resetting them."""
        metrics_to_return = {metric_name: metric.get_metric(reset) for
                             metric_name, metric in self.metrics.items()}
        return metrics_to_return
+ return metrics_to_return
gector/tokenization.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from time import time
3
+
4
+
5
+ os.environ['TOKENIZERS_PARALLELISM'] = 'false'
6
+
7
+
8
def get_bpe_groups(token_offsets, bpe_offsets, input_ids, max_bpe_pieces=5):
    """Group sub-word (BPE) pieces by the word-level token that contains them.

    Parameters
    ----------
    token_offsets : list of ``(start, end)`` character spans, one per token.
    bpe_offsets : list of ``(start, end)`` character spans, one per piece.
        Pieces from the first ``(0, 0)`` entry onwards are ignored.
    input_ids : list of piece ids; only its length is used.
    max_bpe_pieces : maximum number of pieces kept per token.

    Returns
    -------
    ``(bpe_groups, saved_ids)``: for each token the list of piece indices
    kept for it, and the sorted list of all piece indices that were not
    dropped for exceeding ``max_bpe_pieces``.
    """
    # Find how many leading offsets are real (stop at the first (0, 0)).
    try:
        bpe_size = bpe_offsets.index((0, 0))
    except ValueError:
        bpe_size = len(bpe_offsets)

    bpe_groups = []
    redundant_ids = set()
    last_used_bpe = 0
    for start_token, end_token in token_offsets:
        bpe_group = []
        mapping_is_found = False
        for i in range(last_used_bpe, bpe_size):
            start_bpe, end_bpe = bpe_offsets[i]
            if start_bpe >= start_token and end_bpe <= end_token:
                # Keep the piece only while the per-token budget allows it.
                if len(bpe_group) < max_bpe_pieces:
                    bpe_group.append(i)
                else:
                    redundant_ids.add(i)
                last_used_bpe = i + 1
                mapping_is_found = True
            elif mapping_is_found:
                # Pieces are ordered, so the token's span is exhausted.
                break
        bpe_groups.append(bpe_group)

    # Set membership keeps this filter linear in the number of pieces.
    saved_ids = [i for i in range(len(input_ids)) if i not in redundant_ids]
    return bpe_groups, saved_ids
41
+
42
+
43
def reduce_input_ids(input_ids, bpe_groups, saved_ids,
                     max_bpe_length=80, max_bpe_pieces=5):
    """Shrink a piece sequence to a length budget and compute token offsets.

    While more than ``max_bpe_length`` pieces are kept, the per-token piece
    budget is lowered by one and the surplus pieces of every token are
    dropped.

    Parameters
    ----------
    input_ids : list of piece ids.
    bpe_groups : per-token lists of piece indices. Not modified.
    saved_ids : indices into ``input_ids`` that are currently kept.
    max_bpe_length : maximum number of pieces to keep.
    max_bpe_pieces : starting per-token piece budget.

    Returns
    -------
    ``(reduced_ids, correct_offsets)``: the kept piece ids, and for each
    token the position of its first piece inside ``reduced_ids`` (clamped
    to the last valid position).
    """
    # Work on a shallow copy so the caller's grouping is left untouched.
    groups = list(bpe_groups)

    # Stop once the per-token budget reaches zero: nothing is left to trim
    # after that, so looping on would never terminate.
    while len(saved_ids) > max_bpe_length and max_bpe_pieces > 0:
        max_bpe_pieces -= 1
        dropped = set()
        for token_id, group in enumerate(groups):
            if len(group) > max_bpe_pieces:
                dropped.update(group[max_bpe_pieces:])
                groups[token_id] = group[:max_bpe_pieces]
        if dropped:
            saved_ids = [i for i in saved_ids if i not in dropped]

    reduced_ids = [input_ids[i] for i in saved_ids]

    # Offset of a token = number of kept pieces belonging to earlier tokens.
    correct_offsets = []
    idx = 0
    last_position = len(reduced_ids) - 1
    for group in groups:
        correct_offsets.append(min(idx, last_position))
        idx += len(group)

    return reduced_ids, correct_offsets
64
+
65
+
66
def get_offsets_and_reduce_input_ids(tokenizer_output, token_offset_list,
                                     index_name="bert", max_bpe_length=80,
                                     max_bpe_pieces=5):
    """Post-process tokenizer output into reduced ids, token offsets and masks.

    Parameters
    ----------
    tokenizer_output : mapping with ``'input_ids'`` and ``'offset_mapping'``,
        each indexable by sentence position.
    token_offset_list : per-sentence lists of word-level ``(start, end)`` spans.
    index_name : prefix used for the output keys.
    max_bpe_length : maximum number of pieces kept per sentence.
    max_bpe_pieces : maximum number of pieces kept per token.

    Returns
    -------
    A dict with the keys ``index_name`` (reduced piece ids),
    ``f"{index_name}-offsets"`` (first-piece position of every token) and
    ``"mask"`` (a list of ones, one per token), each holding one list per
    sentence.
    """
    output_ids, output_offsets, output_masks = [], [], []
    for i, token_offsets in enumerate(token_offset_list):
        input_ids = tokenizer_output['input_ids'][i]
        bpe_offsets = tokenizer_output['offset_mapping'][i]

        # Map pieces to tokens, enforcing the per-token budget.
        bpe_groups, saved_ids = get_bpe_groups(token_offsets, bpe_offsets,
                                               input_ids,
                                               max_bpe_pieces=max_bpe_pieces)

        # Enforce the per-sentence budget and compute token offsets.
        reduced_ids, correct_offsets = reduce_input_ids(input_ids, bpe_groups,
                                                        saved_ids,
                                                        max_bpe_length=max_bpe_length,
                                                        max_bpe_pieces=max_bpe_pieces)

        output_ids.append(reduced_ids)
        output_offsets.append(correct_offsets)
        # The mask is token-level: one entry per token offset.
        output_masks.append([1] * len(correct_offsets))

    return {index_name: output_ids,
            f"{index_name}-offsets": output_offsets,
            "mask": output_masks}
109
+
110
+
111
def get_offset_for_tokens(tokens):
    """Return the character span of every token in ``" ".join(tokens)``.

    Parameters
    ----------
    tokens : list of strings.

    Returns
    -------
    A list of ``(start, end)`` tuples, one per token, where ``end`` is
    exclusive. Each token is searched for from the end of the previous
    token onwards.
    """
    sentence = " ".join(tokens)
    token_offsets = []
    end_idx = 0
    for token in tokens:
        # Search from end_idx directly instead of copying the sentence tail.
        idx = sentence.index(token, end_idx)
        end_idx = idx + len(token)
        token_offsets.append((idx, end_idx))
    return token_offsets
121
+
122
+
123
def get_token_offsets(batch):
    """Compute word-level character offsets for every sentence of a batch.

    ``batch`` is an iterable of token lists; the result holds, in the same
    order, the ``get_offset_for_tokens`` output for each of them.
    """
    return [get_offset_for_tokens(sentence_tokens) for sentence_tokens in batch]
129
+
130
+
131
def pad_output(output, pad_idx=0):
    """Right-pad every list in every field to that field's longest list.

    Parameters
    ----------
    output : dict mapping a field name to a list of lists.
    pad_idx : value appended to shorter lists.

    Returns
    -------
    A new dict with the same keys whose inner lists all have equal length
    within a field. An empty field stays an empty list.
    """
    padded_output = {}
    for input_key, indexes in output.items():
        # default=0 keeps an empty batch from raising ValueError in max().
        max_len = max((len(index_list) for index_list in indexes), default=0)
        padded_output[input_key] = [
            index_list + [pad_idx] * (max_len - len(index_list))
            for index_list in indexes
        ]
    return padded_output
143
+
144
+
145
def tokenize_batch(tokenizer, batch_tokens, index_name="bert",
                   max_bpe_length=80, max_bpe_pieces=5):
    """Sub-word tokenize a batch of pre-tokenized sentences.

    Parameters
    ----------
    tokenizer : object exposing ``batch_encode_plus``; it is called with
        ``return_offsets_mapping=True`` and must return ``'input_ids'`` and
        ``'offset_mapping'``.
    batch_tokens : list of token lists.
    index_name : prefix used for the output keys.
    max_bpe_length : maximum number of pieces kept per sentence.
    max_bpe_pieces : maximum number of pieces kept per token.

    Returns
    -------
    A dict with the keys ``index_name``, ``f"{index_name}-offsets"`` and
    ``"mask"``; every value is a list of equally long lists, padded with 0.
    """
    # Sentences are rebuilt with single spaces so that the word-level
    # offsets below line up with the tokenizer's character offsets.
    batch_sentences = [" ".join(tokens) for tokens in batch_tokens]
    token_offset_list = get_token_offsets(batch_tokens)

    tokenizer_output = tokenizer.batch_encode_plus(batch_sentences,
                                                   pad_to_max_length=False,
                                                   return_offsets_mapping=True,
                                                   add_special_tokens=False)

    output = get_offsets_and_reduce_input_ids(tokenizer_output,
                                              token_offset_list,
                                              index_name=index_name,
                                              max_bpe_length=max_bpe_length,
                                              max_bpe_pieces=max_bpe_pieces)

    return pad_output(output)
gector/tokenizer_indexer.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tweaked version of corresponding AllenNLP file"""
2
+ import logging
3
+ from collections import defaultdict
4
+ from typing import Dict, List, Callable
5
+
6
+ from allennlp.common.util import pad_sequence_to_length
7
+ from allennlp.data.token_indexers.token_indexer import TokenIndexer
8
+ from allennlp.data.tokenizers.token import Token
9
+ from allennlp.data.vocabulary import Vocabulary
10
+ from overrides import overrides
11
+ from transformers import AutoTokenizer
12
+
13
+ from utils.helpers import START_TOKEN
14
+
15
+ from gector.tokenization import tokenize_batch
16
+ import copy
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ # TODO(joelgrus): Figure out how to generate token_type_ids out of this token indexer.
22
+
23
+
24
class TokenizerIndexer(TokenIndexer[int]):
    """
    A token indexer that does the wordpiece-tokenization (e.g. for BERT embeddings).
    If you are using one of the pretrained BERT models, you'll want to use the ``PretrainedBertIndexer``
    subclass rather than this base class.

    Parameters
    ----------
    tokenizer : tokenizer object
        Passed to ``tokenize_batch``, which calls its ``batch_encode_plus`` method.
    max_pieces : int, optional (default: 512)
        Maximum number of word pieces kept per sentence; forwarded to
        ``tokenize_batch`` as ``max_bpe_length``.
    max_pieces_per_token : int, optional (default: 3)
        Maximum number of word pieces kept for a single token; forwarded to
        ``tokenize_batch`` as ``max_bpe_pieces``.
    token_min_padding_length : ``int``, optional (default=``0``)
        See :class:`TokenIndexer`.
    """

    def __init__(self,
                 tokenizer: Callable[[str], List[str]],
                 max_pieces: int = 512,
                 max_pieces_per_token: int = 3,
                 token_min_padding_length: int = 0) -> None:
        super().__init__(token_min_padding_length)

        # The BERT code itself does a two-step tokenization:
        #    sentence -> [words], and then word -> [wordpieces]
        # In AllenNLP, the first step is implemented as the ``BertBasicWordSplitter``,
        # and this token indexer handles the second.

        self.tokenizer = tokenizer
        self.max_pieces_per_token = max_pieces_per_token
        self.max_pieces = max_pieces
        # NOTE(review): stored but not read anywhere in this class.
        self.max_pieces_per_sentence = 80

    @overrides
    def tokens_to_indices(self, tokens: List[Token],
                          vocabulary: Vocabulary,
                          index_name: str) -> Dict[str, List[int]]:
        """Index one sentence.

        Returns a dict with the keys ``index_name``,
        ``f"{index_name}-offsets"`` and ``"mask"``, each mapping to a flat
        list for this sentence. ``vocabulary`` is not used.
        """
        text = [token.text for token in tokens]
        # tokenize_batch works on batches, so wrap the sentence and unwrap below.
        batch_tokens = [text]

        # Forward index_name so the produced keys agree with get_keys();
        # previously the keys were always prefixed with "bert".
        output_fast = tokenize_batch(self.tokenizer,
                                     batch_tokens,
                                     index_name=index_name,
                                     max_bpe_length=self.max_pieces,
                                     max_bpe_pieces=self.max_pieces_per_token)
        return {key: value[0] for key, value in output_fast.items()}

    @overrides
    def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str, int]]):
        # If we only use pretrained models, we don't need to do anything here.
        pass

    @overrides
    def get_padding_token(self) -> int:
        return 0

    @overrides
    def get_padding_lengths(self, token: int) -> Dict[str, int]:  # pylint: disable=unused-argument
        return {}

    @overrides
    def pad_token_sequence(self,
                           tokens: Dict[str, List[int]],
                           desired_num_tokens: Dict[str, int],
                           padding_lengths: Dict[str, int]) -> Dict[str, List[int]]:  # pylint: disable=unused-argument
        """Pad every indexed field to the length requested for its key."""
        return {key: pad_sequence_to_length(val, desired_num_tokens[key])
                for key, val in tokens.items()}

    @overrides
    def get_keys(self, index_name: str) -> List[str]:
        """
        We need to override this because the indexer generates multiple keys.
        """
        # pylint: disable=no-self-use
        # NOTE(review): "-type-ids" is advertised here but tokens_to_indices
        # does not produce it.
        return [index_name, f"{index_name}-offsets", f"{index_name}-type-ids", "mask"]
102
+
103
+
104
class PretrainedBertIndexer(TokenizerIndexer):
    # pylint: disable=line-too-long
    """
    A ``TokenIndexer`` backed by a pretrained tokenizer loaded with
    ``AutoTokenizer.from_pretrained``.

    Parameters
    ----------
    pretrained_model: ``str``
        Name or path of the pretrained model; passed to
        ``AutoTokenizer.from_pretrained``.
    do_lowercase: ``bool``, optional (default = True)
        Whether to lowercase the tokens before converting to wordpiece ids.
        A warning is logged when this disagrees with a ``-cased`` /
        ``-uncased`` suffix of ``pretrained_model``.
    max_pieces: int, optional (default: 512)
        Forwarded to :class:`TokenizerIndexer`.
    max_pieces_per_token: int, optional (default: 5)
        Forwarded to :class:`TokenizerIndexer`.
    special_tokens_fix: int, optional (default: 0)
        When truthy, ``START_TOKEN`` is added to the tokenizer and mapped
        to the last id of the enlarged vocabulary.
    """

    def __init__(self,
                 pretrained_model: str,
                 do_lowercase: bool = True,
                 max_pieces: int = 512,
                 max_pieces_per_token: int = 5,
                 special_tokens_fix: int = 0) -> None:

        # Warn when the casing flag contradicts the model name.
        name_says_cased = pretrained_model.endswith("-cased")
        name_says_uncased = pretrained_model.endswith("-uncased")
        if name_says_cased and do_lowercase:
            logger.warning("Your BERT model appears to be cased, "
                           "but your indexer is lowercasing tokens.")
        elif name_says_uncased and not do_lowercase:
            logger.warning("Your BERT model appears to be uncased, "
                           "but your indexer is not lowercasing tokens.")

        hf_tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model,
            do_lower_case=do_lowercase,
            do_basic_tokenize=False,
            use_fast=True,
        )

        # Give every tokenizer flavour a ``vocab`` attribute.
        if hasattr(hf_tokenizer, 'encoder'):
            hf_tokenizer.vocab = hf_tokenizer.encoder
        if hasattr(hf_tokenizer, 'sp_model'):
            # Pieces missing from the sentencepiece model map to id 1.
            hf_tokenizer.vocab = defaultdict(lambda: 1)
            piece_count = hf_tokenizer.sp_model.get_piece_size()
            for piece_id in range(piece_count):
                piece = hf_tokenizer.sp_model.id_to_piece(piece_id)
                hf_tokenizer.vocab[piece] = piece_id

        if special_tokens_fix:
            hf_tokenizer.add_tokens([START_TOKEN])
            hf_tokenizer.vocab[START_TOKEN] = len(hf_tokenizer) - 1

        super().__init__(tokenizer=hf_tokenizer,
                         max_pieces=max_pieces,
                         max_pieces_per_token=max_pieces_per_token)
161
+
gector/trainer.py ADDED
@@ -0,0 +1,845 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tweaked version of corresponding AllenNLP file"""
2
+ import datetime
3
+ import logging
4
+ import math
5
+ import os
6
+ import time
7
+ import traceback
8
+ from typing import Dict, Optional, List, Tuple, Union, Iterable, Any
9
+
10
+ import torch
11
+ import torch.optim.lr_scheduler
12
+ from allennlp.common import Params
13
+ from allennlp.common.checks import ConfigurationError, parse_cuda_device
14
+ from allennlp.common.tqdm import Tqdm
15
+ from allennlp.common.util import dump_metrics, gpu_memory_mb, peak_memory_mb, lazy_groups_of
16
+ from allennlp.data.instance import Instance
17
+ from allennlp.data.iterators.data_iterator import DataIterator, TensorDict
18
+ from allennlp.models.model import Model
19
+ from allennlp.nn import util as nn_util
20
+ from allennlp.training import util as training_util
21
+ from allennlp.training.checkpointer import Checkpointer
22
+ from allennlp.training.learning_rate_schedulers import LearningRateScheduler
23
+ from allennlp.training.metric_tracker import MetricTracker
24
+ from allennlp.training.momentum_schedulers import MomentumScheduler
25
+ from allennlp.training.moving_average import MovingAverage
26
+ from allennlp.training.optimizers import Optimizer
27
+ from allennlp.training.tensorboard_writer import TensorboardWriter
28
+ from allennlp.training.trainer_base import TrainerBase
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ class Trainer(TrainerBase):
34
+ def __init__(
35
+ self,
36
+ model: Model,
37
+ optimizer: torch.optim.Optimizer,
38
+ scheduler: torch.optim.lr_scheduler,
39
+ iterator: DataIterator,
40
+ train_dataset: Iterable[Instance],
41
+ validation_dataset: Optional[Iterable[Instance]] = None,
42
+ patience: Optional[int] = None,
43
+ validation_metric: str = "-loss",
44
+ validation_iterator: DataIterator = None,
45
+ shuffle: bool = True,
46
+ num_epochs: int = 20,
47
+ accumulated_batch_count: int = 1,
48
+ serialization_dir: Optional[str] = None,
49
+ num_serialized_models_to_keep: int = 20,
50
+ keep_serialized_model_every_num_seconds: int = None,
51
+ checkpointer: Checkpointer = None,
52
+ model_save_interval: float = None,
53
+ cuda_device: Union[int, List] = -1,
54
+ grad_norm: Optional[float] = None,
55
+ grad_clipping: Optional[float] = None,
56
+ learning_rate_scheduler: Optional[LearningRateScheduler] = None,
57
+ momentum_scheduler: Optional[MomentumScheduler] = None,
58
+ summary_interval: int = 100,
59
+ histogram_interval: int = None,
60
+ should_log_parameter_statistics: bool = True,
61
+ should_log_learning_rate: bool = False,
62
+ log_batch_size_period: Optional[int] = None,
63
+ moving_average: Optional[MovingAverage] = None,
64
+ cold_step_count: int = 0,
65
+ cold_lr: float = 1e-3,
66
+ cuda_verbose_step=None,
67
+ ) -> None:
68
+ """
69
+ A trainer for doing supervised learning. It just takes a labeled dataset
70
+ and a ``DataIterator``, and uses the supplied ``Optimizer`` to learn the weights
71
+ for your model over some fixed number of epochs. You can also pass in a validation
72
+ dataset and enable early stopping. There are many other bells and whistles as well.
73
+
74
+ Parameters
75
+ ----------
76
+ model : ``Model``, required.
77
+ An AllenNLP model to be optimized. Pytorch Modules can also be optimized if
78
+ their ``forward`` method returns a dictionary with a "loss" key, containing a
79
+ scalar tensor representing the loss function to be optimized.
80
+
81
+ If you are training your model using GPUs, your model should already be
82
+ on the correct device. (If you use `Trainer.from_params` this will be
83
+ handled for you.)
84
+ optimizer : ``torch.nn.Optimizer``, required.
85
+ An instance of a Pytorch Optimizer, instantiated with the parameters of the
86
+ model to be optimized.
87
+ iterator : ``DataIterator``, required.
88
+ A method for iterating over a ``Dataset``, yielding padded indexed batches.
89
+ train_dataset : ``Dataset``, required.
90
+ A ``Dataset`` to train on. The dataset should have already been indexed.
91
+ validation_dataset : ``Dataset``, optional, (default = None).
92
+ A ``Dataset`` to evaluate on. The dataset should have already been indexed.
93
+ patience : Optional[int] > 0, optional (default=None)
94
+ Number of epochs to be patient before early stopping: the training is stopped
95
+ after ``patience`` epochs with no improvement. If given, it must be ``> 0``.
96
+ If None, early stopping is disabled.
97
+ validation_metric : str, optional (default="loss")
98
+ Validation metric to measure for whether to stop training using patience
99
+ and whether to serialize an ``is_best`` model each epoch. The metric name
100
+ must be prepended with either "+" or "-", which specifies whether the metric
101
+ is an increasing or decreasing function.
102
+ validation_iterator : ``DataIterator``, optional (default=None)
103
+ An iterator to use for the validation set. If ``None``, then
104
+ use the training `iterator`.
105
+ shuffle: ``bool``, optional (default=True)
106
+ Whether to shuffle the instances in the iterator or not.
107
+ num_epochs : int, optional (default = 20)
108
+ Number of training epochs.
109
+ serialization_dir : str, optional (default=None)
110
+ Path to directory for saving and loading model files. Models will not be saved if
111
+ this parameter is not passed.
112
+ num_serialized_models_to_keep : ``int``, optional (default=20)
113
+ Number of previous model checkpoints to retain. Default is to keep 20 checkpoints.
114
+ A value of None or -1 means all checkpoints will be kept.
115
+ keep_serialized_model_every_num_seconds : ``int``, optional (default=None)
116
+ If num_serialized_models_to_keep is not None, then occasionally it's useful to
117
+ save models at a given interval in addition to the last num_serialized_models_to_keep.
118
+ To do so, specify keep_serialized_model_every_num_seconds as the number of seconds
119
+ between permanently saved checkpoints. Note that this option is only used if
120
+ num_serialized_models_to_keep is not None, otherwise all checkpoints are kept.
121
+ checkpointer : ``Checkpointer``, optional (default=None)
122
+ An instance of class Checkpointer to use instead of the default. If a checkpointer is specified,
123
+ the arguments num_serialized_models_to_keep and keep_serialized_model_every_num_seconds should
124
+ not be specified. The caller is responsible for initializing the checkpointer so that it is
125
+ consistent with serialization_dir.
126
+ model_save_interval : ``float``, optional (default=None)
127
+ If provided, then serialize models every ``model_save_interval``
128
+ seconds within single epochs. In all cases, models are also saved
129
+ at the end of every epoch if ``serialization_dir`` is provided.
130
+ cuda_device : ``Union[int, List[int]]``, optional (default = -1)
131
+ An integer or list of integers specifying the CUDA device(s) to use. If -1, the CPU is used.
132
+ grad_norm : ``float``, optional, (default = None).
133
+ If provided, gradient norms will be rescaled to have a maximum of this value.
134
+ grad_clipping : ``float``, optional (default = ``None``).
135
+ If provided, gradients will be clipped `during the backward pass` to have an (absolute)
136
+ maximum of this value. If you are getting ``NaNs`` in your gradients during training
137
+ that are not solved by using ``grad_norm``, you may need this.
138
+ learning_rate_scheduler : ``LearningRateScheduler``, optional (default = None)
139
+ If specified, the learning rate will be decayed with respect to
140
+ this schedule at the end of each epoch (or batch, if the scheduler implements
141
+ the ``step_batch`` method). If you use :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`,
142
+ this will use the ``validation_metric`` provided to determine if learning has plateaued.
143
+ To support updating the learning rate on every batch, this can optionally implement
144
+ ``step_batch(batch_num_total)`` which updates the learning rate given the batch number.
145
+ momentum_scheduler : ``MomentumScheduler``, optional (default = None)
146
+ If specified, the momentum will be updated at the end of each batch or epoch
147
+ according to the schedule.
148
+ summary_interval: ``int``, optional, (default = 100)
149
+ Number of batches between logging scalars to tensorboard
150
+ histogram_interval : ``int``, optional, (default = ``None``)
151
+ If not None, then log histograms to tensorboard every ``histogram_interval`` batches.
152
+ When this parameter is specified, the following additional logging is enabled:
153
+ * Histograms of model parameters
154
+ * The ratio of parameter update norm to parameter norm
155
+ * Histogram of layer activations
156
+ We log histograms of the parameters returned by
157
+ ``model.get_parameters_for_histogram_tensorboard_logging``.
158
+ The layer activations are logged for any modules in the ``Model`` that have
159
+ the attribute ``should_log_activations`` set to ``True``. Logging
160
+ histograms requires a number of GPU-CPU copies during training and is typically
161
+ slow, so we recommend logging histograms relatively infrequently.
162
+ Note: only Modules that return tensors, tuples of tensors or dicts
163
+ with tensors as values currently support activation logging.
164
+ should_log_parameter_statistics : ``bool``, optional, (default = True)
165
+ Whether to send parameter statistics (mean and standard deviation
166
+ of parameters and gradients) to tensorboard.
167
+ should_log_learning_rate : ``bool``, optional, (default = False)
168
+ Whether to send parameter specific learning rate to tensorboard.
169
+ log_batch_size_period : ``int``, optional, (default = ``None``)
170
+ If defined, how often to log the average batch size.
171
+ moving_average: ``MovingAverage``, optional, (default = None)
172
+ If provided, we will maintain moving averages for all parameters. During training, we
173
+ employ a shadow variable for each parameter, which maintains the moving average. During
174
+ evaluation, we backup the original parameters and assign the moving averages to corresponding
175
+ parameters. Be careful that when saving the checkpoint, we will save the moving averages of
176
+ parameters. This is necessary because we want the saved model to perform as well as the validated
177
+ model if we load it later. But this may cause problems if you restart the training from checkpoint.
178
+ """
179
+ super().__init__(serialization_dir, cuda_device)
180
+
181
+ # I am not calling move_to_gpu here, because if the model is
182
+ # not already on the GPU then the optimizer is going to be wrong.
183
+ self.model = model
184
+
185
+ self.iterator = iterator
186
+ self._validation_iterator = validation_iterator
187
+ self.shuffle = shuffle
188
+ self.optimizer = optimizer
189
+ self.scheduler = scheduler
190
+ self.train_data = train_dataset
191
+ self._validation_data = validation_dataset
192
+ self.accumulated_batch_count = accumulated_batch_count
193
+ self.cold_step_count = cold_step_count
194
+ self.cold_lr = cold_lr
195
+ self.cuda_verbose_step = cuda_verbose_step
196
+
197
+ if patience is None: # no early stopping
198
+ if validation_dataset:
199
+ logger.warning(
200
+ "You provided a validation dataset but patience was set to None, "
201
+ "meaning that early stopping is disabled"
202
+ )
203
+ elif (not isinstance(patience, int)) or patience <= 0:
204
+ raise ConfigurationError(
205
+ '{} is an invalid value for "patience": it must be a positive integer '
206
+ "or None (if you want to disable early stopping)".format(patience)
207
+ )
208
+
209
+ # For tracking is_best_so_far and should_stop_early
210
+ self._metric_tracker = MetricTracker(patience, validation_metric)
211
+ # Get rid of + or -
212
+ self._validation_metric = validation_metric[1:]
213
+
214
+ self._num_epochs = num_epochs
215
+
216
+ if checkpointer is not None:
217
+ # We can't easily check if these parameters were passed in, so check against their default values.
218
+ # We don't check against serialization_dir since it is also used by the parent class.
219
+ if num_serialized_models_to_keep != 20 \
220
+ or keep_serialized_model_every_num_seconds is not None:
221
+ raise ConfigurationError(
222
+ "When passing a custom Checkpointer, you may not also pass in separate checkpointer "
223
+ "args 'num_serialized_models_to_keep' or 'keep_serialized_model_every_num_seconds'."
224
+ )
225
+ self._checkpointer = checkpointer
226
+ else:
227
+ self._checkpointer = Checkpointer(
228
+ serialization_dir,
229
+ keep_serialized_model_every_num_seconds,
230
+ num_serialized_models_to_keep,
231
+ )
232
+
233
+ self._model_save_interval = model_save_interval
234
+
235
+ self._grad_norm = grad_norm
236
+ self._grad_clipping = grad_clipping
237
+
238
+ self._learning_rate_scheduler = learning_rate_scheduler
239
+ self._momentum_scheduler = momentum_scheduler
240
+ self._moving_average = moving_average
241
+
242
+ # We keep the total batch number as an instance variable because it
243
+ # is used inside a closure for the hook which logs activations in
244
+ # ``_enable_activation_logging``.
245
+ self._batch_num_total = 0
246
+
247
+ self._tensorboard = TensorboardWriter(
248
+ get_batch_num_total=lambda: self._batch_num_total,
249
+ serialization_dir=serialization_dir,
250
+ summary_interval=summary_interval,
251
+ histogram_interval=histogram_interval,
252
+ should_log_parameter_statistics=should_log_parameter_statistics,
253
+ should_log_learning_rate=should_log_learning_rate,
254
+ )
255
+
256
+ self._log_batch_size_period = log_batch_size_period
257
+
258
+ self._last_log = 0.0 # time of last logging
259
+
260
+ # Enable activation logging.
261
+ if histogram_interval is not None:
262
+ self._tensorboard.enable_activation_logging(self.model)
263
+
264
def rescale_gradients(self) -> Optional[float]:
    """
    Rescale the model's gradients using ``training_util.rescale_gradients``.

    The helper is given ``self.model`` and ``self._grad_norm``; whatever it returns is
    handed back to the caller unchanged.
    """
    grad_norm = self._grad_norm
    return training_util.rescale_gradients(self.model, grad_norm)
266
+
267
def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
    """
    Does a forward pass on the given batches and returns the ``loss`` value in the result.
    If ``for_training`` is `True` also applies regularization penalty.

    Parameters
    ----------
    batch_group : ``List[TensorDict]``
        The batches to run. Unless ``self._multiple_gpu`` is set, the list must contain
        exactly one batch.
    for_training : ``bool``
        When ``True``, the model's regularization penalty is added to the loss and a
        missing ``"loss"`` key is treated as an error.

    Returns
    -------
    The ``"loss"`` entry of the model output, or ``None`` when the output has no
    ``"loss"`` key and ``for_training`` is ``False``.

    Raises
    ------
    RuntimeError
        If ``for_training`` is ``True`` and the model output has no ``"loss"`` key.
    """
    if self._multiple_gpu:
        output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
    else:
        assert len(batch_group) == 1
        batch = nn_util.move_to_device(batch_group[0], self._cuda_devices[0])
        output_dict = self.model(**batch)

    # Guard only the lookup itself, so that a KeyError raised while computing the
    # regularization penalty is not misreported as a missing "loss" key (or silently
    # turned into ``None`` during validation).
    try:
        loss = output_dict["loss"]
    except KeyError:
        if for_training:
            raise RuntimeError(
                "The model you are trying to optimize does not contain a"
                " 'loss' key in the output of model.forward(inputs)."
            )
        return None

    if for_training:
        loss += self.model.get_regularization_penalty()

    return loss
293
+
294
def _train_epoch(self, epoch: int) -> Dict[str, float]:
    """
    Trains one epoch and returns metrics.

    Gradients are accumulated over groups of ``self.accumulated_batch_count`` batches
    before the optimizer steps; every loss is divided by the size of the accumulation
    group it belongs to, and the undivided value is what gets added to the running
    training loss.

    Parameters
    ----------
    epoch : ``int``
        Index of the epoch being trained. Used for logging and for naming checkpoints
        saved in the middle of the epoch.

    Returns
    -------
    The metrics returned by ``training_util.get_metrics`` for this epoch, extended with
    ``cpu_memory_MB`` and one ``gpu_<n>_memory_MB`` entry per GPU reported by
    ``gpu_memory_mb``.

    Raises
    ------
    ValueError
        If the loss of a batch is NaN.
    RuntimeError
        Re-raised from the forward pass after diagnostics about the batch are printed.
    """

    def report_cuda_memory(stage: str, batch_number: int) -> None:
        # Print CUDA memory statistics every ``cuda_verbose_step`` batches (if enabled).
        verbose_step = self.cuda_verbose_step
        if verbose_step is not None and batch_number % verbose_step == 0:
            print(f'{stage} - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
            print(f'{stage} - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')

    logger.info("Epoch %d/%d", epoch, self._num_epochs - 1)
    peak_cpu_usage = peak_memory_mb()
    logger.info(f"Peak CPU memory usage MB: {peak_cpu_usage}")
    gpu_usage = []
    for gpu, memory in gpu_memory_mb().items():
        gpu_usage.append((gpu, memory))
        logger.info(f"GPU {gpu} memory usage MB: {memory}")

    train_loss = 0.0
    # Set the model to "train" mode.
    self.model.train()

    num_gpus = len(self._cuda_devices)

    # Get tqdm for the training batches
    raw_train_generator = self.iterator(self.train_data, num_epochs=1, shuffle=self.shuffle)
    train_generator = lazy_groups_of(raw_train_generator, num_gpus)
    num_training_batches = math.ceil(self.iterator.get_num_batches(self.train_data) / num_gpus)
    # Number of batches left over after the last full accumulation group.
    residue = num_training_batches % self.accumulated_batch_count
    self._last_log = time.time()
    last_save_time = time.time()

    batches_this_epoch = 0
    if self._batch_num_total is None:
        self._batch_num_total = 0

    histogram_parameters = set(self.model.get_parameters_for_histogram_tensorboard_logging())

    logger.info("Training")
    train_generator_tqdm = Tqdm.tqdm(train_generator, total=num_training_batches)
    cumulative_batch_size = 0
    self.optimizer.zero_grad()
    for batch_group in train_generator_tqdm:
        batches_this_epoch += 1
        self._batch_num_total += 1
        batch_num_total = self._batch_num_total

        # Only the trailing ``residue`` batches form a shorter accumulation group.
        # Requiring ``residue > 0`` keeps the divisor non-zero even if the iterator
        # yields more batches than ``get_num_batches`` reported.
        in_trailing_group = residue > 0 and batches_this_epoch > (num_training_batches - residue)
        iter_len = residue if in_trailing_group else self.accumulated_batch_count

        report_cuda_memory('Before forward pass', batch_num_total)
        try:
            loss = self.batch_loss(batch_group, for_training=True) / iter_len
        except RuntimeError as e:
            # Dump the sizes and value ranges of the failing batch before re-raising.
            print(e)
            for x in batch_group:
                all_words = [len(y['words']) for y in x['metadata']]
                print(f"Total sents: {len(all_words)}. "
                      f"Min {min(all_words)}. Max {max(all_words)}")
                for elem in ['labels', 'd_tags']:
                    tt = x[elem]
                    print(
                        f"{elem} shape {list(tt.shape)} and min {tt.min().item()} and {tt.max().item()}")
                for elem in ["bert", "mask", "bert-offsets"]:
                    tt = x['tokens'][elem]
                    print(
                        f"{elem} shape {list(tt.shape)} and min {tt.min().item()} and {tt.max().item()}")
            raise

        report_cuda_memory('After forward pass', batch_num_total)

        if torch.isnan(loss):
            raise ValueError("nan loss encountered")

        loss.backward()

        report_cuda_memory('After backprop', batch_num_total)

        # Undo the division above so the running loss is per-batch, not per-group.
        train_loss += loss.item() * iter_len

        # Measure the batch size now: ``batch_group`` is deleted right below to free
        # memory, so it is no longer available at the point where the size is logged.
        if self._log_batch_size_period:
            cur_batch = sum(training_util.get_batch_size(batch) for batch in batch_group)

        del batch_group, loss
        torch.cuda.empty_cache()

        report_cuda_memory('After collecting garbage', batch_num_total)

        batch_grad_norm = self.rescale_gradients()

        # This does nothing if batch_num_total is None or you are using a
        # scheduler which doesn't update per batch.
        if self._learning_rate_scheduler:
            self._learning_rate_scheduler.step_batch(batch_num_total)
        if self._momentum_scheduler:
            self._momentum_scheduler.step_batch(batch_num_total)

        # Step the optimizer at the end of every accumulation group and on the last
        # expected batch of the epoch.
        should_step = (
            batches_this_epoch % self.accumulated_batch_count == 0
            or batches_this_epoch == num_training_batches
        )

        if self._tensorboard.should_log_histograms_this_batch():
            # get the magnitude of parameter updates for logging
            # We need a copy of current parameters to compute magnitude of updates,
            # and copy them to CPU so large models won't go OOM on the GPU.
            param_updates = {
                name: param.detach().cpu().clone()
                for name, param in self.model.named_parameters()
            }
            if should_step:
                self.optimizer.step()
                self.optimizer.zero_grad()
            for name, param in self.model.named_parameters():
                param_updates[name].sub_(param.detach().cpu())
                update_norm = torch.norm(param_updates[name].view(-1))
                param_norm = torch.norm(param.view(-1)).cpu()
                self._tensorboard.add_train_scalar(
                    "gradient_update/" + name, update_norm / (param_norm + 1e-7)
                )
        elif should_step:
            self.optimizer.step()
            self.optimizer.zero_grad()

        # Update moving averages
        if self._moving_average is not None:
            self._moving_average.apply(batch_num_total)

        # Update the description with the latest metrics
        metrics = training_util.get_metrics(self.model, train_loss, batches_this_epoch)
        description = training_util.description_from_metrics(metrics)

        train_generator_tqdm.set_description(description, refresh=False)

        # Log parameter values to Tensorboard
        if self._tensorboard.should_log_this_batch():
            self._tensorboard.log_parameter_and_gradient_statistics(self.model, batch_grad_norm)
            self._tensorboard.log_learning_rates(self.model, self.optimizer)

            self._tensorboard.add_train_scalar("loss/loss_train", metrics["loss"])
            self._tensorboard.log_metrics({"epoch_metrics/" + k: v for k, v in metrics.items()})

        if self._tensorboard.should_log_histograms_this_batch():
            self._tensorboard.log_histograms(self.model, histogram_parameters)

        if self._log_batch_size_period:
            cumulative_batch_size += cur_batch
            if (batches_this_epoch - 1) % self._log_batch_size_period == 0:
                average = cumulative_batch_size / batches_this_epoch
                logger.info(f"current batch size: {cur_batch} mean batch size: {average}")
                self._tensorboard.add_train_scalar("current_batch_size", cur_batch)
                self._tensorboard.add_train_scalar("mean_batch_size", average)

        # Save model if needed.
        if self._model_save_interval is not None and (
            time.time() - last_save_time > self._model_save_interval
        ):
            last_save_time = time.time()
            self._save_checkpoint(
                "{0}.{1}".format(epoch, training_util.time_to_str(int(last_save_time)))
            )

    metrics = training_util.get_metrics(self.model, train_loss, batches_this_epoch, reset=True)
    metrics["cpu_memory_MB"] = peak_cpu_usage
    for (gpu_num, memory) in gpu_usage:
        metrics["gpu_" + str(gpu_num) + "_memory_MB"] = memory
    return metrics
459
+
460
def _validation_loss(self) -> Tuple[float, int]:
    """
    Computes the validation loss. Returns it and the number of batches.

    The model is switched to eval mode. If a moving average is configured, the averaged
    parameter values are swapped in for the duration of the pass and the original values
    are restored afterwards, even when the pass raises.

    Returns
    -------
    A ``(val_loss, batches_this_epoch)`` tuple: the sum of the per-batch losses and the
    number of batches that actually produced a loss.
    """
    logger.info("Validating")

    self.model.eval()

    # Replace parameter values with the shadow values from the moving averages.
    if self._moving_average is not None:
        self._moving_average.assign_average_value()

    try:
        if self._validation_iterator is not None:
            val_iterator = self._validation_iterator
        else:
            val_iterator = self.iterator

        num_gpus = len(self._cuda_devices)

        raw_val_generator = val_iterator(self._validation_data, num_epochs=1, shuffle=False)
        val_generator = lazy_groups_of(raw_val_generator, num_gpus)
        num_validation_batches = math.ceil(
            val_iterator.get_num_batches(self._validation_data) / num_gpus
        )
        val_generator_tqdm = Tqdm.tqdm(val_generator, total=num_validation_batches)
        batches_this_epoch = 0
        val_loss = 0.0
        for batch_group in val_generator_tqdm:

            loss = self.batch_loss(batch_group, for_training=False)
            if loss is not None:
                # You shouldn't necessarily have to compute a loss for validation, so we allow for
                # `loss` to be None. We need to be careful, though - `batches_this_epoch` is
                # currently only used as the divisor for the loss function, so we can safely only
                # count those batches for which we actually have a loss. If this variable ever
                # gets used for something else, we might need to change things around a bit.
                batches_this_epoch += 1
                # Accumulate a plain Python float, as the training loop does and as the
                # declared return type promises.
                val_loss += loss.detach().item()

            # Update the description with the latest metrics
            val_metrics = training_util.get_metrics(self.model, val_loss, batches_this_epoch)
            description = training_util.description_from_metrics(val_metrics)
            val_generator_tqdm.set_description(description, refresh=False)
    finally:
        # Now restore the original parameter values, also when validation failed, so
        # the model is never left holding the averaged weights.
        if self._moving_average is not None:
            self._moving_average.restore()

    return val_loss, batches_this_epoch
509
+
510
def train(self) -> Dict[str, Any]:
    """
    Trains the supplied model with the supplied parameters.

    Training resumes from the latest checkpoint when one can be restored. For the first
    ``self.cold_step_count`` epochs every parameter group uses ``self.cold_lr`` and the
    ``bert`` token embedder is frozen; afterwards the learning rate that was in effect
    before the cold phase is reinstated and the embedder is unfrozen.

    Returns
    -------
    A dictionary with the metrics of the last completed epoch, peak memory usage,
    timing information and the ``best_*`` validation metrics.

    Raises
    ------
    ConfigurationError
        If restoring the checkpoint fails with a ``RuntimeError``.
    """
    try:
        epoch_counter = self._restore_checkpoint()
    except RuntimeError:
        traceback.print_exc()
        raise ConfigurationError(
            "Could not recover training from the checkpoint. Did you mean to output to "
            "a different serialization directory or delete the existing serialization "
            "directory?"
        )

    training_util.enable_gradient_clipping(self.model, self._grad_clipping)

    logger.info("Beginning training.")

    train_metrics: Dict[str, float] = {}
    val_metrics: Dict[str, float] = {}
    this_epoch_val_metric: float = None
    metrics: Dict[str, Any] = {}
    epochs_trained = 0
    training_start_time = time.time()

    # Only enter the cold phase if the epoch that ends it is still ahead of us (or is the
    # very next one). When resuming past it, nothing in the loop below would ever undo
    # the freeze / cold learning rate again.
    if self.cold_step_count > 0 and epoch_counter <= self.cold_step_count:
        base_lr = self.optimizer.param_groups[0]['lr']
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.cold_lr
        self.model.text_field_embedder._token_embedders['bert'].set_weights(freeze=True)

    metrics["best_epoch"] = self._metric_tracker.best_epoch
    for key, value in self._metric_tracker.best_epoch_metrics.items():
        metrics["best_validation_" + key] = value

    for epoch in range(epoch_counter, self._num_epochs):
        if epoch == self.cold_step_count and epoch != 0:
            # End of the cold phase: back to the regular learning rate, unfreeze BERT.
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = base_lr
            self.model.text_field_embedder._token_embedders['bert'].set_weights(freeze=False)

        epoch_start_time = time.time()
        train_metrics = self._train_epoch(epoch)

        # get peak of memory usage
        if "cpu_memory_MB" in train_metrics:
            metrics["peak_cpu_memory_MB"] = max(
                metrics.get("peak_cpu_memory_MB", 0), train_metrics["cpu_memory_MB"]
            )
        for key, value in train_metrics.items():
            if key.startswith("gpu_"):
                metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value)

        # clear cache before validation
        torch.cuda.empty_cache()
        if self._validation_data is not None:
            with torch.no_grad():
                # We have a validation set, so compute all the metrics on it.
                val_loss, num_batches = self._validation_loss()
                val_metrics = training_util.get_metrics(
                    self.model, val_loss, num_batches, reset=True
                )

                # Check validation metric for early stopping
                this_epoch_val_metric = val_metrics[self._validation_metric]
                self._metric_tracker.add_metric(this_epoch_val_metric)

                if self._metric_tracker.should_stop_early():
                    logger.info("Ran out of patience. Stopping training.")
                    break

        self._tensorboard.log_metrics(
            train_metrics, val_metrics=val_metrics, log_to_console=True, epoch=epoch + 1
        )  # +1 because tensorboard doesn't like 0

        # Create overall metrics dict
        training_elapsed_time = time.time() - training_start_time
        metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time))
        metrics["training_start_epoch"] = epoch_counter
        metrics["training_epochs"] = epochs_trained
        metrics["epoch"] = epoch

        for key, value in train_metrics.items():
            metrics["training_" + key] = value
        for key, value in val_metrics.items():
            metrics["validation_" + key] = value

        # The scheduler is stepped on the validation loss, so there is nothing to feed
        # it when no validation loss was computed (or no scheduler was supplied).
        validation_loss = metrics.get("validation_loss")
        if self.scheduler is not None and validation_loss is not None:
            self.scheduler.step(validation_loss)

        if self._metric_tracker.is_best_so_far():
            # Update all the best_ metrics.
            # (Otherwise they just stay the same as they were.)
            metrics["best_epoch"] = epoch
            for key, value in val_metrics.items():
                metrics["best_validation_" + key] = value

            self._metric_tracker.best_epoch_metrics = val_metrics

        if self._serialization_dir:
            dump_metrics(
                os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), metrics
            )

        # The Scheduler API is agnostic to whether your schedule requires a validation metric -
        # if it doesn't, the validation metric passed here is ignored.
        if self._learning_rate_scheduler:
            self._learning_rate_scheduler.step(this_epoch_val_metric, epoch)
        if self._momentum_scheduler:
            self._momentum_scheduler.step(this_epoch_val_metric, epoch)

        self._save_checkpoint(epoch)

        epoch_elapsed_time = time.time() - epoch_start_time
        logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time))

        if epoch < self._num_epochs - 1:
            # Extrapolate the remaining time from the average duration per epoch so far.
            training_elapsed_time = time.time() - training_start_time
            estimated_time_remaining = training_elapsed_time * (
                (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1
            )
            formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining)))
            logger.info("Estimated training time remaining: %s", formatted_time)

        epochs_trained += 1

    # make sure pending events are flushed to disk and files are closed properly
    # self._tensorboard.close()

    # Load the best model state before returning
    best_model_state = self._checkpointer.best_model_state()
    if best_model_state:
        self.model.load_state_dict(best_model_state)

    return metrics
645
+
646
def _save_checkpoint(self, epoch: Union[int, str]) -> None:
    """
    Saves a checkpoint of the model to self._serialization_dir.
    Is a no-op if self._serialization_dir is None.

    The persisted training state holds the metric tracker, the optimizer, the total
    batch count and, when configured, the learning rate and momentum schedulers.

    Parameters
    ----------
    epoch : Union[int, str], required.
        The epoch of training.  If the checkpoint is saved in the middle
        of an epoch, the parameter is a string with the epoch and timestamp.
    """
    # If moving averages are used for parameters, we save
    # the moving average values into checkpoint, instead of the current values.
    if self._moving_average is not None:
        self._moving_average.assign_average_value()

    try:
        # These are the training states we need to persist.
        training_states = {
            "metric_tracker": self._metric_tracker.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "batch_num_total": self._batch_num_total,
        }

        # If we have a learning rate or momentum scheduler, we should persist them too.
        if self._learning_rate_scheduler is not None:
            training_states["learning_rate_scheduler"] = self._learning_rate_scheduler.state_dict()
        if self._momentum_scheduler is not None:
            training_states["momentum_scheduler"] = self._momentum_scheduler.state_dict()

        self._checkpointer.save_checkpoint(
            model_state=self.model.state_dict(),
            epoch=epoch,
            training_states=training_states,
            is_best_so_far=self._metric_tracker.is_best_so_far(),
        )
    finally:
        # Restore the original values for parameters so that training will not be
        # affected - also when saving failed, otherwise the model would keep the
        # averaged weights.
        if self._moving_average is not None:
            self._moving_average.restore()
685
+
686
def _restore_checkpoint(self) -> int:
    """
    Restores the model and training state from the last saved checkpoint.

    Besides the model weights this reloads the optimizer state, the learning rate and
    momentum scheduler states (when both the scheduler and its saved state exist), the
    metric tracker and the total batch count. It is meant for continuing training only;
    to load weights for inference use the native Pytorch functions:
    `` model.load_state_dict(torch.load("/path/to/model/weights.th"))``

    When the checkpointer returns no training state, nothing is changed and 0 is returned.

    Returns
    -------
    epoch: int
        The epoch at which to resume training, which is one after the epoch
        recorded in the saved training state.
    """
    model_state, training_state = self._checkpointer.restore_checkpoint()

    # No checkpoint to restore, start at 0
    if not training_state:
        return 0

    self.model.load_state_dict(model_state)
    self.optimizer.load_state_dict(training_state["optimizer"])

    # A scheduler is only reloaded when it is configured AND its state was saved.
    optional_schedulers = (
        ("learning_rate_scheduler", self._learning_rate_scheduler),
        ("momentum_scheduler", self._momentum_scheduler),
    )
    for state_key, scheduler in optional_schedulers:
        if scheduler is not None and state_key in training_state:
            scheduler.load_state_dict(training_state[state_key])
    training_util.move_optimizer_to_cuda(self.optimizer)

    tracker = self._metric_tracker
    if "metric_tracker" in training_state:
        # Current format: a serialized ``MetricTracker``.
        tracker.load_state_dict(training_state["metric_tracker"])
    else:
        tracker.clear()
        # Older format: only ``val_metric_per_epoch`` was tracked. Before that,
        # nothing was tracked at all and the tracker simply stays cleared.
        if "val_metric_per_epoch" in training_state:
            tracker.add_metrics(training_state["val_metric_per_epoch"])

    # Mid-epoch checkpoints store the epoch as a "<epoch>.<timestamp>" string.
    saved_epoch = training_state["epoch"]
    if not isinstance(saved_epoch, int):
        saved_epoch = int(saved_epoch.split(".")[0])
    resume_epoch = saved_epoch + 1

    # For older checkpoints with batch_num_total missing, default to old behavior where
    # it is unchanged.
    restored_batch_total = training_state.get("batch_num_total")
    if restored_batch_total is not None:
        self._batch_num_total = restored_batch_total

    return resume_epoch
742
+
743
+ # Requires custom from_params.
744
@classmethod
def from_params(  # type: ignore
    cls,
    model: Model,
    serialization_dir: str,
    iterator: DataIterator,
    train_data: Iterable[Instance],
    validation_data: Optional[Iterable[Instance]],
    params: Params,
    validation_iterator: DataIterator = None,
) -> "Trainer":
    """
    Build a trainer from a ``Params`` configuration.

    Every recognised key is popped from ``params``; ``params.assert_empty`` then rejects
    any key that is left over. The model is moved to the (first) configured CUDA device
    before the optimizer is created.

    Raises
    ------
    ConfigurationError
        If ``params`` contains a ``checkpointer`` key together with
        ``num_serialized_models_to_keep`` or ``keep_serialized_model_every_num_seconds``.

    NOTE(review): ``model``, ``optimizer``, ``iterator``, ``train_data`` and
    ``validation_data`` are passed to the constructor positionally. The constructor's
    parameter order is not visible from this method - confirm that they line up.
    """
    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    shuffle = params.pop_bool("shuffle", True)
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = parse_cuda_device(params.pop("cuda_device", -1))
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    lr_scheduler_params = params.pop("learning_rate_scheduler", None)
    momentum_scheduler_params = params.pop("momentum_scheduler", None)

    model_device = cuda_device[0] if isinstance(cuda_device, list) else cuda_device
    if model_device >= 0:
        # Moving model to GPU here so that the optimizer state gets constructed on
        # the right device.
        model = model.cuda(model_device)

    parameters = [
        [name, parameter]
        for name, parameter in model.named_parameters()
        if parameter.requires_grad
    ]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

    moving_average = None
    if "moving_average" in params:
        moving_average = MovingAverage.from_params(
            params.pop("moving_average"), parameters=parameters
        )

    lr_scheduler = None
    if lr_scheduler_params:
        lr_scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)

    momentum_scheduler = None
    if momentum_scheduler_params:
        momentum_scheduler = MomentumScheduler.from_params(optimizer, momentum_scheduler_params)

    if "checkpointer" not in params:
        # No explicit checkpointer: assemble one from the two legacy keys.
        keep_count = params.pop_int("num_serialized_models_to_keep", 20)
        keep_every_seconds = params.pop_int("keep_serialized_model_every_num_seconds", None)
        checkpointer = Checkpointer(
            serialization_dir=serialization_dir,
            num_serialized_models_to_keep=keep_count,
            keep_serialized_model_every_num_seconds=keep_every_seconds,
        )
    else:
        uses_legacy_keys = (
            "keep_serialized_model_every_num_seconds" in params
            or "num_serialized_models_to_keep" in params
        )
        if uses_legacy_keys:
            raise ConfigurationError(
                "Checkpointer may be initialized either from the 'checkpointer' key or from the "
                "keys 'num_serialized_models_to_keep' and 'keep_serialized_model_every_num_seconds'"
                " but the passed config uses both methods."
            )
        checkpointer = Checkpointer.from_params(params.pop("checkpointer"))

    model_save_interval = params.pop_float("model_save_interval", None)
    summary_interval = params.pop_int("summary_interval", 100)
    histogram_interval = params.pop_int("histogram_interval", None)
    should_log_parameter_statistics = params.pop_bool("should_log_parameter_statistics", True)
    should_log_learning_rate = params.pop_bool("should_log_learning_rate", False)
    log_batch_size_period = params.pop_int("log_batch_size_period", None)

    params.assert_empty(cls.__name__)

    trainer_options = dict(
        patience=patience,
        validation_metric=validation_metric,
        validation_iterator=validation_iterator,
        shuffle=shuffle,
        num_epochs=num_epochs,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
        grad_norm=grad_norm,
        grad_clipping=grad_clipping,
        learning_rate_scheduler=lr_scheduler,
        momentum_scheduler=momentum_scheduler,
        checkpointer=checkpointer,
        model_save_interval=model_save_interval,
        summary_interval=summary_interval,
        histogram_interval=histogram_interval,
        should_log_parameter_statistics=should_log_parameter_statistics,
        should_log_learning_rate=should_log_learning_rate,
        log_batch_size_period=log_batch_size_period,
        moving_average=moving_average,
    )
    return cls(model, optimizer, iterator, train_data, validation_data, **trainer_options)
output_vocabulary/d_tags.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ CORRECT
2
+ INCORRECT
3
+ @@UNKNOWN@@
4
+ @@PADDING@@
output_vocabulary/labels.txt ADDED
@@ -0,0 +1,5002 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $KEEP
2
+ $DELETE
3
+ $TRANSFORM_CASE_CAPITAL
4
+ $APPEND_the
5
+ $APPEND_,
6
+ $APPEND_a
7
+ $TRANSFORM_VERB_VB_VBZ
8
+ $TRANSFORM_AGREEMENT_PLURAL
9
+ $TRANSFORM_CASE_LOWER
10
+ $TRANSFORM_VERB_VB_VBN
11
+ $REPLACE_the
12
+ $REPLACE_a
13
+ $REPLACE_to
14
+ $TRANSFORM_VERB_VB_VBG
15
+ $REPLACE_.
16
+ $APPEND_to
17
+ $REPLACE_,
18
+ $REPLACE_in
19
+ $REPLACE_was
20
+ $TRANSFORM_VERB_VBZ_VB
21
+ $TRANSFORM_AGREEMENT_SINGULAR
22
+ $APPEND_I
23
+ $APPEND_.
24
+ $REPLACE_for
25
+ $REPLACE_I
26
+ $APPEND_(
27
+ $TRANSFORM_VERB_VBG_VB
28
+ $REPLACE_is
29
+ $REPLACE_have
30
+ $REPLACE_on
31
+ $REPLACE_are
32
+ $REPLACE_of
33
+ $REPLACE_it
34
+ $TRANSFORM_VERB_VBN_VB
35
+ $REPLACE_that
36
+ $APPEND_in
37
+ $REPLACE_and
38
+ $APPEND_it
39
+ $APPEND_that
40
+ $REPLACE_at
41
+ $APPEND_for
42
+ $APPEND_of
43
+ $APPEND_and
44
+ $REPLACE_an
45
+ $REPLACE_my
46
+ $REPLACE_had
47
+ $APPEND_have
48
+ $APPEND_an
49
+ $REPLACE_has
50
+ $APPEND_my
51
+ $APPEND_is
52
+ $APPEND_The
53
+ $APPEND_will
54
+ $REPLACE_with
55
+ $REPLACE_were
56
+ $REPLACE_be
57
+ $TRANSFORM_VERB_VBN_VBG
58
+ $APPEND_``
59
+ $REPLACE_do
60
+ $TRANSFORM_VERB_VBG_VBN
61
+ $REPLACE_this
62
+ $REPLACE_will
63
+ $TRANSFORM_VERB_VB_VBD
64
+ $APPEND_was
65
+ $REPLACE_n't
66
+ $APPEND_about
67
+ $REPLACE_from
68
+ $REPLACE_about
69
+ $REPLACE_It
70
+ $APPEND_on
71
+ $REPLACE_would
72
+ $MERGE_SPACE
73
+ $APPEND_at
74
+ $APPEND_'s
75
+ $REPLACE_as
76
+ $REPLACE_'s
77
+ $REPLACE_could
78
+ $APPEND_with
79
+ $REPLACE_did
80
+ $REPLACE_them
81
+ $REPLACE_The
82
+ $REPLACE_by
83
+ $REPLACE_so
84
+ $REPLACE_not
85
+ $REPLACE_can
86
+ $APPEND_am
87
+ $APPEND_be
88
+ $REPLACE_because
89
+ $APPEND_/
90
+ $REPLACE_they
91
+ $REPLACE_am
92
+ $APPEND_are
93
+ $TRANSFORM_VERB_VBZ_VBN
94
+ $REPLACE_'m
95
+ $REPLACE_their
96
+ $TRANSFORM_VERB_VBN_VBZ
97
+ $APPEND_had
98
+ $APPEND_would
99
+ $APPEND_-
100
+ $REPLACE_(
101
+ $TRANSFORM_VERB_VBN_VBD
102
+ $REPLACE_very
103
+ $REPLACE_people
104
+ $REPLACE_get
105
+ $REPLACE_there
106
+ $REPLACE_?
107
+ $APPEND_do
108
+ $REPLACE_;
109
+ $REPLACE_me
110
+ $REPLACE_one
111
+ $REPLACE_been
112
+ $APPEND_so
113
+ $APPEND_)
114
+ $APPEND_'m
115
+ $REPLACE_or
116
+ $REPLACE_some
117
+ $REPLACE_you
118
+ $TRANSFORM_VERB_VBD_VBN
119
+ $APPEND_as
120
+ $REPLACE_like
121
+ $TRANSFORM_VERB_VBD_VB
122
+ $REPLACE_which
123
+ $APPEND_has
124
+ $REPLACE_these
125
+ $REPLACE_This
126
+ $APPEND_from
127
+ $REPLACE_when
128
+ $APPEND_'ve
129
+ $REPLACE_``
130
+ $APPEND_there
131
+ $REPLACE_does
132
+ $APPEND_also
133
+ $APPEND_It
134
+ $APPEND_can
135
+ $REPLACE_:
136
+ $REPLACE_other
137
+ $APPEND_more
138
+ $REPLACE_want
139
+ $REPLACE_we
140
+ $REPLACE_'ve
141
+ $REPLACE_what
142
+ $REPLACE_more
143
+ $REPLACE_many
144
+ $REPLACE_into
145
+ $APPEND_been
146
+ $APPEND_by
147
+ $APPEND_this
148
+ $REPLACE_went
149
+ $REPLACE_time
150
+ $APPEND_only
151
+ $TRANSFORM_VERB_VBG_VBZ
152
+ $REPLACE_go
153
+ $REPLACE_while
154
+ $REPLACE_but
155
+ $APPEND_all
156
+ $APPEND_if
157
+ $REPLACE_should
158
+ $REPLACE_out
159
+ $APPEND_'
160
+ $REPLACE_during
161
+ $REPLACE_much
162
+ $APPEND_like
163
+ $REPLACE_!
164
+ $APPEND_but
165
+ $REPLACE_if
166
+ $REPLACE_since
167
+ $APPEND_people
168
+ $APPEND_because
169
+ $REPLACE_any
170
+ $APPEND_A
171
+ $REPLACE_another
172
+ $REPLACE_They
173
+ $APPEND_you
174
+ $REPLACE_ca
175
+ $REPLACE_our
176
+ $REPLACE_who
177
+ $APPEND_now
178
+ $REPLACE_really
179
+ $REPLACE_make
180
+ $APPEND_me
181
+ $APPEND_who
182
+ $REPLACE_In
183
+ $REPLACE_her
184
+ $REPLACE_English
185
+ $APPEND_some
186
+ $APPEND_when
187
+ $APPEND_still
188
+ $APPEND_them
189
+ $REPLACE_use
190
+ $APPEND_just
191
+ $REPLACE_things
192
+ $REPLACE_/
193
+ $REPLACE_got
194
+ $REPLACE_My
195
+ $APPEND_were
196
+ $REPLACE_he
197
+ $REPLACE_countries
198
+ $APPEND_their
199
+ $REPLACE_using
200
+ $TRANSFORM_VERB_VBZ_VBG
201
+ $APPEND_'ll
202
+ $REPLACE_being
203
+ $REPLACE_too
204
+ $APPEND_we
205
+ $APPEND_they
206
+ $REPLACE_lot
207
+ $REPLACE_-
208
+ $REPLACE_all
209
+ $REPLACE_good
210
+ $APPEND_[
211
+ $REPLACE_every
212
+ $REPLACE_)
213
+ $REPLACE_your
214
+ $APPEND_My
215
+ $APPEND_even
216
+ $APPEND_out
217
+ $REPLACE_his
218
+ $REPLACE_made
219
+ $APPEND_any
220
+ $REPLACE_where
221
+ $APPEND_which
222
+ $REPLACE_work
223
+ $REPLACE_used
224
+ $APPEND_one
225
+ $REPLACE_take
226
+ $APPEND_In
227
+ $REPLACE_There
228
+ $REPLACE_up
229
+ $REPLACE_how
230
+ $REPLACE_myself
231
+ $APPEND_what
232
+ $APPEND_very
233
+ $APPEND_?
234
+ $REPLACE_become
235
+ $REPLACE_think
236
+ $REPLACE_going
237
+ $REPLACE_Japanese
238
+ $REPLACE_well
239
+ $APPEND_being
240
+ $APPEND_or
241
+ $REPLACE_just
242
+ $REPLACE_write
243
+ $REPLACE_those
244
+ $REPLACE_feel
245
+ $REPLACE_until
246
+ $APPEND_However
247
+ $APPEND_our
248
+ $REPLACE_something
249
+ $APPEND_get
250
+ $REPLACE_diary
251
+ $REPLACE_no
252
+ $REPLACE_over
253
+ $APPEND_time
254
+ $APPEND_then
255
+ $REPLACE_see
256
+ $REPLACE_writing
257
+ $REPLACE_wo
258
+ $REPLACE_only
259
+ $REPLACE_'ll
260
+ $REPLACE_after
261
+ $REPLACE_know
262
+ $REPLACE_anything
263
+ $REPLACE_now
264
+ $REPLACE_That
265
+ $REPLACE_first
266
+ $REPLACE_than
267
+ $APPEND_up
268
+ $REPLACE_better
269
+ $REPLACE_hope
270
+ $REPLACE_through
271
+ $REPLACE_doing
272
+ $APPEND_go
273
+ $REPLACE_then
274
+ $APPEND_too
275
+ $REPLACE_studying
276
+ $REPLACE_its
277
+ $REPLACE_learn
278
+ $REPLACE_lives
279
+ $REPLACE_having
280
+ $REPLACE_told
281
+ $REPLACE_What
282
+ $REPLACE_she
283
+ $REPLACE_thought
284
+ $APPEND_not
285
+ $REPLACE_around
286
+ $REPLACE_him
287
+ $REPLACE_different
288
+ $APPEND_could
289
+ $APPEND_such
290
+ $REPLACE_able
291
+ $REPLACE_On
292
+ $REPLACE_before
293
+ $REPLACE_though
294
+ $REPLACE_also
295
+ $APPEND_entry
296
+ $REPLACE_learned
297
+ $TRANSFORM_CASE_UPPER
298
+ $APPEND_again
299
+ $REPLACE_friends
300
+ $APPEND_This
301
+ $REPLACE_might
302
+ $REPLACE_A
303
+ $REPLACE_However
304
+ $APPEND_really
305
+ $REPLACE_started
306
+ $REPLACE_improve
307
+ $APPEND_English
308
+ $REPLACE_years
309
+ $REPLACE_'
310
+ $REPLACE_most
311
+ $APPEND_how
312
+ $REPLACE_day
313
+ $APPEND_:
314
+ $APPEND_today
315
+ $REPLACE_find
316
+ $REPLACE_help
317
+ $APPEND_should
318
+ $REPLACE_We
319
+ $REPLACE_even
320
+ $REPLACE_may
321
+ $REPLACE_left
322
+ $REPLACE_called
323
+ $APPEND_did
324
+ $REPLACE_course
325
+ $REPLACE_These
326
+ $REPLACE_understand
327
+ $REPLACE_So
328
+ $REPLACE_said
329
+ $REPLACE_took
330
+ $REPLACE_person
331
+ $REPLACE_school
332
+ $REPLACE_such
333
+ $APPEND_called
334
+ $REPLACE_At
335
+ $APPEND_before
336
+ $REPLACE_way
337
+ $APPEND_he
338
+ $REPLACE_everyone
339
+ $REPLACE_here
340
+ $REPLACE_When
341
+ $REPLACE_everything
342
+ $REPLACE_need
343
+ $APPEND_her
344
+ $REPLACE_Because
345
+ $TRANSFORM_VERB_VBD_VBG
346
+ $REPLACE_say
347
+ $REPLACE_study
348
+ $APPEND_much
349
+ $REPLACE_still
350
+ $REPLACE_found
351
+ $APPEND_always
352
+ $REPLACE_last
353
+ $APPEND_other
354
+ $TRANSFORM_VERB_VBG_VBD
355
+ $REPLACE_learning
356
+ $REPLACE_correct
357
+ $REPLACE_two
358
+ $REPLACE_days
359
+ $REPLACE_difficult
360
+ $REPLACE_never
361
+ $APPEND__
362
+ $REPLACE_'d
363
+ $APPEND_your
364
+ $REPLACE_us
365
+ $REPLACE_foreign
366
+ $REPLACE_entry
367
+ $APPEND_!
368
+ $REPLACE_Japan
369
+ $APPEND_;
370
+ $REPLACE_tell
371
+ $REPLACE_give
372
+ $REPLACE_decided
373
+ $APPEND_during
374
+ $REPLACE_Also
375
+ $APPEND_his
376
+ $REPLACE_speak
377
+ $REPLACE_came
378
+ $REPLACE_little
379
+ $APPEND_while
380
+ $TRANSFORM_VERB_VBZ_VBD
381
+ $APPEND_things
382
+ $REPLACE_especially
383
+ $REPLACE_Recently
384
+ $REPLACE_come
385
+ $APPEND_especially
386
+ $REPLACE_needed
387
+ $APPEND_make
388
+ $REPLACE_whether
389
+ $REPLACE_felt
390
+ $REPLACE_Although
391
+ $REPLACE_someone
392
+ $REPLACE_As
393
+ $REPLACE_great
394
+ $REPLACE_today
395
+ $APPEND_since
396
+ $REPLACE_hard
397
+ $REPLACE_For
398
+ $REPLACE_became
399
+ $REPLACE_between
400
+ $REPLACE_beautiful
401
+ $REPLACE_life
402
+ $REPLACE_why
403
+ $APPEND_though
404
+ $APPEND_There
405
+ $APPEND_going
406
+ $REPLACE_long
407
+ $APPEND_where
408
+ $REPLACE_believe
409
+ $REPLACE_website
410
+ $REPLACE_heard
411
+ $REPLACE_job
412
+ $REPLACE_home
413
+ $REPLACE_'re
414
+ $REPLACE_But
415
+ $REPLACE_anyone
416
+ $REPLACE_again
417
+ $REPLACE_bad
418
+ $REPLACE_recently
419
+ $APPEND_here
420
+ $REPLACE_practice
421
+ $REPLACE_often
422
+ $APPEND_got
423
+ $APPEND_feel
424
+ $REPLACE_saw
425
+ $REPLACE_quickly
426
+ $REPLACE_language
427
+ $REPLACE_wanted
428
+ $APPEND_each
429
+ $REPLACE_put
430
+ $REPLACE_done
431
+ $REPLACE_minutes
432
+ $REPLACE_each
433
+ $APPEND_she
434
+ $REPLACE_grammar
435
+ $REPLACE_watch
436
+ $REPLACE_happy
437
+ $REPLACE_back
438
+ $REPLACE_friend
439
+ $REPLACE_off
440
+ $REPLACE_He
441
+ $REPLACE_Since
442
+ $APPEND_something
443
+ $APPEND_using
444
+ $APPEND_At
445
+ $REPLACE_university
446
+ $REPLACE_country
447
+ $REPLACE_watching
448
+ $REPLACE_received
449
+ $REPLACE_enough
450
+ $REPLACE_weather
451
+ $REPLACE_usually
452
+ $APPEND_back
453
+ $REPLACE_happened
454
+ $APPEND_having
455
+ $REPLACE_always
456
+ $APPEND_does
457
+ $REPLACE_After
458
+ $REPLACE_try
459
+ $REPLACE_start
460
+ $APPEND_already
461
+ $REPLACE_talk
462
+ $REPLACE_thing
463
+ $APPEND_But
464
+ $APPEND_For
465
+ $REPLACE_Then
466
+ $REPLACE_fun
467
+ $REPLACE_soon
468
+ $REPLACE_starting
469
+ $REPLACE_away
470
+ $APPEND_want
471
+ $REPLACE_asked
472
+ $APPEND_went
473
+ $REPLACE_trip
474
+ $REPLACE_new
475
+ $REPLACE_right
476
+ $APPEND_after
477
+ $REPLACE_keep
478
+ $REPLACE_interesting
479
+ $REPLACE_together
480
+ $REPLACE_Do
481
+ $APPEND_So
482
+ $REPLACE_beginning
483
+ $APPEND_myself
484
+ $REPLACE_getting
485
+ $APPEND_On
486
+ $REPLACE_restaurant
487
+ $REPLACE_looking
488
+ $REPLACE_children
489
+ $APPEND_last
490
+ $REPLACE_college
491
+ $APPEND_right
492
+ $REPLACE_stay
493
+ $REPLACE_year
494
+ $REPLACE_live
495
+ $REPLACE_travel
496
+ $REPLACE_favorite
497
+ $REPLACE_read
498
+ $APPEND_well
499
+ $REPLACE_written
500
+ $REPLACE_months
501
+ $APPEND_yet
502
+ $APPEND_first
503
+ $APPEND_most
504
+ $REPLACE_look
505
+ $REPLACE_tried
506
+ $REPLACE_clothes
507
+ $REPLACE_[
508
+ $REPLACE_kind
509
+ $APPEND_its
510
+ $REPLACE_&
511
+ $REPLACE_remember
512
+ $APPEND_him
513
+ $REPLACE_problem
514
+ $APPEND_*
515
+ $REPLACE_meet
516
+ $REPLACE_gave
517
+ $REPLACE_either
518
+ $REPLACE_makes
519
+ $REPLACE_elderly
520
+ $REPLACE_hobbies
521
+ $REPLACE_easily
522
+ $REPLACE_important
523
+ $APPEND_take
524
+ $APPEND_thing
525
+ $REPLACE_vocabulary
526
+ $REPLACE_listening
527
+ $REPLACE_must
528
+ $REPLACE_hours
529
+ $REPLACE_place
530
+ $REPLACE_While
531
+ $REPLACE_without
532
+ $REPLACE_end
533
+ $REPLACE_Korean
534
+ $REPLACE_Therefore
535
+ $REPLACE_working
536
+ $REPLACE_high
537
+ $REPLACE_house
538
+ $REPLACE_already
539
+ $APPEND_good
540
+ $REPLACE_opportunity
541
+ $APPEND_many
542
+ $REPLACE_family
543
+ $REPLACE_During
544
+ $REPLACE_First
545
+ $APPEND_both
546
+ $REPLACE_once
547
+ $REPLACE_experience
548
+ $REPLACE_tomorrow
549
+ $APPEND_these
550
+ $REPLACE_true
551
+ $APPEND_day
552
+ $REPLACE_leave
553
+ $APPEND_When
554
+ $REPLACE_watched
555
+ $APPEND_person
556
+ $REPLACE_best
557
+ $REPLACE_harder
558
+ $REPLACE_Today
559
+ $REPLACE_morning
560
+ $REPLACE_If
561
+ $REPLACE_woke
562
+ $APPEND_into
563
+ $APPEND_made
564
+ $REPLACE_foreigners
565
+ $REPLACE_part
566
+ $APPEND_ever
567
+ $APPEND_probably
568
+ $APPEND_way
569
+ $APPEND_over
570
+ $APPEND_n't
571
+ $REPLACE_towards
572
+ $REPLACE_three
573
+ $REPLACE_One
574
+ $REPLACE_studied
575
+ $REPLACE_nervous
576
+ $REPLACE_forward
577
+ $REPLACE_seen
578
+ $REPLACE_Chinese
579
+ $REPLACE_night
580
+ $APPEND_own
581
+ $REPLACE_taught
582
+ $APPEND_usually
583
+ $REPLACE_To
584
+ $REPLACE_communicate
585
+ $APPEND_Japanese
586
+ $REPLACE_entries
587
+ $REPLACE_traveling
588
+ $REPLACE_site
589
+ $REPLACE_difference
590
+ $APPEND_those
591
+ $TRANSFORM_VERB_VBD_VBZ
592
+ $REPLACE_rainy
593
+ $REPLACE_play
594
+ $REPLACE_comfortable
595
+ $REPLACE_recommend
596
+ $REPLACE_coming
597
+ $REPLACE_Is
598
+ $REPLACE_asleep
599
+ $REPLACE_realized
600
+ $APPEND_recently
601
+ $APPEND_around
602
+ $REPLACE_men
603
+ $REPLACE_Finally
604
+ $REPLACE_excited
605
+ $REPLACE_near
606
+ $APPEND_often
607
+ $REPLACE_t
608
+ $REPLACE_next
609
+ $REPLACE_ever
610
+ $APPEND_Today
611
+ $REPLACE_taking
612
+ $APPEND_started
613
+ $REPLACE_please
614
+ $APPEND_than
615
+ $REPLACE_sentences
616
+ $APPEND_What
617
+ $REPLACE_She
618
+ $APPEND_work
619
+ $REPLACE_visit
620
+ $REPLACE_surprised
621
+ $REPLACE_show
622
+ $REPLACE_You
623
+ $APPEND_used
624
+ $REPLACE_ago
625
+ $APPEND_Even
626
+ $APPEND_That
627
+ $REPLACE_similar
628
+ $APPEND_soon
629
+ $REPLACE_less
630
+ $REPLACE_enjoy
631
+ $REPLACE_diaries
632
+ $REPLACE_speaking
633
+ $REPLACE_past
634
+ $APPEND_through
635
+ $REPLACE_women
636
+ $REPLACE_planned
637
+ $REPLACE_later
638
+ $REPLACE_looked
639
+ $REPLACE_yet
640
+ $APPEND_us
641
+ $REPLACE_And
642
+ $APPEND_'d
643
+ $APPEND_As
644
+ $REPLACE_healthy
645
+ $APPEND_might
646
+ $REPLACE_class
647
+ $REPLACE_Now
648
+ $REPLACE_outside
649
+ $REPLACE_tired
650
+ $APPEND_else
651
+ $REPLACE_Please
652
+ $REPLACE_problems
653
+ $APPEND_They
654
+ $REPLACE_food
655
+ $REPLACE_reading
656
+ $APPEND_&
657
+ $APPEND_think
658
+ $REPLACE_finished
659
+ $REPLACE_popular
660
+ $REPLACE_Are
661
+ $APPEND_2
662
+ $APPEND_may
663
+ $APPEND_found
664
+ $APPEND_whether
665
+ $APPEND_We
666
+ $REPLACE_How
667
+ $REPLACE_continue
668
+ $REPLACE_everyday
669
+ $REPLACE_daily
670
+ $REPLACE_talked
671
+ $APPEND_new
672
+ $REPLACE_reason
673
+ $REPLACE_means
674
+ $REPLACE_opportunities
675
+ $APPEND_different
676
+ $REPLACE_business
677
+ $REPLACE_making
678
+ $APPEND_ago
679
+ $REPLACE_favourite
680
+ $REPLACE_bit
681
+ $REPLACE_delicious
682
+ $APPEND_every
683
+ $REPLACE_spend
684
+ $APPEND_finally
685
+ $APPEND_part
686
+ $REPLACE_yesterday
687
+ $REPLACE_down
688
+ $REPLACE_times
689
+ $REPLACE_holiday
690
+ $REPLACE_nice
691
+ $REPLACE_although
692
+ $REPLACE_earlier
693
+ $REPLACE_Can
694
+ $REPLACE_due
695
+ $APPEND_help
696
+ $REPLACE_caught
697
+ $REPLACE_quite
698
+ $APPEND_kind
699
+ $REPLACE_words
700
+ $REPLACE_movie
701
+ $REPLACE_else
702
+ $APPEND_together
703
+ $REPLACE_advertisement
704
+ $APPEND_Is
705
+ $APPEND_between
706
+ $APPEND_enough
707
+ $REPLACE_let
708
+ $REPLACE_instead
709
+ $REPLACE_disappointed
710
+ $REPLACE_Have
711
+ $APPEND_After
712
+ $APPEND_no
713
+ $APPEND_doing
714
+ $REPLACE_skills
715
+ $APPEND_instead
716
+ $REPLACE_Some
717
+ $REPLACE_Actually
718
+ $APPEND_3
719
+ $REPLACE_choose
720
+ $REPLACE_An
721
+ $APPEND_away
722
+ $REPLACE_Does
723
+ $REPLACE_played
724
+ $APPEND_Because
725
+ $REPLACE_both
726
+ $REPLACE_easier
727
+ $REPLACE_others
728
+ $REPLACE_eat
729
+ $REPLACE_onto
730
+ $REPLACE_sometimes
731
+ $REPLACE_began
732
+ $REPLACE_usual
733
+ $REPLACE_expensive
734
+ $APPEND_To
735
+ $APPEND_actually
736
+ $REPLACE_old
737
+ $APPEND_see
738
+ $APPEND_know
739
+ $REPLACE_few
740
+ $APPEND_why
741
+ $APPEND_sometimes
742
+ $REPLACE_Unfortunately
743
+ $APPEND_use
744
+ $REPLACE_older
745
+ $REPLACE_joined
746
+ $REPLACE_own
747
+ $REPLACE_raining
748
+ $REPLACE_themselves
749
+ $REPLACE_example
750
+ $APPEND_able
751
+ $REPLACE_arrived
752
+ $REPLACE_whom
753
+ $REPLACE_nothing
754
+ $REPLACE_fluently
755
+ $APPEND_getting
756
+ $REPLACE_convenient
757
+ $REPLACE_met
758
+ $REPLACE_becoming
759
+ $APPEND_better
760
+ $APPEND_become
761
+ $REPLACE_lots
762
+ $REPLACE_fast
763
+ $REPLACE_memories
764
+ $REPLACE_worse
765
+ $REPLACE_interested
766
+ $REPLACE_hear
767
+ $REPLACE_Secondly
768
+ $REPLACE_thoughts
769
+ $REPLACE_journal
770
+ $REPLACE_bought
771
+ $REPLACE_useful
772
+ $REPLACE_teach
773
+ $APPEND_learn
774
+ $REPLACE_throughout
775
+ $REPLACE_money
776
+ $REPLACE_change
777
+ $REPLACE_imagine
778
+ $REPLACE_late
779
+ $REPLACE_mine
780
+ $REPLACE_same
781
+ $REPLACE_future
782
+ $REPLACE_sure
783
+ $REPLACE_students
784
+ $REPLACE_along
785
+ $REPLACE_exercise
786
+ $REPLACE_opinion
787
+ $REPLACE_return
788
+ $REPLACE_cause
789
+ $REPLACE_month
790
+ $REPLACE_stop
791
+ $REPLACE_worried
792
+ $REPLACE_trying
793
+ $REPLACE_health
794
+ $REPLACE_American
795
+ $APPEND_writing
796
+ $REPLACE_enjoyed
797
+ $REPLACE_second
798
+ $APPEND_look
799
+ $APPEND_old
800
+ $REPLACE_finally
801
+ $REPLACE_wish
802
+ $REPLACE_famous
803
+ $REPLACE_talking
804
+ $REPLACE_abroad
805
+ $REPLACE_information
806
+ $APPEND_And
807
+ $REPLACE_stopped
808
+ $REPLACE_lose
809
+ $REPLACE_sentence
810
+ $REPLACE_pronunciation
811
+ $REPLACE_feeling
812
+ $REPLACE_younger
813
+ $REPLACE_passed
814
+ $REPLACE_among
815
+ $REPLACE_paid
816
+ $REPLACE_playing
817
+ $REPLACE_attend
818
+ $REPLACE_early
819
+ $REPLACE_All
820
+ $REPLACE_Maybe
821
+ $APPEND_high
822
+ $REPLACE_child
823
+ $APPEND_anything
824
+ $REPLACE_order
825
+ $REPLACE_saying
826
+ $REPLACE_families
827
+ $REPLACE_special
828
+ $REPLACE_spent
829
+ $REPLACE_appreciate
830
+ $REPLACE_successful
831
+ $APPEND_If
832
+ $REPLACE_turned
833
+ $REPLACE_cities
834
+ $REPLACE_definitely
835
+ $REPLACE_fell
836
+ $APPEND_try
837
+ $APPEND_skills
838
+ $REPLACE_world
839
+ $REPLACE_technology
840
+ $REPLACE_small
841
+ $REPLACE_wrote
842
+ $REPLACE_takes
843
+ $REPLACE_seems
844
+ $REPLACE_various
845
+ $REPLACE_actually
846
+ $REPLACE_exam
847
+ $REPLACE_free
848
+ $REPLACE_gone
849
+ $REPLACE_strong
850
+ $REPLACE_receive
851
+ $REPLACE_Yesterday
852
+ $REPLACE_probably
853
+ $REPLACE_Every
854
+ $REPLACE_normal
855
+ $REPLACE_ask
856
+ $REPLACE_company
857
+ $REPLACE_environment
858
+ $REPLACE_buy
859
+ $REPLACE_shows
860
+ $REPLACE_easy
861
+ $REPLACE_sincerely
862
+ $REPLACE_vacation
863
+ $REPLACE_far
864
+ $REPLACE_sauce
865
+ $REPLACE_teacher
866
+ $REPLACE_living
867
+ $REPLACE_parties
868
+ $REPLACE_goes
869
+ $REPLACE_Christmas
870
+ $REPLACE_Hello
871
+ $APPEND_start
872
+ $REPLACE_hurt
873
+ $REPLACE_wonder
874
+ $REPLACE_mind
875
+ $REPLACE_possible
876
+ $REPLACE_thinking
877
+ $APPEND_Now
878
+ $REPLACE_relationship
879
+ $REPLACE_plan
880
+ $REPLACE_man
881
+ $REPLACE_woman
882
+ $REPLACE_activities
883
+ $APPEND_down
884
+ $REPLACE_returned
885
+ $REPLACE_pay
886
+ $REPLACE_ability
887
+ $REPLACE_exciting
888
+ $REPLACE_safe
889
+ $APPEND_off
890
+ $APPEND_until
891
+ $REPLACE_goal
892
+ $APPEND_either
893
+ $REPLACE_mistakes
894
+ $REPLACE_within
895
+ $REPLACE_etc
896
+ $REPLACE_cost
897
+ $REPLACE_particular
898
+ $REPLACE_sense
899
+ $REPLACE_longer
900
+ $REPLACE_advice
901
+ $REPLACE_several
902
+ $APPEND_Do
903
+ $APPEND_currently
904
+ $REPLACE_answer
905
+ $REPLACE_Even
906
+ $REPLACE_held
907
+ $REPLACE_online
908
+ $APPEND_life
909
+ $REPLACE_Firstly
910
+ $REPLACE_close
911
+ $APPEND_study
912
+ $REPLACE_wear
913
+ $APPEND_language
914
+ $REPLACE_number
915
+ $REPLACE_young
916
+ $APPEND_whole
917
+ $APPEND_two
918
+ $APPEND_Then
919
+ $REPLACE_large
920
+ $REPLACE_eating
921
+ $REPLACE_given
922
+ $REPLACE_video
923
+ $REPLACE_spoke
924
+ $REPLACE_Another
925
+ $APPEND_rather
926
+ $APPEND_Of
927
+ $APPEND_once
928
+ $REPLACE_wonderful
929
+ $APPEND_must
930
+ $REPLACE_tonight
931
+ $REPLACE_Their
932
+ $REPLACE_languages
933
+ $REPLACE_big
934
+ $REPLACE_break
935
+ $REPLACE_government
936
+ $REPLACE_staff
937
+ $REPLACE_prepare
938
+ $REPLACE_quit
939
+ $REPLACE_completely
940
+ $REPLACE_ourselves
941
+ $APPEND_He
942
+ $REPLACE_nor
943
+ $APPEND_someone
944
+ $REPLACE_sad
945
+ $REPLACE_against
946
+ $REPLACE_anymore
947
+ $APPEND_give
948
+ $REPLACE_stayed
949
+ $REPLACE_achieve
950
+ $APPEND_An
951
+ $APPEND_Right
952
+ $REPLACE_maybe
953
+ $REPLACE_lost
954
+ $APPEND_came
955
+ $REPLACE_accommodation
956
+ $APPEND_One
957
+ $APPEND_"
958
+ $REPLACE_daughter
959
+ $APPEND_next
960
+ $REPLACE_love
961
+ $REPLACE_cheap
962
+ $REPLACE_poor
963
+ $REPLACE_By
964
+ $REPLACE_whole
965
+ $REPLACE_bring
966
+ $REPLACE_real
967
+ $REPLACE_worked
968
+ $REPLACE_almost
969
+ $REPLACE_prefer
970
+ $APPEND_find
971
+ $REPLACE_everybody
972
+ $APPEND_another
973
+ $REPLACE_create
974
+ $REPLACE_addition
975
+ $REPLACE_turn
976
+ $REPLACE_situation
977
+ $APPEND_due
978
+ $REPLACE_boyfriend
979
+ $APPEND_home
980
+ $REPLACE_finish
981
+ $REPLACE_rather
982
+ $APPEND_said
983
+ $APPEND_'re
984
+ $REPLACE_careful
985
+ $APPEND_long
986
+ $REPLACE_recommended
987
+ $REPLACE_customers
988
+ $REPLACE_level
989
+ $REPLACE_died
990
+ $REPLACE_comes
991
+ $APPEND_You
992
+ $REPLACE_glad
993
+ $APPEND_come
994
+ $REPLACE_short
995
+ $REPLACE_knowledge
996
+ $REPLACE_set
997
+ $REPLACE_Lang
998
+ $REPLACE_planning
999
+ $REPLACE_confidence
1000
+ $REPLACE_gain
1001
+ $REPLACE_check
1002
+ $REPLACE_immediately
1003
+ $REPLACE_scared
1004
+ $REPLACE_conversation
1005
+ $REPLACE_native
1006
+ $REPLACE_His
1007
+ $REPLACE_full
1008
+ $REPLACE_express
1009
+ $REPLACE_married
1010
+ $REPLACE_shopping
1011
+ $APPEND_working
1012
+ $APPEND_food
1013
+ $REPLACE_research
1014
+ $REPLACE_whenever
1015
+ $REPLACE_corrections
1016
+ $REPLACE_weird
1017
+ $APPEND_quite
1018
+ $REPLACE_necessary
1019
+ $REPLACE_Korea
1020
+ $REPLACE_whose
1021
+ $REPLACE_higher
1022
+ $APPEND_entries
1023
+ $REPLACE_Starting
1024
+ $REPLACE_attended
1025
+ $APPEND_5
1026
+ $APPEND_past
1027
+ $REPLACE_realize
1028
+ $REPLACE_cold
1029
+ $APPEND_playing
1030
+ $REPLACE_ended
1031
+ $REPLACE_inside
1032
+ $APPEND_please
1033
+ $REPLACE_coffee
1034
+ $REPLACE_enjoyable
1035
+ $APPEND_took
1036
+ $REPLACE_economic
1037
+ $APPEND_member
1038
+ $REPLACE_natural
1039
+ $REPLACE_registered
1040
+ $REPLACE_idea
1041
+ $APPEND_Recently
1042
+ $APPEND_<
1043
+ $REPLACE_caused
1044
+ $REPLACE_student
1045
+ $REPLACE_questions
1046
+ $REPLACE_music
1047
+ $REPLACE_story
1048
+ $REPLACE_happiness
1049
+ $REPLACE_gives
1050
+ $APPEND_She
1051
+ $REPLACE_Especially
1052
+ $REPLACE_energy
1053
+ $REPLACE_available
1054
+ $REPLACE_anywhere
1055
+ $REPLACE_taken
1056
+ $REPLACE_four
1057
+ $REPLACE_sleep
1058
+ $REPLACE_afraid
1059
+ $REPLACE_Everyone
1060
+ $APPEND_learning
1061
+ $REPLACE_ate
1062
+ $APPEND_few
1063
+ $APPEND_Since
1064
+ $REPLACE_helps
1065
+ $REPLACE_vegetables
1066
+ $REPLACE_kept
1067
+ $REPLACE_gets
1068
+ $REPLACE_explain
1069
+ $REPLACE_girlfriend
1070
+ $REPLACE_choice
1071
+ $REPLACE_waiting
1072
+ $APPEND_put
1073
+ $APPEND_yesterday
1074
+ $APPEND_During
1075
+ $REPLACE_From
1076
+ $APPEND_starting
1077
+ $REPLACE_scary
1078
+ $REPLACE_program
1079
+ $REPLACE_fish
1080
+ $REPLACE_hand
1081
+ $REPLACE_enter
1082
+ $APPEND_friends
1083
+ $REPLACE_decide
1084
+ $REPLACE_score
1085
+ $REPLACE_lonely
1086
+ $APPEND_easily
1087
+ $REPLACE_discovered
1088
+ $REPLACE_seeing
1089
+ $REPLACE_message
1090
+ $REPLACE_week
1091
+ $APPEND_studying
1092
+ $REPLACE_universities
1093
+ $REPLACE_introduce
1094
+ $REPLACE_common
1095
+ $REPLACE_heavily
1096
+ $REPLACE_People
1097
+ $REPLACE_care
1098
+ $APPEND_hard
1099
+ $REPLACE_hit
1100
+ $REPLACE_America
1101
+ $REPLACE_point
1102
+ $APPEND_need
1103
+ $REPLACE_funny
1104
+ $APPEND_almost
1105
+ $REPLACE_pass
1106
+ $REPLACE_temperature
1107
+ $REPLACE_performance
1108
+ $REPLACE_call
1109
+ $REPLACE_extremely
1110
+ $REPLACE_chance
1111
+ $REPLACE_main
1112
+ $REPLACE_season
1113
+ $REPLACE_series
1114
+ $REPLACE_nearby
1115
+ $REPLACE_license
1116
+ $REPLACE_expected
1117
+ $REPLACE_Last
1118
+ $REPLACE_picture
1119
+ $REPLACE_movies
1120
+ $APPEND_Also
1121
+ $REPLACE_seriously
1122
+ $REPLACE_via
1123
+ $REPLACE_running
1124
+ $REPLACE_run
1125
+ $REPLACE_regarding
1126
+ $REPLACE_chose
1127
+ $REPLACE_moment
1128
+ $APPEND_feeling
1129
+ $APPEND_bit
1130
+ $REPLACE_occurred
1131
+ $REPLACE_travelling
1132
+ $REPLACE_brought
1133
+ $APPEND_makes
1134
+ $REPLACE_amount
1135
+ $REPLACE_speakers
1136
+ $REPLACE_scenery
1137
+ $APPEND_year
1138
+ $APPEND_quickly
1139
+ $REPLACE_grateful
1140
+ $REPLACE_character
1141
+ $REPLACE_sleepy
1142
+ $REPLACE_bed
1143
+ $REPLACE_increase
1144
+ $APPEND_Good
1145
+ $REPLACE_area
1146
+ $REPLACE_certain
1147
+ $REPLACE_ways
1148
+ $REPLACE_looks
1149
+ $REPLACE_Nowadays
1150
+ $REPLACE_lucky
1151
+ $REPLACE_current
1152
+ $REPLACE_traditional
1153
+ $APPEND_write
1154
+ $APPEND_anymore
1155
+ $REPLACE_noticed
1156
+ $REPLACE_Did
1157
+ $REPLACE_matter
1158
+ $REPLACE_worry
1159
+ $REPLACE_angry
1160
+ $REPLACE_With
1161
+ $REPLACE_biggest
1162
+ $REPLACE_alcohol
1163
+ $APPEND_left
1164
+ $REPLACE_move
1165
+ $REPLACE_succeed
1166
+ $REPLACE_post
1167
+ $REPLACE_]
1168
+ $REPLACE_abilities
1169
+ $REPLACE_earthquake
1170
+ $REPLACE_visited
1171
+ $APPEND_]
1172
+ $REPLACE_speech
1173
+ $REPLACE_Thank
1174
+ $REPLACE_fewer
1175
+ $REPLACE_happen
1176
+ $APPEND_tomorrow
1177
+ $REPLACE_dinner
1178
+ $REPLACE_quiet
1179
+ $APPEND_type
1180
+ $REPLACE_previous
1181
+ $REPLACE_Furthermore
1182
+ $REPLACE_colleagues
1183
+ $REPLACE_present
1184
+ $REPLACE_No
1185
+ $REPLACE_chicken
1186
+ $REPLACE_city
1187
+ $REPLACE_weeks
1188
+ $REPLACE_develop
1189
+ $REPLACE_join
1190
+ $APPEND_Last
1191
+ $REPLACE_except
1192
+ $REPLACE_economy
1193
+ $REPLACE_sang
1194
+ $REPLACE_phrase
1195
+ $REPLACE_provide
1196
+ $REPLACE_lately
1197
+ $REPLACE_experienced
1198
+ $REPLACE_won
1199
+ $REPLACE_Though
1200
+ $APPEND_Therefore
1201
+ $APPEND_piece
1202
+ $REPLACE_including
1203
+ $REPLACE_husband
1204
+ $REPLACE_changed
1205
+ $REPLACE_view
1206
+ $REPLACE_becomes
1207
+ $REPLACE_share
1208
+ $APPEND_place
1209
+ $REPLACE_test
1210
+ $APPEND_4
1211
+ $APPEND_years
1212
+ $REPLACE_Our
1213
+ $REPLACE_wrong
1214
+ $REPLACE_seemed
1215
+ $REPLACE_wondering
1216
+ $REPLACE_computer
1217
+ $REPLACE_known
1218
+ $REPLACE_culture
1219
+ $REPLACE_Hong
1220
+ $REPLACE_clear
1221
+ $REPLACE_birthday
1222
+ $REPLACE_despite
1223
+ $REPLACE_front
1224
+ $REPLACE_sound
1225
+ $REPLACE_thankful
1226
+ $REPLACE_practise
1227
+ $REPLACE_Will
1228
+ $REPLACE_atmosphere
1229
+ $REPLACE_activity
1230
+ $APPEND_movie
1231
+ $REPLACE_China
1232
+ $REPLACE_reasons
1233
+ $REPLACE_name
1234
+ $REPLACE_serious
1235
+ $REPLACE_2
1236
+ $REPLACE_warm
1237
+ $REPLACE_depressed
1238
+ $REPLACE_simple
1239
+ $APPEND_trying
1240
+ $REPLACE_alone
1241
+ $APPEND_`
1242
+ $REPLACE_listen
1243
+ $REPLACE__
1244
+ $REPLACE_faithfully
1245
+ $REPLACE_Which
1246
+ $REPLACE_relieved
1247
+ $APPEND_1
1248
+ $REPLACE_price
1249
+ $REPLACE_store
1250
+ $REPLACE_lower
1251
+ $REPLACE_strange
1252
+ $REPLACE_game
1253
+ $REPLACE_sick
1254
+ $REPLACE_focus
1255
+ $REPLACE_suddenly
1256
+ $APPEND_Please
1257
+ $REPLACE_Would
1258
+ $REPLACE_traveled
1259
+ $REPLACE_event
1260
+ $REPLACE_ones
1261
+ $APPEND_Yesterday
1262
+ $APPEND_making
1263
+ $REPLACE_remembered
1264
+ $REPLACE_s
1265
+ $REPLACE_Lately
1266
+ $APPEND_S
1267
+ $REPLACE_member
1268
+ $APPEND_decided
1269
+ $REPLACE_across
1270
+ $REPLACE_entered
1271
+ $APPEND_maybe
1272
+ $REPLACE_University
1273
+ $REPLACE_difficulties
1274
+ $REPLACE_terrible
1275
+ $REPLACE_places
1276
+ $REPLACE_pretty
1277
+ $REPLACE_weekend
1278
+ $REPLACE_decision
1279
+ $APPEND_later
1280
+ $REPLACE_anybody
1281
+ $REPLACE_result
1282
+ $REPLACE_buses
1283
+ $REPLACE_Fortunately
1284
+ $APPEND_suddenly
1285
+ $REPLACE_slept
1286
+ $APPEND_school
1287
+ $REPLACE_group
1288
+ $REPLACE_electricity
1289
+ $REPLACE_fan
1290
+ $REPLACE_supposed
1291
+ $REPLACE_recent
1292
+ $REPLACE_wants
1293
+ $APPEND_10
1294
+ $REPLACE_low
1295
+ $APPEND_continue
1296
+ $APPEND_keep
1297
+ $APPEND_words
1298
+ $APPEND_Sometimes
1299
+ $REPLACE_type
1300
+ $REPLACE_Tomorrow
1301
+ $REPLACE_okay
1302
+ $APPEND_class
1303
+ $REPLACE_Her
1304
+ $APPEND_everything
1305
+ $APPEND_university
1306
+ $REPLACE_behind
1307
+ $REPLACE_clean
1308
+ $REPLACE_anxious
1309
+ $REPLACE_follow
1310
+ $APPEND_amount
1311
+ $REPLACE_parents
1312
+ $APPEND_While
1313
+ $REPLACE_email
1314
+ $REPLACE_mean
1315
+ $REPLACE_Most
1316
+ $APPEND_watching
1317
+ $REPLACE_taste
1318
+ $APPEND_taking
1319
+ $REPLACE_Sometimes
1320
+ $REPLACE_French
1321
+ $REPLACE_wearing
1322
+ $APPEND_weather
1323
+ $REPLACE_law
1324
+ $REPLACE_difficulty
1325
+ $APPEND_job
1326
+ $REPLACE_training
1327
+ $REPLACE_crowded
1328
+ $APPEND_All
1329
+ $REPLACE_gotten
1330
+ $REPLACE_catch
1331
+ $REPLACE_method
1332
+ $REPLACE_public
1333
+ $REPLACE_classes
1334
+ $REPLACE_seem
1335
+ $APPEND_show
1336
+ $REPLACE_question
1337
+ $REPLACE_development
1338
+ $REPLACE_says
1339
+ $REPLACE_faster
1340
+ $REPLACE_mother
1341
+ $REPLACE_guitar
1342
+ $REPLACE_teeth
1343
+ $REPLACE_song
1344
+ $REPLACE_lesson
1345
+ $REPLACE_knew
1346
+ $REPLACE_sent
1347
+ $REPLACE_unable
1348
+ $REPLACE_alot
1349
+ $REPLACE_Those
1350
+ $REPLACE_concert
1351
+ $APPEND_speak
1352
+ $REPLACE_software
1353
+ $REPLACE_German
1354
+ $REPLACE_Currently
1355
+ $REPLACE_yourself
1356
+ $REPLACE_fact
1357
+ $REPLACE_major
1358
+ $REPLACE_snowboarding
1359
+ $REPLACE_apartment
1360
+ $REPLACE_none
1361
+ $REPLACE_Here
1362
+ $REPLACE_reply
1363
+ $REPLACE_lived
1364
+ $APPEND_site
1365
+ $REPLACE_introduction
1366
+ $REPLACE_exchange
1367
+ $APPEND_level
1368
+ $REPLACE_iPhone
1369
+ $REPLACE_consider
1370
+ $REPLACE_leaves
1371
+ $APPEND_early
1372
+ $REPLACE_requires
1373
+ $REPLACE_Saturday
1374
+ $TRANSFORM_CASE_CAPITAL_1
1375
+ $REPLACE_further
1376
+ $REPLACE_absolutely
1377
+ $REPLACE_realised
1378
+ $APPEND_heard
1379
+ $REPLACE_following
1380
+ $REPLACE_doctor
1381
+ $REPLACE_beginner
1382
+ $APPEND_against
1383
+ $REPLACE_embarrassed
1384
+ $REPLACE_correctly
1385
+ $REPLACE_half
1386
+ $REPLACE_dangerous
1387
+ $REPLACE_moved
1388
+ $REPLACE_complete
1389
+ $REPLACE_perfect
1390
+ $REPLACE_Anyway
1391
+ $REPLACE_hold
1392
+ $REPLACE_differences
1393
+ $REPLACE_lunch
1394
+ $REPLACE_himself
1395
+ $REPLACE_based
1396
+ $APPEND_thought
1397
+ $REPLACE_reach
1398
+ $REPLACE_cheaper
1399
+ $REPLACE_loud
1400
+ $APPEND_By
1401
+ $APPEND_everyone
1402
+ $REPLACE_leaving
1403
+ $REPLACE_released
1404
+ $REPLACE_fine
1405
+ $REPLACE_Australia
1406
+ $REPLACE_style
1407
+ $REPLACE_deal
1408
+ $APPEND_along
1409
+ $REPLACE_satisfied
1410
+ $REPLACE_Of
1411
+ $REPLACE_variety
1412
+ $APPEND_improve
1413
+ $REPLACE_under
1414
+ $REPLACE_giving
1415
+ $REPLACE_party
1416
+ $APPEND_understand
1417
+ $REPLACE_everywhere
1418
+ $REPLACE_confident
1419
+ $APPEND_play
1420
+ $REPLACE_slow
1421
+ $REPLACE_centre
1422
+ $REPLACE_light
1423
+ $REPLACE_trouble
1424
+ $REPLACE_Its
1425
+ $APPEND_became
1426
+ $REPLACE_begin
1427
+ $REPLACE_grade
1428
+ $REPLACE_exams
1429
+ $REPLACE_busy
1430
+ $REPLACE_nbsp
1431
+ $REPLACE_3
1432
+ $REPLACE_control
1433
+ $REPLACE_characters
1434
+ $REPLACE_needs
1435
+ $REPLACE_pictures
1436
+ $APPEND_New
1437
+ $APPEND_test
1438
+ $REPLACE_currently
1439
+ $REPLACE_describe
1440
+ $REPLACE_uncomfortable
1441
+ $REPLACE_affected
1442
+ $REPLACE_songs
1443
+ $REPLACE_helped
1444
+ $REPLACE_head
1445
+ $APPEND_let
1446
+ $REPLACE_costs
1447
+ $REPLACE_five
1448
+ $REPLACE_slowly
1449
+ $REPLACE_1
1450
+ $REPLACE_causes
1451
+ $REPLACE_ashamed
1452
+ $APPEND_coming
1453
+ $APPEND_everyday
1454
+ $REPLACE_products
1455
+ $REPLACE_dishes
1456
+ $REPLACE_least
1457
+ $REPLACE_wore
1458
+ $REPLACE_internet
1459
+ $REPLACE_mentioned
1460
+ $APPEND_began
1461
+ $REPLACE_word
1462
+ $REPLACE_service
1463
+ $REPLACE_workers
1464
+ $REPLACE_continued
1465
+ $REPLACE_sounds
1466
+ $REPLACE_hour
1467
+ $REPLACE_jobs
1468
+ $REPLACE_career
1469
+ $REPLACE_personal
1470
+ $REPLACE_piece
1471
+ $REPLACE_per
1472
+ $REPLACE_Regarding
1473
+ $REPLACE_entrance
1474
+ $REPLACE_improving
1475
+ $APPEND_=
1476
+ $REPLACE_areas
1477
+ $REPLACE_1st
1478
+ $REPLACE_mostly
1479
+ $REPLACE_lessons
1480
+ $REPLACE_drink
1481
+ $REPLACE_hair
1482
+ $APPEND_exactly
1483
+ $REPLACE_e
1484
+ $REPLACE_luck
1485
+ $REPLACE_members
1486
+ $APPEND_means
1487
+ $REPLACE_mistake
1488
+ $REPLACE_somewhere
1489
+ $APPEND_pair
1490
+ $REPLACE_tomatoes
1491
+ $APPEND_definitely
1492
+ $REPLACE_swimming
1493
+ $REPLACE_perform
1494
+ $REPLACE_compared
1495
+ $REPLACE_unfortunately
1496
+ $REPLACE_however
1497
+ $REPLACE_twice
1498
+ $REPLACE_society
1499
+ $APPEND_20
1500
+ $REPLACE_preparing
1501
+ $REPLACE_Two
1502
+ $APPEND_Japan
1503
+ $REPLACE_nobody
1504
+ $REPLACE_environmental
1505
+ $REPLACE_till
1506
+ $REPLACE_fall
1507
+ $REPLACE_spoken
1508
+ $REPLACE_forget
1509
+ $REPLACE_form
1510
+ $APPEND_number
1511
+ $APPEND_watch
1512
+ $APPEND_live
1513
+ $REPLACE_include
1514
+ $REPLACE_related
1515
+ $REPLACE_wait
1516
+ $APPEND_These
1517
+ $REPLACE_European
1518
+ $APPEND_tell
1519
+ $REPLACE_meeting
1520
+ $REPLACE_evening
1521
+ $REPLACE_nowadays
1522
+ $REPLACE_northern
1523
+ $REPLACE_convenience
1524
+ $REPLACE_performed
1525
+ $REPLACE_plans
1526
+ $REPLACE_competition
1527
+ $REPLACE_open
1528
+ $REPLACE_confused
1529
+ $REPLACE_practicing
1530
+ $REPLACE_quality
1531
+ $REPLACE_professional
1532
+ $REPLACE_maintain
1533
+ $REPLACE_pain
1534
+ $REPLACE_familiar
1535
+ $REPLACE_classical
1536
+ $REPLACE_shop
1537
+ $REPLACE_filled
1538
+ $REPLACE_improved
1539
+ $REPLACE_meant
1540
+ $APPEND_listening
1541
+ $REPLACE_ceremony
1542
+ $REPLACE_increasing
1543
+ $REPLACE_drove
1544
+ $APPEND_completely
1545
+ $REPLACE_account
1546
+ $REPLACE_developed
1547
+ $REPLACE_lack
1548
+ $REPLACE_purpose
1549
+ $REPLACE_upon
1550
+ $REPLACE_tasted
1551
+ $REPLACE_crazy
1552
+ $REPLACE_summer
1553
+ $REPLACE_regret
1554
+ $REPLACE_born
1555
+ $REPLACE_rain
1556
+ $REPLACE_weight
1557
+ $REPLACE_required
1558
+ $REPLACE_accept
1559
+ $REPLACE_cut
1560
+ $REPLACE_flew
1561
+ $REPLACE_waste
1562
+ $APPEND_ca
1563
+ $APPEND_trip
1564
+ $REPLACE_Going
1565
+ $REPLACE_excellent
1566
+ $REPLACE_created
1567
+ $REPLACE_reality
1568
+ $REPLACE_cultural
1569
+ $REPLACE_save
1570
+ $REPLACE_programs
1571
+ $REPLACE_painful
1572
+ $REPLACE_Many
1573
+ $REPLACE_dish
1574
+ $REPLACE_teaching
1575
+ $REPLACE_Studying
1576
+ $REPLACE_water
1577
+ $REPLACE_happens
1578
+ $REPLACE_pleased
1579
+ $REPLACE_ordinary
1580
+ $APPEND_practice
1581
+ $REPLACE_train
1582
+ $REPLACE_results
1583
+ $REPLACE_Italian
1584
+ $REPLACE_weak
1585
+ $REPLACE_period
1586
+ $REPLACE_above
1587
+ $REPLACE_hot
1588
+ $REPLACE_Not
1589
+ $REPLACE_feelings
1590
+ $REPLACE_mobile
1591
+ $REPLACE_walk
1592
+ $APPEND_game
1593
+ $REPLACE_impressed
1594
+ $APPEND_same
1595
+ $REPLACE_Germany
1596
+ $REPLACE_girl
1597
+ $REPLACE_closer
1598
+ $REPLACE_communication
1599
+ $REPLACE_worst
1600
+ $APPEND_No
1601
+ $REPLACE_located
1602
+ $REPLACE_phone
1603
+ $REPLACE_sit
1604
+ $REPLACE_Lastly
1605
+ $REPLACE_feels
1606
+ $APPEND_listen
1607
+ $APPEND_done
1608
+ $REPLACE_subtitles
1609
+ $REPLACE_Whenever
1610
+ $REPLACE_potatoes
1611
+ $REPLACE_fluent
1612
+ $REPLACE_amazing
1613
+ $REPLACE_neither
1614
+ $APPEND_With
1615
+ $APPEND_never
1616
+ $REPLACE_stressed
1617
+ $REPLACE_prevent
1618
+ $REPLACE_photos
1619
+ $APPEND_$
1620
+ $REPLACE_non
1621
+ $REPLACE_agree
1622
+ $REPLACE_Moreover
1623
+ $REPLACE_restaurants
1624
+ $REPLACE_types
1625
+ $REPLACE_office
1626
+ $REPLACE_studies
1627
+ $REPLACE_history
1628
+ $REPLACE_calm
1629
+ $REPLACE_walked
1630
+ $REPLACE_modern
1631
+ $APPEND_three
1632
+ $REPLACE_clothing
1633
+ $REPLACE_private
1634
+ $APPEND_little
1635
+ $APPEND_outside
1636
+ $APPEND_OR
1637
+ $REPLACE_simply
1638
+ $REPLACE_particularly
1639
+ $REPLACE_notice
1640
+ $REPLACE_side
1641
+ $APPEND_looked
1642
+ $REPLACE_YouTube
1643
+ $APPEND_students
1644
+ $REPLACE_afterwards
1645
+ $APPEND_reading
1646
+ $REPLACE_graduate
1647
+ $REPLACE_library
1648
+ $REPLACE_gained
1649
+ $REPLACE_bicycle
1650
+ $REPLACE_son
1651
+ $APPEND_compared
1652
+ $REPLACE_events
1653
+ $APPEND_Although
1654
+ $REPLACE_US
1655
+ $REPLACE_properly
1656
+ $APPEND_Maybe
1657
+ $APPEND_Can
1658
+ $APPEND_best
1659
+ $REPLACE_wondered
1660
+ $REPLACE_arrive
1661
+ $APPEND_say
1662
+ $REPLACE_considered
1663
+ $REPLACE_dream
1664
+ $REPLACE_feet
1665
+ $REPLACE_broke
1666
+ $APPEND_From
1667
+ $REPLACE_southern
1668
+ $REPLACE_hometown
1669
+ $APPEND_journal
1670
+ $REPLACE_Everything
1671
+ $APPEND_money
1672
+ $REPLACE_concentrate
1673
+ $REPLACE_stories
1674
+ $REPLACE_teachers
1675
+ $APPEND_happened
1676
+ $REPLACE_New
1677
+ $REPLACE_transport
1678
+ $REPLACE_stronger
1679
+ $REPLACE_heart
1680
+ $REPLACE_staying
1681
+ $REPLACE_honest
1682
+ $REPLACE_sold
1683
+ $APPEND_wrong
1684
+ $APPEND_Or
1685
+ $REPLACE_relax
1686
+ $REPLACE_heavy
1687
+ $REPLACE_*
1688
+ $REPLACE_speaker
1689
+ $REPLACE_limited
1690
+ $APPEND_speaking
1691
+ $APPEND_e
1692
+ $REPLACE_countryside
1693
+ $REPLACE_heat
1694
+ $REPLACE_prepared
1695
+ $REPLACE_truth
1696
+ $REPLACE_books
1697
+ $REPLACE_drank
1698
+ $REPLACE_nuclear
1699
+ $REPLACE_title
1700
+ $REPLACE_6
1701
+ $REPLACE_boring
1702
+ $REPLACE_totally
1703
+ $REPLACE_practiced
1704
+ $REPLACE_therefore
1705
+ $REPLACE_book
1706
+ $REPLACE_regularly
1707
+ $REPLACE_safety
1708
+ $REPLACE_normally
1709
+ $REPLACE_visiting
1710
+ $APPEND_kinds
1711
+ $REPLACE_impressive
1712
+ $REPLACE_final
1713
+ $REPLACE_driving
1714
+ $REPLACE_stuff
1715
+ $REPLACE_guess
1716
+ $REPLACE_avoid
1717
+ $REPLACE_answered
1718
+ $REPLACE_pleasant
1719
+ $APPEND_times
1720
+ $APPEND_without
1721
+ $REPLACE_focused
1722
+ $REPLACE_badly
1723
+ $REPLACE_solve
1724
+ $REPLACE_grow
1725
+ $REPLACE_drive
1726
+ $APPEND_although
1727
+ $REPLACE_news
1728
+ $REPLACE_Afterwards
1729
+ $APPEND_6
1730
+ $REPLACE_Learning
1731
+ $REPLACE_Thanks
1732
+ $REPLACE_flight
1733
+ $REPLACE_building
1734
+ $REPLACE_opened
1735
+ $REPLACE_shocked
1736
+ $REPLACE_volleyball
1737
+ $REPLACE_accepted
1738
+ $APPEND_exam
1739
+ $REPLACE_team
1740
+ $REPLACE_system
1741
+ $APPEND_ones
1742
+ $REPLACE_goals
1743
+ $REPLACE_Before
1744
+ $REPLACE_meat
1745
+ $APPEND_Does
1746
+ $REPLACE_schedule
1747
+ $REPLACE_cream
1748
+ $REPLACE_listened
1749
+ $REPLACE_Why
1750
+ $REPLACE_worth
1751
+ $APPEND_members
1752
+ $REPLACE_strength
1753
+ $REPLACE_works
1754
+ $APPEND_m
1755
+ $REPLACE_surprise
1756
+ $REPLACE_holidays
1757
+ $REPLACE_7
1758
+ $APPEND_written
1759
+ $REPLACE_medicine
1760
+ $REPLACE_contact
1761
+ $REPLACE_position
1762
+ $APPEND_tried
1763
+ $REPLACE_highly
1764
+ $REPLACE_missed
1765
+ $REPLACE_typhoon
1766
+ $REPLACE_celebrate
1767
+ $REPLACE_February
1768
+ $REPLACE_greater
1769
+ $REPLACE_support
1770
+ $REPLACE_allow
1771
+ $REPLACE_appeared
1772
+ $REPLACE_naturally
1773
+ $REPLACE_breakfast
1774
+ $REPLACE_afternoon
1775
+ $REPLACE_dead
1776
+ $REPLACE_proud
1777
+ $REPLACE_stuck
1778
+ $APPEND_half
1779
+ $REPLACE_lyrics
1780
+ $APPEND_based
1781
+ $REPLACE_sing
1782
+ $REPLACE_process
1783
+ $REPLACE_search
1784
+ $REPLACE_sell
1785
+ $REPLACE_learnt
1786
+ $REPLACE_responsibility
1787
+ $REPLACE_field
1788
+ $REPLACE_lifestyle
1789
+ $REPLACE_helpful
1790
+ $REPLACE_Koreans
1791
+ $REPLACE_awake
1792
+ $REPLACE_success
1793
+ $APPEND_living
1794
+ $REPLACE_latest
1795
+ $REPLACE_corrected
1796
+ $REPLACE_communicating
1797
+ $REPLACE_raise
1798
+ $REPLACE_showed
1799
+ $REPLACE_father
1800
+ $REPLACE_marriage
1801
+ $REPLACE_elementary
1802
+ $REPLACE_allows
1803
+ $APPEND_lot
1804
+ $REPLACE_eventually
1805
+ $REPLACE_customer
1806
+ $REPLACE_unusual
1807
+ $REPLACE_advise
1808
+ $REPLACE_letter
1809
+ $REPLACE_clearly
1810
+ $REPLACE_essay
1811
+ $REPLACE_bigger
1812
+ $REPLACE_habit
1813
+ $APPEND_system
1814
+ $REPLACE_ran
1815
+ $APPEND_speakers
1816
+ $REPLACE_bored
1817
+ $REPLACE_whatever
1818
+ $REPLACE_fourth
1819
+ $REPLACE_chosen
1820
+ $REPLACE_room
1821
+ $REPLACE_30
1822
+ $REPLACE_carefully
1823
+ $REPLACE_loss
1824
+ $REPLACE_ingredients
1825
+ $REPLACE_singing
1826
+ $REPLACE_ride
1827
+ $REPLACE_build
1828
+ $REPLACE_cooking
1829
+ $REPLACE_add
1830
+ $REPLACE_mom
1831
+ $REPLACE_sign
1832
+ $REPLACE_chatting
1833
+ $REPLACE_happier
1834
+ $REPLACE_seat
1835
+ $REPLACE_affect
1836
+ $REPLACE_appropriate
1837
+ $REPLACE_named
1838
+ $APPEND_30
1839
+ $REPLACE_female
1840
+ $REPLACE_fashion
1841
+ $REPLACE_attending
1842
+ $REPLACE_Tonight
1843
+ $REPLACE_role
1844
+ $REPLACE_somebody
1845
+ $APPEND_Unfortunately
1846
+ $REPLACE_employees
1847
+ $REPLACE_face
1848
+ $REPLACE_middle
1849
+ $REPLACE_junior
1850
+ $REPLACE_lovely
1851
+ $REPLACE_reduce
1852
+ $REPLACE_positive
1853
+ $REPLACE_concerned
1854
+ $REPLACE_overseas
1855
+ $REPLACE_"
1856
+ $REPLACE_Second
1857
+ $APPEND_Our
1858
+ $APPEND_named
1859
+ $REPLACE_mountain
1860
+ $APPEND_eating
1861
+ $REPLACE_warmer
1862
+ $REPLACE_death
1863
+ $REPLACE_electronic
1864
+ $REPLACE_figure
1865
+ $REPLACE_frequently
1866
+ $REPLACE_pair
1867
+ $REPLACE_Americans
1868
+ $REPLACE_rest
1869
+ $REPLACE_TV
1870
+ $APPEND_themselves
1871
+ $APPEND_however
1872
+ $REPLACE_subject
1873
+ $APPEND_music
1874
+ $REPLACE_dormitory
1875
+ $APPEND_forward
1876
+ $REPLACE_department
1877
+ $REPLACE_pronounce
1878
+ $REPLACE_wake
1879
+ $REPLACE_cook
1880
+ $APPEND_visit
1881
+ $REPLACE_raised
1882
+ $REPLACE_smaller
1883
+ $REPLACE_stressful
1884
+ $APPEND_lately
1885
+ $REPLACE_completed
1886
+ $REPLACE_photography
1887
+ $REPLACE_10
1888
+ $APPEND_saying
1889
+ $REPLACE_dropped
1890
+ $REPLACE_laughed
1891
+ $APPEND_read
1892
+ $REPLACE_complain
1893
+ $REPLACE_Usually
1894
+ $APPEND_felt
1895
+ $REPLACE_Thus
1896
+ $REPLACE_foreigner
1897
+ $REPLACE_theatre
1898
+ $APPEND_website
1899
+ $APPEND_days
1900
+ $REPLACE_slightly
1901
+ $REPLACE_incorrect
1902
+ $REPLACE_frustrated
1903
+ $REPLACE_grandmother
1904
+ $REPLACE_forty
1905
+ $REPLACE_signed
1906
+ $APPEND_book
1907
+ $REPLACE_sore
1908
+ $REPLACE_classmates
1909
+ $REPLACE_equipment
1910
+ $REPLACE_memory
1911
+ $REPLACE_ordered
1912
+ $APPEND_stay
1913
+ $REPLACE_expect
1914
+ $REPLACE_drunk
1915
+ $APPEND_gave
1916
+ $REPLACE_midnight
1917
+ $APPEND_seem
1918
+ $APPEND_cut
1919
+ $REPLACE_address
1920
+ $REPLACE_couple
1921
+ $REPLACE_Compared
1922
+ $REPLACE_friendly
1923
+ $REPLACE_rode
1924
+ $REPLACE_losing
1925
+ $REPLACE_nearly
1926
+ $REPLACE_six
1927
+ $REPLACE_speeches
1928
+ $REPLACE_international
1929
+ $REPLACE_understood
1930
+ $REPLACE_thank
1931
+ $REPLACE_rarely
1932
+ $REPLACE_match
1933
+ $REPLACE_uploaded
1934
+ $REPLACE_Luckily
1935
+ $REPLACE_failed
1936
+ $REPLACE_hamburger
1937
+ $REPLACE_sleeping
1938
+ $REPLACE_tongue
1939
+ $REPLACE_colleague
1940
+ $REPLACE_require
1941
+ $REPLACE_terribly
1942
+ $REPLACE_case
1943
+ $APPEND_traditional
1944
+ $REPLACE_graduation
1945
+ $REPLACE_offer
1946
+ $REPLACE_respond
1947
+ $REPLACE_perfectly
1948
+ $REPLACE_businesses
1949
+ $REPLACE_8
1950
+ $APPEND_s
1951
+ $REPLACE_understanding
1952
+ $REPLACE_hungry
1953
+ $REPLACE_conclusion
1954
+ $REPLACE_homework
1955
+ $REPLACE_design
1956
+ $REPLACE_British
1957
+ $REPLACE_peaceful
1958
+ $REPLACE_forgot
1959
+ $REPLACE_suitable
1960
+ $REPLACE_soccer
1961
+ $REPLACE_tells
1962
+ $REPLACE_third
1963
+ $REPLACE_exactly
1964
+ $REPLACE_term
1965
+ $REPLACE_drinking
1966
+ $REPLACE_searching
1967
+ $REPLACE_hung
1968
+ $REPLACE_air
1969
+ $REPLACE_strongly
1970
+ $APPEND_looking
1971
+ $REPLACE_band
1972
+ $REPLACE_checked
1973
+ $REPLACE_send
1974
+ $REPLACE_Zealand
1975
+ $REPLACE_draw
1976
+ $REPLACE_educational
1977
+ $REPLACE_incident
1978
+ $APPEND_Some
1979
+ $APPEND_friend
1980
+ $APPEND_free
1981
+ $REPLACE_toward
1982
+ $REPLACE_interview
1983
+ $APPEND_>
1984
+ $REPLACE_tough
1985
+ $REPLACE_canceled
1986
+ $REPLACE_memorize
1987
+ $REPLACE_historical
1988
+ $REPLACE_slang
1989
+ $REPLACE_replied
1990
+ $REPLACE_considering
1991
+ $REPLACE_skill
1992
+ $REPLACE_musical
1993
+ $REPLACE_improvement
1994
+ $REPLACE_carry
1995
+ $REPLACE_education
1996
+ $APPEND_great
1997
+ $REPLACE_companies
1998
+ $REPLACE_cool
1999
+ $APPEND_comes
2000
+ $REPLACE_employee
2001
+ $REPLACE_age
2002
+ $APPEND_Yes
2003
+ $REPLACE_Could
2004
+ $REPLACE_relaxed
2005
+ $REPLACE_greatest
2006
+ $REPLACE_total
2007
+ $REPLACE_ready
2008
+ $REPLACE_guy
2009
+ $REPLACE_chocolate
2010
+ $APPEND_tense
2011
+ $REPLACE_earn
2012
+ $REPLACE_topic
2013
+ $REPLACE_beat
2014
+ $REPLACE_date
2015
+ $REPLACE_illnesses
2016
+ $REPLACE_conditioner
2017
+ $APPEND_inside
2018
+ $REPLACE_suggested
2019
+ $REPLACE_drama
2020
+ $REPLACE_pick
2021
+ $REPLACE_starts
2022
+ $REPLACE_manage
2023
+ $APPEND_anyway
2024
+ $REPLACE_Thailand
2025
+ $REPLACE_McDonald
2026
+ $REPLACE_Writing
2027
+ $APPEND_Are
2028
+ $REPLACE_2nd
2029
+ $APPEND_fall
2030
+ $REPLACE_flu
2031
+ $REPLACE_websites
2032
+ $REPLACE_snowy
2033
+ $APPEND_diary
2034
+ $REPLACE_road
2035
+ $REPLACE_professor
2036
+ $REPLACE_exhausted
2037
+ $APPEND_held
2038
+ $REPLACE_colored
2039
+ $REPLACE_sitting
2040
+ $REPLACE_wanna
2041
+ $REPLACE_according
2042
+ $REPLACE_lead
2043
+ $REPLACE_scene
2044
+ $REPLACE_hardly
2045
+ $REPLACE_ticket
2046
+ $REPLACE_remain
2047
+ $REPLACE_worrying
2048
+ $REPLACE_patience
2049
+ $REPLACE_Having
2050
+ $REPLACE_allowed
2051
+ $REPLACE_whilst
2052
+ $REPLACE_entire
2053
+ $REPLACE_promised
2054
+ $REPLACE_photo
2055
+ $REPLACE_motivated
2056
+ $REPLACE_dairy
2057
+ $APPEND_full
2058
+ $REPLACE_points
2059
+ $REPLACE_Soon
2060
+ $REPLACE_messages
2061
+ $APPEND_alone
2062
+ $REPLACE_alive
2063
+ $APPEND_Every
2064
+ $APPEND_entire
2065
+ $REPLACE_programme
2066
+ $REPLACE_fully
2067
+ $REPLACE_cloudy
2068
+ $REPLACE_occur
2069
+ $REPLACE_meaning
2070
+ $APPEND_area
2071
+ $REPLACE_liked
2072
+ $REPLACE_sweet
2073
+ $REPLACE_act
2074
+ $REPLACE_graduated
2075
+ $REPLACE_childhood
2076
+ $APPEND_available
2077
+ $REPLACE_believed
2078
+ $REPLACE_newspaper
2079
+ $REPLACE_enjoying
2080
+ $REPLACE_riding
2081
+ $APPEND_Not
2082
+ $REPLACE_body
2083
+ $REPLACE_beneficial
2084
+ $REPLACE_recognize
2085
+ $APPEND_native
2086
+ $REPLACE_attention
2087
+ $REPLACE_Until
2088
+ $REPLACE_struck
2089
+ $REPLACE_Just
2090
+ $REPLACE_correcting
2091
+ $REPLACE_interest
2092
+ $REPLACE_changing
2093
+ $REPLACE_pollution
2094
+ $APPEND_pieces
2095
+ $REPLACE_According
2096
+ $REPLACE_autumn
2097
+ $APPEND_problem
2098
+ $REPLACE_gym
2099
+ $REPLACE_basic
2100
+ $REPLACE_includes
2101
+ $REPLACE_games
2102
+ $APPEND_seeing
2103
+ $REPLACE_sunny
2104
+ $REPLACE_5
2105
+ $APPEND_learned
2106
+ $REPLACE_stage
2107
+ $REPLACE_touch
2108
+ $REPLACE_discuss
2109
+ $REPLACE_airplane
2110
+ $REPLACE_Has
2111
+ $REPLACE_die
2112
+ $REPLACE_relationships
2113
+ $REPLACE_effects
2114
+ $REPLACE_sat
2115
+ $REPLACE_parts
2116
+ $REPLACE_tsunami
2117
+ $REPLACE_response
2118
+ $REPLACE_teaches
2119
+ $REPLACE_self
2120
+ $REPLACE_thanks
2121
+ $REPLACE_rained
2122
+ $REPLACE_laundry
2123
+ $REPLACE_dependent
2124
+ $APPEND_near
2125
+ $REPLACE_below
2126
+ $REPLACE_custom
2127
+ $REPLACE_inconvenient
2128
+ $REPLACE_relaxing
2129
+ $REPLACE_wedding
2130
+ $REPLACE_challenge
2131
+ $APPEND_set
2132
+ $REPLACE_chatted
2133
+ $APPEND_immediately
2134
+ $REPLACE_attractive
2135
+ $REPLACE_translate
2136
+ $APPEND_Just
2137
+ $APPEND_TV
2138
+ $REPLACE_win
2139
+ $REPLACE_museum
2140
+ $REPLACE_neighborhood
2141
+ $REPLACE_Right
2142
+ $REPLACE_regular
2143
+ $REPLACE_experiences
2144
+ $APPEND_word
2145
+ $APPEND_played
2146
+ $REPLACE_hobby
2147
+ $REPLACE_developing
2148
+ $REPLACE_truly
2149
+ $APPEND_ended
2150
+ $REPLACE_issue
2151
+ $APPEND_correct
2152
+ $REPLACE_impossible
2153
+ $REPLACE_concerning
2154
+ $REPLACE_realise
2155
+ $REPLACE_brings
2156
+ $APPEND_room
2157
+ $REPLACE_advised
2158
+ $REPLACE_workplace
2159
+ $REPLACE_surfing
2160
+ $APPEND_Let
2161
+ $APPEND_daily
2162
+ $REPLACE_stomach
2163
+ $APPEND_night
2164
+ $REPLACE_meal
2165
+ $REPLACE_disadvantages
2166
+ $REPLACE_loudly
2167
+ $REPLACE_prize
2168
+ $REPLACE_besides
2169
+ $APPEND_experience
2170
+ $REPLACE_Despite
2171
+ $REPLACE_4
2172
+ $APPEND_concert
2173
+ $REPLACE_3rd
2174
+ $REPLACE_power
2175
+ $REPLACE_`
2176
+ $APPEND_lots
2177
+ $REPLACE_changes
2178
+ $REPLACE_kindergarten
2179
+ $REPLACE_sweat
2180
+ $REPLACE_ten
2181
+ $APPEND_wo
2182
+ $REPLACE_overcome
2183
+ $REPLACE_effective
2184
+ $REPLACE_terms
2185
+ $REPLACE_shown
2186
+ $REPLACE_chat
2187
+ $APPEND_team
2188
+ $REPLACE_sorry
2189
+ $APPEND_7
2190
+ $REPLACE_station
2191
+ $APPEND_man
2192
+ $REPLACE_produce
2193
+ $REPLACE_technological
2194
+ $REPLACE_differently
2195
+ $REPLACE_transferred
2196
+ $APPEND_told
2197
+ $APPEND_late
2198
+ $REPLACE_laugh
2199
+ $REPLACE_worker
2200
+ $REPLACE_space
2201
+ $REPLACE_introduced
2202
+ $REPLACE_single
2203
+ $REPLACE_cancelled
2204
+ $REPLACE_methods
2205
+ $REPLACE_transportation
2206
+ $REPLACE_Philippines
2207
+ $REPLACE_possibility
2208
+ $REPLACE_tasty
2209
+ $REPLACE_location
2210
+ $REPLACE_male
2211
+ $APPEND_simply
2212
+ $REPLACE_tastes
2213
+ $REPLACE_ease
2214
+ $REPLACE_straight
2215
+ $REPLACE_uses
2216
+ $REPLACE_participate
2217
+ $REPLACE_discover
2218
+ $APPEND_co
2219
+ $REPLACE_details
2220
+ $REPLACE_logged
2221
+ $REPLACE_bright
2222
+ $REPLACE_Once
2223
+ $REPLACE_walking
2224
+ $APPEND_spent
2225
+ $MERGE_HYPHEN
2226
+ $REPLACE_growing
2227
+ $REPLACE_slight
2228
+ $APPEND_current
2229
+ $REPLACE_moving
2230
+ $REPLACE_spring
2231
+ $REPLACE_August
2232
+ $REPLACE_fans
2233
+ $REPLACE_Well
2234
+ $APPEND_nervous
2235
+ $REPLACE_version
2236
+ $REPLACE_upset
2237
+ $REPLACE_stress
2238
+ $REPLACE_appointment
2239
+ $REPLACE_tasks
2240
+ $REPLACE_Being
2241
+ $REPLACE_encouraged
2242
+ $REPLACE_town
2243
+ $REPLACE_eight
2244
+ $REPLACE_mood
2245
+ $REPLACE_forecast
2246
+ $APPEND_lessons
2247
+ $APPEND_finished
2248
+ $REPLACE_increased
2249
+ $REPLACE_blossoms
2250
+ $REPLACE_aware
2251
+ $REPLACE_Besides
2252
+ $REPLACE_Taiwanese
2253
+ $REPLACE_someday
2254
+ $REPLACE_happening
2255
+ $REPLACE_volunteer
2256
+ $REPLACE_fireworks
2257
+ $REPLACE_ideas
2258
+ $REPLACE_curious
2259
+ $REPLACE_responsible
2260
+ $REPLACE_voice
2261
+ $REPLACE_covered
2262
+ $APPEND_ice
2263
+ $REPLACE_rang
2264
+ $REPLACE_items
2265
+ $REPLACE_apart
2266
+ $APPEND_program
2267
+ $REPLACE_bye
2268
+ $REPLACE_Next
2269
+ $REPLACE_complicated
2270
+ $REPLACE_Someone
2271
+ $APPEND_earlier
2272
+ $APPEND_difficult
2273
+ $REPLACE_invited
2274
+ $REPLACE_applied
2275
+ $APPEND_anyone
2276
+ $REPLACE_gaining
2277
+ $REPLACE_cute
2278
+ $REPLACE_line
2279
+ $REPLACE_partner
2280
+ $REPLACE_regretted
2281
+ $REPLACE_clock
2282
+ $APPEND_according
2283
+ $REPLACE_greatly
2284
+ $REPLACE_appear
2285
+ $REPLACE_opposite
2286
+ $REPLACE_Like
2287
+ $REPLACE_patient
2288
+ $REPLACE_spread
2289
+ $REPLACE_dollars
2290
+ $REPLACE_relieve
2291
+ $REPLACE_article
2292
+ $REPLACE_benefits
2293
+ $APPEND_American
2294
+ $REPLACE_Looking
2295
+ $REPLACE_Who
2296
+ $REPLACE_fix
2297
+ $REPLACE_human
2298
+ $REPLACE_technologies
2299
+ $REPLACE_breathe
2300
+ $REPLACE_strict
2301
+ $REPLACE_opinions
2302
+ $APPEND_possibly
2303
+ $REPLACE_appearance
2304
+ $REPLACE_explanation
2305
+ $REPLACE_herself
2306
+ $APPEND_student
2307
+ $REPLACE_plane
2308
+ $REPLACE_hearing
2309
+ $REPLACE_personality
2310
+ $REPLACE_attitude
2311
+ $REPLACE_journey
2312
+ $REPLACE_recover
2313
+ $REPLACE_magazine
2314
+ $REPLACE_disappeared
2315
+ $APPEND_taken
2316
+ $REPLACE_Me
2317
+ $REPLACE_efficiently
2318
+ $REPLACE_strawberries
2319
+ $APPEND_becoming
2320
+ $REPLACE_October
2321
+ $REPLACE_social
2322
+ $REPLACE_suicide
2323
+ $REPLACE_reached
2324
+ $REPLACE_damaged
2325
+ $REPLACE_personalities
2326
+ $REPLACE_valuable
2327
+ $REPLACE_height
2328
+ $REPLACE_Asian
2329
+ $REPLACE_sight
2330
+ $REPLACE_issues
2331
+ $REPLACE_titled
2332
+ $REPLACE_science
2333
+ $REPLACE_cell
2334
+ $REPLACE_amongst
2335
+ $APPEND_movies
2336
+ $REPLACE_June
2337
+ $REPLACE_policies
2338
+ $REPLACE_silent
2339
+ $REPLACE_girls
2340
+ $APPEND_company
2341
+ $APPEND_second
2342
+ $APPEND_ability
2343
+ $APPEND_hope
2344
+ $REPLACE_former
2345
+ $APPEND_GOOD
2346
+ $REPLACE_fashionable
2347
+ $REPLACE_club
2348
+ $APPEND_end
2349
+ $REPLACE_path
2350
+ $APPEND_+
2351
+ $REPLACE_top
2352
+ $APPEND_happy
2353
+ $REPLACE_lay
2354
+ $REPLACE_accident
2355
+ $REPLACE_festival
2356
+ $REPLACE_Later
2357
+ $REPLACE_destroyed
2358
+ $APPEND_plan
2359
+ $APPEND_famous
2360
+ $REPLACE_safely
2361
+ $APPEND_related
2362
+ $REPLACE_suit
2363
+ $REPLACE_stand
2364
+ $REPLACE_contrast
2365
+ $APPEND_period
2366
+ $REPLACE_highest
2367
+ $REPLACE_habits
2368
+ $APPEND_First
2369
+ $REPLACE_January
2370
+ $REPLACE_putting
2371
+ $REPLACE_grew
2372
+ $REPLACE_degrees
2373
+ $REPLACE_latter
2374
+ $REPLACE_extent
2375
+ $REPLACE_lang
2376
+ $REPLACE_episode
2377
+ $REPLACE_physically
2378
+ $APPEND_types
2379
+ $REPLACE_cooked
2380
+ $REPLACE_original
2381
+ $REPLACE_fresh
2382
+ $APPEND_world
2383
+ $REPLACE_l
2384
+ $REPLACE_Year
2385
+ $APPEND_wanted
2386
+ $REPLACE_Wednesday
2387
+ $REPLACE_unique
2388
+ $REPLACE_active
2389
+ $REPLACE_center
2390
+ $APPEND_problems
2391
+ $REPLACE_encourage
2392
+ $APPEND_8
2393
+ $REPLACE_individual
2394
+ $REPLACE_included
2395
+ $REPLACE_suggestions
2396
+ $REPLACE_sea
2397
+ $REPLACE_smoothly
2398
+ $REPLACE_headache
2399
+ $REPLACE_Was
2400
+ $REPLACE_Internet
2401
+ $REPLACE_pleasure
2402
+ $REPLACE_Thursday
2403
+ $REPLACE_board
2404
+ $REPLACE_phrases
2405
+ $REPLACE_built
2406
+ $APPEND_caused
2407
+ $REPLACE_subjects
2408
+ $APPEND_places
2409
+ $REPLACE_grammatical
2410
+ $REPLACE_suggest
2411
+ $APPEND_big
2412
+ $REPLACE_bath
2413
+ $APPEND_train
2414
+ $REPLACE_hesitant
2415
+ $APPEND_seriously
2416
+ $REPLACE_deep
2417
+ $APPEND_children
2418
+ $REPLACE_refreshed
2419
+ $APPEND_Correct
2420
+ $APPEND_yourself
2421
+ $APPEND_THE
2422
+ $REPLACE_reasonable
2423
+ $APPEND_spend
2424
+ $APPEND_skill
2425
+ $REPLACE_obvious
2426
+ $REPLACE_Friday
2427
+ $REPLACE_soup
2428
+ $REPLACE_basketball
2429
+ $REPLACE_Your
2430
+ $REPLACE_drawing
2431
+ $REPLACE_m
2432
+ $APPEND_sentences
2433
+ $REPLACE_english
2434
+ $APPEND_fell
2435
+ $REPLACE_colder
2436
+ $REPLACE_car
2437
+ $APPEND_group
2438
+ $REPLACE_receiving
2439
+ $REPLACE_sun
2440
+ $APPEND_15
2441
+ $APPEND_hot
2442
+ $APPEND_verb
2443
+ $REPLACE_technical
2444
+ $REPLACE_Through
2445
+ $APPEND_buy
2446
+ $REPLACE_route
2447
+ $REPLACE_Vietnamese
2448
+ $REPLACE_grandfather
2449
+ $REPLACE_April
2450
+ $REPLACE_lasts
2451
+ $REPLACE_environmentally
2452
+ $REPLACE_progress
2453
+ $REPLACE_telling
2454
+ $REPLACE_preparation
2455
+ $REPLACE_supermarket
2456
+ $REPLACE_Perhaps
2457
+ $REPLACE_plays
2458
+ $REPLACE_driver
2459
+ $REPLACE_anyway
2460
+ $APPEND_within
2461
+ $REPLACE_Vietnam
2462
+ $REPLACE_green
2463
+ $REPLACE_access
2464
+ $APPEND_t
2465
+ $REPLACE_concerns
2466
+ $REPLACE_laptop
2467
+ $APPEND_eventually
2468
+ $REPLACE_fried
2469
+ $REPLACE_pieces
2470
+ $REPLACE_security
2471
+ $REPLACE_condition
2472
+ $REPLACE_dreams
2473
+ $REPLACE_reminded
2474
+ $REPLACE_December
2475
+ $REPLACE_finding
2476
+ $REPLACE_produced
2477
+ $REPLACE_broken
2478
+ $REPLACE_raising
2479
+ $REPLACE_specific
2480
+ $REPLACE_humid
2481
+ $APPEND_reason
2482
+ $REPLACE_programming
2483
+ $REPLACE_brush
2484
+ $REPLACE_powerful
2485
+ $REPLACE_shape
2486
+ $REPLACE_involves
2487
+ $APPEND_summer
2488
+ $REPLACE_kinds
2489
+ $APPEND_eat
2490
+ $REPLACE_market
2491
+ $REPLACE_Introducing
2492
+ $APPEND_kept
2493
+ $APPEND_information
2494
+ $REPLACE_Filipino
2495
+ $REPLACE_hang
2496
+ $REPLACE_nature
2497
+ $REPLACE_stood
2498
+ $REPLACE_oldest
2499
+ $APPEND_books
2500
+ $APPEND_top
2501
+ $REPLACE_physical
2502
+ $REPLACE_Thai
2503
+ $REPLACE_effort
2504
+ $REPLACE_U
2505
+ $APPEND_phone
2506
+ $REPLACE_author
2507
+ $REPLACE_imagined
2508
+ $REPLACE_request
2509
+ $REPLACE_Australian
2510
+ $REPLACE_didn
2511
+ $REPLACE_Something
2512
+ $REPLACE_translator
2513
+ $REPLACE_text
2514
+ $APPEND_account
2515
+ $REPLACE_protect
2516
+ $REPLACE_resources
2517
+ $REPLACE_Additionally
2518
+ $APPEND_afterwards
2519
+ $APPEND_Should
2520
+ $REPLACE_awhile
2521
+ $REPLACE_meanings
2522
+ $APPEND_pictures
2523
+ $REPLACE_benefit
2524
+ $REPLACE_exist
2525
+ $REPLACE_connection
2526
+ $REPLACE_impression
2527
+ $APPEND_meeting
2528
+ $REPLACE_electrical
2529
+ $APPEND_style
2530
+ $REPLACE_larger
2531
+ $REPLACE_hotter
2532
+ $REPLACE_foot
2533
+ $APPEND_further
2534
+ $REPLACE_described
2535
+ $REPLACE_note
2536
+ $REPLACE_football
2537
+ $APPEND_ourselves
2538
+ $REPLACE_searched
2539
+ $REPLACE_temporary
2540
+ $REPLACE_semester
2541
+ $REPLACE_announced
2542
+ $REPLACE_Suddenly
2543
+ $APPEND_others
2544
+ $APPEND_goes
2545
+ $REPLACE_sort
2546
+ $REPLACE_itself
2547
+ $REPLACE_rich
2548
+ $APPEND_song
2549
+ $REPLACE_memorable
2550
+ $REPLACE_Europe
2551
+ $REPLACE_features
2552
+ $REPLACE_apply
2553
+ $REPLACE_celebrated
2554
+ $REPLACE_delivery
2555
+ $REPLACE_winter
2556
+ $REPLACE_miss
2557
+ $REPLACE_application
2558
+ $APPEND_onwards
2559
+ $REPLACE_population
2560
+ $REPLACE_failure
2561
+ $REPLACE_lazy
2562
+ $REPLACE_scored
2563
+ $REPLACE_November
2564
+ $APPEND_travel
2565
+ $REPLACE_Let
2566
+ $REPLACE_alcoholic
2567
+ $REPLACE_disappointment
2568
+ $REPLACE_severe
2569
+ $REPLACE_effect
2570
+ $REPLACE_speed
2571
+ $APPEND_How
2572
+ $APPEND_sounds
2573
+ $REPLACE_cooler
2574
+ $REPLACE_'cause
2575
+ $APPEND_mean
2576
+ $REPLACE_divided
2577
+ $REPLACE_ha
2578
+ $REPLACE_9
2579
+ $REPLACE_advantages
2580
+ $APPEND_call
2581
+ $REPLACE_21st
2582
+ $REPLACE_fit
2583
+ $REPLACE_lit
2584
+ $REPLACE_directly
2585
+ $REPLACE_videos
2586
+ $REPLACE_pressure
2587
+ $REPLACE_pursue
2588
+ $REPLACE_forgotten
2589
+ $REPLACE_industry
2590
+ $REPLACE_Speaking
2591
+ $APPEND_enjoy
2592
+ $REPLACE_Should
2593
+ $REPLACE_grown
2594
+ $REPLACE_participated
2595
+ $REPLACE_treat
2596
+ $REPLACE_expression
2597
+ $REPLACE_fly
2598
+ $REPLACE_tall
2599
+ $REPLACE_situations
2600
+ $REPLACE_host
2601
+ $REPLACE_visitors
2602
+ $APPEND_hear
2603
+ $REPLACE_Instead
2604
+ $REPLACE_agreed
2605
+ $REPLACE_affects
2606
+ $REPLACE_drew
2607
+ $REPLACE_spending
2608
+ $REPLACE_huge
2609
+ $REPLACE_ill
2610
+ $REPLACE_tradition
2611
+ $REPLACE_argue
2612
+ $REPLACE_turns
2613
+ $REPLACE_ground
2614
+ $REPLACE_sometime
2615
+ $REPLACE_Italy
2616
+ $APPEND_works
2617
+ $REPLACE_likely
2618
+ $REPLACE_Madam
2619
+ $APPEND_questions
2620
+ $REPLACE_ceremonies
2621
+ $APPEND_turn
2622
+ $APPEND_Korean
2623
+ $REPLACE_gradually
2624
+ $REPLACE_financial
2625
+ $REPLACE_involved
2626
+ $REPLACE_throw
2627
+ $REPLACE_advertising
2628
+ $REPLACE_tend
2629
+ $REPLACE_characteristics
2630
+ $APPEND_among
2631
+ $REPLACE_electric
2632
+ $REPLACE_sister
2633
+ $APPEND_car
2634
+ $REPLACE_fantastic
2635
+ $REPLACE_examination
2636
+ $APPEND_city
2637
+ $REPLACE_eaten
2638
+ $REPLACE_film
2639
+ $APPEND_small
2640
+ $REPLACE_players
2641
+ $REPLACE_stores
2642
+ $REPLACE_machine
2643
+ $REPLACE_managed
2644
+ $REPLACE_tour
2645
+ $APPEND_video
2646
+ $REPLACE_journals
2647
+ $REPLACE_guys
2648
+ $APPEND_meet
2649
+ $REPLACE_deeply
2650
+ $REPLACE_floor
2651
+ $REPLACE_keeps
2652
+ $REPLACE_talks
2653
+ $REPLACE_focusing
2654
+ $REPLACE_mysterious
2655
+ $APPEND_less
2656
+ $REPLACE_rice
2657
+ $REPLACE_recovered
2658
+ $REPLACE_injured
2659
+ $REPLACE_poorly
2660
+ $REPLACE_comedy
2661
+ $REPLACE_cigarettes
2662
+ $REPLACE_anime
2663
+ $REPLACE_influence
2664
+ $REPLACE_Eventually
2665
+ $REPLACE_offered
2666
+ $REPLACE_sale
2667
+ $REPLACE_effectively
2668
+ $REPLACE_disappointing
2669
+ $REPLACE_illness
2670
+ $REPLACE_comments
2671
+ $APPEND_talk
2672
+ $REPLACE_contains
2673
+ $APPEND_People
2674
+ $APPEND_power
2675
+ $REPLACE_31st
2676
+ $REPLACE_distance
2677
+ $REPLACE_appears
2678
+ $REPLACE_importance
2679
+ $REPLACE_choosing
2680
+ $APPEND_interesting
2681
+ $REPLACE_snow
2682
+ $APPEND_o
2683
+ $REPLACE_tennis
2684
+ $REPLACE_continues
2685
+ $REPLACE_dress
2686
+ $REPLACE_percent
2687
+ $REPLACE_size
2688
+ $REPLACE_dictionaries
2689
+ $APPEND_seems
2690
+ $REPLACE_fever
2691
+ $APPEND_etc
2692
+ $APPEND_Though
2693
+ $REPLACE_whereas
2694
+ $APPEND_several
2695
+ $APPEND_far
2696
+ $APPEND_classes
2697
+ $APPEND_public
2698
+ $REPLACE_traffic
2699
+ $REPLACE_damage
2700
+ $APPEND_nothing
2701
+ $REPLACE_worthwhile
2702
+ $REPLACE_appreciated
2703
+ $REPLACE_articles
2704
+ $APPEND_begin
2705
+ $APPEND_needed
2706
+ $REPLACE_recommendations
2707
+ $REPLACE_don
2708
+ $REPLACE_buildings
2709
+ $APPEND_four
2710
+ $REPLACE_jealous
2711
+ $REPLACE_seminar
2712
+ $APPEND_gradually
2713
+ $REPLACE_complaints
2714
+ $REPLACE_Nothing
2715
+ $REPLACE_advance
2716
+ $REPLACE_flowers
2717
+ $APPEND_Starting
2718
+ $REPLACE_beyond
2719
+ $REPLACE_advertised
2720
+ $APPEND_mainly
2721
+ $APPEND_possible
2722
+ $REPLACE_suffering
2723
+ $APPEND_12
2724
+ $REPLACE_Hopefully
2725
+ $APPEND_countries
2726
+ $APPEND_similar
2727
+ $REPLACE_quick
2728
+ $REPLACE_general
2729
+ $REPLACE_successfully
2730
+ $REPLACE_dark
2731
+ $REPLACE_unbelievable
2732
+ $REPLACE_causing
2733
+ $REPLACE_13th
2734
+ $REPLACE_unexpected
2735
+ $REPLACE_begins
2736
+ $REPLACE_tea
2737
+ $REPLACE_Sunday
2738
+ $APPEND_somewhere
2739
+ $REPLACE_digital
2740
+ $APPEND_stories
2741
+ $APPEND_idea
2742
+ $APPEND_tired
2743
+ $APPEND_family
2744
+ $REPLACE_animation
2745
+ $REPLACE_shot
2746
+ $REPLACE_Or
2747
+ $APPEND_managed
2748
+ $REPLACE_bus
2749
+ $APPEND_close
2750
+ $REPLACE_disease
2751
+ $REPLACE_desire
2752
+ $REPLACE_carried
2753
+ $REPLACE_disappear
2754
+ $REPLACE_essential
2755
+ $APPEND_news
2756
+ $REPLACE_forced
2757
+ $REPLACE_fault
2758
+ $REPLACE_translation
2759
+ $REPLACE_television
2760
+ $REPLACE_cried
2761
+ $REPLACE_freely
2762
+ $REPLACE_Valentine
2763
+ $REPLACE_somewhat
2764
+ $REPLACE_operation
2765
+ $REPLACE_conversational
2766
+ $APPEND_absolutely
2767
+ $APPEND_properly
2768
+ $REPLACE_sites
2769
+ $REPLACE_allergies
2770
+ $REPLACE_salary
2771
+ $REPLACE_rise
2772
+ $REPLACE_entertainment
2773
+ $REPLACE_kitchen
2774
+ $REPLACE_emotional
2775
+ $REPLACE_McDonalds
2776
+ $REPLACE_extra
2777
+ $APPEND_nearby
2778
+ $REPLACE_mention
2779
+ $APPEND_Here
2780
+ $APPEND_nice
2781
+ $APPEND_college
2782
+ $APPEND_Before
2783
+ $APPEND_form
2784
+ $REPLACE_likes
2785
+ $APPEND_turned
2786
+ $REPLACE_rent
2787
+ $REPLACE_tourists
2788
+ $REPLACE_unknown
2789
+ $REPLACE_actors
2790
+ $APPEND_longer
2791
+ $REPLACE_fill
2792
+ $REPLACE_Nobody
2793
+ $REPLACE_Singapore
2794
+ $REPLACE_helping
2795
+ $REPLACE_exercises
2796
+ $APPEND_real
2797
+ $APPEND_located
2798
+ $APPEND_received
2799
+ $APPEND_gets
2800
+ $APPEND_bad
2801
+ $REPLACE_doubt
2802
+ $REPLACE_sweaty
2803
+ $REPLACE_prefecture
2804
+ $REPLACE_audience
2805
+ $REPLACE_sports
2806
+ $REPLACE_minute
2807
+ $REPLACE_product
2808
+ $REPLACE_buying
2809
+ $REPLACE_exact
2810
+ $REPLACE_temporarily
2811
+ $REPLACE_Avatar
2812
+ $REPLACE_Skype
2813
+ $REPLACE_discussion
2814
+ $REPLACE_item
2815
+ $REPLACE_gon
2816
+ $REPLACE_accessories
2817
+ $REPLACE_incredibly
2818
+ $REPLACE_Where
2819
+ $REPLACE_World
2820
+ $REPLACE_advantage
2821
+ $REPLACE_ridiculous
2822
+ $REPLACE_wherever
2823
+ $REPLACE_shook
2824
+ $REPLACE_global
2825
+ $REPLACE_entitled
2826
+ $REPLACE_Working
2827
+ $APPEND_hours
2828
+ $REPLACE_Starbucks
2829
+ $REPLACE_routine
2830
+ $REPLACE_flavored
2831
+ $APPEND_item
2832
+ $REPLACE_techniques
2833
+ $REPLACE_creates
2834
+ $REPLACE_peace
2835
+ $REPLACE_annoyed
2836
+ $REPLACE_rate
2837
+ $REPLACE_September
2838
+ $REPLACE_Russian
2839
+ $REPLACE_assistant
2840
+ $REPLACE_plenty
2841
+ $REPLACE_local
2842
+ $APPEND_store
2843
+ $REPLACE_sooner
2844
+ $REPLACE_overslept
2845
+ $REPLACE_Everybody
2846
+ $REPLACE_selling
2847
+ $REPLACE_negative
2848
+ $REPLACE_setting
2849
+ $APPEND_helps
2850
+ $REPLACE_lecture
2851
+ $APPEND_happen
2852
+ $REPLACE_survive
2853
+ $REPLACE_art
2854
+ $APPEND_certainly
2855
+ $APPEND_fully
2856
+ $APPEND_above
2857
+ $REPLACE_speaks
2858
+ $REPLACE_asking
2859
+ $REPLACE_economical
2860
+ $REPLACE_salaries
2861
+ $APPEND_clearly
2862
+ $REPLACE_mail
2863
+ $REPLACE_holding
2864
+ $REPLACE_organise
2865
+ $REPLACE_efficient
2866
+ $APPEND_name
2867
+ $REPLACE_constantly
2868
+ $REPLACE_overtime
2869
+ $REPLACE_grandma
2870
+ $REPLACE_returning
2871
+ $REPLACE_laziness
2872
+ $REPLACE_importantly
2873
+ $APPEND_true
2874
+ $APPEND_series
2875
+ $REPLACE_converse
2876
+ $APPEND_session
2877
+ $REPLACE_sugar
2878
+ $APPEND_Currently
2879
+ $REPLACE_mentally
2880
+ $APPEND_starts
2881
+ $REPLACE_theater
2882
+ $APPEND_tonight
2883
+ $REPLACE_succeeded
2884
+ $REPLACE_awful
2885
+ $REPLACE_political
2886
+ $APPEND_important
2887
+ $REPLACE_log
2888
+ $REPLACE_awesome
2889
+ $REPLACE_00
2890
+ $APPEND_Did
2891
+ $REPLACE_announcement
2892
+ $REPLACE_addicted
2893
+ $REPLACE_disaster
2894
+ $REPLACE_page
2895
+ $REPLACE_blossom
2896
+ $REPLACE_stars
2897
+ $REPLACE_presentation
2898
+ $REPLACE_Nevertheless
2899
+ $APPEND_talking
2900
+ $APPEND_Instead
2901
+ $APPEND_Chinese
2902
+ $REPLACE_Festival
2903
+ $REPLACE_reasonably
2904
+ $APPEND_someday
2905
+ $REPLACE_expressions
2906
+ $APPEND_Lately
2907
+ $REPLACE_average
2908
+ $APPEND_season
2909
+ $REPLACE_cover
2910
+ $REPLACE_manager
2911
+ $REPLACE_wife
2912
+ $REPLACE_12
2913
+ $REPLACE_possibly
2914
+ $REPLACE_approaching
2915
+ $REPLACE_keeping
2916
+ $REPLACE_motorcycle
2917
+ $REPLACE_happily
2918
+ $APPEND_items
2919
+ $REPLACE_cherry
2920
+ $REPLACE_shall
2921
+ $REPLACE_determined
2922
+ $REPLACE_cheerful
2923
+ $REPLACE_ahead
2924
+ $REPLACE_solution
2925
+ $REPLACE_patients
2926
+ $REPLACE_unforgettable
2927
+ $REPLACE_decreasing
2928
+ $REPLACE_laid
2929
+ $REPLACE_arrange
2930
+ $REPLACE_content
2931
+ $REPLACE_starring
2932
+ $REPLACE_opening
2933
+ $REPLACE_continuing
2934
+ $REPLACE_bloom
2935
+ $REPLACE_concern
2936
+ $APPEND_towards
2937
+ $REPLACE_extreme
2938
+ $APPEND_Will
2939
+ $REPLACE_tests
2940
+ $REPLACE_replace
2941
+ $APPEND_mostly
2942
+ $REPLACE_inform
2943
+ $REPLACE_lying
2944
+ $REPLACE_barely
2945
+ $REPLACE_unpleasant
2946
+ $REPLACE_brand
2947
+ $REPLACE_turning
2948
+ $REPLACE_added
2949
+ $APPEND_age
2950
+ $REPLACE_wide
2951
+ $REPLACE_passing
2952
+ $REPLACE_production
2953
+ $REPLACE_23rd
2954
+ $REPLACE_ramen
2955
+ $REPLACE_occasionally
2956
+ $REPLACE_borrowed
2957
+ $REPLACE_comparison
2958
+ $REPLACE_curry
2959
+ $REPLACE_upcoming
2960
+ $REPLACE_begun
2961
+ $APPEND_mistakes
2962
+ $REPLACE_mouth
2963
+ $REPLACE_scenes
2964
+ $REPLACE_accidentally
2965
+ $REPLACE_gases
2966
+ $REPLACE_blog
2967
+ $REPLACE_Disney
2968
+ $APPEND_straight
2969
+ $REPLACE_topics
2970
+ $REPLACE_register
2971
+ $REPLACE_color
2972
+ $REPLACE_explained
2973
+ $APPEND_shopping
2974
+ $REPLACE_Taiwan
2975
+ $REPLACE_sales
2976
+ $REPLACE_dictionary
2977
+ $REPLACE_inexpensive
2978
+ $APPEND_directly
2979
+ $REPLACE_comfortably
2980
+ $REPLACE_suprised
2981
+ $APPEND_AM
2982
+ $REPLACE_dance
2983
+ $REPLACE_eager
2984
+ $REPLACE_envious
2985
+ $REPLACE_lie
2986
+ $REPLACE_Apart
2987
+ $REPLACE_closed
2988
+ $REPLACE_brother
2989
+ $REPLACE_hopefully
2990
+ $APPEND_caught
2991
+ $REPLACE_background
2992
+ $REPLACE_conditions
2993
+ $REPLACE_attracted
2994
+ $REPLACE_aim
2995
+ $REPLACE_twenty
2996
+ $REPLACE_Each
2997
+ $APPEND_air
2998
+ $REPLACE_technique
2999
+ $REPLACE_umbrella
3000
+ $REPLACE_Buddhist
3001
+ $REPLACE_yen
3002
+ $APPEND_clothes
3003
+ $APPEND_open
3004
+ $REPLACE_originally
3005
+ $APPEND_OK
3006
+ $REPLACE_complex
3007
+ $APPEND_upon
3008
+ $REPLACE_<
3009
+ $REPLACE_showing
3010
+ $REPLACE_weaknesses
3011
+ $REPLACE_OR
3012
+ $REPLACE_potato
3013
+ $APPEND_photo
3014
+ $REPLACE_flavor
3015
+ $REPLACE_Tuesday
3016
+ $REPLACE_organized
3017
+ $REPLACE_preferred
3018
+ $REPLACE_state
3019
+ $APPEND_normally
3020
+ $APPEND_areas
3021
+ $REPLACE_arranged
3022
+ $REPLACE_embarrassing
3023
+ $REPLACE_positively
3024
+ $REPLACE_coworkers
3025
+ $APPEND_host
3026
+ $REPLACE_influenced
3027
+ $REPLACE_respect
3028
+ $REPLACE_separate
3029
+ $REPLACE_comedies
3030
+ $APPEND_listened
3031
+ $REPLACE_report
3032
+ $REPLACE_Using
3033
+ $REPLACE_performing
3034
+ $REPLACE_construction
3035
+ $REPLACE_trees
3036
+ $REPLACE_conversations
3037
+ $REPLACE_western
3038
+ $APPEND_drinking
3039
+ $APPEND_Next
3040
+ $APPEND_points
3041
+ $APPEND_young
3042
+ $REPLACE_provides
3043
+ $REPLACE_motivation
3044
+ $REPLACE_muscle
3045
+ $REPLACE_diet
3046
+ $APPEND_fluently
3047
+ $REPLACE_Such
3048
+ $REPLACE_task
3049
+ $REPLACE_sounded
3050
+ $REPLACE_schools
3051
+ $REPLACE_park
3052
+ $APPEND_various
3053
+ $APPEND_five
3054
+ $REPLACE_unhappy
3055
+ $REPLACE_Due
3056
+ $REPLACE_alright
3057
+ $REPLACE_campus
3058
+ $APPEND_foreign
3059
+ $APPEND_studies
3060
+ $REPLACE_handle
3061
+ $REPLACE_continuous
3062
+ $REPLACE_drug
3063
+ $REPLACE_expenses
3064
+ $REPLACE_aged
3065
+ $REPLACE_surrounded
3066
+ $REPLACE_thus
3067
+ $REPLACE_noise
3068
+ $REPLACE_healthier
3069
+ $REPLACE_potential
3070
+ $REPLACE_Potter
3071
+ $APPEND_self
3072
+ $APPEND_picture
3073
+ $REPLACE_None
3074
+ $REPLACE_sudden
3075
+ $REPLACE_lifestyles
3076
+ $APPEND_given
3077
+ $REPLACE_aspects
3078
+ $REPLACE_specifically
3079
+ $REPLACE_destination
3080
+ $REPLACE_followed
3081
+ $REPLACE_Other
3082
+ $REPLACE_horrible
3083
+ $REPLACE_radiation
3084
+ $REPLACE_essays
3085
+ $REPLACE_apologize
3086
+ $REPLACE_placed
3087
+ $APPEND_future
3088
+ $REPLACE_awkward
3089
+ $REPLACE_thirty
3090
+ $REPLACE_kids
3091
+ $REPLACE_responsibilities
3092
+ $REPLACE_Generally
3093
+ $REPLACE_relatives
3094
+ $APPEND_More
3095
+ $REPLACE_safer
3096
+ $REPLACE_hoping
3097
+ $REPLACE_heroes
3098
+ $REPLACE_psychological
3099
+ $REPLACE_posted
3100
+ $REPLACE_treatment
3101
+ $REPLACE_glasses
3102
+ $REPLACE_souvenirs
3103
+ $REPLACE_entertaining
3104
+ $APPEND_Tomorrow
3105
+ $APPEND_activities
3106
+ $REPLACE_serve
3107
+ $REPLACE_actions
3108
+ $APPEND_teacher
3109
+ $REPLACE_o
3110
+ $REPLACE_forever
3111
+ $REPLACE_colour
3112
+ $APPEND_change
3113
+ $REPLACE_plants
3114
+ $REPLACE_fulfill
3115
+ $REPLACE_animated
3116
+ $REPLACE_textbook
3117
+ $REPLACE_mathematics
3118
+ $REPLACE_figured
3119
+ $APPEND_running
3120
+ $REPLACE_computers
3121
+ $REPLACE_Singaporean
3122
+ $REPLACE_imagination
3123
+ $REPLACE_runny
3124
+ $REPLACE_bill
3125
+ $REPLACE_meals
3126
+ $APPEND_perhaps
3127
+ $REPLACE_stupid
3128
+ $REPLACE_worries
3129
+ $APPEND_bought
3130
+ $APPEND_article
3131
+ $REPLACE_wasted
3132
+ $REPLACE_falling
3133
+ $REPLACE_necessity
3134
+ $APPEND_common
3135
+ $REPLACE_Tokyo
3136
+ $REPLACE_fascinating
3137
+ $REPLACE_Only
3138
+ $REPLACE_tense
3139
+ $APPEND_Ever
3140
+ $REPLACE_behaviour
3141
+ $REPLACE_magazines
3142
+ $REPLACE_cultures
3143
+ $REPLACE_rid
3144
+ $REPLACE_choices
3145
+ $REPLACE_track
3146
+ $REPLACE_complaint
3147
+ $REPLACE_white
3148
+ $REPLACE_approximately
3149
+ $REPLACE_largest
3150
+ $REPLACE_smart
3151
+ $APPEND_finish
3152
+ $REPLACE_acting
3153
+ $REPLACE_foolish
3154
+ $REPLACE_prices
3155
+ $REPLACE_r
3156
+ $REPLACE_swim
3157
+ $REPLACE_review
3158
+ $REPLACE_shameful
3159
+ $APPEND_Anyway
3160
+ $REPLACE_senior
3161
+ $REPLACE_proper
3162
+ $REPLACE_provided
3163
+ $REPLACE_troublesome
3164
+ $APPEND_known
3165
+ $REPLACE_homes
3166
+ $REPLACE_spirit
3167
+ $REPLACE_ga
3168
+ $REPLACE_Michael
3169
+ $APPEND_wish
3170
+ $APPEND_performance
3171
+ $REPLACE_typical
3172
+ $APPEND_Well
3173
+ $REPLACE_previously
3174
+ $REPLACE_fail
3175
+ $APPEND_itself
3176
+ $REPLACE_sung
3177
+ $REPLACE_citizens
3178
+ $REPLACE_rapidly
3179
+ $REPLACE_stadium
3180
+ $APPEND_page
3181
+ $APPEND_unfortunately
3182
+ $REPLACE_surprising
3183
+ $REPLACE_unfamiliar
3184
+ $REPLACE_repair
3185
+ $REPLACE_escape
3186
+ $REPLACE_actor
3187
+ $REPLACE_Almost
3188
+ $REPLACE_shoes
3189
+ $REPLACE_disagree
3190
+ $REPLACE_co
3191
+ $REPLACE_attempt
3192
+ $REPLACE_instance
3193
+ $REPLACE_lasted
3194
+ $APPEND_connect
3195
+ $APPEND_San
3196
+ $REPLACE_hairstyle
3197
+ $REPLACE_internship
3198
+ $REPLACE_Bye
3199
+ $REPLACE_tourist
3200
+ $REPLACE_5th
3201
+ $REPLACE_cousin
3202
+ $REPLACE_beside
3203
+ $REPLACE_facilities
3204
+ $REPLACE_yummy
3205
+ $REPLACE_prove
3206
+ $APPEND_certain
3207
+ $REPLACE_beginners
3208
+ $REPLACE_worn
3209
+ $REPLACE_wont
3210
+ $APPEND_wearing
3211
+ $REPLACE_improves
3212
+ $REPLACE_electronics
3213
+ $REPLACE_realistic
3214
+ $REPLACE_annoying
3215
+ $REPLACE_dreamed
3216
+ $APPEND_results
3217
+ $REPLACE_certainly
3218
+ $REPLACE_classroom
3219
+ $REPLACE_argument
3220
+ $REPLACE_warmth
3221
+ $REPLACE_achieved
3222
+ $APPEND_meaning
3223
+ $REPLACE_photographs
3224
+ $REPLACE_animals
3225
+ $REPLACE_community
3226
+ $REPLACE_interests
3227
+ $REPLACE_medium
3228
+ $REPLACE_beer
3229
+ $REPLACE_engineer
3230
+ $REPLACE_Good
3231
+ $APPEND_looks
3232
+ $REPLACE_beauty
3233
+ $APPEND_speaker
3234
+ $REPLACE_connect
3235
+ $APPEND_driving
3236
+ $APPEND_Have
3237
+ $REPLACE_reminds
3238
+ $REPLACE_apologized
3239
+ $REPLACE_obtain
3240
+ $REPLACE_Probably
3241
+ $REPLACE_strangers
3242
+ $APPEND_bring
3243
+ $REPLACE_smile
3244
+ $REPLACE_exhibition
3245
+ $REPLACE_pot
3246
+ $REPLACE_encounter
3247
+ $APPEND_degrees
3248
+ $REPLACE_lights
3249
+ $APPEND_bus
3250
+ $REPLACE_movement
3251
+ $REPLACE_cancel
3252
+ $REPLACE_y
3253
+ $REPLACE_black
3254
+ $REPLACE_concentration
3255
+ $REPLACE_graduating
3256
+ $REPLACE_usage
3257
+ $REPLACE_handsome
3258
+ $APPEND_ride
3259
+ $REPLACE_degree
3260
+ $APPEND_point
3261
+ $APPEND_conversation
3262
+ $REPLACE_menu
3263
+ $REPLACE_assistance
3264
+ $REPLACE_Summer
3265
+ $APPEND_behind
3266
+ $REPLACE_police
3267
+ $REPLACE_15th
3268
+ $REPLACE_separated
3269
+ $REPLACE_regardless
3270
+ $REPLACE_significant
3271
+ $REPLACE_transfer
3272
+ $REPLACE_religious
3273
+ $REPLACE_tempura
3274
+ $REPLACE_academic
3275
+ $REPLACE_otherwise
3276
+ $REPLACE_useless
3277
+ $REPLACE_celebrities
3278
+ $REPLACE_dislike
3279
+ $REPLACE_11
3280
+ $APPEND_sound
3281
+ $APPEND_^
3282
+ $REPLACE_replaced
3283
+ $REPLACE_sushi
3284
+ $REPLACE_wind
3285
+ $REPLACE_web
3286
+ $REPLACE_Britain
3287
+ $REPLACE_complained
3288
+ $REPLACE_model
3289
+ $REPLACE_de
3290
+ $REPLACE_depends
3291
+ $REPLACE_pm
3292
+ $REPLACE_cafe
3293
+ $REPLACE_congratulate
3294
+ $REPLACE_ending
3295
+ $APPEND_building
3296
+ $REPLACE_presented
3297
+ $REPLACE_shut
3298
+ $APPEND_restaurant
3299
+ $APPEND_March
3300
+ $REPLACE_freedom
3301
+ $APPEND_story
3302
+ $REPLACE_creating
3303
+ $REPLACE_concept
3304
+ $REPLACE_conduct
3305
+ $REPLACE_France
3306
+ $REPLACE_paper
3307
+ $REPLACE_offers
3308
+ $REPLACE_Oh
3309
+ $REPLACE_occured
3310
+ $REPLACE_touched
3311
+ $REPLACE_travelled
3312
+ $APPEND_Thus
3313
+ $REPLACE_sickness
3314
+ $REPLACE_neighbor
3315
+ $REPLACE_paying
3316
+ $REPLACE_national
3317
+ $APPEND_needs
3318
+ $REPLACE_climb
3319
+ $REPLACE_Take
3320
+ $APPEND_Everyone
3321
+ $REPLACE_aftershocks
3322
+ $REPLACE_committed
3323
+ $REPLACE_textbooks
3324
+ $REPLACE_waited
3325
+ $REPLACE_round
3326
+ $REPLACE_Okay
3327
+ $REPLACE_eldest
3328
+ $APPEND_allow
3329
+ $REPLACE_Spanish
3330
+ $REPLACE_Spring
3331
+ $REPLACE_absence
3332
+ $REPLACE_actresses
3333
+ $REPLACE_majority
3334
+ $REPLACE_growth
3335
+ $APPEND_requires
3336
+ $REPLACE_About
3337
+ $REPLACE_intend
3338
+ $APPEND_deep
3339
+ $REPLACE_enjoyment
3340
+ $APPEND_raining
3341
+ $REPLACE_Am
3342
+ $REPLACE_eyes
3343
+ $REPLACE_Afterward
3344
+ $REPLACE_drugs
3345
+ $REPLACE_cram
3346
+ $REPLACE_dancing
3347
+ $APPEND_M
3348
+ $REPLACE_nationalities
3349
+ $REPLACE_throat
3350
+ $APPEND_shows
3351
+ $REPLACE_Facebook
3352
+ $APPEND_TO
3353
+ $REPLACE_brilliant
3354
+ $REPLACE_drop
3355
+ $REPLACE_owner
3356
+ $APPEND_side
3357
+ $REPLACE_struggling
3358
+ $REPLACE_100
3359
+ $REPLACE_surely
3360
+ $REPLACE_devices
3361
+ $APPEND_takes
3362
+ $REPLACE_TO
3363
+ $REPLACE_neighbors
3364
+ $REPLACE_youth
3365
+ $REPLACE_connected
3366
+ $REPLACE_passes
3367
+ $REPLACE_kilometers
3368
+ $APPEND_fun
3369
+ $REPLACE_viewing
3370
+ $REPLACE_behavior
3371
+ $REPLACE_chores
3372
+ $REPLACE_mystery
3373
+ $APPEND_shall
3374
+ $APPEND_taught
3375
+ $REPLACE_display
3376
+ $REPLACE_ensure
3377
+ $APPEND_online
3378
+ $REPLACE_assignment
3379
+ $REPLACE_compare
3380
+ $APPEND_Still
3381
+ $REPLACE_conditioning
3382
+ $REPLACE_suffered
3383
+ $REPLACE_haven't
3384
+ $REPLACE_muscles
3385
+ $APPEND_grammar
3386
+ $APPEND_Two
3387
+ $REPLACE_chemistry
3388
+ $REPLACE_consideration
3389
+ $REPLACE_smoking
3390
+ $REPLACE_Harry
3391
+ $APPEND_seemed
3392
+ $REPLACE_marry
3393
+ $REPLACE_hunting
3394
+ $REPLACE_recommendation
3395
+ $APPEND_previously
3396
+ $REPLACE_dramas
3397
+ $REPLACE_passionate
3398
+ $APPEND_ways
3399
+ $REPLACE_hurts
3400
+ $APPEND_sense
3401
+ $APPEND_drink
3402
+ $REPLACE_refrigerator
3403
+ $REPLACE_organised
3404
+ $REPLACE_cleaning
3405
+ $REPLACE_courage
3406
+ $APPEND_arrived
3407
+ $REPLACE_housework
3408
+ $REPLACE_charge
3409
+ $REPLACE_violin
3410
+ $APPEND_offer
3411
+ $APPEND_water
3412
+ $REPLACE_injuries
3413
+ $REPLACE_perspective
3414
+ $REPLACE_hoped
3415
+ $REPLACE_challenging
3416
+ $REPLACE_THE
3417
+ $APPEND_regarding
3418
+ $APPEND_Their
3419
+ $REPLACE_upload
3420
+ $REPLACE_luxurious
3421
+ $REPLACE_unnecessary
3422
+ $APPEND_harder
3423
+ $APPEND_twice
3424
+ $REPLACE_rules
3425
+ $APPEND_rest
3426
+ $REPLACE_afford
3427
+ $APPEND_says
3428
+ $REPLACE_project
3429
+ $REPLACE_bear
3430
+ $REPLACE_mainly
3431
+ $REPLACE_Yet
3432
+ $REPLACE_diligently
3433
+ $REPLACE_led
3434
+ $REPLACE_architecture
3435
+ $REPLACE_accurate
3436
+ $REPLACE_mindset
3437
+ $REPLACE_fought
3438
+ $REPLACE_mid
3439
+ $REPLACE_vocalist
3440
+ $REPLACE_flexible
3441
+ $APPEND_girl
3442
+ $REPLACE_tiring
3443
+ $REPLACE_broadcast
3444
+ $REPLACE_July
3445
+ $APPEND_version
3446
+ $REPLACE_seven
3447
+ $REPLACE_Nice
3448
+ $REPLACE_alarm
3449
+ $APPEND_dish
3450
+ $REPLACE_jewelry
3451
+ $REPLACE_studing
3452
+ $REPLACE_cuisine
3453
+ $APPEND_According
3454
+ $APPEND_delicious
3455
+ $REPLACE_ladies
3456
+ $REPLACE_hospital
3457
+ $REPLACE_sweating
3458
+ $REPLACE_obviously
3459
+ $APPEND_interested
3460
+ $REPLACE_College
3461
+ $REPLACE_Autumn
3462
+ $REPLACE_Hawaii
3463
+ $REPLACE_scheduled
3464
+ $REPLACE_crying
3465
+ $REPLACE_climbing
3466
+ $APPEND_giving
3467
+ $REPLACE_smoke
3468
+ $APPEND_9
3469
+ $REPLACE_limit
3470
+ $REPLACE_flying
3471
+ $APPEND_knowledge
3472
+ $REPLACE_4th
3473
+ $REPLACE_Francisco
3474
+ $REPLACE_tournament
3475
+ $APPEND_sleep
3476
+ $REPLACE_participants
3477
+ $REPLACE_snacks
3478
+ $REPLACE_energetic
3479
+ $REPLACE_allergic
3480
+ $APPEND_fast
3481
+ $APPEND_score
3482
+ $REPLACE_clearer
3483
+ $APPEND_source
3484
+ $REPLACE_lottery
3485
+ $APPEND_service
3486
+ $REPLACE_acquire
3487
+ $REPLACE_arrival
3488
+ $APPEND_situation
3489
+ $REPLACE_polite
3490
+ $REPLACE_laughter
3491
+ $REPLACE_Thirdly
3492
+ $APPEND_particular
3493
+ $REPLACE_standard
3494
+ $REPLACE_suppose
3495
+ $REPLACE_emails
3496
+ $REPLACE_Disneyland
3497
+ $REPLACE_nine
3498
+ $REPLACE_rising
3499
+ $REPLACE_cartoon
3500
+ $REPLACE_refreshing
3501
+ $REPLACE_factories
3502
+ $REPLACE_20th
3503
+ $APPEND_single
3504
+ $APPEND_sometime
3505
+ $REPLACE_cleaner
3506
+ $APPEND_Such
3507
+ $APPEND_particularly
3508
+ $REPLACE_fruit
3509
+ $REPLACE_beforehand
3510
+ $REPLACE_11th
3511
+ $REPLACE_Halloween
3512
+ $REPLACE_attract
3513
+ $APPEND_forms
3514
+ $APPEND_under
3515
+ $REPLACE_guests
3516
+ $REPLACE_classmate
3517
+ $APPEND_Yours
3518
+ $REPLACE_learners
3519
+ $REPLACE_red
3520
+ $REPLACE_critical
3521
+ $REPLACE_pitiful
3522
+ $REPLACE_groups
3523
+ $REPLACE_grandparents
3524
+ $REPLACE_primary
3525
+ $REPLACE_Both
3526
+ $REPLACE_aside
3527
+ $REPLACE_youngest
3528
+ $REPLACE_practising
3529
+ $APPEND_Am
3530
+ $REPLACE_summary
3531
+ $REPLACE_telephone
3532
+ $APPEND_nowadays
3533
+ $REPLACE_20
3534
+ $REPLACE_tons
3535
+ $REPLACE_Listening
3536
+ $REPLACE_guilt
3537
+ $REPLACE_occurs
3538
+ $REPLACE_Anyways
3539
+ $REPLACE_rush
3540
+ $REPLACE_intermediate
3541
+ $REPLACE_theirs
3542
+ $APPEND_business
3543
+ $REPLACE_neighboring
3544
+ $REPLACE_independence
3545
+ $APPEND_cost
3546
+ $APPEND_country
3547
+ $REPLACE_beef
3548
+ $REPLACE_formal
3549
+ $APPEND_worked
3550
+ $REPLACE_Hence
3551
+ $REPLACE_Mother
3552
+ $REPLACE_picked
3553
+ $REPLACE_star
3554
+ $REPLACE_fishing
3555
+ $REPLACE_planted
3556
+ $REPLACE_fear
3557
+ $APPEND_100
3558
+ $APPEND_onto
3559
+ $REPLACE_choir
3560
+ $REPLACE_spot
3561
+ $REPLACE_correction
3562
+ $REPLACE_suits
3563
+ $REPLACE_Day
3564
+ $REPLACE_supported
3565
+ $REPLACE_comfort
3566
+ $REPLACE_newspapers
3567
+ $REPLACE_friendship
3568
+ $REPLACE_May
3569
+ $REPLACE_freezing
3570
+ $REPLACE_discussed
3571
+ $APPEND_{
3572
+ $APPEND_whom
3573
+ $REPLACE_trust
3574
+ $REPLACE_industries
3575
+ $REPLACE_decisions
3576
+ $APPEND_poor
3577
+ $APPEND_correctly
3578
+ $REPLACE_hundred
3579
+ $REPLACE_recipe
3580
+ $REPLACE_competitive
3581
+ $REPLACE_burden
3582
+ $REPLACE_abandoned
3583
+ $APPEND_walking
3584
+ $REPLACE_individuals
3585
+ $APPEND_travelling
3586
+ $REPLACE_theme
3587
+ $REPLACE_runs
3588
+ $REPLACE_threw
3589
+ $REPLACE_rock
3590
+ $APPEND_thinking
3591
+ $REPLACE_Taking
3592
+ $REPLACE_ideal
3593
+ $REPLACE_practical
3594
+ $APPEND_re
3595
+ $APPEND_station
3596
+ $REPLACE_collect
3597
+ $REPLACE_perhaps
3598
+ $REPLACE_advanced
3599
+ $REPLACE_humans
3600
+ $APPEND_realized
3601
+ $REPLACE_remove
3602
+ $REPLACE_notebook
3603
+ $REPLACE_continuously
3604
+ $REPLACE_beach
3605
+ $REPLACE_ends
3606
+ $REPLACE_secret
3607
+ $REPLACE_skilled
3608
+ $REPLACE_jump
3609
+ $REPLACE_episodes
3610
+ $REPLACE_cup
3611
+ $REPLACE_consists
3612
+ $REPLACE_release
3613
+ $REPLACE_notes
3614
+ $REPLACE_22nd
3615
+ $REPLACE_fallen
3616
+ $APPEND_Which
3617
+ $APPEND_saw
3618
+ $REPLACE_libraries
3619
+ $REPLACE_consecutive
3620
+ $REPLACE_March
3621
+ $REPLACE_closely
3622
+ $REPLACE_century
3623
+ $APPEND_per
3624
+ $REPLACE_circumstances
3625
+ $REPLACE_whoever
3626
+ $REPLACE_rented
3627
+ $REPLACE_aging
3628
+ $APPEND_regularly
3629
+ $REPLACE_cycling
3630
+ $REPLACE_depression
3631
+ $REPLACE_row
3632
+ $APPEND_constantly
3633
+ $APPEND_feelings
3634
+ $REPLACE_Angeles
3635
+ $REPLACE_talented
3636
+ $REPLACE_00am
3637
+ $REPLACE_shower
3638
+ $REPLACE_functions
3639
+ $APPEND_love
3640
+ $APPEND_believe
3641
+ $REPLACE_basis
3642
+ $REPLACE_follows
3643
+ $APPEND_hardly
3644
+ $REPLACE_teenager
3645
+ $REPLACE_diverse
3646
+ $REPLACE_Sir
3647
+ $REPLACE_decrease
3648
+ $REPLACE_goodbye
3649
+ $REPLACE_behave
3650
+ $APPEND_everywhere
3651
+ $REPLACE_users
3652
+ $REPLACE_analysis
3653
+ $REPLACE_translating
3654
+ $REPLACE_relaxation
3655
+ $REPLACE_unexpectedly
3656
+ $REPLACE_Russia
3657
+ $REPLACE_championship
3658
+ $APPEND_lives
3659
+ $REPLACE_hate
3660
+ $APPEND_somehow
3661
+ $REPLACE_joining
3662
+ $APPEND_stop
3663
+ $APPEND_enjoyed
3664
+ $APPEND_cup
3665
+ $REPLACE_flies
3666
+ $REPLACE_Talking
3667
+ $REPLACE_painting
3668
+ $REPLACE_letters
3669
+ $REPLACE_master
3670
+ $REPLACE_stated
3671
+ $REPLACE_aggressive
3672
+ $REPLACE_shy
3673
+ $APPEND_care
3674
+ $APPEND_wear
3675
+ $REPLACE_served
3676
+ $REPLACE_stops
3677
+ $APPEND_house
3678
+ $REPLACE_diligent
3679
+ $REPLACE_IN
3680
+ $REPLACE_deciding
3681
+ $REPLACE_sweets
3682
+ $REPLACE_argued
3683
+ $REPLACE_bookstore
3684
+ $APPEND_pretty
3685
+ $REPLACE_range
3686
+ $REPLACE_vegetable
3687
+ $REPLACE_appreciation
3688
+ $REPLACE_pity
3689
+ $REPLACE_update
3690
+ $REPLACE_More
3691
+ $REPLACE_laughing
3692
+ $REPLACE_economics
3693
+ $REPLACE_cellphone
3694
+ $REPLACE_OK
3695
+ $REPLACE_pregnant
3696
+ $REPLACE_spite
3697
+ $REPLACE_karaoke
3698
+ $REPLACE_tutor
3699
+ $REPLACE_cockroaches
3700
+ $APPEND_Most
3701
+ $REPLACE_additional
3702
+ $APPEND_energy
3703
+ $REPLACE_contain
3704
+ $REPLACE_actual
3705
+ $REPLACE_shining
3706
+ $APPEND_feels
3707
+ $REPLACE_lesser
3708
+ $REPLACE_pages
3709
+ $REPLACE_cartoons
3710
+ $REPLACE_arise
3711
+ $REPLACE_f
3712
+ $REPLACE_luckily
3713
+ $REPLACE_airport
3714
+ $REPLACE_windy
3715
+ $REPLACE_instructor
3716
+ $APPEND_Why
3717
+ $REPLACE_weighed
3718
+ $REPLACE_river
3719
+ $APPEND_frequently
3720
+ $APPEND_method
3721
+ $REPLACE_shrine
3722
+ $APPEND_short
3723
+ $REPLACE_suffer
3724
+ $REPLACE_6th
3725
+ $REPLACE_fight
3726
+ $APPEND_worth
3727
+ $REPLACE_absent
3728
+ $REPLACE_United
3729
+ $REPLACE_chef
3730
+ $REPLACE_anytime
3731
+ $REPLACE_Three
3732
+ $REPLACE_noisy
3733
+ $APPEND_therefore
3734
+ $REPLACE_iPod
3735
+ $APPEND_French
3736
+ $REPLACE_wishes
3737
+ $REPLACE_Yours
3738
+ $APPEND_Being
3739
+ $APPEND_Its
3740
+ $APPEND_field
3741
+ $APPEND_photos
3742
+ $REPLACE_definition
3743
+ $APPEND_gives
3744
+ $REPLACE_scores
3745
+ $APPEND_Having
3746
+ $REPLACE_statement
3747
+ $APPEND_spoken
3748
+ $APPEND_price
3749
+ $REPLACE_cleaned
3750
+ $REPLACE_varied
3751
+ $APPEND_Oh
3752
+ $REPLACE_wash
3753
+ $REPLACE_satisfactory
3754
+ $REPLACE_ceiling
3755
+ $APPEND_including
3756
+ $APPEND_special
3757
+ $APPEND_popular
3758
+ $REPLACE_invention
3759
+ $REPLACE_materials
3760
+ $REPLACE_media
3761
+ $REPLACE_=
3762
+ $REPLACE_dialogue
3763
+ $REPLACE_designed
3764
+ $REPLACE_popularity
3765
+ $REPLACE_York
3766
+ $REPLACE_Getting
3767
+ $APPEND_shown
3768
+ $REPLACE_carrying
3769
+ $REPLACE_00pm
3770
+ $REPLACE_stations
3771
+ $REPLACE_puts
3772
+ $REPLACE_screen
3773
+ $REPLACE_appreciative
3774
+ $REPLACE_cruel
3775
+ $APPEND_main
3776
+ $REPLACE_action
3777
+ $REPLACE_unlucky
3778
+ $REPLACE_God
3779
+ $APPEND_basically
3780
+ $REPLACE_d
3781
+ $REPLACE_climbed
3782
+ $REPLACE_thoroughly
3783
+ $REPLACE_Canada
3784
+ $REPLACE_hesitate
3785
+ $APPEND_developed
3786
+ $APPEND_post
3787
+ $REPLACE_represent
3788
+ $REPLACE_comment
3789
+ $REPLACE_controlled
3790
+ $REPLACE_source
3791
+ $REPLACE_customs
3792
+ $REPLACE_drawn
3793
+ $REPLACE_mature
3794
+ $REPLACE_commute
3795
+ $APPEND_Once
3796
+ $APPEND_letter
3797
+ $REPLACE_attached
3798
+ $REPLACE_gift
3799
+ $REPLACE_nap
3800
+ $APPEND_asked
3801
+ $REPLACE_inspired
3802
+ $APPEND_event
3803
+ $REPLACE_seafood
3804
+ $APPEND_watched
3805
+ $REPLACE_errors
3806
+ $APPEND_passed
3807
+ $APPEND_english
3808
+ $REPLACE_complaining
3809
+ $REPLACE_roommate
3810
+ $REPLACE_Life
3811
+ $REPLACE_mental
3812
+ $REPLACE_grades
3813
+ $APPEND_parts
3814
+ $REPLACE_pronounciation
3815
+ $REPLACE_strengthen
3816
+ $REPLACE_priority
3817
+ $APPEND_abroad
3818
+ $APPEND_ticket
3819
+ $REPLACE_insurance
3820
+ $REPLACE_hesitation
3821
+ $REPLACE_researched
3822
+ $REPLACE_unlike
3823
+ $REPLACE_exercising
3824
+ $REPLACE_exchanged
3825
+ $REPLACE_knows
3826
+ $REPLACE_founded
3827
+ $REPLACE_messy
3828
+ $REPLACE_dying
3829
+ $APPEND_plans
3830
+ $APPEND_match
3831
+ $REPLACE_Fourth
3832
+ $REPLACE_answers
3833
+ $REPLACE_assignments
3834
+ $REPLACE_Whether
3835
+ $REPLACE_elder
3836
+ $REPLACE_gas
3837
+ $REPLACE_heading
3838
+ $REPLACE_laws
3839
+ $REPLACE_kindly
3840
+ $REPLACE_wine
3841
+ $REPLACE_household
3842
+ $REPLACE_dining
3843
+ $REPLACE_sensitive
3844
+ $REPLACE_wet
3845
+ $REPLACE_Personally
3846
+ $APPEND_middle
3847
+ $REPLACE_busier
3848
+ $REPLACE_dirty
3849
+ $REPLACE_religion
3850
+ $REPLACE_facing
3851
+ $APPEND_totally
3852
+ $REPLACE_repeatedly
3853
+ $REPLACE_tries
3854
+ $REPLACE_organising
3855
+ $REPLACE_operating
3856
+ $REPLACE_ex
3857
+ $APPEND_languages
3858
+ $REPLACE_services
3859
+ $REPLACE_remaining
3860
+ $REPLACE_killed
3861
+ $REPLACE_fair
3862
+ $REPLACE_bike
3863
+ $REPLACE_'t
3864
+ $APPEND_titled
3865
+ $REPLACE_exception
3866
+ $APPEND_carefully
3867
+ $REPLACE_salon
3868
+ $REPLACE_translated
3869
+ $REPLACE_welcome
3870
+ $REPLACE_gratitude
3871
+ $REPLACE_Watching
3872
+ $REPLACE_adults
3873
+ $APPEND_large
3874
+ $REPLACE_untill
3875
+ $REPLACE_coach
3876
+ $REPLACE_mountains
3877
+ $REPLACE_sandwich
3878
+ $REPLACE_examples
3879
+ $APPEND_gone
3880
+ $REPLACE_multiple
3881
+ $APPEND_meant
3882
+ $REPLACE_delivered
3883
+ $REPLACE_entering
3884
+ $APPEND_Hello
3885
+ $REPLACE_option
3886
+ $REPLACE_cigarette
3887
+ $REPLACE_acted
3888
+ $REPLACE_bathroom
3889
+ $REPLACE_accustomed
3890
+ $REPLACE_literature
3891
+ $REPLACE_bottom
3892
+ $APPEND_course
3893
+ $APPEND_choose
3894
+ $REPLACE_resume
3895
+ $APPEND_web
3896
+ $REPLACE_aloud
3897
+ $REPLACE_material
3898
+ $REPLACE_struggle
3899
+ $REPLACE_trains
3900
+ $REPLACE_dog
3901
+ $APPEND_Both
3902
+ $REPLACE_leisure
3903
+ $REPLACE_climate
3904
+ $REPLACE_japanese
3905
+ $REPLACE_reduced
3906
+ $APPEND_break
3907
+ $APPEND_grow
3908
+ $REPLACE_Thinking
3909
+ $REPLACE_dessert
3910
+ $REPLACE_Yeah
3911
+ $REPLACE_salt
3912
+ $REPLACE_rare
3913
+ $REPLACE_fairly
3914
+ $REPLACE_knowing
3915
+ $REPLACE_varieties
3916
+ $APPEND_festival
3917
+ $REPLACE_kitten
3918
+ $APPEND_changes
3919
+ $REPLACE_Introduction
3920
+ $REPLACE_viruses
3921
+ $APPEND_gotten
3922
+ $REPLACE_h
3923
+ $REPLACE_experiencing
3924
+ $APPEND_rain
3925
+ $APPEND_weight
3926
+ $REPLACE_brown
3927
+ $REPLACE_Everyday
3928
+ $APPEND_Tokyo
3929
+ $REPLACE_split
3930
+ $REPLACE_section
3931
+ $APPEND_dinner
3932
+ $REPLACE_Making
3933
+ $REPLACE_courses
3934
+ $REPLACE_remains
3935
+ $REPLACE_Dragon
3936
+ $REPLACE_soft
3937
+ $REPLACE_independent
3938
+ $REPLACE_conducted
3939
+ $APPEND_mode
3940
+ $APPEND_tickets
3941
+ $APPEND_leave
3942
+ $APPEND_culture
3943
+ $REPLACE_Iam
3944
+ $REPLACE_joy
3945
+ $REPLACE_violent
3946
+ $REPLACE_leaf
3947
+ $REPLACE_fortune
3948
+ $APPEND_reasons
3949
+ $REPLACE_Fukushima
3950
+ $APPEND_thus
3951
+ $REPLACE_boss
3952
+ $REPLACE_player
3953
+ $REPLACE_closest
3954
+ $REPLACE_lies
3955
+ $APPEND_consists
3956
+ $REPLACE_impolite
3957
+ $REPLACE_unpredictable
3958
+ $REPLACE_shared
3959
+ $REPLACE_7th
3960
+ $APPEND_Up
3961
+ $REPLACE_step
3962
+ $APPEND_football
3963
+ $REPLACE_central
3964
+ $REPLACE_symptoms
3965
+ $REPLACE_funds
3966
+ $REPLACE_resolve
3967
+ $REPLACE_Technology
3968
+ $REPLACE_solutions
3969
+ $REPLACE_adult
3970
+ $REPLACE_military
3971
+ $REPLACE_supermarkets
3972
+ $APPEND_sites
3973
+ $REPLACE_levels
3974
+ $REPLACE_broad
3975
+ $REPLACE_smiling
3976
+ $REPLACE_expecting
3977
+ $REPLACE_shorter
3978
+ $APPEND_Like
3979
+ $REPLACE_gloomy
3980
+ $REPLACE_weekdays
3981
+ $REPLACE_blew
3982
+ $REPLACE_determine
3983
+ $REPLACE_discount
3984
+ $APPEND_attend
3985
+ $REPLACE_treated
3986
+ $REPLACE_length
3987
+ $REPLACE_raw
3988
+ $REPLACE_promote
3989
+ $REPLACE_court
3990
+ $REPLACE_commercial
3991
+ $REPLACE_expectations
3992
+ $APPEND_exercise
3993
+ $REPLACE_tickets
3994
+ $REPLACE_status
3995
+ $REPLACE_retirement
3996
+ $REPLACE_crowd
3997
+ $REPLACE_requested
3998
+ $REPLACE_South
3999
+ $APPEND_corrected
4000
+ $REPLACE_aunt
4001
+ $REPLACE_Traveling
4002
+ $REPLACE_region
4003
+ $REPLACE_pulled
4004
+ $APPEND_14
4005
+ $REPLACE_impatient
4006
+ $REPLACE_roads
4007
+ $REPLACE_value
4008
+ $REPLACE_existence
4009
+ $REPLACE_applications
4010
+ $REPLACE_boiled
4011
+ $REPLACE_warming
4012
+ $REPLACE_15
4013
+ $REPLACE_Iwas
4014
+ $REPLACE_accomplish
4015
+ $APPEND_Sounds
4016
+ $APPEND_send
4017
+ $APPEND_programs
4018
+ $REPLACE_costume
4019
+ $APPEND_1st
4020
+ $REPLACE_ancient
4021
+ $REPLACE_physics
4022
+ $REPLACE_record
4023
+ $REPLACE_published
4024
+ $REPLACE_cross
4025
+ $REPLACE_harmful
4026
+ $REPLACE_description
4027
+ $APPEND_wrote
4028
+ $APPEND_pay
4029
+ $REPLACE_fond
4030
+ $APPEND_color
4031
+ $REPLACE_asks
4032
+ $APPEND_stuff
4033
+ $REPLACE_specially
4034
+ $REPLACE_uneasy
4035
+ $APPEND_riding
4036
+ $REPLACE_inthe
4037
+ $REPLACE_nose
4038
+ $REPLACE_scientific
4039
+ $REPLACE_Among
4040
+ $REPLACE_danger
4041
+ $REPLACE_commit
4042
+ $REPLACE_Particularly
4043
+ $REPLACE_troubles
4044
+ $REPLACE_button
4045
+ $REPLACE_delayed
4046
+ $REPLACE_Diego
4047
+ $REPLACE_daytime
4048
+ $REPLACE_phenomenon
4049
+ $APPEND_following
4050
+ $REPLACE_Consequently
4051
+ $REPLACE_saving
4052
+ $REPLACE_souvenir
4053
+ $REPLACE_missing
4054
+ $REPLACE_unless
4055
+ $APPEND_office
4056
+ $REPLACE_anniversary
4057
+ $REPLACE_anger
4058
+ $APPEND_himself
4059
+ $APPEND_happening
4060
+ $REPLACE_cheer
4061
+ $REPLACE_animal
4062
+ $APPEND_subject
4063
+ $REPLACE_nicer
4064
+ $REPLACE_sells
4065
+ $REPLACE_lenses
4066
+ $REPLACE_OF
4067
+ $REPLACE_possibilities
4068
+ $REPLACE_efforts
4069
+ $REPLACE_Years
4070
+ $REPLACE_merchandise
4071
+ $REPLACE_subsidies
4072
+ $REPLACE_forms
4073
+ $REPLACE_hotel
4074
+ $APPEND_non
4075
+ $REPLACE_appetite
4076
+ $REPLACE_sport
4077
+ $REPLACE_expand
4078
+ $REPLACE_rhythm
4079
+ $APPEND_Another
4080
+ $REPLACE_Language
4081
+ $APPEND_Each
4082
+ $REPLACE_window
4083
+ $REPLACE_increases
4084
+ $REPLACE_states
4085
+ $REPLACE_excitement
4086
+ $REPLACE_promise
4087
+ $APPEND_seen
4088
+ $REPLACE_luggage
4089
+ $APPEND_generally
4090
+ $REPLACE_frustrating
4091
+ $REPLACE_colors
4092
+ $REPLACE_mosquitoes
4093
+ $REPLACE_seats
4094
+ $REPLACE_woken
4095
+ $REPLACE_switched
4096
+ $REPLACE_grammatically
4097
+ $REPLACE_ON
4098
+ $REPLACE_kindness
4099
+ $REPLACE_thieves
4100
+ $REPLACE_spoiled
4101
+ $REPLACE_States
4102
+ $REPLACE_hamburgers
4103
+ $APPEND_nearly
4104
+ $REPLACE_situated
4105
+ $REPLACE_foods
4106
+ $REPLACE_collecting
4107
+ $REPLACE_unfortunate
4108
+ $REPLACE_camera
4109
+ $REPLACE_dramatic
4110
+ $REPLACE_noodle
4111
+ $APPEND_human
4112
+ $REPLACE_re
4113
+ $REPLACE_humidity
4114
+ $APPEND_strongly
4115
+ $REPLACE_kimchi
4116
+ $APPEND_difference
4117
+ $REPLACE_artists
4118
+ $REPLACE_medical
4119
+ $REPLACE_incredible
4120
+ $APPEND_helping
4121
+ $APPEND_ahead
4122
+ $REPLACE_lines
4123
+ $REPLACE_thinks
4124
+ $REPLACE_thousand
4125
+ $REPLACE_sixth
4126
+ $REPLACE_exposed
4127
+ $REPLACE_colours
4128
+ $REPLACE_widely
4129
+ $APPEND_nuclear
4130
+ $REPLACE_worldwide
4131
+ $REPLACE_comprehension
4132
+ $APPEND_hair
4133
+ $REPLACE_halfway
4134
+ $APPEND_cause
4135
+ $REPLACE_cast
4136
+ $APPEND_coffee
4137
+ $REPLACE_attractions
4138
+ $REPLACE_beautifully
4139
+ $REPLACE_handwritten
4140
+ $APPEND_band
4141
+ $APPEND_improving
4142
+ $APPEND_40
4143
+ $REPLACE_shops
4144
+ $REPLACE_basically
4145
+ $APPEND_studied
4146
+ $REPLACE_manufacturer
4147
+ $REPLACE_Western
4148
+ $APPEND_throughout
4149
+ $REPLACE_identify
4150
+ $APPEND_Would
4151
+ $REPLACE_Switzerland
4152
+ $APPEND_everybody
4153
+ $APPEND_grade
4154
+ $REPLACE_farewell
4155
+ $REPLACE_romantic
4156
+ $REPLACE_Celsius
4157
+ $REPLACE_bread
4158
+ $APPEND_favorite
4159
+ $APPEND_Despite
4160
+ $REPLACE_downloaded
4161
+ $REPLACE_balance
4162
+ $APPEND_carry
4163
+ $REPLACE_cure
4164
+ $REPLACE_programmer
4165
+ $APPEND_considered
4166
+ $APPEND_slowly
4167
+ $REPLACE_discovery
4168
+ $APPEND_stopped
4169
+ $REPLACE_standing
4170
+ $REPLACE_earned
4171
+ $REPLACE_skating
4172
+ $REPLACE_detail
4173
+ $REPLACE_apology
4174
+ $REPLACE_writer
4175
+ $REPLACE_highway
4176
+ $REPLACE_Goodbye
4177
+ $REPLACE_quote
4178
+ $REPLACE_maintenance
4179
+ $APPEND_taste
4180
+ $REPLACE_package
4181
+ $REPLACE_responded
4182
+ $REPLACE_criticize
4183
+ $APPEND_deeply
4184
+ $REPLACE_jogging
4185
+ $APPEND_waiting
4186
+ $REPLACE_fatter
4187
+ $REPLACE_cycle
4188
+ $APPEND_Only
4189
+ $REPLACE_afterward
4190
+ $REPLACE_specialty
4191
+ $REPLACE_goodness
4192
+ $REPLACE_groceries
4193
+ $APPEND_staff
4194
+ $REPLACE_somehow
4195
+ $APPEND_Moreover
4196
+ $APPEND_training
4197
+ $REPLACE_clever
4198
+ $REPLACE_camp
4199
+ $APPEND_traveling
4200
+ $APPEND_minutes
4201
+ $REPLACE_sandwiches
4202
+ $APPEND_run
4203
+ $REPLACE_options
4204
+ $REPLACE_calories
4205
+ $REPLACE_branch
4206
+ $REPLACE_barbecue
4207
+ $APPEND_entrance
4208
+ $REPLACE_noodles
4209
+ $APPEND_products
4210
+ $APPEND_helped
4211
+ $REPLACE_newly
4212
+ $APPEND_drank
4213
+ $REPLACE_precise
4214
+ $REPLACE_increasingly
4215
+ $APPEND_Dear
4216
+ $REPLACE_novels
4217
+ $REPLACE_mix
4218
+ $REPLACE_budget
4219
+ $REPLACE_petrol
4220
+ $REPLACE_trial
4221
+ $APPEND_Perhaps
4222
+ $REPLACE_occasions
4223
+ $APPEND_Actually
4224
+ $REPLACE_eastern
4225
+ $REPLACE_sights
4226
+ $REPLACE_industrial
4227
+ $APPEND_result
4228
+ $REPLACE_generally
4229
+ $REPLACE_Canadian
4230
+ $REPLACE_Surprisingly
4231
+ $APPEND_strong
4232
+ $REPLACE_memorizing
4233
+ $REPLACE_irritated
4234
+ $REPLACE_implemented
4235
+ $REPLACE_Welcome
4236
+ $REPLACE_coast
4237
+ $REPLACE_signs
4238
+ $REPLACE_leading
4239
+ $APPEND_PM
4240
+ $APPEND_access
4241
+ $REPLACE_fat
4242
+ $REPLACE_breeze
4243
+ $REPLACE_India
4244
+ $APPEND_slept
4245
+ $REPLACE_weigh
4246
+ $REPLACE_commonly
4247
+ $REPLACE_supervisor
4248
+ $REPLACE_tomato
4249
+ $REPLACE_agency
4250
+ $APPEND_till
4251
+ $REPLACE_couldn
4252
+ $REPLACE_strangely
4253
+ $APPEND_stayed
4254
+ $REPLACE_ni
4255
+ $APPEND_exams
4256
+ $REPLACE_School
4257
+ $REPLACE_blue
4258
+ $APPEND_allowed
4259
+ $REPLACE_30th
4260
+ $REPLACE_kittens
4261
+ $REPLACE_typing
4262
+ $REPLACE_headed
4263
+ $APPEND_present
4264
+ $REPLACE_Reading
4265
+ $REPLACE_injury
4266
+ $REPLACE_Dear
4267
+ $REPLACE_PM
4268
+ $REPLACE_minor
4269
+ $REPLACE_drinks
4270
+ $REPLACE_enthusiasm
4271
+ $REPLACE_dilemma
4272
+ $REPLACE_income
4273
+ $REPLACE_sadness
4274
+ $REPLACE_weaker
4275
+ $REPLACE_Thanksgiving
4276
+ $REPLACE_documents
4277
+ $REPLACE_fake
4278
+ $REPLACE_boy
4279
+ $REPLACE_regards
4280
+ $APPEND_Finally
4281
+ $REPLACE_obstacle
4282
+ $REPLACE_batteries
4283
+ $APPEND_talked
4284
+ $APPEND_becomes
4285
+ $REPLACE_numerous
4286
+ $REPLACE_cheese
4287
+ $REPLACE_judge
4288
+ $APPEND_busy
4289
+ $APPEND_reach
4290
+ $APPEND_Fuji
4291
+ $REPLACE_intelligent
4292
+ $REPLACE_reception
4293
+ $REPLACE_Chinatown
4294
+ $REPLACE_repeat
4295
+ $APPEND_June
4296
+ $REPLACE_reported
4297
+ $APPEND_required
4298
+ $REPLACE_cases
4299
+ $REPLACE_matters
4300
+ $REPLACE_prepositions
4301
+ $REPLACE_accidents
4302
+ $REPLACE_fields
4303
+ $APPEND_ask
4304
+ $APPEND_sad
4305
+ $REPLACE_selected
4306
+ $REPLACE_skipped
4307
+ $REPLACE_freshmen
4308
+ $REPLACE_mode
4309
+ $REPLACE_calendar
4310
+ $REPLACE_luxury
4311
+ $REPLACE_summertime
4312
+ $REPLACE_device
4313
+ $APPEND_lesson
4314
+ $APPEND_surely
4315
+ $REPLACE_loved
4316
+ $REPLACE_reflect
4317
+ $REPLACE_shoulder
4318
+ $REPLACE_muscular
4319
+ $APPEND_plenty
4320
+ $REPLACE_Indian
4321
+ $REPLACE_pork
4322
+ $REPLACE_double
4323
+ $REPLACE_loneliness
4324
+ $REPLACE_economies
4325
+ $REPLACE_meaningful
4326
+ $REPLACE_cooperate
4327
+ $REPLACE_land
4328
+ $APPEND_report
4329
+ $REPLACE_block
4330
+ $REPLACE_cheapest
4331
+ $REPLACE_mirror
4332
+ $REPLACE_wealthy
4333
+ $APPEND_application
4334
+ $REPLACE_quarter
4335
+ $REPLACE_babies
4336
+ $REPLACE_risk
4337
+ $REPLACE_discussions
4338
+ $REPLACE_lightning
4339
+ $REPLACE_briefly
4340
+ $REPLACE_congratulated
4341
+ $REPLACE_breathing
4342
+ $REPLACE_eagerly
4343
+ $REPLACE_resolved
4344
+ $APPEND_staying
4345
+ $APPEND_history
4346
+ $APPEND_phones
4347
+ $REPLACE_involving
4348
+ $REPLACE_enthusiastic
4349
+ $REPLACE_cookies
4350
+ $REPLACE_frightened
4351
+ $REPLACE_entirely
4352
+ $REPLACE_enormous
4353
+ $APPEND_aspects
4354
+ $REPLACE_stable
4355
+ $APPEND_section
4356
+ $APPEND_Thanks
4357
+ $APPEND_women
4358
+ $REPLACE_phase
4359
+ $REPLACE_16th
4360
+ $REPLACE_spicy
4361
+ $APPEND_produced
4362
+ $REPLACE_street
4363
+ $REPLACE_ignore
4364
+ $REPLACE_designer
4365
+ $APPEND_club
4366
+ $REPLACE_mum
4367
+ $REPLACE_sincere
4368
+ $REPLACE_offensive
4369
+ $REPLACE_memorized
4370
+ $APPEND_question
4371
+ $REPLACE_wa
4372
+ $REPLACE_garbage
4373
+ $REPLACE_Playing
4374
+ $REPLACE_castle
4375
+ $REPLACE_swam
4376
+ $REPLACE_leader
4377
+ $REPLACE_earthquakes
4378
+ $REPLACE_displayed
4379
+ $REPLACE_marathon
4380
+ $APPEND_songs
4381
+ $REPLACE_See
4382
+ $REPLACE_burn
4383
+ $APPEND_happily
4384
+ $REPLACE_salesman
4385
+ $REPLACE_unhealthy
4386
+ $REPLACE_base
4387
+ $REPLACE_crossing
4388
+ $REPLACE_Honestly
4389
+ $REPLACE_machines
4390
+ $REPLACE_freshman
4391
+ $REPLACE_dry
4392
+ $APPEND_exact
4393
+ $APPEND_January
4394
+ $APPEND_terms
4395
+ $REPLACE_happiest
4396
+ $APPEND_tastes
4397
+ $APPEND_design
4398
+ $REPLACE_champion
4399
+ $REPLACE_Diary
4400
+ $REPLACE_expressing
4401
+ $REPLACE_hardest
4402
+ $REPLACE_installed
4403
+ $REPLACE_Go
4404
+ $REPLACE_dollar
4405
+ $REPLACE_wooden
4406
+ $REPLACE_contrary
4407
+ $REPLACE_refers
4408
+ $REPLACE_employment
4409
+ $REPLACE_removed
4410
+ $REPLACE_opposing
4411
+ $REPLACE_actress
4412
+ $REPLACE_Ever
4413
+ $APPEND_beginning
4414
+ $REPLACE_approach
4415
+ $REPLACE_guide
4416
+ $REPLACE_blooming
4417
+ $REPLACE_necessarily
4418
+ $REPLACE_fed
4419
+ $REPLACE_stands
4420
+ $REPLACE_principal
4421
+ $REPLACE_faced
4422
+ $APPEND_local
4423
+ $APPEND_highly
4424
+ $REPLACE_fiction
4425
+ $APPEND_finding
4426
+ $REPLACE_attracts
4427
+ $REPLACE_2011
4428
+ $REPLACE_businessmen
4429
+ $REPLACE_Friends
4430
+ $REPLACE_repaired
4431
+ $REPLACE_bet
4432
+ $REPLACE_hunger
4433
+ $REPLACE_dealing
4434
+ $REPLACE_Except
4435
+ $APPEND_role
4436
+ $REPLACE_admitted
4437
+ $REPLACE_island
4438
+ $REPLACE_quietly
4439
+ $REPLACE_lets
4440
+ $REPLACE_fee
4441
+ $REPLACE_performances
4442
+ $REPLACE_bar
4443
+ $REPLACE_maximum
4444
+ $REPLACE_escaped
4445
+ $REPLACE_ours
4446
+ $APPEND_originally
4447
+ $REPLACE_surroundings
4448
+ $REPLACE_golden
4449
+ $APPEND_technology
4450
+ $APPEND_research
4451
+ $REPLACE_borrow
4452
+ $REPLACE_remind
4453
+ $REPLACE_Beginning
4454
+ $REPLACE_passage
4455
+ $APPEND_drive
4456
+ $APPEND_teaching
4457
+ $REPLACE_typhoons
4458
+ $REPLACE_grabbed
4459
+ $REPLACE_incidents
4460
+ $REPLACE_hid
4461
+ $REPLACE_operate
4462
+ $REPLACE_19th
4463
+ $APPEND_sure
4464
+ $REPLACE_permission
4465
+ $APPEND_previous
4466
+ $REPLACE_rental
4467
+ $REPLACE_tothe
4468
+ $APPEND_round
4469
+ $REPLACE_Oops
4470
+ $REPLACE_survival
4471
+ $REPLACE_shaped
4472
+ $APPEND_costs
4473
+ $REPLACE_conference
4474
+ $APPEND_move
4475
+ $REPLACE_dressed
4476
+ $REPLACE_smells
4477
+ $REPLACE_artistic
4478
+ $REPLACE_holds
4479
+ $REPLACE_introducing
4480
+ $REPLACE_nursery
4481
+ $APPEND_May
4482
+ $REPLACE_troubled
4483
+ $REPLACE_optimistic
4484
+ $REPLACE_guarantee
4485
+ $REPLACE_toothache
4486
+ $REPLACE_bother
4487
+ $REPLACE_Congratulations
4488
+ $REPLACE_purchased
4489
+ $APPEND_21
4490
+ $REPLACE_accurately
4491
+ $REPLACE_belief
4492
+ $REPLACE_numbers
4493
+ $REPLACE_switch
4494
+ $REPLACE_personally
4495
+ $REPLACE_negatively
4496
+ $REPLACE_fireflies
4497
+ $APPEND_receive
4498
+ $APPEND_shop
4499
+ $REPLACE_haircut
4500
+ $REPLACE_productive
4501
+ $REPLACE_crisis
4502
+ $REPLACE_relatively
4503
+ $REPLACE_celebration
4504
+ $REPLACE_controversial
4505
+ $REPLACE_AM
4506
+ $REPLACE_factors
4507
+ $REPLACE_snowing
4508
+ $REPLACE_amusing
4509
+ $REPLACE_sharing
4510
+ $REPLACE_Companies
4511
+ $REPLACE_NYC
4512
+ $REPLACE_moves
4513
+ $REPLACE_hanging
4514
+ $REPLACE_simpler
4515
+ $APPEND_apart
4516
+ $REPLACE_race
4517
+ $REPLACE_hip
4518
+ $REPLACE_underwear
4519
+ $REPLACE_official
4520
+ $REPLACE_shift
4521
+ $APPEND_week
4522
+ $REPLACE_analyse
4523
+ $REPLACE_25th
4524
+ $REPLACE_teenage
4525
+ $APPEND_recent
4526
+ $REPLACE_skin
4527
+ $REPLACE_enroll
4528
+ $REPLACE_nickname
4529
+ $APPEND_accidentally
4530
+ $REPLACE_inventions
4531
+ $REPLACE_boys
4532
+ $APPEND_Afterwards
4533
+ $REPLACE_gentle
4534
+ $REPLACE_overnight
4535
+ $APPEND_explain
4536
+ $REPLACE_wanting
4537
+ $REPLACE_encouraging
4538
+ $REPLACE_contribute
4539
+ $REPLACE_necessities
4540
+ $REPLACE_enrolled
4541
+ $REPLACE_Normally
4542
+ $REPLACE_balloon
4543
+ $REPLACE_applying
4544
+ $APPEND_uses
4545
+ $REPLACE_recall
4546
+ $REPLACE_nearest
4547
+ $REPLACE_cashier
4548
+ $REPLACE_corner
4549
+ $APPEND_space
4550
+ $REPLACE_thatI
4551
+ $REPLACE_treasure
4552
+ $REPLACE_International
4553
+ $REPLACE_forth
4554
+ $REPLACE_assigned
4555
+ $APPEND_education
4556
+ $APPEND_except
4557
+ $REPLACE_jewellery
4558
+ $REPLACE_manga
4559
+ $APPEND_participate
4560
+ $APPEND_increase
4561
+ $REPLACE_slippery
4562
+ $REPLACE_snowboard
4563
+ $REPLACE_novel
4564
+ $REPLACE_predict
4565
+ $REPLACE_remained
4566
+ $REPLACE_outcome
4567
+ $APPEND_whose
4568
+ $APPEND_slightly
4569
+ $APPEND_serious
4570
+ $REPLACE_Research
4571
+ $REPLACE_marvelous
4572
+ $APPEND_excited
4573
+ $REPLACE_organization
4574
+ $REPLACE_list
4575
+ $REPLACE_automatically
4576
+ $REPLACE_differ
4577
+ $REPLACE_Mount
4578
+ $REPLACE_arrangement
4579
+ $APPEND_spending
4580
+ $REPLACE_adopt
4581
+ $APPEND_Soon
4582
+ $APPEND_Mr
4583
+ $REPLACE_irritable
4584
+ $REPLACE_Wish
4585
+ $REPLACE_writting
4586
+ $REPLACE_Sincerely
4587
+ $APPEND_winter
4588
+ $REPLACE_rose
4589
+ $REPLACE_businessman
4590
+ $REPLACE_flavors
4591
+ $REPLACE_smell
4592
+ $REPLACE_fortunate
4593
+ $APPEND_TOEIC
4594
+ $APPEND_mentioned
4595
+ $APPEND_process
4596
+ $APPEND_amp
4597
+ $APPEND_neither
4598
+ $REPLACE_enemies
4599
+ $REPLACE_acceptance
4600
+ $REPLACE_drivers
4601
+ $REPLACE_murderer
4602
+ $REPLACE_Melbourne
4603
+ $REPLACE_Specifically
4604
+ $APPEND_complete
4605
+ $APPEND_focus
4606
+ $REPLACE_illegal
4607
+ $APPEND_hurts
4608
+ $REPLACE_groom
4609
+ $APPEND_preposition
4610
+ $APPEND_com
4611
+ $APPEND_beautiful
4612
+ $REPLACE_sightseeing
4613
+ $REPLACE_bringing
4614
+ $REPLACE_sources
4615
+ $APPEND_videos
4616
+ $APPEND_lunch
4617
+ $APPEND_11
4618
+ $REPLACE_suggestion
4619
+ $REPLACE_programmes
4620
+ $APPEND_jobs
4621
+ $REPLACE_scent
4622
+ $REPLACE_crime
4623
+ $REPLACE_desperate
4624
+ $REPLACE_deliver
4625
+ $APPEND_performed
4626
+ $REPLACE_cars
4627
+ $REPLACE_pet
4628
+ $REPLACE_dangers
4629
+ $APPEND_perform
4630
+ $REPLACE_vehicles
4631
+ $APPEND_figure
4632
+ $APPEND_Later
4633
+ $REPLACE_matches
4634
+ $REPLACE_spaghetti
4635
+ $APPEND_light
4636
+ $REPLACE_corrects
4637
+ $REPLACE_Unlike
4638
+ $APPEND_occasionally
4639
+ $APPEND_truly
4640
+ $REPLACE_silence
4641
+ $REPLACE_intense
4642
+ $REPLACE_substitute
4643
+ $APPEND_freely
4644
+ $APPEND_party
4645
+ $APPEND_His
4646
+ $REPLACE_bothersome
4647
+ $REPLACE_pursuing
4648
+ $REPLACE_Out
4649
+ $REPLACE_direction
4650
+ $APPEND_check
4651
+ $REPLACE_authorities
4652
+ $APPEND_sort
4653
+ $REPLACE_challenges
4654
+ $REPLACE_plural
4655
+ $REPLACE_refused
4656
+ $REPLACE_informed
4657
+ $REPLACE_demand
4658
+ $REPLACE_mess
4659
+ $REPLACE_force
4660
+ $REPLACE_paintings
4661
+ $APPEND_remember
4662
+ $REPLACE_sky
4663
+ $APPEND_practicing
4664
+ $REPLACE_understandable
4665
+ $REPLACE_crashed
4666
+ $APPEND_communicate
4667
+ $REPLACE_manner
4668
+ $REPLACE_payment
4669
+ $REPLACE_artist
4670
+ $APPEND_tend
4671
+ $REPLACE_recession
4672
+ $REPLACE_til
4673
+ $REPLACE_mixed
4674
+ $APPEND_bar
4675
+ $REPLACE_England
4676
+ $REPLACE_gathered
4677
+ $REPLACE_combined
4678
+ $REPLACE_Rome
4679
+ $APPEND_wet
4680
+ $REPLACE_network
4681
+ $REPLACE_steak
4682
+ $REPLACE_California
4683
+ $REPLACE_birth
4684
+ $APPEND_state
4685
+ $REPLACE_expressed
4686
+ $REPLACE_haven
4687
+ $REPLACE_seldom
4688
+ $APPEND_health
4689
+ $REPLACE_partners
4690
+ $REPLACE_finishing
4691
+ $REPLACE_Monday
4692
+ $REPLACE_liters
4693
+ $REPLACE_Hi
4694
+ $APPEND_August
4695
+ $REPLACE_gorgeous
4696
+ $APPEND_seven
4697
+ $APPEND_remaining
4698
+ $REPLACE_chances
4699
+ $APPEND_older
4700
+ $REPLACE_Eating
4701
+ $APPEND_Christmas
4702
+ $REPLACE_dentist
4703
+ $REPLACE_league
4704
+ $REPLACE_korean
4705
+ $APPEND_greatly
4706
+ $APPEND_return
4707
+ $REPLACE_genres
4708
+ $REPLACE_authors
4709
+ $APPEND_Thank
4710
+ $REPLACE_diseases
4711
+ $REPLACE_travels
4712
+ $REPLACE_sheet
4713
+ $REPLACE_fastest
4714
+ $APPEND_surprised
4715
+ $REPLACE_rushed
4716
+ $APPEND_attending
4717
+ $APPEND_Furthermore
4718
+ $REPLACE_Laden
4719
+ $REPLACE_creative
4720
+ $REPLACE_meantime
4721
+ $REPLACE_Turkey
4722
+ $REPLACE_presenting
4723
+ $REPLACE_Christian
4724
+ $REPLACE_nervousness
4725
+ $REPLACE_meaningless
4726
+ $APPEND_player
4727
+ $REPLACE_motivate
4728
+ $REPLACE_advertisements
4729
+ $REPLACE_artwork
4730
+ $REPLACE_encouragement
4731
+ $REPLACE_regard
4732
+ $REPLACE_slower
4733
+ $REPLACE_dolls
4734
+ $REPLACE_200
4735
+ $REPLACE_unconsciously
4736
+ $APPEND_happens
4737
+ $REPLACE_facility
4738
+ $APPEND_advice
4739
+ $REPLACE_North
4740
+ $REPLACE_awareness
4741
+ $APPEND_planned
4742
+ $REPLACE_genetic
4743
+ $REPLACE_management
4744
+ $REPLACE_refund
4745
+ $REPLACE_brighter
4746
+ $REPLACE_confirm
4747
+ $REPLACE_burning
4748
+ $REPLACE_composition
4749
+ $APPEND_answer
4750
+ $REPLACE_conserve
4751
+ $REPLACE_destruction
4752
+ $REPLACE_duties
4753
+ $REPLACE_creativity
4754
+ $APPEND_expressions
4755
+ $APPEND_commit
4756
+ $REPLACE_East
4757
+ $REPLACE_milk
4758
+ $REPLACE_30pm
4759
+ $REPLACE_belong
4760
+ $REPLACE_autograph
4761
+ $REPLACE_caring
4762
+ $REPLACE_download
4763
+ $APPEND_development
4764
+ $REPLACE_compete
4765
+ $REPLACE_qualities
4766
+ $APPEND_avoid
4767
+ $REPLACE_recieved
4768
+ $APPEND_Perfect
4769
+ $REPLACE_yours
4770
+ $REPLACE_breaks
4771
+ $REPLACE_amusement
4772
+ $REPLACE_models
4773
+ $REPLACE_persevere
4774
+ $REPLACE_emergency
4775
+ $REPLACE_empty
4776
+ $REPLACE_rescue
4777
+ $APPEND_term
4778
+ $REPLACE_requirements
4779
+ $REPLACE_sufficient
4780
+ $APPEND_cooking
4781
+ $REPLACE_fascinated
4782
+ $REPLACE_14th
4783
+ $REPLACE_relevant
4784
+ $REPLACE_listed
4785
+ $REPLACE_vision
4786
+ $REPLACE_g
4787
+ $REPLACE_leadership
4788
+ $REPLACE_butI
4789
+ $APPEND_provide
4790
+ $REPLACE_organize
4791
+ $APPEND_created
4792
+ $REPLACE_12th
4793
+ $REPLACE_collection
4794
+ $REPLACE_supply
4795
+ $APPEND_Besides
4796
+ $REPLACE_stranger
4797
+ $REPLACE_combination
4798
+ $REPLACE_farther
4799
+ $REPLACE_awaiting
4800
+ $APPEND_hand
4801
+ $REPLACE_unsure
4802
+ $REPLACE_profile
4803
+ $APPEND_moving
4804
+ $APPEND_street
4805
+ $REPLACE_delighted
4806
+ $REPLACE_pretended
4807
+ $REPLACE_driven
4808
+ $REPLACE_maintaining
4809
+ $REPLACE_liar
4810
+ $TRANSFORM_SPLIT_HYPHEN
4811
+ $REPLACE_glass
4812
+ $REPLACE_stick
4813
+ $REPLACE_itchy
4814
+ $REPLACE_ought
4815
+ $REPLACE_consumption
4816
+ $REPLACE_quicker
4817
+ $REPLACE_spare
4818
+ $REPLACE_governments
4819
+ $APPEND_view
4820
+ $REPLACE_P
4821
+ $REPLACE_colorful
4822
+ $REPLACE_guitarist
4823
+ $APPEND_wants
4824
+ $REPLACE_million
4825
+ $REPLACE_behalf
4826
+ $REPLACE_kilometres
4827
+ $REPLACE_bank
4828
+ $APPEND_morning
4829
+ $REPLACE_weekends
4830
+ $REPLACE_occasion
4831
+ $APPEND_tour
4832
+ $REPLACE_object
4833
+ $REPLACE_Others
4834
+ $REPLACE_Considering
4835
+ $REPLACE_species
4836
+ $REPLACE_session
4837
+ $APPEND_removed
4838
+ $REPLACE_hiking
4839
+ $REPLACE_resolutions
4840
+ $REPLACE_peak
4841
+ $REPLACE_consequences
4842
+ $REPLACE_soaked
4843
+ $REPLACE_presents
4844
+ $APPEND_25
4845
+ $REPLACE_salad
4846
+ $REPLACE_filling
4847
+ $REPLACE_attack
4848
+ $APPEND_foods
4849
+ $REPLACE_tendency
4850
+ $REPLACE_discoveries
4851
+ $REPLACE_immediate
4852
+ $REPLACE_submitted
4853
+ $REPLACE_THAT
4854
+ $APPEND_develop
4855
+ $REPLACE_battery
4856
+ $REPLACE_dont
4857
+ $REPLACE_feature
4858
+ $APPEND_opportunity
4859
+ $REPLACE_bodies
4860
+ $REPLACE_goldfish
4861
+ $REPLACE_adapt
4862
+ $REPLACE_views
4863
+ $REPLACE_forgetting
4864
+ $REPLACE_saved
4865
+ $REPLACE_doesn
4866
+ $REPLACE_thirst
4867
+ $APPEND_Me
4868
+ $REPLACE_distant
4869
+ $REPLACE_opposition
4870
+ $REPLACE_breed
4871
+ $REPLACE_practised
4872
+ $REPLACE_miserable
4873
+ $APPEND_sore
4874
+ $REPLACE_brain
4875
+ $REPLACE_sessions
4876
+ $REPLACE_policeman
4877
+ $REPLACE_favor
4878
+ $REPLACE_managing
4879
+ $REPLACE_rains
4880
+ $REPLACE_baths
4881
+ $REPLACE_surrounding
4882
+ $REPLACE_Seoul
4883
+ $APPEND_regardless
4884
+ $APPEND_Something
4885
+ $REPLACE_architectural
4886
+ $REPLACE_ok
4887
+ $REPLACE_welfare
4888
+ $APPEND_share
4889
+ $REPLACE_daughters
4890
+ $REPLACE_phones
4891
+ $REPLACE_downstairs
4892
+ $REPLACE_arriving
4893
+ $REPLACE_stepped
4894
+ $REPLACE_competing
4895
+ $REPLACE_catching
4896
+ $REPLACE_conversing
4897
+ $REPLACE_encourages
4898
+ $REPLACE_depressing
4899
+ $REPLACE_begining
4900
+ $REPLACE_admission
4901
+ $APPEND_voice
4902
+ $REPLACE_boredom
4903
+ $APPEND_alot
4904
+ $APPEND_familiar
4905
+ $REPLACE_breaking
4906
+ $REPLACE_fortunately
4907
+ $REPLACE_Over
4908
+ $APPEND_lost
4909
+ $REPLACE_intended
4910
+ $REPLACE_neighbourhood
4911
+ $REPLACE_mysteries
4912
+ $REPLACE_certificate
4913
+ $REPLACE_data
4914
+ $APPEND_personal
4915
+ $REPLACE_joyful
4916
+ $REPLACE_immigrants
4917
+ $REPLACE_emotions
4918
+ $REPLACE_checkup
4919
+ $REPLACE_licence
4920
+ $REPLACE_juice
4921
+ $APPEND_whenever
4922
+ $REPLACE_dogs
4923
+ $REPLACE_thereby
4924
+ $APPEND_department
4925
+ $APPEND_assignment
4926
+ $REPLACE_defend
4927
+ $REPLACE_approached
4928
+ $REPLACE_Fireworks
4929
+ $APPEND_activity
4930
+ $APPEND_quality
4931
+ $REPLACE_basics
4932
+ $REPLACE_costumes
4933
+ $REPLACE_key
4934
+ $REPLACE_outdoors
4935
+ $REPLACE_hay
4936
+ $APPEND_prepare
4937
+ $REPLACE_hiding
4938
+ $REPLACE_curiosity
4939
+ $APPEND_dealing
4940
+ $REPLACE_passion
4941
+ $REPLACE_costed
4942
+ $REPLACE_fries
4943
+ $REPLACE_HAVE
4944
+ $REPLACE_divorced
4945
+ $APPEND_display
4946
+ $REPLACE_baby
4947
+ $APPEND_cherry
4948
+ $REPLACE_Returning
4949
+ $APPEND_lack
4950
+ $APPEND_learnt
4951
+ $REPLACE_Im
4952
+ $APPEND_naturally
4953
+ $REPLACE_router
4954
+ $APPEND_goals
4955
+ $REPLACE_seaside
4956
+ $REPLACE_summarize
4957
+ $APPEND_appeared
4958
+ $REPLACE_claim
4959
+ $APPEND_ate
4960
+ $REPLACE_exchanging
4961
+ $APPEND_arrive
4962
+ $APPEND_art
4963
+ $REPLACE_participating
4964
+ $REPLACE_seek
4965
+ $REPLACE_innocent
4966
+ $APPEND_express
4967
+ $REPLACE_lunchtime
4968
+ $REPLACE_reaction
4969
+ $REPLACE_consisted
4970
+ $REPLACE_Eastern
4971
+ $APPEND_track
4972
+ $APPEND_baby
4973
+ $REPLACE_touching
4974
+ $REPLACE_lively
4975
+ $REPLACE_bridge
4976
+ $REPLACE_murderers
4977
+ $REPLACE_Brazil
4978
+ $REPLACE_feeding
4979
+ $REPLACE_honestly
4980
+ $REPLACE_Piece
4981
+ $REPLACE_springs
4982
+ $REPLACE_purchase
4983
+ $REPLACE_pray
4984
+ $REPLACE_washed
4985
+ $APPEND_sentence
4986
+ $REPLACE_Olympics
4987
+ $REPLACE_strongest
4988
+ $REPLACE_leads
4989
+ $REPLACE_stomachache
4990
+ $REPLACE_John
4991
+ $REPLACE_opponent
4992
+ $REPLACE_contents
4993
+ $REPLACE_plot
4994
+ $APPEND_Many
4995
+ $REPLACE_experiment
4996
+ $REPLACE_beings
4997
+ $REPLACE_owns
4998
+ $REPLACE_airline
4999
+ $REPLACE_severely
5000
+ $REPLACE_ages
5001
+ @@UNKNOWN@@
5002
+ @@PADDING@@
output_vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *tags
2
+ *labels
requirements.txt CHANGED
@@ -1,10 +1,14 @@
 
 
 
 
 
 
 
 
1
  gradio
2
- transformers
3
- torch
4
  spacy
5
  nltk
6
  gensim
7
  pattern
8
  textblob
9
-
10
-
 
1
+ torch==1.10.0
2
+ allennlp==0.8.4
3
+ python-Levenshtein==0.12.1
4
+ transformers==4.11.3
5
+ scikit-learn==0.20.0
6
+ sentencepiece==0.1.95
7
+ overrides==4.1.2
8
+ numpy==1.19.5
9
  gradio
 
 
10
  spacy
11
  nltk
12
  gensim
13
  pattern
14
  textblob
 
 
utils/filter_brackets.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import re
3
+
4
+ from helpers import write_lines
5
+
6
+
7
def filter_line(line):
    """Remove or neutralise ``-LRB-`` / ``-RRB-`` bracket tokens in a line.

    Three cases are handled:

    * Both tokens occur in the line: every span that runs from a ``-LRB-``
      to the nearest following ``-RRB-`` is deleted, tokens included.
      Surrounding whitespace is not touched, so a double space may remain.
      Tokens that are not part of such a span are left as they are.
    * Only one kind of token occurs: each occurrence is replaced by a
      double-quote character.
    * Neither token occurs: the line is returned unchanged.

    Args:
        line: A single line of text.

    Returns:
        The cleaned line.
    """
    has_left = "-LRB-" in line
    has_right = "-RRB-" in line
    if has_left and has_right:
        # Anchor on the literal tokens. A pattern that merely starts at "any
        # hyphen" also swallows the tail of a hyphenated word that precedes
        # the bracket (e.g. "well-known -LRB- x -RRB-" would lose "-known ").
        return re.sub(r'-LRB-.*?-RRB-', '', line)
    if has_left or has_right:
        # Unpaired bracket token: turn it into a quote instead of dropping it.
        return line.replace("-LRB-", '"').replace("-RRB-", '"')
    return line
17
+
18
+
19
def main(args):
    """Clean every line of ``args.source`` and write the result to ``args.output``.

    Args:
        args: Parsed command-line namespace providing the ``source`` and
            ``output`` file paths.
    """
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(args.source, encoding="utf-8") as f:
        cleaned = [filter_line(row.rstrip()) for row in f]

    write_lines(args.output, cleaned)
24
+
25
+
26
if __name__ == '__main__':
    # Command-line entry point: both file paths are mandatory.
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, description in (
            ('-s', '--source', 'Path to the source file'),
            ('-o', '--output', 'Path to the output file')):
        parser.add_argument(short_flag, long_flag,
                            required=True,
                            help=description)
    args = parser.parse_args()
    main(args)
utils/helpers.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
+
5
+ VOCAB_DIR = Path(__file__).resolve().parent.parent / "data"
6
+ PAD = "@@PADDING@@"
7
+ UNK = "@@UNKNOWN@@"
8
+ START_TOKEN = "$START"
9
+ SEQ_DELIMETERS = {"tokens": " ",
10
+ "labels": "SEPL|||SEPR",
11
+ "operations": "SEPL__SEPR"}
12
+ REPLACEMENTS = {
13
+ "''": '"',
14
+ '--': '—',
15
+ '`': "'",
16
+ "'ve": "' ve",
17
+ }
18
+
19
+
20
def get_verb_form_dicts(path_to_dict=None):
    """Load the verb-form vocabulary into an encode and a decode mapping.

    Each non-blank line has the form ``word1_word2:tag1_tag2``.

    Args:
        path_to_dict: Optional path of the vocabulary file. Defaults to
            ``verb-form-vocab.txt`` inside ``VOCAB_DIR``.

    Returns:
        A tuple ``(encode, decode)`` where ``encode`` maps ``"word1_word2"``
        to the raw tag string (still carrying its line ending, as read) and
        ``decode`` maps ``"word1_tag1_tag2"`` to ``word2``. Only the first
        line producing a given decode key is kept in either mapping.
    """
    if path_to_dict is None:
        path_to_dict = os.path.join(VOCAB_DIR, "verb-form-vocab.txt")
    encode, decode = {}, {}
    with open(path_to_dict, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line would otherwise break the
                # two-way unpacking below.
                continue
            words, tags = line.split(":")
            word1, word2 = words.split("_")
            tag1, tag2 = tags.split("_")
            decode_key = f"{word1}_{tag1}_{tag2.strip()}"
            if decode_key not in decode:
                encode[words] = tags
                decode[decode_key] = word2
    return encode, decode
33
+
34
+
35
+ ENCODE_VERB_DICT, DECODE_VERB_DICT = get_verb_form_dicts()
36
+
37
+
38
def get_target_sent_by_edits(source_tokens, edits):
    """Apply a sequence of edits to a token list and return the result.

    Each edit is a 4-item sequence ``(start, end, label, _)``. Edits are
    applied in order; a running offset compensates for tokens inserted or
    removed by earlier edits. Merge markers left in the token list are
    resolved by ``replace_merge_transforms`` before returning.
    """
    result = source_tokens[:]
    offset = 0
    for start, end, label, _ in edits:
        pos = start + offset
        # Token currently at the edit position ('' when out of bounds).
        current = result[pos] if 0 <= pos < len(result) else ''

        if label == "":
            # Empty label: drop the token.
            del result[pos]
            offset -= 1
        elif start == end:
            # Zero-width span: insert a new token at this position.
            result[pos:pos] = [label.replace("$APPEND_", "")]
            offset += 1
        elif label.startswith("$TRANSFORM_"):
            transformed = apply_reverse_transformation(current, label)
            result[pos] = current if transformed is None else transformed
        elif start == end - 1:
            result[pos] = label.replace("$REPLACE_", "")
        elif label.startswith("$MERGE_"):
            # Keep the marker in the stream; it is resolved at the end.
            result[pos + 1:pos + 1] = [label]
            offset += 1

    return replace_merge_transforms(result)
66
+
67
+
68
def replace_merge_transforms(tokens):
    """Resolve ``$MERGE_HYPHEN`` / ``$MERGE_SPACE`` markers in a token list.

    If no token starts with ``$MERGE_`` the input list is returned as is.
    Otherwise the tokens are joined with spaces, each marker (with its
    surrounding spaces) is replaced by ``-`` or by nothing, and the line is
    split on whitespace again.
    """
    if not any(tok.startswith("$MERGE_") for tok in tokens):
        return tokens

    joined = " ".join(tokens)
    for marker, glue in ((" $MERGE_HYPHEN ", "-"), (" $MERGE_SPACE ", "")):
        joined = joined.replace(marker, glue)
    return joined.split()
76
+
77
+
78
def convert_using_case(token, smart_action):
    """Apply a ``$TRANSFORM_CASE_*`` action to ``token``.

    Supported suffixes: ``LOWER``, ``UPPER``, ``CAPITAL``, ``CAPITAL_1``
    (keep the first character, capitalise the rest) and ``UPPER_-1``
    (upper-case everything except the last character). Any other action,
    including non-case actions, returns the token unchanged.

    Empty tokens are returned as empty strings for every action.
    """
    if not smart_action.startswith("$TRANSFORM_CASE_"):
        return token
    if smart_action.endswith("LOWER"):
        return token.lower()
    if smart_action.endswith("UPPER"):
        return token.upper()
    if smart_action.endswith("CAPITAL"):
        return token.capitalize()
    if smart_action.endswith("CAPITAL_1"):
        # Slices instead of token[0] so an empty token does not raise.
        return token[:1] + token[1:].capitalize()
    if smart_action.endswith("UPPER_-1"):
        # Slices instead of token[-1] so an empty token does not raise.
        return token[:-1].upper() + token[-1:]
    return token
93
+
94
+
95
def convert_using_verb(token, smart_action):
    """Decode a ``$TRANSFORM_VERB_<tags>`` action for ``token``.

    Builds the key ``"<token>_<tags>"`` and looks it up with
    ``decode_verb_form``; the lookup result is returned as is.

    Raises:
        Exception: if ``smart_action`` is not a verb transformation.
    """
    prefix = "$TRANSFORM_VERB_"
    if smart_action.startswith(prefix):
        return decode_verb_form(f"{token}_{smart_action[len(prefix):]}")
    raise Exception(f"Unknown action type {smart_action}")
102
+
103
+
104
def convert_using_split(token, smart_action):
    """Split a hyphenated token into space-separated words.

    Raises:
        Exception: if ``smart_action`` is not a ``$TRANSFORM_SPLIT`` action.
    """
    if smart_action.startswith("$TRANSFORM_SPLIT"):
        # Replacing every hyphen with a space equals split("-") + join(" ").
        return token.replace("-", " ")
    raise Exception(f"Unknown action type {smart_action}")
110
+
111
+
112
def convert_using_plural(token, smart_action):
    """Apply a naive number-agreement action to ``token``.

    ``...PLURAL`` appends ``"s"``; ``...SINGULAR`` drops the last character.

    Raises:
        Exception: for any other action.
    """
    if smart_action.endswith("PLURAL"):
        return token + "s"
    if smart_action.endswith("SINGULAR"):
        return token[:-1]
    raise Exception(f"Unknown action type {smart_action}")
119
+
120
+
121
def apply_reverse_transformation(source_token, transform):
    """Apply a ``$TRANSFORM_*`` label to ``source_token``.

    Labels that do not start with ``$TRANSFORM`` (for example ``$KEEP``)
    leave the token unchanged. Transform labels are dispatched on their
    prefix to the case, verb, split or agreement converter.

    Returns:
        The converted token; the verb converter may return ``None`` when
        the form is not in the verb dictionary.

    Raises:
        Exception: if the label starts with ``$TRANSFORM`` but matches none
            of the known transformation families.
    """
    if not transform.startswith("$TRANSFORM"):
        return source_token
    # NOTE: the former `transform == "$KEEP"` test lived inside this branch
    # and could never be true; $KEEP is already handled by the guard above.
    if transform.startswith("$TRANSFORM_CASE"):
        return convert_using_case(source_token, transform)
    if transform.startswith("$TRANSFORM_VERB"):
        return convert_using_verb(source_token, transform)
    if transform.startswith("$TRANSFORM_SPLIT"):
        return convert_using_split(source_token, transform)
    if transform.startswith("$TRANSFORM_AGREEMENT"):
        return convert_using_plural(source_token, transform)
    raise Exception(f"Unknown action type {transform}")
142
+
143
+
144
def read_parallel_lines(fn1, fn2):
    """Read two line-aligned files and drop pairs with an empty side.

    Both files are read with ``read_lines(..., skip_strip=True)`` so that
    empty lines keep their position; the files must therefore contain the
    same number of lines.

    Returns:
        Two lists of equal length holding the surviving lines of each file.
    """
    lines1 = read_lines(fn1, skip_strip=True)
    lines2 = read_lines(fn2, skip_strip=True)
    assert len(lines1) == len(lines2)

    kept_pairs = [(left, right) for left, right in zip(lines1, lines2)
                  if left.strip() and right.strip()]
    out_lines1 = [left for left, _ in kept_pairs]
    out_lines2 = [right for _, right in kept_pairs]
    return out_lines1, out_lines2
156
+
157
+
158
def read_lines(fn, skip_strip=False):
    """Return the stripped lines of a UTF-8 text file.

    A missing file yields an empty list. Lines that are empty after
    stripping are dropped unless ``skip_strip`` is true, in which case they
    are kept as empty strings.
    """
    if not os.path.exists(fn):
        return []

    kept = []
    with open(fn, 'r', encoding='utf-8') as f:
        for raw in f.readlines():
            stripped = raw.strip()
            if stripped or skip_strip:
                kept.append(stripped)
    return kept
164
+
165
+
166
def write_lines(fn, lines, mode='w'):
    """Write ``lines`` to ``fn`` as UTF-8, one item per line.

    In ``'w'`` mode an existing file is removed first; any other mode is
    passed straight to ``open`` (e.g. ``'a'`` to append).
    """
    overwrite = mode == 'w'
    if overwrite and os.path.exists(fn):
        os.remove(fn)
    payload = ''.join('%s\n' % s for s in lines)
    with open(fn, encoding='utf-8', mode=mode) as f:
        f.write(payload)
171
+
172
+
173
def decode_verb_form(original):
    """Look up ``original`` in ``DECODE_VERB_DICT``; ``None`` when absent."""
    decoded = DECODE_VERB_DICT.get(original, None)
    return decoded
175
+
176
+
177
def encode_verb_form(original_word, corrected_word):
    """Return the tag pair stored for ``original_word -> corrected_word``.

    The key ``"<original>_<corrected>"`` is looked up in
    ``ENCODE_VERB_DICT`` and the stored value is stripped of surrounding
    whitespace. ``None`` is returned when the original word is empty or
    when there is no (non-empty) entry.
    """
    lookup_key = original_word + "_" + corrected_word
    tags = ENCODE_VERB_DICT.get(lookup_key, "").strip()
    return tags if original_word and tags else None
185
+
186
+
187
def get_weights_name(transformer_name, lowercase):
    """Map a short transformer name to a pretrained-weights identifier.

    ``lowercase`` selects the uncased variant where one exists. A warning is
    printed when the requested casing does not match what the model
    supports. Unsupported combinations return ``None``.
    """
    if transformer_name == 'bert':
        return 'bert-base-uncased' if lowercase else 'bert-base-cased'
    if transformer_name == 'bert-large' and not lowercase:
        return 'bert-large-cased'

    uncased_only = {
        'distilbert': 'distilbert-base-uncased',
        'albert': 'albert-base-v1',
    }
    if transformer_name in uncased_only:
        if not lowercase:
            print('Warning! This model was trained only on uncased sentences.')
        return uncased_only[transformer_name]

    # Everything below exists in a cased variant only.
    if lowercase:
        print('Warning! This model was trained only on cased sentences.')
    cased_only = {
        'roberta': 'roberta-base',
        'roberta-large': 'roberta-large',
        'gpt2': 'gpt2',
        'transformerxl': 'transfo-xl-wt103',
        'xlnet': 'xlnet-base-cased',
        'xlnet-large': 'xlnet-large-cased',
    }
    return cased_only.get(transformer_name)
216
+
217
+
218
def remove_double_tokens(sent):
    """Collapse runs of identical consecutive tokens into a single token.

    The sentence is split on single spaces, every token equal to its
    immediate predecessor is dropped, and the rest is re-joined with spaces.
    """
    tokens = sent.split(' ')
    # Single pass: compare each token with its predecessor instead of
    # collecting indices and testing membership in a list.
    kept = [tok for i, tok in enumerate(tokens)
            if i == 0 or tok != tokens[i - 1]]
    return ' '.join(kept)
227
+
228
+
229
def normalize(sent):
    """Normalise a sentence for comparison.

    Removes repeated consecutive tokens, applies every substitution in
    ``REPLACEMENTS`` in mapping order, and lower-cases the result.
    """
    cleaned = remove_double_tokens(sent)
    for old, new in REPLACEMENTS.items():
        cleaned = cleaned.replace(old, new)
    return cleaned.lower()
utils/prepare_clc_fce_data.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ """
3
+ Convert CLC-FCE dataset (The Cambridge Learner Corpus) to the parallel sentences format.
4
+ """
5
+
6
+ import argparse
7
+ import glob
8
+ import os
9
+ import re
10
+ from xml.etree import cElementTree
11
+
12
+ from nltk.tokenize import sent_tokenize, word_tokenize
13
+ from tqdm import tqdm
14
+
15
+
16
def annotate_fce_doc(xml):
    """Return the annotated text of one FCE XML document.

    Every ``head/text/*/coded_answer/p`` paragraph is rendered with
    ``_get_formatted_text`` and the paragraphs are joined with newlines.

    Args:
        xml: The XML document as a string.

    Returns:
        A single string with one rendered paragraph per line.
    """
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree offers
    # the same API. Imported locally so this function does not depend on
    # the module-level cElementTree import.
    from xml.etree import ElementTree

    doc = ElementTree.fromstring(xml)
    paragraphs = doc.findall('head/text/*/coded_answer/p')
    return '\n'.join(_get_formatted_text(p) for p in paragraphs)
26
+
27
+
28
+ def _get_formatted_text(elem, ignore_tags=None):
29
+ text = elem.text or ''
30
+ ignore_tags = [tag.upper() for tag in (ignore_tags or [])]
31
+ correct = None
32
+ mistake = None
33
+
34
+ for child in elem.getchildren():
35
+ tag = child.tag.upper()
36
+ if tag == 'NS':
37
+ text += _get_formatted_text(child)
38
+
39
+ elif tag == 'UNKNOWN':
40
+ text += ' UNKNOWN '
41
+
42
+ elif tag == 'C':
43
+ assert correct is None
44
+ correct = _get_formatted_text(child)
45
+
46
+ elif tag == 'I':
47
+ assert mistake is None
48
+ mistake = _get_formatted_text(child)
49
+
50
+ elif tag in ignore_tags:
51
+ pass
52
+
53
+ else:
54
+ raise ValueError(f"Unknown tag `{child.tag}`", text)
55
+
56
+ if correct or mistake:
57
+ correct = correct or ''
58
+ mistake = mistake or ''
59
+ if '=>' not in mistake:
60
+ text += f'{{{mistake}=>{correct}}}'
61
+ else:
62
+ text += mistake
63
+
64
+ text += elem.tail or ''
65
+ return text
66
+
67
+
68
def convert_fce(fce_dir):
    """Annotate every XML document of an FCE dataset directory.

    ``fce_dir`` must be an existing directory containing a ``dataset``
    sub-directory; all ``dataset/*/*.xml`` files are processed in sorted
    order.

    Returns:
        A list with one annotated document string per XML file.

    Raises:
        UserWarning: if ``fce_dir`` is not a directory or has no ``dataset``
            entry.
    """
    if not os.path.isdir(fce_dir):
        raise UserWarning(
            f"{fce_dir} is not a valid path")

    dataset_dir = os.path.join(fce_dir, 'dataset')
    if not os.path.exists(dataset_dir):
        raise UserWarning(
            f"{fce_dir} doesn't point to a dataset's root dir")

    xml_pattern = os.path.join(dataset_dir, '*/*.xml')
    annotated = []
    for path in sorted(glob.glob(xml_pattern)):
        with open(path, encoding='utf-8') as handle:
            annotated.append(annotate_fce_doc(handle.read()))
    return annotated
90
+
91
+
92
def main():
    """Write the FCE corpus as two parallel, tokenized sentence files.

    Reads the module-level ``args`` (``fce_dataset_path`` and ``output``).
    For every annotated document, sentences are split, ``{mistake=>correct}``
    marks are resolved into an original and a corrected variant, and both
    variants are written to ``fce-original.txt`` / ``fce-applied.txt`` in
    the output folder. Sentences that still contain annotation characters
    after resolution (nested annotations) are skipped.
    """
    fce = convert_fce(args.fce_dataset_path)
    regexp = r'{([^{}]*?)=>([^{}]*?)}'
    with open(args.output + "/fce-original.txt", 'w', encoding='utf-8') as out_original, \
            open(args.output + "/fce-applied.txt", 'w', encoding='utf-8') as out_applied:
        for doc in tqdm(fce, unit='doc'):
            sents = re.split(r"\n +\n", doc)
            for sent in sents:
                tokenized_sents = sent_tokenize(sent)
                last_idx = len(tokenized_sents) - 1
                for i in range(len(tokenized_sents)):
                    # The tokenizer may cut right after an annotation brace;
                    # glue such a fragment onto the following sentence. The
                    # last sentence has no successor, so it is never merged
                    # (indexing i + 1 there would raise IndexError).
                    if i < last_idx and re.search(r"[{>][.?!]$", tokenized_sents[i]):
                        tokenized_sents[i + 1] = tokenized_sents[i] + " " + tokenized_sents[i + 1]
                        tokenized_sents[i] = ""
                    original = re.sub(regexp, r"\1", tokenized_sents[i])
                    applied = re.sub(regexp, r"\2", tokenized_sents[i])
                    # filter out nested alerts
                    if original != "" and applied != "" and not re.search(r"[{}=]", original) \
                            and not re.search(r"[{}=]", applied):
                        out_original.write(" ".join(word_tokenize(original)) + "\n")
                        out_applied.write(" ".join(word_tokenize(applied)) + "\n")
112
+
113
+
114
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=(
        "Convert CLC-FCE dataset to the parallel sentences format."))
    parser.add_argument('fce_dataset_path',
                        help='Path to the folder with the FCE dataset')
    # main() builds the output file names from args.output, so a missing
    # value would only fail later with a TypeError; make argparse report it.
    parser.add_argument('--output',
                        help='Path to the output folder',
                        required=True)
    # `args` must stay a module-level name: main() reads it as a global.
    args = parser.parse_args()

    main()
utils/preprocess_data.py ADDED
@@ -0,0 +1,488 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ from difflib import SequenceMatcher
4
+
5
+ import Levenshtein
6
+ import numpy as np
7
+ from tqdm import tqdm
8
+
9
+ from helpers import write_lines, read_parallel_lines, encode_verb_form, \
10
+ apply_reverse_transformation, SEQ_DELIMETERS, START_TOKEN
11
+
12
+
13
def perfect_align(t, T, insertions_allowed=0,
                  cost_function=Levenshtein.distance):
    """Align source tokens ``t`` with target tokens ``T`` by dynamic programming.

    Every source token is matched with a (possibly empty) contiguous run of
    target tokens; up to ``insertions_allowed`` extra runs of target tokens
    may be inserted between matches.

    Args:
        t: Sequence of source tokens.
        T: Sequence of target tokens.
        insertions_allowed: Maximum number of consecutive insertion steps.
        cost_function: Callable giving the cost of matching a source token
            with the space-joined target run; the cost is 0 whenever
            ``apply_transformation`` recognises the pair.

    Returns:
        A tuple ``(cost, alignment)``. ``alignment`` is ordered left to
        right; each item is ``['INSERT', T[j:k], (i, i)]`` or
        ``[f'REPLACE_{t[i]}', T[j:k], (i, i + 1)]``.
    """
    # dp[i, j, k] is a minimal cost of matching first `i` tokens of `t` with
    # first `j` tokens of `T`, after making `k` insertions after last match of
    # token from `t`. In other words t[:i] aligned with T[:j].

    # Initialize with INFINITY (unknown)
    shape = (len(t) + 1, len(T) + 1, insertions_allowed + 1)
    dp = np.ones(shape, dtype=int) * int(1e9)
    # Back-pointers: previous `j` and previous insertion count per state.
    come_from = np.ones(shape, dtype=int) * int(1e9)
    come_from_ins = np.ones(shape, dtype=int) * int(1e9)

    dp[0, 0, 0] = 0  # The only known starting point. Nothing matched to nothing.
    for i in range(len(t) + 1):  # Go inclusive
        for j in range(len(T) + 1):  # Go inclusive
            for q in range(insertions_allowed + 1):  # Go inclusive
                if i < len(t):
                    # Given matched sequence of t[:i] and T[:j], match token
                    # t[i] with following tokens T[j:k].
                    for k in range(j, len(T) + 1):
                        transform = \
                            apply_transformation(t[i], ' '.join(T[j:k]))
                        if transform:
                            cost = 0
                        else:
                            cost = cost_function(t[i], ' '.join(T[j:k]))
                        current = dp[i, j, q] + cost
                        # A match resets the insertion counter to 0.
                        if dp[i + 1, k, 0] > current:
                            dp[i + 1, k, 0] = current
                            come_from[i + 1, k, 0] = j
                            come_from_ins[i + 1, k, 0] = q
                if q < insertions_allowed:
                    # Given matched sequence of t[:i] and T[:j], create
                    # insertion with following tokens T[j:k].
                    for k in range(j, len(T) + 1):
                        # Insertion cost is the length of the inserted text.
                        cost = len(' '.join(T[j:k]))
                        current = dp[i, j, q] + cost
                        if dp[i, k, q + 1] > current:
                            dp[i, k, q + 1] = current
                            come_from[i, k, q + 1] = j
                            come_from_ins[i, k, q + 1] = q

    # Solution is in the dp[len(t), len(T), *]. Backtracking from there.
    alignment = []
    i = len(t)
    j = len(T)
    q = dp[i, j, :].argmin()
    while i > 0 or q > 0:
        is_insert = (come_from_ins[i, j, q] != q) and (q != 0)
        # Step back: `k` keeps the end of the target run, `j` its start.
        j, k, q = come_from[i, j, q], j, come_from_ins[i, j, q]
        if not is_insert:
            i -= 1

        if is_insert:
            alignment.append(['INSERT', T[j:k], (i, i)])
        else:
            alignment.append([f'REPLACE_{t[i]}', T[j:k], (i, i + 1)])

    # Backtracking must have consumed the whole target sequence.
    assert j == 0

    return dp[len(t), len(T)].min(), list(reversed(alignment))
74
+
75
+
76
+ def _split(token):
77
+ if not token:
78
+ return []
79
+ parts = token.split()
80
+ return parts or [token]
81
+
82
+
83
def apply_merge_transformation(source_tokens, target_words, shift_idx):
    """Return merge or swap edits for a replaced span, or an empty list.

    * Several source tokens against one target word: if ``check_merge``
      recognises the pair, one edit per gap between source tokens is
      produced.
    * Two source tokens against two target words: if ``check_swap``
      recognises the pair, a single edit is produced.

    Edits have the form ``[(start, start + 1), tag]`` with positions offset
    by ``shift_idx``.
    """
    n_source, n_target = len(source_tokens), len(target_words)

    if n_source > 1 and n_target == 1:
        merge_tag = check_merge(source_tokens, target_words)
        if merge_tag:
            return [[(shift_idx + k, shift_idx + k + 1), merge_tag]
                    for k in range(n_source - 1)]

    if n_source == n_target == 2:
        swap_tag = check_swap(source_tokens, target_words)
        if swap_tag:
            return [[(shift_idx, shift_idx + 1), swap_tag]]

    return []
99
+
100
+
101
def is_sent_ok(sent, delimeters=SEQ_DELIMETERS):
    """Tell whether ``sent`` is free of reserved delimiter strings.

    Returns ``False`` as soon as any delimiter value other than the token
    delimiter occurs in the sentence, ``True`` otherwise.
    """
    return not any(
        value in sent and value != delimeters["tokens"]
        for value in delimeters.values()
    )
106
+
107
+
108
def check_casetype(source_token, target_token):
    """Return the case transformation turning source into target, if any.

    Tokens that differ by more than letter case give ``None``. Otherwise
    the first matching tag is returned, tried in this order: ``LOWER``,
    ``CAPITAL``, ``UPPER``, ``CAPITAL_1`` (first character kept, rest
    capitalised) and ``UPPER_-1`` (all but the last character upper-cased).
    """
    lowered = source_token.lower()
    if lowered != target_token.lower():
        return None
    if target_token == lowered:
        return "$TRANSFORM_CASE_LOWER"
    if target_token == source_token.capitalize():
        return "$TRANSFORM_CASE_CAPITAL"
    if target_token == source_token.upper():
        return "$TRANSFORM_CASE_UPPER"
    # Both tokens are non-empty here: two empty tokens already matched LOWER.
    if (source_token[1:].capitalize() == target_token[1:]
            and source_token[0] == target_token[0]):
        return "$TRANSFORM_CASE_CAPITAL_1"
    if (source_token[:-1].upper() == target_token[:-1]
            and source_token[-1] == target_token[-1]):
        return "$TRANSFORM_CASE_UPPER_-1"
    return None
123
+
124
+
125
def check_equal(source_token, target_token):
    """Return ``"$KEEP"`` when both tokens are identical, else ``None``."""
    return "$KEEP" if source_token == target_token else None
130
+
131
+
132
def check_split(source_token, target_tokens):
    """Return the hyphen-split tag when splitting the source on ``-`` gives
    exactly ``target_tokens``; otherwise ``None``."""
    hyphen_parts = source_token.split("-")
    return "$TRANSFORM_SPLIT_HYPHEN" if hyphen_parts == target_tokens else None
137
+
138
+
139
def check_merge(source_tokens, target_tokens):
    """Detect whether the source tokens merge into the target tokens.

    Returns ``"$MERGE_SPACE"`` when both sides are equal once concatenated
    without separator, ``"$MERGE_HYPHEN"`` when they are equal once joined
    with hyphens, and ``None`` otherwise.
    """
    for glue, tag in (("", "$MERGE_SPACE"), ("-", "$MERGE_HYPHEN")):
        if glue.join(source_tokens) == glue.join(target_tokens):
            return tag
    return None
146
+
147
+
148
def check_swap(source_tokens, target_tokens):
    """Return ``"$MERGE_SWAP"`` when the source equals the reversed target,
    else ``None``."""
    mirrored = list(reversed(target_tokens))
    return "$MERGE_SWAP" if source_tokens == mirrored else None
153
+
154
+
155
def check_plural(source_token, target_token):
    """Detect a trailing-``s`` number change between two tokens.

    Returns the ``SINGULAR`` tag when the source is the target plus a final
    ``s``, the ``PLURAL`` tag when the target is the source plus a final
    ``s``, and ``None`` otherwise.
    """
    if source_token.endswith("s") and source_token[:-1] == target_token:
        return "$TRANSFORM_AGREEMENT_SINGULAR"
    if target_token.endswith("s") and target_token[:-1] == source_token:
        return "$TRANSFORM_AGREEMENT_PLURAL"
    return None
162
+
163
+
164
def check_verb(source_token, target_token):
    """Return a ``$TRANSFORM_VERB_<tags>`` label when ``encode_verb_form``
    knows the source-to-target verb form change, else ``None``."""
    encoding = encode_verb_form(source_token, target_token)
    return f"$TRANSFORM_VERB_{encoding}" if encoding else None
170
+
171
+
172
def apply_transformation(source_token, target_token):
    """Find a transformation label turning ``source_token`` into ``target_token``.

    A multi-word target is first tested as a hyphen split. Then the
    equality, case, verb-form and plural checks are tried in that order;
    the first label found is returned, or ``None`` if nothing applies.
    """
    target_words = target_token.split()
    if len(target_words) > 1:
        split_tag = check_split(source_token, target_words)
        if split_tag:
            return split_tag

    for probe in (check_equal, check_casetype, check_verb, check_plural):
        tag = probe(source_token, target_token)
        if tag:
            return tag
    return None
185
+
186
+
187
def align_sequences(source_sent, target_sent):
    """Tag every source token with the edits leading to the target sentence.

    Both sentences are split on whitespace and diffed with
    ``SequenceMatcher``; each diff opcode is converted to per-token edit
    operations, which are then attached to the source tokens.

    Returns:
        The tagged sentence string built by ``add_labels_to_the_tokens``,
        or ``None`` when either sentence fails ``is_sent_ok``.
    """
    # check if sent is OK
    if not is_sent_ok(source_sent) or not is_sent_ok(target_sent):
        return None
    source_tokens = source_sent.split()
    target_tokens = target_sent.split()
    matcher = SequenceMatcher(None, source_tokens, target_tokens)
    diffs = list(matcher.get_opcodes())
    all_edits = []
    for diff in diffs:
        tag, i1, i2, j1, j2 = diff
        source_part = _split(" ".join(source_tokens[i1:i2]))
        target_part = _split(" ".join(target_tokens[j1:j2]))
        if tag == 'equal':
            continue
        elif tag == 'delete':
            # delete all words separatly
            for j in range(i2 - i1):
                edit = [(i1 + j, i1 + j + 1), '$DELETE']
                all_edits.append(edit)
        elif tag == 'insert':
            # append to the previous word
            # (for i1 == 0 the span is (-1, 0), i.e. label position 0)
            for target_token in target_part:
                edit = ((i1 - 1, i1), f"$APPEND_{target_token}")
                all_edits.append(edit)
        else:
            # check merge first of all
            edits = apply_merge_transformation(source_part, target_part,
                                               shift_idx=i1)
            if edits:
                all_edits.extend(edits)
                continue

            # normalize alignments if need (make them singleton)
            _, alignments = perfect_align(source_part, target_part,
                                          insertions_allowed=0)
            for alignment in alignments:
                # alignment[2] is the (start, end) span inside source_part.
                new_shift = alignment[2][0]
                edits = convert_alignments_into_edits(alignment,
                                                      shift_idx=i1 + new_shift)
                all_edits.extend(edits)

    # get labels
    labels = convert_edits_into_labels(source_tokens, all_edits)
    # match tags to source tokens
    sent_with_tags = add_labels_to_the_tokens(source_tokens, labels)
    return sent_with_tags
234
+
235
+
236
def convert_edits_into_labels(source_tokens, all_edits):
    """Turn span edits into one label list per token position.

    Args:
        source_tokens: The source tokens; ``len(source_tokens) + 1`` label
            lists are produced (position 0 corresponds to span ``(-1, 0)``).
        all_edits: Iterable of ``((start, end), operations)`` where
            ``operations`` is a label string or a list of label strings.

    Returns:
        A list of label lists. Position ``i`` collects, in input order, all
        operations whose span is ``(i - 1, i)``; positions without any
        operation get ``["$KEEP"]``.

    Raises:
        Exception: if an edit's operations are neither ``str`` nor ``list``.
    """
    # Group operations by span in one pass instead of rescanning every edit
    # for every token position.
    by_span = {}
    for (start, end), operations in all_edits:
        if isinstance(operations, list):
            flat = operations
        elif isinstance(operations, str):
            flat = [operations]
        else:
            raise Exception("Unknown operation type")
        by_span.setdefault((start, end), []).extend(flat)

    total_labels = len(source_tokens) + 1
    return [by_span.get((i - 1, i)) or ["$KEEP"] for i in range(total_labels)]
263
+
264
+
265
def convert_alignments_into_edits(alignment, shift_idx):
    """Convert one alignment produced by ``perfect_align`` into span edits.

    Args:
        alignment: ``[action, target_tokens, span]``; the source token is
            recovered by removing ``"REPLACE_"`` from ``action``.
        shift_idx: Position of the source token in the full sentence.

    Returns:
        A list of ``[(start, end), label]`` edits: a single ``$DELETE`` when
        there are no target tokens; a transformation (plus ``$APPEND_`` for
        the remaining tokens) when a prefix of two or more target tokens
        matches a transformation; otherwise appends before, a replace or
        transform on, and appends after the cheapest-matching target token.
    """
    edits = []
    action, target_tokens, new_idx = alignment
    source_token = action.replace("REPLACE_", "")

    # check if delete
    if not target_tokens:
        edit = [(shift_idx, 1 + shift_idx), "$DELETE"]
        return [edit]

    # check splits
    for i in range(1, len(target_tokens)):
        target_token = " ".join(target_tokens[:i + 1])
        transform = apply_transformation(source_token, target_token)
        if transform:
            edit = [(shift_idx, shift_idx + 1), transform]
            edits.append(edit)
            # Whatever follows the matched prefix is appended to this token.
            target_tokens = target_tokens[i + 1:]
            for target in target_tokens:
                edits.append([(shift_idx, shift_idx + 1), f"$APPEND_{target}"])
            return edits

    # Score every target token against the source token; a recognised
    # transformation costs 0, anything else its Levenshtein distance.
    transform_costs = []
    transforms = []
    for target_token in target_tokens:
        transform = apply_transformation(source_token, target_token)
        if transform:
            cost = 0
            transforms.append(transform)
        else:
            cost = Levenshtein.distance(source_token, target_token)
            transforms.append(None)
        transform_costs.append(cost)
    # First index with the minimal cost wins.
    min_cost_idx = transform_costs.index(min(transform_costs))
    # append to the previous word
    for i in range(0, min_cost_idx):
        target = target_tokens[i]
        edit = [(shift_idx - 1, shift_idx), f"$APPEND_{target}"]
        edits.append(edit)
    # replace/transform target word
    transform = transforms[min_cost_idx]
    target = transform if transform is not None \
        else f"$REPLACE_{target_tokens[min_cost_idx]}"
    edit = [(shift_idx, 1 + shift_idx), target]
    edits.append(edit)
    # append to this word
    for i in range(min_cost_idx + 1, len(target_tokens)):
        target = target_tokens[i]
        edit = [(shift_idx, 1 + shift_idx), f"$APPEND_{target}"]
        edits.append(edit)
    return edits
316
+
317
+
318
def add_labels_to_the_tokens(source_tokens, labels, delimeters=SEQ_DELIMETERS):
    """Serialise tokens and their label lists into one tagged line.

    ``START_TOKEN`` is prepended to the tokens; each token is joined to its
    operations (themselves joined with the operations delimiter) by the
    labels delimiter, and the records are joined with the token delimiter.
    Pairing stops at the shorter of the two sequences.
    """
    op_sep = delimeters['operations']
    label_sep = delimeters['labels']
    paired = zip([START_TOKEN] + source_tokens, labels)
    records = [token + label_sep + op_sep.join(ops) for token, ops in paired]
    return delimeters['tokens'].join(records)
326
+
327
+
328
def convert_data_from_raw_files(source_file, target_file, output_file, chunk_size):
    """Convert parallel source/target files into a file of tagged sentences.

    Each sentence pair is aligned with ``align_sequences``; the tagged line
    is decoded again with ``convert_tagged_line`` and kept only if it
    reproduces the target sentence (ignoring whitespace). Results are
    appended to ``output_file`` whenever more than ``chunk_size`` lines are
    buffered, and once more at the end.

    Pairs for which ``align_sequences`` returns ``None`` (a sentence
    contains a reserved delimiter) are skipped and counted instead of
    crashing the whole run.
    """
    tagged = []
    source_data, target_data = read_parallel_lines(source_file, target_file)
    print(f"The size of raw dataset is {len(source_data)}")
    cnt_total, cnt_all, cnt_tp, cnt_skipped = 0, 0, 0, 0
    for source_sent, target_sent in tqdm(zip(source_data, target_data)):
        # The former try/except blocks only repeated the failing call (a
        # debugger hook), so any error propagated anyway; call once.
        aligned_sent = align_sequences(source_sent, target_sent)
        if aligned_sent is None:
            # Nothing to decode or to write for an unalignable pair.
            cnt_skipped += 1
            continue
        if source_sent != target_sent:
            cnt_tp += 1
        cnt_all += 1
        check_sent = convert_tagged_line(aligned_sent)

        if "".join(check_sent.split()) != "".join(
                target_sent.split()):
            print(f"Incorrect pair: \n{target_sent}\n{check_sent}")
            continue
        cnt_total += 1
        tagged.append(aligned_sent)
        if len(tagged) > chunk_size:
            write_lines(output_file, tagged, mode='a')
            tagged = []

    print(f"Overall extracted {cnt_total}. "
          f"Original TP {cnt_tp}."
          f" Original TN {cnt_all - cnt_tp}")
    if cnt_skipped:
        print(f"Skipped {cnt_skipped} pairs containing reserved delimiters.")
    if tagged:
        write_lines(output_file, tagged, 'a')
368
+
369
+
370
def convert_labels_into_edits(labels):
    """Convert per-position label lists into span edits.

    Position ``i`` becomes ``[(i - 1, i), label_list]``; positions whose
    label list is exactly ``["$KEEP"]`` are omitted.
    """
    return [[(idx - 1, idx), label_list]
            for idx, label_list in enumerate(labels)
            if label_list != ["$KEEP"]]
379
+
380
+
381
def get_target_sent_by_levels(source_tokens, labels):
    """Apply per-token label lists level by level and rebuild the target.

    On level ``n`` the n-th remaining label of every edited position is
    applied; the labels left over are carried to the next level with their
    spans shifted by the insertions and deletions made so far.

    Returns:
        A tuple ``(leveled_target_tokens, target_sentence)``. The dict maps
        level numbers (starting at 1) to ``{"tokens": ..., "labels": ...}``;
        it is empty when no position carries an edit. ``target_sentence`` is
        the space-joined token list of the last level.
    """
    relevant_edits = convert_labels_into_edits(labels)
    target_tokens = source_tokens[:]
    leveled_target_tokens = {}
    if not relevant_edits:
        target_sentence = " ".join(target_tokens)
        return leveled_target_tokens, target_sentence
    # Number of levels = longest label list among the edited positions.
    max_level = max([len(x[1]) for x in relevant_edits])
    for level in range(max_level):
        rest_edits = []
        shift_idx = 0
        for edits in relevant_edits:
            (start, end), label_list = edits
            label = label_list[0]
            target_pos = start + shift_idx
            # Position -1 stands for the virtual start token.
            source_token = target_tokens[target_pos] if target_pos >= 0 else START_TOKEN
            if label == "$DELETE":
                del target_tokens[target_pos]
                shift_idx -= 1
            elif label.startswith("$APPEND_"):
                word = label.replace("$APPEND_", "")
                target_tokens[target_pos + 1: target_pos + 1] = [word]
                shift_idx += 1
            elif label.startswith("$REPLACE_"):
                word = label.replace("$REPLACE_", "")
                target_tokens[target_pos] = word
            elif label.startswith("$TRANSFORM"):
                word = apply_reverse_transformation(source_token, label)
                # Fall back to the unchanged token when no form was found.
                if word is None:
                    word = source_token
                target_tokens[target_pos] = word
            elif label.startswith("$MERGE_"):
                # apply merge only on last stage
                if level == (max_level - 1):
                    target_tokens[target_pos + 1: target_pos + 1] = [label]
                    shift_idx += 1
                else:
                    rest_edit = [(start + shift_idx, end + shift_idx), [label]]
                    rest_edits.append(rest_edit)
            # Carry the not yet applied labels over to the next level.
            rest_labels = label_list[1:]
            if rest_labels:
                rest_edit = [(start + shift_idx, end + shift_idx), rest_labels]
                rest_edits.append(rest_edit)

        leveled_tokens = target_tokens[:]
        # update next step
        relevant_edits = rest_edits[:]
        if level == (max_level - 1):
            leveled_tokens = replace_merge_transforms(leveled_tokens)
        leveled_labels = convert_edits_into_labels(leveled_tokens,
                                                   relevant_edits)
        leveled_target_tokens[level + 1] = {"tokens": leveled_tokens,
                                            "labels": leveled_labels}

    target_sentence = " ".join(leveled_target_tokens[max_level]["tokens"])
    return leveled_target_tokens, target_sentence
437
+
438
+
439
def replace_merge_transforms(tokens):
    """Resolve ``$MERGE_*`` markers embedded in a token list.

    * ``$MERGE_SWAP`` between two tokens exchanges its neighbours and is
      removed. A swap marker at the very start or end of the list has no
      two neighbours and is left untouched.
    * ``$MERGE_HYPHEN`` joins its neighbours with ``-``.
    * ``$MERGE_SPACE`` joins its neighbours without separator.

    If no token starts with ``$MERGE_`` the input list is returned as is;
    otherwise a new list is returned.
    """
    if all(not x.startswith("$MERGE_") for x in tokens):
        return tokens

    target_tokens = tokens[:]
    last_idx = len(tokens) - 1
    removed = 0  # markers already dropped from target_tokens
    for i, token in enumerate(tokens):
        # A real range test: the previous 2-tuple membership check only
        # accepted i == 1 or i == len(tokens) - 1, so interior swaps were
        # ignored and a trailing marker raised IndexError.
        if token.startswith("$MERGE_SWAP") and 0 < i < last_idx:
            pos = i - removed  # account for markers removed earlier
            target_tokens[pos - 1], target_tokens[pos + 1] = \
                target_tokens[pos + 1], target_tokens[pos - 1]
            del target_tokens[pos]
            removed += 1

    target_line = " ".join(target_tokens)
    target_line = target_line.replace(" $MERGE_HYPHEN ", "-")
    target_line = target_line.replace(" $MERGE_SPACE ", "")
    return target_line.split()
455
+
456
+
457
def convert_tagged_line(line, delimeters=SEQ_DELIMETERS):
    """Decode a tagged line back into the target sentence.

    The line is split into ``token<labels-delimiter>operations`` records;
    the first record belongs to the start token and contributes labels
    only. The labels are applied with ``get_target_sent_by_levels`` and the
    resulting sentence is returned.
    """
    label_del = delimeters['labels']
    records = [chunk.split(label_del)
               for chunk in line.split(delimeters['tokens'])]
    source_tokens = [record[0] for record in records][1:]
    labels = [record[1].split(delimeters['operations']) for record in records]
    assert len(source_tokens) + 1 == len(labels)
    _, target_line = get_target_sent_by_levels(source_tokens, labels)
    return target_line
466
+
467
+
468
def main(args):
    """Run the conversion with the paths and chunk size given on the command line."""
    convert_data_from_raw_files(
        args.source,
        args.target,
        args.output_file,
        args.chunk_size,
    )
470
+
471
+
472
if __name__ == '__main__':
    # Command-line entry point: three mandatory paths plus an optional
    # buffer size controlling how often results are flushed to disk.
    cli = argparse.ArgumentParser()
    cli.add_argument('-s', '--source', required=True,
                     help='Path to the source file')
    cli.add_argument('-t', '--target', required=True,
                     help='Path to the target file')
    cli.add_argument('-o', '--output_file', required=True,
                     help='Path to the output file')
    cli.add_argument('--chunk_size', type=int, default=1000000,
                     help='Dump each chunk size.')
    main(cli.parse_args())