Spaces:
Runtime error
Runtime error
File size: 1,948 Bytes
c80917c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import random
def repeat(text, n_max_gram=3, n_max_repeat=3):
    """Duplicate a randomly chosen n-gram of ``text`` at random positions.

    The text is split on whitespace. A contiguous run of 1 to ``n_max_gram``
    tokens is picked at random, then copies of that run are inserted at
    1 to ``n_max_repeat`` randomly chosen positions (both bounds inclusive).

    Args:
        text: Input string; tokens are its whitespace-separated pieces.
        n_max_gram: Largest n-gram length that may be picked (must be >= 1).
        n_max_repeat: Largest number of copies to insert (must be >= 1).

    Returns:
        The tokens joined by single spaces with the extra copies inserted.
        Text that contains no tokens yields an empty string.

    Raises:
        ValueError: If ``n_max_gram`` or ``n_max_repeat`` is smaller than 1
            (propagated from ``random.randint``).
    """
    tokens = text.split()
    if not tokens:
        # No n-gram can be picked from an empty token list.
        return ""

    # Draw first, clamp second: inputs that are long enough consume exactly
    # the same random numbers as before, while short inputs no longer make
    # the start-index range below empty (which raised ValueError).
    n_gram = min(random.randint(1, n_max_gram), len(tokens))
    start = random.randint(0, len(tokens) - n_gram)
    repeated_tokens = tokens[start:start + n_gram]

    n_repeat = random.randint(1, n_max_repeat)
    for _ in range(n_repeat):
        # Upper bound len(tokens) allows appending after the last token.
        insert_idx = random.randint(0, len(tokens))
        tokens[insert_idx:insert_idx] = repeated_tokens
    return " ".join(tokens)
def remove(text, n_max_gram=3):
    """Delete one randomly chosen n-gram from ``text``.

    The text is split on whitespace and a contiguous run of 1 to
    ``n_max_gram`` tokens (inclusive) is removed from a random position.

    Args:
        text: Input string; tokens are its whitespace-separated pieces.
        n_max_gram: Largest number of consecutive tokens that may be removed
            (must be >= 1).

    Returns:
        The remaining tokens joined by single spaces. This is an empty string
        when the input has no tokens or when every token was removed.

    Raises:
        ValueError: If ``n_max_gram`` is smaller than 1 (propagated from
            ``random.randint``).
    """
    tokens = text.split()
    if not tokens:
        # Nothing to remove.
        return ""

    # Clamp after drawing so that texts shorter than the drawn length do not
    # produce an empty range for the start index (which raised ValueError),
    # while longer texts consume the same random numbers as before.
    n_gram = min(random.randint(1, n_max_gram), len(tokens))
    start = random.randint(0, len(tokens) - n_gram)
    del tokens[start:start + n_gram]
    return " ".join(tokens)
def insert(text, vocab, n_max_tokens=3):
    """Insert random vocabulary tokens at random positions in ``text``.

    The text is split on whitespace, then 1 to ``n_max_tokens`` tokens
    (inclusive), each drawn with ``random.choice(vocab)``, are inserted one
    at a time at a random position.

    Args:
        text: Input string; tokens are its whitespace-separated pieces.
        vocab: Non-empty sequence of candidate tokens to insert.
        n_max_tokens: Largest number of tokens to insert (must be >= 1).

    Returns:
        The tokens, including the inserted ones, joined by single spaces.

    Raises:
        IndexError: If ``vocab`` is empty (propagated from ``random.choice``).
        ValueError: If ``n_max_tokens`` is smaller than 1 (propagated from
            ``random.randint``).
    """
    tokens = text.split()
    n_insert_token = random.randint(1, n_max_tokens)
    for _ in range(n_insert_token):
        # The inclusive upper bound len(tokens) makes the end of the text a
        # valid insertion point (as in ``repeat``) and keeps the range
        # non-empty when the text has no tokens at all.
        insert_token_idx = random.randint(0, len(tokens))
        tokens.insert(insert_token_idx, random.choice(vocab))
    return " ".join(tokens)
def swap(text, vocab, n_max_tokens=3):
    """Replace random tokens of ``text`` with different vocabulary tokens.

    The text is split on whitespace. Between 1 and ``n_max_tokens`` times
    (inclusive) a random position is chosen and its token is replaced by a
    token from ``vocab`` that differs from the current one. The same position
    may be chosen more than once.

    Args:
        text: Input string; tokens are its whitespace-separated pieces.
        vocab: Non-empty sequence of candidate replacement tokens.
        n_max_tokens: Largest number of replacements to attempt (must be
            >= 1).

    Returns:
        The tokens joined by single spaces after the replacements. Text that
        contains no tokens yields an empty string. A position is left
        unchanged when ``vocab`` offers no token different from the one
        already there.

    Raises:
        IndexError: If ``vocab`` is empty and ``text`` has at least one token
            (propagated from ``random.choice``).
        ValueError: If ``n_max_tokens`` is smaller than 1 (propagated from
            ``random.randint``).
    """
    tokens = text.split()
    if not tokens:
        # No position can be chosen in an empty token list.
        return ""

    n_swap_tokens = random.randint(1, n_max_tokens)
    for _ in range(n_swap_tokens):
        swap_token_idx = random.randint(0, len(tokens) - 1)
        current = tokens[swap_token_idx]
        swap_token = random.choice(vocab)
        # Without this guard the rejection loop below never terminates when
        # every vocabulary entry equals the current token. The full scan only
        # runs after a colliding draw, so the common path stays cheap.
        if swap_token == current and all(entry == current for entry in vocab):
            continue
        while swap_token == current:
            swap_token = random.choice(vocab)
        tokens[swap_token_idx] = swap_token
    return " ".join(tokens)
def shuffle(text):
    """Return ``text`` with its whitespace-separated tokens in random order.

    Args:
        text: Input string; tokens are its whitespace-separated pieces.

    Returns:
        The same tokens, randomly permuted and joined by single spaces.
    """
    words = text.split()
    # random.shuffle permutes the list in place and returns None.
    random.shuffle(words)
    return " ".join(words)
|