File size: 1,948 Bytes
c80917c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import random

def repeat(text, n_max_gram=3, n_max_repeat=3):
    """Duplicate one randomly chosen n-gram at random positions in the text.

    The text is split on whitespace, a contiguous run of 1..n_max_gram
    tokens is picked, and 1..n_max_repeat copies of that run are inserted
    at random positions (each position is drawn against the token list as
    it stands after the previous insertion).

    Args:
        text: Input string; tokens are whitespace-separated.
        n_max_gram: Upper bound on the n-gram size. It is capped at the
            number of tokens so short texts no longer raise.
        n_max_repeat: Upper bound on how many copies are inserted.

    Returns:
        The noised text with tokens joined by single spaces, or an empty
        string when the text contains no tokens.

    Raises:
        ValueError: If n_max_gram or n_max_repeat is smaller than 1
            (propagated from random.randint).
    """
    tokens = text.split()
    if not tokens:
        # No n-gram can be picked; drawing a start index would fail on an
        # empty range.
        return ""

    # Cap the size so the start-index range below is never empty.
    n_gram = random.randint(1, min(n_max_gram, len(tokens)))
    start_idx = random.randint(0, len(tokens) - n_gram)
    repeated_tokens = tokens[start_idx:start_idx + n_gram]

    n_repeat = random.randint(1, n_max_repeat)
    for _ in range(n_repeat):
        # len(tokens) is a valid position: it appends after the last token.
        insert_idx = random.randint(0, len(tokens))
        tokens[insert_idx:insert_idx] = repeated_tokens

    return " ".join(tokens)

def remove(text, n_max_gram=3):
    """Delete one randomly chosen n-gram from the text.

    The text is split on whitespace and a contiguous run of 1..n_max_gram
    tokens, starting at a random position, is dropped.

    Args:
        text: Input string; tokens are whitespace-separated.
        n_max_gram: Upper bound on the number of tokens removed. It is
            capped at the number of tokens so short texts no longer raise.

    Returns:
        The remaining tokens joined by single spaces. This is an empty
        string when the text has no tokens or when the drawn n-gram covers
        the whole text.

    Raises:
        ValueError: If n_max_gram is smaller than 1 (propagated from
            random.randint).
    """
    tokens = text.split()
    if not tokens:
        # Nothing to remove; drawing a start index would fail on an empty
        # range.
        return ""

    # Cap the size so the start-index range below is never empty.
    n_gram = random.randint(1, min(n_max_gram, len(tokens)))
    remove_idx = random.randint(0, len(tokens) - n_gram)

    del tokens[remove_idx:remove_idx + n_gram]

    return " ".join(tokens)

def insert(text, vocab, n_max_tokens=3):
    """Insert random vocabulary tokens at random positions in the text.

    Between 1 and n_max_tokens tokens are drawn from vocab (with
    replacement) and each is inserted at a random position, including
    before the first token and after the last one.

    Args:
        text: Input string; tokens are whitespace-separated.
        vocab: Non-empty sequence of candidate tokens.
        n_max_tokens: Upper bound on the number of inserted tokens.

    Returns:
        The noised text with tokens joined by single spaces. For a text
        without tokens the result consists of the inserted tokens only.

    Raises:
        ValueError: If n_max_tokens is smaller than 1 (propagated from
            random.randint).
        IndexError: If vocab is empty (propagated from random.choice).
    """
    tokens = text.split()

    n_insert_token = random.randint(1, n_max_tokens)

    for _ in range(n_insert_token):
        # The upper bound is len(tokens), not len(tokens) - 1, so a token
        # can also be appended at the end and an empty text yields the
        # valid range [0, 0].
        insert_token_idx = random.randint(0, len(tokens))
        insert_token = random.choice(vocab)
        tokens.insert(insert_token_idx, insert_token)

    return " ".join(tokens)

def swap(text, vocab, n_max_tokens=3):
    """Replace randomly chosen tokens with different vocabulary tokens.

    Between 1 and n_max_tokens positions are drawn (the same position may
    be drawn more than once) and the token at each position is overwritten
    with a vocabulary token that differs from the one currently there.

    Args:
        text: Input string; tokens are whitespace-separated.
        vocab: Non-empty sequence of candidate tokens.
        n_max_tokens: Upper bound on the number of swap operations.

    Returns:
        The noised text with tokens joined by single spaces, or an empty
        string when the text contains no tokens. A position is left as-is
        when vocab offers no token different from the current one.

    Raises:
        ValueError: If n_max_tokens is smaller than 1 (propagated from
            random.randint).
        IndexError: If vocab is empty (propagated from random.choice).
    """
    tokens = text.split()
    if not tokens:
        # No position can be drawn from an empty token list.
        return ""

    n_swap_tokens = random.randint(1, n_max_tokens)

    for _ in range(n_swap_tokens):
        swap_token_idx = random.randint(0, len(tokens) - 1)
        current = tokens[swap_token_idx]

        swap_token = random.choice(vocab)
        # Only scan the vocabulary after a collision: if every entry equals
        # the current token, redrawing would never terminate, so skip.
        if swap_token == current and all(cand == current for cand in vocab):
            continue
        while swap_token == current:
            swap_token = random.choice(vocab)

        tokens[swap_token_idx] = swap_token

    return " ".join(tokens)

def shuffle(text):
    """Return the text with its whitespace-separated tokens in random order."""
    words = text.split()
    # random.shuffle permutes the list in place and returns None.
    random.shuffle(words)
    return " ".join(words)