File size: 841 Bytes
ead1d68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2aeb80
 
ead1d68
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from transformers import PretrainedConfig
from nltk.corpus import stopwords
from typing import List
import nltk
# Fetch the NLTK data packages at import time. 'stopwords' backs the
# English stop-word list used by the config below; 'punkt' is not used in
# the visible code -- presumably needed by tokenization elsewhere (confirm).
for _resource in ('stopwords', 'punkt'):
    nltk.download(_resource)

class GZIPEmbeddingConfig(PretrainedConfig):
    """Configuration container for the ``gzipembed`` model type.

    Every constructor argument is stored unchanged as an attribute of the
    same name; any remaining keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.

    Args:
        normalize: Flag stored as ``self.normalize``. Presumably controls
            normalization of the produced embeddings -- confirm against the
            model code.
        normalized_corpus: Flag stored as ``self.normalized_corpus``.
            Presumably whether the corpus entries are normalized -- confirm.
        reduction: Flag stored as ``self.reduction``. Presumably enables
            dimensionality reduction -- confirm.
        reduced_dimension: Integer stored as ``self.reduced_dimension``.
            Presumably the target size used when ``reduction`` is on --
            confirm.
        remove_stop_words: Flag stored as ``self.remove_stop_words``.
        stop_words: Iterable of stop words. When ``None`` (the default), the
            NLTK English stop-word list is loaded.
        corpus: Iterable of corpus entries. When ``None`` (the default), an
            empty list is used.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "gzipembed"

    def __init__(
        self,
        normalize: bool = True,
        normalized_corpus: bool = True,
        reduction: bool = False,
        reduced_dimension: int = 0,
        remove_stop_words: bool = True,
        stop_words=None,
        corpus=None,
        **kwargs,
    ):
        # Build fresh lists per instance: a list used as a default argument
        # is created once and would be shared (and mutated) across configs.
        self.corpus = [] if corpus is None else list(corpus)
        self.normalize = normalize
        self.normalized_corpus = normalized_corpus
        self.reduction = reduction
        # No trailing comma here: the original stored a 1-tuple ``(0,)``
        # instead of the integer, which also broke JSON round-trips.
        self.reduced_dimension = reduced_dimension
        self.remove_stop_words = remove_stop_words
        # Load the English list lazily so each instance owns its own copy.
        if stop_words is None:
            stop_words = stopwords.words('english')
        self.stop_words = list(stop_words)
        super().__init__(**kwargs)