File size: 841 Bytes
ead1d68 a2aeb80 ead1d68 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from transformers import PretrainedConfig
from nltk.corpus import stopwords
from typing import List
import nltk
# Fetch the NLTK resources at import time. 'stopwords' backs the
# stopwords.words('english') lookup used by the config class in this file;
# 'punkt' is not referenced in this file (presumably needed by the model /
# tokenisation code elsewhere -- confirm before removing).
for _nltk_resource in ('stopwords', 'punkt'):
    nltk.download(_nltk_resource)
class GZIPEmbeddingConfig(PretrainedConfig):
    """Configuration holder for the ``gzipembed`` model type.

    Every named constructor argument is stored unchanged on the instance
    under the same name; any extra keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.

    Args:
        normalize: Flag stored as ``self.normalize`` (presumably toggles
            normalisation of input text -- confirm against the model code).
        normalized_corpus: Flag stored as ``self.normalized_corpus``
            (presumably states whether ``corpus`` is already normalised --
            confirm against the model code).
        reduction: Flag stored as ``self.reduction``.
        reduced_dimension: Value stored as ``self.reduced_dimension``
            (presumably the target dimension when ``reduction`` is on --
            confirm against the model code).
        remove_stop_words: Flag stored as ``self.remove_stop_words``.
        stop_words: List of stop words. When ``None`` (the default), the
            NLTK English stop-word list is loaded for this instance.
        corpus: Corpus entries. When ``None`` (the default), a new empty
            list is created for this instance.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "gzipembed"

    def __init__(
        self,
        normalize=True,
        normalized_corpus=True,
        reduction=False,
        reduced_dimension=0,
        remove_stop_words=True,
        stop_words=None,
        corpus=None,
        **kwargs,
    ):
        # Build the list defaults per instance: a list literal (or a list
        # computed in the signature) would be created once and then shared,
        # so mutating one config's corpus/stop words would leak into others.
        self.corpus = [] if corpus is None else corpus
        self.normalize = normalize
        self.normalized_corpus = normalized_corpus
        self.reduction = reduction
        # No trailing comma here: the original line ended with one, which
        # silently stored a 1-tuple such as ``(0,)`` instead of the value.
        self.reduced_dimension = reduced_dimension
        self.remove_stop_words = remove_stop_words
        # Loaded lazily so the NLTK corpus is only read when a config is
        # actually instantiated without an explicit stop-word list.
        self.stop_words = (
            stopwords.words('english') if stop_words is None else stop_words
        )
        # Called last, matching the original ordering.
        super().__init__(**kwargs)
|