DeepMoji / examples /create_twitter_vocab.py
Pendrokar's picture
torchmoji code
86a83a2
raw
history blame
377 Bytes
""" Creates a vocabulary from a tsv file.
"""
import codecs
import example_helper
from torchmoji.create_vocab import VocabBuilder
from torchmoji.word_generator import TweetWordGenerator
# Read the tweet dump as UTF-8 text and build a vocabulary from it.
#
# The mode is plain 'r' rather than 'rU': the 'U' (universal newlines) flag
# was removed from open()/codecs.open() in Python 3.11 and raises ValueError
# there. codecs.open() always opens the underlying file in binary mode, so
# dropping the flag does not change how the stream is decoded.
with codecs.open('../../twitterdata/tweets.2016-09-01', 'r', 'utf-8') as stream:
    # The word generator consumes the open stream, so counting must happen
    # while the file is still open, i.e. inside this `with` block.
    wg = TweetWordGenerator(stream)
    vb = VocabBuilder(wg)
    vb.count_all_words()
    # Persists the counted vocabulary; the output location is decided by
    # VocabBuilder.save_vocab's defaults (not visible in this script).
    vb.save_vocab()