simpleNWP / train.py
Tekkonetes
Update v2
efde5e9
raw
history blame contribute delete
700 Bytes
# Install the third-party dependencies (nltk, numpy) before importing them.
import os
import subprocess
import sys

# Run pip through the interpreter that is executing this script so the
# packages are installed into the environment the imports below use; a bare
# `pip` found on PATH may belong to a different Python installation.
# The exit status is intentionally not checked (best-effort, as before): if
# the packages are still missing, the imports below fail with ImportError.
subprocess.run([sys.executable, '-m', 'pip', 'install', 'nltk', 'numpy'], check=False)

import nltk
import numpy as np

# Download the 'punkt' tokenizer resource for NLTK.
# NOTE(review): recent NLTK releases may look for 'punkt_tab' instead --
# confirm against the installed NLTK version.
nltk.download('punkt')
def train_model(corpus):
    """Build a next-word lookup table from raw text.

    The text is split into tokens with ``nltk.word_tokenize``; each token is
    then mapped to the list of tokens that immediately followed it, in order
    of occurrence. Repeated successors are kept, so the list length reflects
    how often each continuation was seen.

    Args:
        corpus: The training text as a single string.

    Returns:
        A plain ``dict`` mapping each token that has at least one successor
        to the list of its successor tokens. A corpus with fewer than two
        tokens yields an empty dict.
    """
    print("Training the model...")
    tokens = nltk.word_tokenize(corpus)
    model = {}
    # Walk adjacent (token, next token) pairs. zip stops at the shorter
    # sequence, so the final token is never paired with a missing successor.
    for current, following in zip(tokens, tokens[1:]):
        model.setdefault(current, []).append(following)
    # This function only builds the table; persisting it is left to the caller.
    print("Model has been trained. Saving the model...")
    return model
import pickle
# Train the model on a given corpus.
# Read the corpus inside a context manager so the file handle is closed as
# soon as the text has been loaded instead of lingering until garbage
# collection.
with open('corpus.txt') as corpus_file:
    corpus = corpus_file.read()
model = train_model(corpus)

# Save the model to a file.
# NOTE: the result is a pickle; only load model.pkl from sources you trust,
# since unpickling can execute arbitrary code.
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)