import os

# Install dependencies at runtime (assumes pip targets the active environment).
os.system('pip install nltk numpy')

import nltk
import numpy as np

# Fetch the Punkt tokenizer data that nltk.word_tokenize relies on.
nltk.download('punkt')
|
def train_model(corpus):
    print("Training the model...")
    tokens = nltk.word_tokenize(corpus)

    # Build a first-order Markov chain: map each token to the list of tokens
    # that follow it in the corpus (repeats preserve transition frequency).
    model = {}
    for i in range(len(tokens) - 1):
        if tokens[i] in model:
            model[tokens[i]].append(tokens[i + 1])
        else:
            model[tokens[i]] = [tokens[i + 1]]

    print("Model has been trained. Saving the model...")
    return model
|
|
|
import pickle

# Read the training text; assumes corpus.txt sits alongside this script.
with open("corpus.txt", encoding="utf-8") as f:
    corpus = f.read()

model = train_model(corpus)

# Serialize the trained model to disk so it can be reloaded later.
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)
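
# A minimal usage sketch (an assumption, not part of the script above): reload
# the pickled model and sample a continuation by repeatedly picking a random
# successor token. The generate() helper and the "the" seed word are illustrative.
import random

def generate(model, start, length=20):
    word = start
    output = [word]
    for _ in range(length - 1):
        followers = model.get(word)
        if not followers:  # the token never appeared with a successor
            break
        word = random.choice(followers)
        output.append(word)
    return " ".join(output)

with open("model.pkl", "rb") as f:
    loaded = pickle.load(f)

print(generate(loaded, "the"))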