// marinone94's picture
// Training in progress, epoch 0
// 1ce325b
// raw
// history blame
// 1.24 kB
#include "split_worker.hh"
#include "../common/ngram.hh"
namespace lm {
namespace interpolate {
// Builds a worker for n-grams of the given order and hooks its two output
// streams onto the supplied chains: backoff_input_ is attached to
// backoff_chain and sort_input_ is attached to sort_chain.
SplitWorker::SplitWorker(
    std::size_t order,
    util::stream::Chain &backoff_chain,
    util::stream::Chain &sort_chain)
    : order_(order) {
  // Attach in the same order as the parameters: backoff first, then sort.
  backoff_chain >> backoff_input_;
  sort_chain >> sort_input_;
}
// Consumes n-gram records (word ids plus probability and backoff) from
// `position` and splits each one into two outgoing records:
//   * sort_input_:    the word ids immediately followed by the probability
//   * backoff_input_: the backoff value alone, as a single float
// Once the input is exhausted both output streams are poisoned.
void SplitWorker::Run(const util::stream::ChainPosition &position) {
  for (util::stream::Stream records(position); records; ++records) {
    NGram<ProbBackoff> entry(records.Get(), order_);

    // Sort stream record: copy the word ids, then append the probability
    // directly after the last id.
    float probability = entry.Value().prob;
    lm::WordIndex *dest = reinterpret_cast<lm::WordIndex *>(sort_input_.Get());
    const lm::WordIndex *src = entry.begin();
    while (src != entry.end()) {
      *dest = *src;
      ++dest;
      ++src;
    }
    *reinterpret_cast<float *>(dest) = probability;
    ++sort_input_;

    // Backoff stream record: just the backoff float.
    float backoff_weight = entry.Value().backoff;
    *reinterpret_cast<float *>(backoff_input_.Get()) = backoff_weight;
    ++backoff_input_;
  }

  // No more input: poison the sort stream first, then the backoff stream.
  sort_input_.Poison();
  backoff_input_.Poison();
}
}
}