namespace lm { | |
namespace interpolate { | |
SplitWorker::SplitWorker(std::size_t order, util::stream::Chain &backoff_chain, | |
util::stream::Chain &sort_chain) | |
: order_(order) { | |
backoff_chain >> backoff_input_; | |
sort_chain >> sort_input_; | |
} | |
void SplitWorker::Run(const util::stream::ChainPosition &position) { | |
// input: ngram record (id, prob, and backoff) | |
// output: a float to the backoff_input stream | |
// an ngram id and a float to the sort_input stream | |
for (util::stream::Stream stream(position); stream; ++stream) { | |
NGram<ProbBackoff> ngram(stream.Get(), order_); | |
// write id and prob to the sort stream | |
float prob = ngram.Value().prob; | |
lm::WordIndex *out = reinterpret_cast<lm::WordIndex *>(sort_input_.Get()); | |
for (const lm::WordIndex *it = ngram.begin(); it != ngram.end(); ++it) { | |
*out++ = *it; | |
} | |
*reinterpret_cast<float *>(out) = prob; | |
++sort_input_; | |
// write backoff to the backoff output stream | |
float boff = ngram.Value().backoff; | |
*reinterpret_cast<float *>(backoff_input_.Get()) = boff; | |
++backoff_input_; | |
} | |
sort_input_.Poison(); | |
backoff_input_.Poison(); | |
} | |
} | |
} | |