# word2vec-uk / pipeline.py
from gensim.models import KeyedVectors
from typing import List, Dict

class PreTrainedPipeline:
    def __init__(self, path=""):
        from huggingface_hub import hf_hub_download

        # Download the plain-text word2vec vectors from the Hugging Face Hub
        # and load them into a gensim KeyedVectors instance.
        self.model = KeyedVectors.load_word2vec_format(
            hf_hub_download(repo_id="lang-uk/word2vec-uk", filename="ubercorpus.cased.tokenized.300d"),
            binary=False,
        )

    def __call__(self, inputs: str) -> List[Dict]:
        """
        Args:
            inputs (:obj:`str`):
                a single word to look up in the vector vocabulary
        Return:
            A :obj:`List[Dict]` with one item whose "generated_text" field
            lists the 30 most similar words, comma-separated
        """
        inputs = inputs.strip()
        # most_similar returns (word, cosine similarity) pairs; keep only the words.
        neighbours = ", \n\n".join(word for word, _ in self.model.most_similar(inputs, topn=30))
        return [{"generated_text": neighbours}]
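

# A minimal usage sketch, assuming gensim and huggingface_hub are installed
# and that the example query word "кіт" is present in the model vocabulary.
if __name__ == "__main__":
    pipeline = PreTrainedPipeline()
    # Prints the 30 nearest-neighbour words for the query word.
    print(pipeline("кіт")[0]["generated_text"])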