patomp's picture
Upload model
e49b33f
raw
history blame
891 Bytes
from transformers import PreTrainedModel
from pythainlp import word_vector
import torch
from .configuration import ThaiLightWeightEncoderConfig
from .projector import Projector
class ThaiLightWeightEncoderModel(PreTrainedModel):
    """Sentence encoder built from a PyThaiNLP word-vector model and a projector.

    ``forward`` vectorizes a sentence with ``self.wv`` and passes the resulting
    vector through ``self.projector``.
    """

    config_class = ThaiLightWeightEncoderConfig

    def __init__(self, config):
        """Create the word-vector backend and the projection head.

        Args:
            config: Configuration object. This constructor reads
                ``word_vector_model_name``, ``input_embedding_dim``,
                ``final_embedding_dim`` and ``dropout`` from it.
        """
        super().__init__(config)
        self.wv = word_vector.WordVector(model_name=config.word_vector_model_name)
        self.projector = Projector(
            input_embedding_dim=config.input_embedding_dim,
            final_embedding_dim=config.final_embedding_dim,
            dropout=config.dropout,
        )

    def forward(self, text: str):
        """Encode ``text`` and return the projected embedding.

        Args:
            text: The sentence to encode.

        Returns:
            The projector output, detached from the autograd graph, moved to
            the CPU and converted to a NumPy array.
        """
        # Only element 0 of the vectorizer result is used.
        # NOTE(review): presumably the result is a (1, dim) array whose first
        # row is the sentence vector -- confirm against the PyThaiNLP docs.
        sentence_vec = self.wv.sentence_vectorizer(text, use_mean=True)[0]
        inputs = torch.from_numpy(sentence_vec).float()

        # ``torch.from_numpy`` always yields a CPU tensor. If the model has
        # been moved to another device (e.g. ``model.to("cuda")``), feeding a
        # CPU tensor to the projector raises a device-mismatch error, so the
        # input is placed wherever the model's parameters live.
        first_param = next(self.parameters(), None)
        if first_param is not None:
            inputs = inputs.to(first_param.device)

        proj_embed = self.projector(inputs)
        return proj_embed.to("cpu").detach().numpy()