2024-06-23_14-11-26_epoch_2 / model.py

Upload model

7d712bc verified 4 months ago

8.33 kB

	from typing import Any, Dict, Optional

	import torch
	import torch.nn as nn
	from transformers import AutoConfig, AutoModel, PreTrainedModel
	from transformers.modeling_outputs import (
	BaseModelOutputWithPooling,
	MaskedLMOutput,
	BaseModelOutput,
	SequenceClassifierOutput,
	)
	from enum import Enum

	from .config import ILKTConfig

	def cls_pooling(last_hidden_state, attention_mask):
	    """Pool a sequence by taking the hidden state at position 0.

	    Indexes ``last_hidden_state`` as ``[:, 0, :]``, so a 3-D input yields a
	    2-D result. ``attention_mask`` is accepted but not read by this pooling
	    strategy.
	    """
	    first_token_state = last_hidden_state[:, 0, :]
	    return first_token_state


	def create_head_blocks(
	    hidden_size: int,
	    n_dense: int,
	    use_batch_norm: bool,
	    use_layer_norm: bool,
	    dropout: float,
	    **kwargs,
	) -> nn.Module:
	    """Build a stack of ``n_dense`` identical dense stages.

	    Each stage is ``Linear(hidden_size, hidden_size)``, then an optional
	    normalisation layer, then ``ReLU``, then an optional ``Dropout``.

	    Args:
	        hidden_size: Input and output width of every linear layer.
	        n_dense: Number of stages; ``0`` gives an empty ``nn.Sequential``.
	        use_batch_norm: Insert ``nn.BatchNorm1d`` after each linear layer.
	            Takes precedence over ``use_layer_norm`` when both are set.
	        use_layer_norm: Insert ``nn.LayerNorm`` after each linear layer
	            (only consulted when ``use_batch_norm`` is false).
	        dropout: Dropout probability; a ``Dropout`` layer is added only
	            when this is greater than zero.
	        **kwargs: Ignored. Lets callers splat a head config dict that
	            carries additional keys.

	    Returns:
	        An ``nn.Sequential`` holding all stages flattened into one list.
	    """
	    layers = []
	    for _ in range(n_dense):
	        stage = [nn.Linear(hidden_size, hidden_size)]

	        # At most one normalisation layer per stage; batch norm wins.
	        if use_batch_norm:
	            stage.append(nn.BatchNorm1d(hidden_size))
	        elif use_layer_norm:
	            stage.append(nn.LayerNorm(hidden_size))

	        stage.append(nn.ReLU())

	        if dropout > 0:
	            stage.append(nn.Dropout(dropout))

	        layers.extend(stage)

	    return nn.Sequential(*layers)


	class SentenceEmbeddingHead(nn.Module):
	    """Head that turns backbone hidden states into one embedding per input.

	    The head config dict is splatted into ``create_head_blocks`` and is also
	    read at forward time for the ``"pool_type"`` and
	    ``"normalize_embeddings"`` keys.
	    """

	    def __init__(
	        self, backbone_hidden_size: int, embedding_head_config: Dict[str, Any]
	    ):
	        super().__init__()
	        self.config = embedding_head_config

	        dense_stack = create_head_blocks(
	            backbone_hidden_size, **embedding_head_config
	        )
	        # The stack stays wrapped in an outer Sequential so that parameter
	        # names keep the ``head.0.<layer>`` prefix.
	        self.head = nn.Sequential(dense_stack)

	    def forward(
	        self, backbone_output: BaseModelOutput, attention_mask: torch.Tensor, **kwargs
	    ) -> BaseModelOutputWithPooling:
	        """Pool, project and optionally L2-normalise the backbone output.

	        Args:
	            backbone_output: Backbone result; only ``last_hidden_state`` is
	                read.
	            attention_mask: Passed through to the pooling function.
	            **kwargs: Ignored.

	        Returns:
	            ``BaseModelOutputWithPooling`` with the untouched
	            ``last_hidden_state`` and the embedding in ``pooler_output``.

	        Raises:
	            NotImplementedError: If ``config["pool_type"]`` is not ``"cls"``.
	        """
	        # Guard first: "cls" is the only pooling strategy available.
	        if self.config["pool_type"] != "cls":
	            raise NotImplementedError(
	                f"Pooling type {self.config['pool_type']} not implemented"
	            )

	        hidden_states = backbone_output.last_hidden_state
	        pooled = cls_pooling(hidden_states, attention_mask)
	        projected = self.head(pooled)

	        if self.config["normalize_embeddings"]:
	            projected = nn.functional.normalize(projected, p=2, dim=-1)

	        return BaseModelOutputWithPooling(
	            last_hidden_state=backbone_output.last_hidden_state,
	            pooler_output=projected,  # type: ignore
	        )


	class MLMHead(nn.Module):
	    """Masked-language-model head: dense stack plus a vocabulary projection.

	    The head config dict is splatted into ``create_head_blocks``; the final
	    layer is ``Linear(backbone_hidden_size, vocab_size)``.
	    """

	    def __init__(
	        self,
	        backbone_hidden_size: int,
	        vocab_size: int,
	        mlm_head_config: Dict[str, Any],
	    ):
	        super().__init__()
	        self.config = mlm_head_config

	        # Layout (nested stack at index 0, projection at index 1) is kept so
	        # parameter names stay ``head.0.*`` / ``head.1.*``.
	        self.head = nn.Sequential(
	            create_head_blocks(backbone_hidden_size, **mlm_head_config),
	            nn.Linear(backbone_hidden_size, vocab_size),
	        )

	    def forward(
	        self,
	        backbone_output: BaseModelOutput,
	        attention_mask: torch.Tensor,
	        labels: Optional[torch.Tensor] = None,
	        **kwargs,
	    ) -> MaskedLMOutput:
	        """Score every token position against the vocabulary.

	        Args:
	            backbone_output: Backbone result; only ``last_hidden_state`` is
	                read.
	            attention_mask: Accepted for interface parity with the other
	                heads; not read here.
	            labels: Optional target ids with one entry per token position.
	                When given, a cross-entropy loss is computed with
	                ``nn.CrossEntropyLoss`` defaults.
	            **kwargs: Ignored.

	        Returns:
	            ``MaskedLMOutput`` whose ``logits`` have the leading dimensions
	            of ``last_hidden_state`` and a trailing vocabulary dimension;
	            ``loss`` is ``None`` when no labels are supplied.
	        """
	        hidden_states = backbone_output.last_hidden_state

	        # Run the head on a 2-D (tokens, hidden) view. BatchNorm1d treats
	        # dim 1 of a 3-D input as the channel axis, so feeding
	        # (batch, seq, hidden) directly would normalise over the sequence
	        # axis (or fail on a size mismatch) when batch norm is enabled.
	        # Linear / LayerNorm / ReLU / Dropout act per feature vector and are
	        # unaffected by the flattening.
	        flat_hidden = hidden_states.reshape(-1, hidden_states.size(-1))
	        flat_scores = self.head(flat_hidden)
	        prediction_scores = flat_scores.view(
	            *hidden_states.shape[:-1], flat_scores.size(-1)
	        )

	        loss = None
	        if labels is not None:
	            loss_fct = nn.CrossEntropyLoss()
	            # reshape (not view): labels sliced from a larger tensor may be
	            # non-contiguous, which makes ``view(-1)`` raise.
	            loss = loss_fct(flat_scores, labels.reshape(-1))
	        return MaskedLMOutput(loss=loss, logits=prediction_scores)


	class CLSHead(nn.Module):
	    """Sequence-classification head: pooling, dense stack, class projection.

	    The head config dict is splatted into ``create_head_blocks`` and is read
	    at forward time for the ``"pool_type"`` key.
	    """

	    def __init__(
	        self,
	        backbone_hidden_size: int,
	        n_classes: int,
	        cls_head_config: Dict[str, Any],
	    ):
	        super().__init__()
	        self.config = cls_head_config

	        dense_stack = create_head_blocks(backbone_hidden_size, **cls_head_config)
	        classifier = nn.Linear(backbone_hidden_size, n_classes)
	        # Nested stack at index 0, classifier at index 1.
	        self.head = nn.Sequential(dense_stack, classifier)

	    def forward(
	        self,
	        backbone_output: BaseModelOutput,
	        attention_mask: torch.Tensor,
	        labels: Optional[torch.Tensor] = None,
	        **kwargs,
	    ) -> SequenceClassifierOutput:
	        """Classify each input from its pooled representation.

	        Args:
	            backbone_output: Backbone result; only ``last_hidden_state`` is
	                read.
	            attention_mask: Passed through to the pooling function.
	            labels: Optional class ids. When given, a cross-entropy loss is
	                computed with ``nn.CrossEntropyLoss`` defaults.
	            **kwargs: Ignored.

	        Returns:
	            ``SequenceClassifierOutput`` with ``logits`` and, when labels
	            were supplied, ``loss`` (otherwise ``None``).

	        Raises:
	            NotImplementedError: If ``config["pool_type"]`` is not ``"cls"``.
	        """
	        # Guard first: "cls" is the only pooling strategy available.
	        if self.config["pool_type"] != "cls":
	            raise NotImplementedError(
	                f"Pooling type {self.config['pool_type']} not implemented"
	            )

	        pooled = cls_pooling(backbone_output.last_hidden_state, attention_mask)
	        logits = self.head(pooled)

	        if labels is None:
	            loss = None
	        else:
	            n_logits = logits.size(-1)
	            criterion = nn.CrossEntropyLoss()
	            loss = criterion(logits.view(-1, n_logits), labels.view(-1))

	        return SequenceClassifierOutput(loss=loss, logits=logits)


	class ForwardRouting(Enum):
	    """Selects which output path a model ``forward`` call should take.

	    Each member's value is the name of the method it stands for.
	    """

	    # Pooled sentence embedding path.
	    GET_SENTENCE_EMBEDDING = "get_sentence_embedding"
	    # Masked-language-model path.
	    GET_MLM_OUTPUT = "get_mlm_output"
	    # Classification path.
	    GET_CLS_OUTPUT = "get_cls_output"


	class ILKTModel(PreTrainedModel):
	    """Pretrained backbone with sentence-embedding, MLM and classification heads.

	    ``forward`` dispatches to one of the three ``get_*`` methods according to
	    ``forward_routing``. Each of them runs the shared backbone and then the
	    selected head.
	    """

	    config_class = ILKTConfig

	    def __init__(self, config: ILKTConfig):
	        """Load the backbone and build every head described by ``config``.

	        Reads ``config.backbone_config`` (keyword arguments for
	        ``AutoConfig.from_pretrained``; must contain the key
	        ``"pretrained_model_name_or_path"``), ``config.embedding_head_config``,
	        ``config.mlm_head_config``, ``config.cls_head_config`` and
	        ``config.cls_heads`` (an iterable of ``(n_classes, name)`` pairs).
	        """
	        super().__init__(config)

	        backbone_config = AutoConfig.from_pretrained(**config.backbone_config)
	        # NOTE(review): the backbone weights are fetched with
	        # ``AutoModel.from_pretrained`` on every construction of this class.
	        self.backbone = AutoModel.from_pretrained(
	            config.backbone_config["pretrained_model_name_or_path"],
	            config=backbone_config,
	        )

	        backbone_hidden_size = backbone_config.hidden_size
	        # Mirror the backbone width on this model's own config.
	        self.config.hidden_size = backbone_hidden_size

	        self.embedding_head = SentenceEmbeddingHead(
	            backbone_hidden_size, config.embedding_head_config
	        )
	        self.mlm_head = MLMHead(
	            backbone_hidden_size,
	            backbone_config.vocab_size,
	            config.mlm_head_config,
	        )
	        # One classification head per configured (n_classes, name) pair,
	        # all sharing the same head config.
	        self.cls_heads = nn.ModuleDict(
	            {
	                name: CLSHead(
	                    backbone_hidden_size, n_classes, config.cls_head_config
	                )
	                for n_classes, name in config.cls_heads
	            }
	        )

	    def _run_backbone(
	        self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwargs
	    ):
	        """Call the backbone, omitting ``token_type_ids`` when it is ``None``.

	        Some backbones (e.g. DistilBERT) do not declare a ``token_type_ids``
	        parameter, so passing ``token_type_ids=None`` explicitly would raise
	        ``TypeError``. Leaving it out is equivalent for backbones that do
	        accept it with a ``None`` default.
	        """
	        if kwargs.get("token_type_ids") is None:
	            kwargs.pop("token_type_ids", None)
	        return self.backbone(
	            input_ids=input_ids, attention_mask=attention_mask, **kwargs
	        )

	    def forward(
	        self,
	        input_ids: torch.Tensor,
	        attention_mask: torch.Tensor,
	        token_type_ids: Optional[torch.Tensor] = None,
	        forward_routing: ForwardRouting = ForwardRouting.GET_SENTENCE_EMBEDDING,
	        **kwargs,
	    ):
	        """Dispatch to the output path selected by ``forward_routing``.

	        Args:
	            input_ids: Token ids passed to the backbone.
	            attention_mask: Attention mask passed to the backbone and head.
	            token_type_ids: Optional segment ids; forwarded to the backbone
	                only when not ``None``.
	            forward_routing: A ``ForwardRouting`` member, or the string
	                value of one (e.g. ``"get_mlm_output"``).
	            **kwargs: Extra arguments for the MLM / classification paths
	                (e.g. ``labels``, ``head_name``). They are not forwarded on
	                the sentence-embedding path.

	        Returns:
	            The output object of the selected ``get_*`` method.

	        Raises:
	            ValueError: If ``forward_routing`` is not a known routing.
	        """
	        try:
	            # Accepts both enum members and their string values.
	            routing = ForwardRouting(forward_routing)
	        except ValueError:
	            raise ValueError(
	                f"Unknown forward routing {forward_routing}"
	            ) from None

	        if routing is ForwardRouting.GET_SENTENCE_EMBEDDING:
	            return self.get_sentence_embedding(
	                input_ids, attention_mask, token_type_ids=token_type_ids
	            )
	        if routing is ForwardRouting.GET_MLM_OUTPUT:
	            return self.get_mlm_output(
	                input_ids, attention_mask, token_type_ids=token_type_ids, **kwargs
	            )
	        if routing is ForwardRouting.GET_CLS_OUTPUT:
	            return self.get_cls_output(
	                input_ids, attention_mask, token_type_ids=token_type_ids, **kwargs
	            )
	        # Only reachable if a new enum member is added without a branch here.
	        raise ValueError(f"Unknown forward routing {forward_routing}")

	    def get_sentence_embedding(
	        self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwargs
	    ):
	        """Run the backbone and the sentence-embedding head.

	        ``**kwargs`` are passed to the backbone (minus a ``None``
	        ``token_type_ids``) and to the head, which ignores them.
	        """
	        backbone_output: BaseModelOutput = self._run_backbone(
	            input_ids, attention_mask, **kwargs
	        )
	        return self.embedding_head(backbone_output, attention_mask, **kwargs)

	    def get_mlm_output(
	        self,
	        input_ids: torch.Tensor,
	        attention_mask: torch.Tensor,
	        labels: Optional[torch.Tensor] = None,
	        **kwargs,
	    ):
	        """Run the backbone and the MLM head.

	        ``labels`` goes to the head only; ``**kwargs`` are passed to the
	        backbone (minus a ``None`` ``token_type_ids``) and to the head.
	        """
	        backbone_output: BaseModelOutput = self._run_backbone(
	            input_ids, attention_mask, **kwargs
	        )
	        return self.mlm_head(backbone_output, attention_mask, labels, **kwargs)

	    def get_cls_output(
	        self,
	        input_ids: torch.Tensor,
	        attention_mask: torch.Tensor,
	        head_name: str,
	        labels: Optional[torch.Tensor] = None,
	        **kwargs,
	    ):
	        """Run the backbone and the classification head called ``head_name``.

	        Raises:
	            ValueError: If no classification head is registered under
	                ``head_name``.
	        """
	        # Validate before the backbone pass so a bad name fails fast
	        # instead of after a full forward computation.
	        if head_name not in self.cls_heads:
	            raise ValueError(f"Head {head_name} not found in model")

	        backbone_output: BaseModelOutput = self._run_backbone(
	            input_ids, attention_mask, **kwargs
	        )
	        return self.cls_heads[head_name](
	            backbone_output, attention_mask, labels, **kwargs
	        )