Source code for transformers4rec.tf.block.transformer

#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import inspect
from typing import Any, Dict, Optional, Type, Union

import tensorflow as tf
import transformers
from transformers import PretrainedConfig, TFPreTrainedModel

from ...config.transformer import T4RecConfig, transformer_registry
from ..masking import MaskSequence
from ..utils.tf_utils import (
    get_tf_main_layer,
    maybe_deserialize_keras_objects,
    maybe_serialize_keras_objects,
)
from .base import Block

# Type alias used to annotate the `transformer` argument of `TransformerBlock`:
# a HF TensorFlow pre-trained model, a HF configuration object, or a Keras layer.
TransformerBody = Union[TFPreTrainedModel, PretrainedConfig, tf.keras.layers.Layer]


class TransformerPrepare(tf.keras.layers.Layer):
    """
    Base layer for modules that build the inputs handed to a transformer body.

    The layer only stores the ``transformer`` and ``masking`` objects it is
    given; sub-classes are expected to override :meth:`call`.

    Parameters
    ----------
    transformer:
        The transformer object this module prepares inputs for.
    masking:
        The masking object associated with the transformer (may be ``None``).
    **kwargs:
        Forwarded unchanged to ``tf.keras.layers.Layer.__init__``.
    """

    def __init__(self, transformer, masking, **kwargs):
        super(TransformerPrepare, self).__init__(**kwargs)
        # Assignment order is kept as-is: Keras tracks attributes in the
        # order they are set on the layer.
        self.transformer = transformer
        self.masking = masking

    def call(self, inputs_embeds, **kwargs) -> Dict[str, Any]:
        """
        Build the transformer arguments from ``inputs_embeds``.

        The base class provides no implementation.

        Raises
        ------
        NotImplementedError
            Always; concrete sub-classes must override this method.
        """
        raise NotImplementedError
@tf.keras.utils.register_keras_serializable(package="transformers4rec")
class TransformerBlock(Block):
    """
    Class to support HF Transformers for session-based and
    sequential-based recommendation models.

    Parameters
    ----------
    transformer: TransformerBody
        The T4RecConfig, the pre-trained HF model or the custom keras layer
        TF*MainLayer, related to a specific transformer architecture.
    masking: Optional[MaskSequence]
        Needed when masking is applied on the inputs.
    prepare_module: Optional[Type[TransformerPrepare]]
        Class used to build the transformer inputs from the embeddings.
        When omitted, it is looked up in ``TRANSFORMER_TO_PREPARE`` using the
        type of the resolved transformer layer.
    output_fn: callable
        Applied to the raw outputs of the transformer to produce the block
        output. By default the first element of the outputs is returned.
    **kwargs:
        Forwarded unchanged to the parent ``Block`` constructor.

    Raises
    ------
    ValueError
        If ``masking`` requires arguments that are not parameters of the
        transformer's ``call`` method.
    """

    # Maps a transformer type to the TransformerPrepare sub-class to use for it.
    TRANSFORMER_TO_PREPARE: Dict[Type[TFPreTrainedModel], Type[TransformerPrepare]] = {}
    # TODO: Add {GPT2Model: GPT2Prepare}

    def __init__(
        self,
        transformer: TransformerBody,
        masking: Optional[MaskSequence] = None,
        prepare_module: Optional[Type[TransformerPrepare]] = None,
        output_fn=lambda model_outputs: model_outputs[0],
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Resolve whatever was passed in to the layer that will actually be called.
        self.transformer: TFPreTrainedModel
        if isinstance(transformer, T4RecConfig):
            self.transformer = get_tf_main_layer(transformer.to_huggingface_tf_model())
        elif isinstance(transformer, PretrainedConfig):
            model_cls = transformers.TF_MODEL_MAPPING[transformer.__class__]
            self.transformer = get_tf_main_layer(model_cls(transformer))
        elif isinstance(transformer, TFPreTrainedModel):
            self.transformer = get_tf_main_layer(transformer)
        else:
            # Anything else is used as-is.
            self.transformer = transformer

        if masking:
            # Every argument the masking needs must be accepted by the
            # transformer. Keras layers expose `call` (not the PyTorch-style
            # `forward`), which is also the method inspected in `self.call`.
            required = list(masking.transformer_required_arguments().keys())
            accepted = inspect.signature(self.transformer.call).parameters
            if not all(param in accepted for param in required):
                raise ValueError(
                    f"{masking.__class__.__name__} requires the parameters: "
                    f"{', '.join(required)} "
                    f"in the {type(self.transformer)} signature"
                )

        self.masking = masking

        self.prepare_module: Optional[TransformerPrepare] = None
        if not prepare_module and type(self.transformer) in self.TRANSFORMER_TO_PREPARE:
            prepare_module = self.TRANSFORMER_TO_PREPARE[type(self.transformer)]
        if prepare_module:
            # NOTE(review): the original `transformer` argument is passed here,
            # not the resolved `self.transformer` -- confirm this is intended.
            self.prepare_module = prepare_module(transformer, masking)
        self.output_fn = output_fn

    def get_config(self):
        """
        Return the layer configuration.

        Starts from the parent configuration and passes it through
        ``maybe_serialize_keras_objects`` for the ``transformer``,
        ``prepare_module`` and ``masking`` attributes.
        """
        config = super().get_config()
        config = maybe_serialize_keras_objects(
            self, config, ["transformer", "prepare_module", "masking"]
        )

        return config

    @classmethod
    def from_config(cls, config):
        """
        Re-create the block from a configuration produced by ``get_config``.

        The ``transformer``, ``prepare_module`` and ``masking`` entries are
        passed through ``maybe_deserialize_keras_objects`` before delegating
        to the parent ``from_config``.
        """
        config = maybe_deserialize_keras_objects(
            config, ["transformer", "prepare_module", "masking"]
        )

        return super().from_config(config)

    @classmethod
    def from_registry(
        cls,
        transformer: str,
        d_model: int,
        n_head: int,
        n_layer: int,
        total_seq_length: int,
        masking: Optional[MaskSequence] = None,
    ):
        """
        Load the HF transformer architecture based on its name

        Parameters
        ----------
        transformer: str
            Name of the Transformer to use. Possible values are :
            ["reformer", "gpt2", "longformer", "electra", "albert", "xlnet"]
        d_model: int
            size of hidden states for Transformers
        n_head: int
            Number of attention heads for Transformers
        n_layer: int
            Number of layers for RNNs and Transformers
        total_seq_length: int
            The maximum sequence length
        masking: Optional[MaskSequence]
            Masking object forwarded to the block constructor.

        Returns
        -------
        An instance of ``cls`` wrapping the object built by the registry entry.
        """
        _transformer = transformer_registry.parse(transformer).build(
            d_model=d_model,
            n_head=n_head,
            n_layer=n_layer,
            total_seq_length=total_seq_length,
        )

        return cls(_transformer, masking=masking)

    def call(self, inputs_embeds: tf.Tensor, **kwargs):
        """
        Run the transformer on the embedded inputs.

        Parameters
        ----------
        inputs_embeds: tf.Tensor
            An embedded representation of a sequence.
            NOTE(review): presumably of shape
            (batch_size, sequence_length, hidden_size) -- confirm.

        Returns
        -------
        The result of ``output_fn`` applied to the transformer outputs
        (with the default ``output_fn``, the first element of the outputs).
        """
        transformer_kwargs = {"inputs_embeds": inputs_embeds}
        if self.prepare_module:
            # The prepare module replaces the default arguments entirely.
            transformer_kwargs = self.prepare_module(inputs_embeds)
        if self.masking:
            masking_kwargs = self.masking.transformer_arguments
            if masking_kwargs:
                transformer_kwargs.update(masking_kwargs)

        # Keep only the arguments the transformer's `call` actually accepts.
        accepted = inspect.signature(self.transformer.call).parameters
        filtered_transformer_kwargs = {
            param: transformer_kwargs[param] for param in accepted if param in transformer_kwargs
        }

        # In HF the call accept inputs as a dictionary containing all needed tensors
        model_outputs = self.transformer(filtered_transformer_kwargs)
        outputs = self.output_fn(model_outputs)

        # TODO: store the attention outputs for meta-data logging
        return outputs