Source code for merlin.models.tf.blocks.mlp

#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import List, Optional, Union

import tensorflow as tf

from merlin.models.tf.core.base import Block
from merlin.models.tf.core.combinators import ResidualBlock, SequentialBlock, TabularAggregationType
from merlin.models.tf.core.tabular import Filter, tabular_aggregation_registry
from merlin.models.tf.utils.tf_utils import (
    maybe_deserialize_keras_objects,
    maybe_serialize_keras_objects,
)
from merlin.models.utils.misc_utils import filter_kwargs
from merlin.schema import Schema, Tags

InitializerType = Union[str, tf.keras.initializers.Initializer]
RegularizerType = Union[str, tf.keras.regularizers.Regularizer]


def MLPBlock(
    dimensions: List[int],
    activation: Union[str, List[str]] = "relu",
    use_bias: bool = True,
    kernel_initializer: InitializerType = "glorot_uniform",
    bias_initializer: InitializerType = "zeros",
    kernel_regularizer: Optional[RegularizerType] = None,
    bias_regularizer: Optional[RegularizerType] = None,
    activity_regularizer: Optional[RegularizerType] = None,
    dropout: Optional[float] = None,
    normalization: Optional[Union[str, tf.keras.layers.Layer]] = None,
    filter: Optional[Union[Schema, Tags, List[str], "Filter"]] = None,
    no_activation_last_layer: bool = False,
    block_name: str = "MLPBlock",
    **kwargs,
) -> SequentialBlock:
    """
    A block that applies a multi-layer perceptron to the input.

    Example usage::

        mlp = ml.InputBlock(schema).connect(ml.MLPBlock([64, 32]))

    Parameters
    ----------
    dimensions: List[int]
        The number of units in each layer of the MLP.
    activation: str or List[str]
        The activation function to use in each layer. If a list is provided,
        its length must match the length of ``dimensions``. Defaults to "relu".
    use_bias: bool
        Whether to use a bias in the MLP. Defaults to True.
    kernel_initializer: InitializerType
        Initializer for the kernel weights matrix. Defaults to "glorot_uniform".
    bias_initializer: InitializerType
        Initializer for the bias vector. Defaults to "zeros".
    kernel_regularizer: Optional[RegularizerType]
        Regularizer function applied to the kernel weights matrix. Defaults to None.
    bias_regularizer: Optional[RegularizerType]
        Regularizer function applied to the bias vector. Defaults to None.
    activity_regularizer: Optional[RegularizerType]
        Regularizer function applied to the output of the layer (its "activation").
        Defaults to None.
    dropout: float
        The dropout rate to use.
    normalization: str or Layer
        The normalization layer to use.
    filter: Schema, Tags, List[str], or Filter
        The filter to apply to the inputs of the MLP.
    no_activation_last_layer: bool
        Ensures that no activation function (i.e. "linear") or dropout is used
        in the output of the last MLP layer. Defaults to False.
    block_name: str
        The name of the block.
    """
    if isinstance(activation, list) and len(activation) != len(dimensions):
        raise ValueError(
            f"Activation and dimensions length mismatch. "
            f"Activation length: {len(activation)}, dimensions length: {len(dimensions)}"
        )

    block_layers = []
    for idx, dim in enumerate(dimensions):
        dropout_layer = None
        activation_idx = activation if isinstance(activation, str) else activation[idx]
        if no_activation_last_layer and idx == len(dimensions) - 1:
            activation_idx = "linear"
        else:
            if dropout:
                if activation_idx in ["selu", tf.keras.activations.selu]:
                    # Best practice for SELU. It is also recommended to use
                    # kernel_initializer="lecun_normal"
                    dropout_layer = tf.keras.layers.AlphaDropout(dropout)
                else:
                    dropout_layer = tf.keras.layers.Dropout(dropout)

        block_layers.append(
            _Dense(
                dim,
                activation=activation_idx,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer,
            )
        )
        if dropout_layer:
            block_layers.append(dropout_layer)

        if normalization:
            if normalization == "batch_norm":
                block_layers.append(tf.keras.layers.BatchNormalization())
            elif isinstance(normalization, tf.keras.layers.Layer):
                block_layers.append(normalization)
            else:
                raise ValueError(
                    "Normalization needs to be an instance of `Layer` or the string `batch_norm`"
                )

    return SequentialBlock(block_layers, filter=filter, block_name=block_name, **kwargs)
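
# NOTE: The following usage sketch is illustrative only and not part of the module.
# The layer sizes, dropout rate, and input tensor shape are arbitrary choices for
# demonstration.
#
#     import tensorflow as tf
#     from merlin.models.tf.blocks.mlp import MLPBlock
#
#     mlp = MLPBlock(
#         [64, 32],
#         dropout=0.2,
#         normalization="batch_norm",
#         no_activation_last_layer=True,
#     )
#     outputs = mlp(tf.random.uniform((8, 16)))  # expected output shape: (8, 32)
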
def DenseResidualBlock(
    low_rank_dim: Optional[int] = None,
    activation: Optional[Union[str, tf.keras.layers.Layer]] = "relu",
    use_bias: bool = True,
    dropout: Optional[float] = None,
    normalization: Optional[Union[str, tf.keras.layers.Layer]] = "batch_norm",
    depth: int = 1,
    **dense_kwargs,
) -> Block:
    """A block that applies a dense residual block to the input.

    The residual consists of the input summed element-wise with the output of the
    dense block. The dense block projects the inputs with dense layers to the same
    output dim. If the input is very high dimensional, low_rank_dim can be used to
    create a low-rank matrix and reduce the number of parameters needed for this
    projection.

    Example usage::

        block = ml.DenseResidualBlock(depth=3).connect(ml.MLPBlock([1]))

    Parameters
    ----------
    low_rank_dim: int, optional
        The dimension of the low-rank matrix. If set, the input is projected to
        low_rank_dim (`LR`) and then back to the input dim (`I`) as output.
        That requires far fewer parameters (`I*LR + LR*I`) than projecting the
        input dim directly to the same dim (`I*I`). By default None
    activation: Union[str, tf.keras.layers.Layer], optional
        The activation function to use. By default "relu"
    use_bias: bool
        Whether to use a bias in the MLP. By default True
    dropout: float, optional
        The dropout rate to use. By default None
    normalization: Union[str, tf.keras.layers.Layer], optional
        The normalization layer to use. By default "batch_norm"
    depth: int
        The number of residual blocks to stack. By default 1
    """
    block_layers = []
    block_layers.append(
        DenseMaybeLowRank(low_rank_dim, activation=None, use_bias=use_bias, **dense_kwargs)
    )
    if dropout:
        block_layers.append(tf.keras.layers.Dropout(dropout))
    if normalization:
        if normalization == "batch_norm":
            block_layers.append(tf.keras.layers.BatchNormalization())
        elif isinstance(normalization, tf.keras.layers.Layer):
            block_layers.append(normalization)
        else:
            raise ValueError(
                "Normalization needs to be an instance of `Layer` or the string `batch_norm`"
            )

    output = ResidualBlock(
        SequentialBlock(block_layers, block_name="DenseResidual"), activation=activation
    )

    if depth > 1:
        return output.repeat(depth - 1)
    elif depth < 1:
        raise ValueError(
            "The depth (number of stacked residual blocks) needs "
            "to be equal to or greater than 1."
        )

    return output
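
# NOTE: The following sketch is illustrative only and not part of the module. It stacks
# three residual dense blocks over a plain tensor; the low_rank_dim and input shape are
# arbitrary. The residual connection keeps the output dim equal to the input dim.
#
#     import tensorflow as tf
#     from merlin.models.tf.blocks.mlp import DenseResidualBlock
#
#     block = DenseResidualBlock(low_rank_dim=8, depth=3)
#     outputs = block(tf.random.uniform((16, 64)))  # expected output shape: (16, 64)
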
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class _Dense(tf.keras.layers.Layer):
    """Wraps `tf.keras.layers.Dense`, aggregating dictionary inputs into a single
    tensor (with `pre_aggregation`) before applying the dense projection."""

    def __init__(
        self,
        units,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        pre_aggregation="concat",
        dense=None,
        **kwargs,
    ):
        super(_Dense, self).__init__(**kwargs)
        self.dense = dense or tf.keras.layers.Dense(
            units,
            activation,
            use_bias,
            kernel_initializer,
            bias_initializer,
            kernel_regularizer,
            bias_regularizer,
            activity_regularizer,
            kernel_constraint,
            bias_constraint,
            **kwargs,
        )
        self.pre_aggregation = pre_aggregation
        self.units = units

    def call(self, inputs, **kwargs):
        if isinstance(inputs, dict):
            inputs = tabular_aggregation_registry.parse(self.pre_aggregation)(inputs)
        filtered_kwargs = filter_kwargs(kwargs, self.dense)

        return self.dense(inputs, **filtered_kwargs)

    def compute_output_shape(self, input_shape):
        if isinstance(input_shape, dict):
            agg = tabular_aggregation_registry.parse(self.pre_aggregation)
            input_shape = agg.compute_output_shape(input_shape)

        return self.dense.compute_output_shape(input_shape)

    def get_config(self):
        config = super(_Dense, self).get_config()
        config["pre_aggregation"] = self.pre_aggregation
        config["units"] = self.units

        return maybe_serialize_keras_objects(self, config, ["dense"])

    @classmethod
    def from_config(cls, config):
        config = maybe_deserialize_keras_objects(config, {"dense": tf.keras.layers.deserialize})

        return cls(**config)


@tf.keras.utils.register_keras_serializable(package="merlin.models")
class DenseMaybeLowRank(tf.keras.layers.Layer):
    def __init__(
        self,
        low_rank_dim: Optional[int] = None,
        use_bias: bool = True,
        activation: Optional[Union[str, tf.keras.layers.Layer]] = None,
        kernel_initializer: InitializerType = "truncated_normal",
        bias_initializer: InitializerType = "zeros",
        kernel_regularizer: Optional[RegularizerType] = None,
        bias_regularizer: Optional[RegularizerType] = None,
        pre_aggregation: Optional[TabularAggregationType] = "concat",
        dense: Optional[tf.keras.layers.Dense] = None,
        dense_u: Optional[tf.keras.layers.Dense] = None,
        **kwargs,
    ):
        """A block that projects the inputs to the same input dim.
        If the input is very high dimensional, low_rank_dim can be used to create
        a low-rank matrix and reduce the number of parameters needed for this
        projection.

        Parameters
        ----------
        low_rank_dim : Optional[int], optional
            The dimension of the low-rank matrix. If set, the input is projected to
            low_rank_dim (`LR`) and then back to the input dim (`I`) as output.
            That requires far fewer parameters (`I*LR + LR*I`) than projecting the
            input dim directly to the same dim (`I*I`), by default None.
        use_bias : bool, optional
            Whether to use a bias in the MLP, by default True
        activation : Optional[Union[str, tf.keras.layers.Layer]], optional
            The activation function to use. By default None
        kernel_initializer: InitializerType
            Initializer for the kernel weights matrix. Defaults to "truncated_normal".
        bias_initializer: InitializerType
            Initializer for the bias vector. Defaults to "zeros".
        kernel_regularizer: Optional[RegularizerType]
            Regularizer function applied to the kernel weights matrix. Defaults to None.
        bias_regularizer: Optional[RegularizerType]
            Regularizer function applied to the bias vector. Defaults to None.
        pre_aggregation : Optional[TabularAggregationType], optional
            Aggregation to be applied before the projection, by default "concat"
        dense : Optional[tf.keras.layers.Dense], optional
            An optional dense layer to be used for the projection, by default None.
            If not set, it is created internally.
        dense_u : Optional[tf.keras.layers.Dense], optional
            An optional dense layer to be called first if low_rank_dim is set,
            by default None. If not set, it is created internally.
        """
        super().__init__(**kwargs)
        self.low_rank_dim = low_rank_dim
        self.use_bias = use_bias
        self.activation = activation
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
        self.pre_aggregation = pre_aggregation
        self.dense = dense
        self.dense_u = dense_u

    def build(self, input_shape):
        last_dim = input_shape[-1]

        if self.dense is None:
            self.dense = _Dense(
                last_dim,
                activation=self.activation,
                kernel_initializer=self.kernel_initializer,
                bias_initializer=self.bias_initializer,
                kernel_regularizer=self.kernel_regularizer,
                bias_regularizer=self.bias_regularizer,
                use_bias=self.use_bias,
            )

        if self.low_rank_dim is not None and self.dense_u is None:
            self.dense_u = _Dense(
                self.low_rank_dim,
                activation=self.activation,
                kernel_initializer=self.kernel_initializer,
                kernel_regularizer=self.kernel_regularizer,
                use_bias=False,
            )

        super(DenseMaybeLowRank, self).build(input_shape)

    def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor:
        if isinstance(inputs, dict):
            inputs = tabular_aggregation_registry.parse(self.pre_aggregation)(inputs)

        if self.low_rank_dim is None:
            return self.dense(inputs)  # type: ignore

        return self.dense(self.dense_u(inputs))  # type: ignore

    def compute_output_shape(self, input_shape):
        if isinstance(input_shape, dict):
            agg = tabular_aggregation_registry.parse(self.pre_aggregation)
            input_shape = agg.compute_output_shape(input_shape)

        return input_shape

    def get_config(self):
        config = dict(
            low_rank_dim=self.low_rank_dim,
            use_bias=self.use_bias,
            activation=self.activation,
            pre_aggregation=self.pre_aggregation,
        )
        config.update(super(DenseMaybeLowRank, self).get_config())
        config = maybe_serialize_keras_objects(
            self,
            config,
            [
                "dense",
                "dense_u",
            ],
        )

        return config

    @classmethod
    def from_config(cls, config):
        config = maybe_deserialize_keras_objects(
            config,
            [
                "dense",
                "dense_u",
            ],
        )

        return cls(**config)
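
# NOTE: The following sketch is illustrative only and not part of the module. It shows
# the parameter saving of the low-rank projection: with input dim I=1024 and
# low_rank_dim=64, the two projections use about I*LR + LR*I = 131,072 weights
# (plus bias), instead of I*I = 1,048,576 for a direct full-rank projection.
# The input shape below is arbitrary.
#
#     import tensorflow as tf
#     from merlin.models.tf.blocks.mlp import DenseMaybeLowRank
#
#     layer = DenseMaybeLowRank(low_rank_dim=64)
#     outputs = layer(tf.random.uniform((32, 1024)))  # output keeps the input dim: (32, 1024)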