#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
from typing import List, Optional, Union
import tensorflow as tf
from tensorflow.keras.layers import Layer
from tensorflow.python.ops import embedding_ops
import merlin.io
from merlin.models.tf.inputs.embedding import EmbeddingTable
from merlin.models.tf.metrics.topk import AvgPrecisionAt, MRRAt, NDCGAt, PrecisionAt, RecallAt
from merlin.models.tf.outputs.base import MetricsFn, ModelOutput
from merlin.models.tf.utils.tf_utils import (
maybe_deserialize_keras_objects,
maybe_serialize_keras_objects,
tensor_to_df,
)
from merlin.schema import ColumnSchema, Schema
LOG = logging.getLogger("merlin_models")
def default_binary_metrics() -> List[tf.keras.metrics.Metric]:
    """Returns the default metrics for binary classification.

    Returns
    -------
    List[tf.keras.metrics.Metric]
        List with metrics for binary classification
    """
    # Return a list (not a tuple) so the value matches the declared
    # ``List`` return type and callers may extend it safely.
    return [
        tf.keras.metrics.Precision(name="precision"),
        tf.keras.metrics.Recall(name="recall"),
        tf.keras.metrics.BinaryAccuracy(name="binary_accuracy"),
        tf.keras.metrics.AUC(name="auc"),
    ]
def default_categorical_prediction_metrics(k=10) -> List[tf.keras.metrics.Metric]:
    """Returns the default top-k metrics for categorical classification.

    Parameters
    ----------
    k : int, optional
        Cut-off for the top-k metrics, by default 10

    Returns
    -------
    List[tf.keras.metrics.Metric]
        List with top-k metrics for categorical classification
    """
    # Return a list to match the sibling ``default_binary_metrics`` contract.
    return [
        RecallAt(k),
        MRRAt(k),
        NDCGAt(k),
        AvgPrecisionAt(k),
        PrecisionAt(k),
    ]
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class BinaryOutput(ModelOutput):
    """Binary-classification prediction block.

    Parameters
    ----------
    target: Union[str, ColumnSchema], optional
        The name of the target. If a ColumnSchema is provided, the target
        name is inferred from it.
    pre: Optional[Layer], optional
        Optional block to transform predictions before computing the binary logits,
        by default None
    post: Optional[Layer], optional
        Optional block to transform the binary logits,
        by default None
    logits_temperature: float, optional
        Parameter used to reduce model overconfidence, so that logits / T.
        by default 1.0
    name: str, optional
        The name of the task.
    default_loss: Union[str, tf.keras.losses.Loss], optional
        Default loss to use for binary-classification,
        by default 'binary_crossentropy'
    default_metrics_fn: Callable
        A function returning the list of default metrics
        to use for binary-classification
    """

    def __init__(
        self,
        target: Optional[Union[str, ColumnSchema]] = None,
        pre: Optional[Layer] = None,
        post: Optional[Layer] = None,
        logits_temperature: float = 1.0,
        name: Optional[str] = None,
        default_loss: Union[str, tf.keras.losses.Loss] = "binary_crossentropy",
        default_metrics_fn: MetricsFn = default_binary_metrics,
        **kwargs,
    ):
        # A ColumnSchema is reduced to its column name; ModelOutput
        # expects the target as a string.
        if isinstance(target, ColumnSchema):
            target = target.name
        # Allow a pre-built prediction layer via kwargs; otherwise use a
        # single sigmoid unit producing the binary probability.
        to_call = kwargs.pop("to_call", None)
        super().__init__(
            to_call=to_call or tf.keras.layers.Dense(1, activation="sigmoid"),
            default_loss=default_loss,
            default_metrics_fn=default_metrics_fn,
            target=target,
            pre=pre,
            post=post,
            logits_temperature=logits_temperature,
            name=name,
            **kwargs,
        )
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class CategoricalOutput(ModelOutput):
    """Categorical (multi-class) prediction block.

    Parameters
    ----------
    to_call: Union[Schema, ColumnSchema, EmbeddingTable, 'CategoricalTarget',
        'EmbeddingTablePrediction']
        The target feature to predict. To perform the weight-tying [1] technique,
        you should provide the `EmbeddingTable` or `EmbeddingTablePrediction`
        related to the target feature.
    target_name: str, optional
        The name of the target column. If not provided, it is inferred from
        ``to_call`` when possible, by default None
    pre: Optional[Layer], optional
        Optional block to transform predictions before computing the logits,
        by default None
    post: Optional[Layer], optional
        Optional block to transform the logits,
        by default None
    logits_temperature: float, optional
        Parameter used to reduce model overconfidence, so that logits / T.
        by default 1.0
    name: str, optional
        The name of the task, by default None
    default_loss: Union[str, tf.keras.losses.Loss], optional
        Default loss to use for categorical-classification,
        by default 'categorical_crossentropy'
    default_metrics_fn: Callable, optional
        A function returning the list of default metrics
        to use for categorical-classification

    References:
    ----------
    [1] Hakan Inan, Khashayar Khosravi, and Richard Socher. 2016. Tying word vectors
    and word classifiers: A loss framework for language modeling. arXiv preprint
    arXiv:1611.01462 (2016).
    """

    def __init__(
        self,
        to_call: Union[
            Schema, ColumnSchema, EmbeddingTable, "CategoricalTarget", "EmbeddingTablePrediction"
        ],
        target_name: str = None,
        pre: Optional[Layer] = None,
        post: Optional[Layer] = None,
        logits_temperature: float = 1.0,
        name: Optional[str] = None,
        default_loss: Union[str, tf.keras.losses.Loss] = "categorical_crossentropy",
        default_metrics_fn: MetricsFn = default_categorical_prediction_metrics,
        **kwargs,
    ):
        self.max_num_samples = kwargs.pop("max_num_samples", None)
        # A deserialized layer may arrive via kwargs (see get_config);
        # it is used as-is when the positional ``to_call`` is None.
        _to_call = kwargs.pop("to_call", None)
        if to_call is not None:
            if isinstance(to_call, (Schema, ColumnSchema)):
                # Build the output layer from the schema of the target column.
                _to_call = CategoricalTarget(to_call)
                if isinstance(to_call, Schema):
                    to_call = to_call.first
                target_name = target_name or to_call.name
            elif isinstance(to_call, EmbeddingTable):
                # Weight-tying: reuse the embedding table as output weights.
                _to_call = EmbeddingTablePrediction(to_call)
                target_name = _to_call.table.col_schema.name
            else:
                _to_call = to_call

        self.target_name = kwargs.pop("target", target_name)
        super().__init__(
            to_call=_to_call,
            default_loss=default_loss,
            default_metrics_fn=default_metrics_fn,
            name=name,
            target=self.target_name,
            pre=pre,
            post=post,
            logits_temperature=logits_temperature,
            **kwargs,
        )

    def to_dataset(self, gpu=True) -> merlin.io.Dataset:
        """Exports the output-layer embeddings as a merlin Dataset."""
        return merlin.io.Dataset(tensor_to_df(self.to_call.embeddings, gpu=gpu))

    def get_config(self):
        config = super().get_config()
        config["max_num_samples"] = self.max_num_samples
        config["target_name"] = self.target_name
        return config
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class CategoricalTarget(tf.keras.layers.Dense):
    """Prediction of a categorical feature.

    A ``Dense`` layer whose number of units equals the cardinality of the
    target categorical feature (inferred from its schema).

    Parameters
    ----------
    feature : Union[Schema, ColumnSchema]
        The schema description of the categorical feature to predict.
    activation : optional
        Activation function to use, by default None
    use_bias : bool, optional
        Whether the layer uses a bias vector, by default True
    kernel_initializer : str, optional
        Initializer for the kernel weights matrix, by default "glorot_uniform"
    bias_initializer : str, optional
        Initializer for the bias vector, by default "zeros"
    kernel_regularizer : optional
        Regularizer function applied to the kernel weights matrix, by default None
    bias_regularizer : optional
        Regularizer function applied to the bias vector, by default None
    activity_regularizer : optional
        Regularizer function applied to the output of the layer (its "activation"),
        by default None
    kernel_constraint : optional
        Constraint function applied to the kernel weights matrix, by default None
    bias_constraint : optional
        Constraint function applied to the bias vector, by default None
    """

    def __init__(
        self,
        feature: Union[Schema, ColumnSchema] = None,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        if feature is None:
            # No schema provided: the caller must pass the output size directly.
            units = kwargs.pop("units")
            self.num_classes = units
        else:
            col_schema = feature
            if isinstance(feature, Schema):
                assert len(feature) == 1, "Schema can have max 1 feature"
                col_schema = feature.first
            # Cardinality of the categorical domain (+1 for the zero index).
            self.num_classes = col_schema.int_domain.max + 1
            units = self.num_classes

        super().__init__(
            units,
            activation,
            use_bias,
            kernel_initializer,
            bias_initializer,
            kernel_regularizer,
            bias_regularizer,
            activity_regularizer,
            kernel_constraint,
            bias_constraint,
            **kwargs,
        )

    def embedding_lookup(self, inputs: tf.Tensor, **kwargs):
        """Retrieve hidden representation vectors from the kernel weight
        matrix for the given "input" positions.

        Parameters
        ----------
        inputs : tf.Tensor
            Tensor of indices to retrieve from the weight matrix.

        Returns
        -------
        tf.Tensor
            Tensor of hidden representation vectors.
        """
        return embedding_ops.embedding_lookup(self.embeddings, inputs, **kwargs)

    @property
    def embeddings(self):
        # The kernel has shape (hidden_dim, num_classes); expose it
        # transposed so each row corresponds to one class.
        return tf.transpose(self.kernel)
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class EmbeddingTablePrediction(Layer):
    """Prediction of a categorical feature using weight-sharing [1] with an
    embedding table.

    The logits are computed as the matrix product of the inputs with the
    (transposed) embedding table, optionally plus a bias term.

    Parameters
    ----------
    table : EmbeddingTable
        The embedding table to use as the weight matrix
    bias_initializer : str, optional
        Initializer for the bias vector, by default "zeros"
    use_bias : bool, optional
        Whether to add a bias term to weight-tying, by default False

    References:
    ----------
    [1] Hakan Inan, Khashayar Khosravi, and Richard Socher. 2016. Tying word vectors
    and word classifiers: A loss framework for language modeling. arXiv preprint
    arXiv:1611.01462 (2016).
    """

    def __init__(self, table: EmbeddingTable, bias_initializer="zeros", use_bias=False, **kwargs):
        self.table = table
        # One output unit per embedding row (i.e. per class).
        self.num_classes = table.input_dim
        self.bias_initializer = bias_initializer
        self.use_bias = use_bias
        super().__init__(**kwargs)

    def build(self, input_shape):
        if self.use_bias:
            self.bias = self.add_weight(
                name="output_layer_bias",
                shape=(self.num_classes,),
                initializer=self.bias_initializer,
            )
        # Make sure the shared table's weights exist before they are used.
        self.table.build(input_shape)
        return super().build(input_shape)

    def call(self, inputs, training=False, **kwargs) -> tf.Tensor:
        # Ragged inputs are flattened for the matmul and re-wrapped after.
        ragged_inputs = inputs if isinstance(inputs, tf.RaggedTensor) else None
        if ragged_inputs is not None:
            inputs = ragged_inputs.flat_values

        # Weight-tying: project onto the transposed embedding matrix.
        logits = tf.matmul(inputs, self.table.table.embeddings, transpose_b=True)
        if self.use_bias:
            logits = tf.nn.bias_add(logits, self.bias)

        if ragged_inputs is not None:
            logits = ragged_inputs.with_flat_values(logits)
        return logits

    @property
    def embeddings(self):
        return self.table.table.embeddings

    def embedding_lookup(self, inputs, **kwargs):
        return self.table.table(inputs, **kwargs)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.num_classes)

    def get_config(self):
        base_config = {**super().get_config()}
        return maybe_serialize_keras_objects(self, base_config, ["table"])

    @classmethod
    def from_config(cls, config):
        config = maybe_deserialize_keras_objects(config, ["table"])
        return super().from_config(config)