Source code for transformers4rec.torch.model.prediction_task

#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import logging
from typing import Dict, Iterable, Optional

import torch
import torchmetrics as tm

from ..block.base import Block, BuildableBlock, SequentialBlock
from ..block.mlp import MLPBlock
from ..masking import MaskedLanguageModeling
from ..ranking_metric import AvgPrecisionAt, NDCGAt, RecallAt
from ..utils.torch_utils import LambdaModule
from .base import BlockType, PredictionTask

LOG = logging.getLogger("transformers4rec")


class BinaryClassificationPrepareBlock(BuildableBlock):
    def build(self, input_size) -> SequentialBlock:
        return SequentialBlock(
            torch.nn.Linear(input_size[-1], 1, bias=False),
            torch.nn.Sigmoid(),
            LambdaModule(lambda x: x.view(-1)),
            output_size=[
                None,
            ],
        )
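
# Illustrative sketch (not part of the library source): the block built above
# maps a (batch, hidden) summary to per-example probabilities. The shapes here
# are assumptions for demonstration only.
#
#     prepare = BinaryClassificationPrepareBlock().build(input_size=[64, 256])
#     summaries = torch.randn(64, 256)   # (batch, hidden)
#     probs = prepare(summaries)         # sigmoid outputs, shape (64,)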


class BinaryClassificationTask(PredictionTask):
    """Returns a ``PredictionTask`` for binary classification.

    Example usage::

        # Define the input module to process the tabular input features.
        input_module = tr.TabularSequenceFeatures.from_schema(
            schema,
            max_sequence_length=max_sequence_length,
            continuous_projection=d_model,
            aggregation="concat",
            masking=None,
        )

        # Define XLNetConfig class and set default parameters for HF XLNet config.
        transformer_config = tr.XLNetConfig.build(
            d_model=d_model, n_head=4, n_layer=2, total_seq_length=max_sequence_length
        )

        # Define the model block including: inputs, masking, projection and transformer block.
        body = tr.SequentialBlock(
            input_module,
            tr.MLPBlock([64]),
            tr.TransformerBlock(transformer_config, masking=input_module.masking),
        )

        # Define a head with BinaryClassificationTask.
        head = tr.Head(
            body,
            tr.BinaryClassificationTask(
                "click",
                summary_type="mean",
                metrics=[
                    tm.Precision(task="binary"),
                    tm.Recall(task="binary"),
                    tm.Accuracy(task="binary"),
                    tm.F1Score(task="binary"),
                ],
            ),
            inputs=input_module,
        )

        # Get the end-to-end Model class.
        model = tr.Model(head)

    Parameters
    ----------
    target_name: Optional[str] = None
        Specifies the variable name that represents the positive and negative values.
    task_name: Optional[str] = None
        Specifies the name of the prediction task. If this parameter is not specified,
        a name is automatically constructed based on ``target_name`` and the Python
        class name of the model.
    task_block: Optional[BlockType] = None
        Specifies a module to transform the input tensor before computing predictions.
    loss: torch.nn.Module
        Specifies the loss function for the task.
        The default class is ``torch.nn.BCELoss``.
    metrics: Tuple[torch.nn.Module, ...]
        Specifies the metrics to calculate during training and evaluation.
        The default metrics are ``Precision``, ``Recall``, and ``Accuracy``.
    summary_type: str
        Summarizes a sequence into a single tensor. Accepted values are:

            - ``last`` -- Take the last token hidden state (like XLNet)
            - ``first`` -- Take the first token hidden state (like Bert)
            - ``mean`` -- Take the mean of all tokens hidden states
            - ``cls_index`` -- Supply a Tensor of classification token position (GPT/GPT-2)
            - ``attn`` -- Not implemented now, use multi-head attention
    """

    DEFAULT_LOSS = torch.nn.BCELoss()
    DEFAULT_METRICS = (
        tm.Precision(num_classes=2, task="binary"),
        tm.Recall(num_classes=2, task="binary"),
        tm.Accuracy(task="binary"),
        # TODO: Fix this: tm.AUC()
    )

    def __init__(
        self,
        target_name: Optional[str] = None,
        task_name: Optional[str] = None,
        task_block: Optional[BlockType] = None,
        loss=DEFAULT_LOSS,
        metrics=DEFAULT_METRICS,
        summary_type="first",
    ):
        self.target_dim = 1
        super().__init__(
            loss=loss,
            metrics=metrics,
            target_name=target_name,
            task_name=task_name,
            summary_type=summary_type,
            task_block=task_block,
            pre=BinaryClassificationPrepareBlock(),
            forward_to_prediction_fn=lambda x: torch.round(x).int(),
        )


class RegressionPrepareBlock(BuildableBlock):
    def build(self, input_size) -> SequentialBlock:
        return SequentialBlock(
            torch.nn.Linear(input_size[-1], 1),
            LambdaModule(lambda x: x.view(-1)),
            output_size=[
                None,
            ],
        )


class RegressionTask(PredictionTask):
    DEFAULT_LOSS = torch.nn.MSELoss()
    DEFAULT_METRICS = (tm.regression.MeanSquaredError(),)

    def __init__(
        self,
        target_name: Optional[str] = None,
        task_name: Optional[str] = None,
        task_block: Optional[BlockType] = None,
        loss=DEFAULT_LOSS,
        metrics=DEFAULT_METRICS,
        summary_type="first",
    ):
        self.target_dim = 1
        super().__init__(
            loss=loss,
            metrics=metrics,
            target_name=target_name,
            task_name=task_name,
            summary_type=summary_type,
            task_block=task_block,
            pre=RegressionPrepareBlock(),
        )
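
# Illustrative sketch (not from the library docs): RegressionTask plugs into a
# Head the same way as BinaryClassificationTask above; "watch_time" is a
# hypothetical continuous target column.
#
#     head = tr.Head(
#         body,
#         tr.RegressionTask("watch_time", summary_type="mean"),
#         inputs=input_module,
#     )
#     model = tr.Model(head)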


class NextItemPredictionTask(PredictionTask):
    """This block performs the next-item prediction task for session-based and
    sequential models.

    It requires a body containing a masking schema to use for training and
    target generation. For the supported masking schemes, please refer to:
    https://nvidia-merlin.github.io/Transformers4Rec/main/model_definition.html#sequence-masking

    Parameters
    ----------
    loss: torch.nn.Module
        Loss function to use. Defaults to NLLLoss.
    metrics: Iterable[torchmetrics.Metric]
        List of ranking metrics to use for evaluation.
    task_block:
        Module to transform the input tensor before computing predictions.
    task_name: str, optional
        Name of the prediction task; if not provided, a name is automatically
        constructed based on the target name and class name.
    weight_tying: bool
        Whether the item-id embedding table weights are shared with the
        prediction network layer.
    softmax_temperature: float
        Softmax temperature, used to reduce model overconfidence, so that
        softmax(logits / T). Value 1.0 reduces to regular softmax.
    padding_idx: int
        Index of the padding token.
    target_dim: Optional[int]
        Vocabulary size of the item ids.
    """

    DEFAULT_METRICS = (
        # default metrics assume labels are int encoded
        NDCGAt(top_ks=[10, 20], labels_onehot=True),
        AvgPrecisionAt(top_ks=[10, 20], labels_onehot=True),
        RecallAt(top_ks=[10, 20], labels_onehot=True),
    )

    def __init__(
        self,
        loss: torch.nn.Module = torch.nn.NLLLoss(ignore_index=0),
        metrics: Iterable[tm.Metric] = DEFAULT_METRICS,
        task_block: Optional[BlockType] = None,
        task_name: str = "next-item",
        weight_tying: bool = False,
        softmax_temperature: float = 1,
        padding_idx: int = 0,
        target_dim: Optional[int] = None,
    ):
        super().__init__(loss=loss, metrics=metrics, task_block=task_block, task_name=task_name)
        self.softmax_temperature = softmax_temperature
        self.weight_tying = weight_tying
        self.padding_idx = padding_idx
        self.target_dim = target_dim

        self.item_embedding_table = None
        self.masking = None

    def build(self, body, input_size, device=None, inputs=None, task_block=None, pre=None):
        """Build method; this is called by the `Head`."""
        if isinstance(input_size, dict) or len(input_size) != 3:
            raise ValueError(
                f"NextItemPredictionTask needs a 3-dim vector as input, found: {input_size}"
            )

        # Retrieve the embedding module to get the name of the item-id column
        # and its related embedding table
        if not inputs:
            inputs = body.inputs
        if not getattr(inputs, "item_id", None):
            raise ValueError(
                "For the item prediction task, a categorical_module "
                "including an item_id column is required."
            )
        self.embeddings = inputs.categorical_module
        if not self.target_dim:
            self.target_dim = self.embeddings.item_embedding_table.num_embeddings
        if self.weight_tying:
            self.item_embedding_table = self.embeddings.item_embedding_table
            item_dim = self.item_embedding_table.weight.shape[1]
            if input_size[-1] != item_dim and not task_block:
                LOG.warning(
                    f"Projecting inputs of NextItemPredictionTask to '{item_dim}', "
                    f"as weight tying requires the input dimension '{input_size[-1]}' "
                    f"to be equal to the item-id embedding dimension '{item_dim}'."
                )
                # project input tensors to the same dimension as the item-id embeddings
                task_block = MLPBlock([item_dim])

        # Retrieve the masking from the input block
        self.masking = inputs.masking
        if not self.masking:
            raise ValueError(
                "The input block should contain a masking schema for training and evaluation"
            )
        self.padding_idx = self.masking.padding_idx
        pre = NextItemPredictionPrepareBlock(
            target_dim=self.target_dim,
            weight_tying=self.weight_tying,
            item_embedding_table=self.item_embedding_table,
            softmax_temperature=self.softmax_temperature,
        )
        super().build(
            body, input_size, device=device, inputs=inputs, task_block=task_block, pre=pre
        )

    def forward(self, inputs: torch.Tensor, targets=None, training=False, testing=False, **kwargs):
        if isinstance(inputs, (tuple, list)):
            inputs = inputs[0]
        x = inputs.float()

        if self.task_block:
            x = self.task_block(x)  # type: ignore

        # Retrieve labels from the masking schema
        if training or testing:
            labels = self.masking.masked_targets  # type: ignore
            trg_flat = labels.flatten()
            non_pad_mask = trg_flat != self.padding_idx
            labels_all = torch.masked_select(trg_flat, non_pad_mask)
            # remove padded items, keep only masked positions
            x = self.remove_pad_3d(x, non_pad_mask)
            x = self.pre(x)  # type: ignore

            loss = self.loss(x, labels_all)
            return {
                "loss": loss,
                "labels": labels_all,
                "predictions": x,
            }
        else:
            # Get the hidden position to use for predicting the next item
            labels = self.embeddings.item_seq
            non_pad_mask = labels != self.padding_idx
            rows_ids = torch.arange(labels.size(0), dtype=torch.long, device=labels.device)
            if isinstance(self.masking, MaskedLanguageModeling):
                last_item_sessions = non_pad_mask.sum(dim=1)
            else:
                last_item_sessions = non_pad_mask.sum(dim=1) - 1
            x = x[rows_ids, last_item_sessions]

            # Compute prediction probabilities
            x = self.pre(x)  # type: ignore

            return x

    def remove_pad_3d(self, inp_tensor, non_pad_mask):
        # inp_tensor: (n_batch x seqlen x emb_dim)
        inp_tensor = inp_tensor.flatten(end_dim=1)
        inp_tensor_fl = torch.masked_select(
            inp_tensor, non_pad_mask.unsqueeze(1).expand_as(inp_tensor)
        )
        out_tensor = inp_tensor_fl.view(-1, inp_tensor.size(1))
        return out_tensor
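
    # Illustrative sketch (not part of the library): how remove_pad_3d is
    # meant to behave. `task` stands for a built NextItemPredictionTask
    # instance; the shapes below are assumptions for demonstration only.
    #
    #     hidden = torch.randn(2, 3, 4)                    # (batch, seq, dim)
    #     mask = torch.tensor([1, 1, 0, 1, 1, 0]).bool()   # flattened (batch*seq,)
    #     task.remove_pad_3d(hidden, mask).shape           # -> torch.Size([4, 4])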

    def calculate_metrics(self, predictions, targets) -> Dict[str, torch.Tensor]:  # type: ignore
        if isinstance(targets, dict) and self.target_name:
            targets = targets[self.target_name]

        outputs = {}
        predictions = self.forward_to_prediction_fn(predictions)

        for metric in self.metrics:
            outputs[self.metric_name(metric)] = metric(predictions, targets)

        return outputs

    def compute_metrics(self):
        metrics = {
            self.metric_name(metric): metric.compute()
            for metric in self.metrics
            if getattr(metric, "top_ks", None)
        }
        # Explode metrics for each cut-off
        # TODO: make result generic, to accept a mix of ranking metrics
        # and others not requiring top_ks
        topks = {self.metric_name(metric): metric.top_ks for metric in self.metrics}
        results = {}
        for name, metric in metrics.items():
            for measure, k in zip(metric, topks[name]):
                results[f"{name}_{k}"] = measure
        return results
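
# Illustrative sketch (mirrors the library examples): NextItemPredictionTask
# is typically wired into a Head whose input module defines a masking scheme
# (e.g. masking="mlm"), which the task uses for target generation. The
# `schema` and `transformer_config` objects are assumed to be defined as in
# the BinaryClassificationTask example above.
#
#     input_module = tr.TabularSequenceFeatures.from_schema(
#         schema,
#         max_sequence_length=20,
#         masking="mlm",
#     )
#     body = tr.SequentialBlock(
#         input_module,
#         tr.TransformerBlock(transformer_config, masking=input_module.masking),
#     )
#     head = tr.Head(body, tr.NextItemPredictionTask(weight_tying=True), inputs=input_module)
#     model = tr.Model(head)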


class NextItemPredictionPrepareBlock(BuildableBlock):
    def __init__(
        self,
        target_dim: int,
        weight_tying: bool = False,
        item_embedding_table: Optional[torch.nn.Module] = None,
        softmax_temperature: float = 0,
    ):
        super().__init__()
        self.target_dim = target_dim
        self.weight_tying = weight_tying
        self.item_embedding_table = item_embedding_table
        self.softmax_temperature = softmax_temperature

    def build(self, input_size) -> Block:
        return Block(
            _NextItemPredictionTask(
                input_size,
                self.target_dim,
                self.weight_tying,
                self.item_embedding_table,
                self.softmax_temperature,
            ),
            [-1, self.target_dim],
        )


class _NextItemPredictionTask(torch.nn.Module):
    """Predict the interacted item-id probabilities.

    - During inference, the task consists of predicting the next item.
    - During training, the class supports the following Language Modeling tasks:
      Causal LM, Masked LM, Permutation LM and Replacement Token Detection.

    Parameters
    ----------
    input_size: int
        Input size of this module.
    target_dim: int
        Dimension of the target.
    weight_tying: bool
        Whether the item-id embedding table weights are shared with the
        prediction network layer.
    item_embedding_table: torch.nn.Module
        Module that is used to store the embedding table for the items.
    softmax_temperature: float
        Softmax temperature, used to reduce model overconfidence, so that
        softmax(logits / T). Value 1.0 reduces to regular softmax.
    """

    def __init__(
        self,
        input_size: int,
        target_dim: int,
        weight_tying: bool = False,
        item_embedding_table: Optional[torch.nn.Module] = None,
        softmax_temperature: float = 0,
    ):
        super().__init__()
        self.input_size = input_size
        self.target_dim = target_dim
        self.weight_tying = weight_tying
        self.item_embedding_table = item_embedding_table
        self.softmax_temperature = softmax_temperature
        self.log_softmax = torch.nn.LogSoftmax(dim=-1)

        if self.weight_tying:
            self.output_layer_bias = torch.nn.Parameter(torch.Tensor(self.target_dim))
            torch.nn.init.zeros_(self.output_layer_bias)
        else:
            self.output_layer = torch.nn.Linear(
                self.input_size[-1], self.target_dim  # type: ignore
            )

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        if self.weight_tying:
            logits = torch.nn.functional.linear(
                inputs,
                weight=self.item_embedding_table.weight,  # type: ignore
                bias=self.output_layer_bias,
            )
        else:
            logits = self.output_layer(inputs)

        if self.softmax_temperature:
            # Divide logits by the temperature to reduce model overconfidence
            # and better calibrate probabilities and accuracy
            logits = torch.div(logits, self.softmax_temperature)

        predictions = self.log_softmax(logits)

        return predictions

    def _get_name(self) -> str:
        return "NextItemPredictionTask"
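
# Illustrative sketch: the effect of the softmax temperature used by
# _NextItemPredictionTask. Dividing logits by a temperature T > 1 flattens the
# distribution (reducing overconfidence); T = 1 is the regular softmax. The
# numbers are rounded examples.
#
#     logits = torch.tensor([2.0, 1.0, 0.1])
#     torch.softmax(logits, dim=-1)         # ~[0.66, 0.24, 0.10]
#     torch.softmax(logits / 2.0, dim=-1)   # flatter: ~[0.50, 0.30, 0.19]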