Source code for nvtabular.framework_utils.torch.models

#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import torch

from nvtabular.framework_utils.torch.layers import ConcatenatedEmbeddings, MultiHotEmbeddings


class Model(torch.nn.Module):
    """
    Generic base PyTorch model with support for categorical and continuous values.

    Parameters
    ----------
    embedding_table_shapes: dict
        A dictionary mapping each categorical column name to a
        (cardinality, embedding size) tuple. May also be a tuple of two such
        dictionaries, (single-hot shapes, multi-hot shapes), when multi-hot
        categorical columns are present.
    num_continuous: int
        Number of continuous columns in the data.
    emb_dropout: float, 0 - 1
        Sets the embedding dropout rate.
    layer_hidden_dims: list
        Hidden layer dimensions.
    layer_dropout_rates: list
        A list of the layer dropout rates, expressed as floats in 0-1, one per hidden layer.
    max_output: float
        If set, the output is passed through a sigmoid and scaled by this value,
        bounding predictions to (0, max_output).
    bag_mode: str
        Mode passed to the multi-hot embedding layers; defaults to "sum".
    """

    def __init__(
        self,
        embedding_table_shapes,
        num_continuous,
        emb_dropout,
        layer_hidden_dims,
        layer_dropout_rates,
        max_output=None,
        bag_mode="sum",
    ):
        super().__init__()
        self.max_output = max_output
        mh_shapes = None
        if isinstance(embedding_table_shapes, tuple):
            embedding_table_shapes, mh_shapes = embedding_table_shapes
        if embedding_table_shapes:
            self.initial_cat_layer = ConcatenatedEmbeddings(
                embedding_table_shapes, dropout=emb_dropout
            )
        if mh_shapes:
            self.mh_cat_layer = MultiHotEmbeddings(mh_shapes, dropout=emb_dropout, mode=bag_mode)
        self.initial_cont_layer = torch.nn.BatchNorm1d(num_continuous)

        embedding_size = sum(emb_size for _, emb_size in embedding_table_shapes.values())
        if mh_shapes is not None:
            embedding_size = embedding_size + sum(emb_size for _, emb_size in mh_shapes.values())
        layer_input_sizes = [embedding_size + num_continuous] + layer_hidden_dims[:-1]
        layer_output_sizes = layer_hidden_dims
        self.layers = torch.nn.ModuleList(
            torch.nn.Sequential(
                torch.nn.Linear(input_size, output_size),
                torch.nn.ReLU(inplace=True),
                torch.nn.BatchNorm1d(output_size),
                torch.nn.Dropout(dropout_rate),
            )
            for input_size, output_size, dropout_rate in zip(
                layer_input_sizes, layer_output_sizes, layer_dropout_rates
            )
        )

        self.output_layer = torch.nn.Linear(layer_output_sizes[-1], 1)
    def forward(self, x_cat, x_cont):
        mh_cat = None
        concat_list = []
        if isinstance(x_cat, tuple):
            x_cat, mh_cat = x_cat
        if mh_cat:
            mh_cat = self.mh_cat_layer(mh_cat)
            concat_list.append(mh_cat)
        # must use "is not None" for tensors, and len logic for an empty list
        if x_cat is not None and len(x_cat) > 0:
            x_cat = self.initial_cat_layer(x_cat)
            concat_list.append(x_cat)
        if x_cont is not None and len(x_cont) > 0:
            x_cont = self.initial_cont_layer(x_cont)
            concat_list.append(x_cont)
        # if there are no entries in concat_list this breaks by design
        if len(concat_list) > 1:
            x = torch.cat(concat_list, 1)
        else:
            x = concat_list[0]
        for layer in self.layers:
            x = layer(x)
        x = self.output_layer(x)
        if self.max_output:
            x = self.max_output * torch.sigmoid(x)
        x = x.view(-1)
        return x
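
# Usage sketch (not part of the original module): a minimal example of constructing
# and calling the model. The column names, cardinalities, embedding sizes, and layer
# dimensions below are hypothetical and chosen only to illustrate the expected
# argument and input shapes (single-hot categoricals only, no multi-hot columns).
if __name__ == "__main__":
    # Each categorical column maps to a (cardinality, embedding size) tuple.
    embedding_table_shapes = {
        "user_id": (1000, 16),  # hypothetical cardinality and embedding size
        "item_id": (500, 8),
    }
    model = Model(
        embedding_table_shapes=embedding_table_shapes,
        num_continuous=2,
        emb_dropout=0.04,
        layer_hidden_dims=[128, 64],
        layer_dropout_rates=[0.2, 0.2],
    )
    # One integer column per categorical feature, one float column per continuous feature.
    x_cat = torch.randint(0, 500, (32, 2))
    x_cont = torch.rand(32, 2)
    out = model(x_cat, x_cont)
    print(out.shape)  # torch.Size([32])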