#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import torch
from nvtabular.framework_utils.torch.layers import ConcatenatedEmbeddings, MultiHotEmbeddings


class Model(torch.nn.Module):
"""
Generic Base Pytorch Model, that contains support for Categorical and Continuous values.
Parameters
----------
embedding_tables_shapes: dict
A dictionary representing the <column>: <max cardinality of column> for all
categorical columns.
num_continuous: int
Number of continuous columns in data.
emb_dropout: float, 0 - 1
Sets the embedding dropout rate.
layer_hidden_dims: list
Hidden layer dimensions.
layer_dropout_rates: list
A list of the layer dropout rates expressed as floats, 0-1, for each layer
max_output: float
Signifies the max output.
"""
def __init__(
self,
embedding_table_shapes,
num_continuous,
emb_dropout,
layer_hidden_dims,
layer_dropout_rates,
max_output=None,
bag_mode="sum",
):
super().__init__()
self.max_output = max_output
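        # ``embedding_table_shapes`` may also be a tuple of
        # (single-hot shapes, multi-hot shapes); split it so each group
        # gets its own embedding layer below.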
mh_shapes = None
if isinstance(embedding_table_shapes, tuple):
embedding_table_shapes, mh_shapes = embedding_table_shapes
if embedding_table_shapes:
self.initial_cat_layer = ConcatenatedEmbeddings(
embedding_table_shapes, dropout=emb_dropout
)
if mh_shapes:
self.mh_cat_layer = MultiHotEmbeddings(mh_shapes, dropout=emb_dropout, mode=bag_mode)
self.initial_cont_layer = torch.nn.BatchNorm1d(num_continuous)
embedding_size = sum(emb_size for _, emb_size in embedding_table_shapes.values())
if mh_shapes is not None:
embedding_size = embedding_size + sum(emb_size for _, emb_size in mh_shapes.values())
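        # The first hidden layer consumes the concatenated embedding output plus the
        # batch-normalized continuous features; each later layer consumes the output
        # of the layer before it.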
layer_input_sizes = [embedding_size + num_continuous] + layer_hidden_dims[:-1]
layer_output_sizes = layer_hidden_dims
self.layers = torch.nn.ModuleList(
torch.nn.Sequential(
torch.nn.Linear(input_size, output_size),
torch.nn.ReLU(inplace=True),
torch.nn.BatchNorm1d(output_size),
torch.nn.Dropout(dropout_rate),
)
for input_size, output_size, dropout_rate in zip(
layer_input_sizes, layer_output_sizes, layer_dropout_rates
)
)
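        # Final projection to a single output value per example.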
self.output_layer = torch.nn.Linear(layer_output_sizes[-1], 1)

    def forward(self, x_cat, x_cont):
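        # x_cat: single-hot category indices, or a tuple of
        # (single-hot indices, multi-hot inputs); x_cont: continuous features.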
mh_cat = None
concat_list = []
if isinstance(x_cat, tuple):
x_cat, mh_cat = x_cat
if mh_cat:
mh_cat = self.mh_cat_layer(mh_cat)
concat_list.append(mh_cat)
        # use ``is not None`` for tensors and a length check for empty lists
if x_cat is not None and len(x_cat) > 0:
x_cat = self.initial_cat_layer(x_cat)
concat_list.append(x_cat)
if x_cont is not None and len(x_cont) > 0:
x_cont = self.initial_cont_layer(x_cont)
concat_list.append(x_cont)
        # if concat_list is empty (no inputs provided), indexing below raises by design
if len(concat_list) > 1:
x = torch.cat(concat_list, 1)
else:
x = concat_list[0]
for layer in self.layers:
x = layer(x)
x = self.output_layer(x)
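        # Optionally squash the raw output into (0, max_output) with a scaled sigmoid.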
if self.max_output:
x = self.max_output * torch.sigmoid(x)
x = x.view(-1)
return x
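

# A minimal usage sketch (illustrative, single-hot path only). The table shapes,
# column names, and hyperparameters below are hypothetical; it assumes x_cat holds
# one index column per categorical table, in the order of ``table_shapes``, as
# consumed by ConcatenatedEmbeddings.
if __name__ == "__main__":
    table_shapes = {"user_id": (1000, 16), "item_id": (5000, 32)}
    model = Model(
        embedding_table_shapes=table_shapes,
        num_continuous=3,
        emb_dropout=0.1,
        layer_hidden_dims=[64, 32],
        layer_dropout_rates=[0.2, 0.2],
        max_output=5.0,
    )
    model.eval()  # use running batch-norm statistics for this tiny batch
    x_cat = torch.randint(0, 1000, (8, 2), dtype=torch.long)  # (batch, n_categorical_columns)
    x_cont = torch.randn(8, 3)  # (batch, num_continuous)
    preds = model(x_cat, x_cont)
    print(preds.shape)  # torch.Size([8])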