Source code for merlin.models.tf.blocks.dlrm

#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from functools import partial
from typing import Optional

import tensorflow as tf

from merlin.models.tf.blocks.interaction import DotProductInteraction
from merlin.models.tf.blocks.mlp import _Dense
from merlin.models.tf.core.aggregation import StackFeatures
from merlin.models.tf.core.base import Block, Debug
from merlin.models.tf.core.combinators import Filter, ParallelBlock, SequentialBlock
from merlin.models.tf.inputs.continuous import ContinuousFeatures
from merlin.models.tf.inputs.embedding import EmbeddingOptions, Embeddings
from merlin.models.utils.schema_utils import infer_embedding_dim
from merlin.schema import Schema, Tags


def DLRMBlock(
    schema: Schema,
    *,
    embedding_dim: Optional[int] = None,
    embedding_options: Optional[EmbeddingOptions] = None,
    bottom_block: Optional[Block] = None,
    top_block: Optional[Block] = None,
    embeddings: Optional[Block] = None,
) -> SequentialBlock:
    """Builds the DLRM architecture, as proposed in the following
    `paper <https://arxiv.org/pdf/1906.00091.pdf>`_ [1]_.

    References
    ----------
    .. [1] Naumov, Maxim, et al. "Deep learning recommendation model for
       personalization and recommendation systems." arXiv preprint
       arXiv:1906.00091 (2019).

    Parameters
    ----------
    schema : Schema
        The `Schema` with the input features
    embedding_dim : Optional[int], optional
        Dimension of the embeddings, by default None
    embedding_options : Optional[EmbeddingOptions], optional
        Options for the input embeddings, by default None.

        - embedding_dim_default: int - Default dimension of the embedding table,
          when the feature is not found in ``embedding_dims``, by default 64
        - infer_embedding_sizes : bool - Automatically defines the embedding
          dimension from the feature cardinality in the schema, by default False,
          which needs to be kept False for the DLRM architecture.
    bottom_block : Optional[Block], optional
        The `Block` that combines the continuous features (typically a
        `MLPBlock`). Required when continuous features are present in the schema.
    top_block : Optional[Block], optional
        The optional `Block` that combines the outputs of the bottom block and of
        the factorization machine layer, by default None
    embeddings : Optional[Block], optional
        An optional pre-built `Block` with the categorical embeddings. Mutually
        exclusive with ``embedding_options``, by default None

    Returns
    -------
    SequentialBlock
        The DLRM block

    Raises
    ------
    ValueError
        The schema is required by DLRM
    ValueError
        DLRM requires categorical features
    ValueError
        The bottom_block is required by DLRM when continuous features are
        available in the schema
    ValueError
        The embedding_dim needs to match the last layer of the bottom MLP
    ValueError
        Only one of `embeddings` or `embedding_options` can be used
    """
    if schema is None:
        raise ValueError("The schema is required by DLRM")

    con_schema = schema.select_by_tag(Tags.CONTINUOUS).excluding_by_tag(Tags.TARGET)
    cat_schema = schema.select_by_tag(Tags.CATEGORICAL).excluding_by_tag(Tags.TARGET)

    if not len(cat_schema) > 0:
        raise ValueError("DLRM requires categorical features")

    if embeddings is not None and embedding_options is not None:
        raise ValueError("Only one of `embeddings` or `embedding_options` may be provided.")

    if embeddings is None:
        embeddings = _get_embeddings(embedding_dim, embedding_options, bottom_block, cat_schema)

    if len(con_schema) > 0:
        if bottom_block is None:
            raise ValueError(
                "The bottom_block is required by DLRM when "
                "continuous features are available in the schema"
            )
        con = ContinuousFeatures.from_schema(con_schema)
        bottom_block = con.connect(bottom_block)  # type: ignore
        interaction_inputs = ParallelBlock(
            {"embeddings": embeddings, "bottom_block": bottom_block},
            is_input=True,
        )
    else:
        interaction_inputs = embeddings  # type: ignore
        bottom_block = None

    interaction_inputs = interaction_inputs.connect(Debug())

    if not top_block:
        return interaction_inputs.connect(DotProductInteractionBlock())

    if not bottom_block:
        return interaction_inputs.connect(DotProductInteractionBlock(), top_block)

    top_block_inputs = interaction_inputs.connect_with_shortcut(
        DotProductInteractionBlock(),
        shortcut_filter=Filter("bottom_block"),
        aggregation="concat",
    )
    top_block_outputs = top_block_inputs.connect(top_block)

    return top_block_outputs
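
# -- Usage sketch (illustrative only; not part of this module) --------------
# A minimal example of wiring ``DLRMBlock`` into a ranking model. The schema,
# target name ("click"), and layer sizes are assumptions made for the sake of
# the example; note that the last layer of ``bottom_block`` must match
# ``embedding_dim``, as enforced by ``_get_embeddings`` below.
#
#     import merlin.models.tf as mm
#
#     dlrm = mm.DLRMBlock(
#         schema,  # a merlin.schema.Schema with categorical + continuous features
#         embedding_dim=64,
#         bottom_block=mm.MLPBlock([128, 64]),  # last layer == embedding_dim
#         top_block=mm.MLPBlock([128, 64]),
#     )
#     model = mm.Model(dlrm, mm.BinaryClassificationTask("click"))
#     model.compile(optimizer="adam")
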
def _get_embeddings(embedding_dim, embedding_options, bottom_block, cat_schema):
    if embedding_dim is None:
        raise ValueError("The embedding_dim is required")

    if embedding_options is not None:
        embedding_options.embedding_dim_default = embedding_dim
    else:
        embedding_options = EmbeddingOptions(embedding_dim_default=embedding_dim)

    if bottom_block is not None:
        last_bottom_mlp_layer = [
            k for k in bottom_block.layers if isinstance(k, (_Dense, tf.keras.layers.Dense))
        ][-1]
        if embedding_dim != last_bottom_mlp_layer.units:
            raise ValueError(
                f"The embedding_dim ({embedding_dim}) needs to match the "
                f"last layer of the bottom MLP ({last_bottom_mlp_layer.units})"
            )

    embeddings_kwargs = dict(
        sequence_combiner=embedding_options.combiner,
        embeddings_initializer=embedding_options.embeddings_initializers,
        dim=embedding_options.embedding_dim_default,
        infer_dim_fn=partial(
            infer_embedding_dim,
            multiplier=embedding_options.infer_embedding_sizes_multiplier,
            ensure_multiple_of_8=embedding_options.infer_embeddings_ensure_dim_multiple_of_8,
        ),
    )

    return Embeddings(cat_schema, **embeddings_kwargs)


def DotProductInteractionBlock():
    return SequentialBlock(StackFeatures(axis=1), DotProductInteraction())
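
# -- Shape sketch for DotProductInteractionBlock (illustrative only) --------
# ``StackFeatures(axis=1)`` stacks the per-feature vectors into a
# (batch, num_features, dim) tensor, and ``DotProductInteraction`` computes
# the dot product between every distinct pair of feature vectors, yielding
# num_features * (num_features - 1) / 2 values per example (assuming the
# default of no self-interactions).
#
#     stacked = tf.random.uniform((8, 4, 16))  # batch=8, 4 features, dim=16
#     pairwise = DotProductInteraction()(stacked)
#     assert pairwise.shape == (8, 4 * 3 // 2)  # 6 pairwise interactions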