Source code for sparse_operation_kit.embeddings.all2all_dense_embedding

#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from sparse_operation_kit.core import EmbeddingVariable
from sparse_operation_kit.core import DenseEmbeddingLayerHandle
from sparse_operation_kit.embeddings import embedding_ops
import tensorflow as tf


class All2AllDenseEmbedding(tf.keras.layers.Layer):
    """
    Abbreviated as ``sok.All2AllDenseEmbedding(*args, **kwargs)``.

    This is a wrapper class for the all2all dense embedding layer.
    It can be used to create a dense embedding layer which distributes
    keys based on `gpu_id = key % gpu_num` to each GPU (an illustrative
    sketch of this rule is appended at the end of this listing).

    Parameters
    ----------
    max_vocabulary_size_per_gpu: integer
        the first dimension of the embedding variable whose shape is
        [max_vocabulary_size_per_gpu, embedding_vec_size].
    embedding_vec_size: integer
        the second dimension of the embedding variable whose shape is
        [max_vocabulary_size_per_gpu, embedding_vec_size].
    slot_num: integer
        the number of feature fields which will be processed at the same time in
        each iteration, where all feature fields produce embedding vectors
        of the same dimension.
    nnz_per_slot: integer
        the number of valid keys in each slot. The number of valid keys in
        each slot is the same.
    dynamic_input: boolean = False
        whether the inputs.shape is dynamic. For example, the inputs tensor is
        coming from `tf.unique`. When `dynamic_input=True`, the
        `unique->lookup->gather` pattern can be used (see the sketch appended
        at the end of this listing). By default, it is False, which means
        inputs.size must be `replica_batchsize * slot_num * nnz_per_slot`.
    use_hashtable: boolean = True
        whether a `Hashtable` is used in ``EmbeddingVariable``. If `True`, a
        Hashtable will be created for dynamic insertion. Otherwise, the input
        keys will be used as the indices for the embedding vector lookup, so
        the input keys must be in the range
        ``[0, max_vocabulary_size_per_gpu * gpu_num)``.
    key_dtype: tf.dtypes = tf.int64
        the data type of input keys. By default, it is `tf.int64`.
    embedding_initializer: string or an instance of `tf.keras.initializers.Initializer`
        the initializer used to generate the initial value for the embedding
        variable. By default, it will use `random_uniform` where
        ``minval=-0.05, maxval=0.05``.

    Examples
    --------
    .. code-block:: python

        initializer = tf.keras.initializers.RandomUniform() # or "random_uniform"

        emb_layer = sok.All2AllDenseEmbedding(max_vocabulary_size_per_gpu,
                                              embedding_vec_size,
                                              slot_num, nnz_per_slot,
                                              embedding_initializer=initializer)

        @tf.function
        def _train_step(inputs, labels):
            emb_vectors = emb_layer(inputs)
            ...

        for i, (inputs, labels) in enumerate(dataset):
            _train_step(inputs, labels)
    """

    def __init__(
        self,
        max_vocabulary_size_per_gpu,
        embedding_vec_size,
        slot_num,
        nnz_per_slot,
        dynamic_input=False,
        use_hashtable=True,
        key_dtype=None,
        embedding_initializer=None,
        **kwargs
    ):
        super(All2AllDenseEmbedding, self).__init__(**kwargs)

        self.max_vocabulary_size_per_gpu = max_vocabulary_size_per_gpu
        self.embedding_vec_size = embedding_vec_size
        self.slot_num = slot_num
        self.nnz_per_slot = nnz_per_slot
        self.dynamic_input = dynamic_input
        self.use_hashtable = use_hashtable

        if self._dtype_policy.variable_dtype is None:
            # in TF1 and the policy is not set,
            # therefore variable dtype and compute dtype should be fp32
            from tensorflow.python.keras.mixed_precision import experimental as mixed_precision

            self._dtype_policy = mixed_precision.Policy("float32")

        self.var = EmbeddingVariable.CreateInstances(
            shape=[self.max_vocabulary_size_per_gpu, self.embedding_vec_size],
            trainable=True,
            use_hashtable=self.use_hashtable,
            dtype=self._dtype_policy.variable_dtype,
            key_dtype=key_dtype,
            initializer=embedding_initializer,
        )

        self.emb_layer = DenseEmbeddingLayerHandle(
            self.var,
            input_dispatcher="All2AllInput",
            embedding_lookuper="dense_gather",
            output_dispatcher="All2AllOutput",
            slot_num=self.slot_num,
            nnz_per_slot=self.nnz_per_slot,
            compute_dtype=self._dtype_policy.compute_dtype,
        )

    @property
    def embedding_variable(self):
        return self.var

    # @tf.function
    def call(self, inputs, training=True):
        """
        The forward logic of this wrapper class.

        Parameters
        ----------
        inputs: tf.Tensor
            keys are stored in a tf.Tensor. They must be stored in row-major order.
            If `dynamic_input = True`, then inputs.shape must be [None,];
            otherwise, inputs.shape must be [batchsize, slot_num, nnz_per_slot].
        training: boolean
            whether training or not.

        Returns
        -------
        emb_vector: tf.float
            the embedding vectors for the input keys. When dynamic_input=False,
            its shape is *[batchsize, slot_num, nnz_per_slot, embedding_vec_size]*.
            Otherwise, its shape is *[None, embedding_vec_size]*, where *None*
            equals the size of inputs.
        """
        emb_vector = embedding_ops.embedding_lookup(
            embedding_variable=self.var,
            values=inputs,
            training=training,
            dynamic_input=self.dynamic_input,
        )
        return emb_vector
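

# The function below is an illustrative sketch added to this listing; it is NOT
# part of the original sparse_operation_kit module. It only demonstrates the
# `gpu_id = key % gpu_num` distribution rule described in the class docstring,
# using standard TensorFlow ops. `gpu_num` and the key values are placeholders;
# the real dispatch is performed by the layer's "All2AllInput" dispatcher, not
# by this function.
def _key_distribution_sketch(keys, gpu_num=4):
    """Return a list where element i holds the keys routed to GPU i."""
    # gpu_id = key % gpu_num; tf.dynamic_partition expects int32 partition ids
    gpu_ids = tf.cast(keys % gpu_num, tf.int32)
    return tf.dynamic_partition(keys, gpu_ids, num_partitions=gpu_num)


# Example with placeholder key values:
#   _key_distribution_sketch(tf.constant([0, 1, 5, 8, 13, 14], dtype=tf.int64))
#   -> [[0, 8], [1, 5, 13], [14], []], i.e. keys grouped by key % 4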
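

# The function below is an illustrative sketch added to this listing; it is NOT
# part of the original sparse_operation_kit module. It shows the
# `unique->lookup->gather` pattern that the class docstring describes for
# `dynamic_input=True`: deduplicate the flattened keys, look up embeddings for
# the unique keys only, then restore the original layout. `emb_layer` is assumed
# to be an All2AllDenseEmbedding built with `dynamic_input=True`; all shapes
# mentioned below are placeholders.
def _dynamic_input_lookup_sketch(emb_layer, inputs):
    """Look up embeddings for `inputs` of shape [batchsize, slot_num, nnz_per_slot]."""
    # flatten the dense keys to a 1-D tensor of shape [N,]
    flat_keys = tf.reshape(inputs, [-1])
    # keep only the unique keys; restore_idx maps every original key to its unique row
    unique_keys, restore_idx = tf.unique(flat_keys)
    # with dynamic_input=True the layer accepts a dynamically shaped [None,] key tensor
    unique_vectors = emb_layer(unique_keys)  # [num_unique, embedding_vec_size]
    # gather back to the original ordering: [N, embedding_vec_size]
    return tf.gather(unique_vectors, restore_idx)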