Source code for merlin.models.tf.core.combinators

import copy
import sys
from functools import reduce
from typing import Dict, List, Optional, Union

import six
import tensorflow as tf
from tensorflow.keras.layers import Layer

from merlin.models.tf.core.base import (
    Block,
    BlockType,
    NoOp,
    PredictionOutput,
    is_input_block,
    right_shift_layer,
)
from merlin.models.tf.core.tabular import (
    TABULAR_MODULE_PARAMS_DOCSTRING,
    Filter,
    TabularAggregationType,
    TabularBlock,
)
from merlin.models.tf.utils import tf_utils
from merlin.models.tf.utils.tf_utils import call_layer
from merlin.models.utils import schema_utils
from merlin.models.utils.doc_utils import docstring_parameter
from merlin.schema import Schema, Tags


@tf.keras.utils.register_keras_serializable(package="merlin.models")
class SequentialBlock(Block):
    """The SequentialBlock represents a sequence of Keras layers.
    It is a Keras Layer that can be used instead of tf.keras.layers.Sequential,
    which is actually a Keras Model. In contrast to keras Sequential, this
    layer can be used as a pure Layer in tf.functions and when exporting
    SavedModels, without having to pre-declare input and output shapes. In turn,
    this layer is usable as a preprocessing layer for TF Agents Networks, and
    can be exported via PolicySaver.

    Usage::

        block = SequentialBlock([layer1, layer2, layer3])
        output = block(inputs)

        # Equivalent to:
        output = layer3(layer2(layer1(inputs)))
    """
    def __init__(
        self,
        *layers,
        filter: Optional[Union[Schema, Tags, List[str], "Filter"]] = None,
        pre_aggregation: Optional["TabularAggregationType"] = None,
        block_name: Optional[str] = None,
        copy_layers: bool = False,
        **kwargs,
    ):
        """Create a sequential composition.

        Parameters
        ----------
        layers:
            A list or tuple of layers to compose.
        **kwargs:
            Arguments to pass to the `Keras` layer initializer, including `name`.

        Raises
        ------
        TypeError:
            If any of the layers are not instances of keras `Layer`.
        """
        if len(layers) == 1 and isinstance(layers[0], (list, tuple)):
            layers = layers[0]  # type: ignore

        self.block_name = block_name

        if pre_aggregation:
            layers = [TabularBlock(aggregation=pre_aggregation), *layers]  # type: ignore

        for layer in layers:
            if not isinstance(layer, tf.keras.layers.Layer):
                raise TypeError(
                    "Expected all layers to be instances of keras Layer, "
                    "but saw: '{}'".format(layer)
                )

        super(SequentialBlock, self).__init__(**kwargs)

        if getattr(layers[0], "has_schema", None):
            super().set_schema(layers[0].schema)

            for layer in layers[1:]:
                if hasattr(layer, "set_schema"):
                    layer.set_schema(layers[0].schema)

        layers = copy.copy(layers) if copy_layers else layers
        if filter:
            if not isinstance(filter, Filter):
                filter = Filter(filter)
            self.layers = [filter, *layers]
        else:
            self.layers = list(layers)
    def compute_output_shape(self, input_shape):
        """Computes the output shape based on the input shape

        Parameters
        ----------
        input_shape : tf.TensorShape
            The input shape

        Returns
        -------
        tf.TensorShape
            The output shape
        """
        return compute_output_shape_sequentially(self.layers, input_shape)
    def compute_output_signature(self, input_signature):
        return compute_output_signature_sequentially(self.layers, input_signature)
    def build(self, input_shape=None):
        """Builds the sequential block

        Parameters
        ----------
        input_shape : tf.TensorShape, optional
            The input shape, by default None
        """
        self._maybe_propagate_context(input_shape)
        build_sequentially(self, self.layers, input_shape)
    def set_schema(self, schema=None):
        for layer in self.layers:
            self._maybe_set_schema(layer, schema)

        return super().set_schema(schema)
    def _get_name(self):
        return self.block_name if self.block_name else f"{self.__class__.__name__}"

    @property
    def inputs(self):
        """Returns the InputBlock, if it is the first block within the SequentialBlock

        Returns
        -------
        InputBlock
            The input block
        """
        first = list(self)[0]
        if isinstance(first, SequentialBlock):
            return first.inputs
        if is_input_block(first):
            return first

    @property
    def first(self):
        """Returns the first block in the SequentialBlock

        Returns
        -------
        Block
            The first block of the SequentialBlock
        """
        return self.layers[0]

    @property
    def last(self):
        """Returns the last block in the SequentialBlock

        Returns
        -------
        Block
            The last block of the SequentialBlock
        """
        return self.layers[-1]

    @property
    def filter_features(self) -> List[str]:
        if isinstance(self.layers[0], Filter):
            return self.layers[0].feature_names
        elif isinstance(self.layers[0], SequentialBlock):
            return self.layers[0].filter_features

        return []

    @property
    def trainable_weights(self):
        """Returns the trainable weights of all layers of this block

        Returns
        -------
        List
            List with trainable weights
        """
        if not self.trainable:
            return []
        weights = {}
        for layer in self.layers:
            for v in layer.trainable_weights:
                weights[id(v)] = v

        return list(weights.values())

    @property
    def non_trainable_weights(self):
        """Returns the non-trainable weights of all layers of this block

        Returns
        -------
        List
            List with non-trainable weights
        """
        weights = {}
        for layer in self.layers:
            for v in layer.non_trainable_weights:
                weights[id(v)] = v

        return list(weights.values())

    @property
    def trainable(self):
        """Returns whether any layer within the SequentialBlock is trainable

        Returns
        -------
        bool
            True if any layer within the SequentialBlock is trainable,
            otherwise False
        """
        return any(layer.trainable for layer in self.layers)

    @trainable.setter
    def trainable(self, value):
        """Makes all block layers trainable or not

        Parameters
        ----------
        value : bool
            Sets the trainable flag of all layers
        """
        for layer in self.layers:
            layer.trainable = value

    @property
    def losses(self):
        values, _val_names = [], set()
        for layer in self.layers:
            losses = layer.losses
            for loss in losses:
                if isinstance(loss, tf.Tensor):
                    if loss.ref() not in _val_names:
                        _val_names.add(loss.ref())
                        values.append(loss)
                else:
                    raise ValueError(f"Loss should be a Tensor, found: {loss}")

        return values

    @property
    def regularizers(self):
        values = set()
        for layer in self.layers:
            regularizers = getattr(layer, "regularizers", None)
            if regularizers:
                values.update(regularizers)

        return list(values)
    def call(self, inputs, training=False, **kwargs):
        return call_sequentially(self.layers, inputs, training=training, **kwargs)
    def compute_loss(self, inputs, targets, **kwargs):
        outputs, targets = inputs, targets
        for layer in self.layers:
            outputs, targets = layer.compute_loss(outputs, targets=targets, **kwargs)

        return outputs, targets
    def call_outputs(
        self, outputs: PredictionOutput, training=False, **kwargs
    ) -> "PredictionOutput":
        for layer in self.layers:
            outputs = layer.call_outputs(outputs, training=training, **kwargs)

        return outputs
    def get_config(self):
        config = {}
        for i, layer in enumerate(self.layers):
            config[i] = tf.keras.utils.serialize_keras_object(layer)

        return config
    def __getitem__(self, key):
        return self.layers[key]

    @property
    def is_tabular(self):
        return getattr(self.layers[-1], "is_tabular", False)
    @classmethod
    def from_config(cls, config, custom_objects=None):
        layers = [
            tf.keras.layers.deserialize(conf, custom_objects=custom_objects)
            for conf in config.values()
        ]

        return SequentialBlock(layers)
    def __rrshift__(self, other):
        return right_shift_layer(self, other)

    def __rshift__(self, other):
        # pylint: disable=arguments-out-of-order
        return right_shift_layer(other, self)
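# Usage sketch (editor's note): an illustrative composition, not part of the
# library source. Layer sizes and the input shape are arbitrary assumptions.
#
#     import tensorflow as tf
#     from merlin.models.tf.core.combinators import SequentialBlock
#
#     mlp = SequentialBlock(
#         tf.keras.layers.Dense(64, activation="relu"),
#         tf.keras.layers.Dense(32),
#     )
#     out = mlp(tf.random.uniform((8, 16)))  # == dense2(dense1(inputs))
#
#     # `>>` (right_shift_layer) composes blocks left-to-right:
#     longer = mlp >> tf.keras.layers.Dense(16)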
@docstring_parameter(tabular_module_parameters=TABULAR_MODULE_PARAMS_DOCSTRING)
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class ParallelBlock(TabularBlock):
    """Merge multiple layers or TabularModules into a single output of TabularData.

    Parameters
    ----------
    inputs: Union[tf.keras.layers.Layer, Dict[str, tf.keras.layers.Layer]]
        Keras layers to merge. This can also be one or multiple layers keyed by
        the name the module should have.
    {tabular_module_parameters}
    use_layer_name: bool
        Whether to use the original name of the layers provided in `inputs` as the
        key-index of the parallel branches.
    strict: bool
        If true, the call inputs must be a dictionary; an error is raised otherwise.
    automatic_pruning: bool
        If true, branches with no output will automatically be pruned.
    **kwargs:
        Extra arguments to pass to TabularBlock.
    """
    def __init__(
        self,
        *inputs: Union[tf.keras.layers.Layer, Dict[str, tf.keras.layers.Layer]],
        pre: Optional[BlockType] = None,
        post: Optional[BlockType] = None,
        aggregation: Optional[TabularAggregationType] = None,
        schema: Optional[Schema] = None,
        name: Optional[str] = None,
        strict: bool = False,
        automatic_pruning: bool = True,
        use_layer_name: bool = True,
        **kwargs,
    ):
        super().__init__(
            pre=pre, post=post, aggregation=aggregation, schema=schema, name=name, **kwargs
        )
        self.strict = strict
        self.automatic_pruning = automatic_pruning
        self.parallel_layers: Union[List[TabularBlock], Dict[str, TabularBlock]]
        if isinstance(inputs, tuple) and len(inputs) == 1 and isinstance(inputs[0], (list, tuple)):
            inputs = inputs[0]

        if all(isinstance(x, dict) for x in inputs):
            to_merge: Dict[str, tf.keras.layers.Layer] = reduce(
                lambda a, b: dict(a, **b), inputs
            )  # type: ignore
            parsed_to_merge: Dict[str, TabularBlock] = {}
            for key, val in to_merge.items():
                parsed_to_merge[key] = val
            self.parallel_layers = parsed_to_merge
        elif all(isinstance(x, tf.keras.layers.Layer) for x in inputs):
            if use_layer_name:
                self.parallel_layers = {layer.name: layer for layer in inputs}
            else:
                parsed: List[TabularBlock] = []
                for i, inp in enumerate(inputs):
                    parsed.append(inp)  # type: ignore
                self.parallel_layers = parsed
        else:
            raise ValueError(
                "Please provide one or multiple layers to merge, or "
                f"dictionaries of layers. Got: {inputs}"
            )

        if schema:
            for branch in self.parallel_values:
                if not getattr(branch, "has_schema", True):
                    branch.set_schema(schema)

        # Merge schemas if necessary.
        if not schema and all(getattr(m, "_schema", False) for m in self.parallel_values):
            if len(self.parallel_values) == 1:
                self.set_schema(self.parallel_values[0].schema)
            else:
                s = reduce(
                    lambda a, b: a + b, [m.schema for m in self.parallel_values]
                )  # type: ignore
                self.set_schema(s)
    @property
    def schema(self):
        if self.has_schema:
            return self._schema

        if all(getattr(m, "_schema", False) for m in self.parallel_values):
            if len(self.parallel_values) == 1:
                return self.parallel_values[0].schema
            else:
                s = reduce(
                    lambda a, b: a + b, [m.schema for m in self.parallel_values]
                )  # type: ignore
                return s

        return None

    @property
    def parallel_values(self) -> List[tf.keras.layers.Layer]:
        if isinstance(self.parallel_layers, dict):
            return list(self.parallel_layers.values())

        return self.parallel_layers

    @property
    def parallel_dict(self) -> Dict[Union[str, int], tf.keras.layers.Layer]:
        if isinstance(self.parallel_layers, dict):
            return self.parallel_layers

        return {i: m for i, m in enumerate(self.parallel_layers)}

    @property
    def layers(self) -> List[tf.keras.layers.Layer]:
        return self.parallel_values
    def select_by_name(self, name: str) -> Optional["Block"]:
        """Select a parallel block by name

        Returns
        -------
        Block
            The block corresponding to the name
        """
        return self.parallel_dict.get(name)
    def select_by_names(self, names: List[str]) -> Optional[List[Block]]:
        """Select a list of parallel blocks by names

        Returns
        -------
        List[Block]
            The blocks corresponding to the names
        """
        blocks = []
        for name in names:
            if name in self.parallel_dict:
                blocks.append(self.parallel_dict.get(name))
            else:
                raise ValueError(f"Given name {name} is not in ParallelBlock {self.name}")

        return blocks
    def select_by_tag(
        self,
        tags: Union[str, Tags, List[Union[str, Tags]]],
    ) -> Optional["ParallelBlock"]:
        """Select layers of parallel blocks by tags.

        This method returns a ParallelBlock instance with all the branches that
        have at least one feature that matches any of the tags provided.

        For example, this method can be useful when a ParallelBlock has both item
        and user features in a two-tower model or DLRM, and we want to select only
        the item or user features.

        >>> all_inputs = InputBlockV2(schema)  # InputBlock is also a ParallelBlock
        >>> item_inputs = all_inputs.select_by_tag(Tags.ITEM)
        >>> list(item_inputs.parallel_dict.keys())
        ['continuous', 'embeddings']
        >>> item_inputs.schema["continuous"].column_names
        ['item_recency']
        >>> item_inputs.schema["embeddings"].column_names
        ['item_id', 'item_category', 'item_genres']

        Parameters
        ----------
        tags: str or Tags or List[Union[str, Tags]]
            List of tags that describe which blocks to match

        Returns
        -------
        ParallelBlock
        """
        if self.schema is not None and self.schema == self.schema.select_by_tag(tags):
            return self

        if not isinstance(tags, (list, tuple)):
            tags = [tags]

        selected_branches = {}
        selected_schemas = Schema()

        for name, branch in self.parallel_dict.items():
            branch_has_schema = getattr(branch, "has_schema", False)
            if not branch_has_schema:
                continue
            if not hasattr(branch, "select_by_tag"):
                raise AttributeError(
                    f"This ParallelBlock does not support select_by_tag because "
                    f"{branch.__class__} does not support select_by_tag. Consider "
                    "implementing a select_by_tag in an extension of "
                    f"{branch.__class__}."
                )
            selected_branch = branch.select_by_tag(tags)
            if not selected_branch:
                continue
            selected_branches[name] = selected_branch
            selected_schemas += selected_branch.schema

        if not selected_branches:
            return None

        return ParallelBlock(
            selected_branches,
            schema=selected_schemas,
            is_input=self.is_input,
            post=self.post,
            pre=self.pre,
            aggregation=self.aggregation,
            strict=self.strict,
            automatic_pruning=self.automatic_pruning,
        )
    def __getitem__(self, key) -> "Block":
        return self.parallel_dict[key]

    def __setitem__(self, key: str, item: "Block"):
        self.parallel_dict[key] = item

    @property
    def first(self) -> "Block":
        return self.parallel_values[0]
    def add_branch(self, name: str, block: "Block") -> "ParallelBlock":
        if isinstance(self.parallel_layers, dict):
            self.parallel_layers[name] = block

        return self
    def apply_to_branch(self, branch_name: str, *block: "Block"):
        if isinstance(self.parallel_layers, dict):
            self.parallel_layers[branch_name] = self.parallel_layers[branch_name].apply(*block)
    def call(self, inputs, **kwargs):
        """The call method for ParallelBlock

        Parameters
        ----------
        inputs : TabularData
            The inputs for the ParallelBlock

        Returns
        -------
        TabularData
            Outputs of the ParallelBlock
        """
        if self.strict:
            assert isinstance(inputs, dict), "Inputs needs to be a dict"

        outputs = {}
        for name, layer in self.parallel_dict.items():
            layer_inputs = self._maybe_filter_layer_inputs_using_schema(name, layer, inputs)
            out = call_layer(layer, layer_inputs, **kwargs)
            if not isinstance(out, dict):
                out = {name: out}
            outputs.update(out)

        return outputs
    def compute_call_output_shape(self, input_shape):
        output_shapes = {}

        for name, layer in self.parallel_dict.items():
            layer_input_shape = self._maybe_filter_layer_inputs_using_schema(
                name, layer, input_shape
            )
            out = layer.compute_output_shape(layer_input_shape)
            if isinstance(out, dict):
                output_shapes.update(out)
            else:
                output_shapes[name] = out

        return output_shapes
    def build(self, input_shape):
        to_prune = []
        for name, layer in self.parallel_dict.items():
            layer_input_shape = self._maybe_filter_layer_inputs_using_schema(
                name, layer, input_shape
            )
            layer.build(layer_input_shape)
            layer_out_shape = layer.compute_output_shape(layer_input_shape)
            if self.automatic_pruning and layer_out_shape == {}:
                to_prune.append(name)

        if isinstance(self.parallel_layers, dict):
            pruned = {}
            for name, layer in self.parallel_layers.items():
                if name not in to_prune:
                    pruned[name] = layer
            self.parallel_layers = pruned
        else:
            pruned = []
            for layer in self.parallel_layers:
                if layer not in to_prune:
                    pruned.append(layer)
            self.parallel_layers = pruned

        return super().build(input_shape)
    def _maybe_filter_layer_inputs_using_schema(self, name, layer, inputs):
        maybe_schema = getattr(layer, "_schema", None)
        if maybe_schema and isinstance(inputs, dict):
            layer_inputs = {k: v for k, v in inputs.items() if k in maybe_schema.column_names}
        else:
            layer_inputs = inputs

        if isinstance(layer_inputs, dict) and all(
            name in layer_inputs for name in self.parallel_dict
        ):
            layer_inputs = layer_inputs[name]

        return layer_inputs
    def get_config(self):
        config = super(ParallelBlock, self).get_config()
        config.update({"automatic_pruning": self.automatic_pruning})

        return tf_utils.maybe_serialize_keras_objects(self, config, ["parallel_layers"])
    @classmethod
    def parse_config(cls, config, custom_objects=None):
        config = tf_utils.maybe_deserialize_keras_objects(config, ["pre", "post", "aggregation"])
        if "schema" in config:
            config["schema"] = schema_utils.tensorflow_metadata_json_to_schema(config["schema"])

        parallel_layers = config.pop("parallel_layers")
        if isinstance(parallel_layers, dict):
            inputs = {
                name: tf.keras.layers.deserialize(conf, custom_objects=custom_objects)
                for name, conf in parallel_layers.items()
            }
        elif isinstance(parallel_layers, (list, tuple)):
            inputs = [
                tf.keras.layers.deserialize(conf, custom_objects=custom_objects)
                for conf in parallel_layers
            ]
        else:
            raise ValueError("Parallel layers need to be a list or a dict")

        return inputs, config
    @classmethod
    def from_config(cls, config, custom_objects=None):
        inputs, config = cls.parse_config(config, custom_objects)

        return cls(inputs, **config)
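# Usage sketch (editor's note): illustrative only, not part of the library
# source. Branch names and sizes are arbitrary, and the string-registered
# "concat" aggregation is assumed; without an aggregation, calling the block
# returns a dict keyed by branch name.
#
#     import tensorflow as tf
#     from merlin.models.tf.core.combinators import ParallelBlock
#
#     towers = ParallelBlock(
#         {"deep": tf.keras.layers.Dense(32), "wide": tf.keras.layers.Dense(8)},
#         aggregation="concat",
#     )
#     out = towers(tf.random.uniform((4, 16)))  # concat of both branches: (4, 40)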
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class WithShortcut(ParallelBlock):
    def __init__(
        self,
        block: Union[tf.keras.layers.Layer, Block],
        shortcut_filter: Optional[Filter] = None,
        aggregation=None,
        post: Optional[BlockType] = None,
        schema: Optional[Schema] = None,
        name: Optional[str] = None,
        strict: bool = False,
        block_outputs_name: Optional[str] = None,
        **kwargs,
    ):
        block_outputs_name = block_outputs_name or block.name
        shortcut = shortcut_filter if shortcut_filter else NoOp()
        inputs = {block_outputs_name: block, "shortcut": shortcut}
        super().__init__(
            inputs,
            post=post,
            aggregation=aggregation,
            schema=schema,
            name=name,
            strict=strict,
            **kwargs,
        )

    @classmethod
    def from_config(cls, config, **kwargs):
        output = ParallelBlock.from_config(config, **kwargs)
        output.__class__ = cls

        return output
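# Usage sketch (editor's note): illustrative only. WithShortcut runs `block`
# and a pass-through (NoOp) branch in parallel; the string-registered "concat"
# aggregation is assumed here to merge the two branches.
#
#     import tensorflow as tf
#     from merlin.models.tf.core.combinators import WithShortcut
#
#     block = WithShortcut(tf.keras.layers.Dense(16), aggregation="concat")
#     out = block(tf.random.uniform((4, 16)))  # block output concat'd with inputs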
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class ResidualBlock(WithShortcut):
    def __init__(
        self,
        block: Union[tf.keras.layers.Layer, Block],
        activation=None,
        post: Optional[BlockType] = None,
        schema: Optional[Schema] = None,
        name: Optional[str] = None,
        strict: bool = False,
        **kwargs,
    ):
        from merlin.models.tf.core.aggregation import SumResidual

        super().__init__(
            block,
            post=post,
            aggregation=SumResidual(activation=activation),
            schema=schema,
            name=name,
            strict=strict,
            **kwargs,
        )
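# Usage sketch (editor's note): illustrative only. The wrapped block should
# preserve the input shape so the SumResidual aggregation can add its output
# back onto the shortcut branch (roughly activation(inputs + block(inputs))).
#
#     import tensorflow as tf
#     from merlin.models.tf.core.combinators import ResidualBlock
#
#     residual = ResidualBlock(tf.keras.layers.Dense(16), activation="relu")
#     out = residual(tf.random.uniform((4, 16)))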
@tf.keras.utils.register_keras_serializable(package="merlin.models")
class Cond(Layer):
    """Layer to conditionally apply layers."""

    def __init__(self, condition: Layer, true: Layer, false: Optional[Layer] = None, **kwargs):
        super(Cond, self).__init__(**kwargs)
        self.condition = condition
        self.true = true
        self.false = false

    def call(self, inputs, **kwargs):
        """Call layers conditionally."""
        condition = call_layer(self.condition, inputs, **kwargs)

        def true_fn():
            return call_layer(self.true, inputs, **kwargs)

        def false_fn():
            if self.false is None:
                return inputs

            return call_layer(self.false, inputs, **kwargs)

        return tf.cond(tf.convert_to_tensor(condition), true_fn, false_fn)

    def compute_output_shape(self, input_shape):
        """Computes the output shape of the layer."""
        true_output_shape = self.true.compute_output_shape(input_shape)

        if self.false:
            false_output_shape = self.false.compute_output_shape(input_shape)
        else:
            false_output_shape = input_shape

        try:
            if isinstance(true_output_shape, dict):
                for key in true_output_shape.keys():
                    true_output_shape[key].assert_is_compatible_with(false_output_shape[key])
            else:
                true_output_shape.assert_is_compatible_with(false_output_shape)
        except ValueError as exc:
            raise ValueError(
                "Both true and false branches must return the same output shape"
            ) from exc

        return true_output_shape

    def get_config(self):
        """Returns the config of the layer as a Python dictionary."""
        config = super(Cond, self).get_config()
        config["condition"] = tf.keras.layers.serialize(self.condition)
        config["true"] = tf.keras.layers.serialize(self.true)
        if self.false:
            config["false"] = tf.keras.layers.serialize(self.false)

        return config

    @classmethod
    def from_config(cls, config):
        """Creates a Cond layer from its config and returns the instance."""
        condition = tf.keras.layers.deserialize(config.pop("condition"))
        true = tf.keras.layers.deserialize(config.pop("true"))
        false = None
        if "false" in config:
            false = tf.keras.layers.deserialize(config.pop("false"))

        return cls(condition, true, false=false, **config)

    def build(self, input_shape):
        """Creates the variables of the layer."""
        self.condition.build(input_shape)
        self.true.build(input_shape)
        if self.false:
            self.false.build(input_shape)

        return super(Cond, self).build(input_shape)


@tf.keras.utils.register_keras_serializable(package="merlin.models")
class MapValues(Layer):
    """Layer to map the values of a dictionary of tensors."""

    def __init__(self, layer: Layer, **kwargs):
        super(MapValues, self).__init__(**kwargs)
        self.layer = layer

    def call(self, inputs, **kwargs):
        if isinstance(inputs, dict):
            return {key: call_layer(self.layer, value, **kwargs) for key, value in inputs.items()}

        return call_layer(self.layer, inputs, **kwargs)

    def compute_output_shape(self, input_shape):
        if isinstance(input_shape, dict):
            return {
                key: self.layer.compute_output_shape(value)
                for key, value in input_shape.items()
            }

        return self.layer.compute_output_shape(input_shape)

    def get_config(self):
        config = super(MapValues, self).get_config()
        config["layer"] = tf.keras.layers.serialize(self.layer)

        return config

    @classmethod
    def from_config(cls, config):
        layer = tf.keras.layers.deserialize(config.pop("layer"))

        return cls(layer, **config)


def call_sequentially(layers, inputs, **kwargs):
    """Call layers sequentially."""
    outputs = inputs
    for layer in layers:
        outputs = call_layer(layer, outputs, **kwargs)

    return outputs


def build_sequentially(self, layers, input_shape):
    """Build layers sequentially."""
    last_layer = None
    for layer in layers:
        try:
            layer.build(input_shape)
        except TypeError:
            t, v, tb = sys.exc_info()
            if isinstance(input_shape, dict) and isinstance(last_layer, TabularBlock):
                v = TypeError(
                    f"Couldn't build {layer}, "
                    f"did you forget to add aggregation to {last_layer}?"
                )
            six.reraise(t, v, tb)
        input_shape = layer.compute_output_shape(input_shape)
        last_layer = layer
    self.built = True


def compute_output_signature_sequentially(layers, input_signature):
    """Compute the output signature sequentially."""
    output_signature = input_signature
    for layer in layers:
        output_signature = layer.compute_output_signature(output_signature)

    return output_signature


def compute_output_shape_sequentially(layers, input_shape):
    """Compute the output shape sequentially."""
    output_shape = input_shape
    for layer in layers:
        output_shape = layer.compute_output_shape(output_shape)

    return output_shape
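# Usage sketch (editor's note): illustrative only. The condition layer must
# produce a scalar boolean tensor for tf.cond; the Lambda layers below are
# arbitrary stand-ins.
#
#     import tensorflow as tf
#     from merlin.models.tf.core.combinators import Cond, MapValues
#
#     is_positive = tf.keras.layers.Lambda(lambda x: tf.reduce_sum(x) > 0)
#     double = tf.keras.layers.Lambda(lambda x: x * 2.0)
#     halve = tf.keras.layers.Lambda(lambda x: x / 2.0)
#
#     cond = Cond(is_positive, true=double, false=halve)
#     out = cond(tf.ones((2, 3)))  # sum > 0, so the `double` branch runs
#
#     # MapValues applies one layer to every value of a dict of tensors:
#     mapped = MapValues(double)({"a": tf.ones((2,)), "b": tf.zeros((2,))})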