merlin_standard_lib.schema package

Submodules

merlin_standard_lib.schema.schema module

class merlin_standard_lib.schema.schema.ColumnSchema(name: str = <betterproto._PLACEHOLDER object>, deprecated: bool = <betterproto._PLACEHOLDER object>, presence: merlin_standard_lib.proto.schema_bp.FeaturePresence = <betterproto._PLACEHOLDER object>, group_presence: merlin_standard_lib.proto.schema_bp.FeaturePresenceWithinGroup = <betterproto._PLACEHOLDER object>, shape: merlin_standard_lib.proto.schema_bp.FixedShape = <betterproto._PLACEHOLDER object>, value_count: merlin_standard_lib.proto.schema_bp.ValueCount = <betterproto._PLACEHOLDER object>, value_counts: merlin_standard_lib.proto.schema_bp.ValueCountList = <betterproto._PLACEHOLDER object>, type: merlin_standard_lib.proto.schema_bp.FeatureType = <betterproto._PLACEHOLDER object>, domain: str = <betterproto._PLACEHOLDER object>, int_domain: merlin_standard_lib.proto.schema_bp.IntDomain = <betterproto._PLACEHOLDER object>, float_domain: merlin_standard_lib.proto.schema_bp.FloatDomain = <betterproto._PLACEHOLDER object>, string_domain: merlin_standard_lib.proto.schema_bp.StringDomain = <betterproto._PLACEHOLDER object>, bool_domain: merlin_standard_lib.proto.schema_bp.BoolDomain = <betterproto._PLACEHOLDER object>, struct_domain: merlin_standard_lib.proto.schema_bp.StructDomain = <betterproto._PLACEHOLDER object>, natural_language_domain: merlin_standard_lib.proto.schema_bp.NaturalLanguageDomain = <betterproto._PLACEHOLDER object>, image_domain: merlin_standard_lib.proto.schema_bp.ImageDomain = <betterproto._PLACEHOLDER object>, mid_domain: merlin_standard_lib.proto.schema_bp.MIDDomain = <betterproto._PLACEHOLDER object>, url_domain: merlin_standard_lib.proto.schema_bp.URLDomain = <betterproto._PLACEHOLDER object>, time_domain: merlin_standard_lib.proto.schema_bp.TimeDomain = <betterproto._PLACEHOLDER object>, time_of_day_domain: merlin_standard_lib.proto.schema_bp.TimeOfDayDomain = <betterproto._PLACEHOLDER object>, distribution_constraints: merlin_standard_lib.proto.schema_bp.DistributionConstraints = 
<betterproto._PLACEHOLDER object>, annotation: merlin_standard_lib.proto.schema_bp.Annotation = <betterproto._PLACEHOLDER object>, skew_comparator: merlin_standard_lib.proto.schema_bp.FeatureComparator = <betterproto._PLACEHOLDER object>, drift_comparator: merlin_standard_lib.proto.schema_bp.FeatureComparator = <betterproto._PLACEHOLDER object>, in_environment: List[str] = <betterproto._PLACEHOLDER object>, not_in_environment: List[str] = <betterproto._PLACEHOLDER object>, lifecycle_stage: merlin_standard_lib.proto.schema_bp.LifecycleStage = <betterproto._PLACEHOLDER object>, unique_constraints: merlin_standard_lib.proto.schema_bp.UniqueConstraints = <betterproto._PLACEHOLDER object>)[source]

Bases: merlin_standard_lib.proto.schema_bp.Feature

classmethod create_categorical(name: str, num_items: int, shape: Optional[Union[Tuple[int, ...], List[int]]] = None, value_count: Optional[Union[merlin_standard_lib.proto.schema_bp.ValueCount, merlin_standard_lib.proto.schema_bp.ValueCountList]] = None, min_index: int = 0, tags: Optional[Union[List[str], List[merlin_standard_lib.schema.tag.Tag], List[Union[merlin_standard_lib.schema.tag.Tag, str]]]] = None, **kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source]
classmethod create_continuous(name: str, is_float: bool = True, min_value: Optional[Union[int, float]] = None, max_value: Optional[Union[int, float]] = None, disallow_nan: bool = False, disallow_inf: bool = False, is_embedding: bool = False, shape: Optional[Union[Tuple[int, ...], List[int]]] = None, value_count: Optional[Union[merlin_standard_lib.proto.schema_bp.ValueCount, merlin_standard_lib.proto.schema_bp.ValueCountList]] = None, tags: Optional[Union[List[str], List[merlin_standard_lib.schema.tag.Tag], List[Union[merlin_standard_lib.schema.tag.Tag, str]]]] = None, **kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source]
copy(**kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source]
with_name(name: str)[source]
with_tags(tags: Union[List[str], List[merlin_standard_lib.schema.tag.Tag], List[Union[merlin_standard_lib.schema.tag.Tag, str]]]) → merlin_standard_lib.schema.schema.ColumnSchema[source]
with_tags_based_on_properties(using_value_count=True, using_domain=True) → merlin_standard_lib.schema.schema.ColumnSchema[source]
with_properties(properties: Dict[str, Union[str, int, float]]) → merlin_standard_lib.schema.schema.ColumnSchema[source]
to_proto_text() → str[source]
property tags
property properties
class merlin_standard_lib.schema.schema.Schema(feature: Sequence[merlin_standard_lib.proto.schema_bp.Feature] = <betterproto._PLACEHOLDER object>, sparse_feature: List[merlin_standard_lib.proto.schema_bp.SparseFeature] = <betterproto._PLACEHOLDER object>, weighted_feature: List[merlin_standard_lib.proto.schema_bp.WeightedFeature] = <betterproto._PLACEHOLDER object>, string_domain: List[merlin_standard_lib.proto.schema_bp.StringDomain] = <betterproto._PLACEHOLDER object>, float_domain: List[merlin_standard_lib.proto.schema_bp.FloatDomain] = <betterproto._PLACEHOLDER object>, int_domain: List[merlin_standard_lib.proto.schema_bp.IntDomain] = <betterproto._PLACEHOLDER object>, default_environment: List[str] = <betterproto._PLACEHOLDER object>, annotation: merlin_standard_lib.proto.schema_bp.Annotation = <betterproto._PLACEHOLDER object>, dataset_constraints: merlin_standard_lib.proto.schema_bp.DatasetConstraints = <betterproto._PLACEHOLDER object>, tensor_representation_group: Dict[str, merlin_standard_lib.proto.schema_bp.TensorRepresentationGroup] = <betterproto._PLACEHOLDER object>)[source]

Bases: merlin_standard_lib.proto.schema_bp._Schema

A collection of column schemas for a dataset.

feature: List[merlin_standard_lib.schema.schema.ColumnSchema] = Field(name=None,type=None,default=<betterproto._PLACEHOLDER object>,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'betterproto': FieldMetadata(number=1, proto_type='message', map_types=None, group=None, wraps=None)}),_field_type=None)
classmethod create(column_schemas: Optional[Union[List[Union[merlin_standard_lib.schema.schema.ColumnSchema, str]], Dict[str, Union[merlin_standard_lib.schema.schema.ColumnSchema, str]]]] = None, **kwargs)[source]
with_tags_based_on_properties(using_value_count=True, using_domain=True) → merlin_standard_lib.schema.schema.Schema[source]
apply(selector) → merlin_standard_lib.schema.schema.Schema[source]
apply_inverse(selector) → merlin_standard_lib.schema.schema.Schema[source]
filter_columns_from_dict(input_dict)[source]
select_by_type(to_select) → merlin_standard_lib.schema.schema.Schema[source]
remove_by_type(to_remove) → merlin_standard_lib.schema.schema.Schema[source]
select_by_tag(to_select) → merlin_standard_lib.schema.schema.Schema[source]
remove_by_tag(to_remove) → merlin_standard_lib.schema.schema.Schema[source]
select_by_name(to_select) → merlin_standard_lib.schema.schema.Schema[source]
remove_by_name(to_remove) → merlin_standard_lib.schema.schema.Schema[source]
map_column_schemas(map_fn: Callable[[merlin_standard_lib.schema.schema.ColumnSchema], merlin_standard_lib.schema.schema.ColumnSchema]) → merlin_standard_lib.schema.schema.Schema[source]
filter_column_schemas(filter_fn: Callable[[merlin_standard_lib.schema.schema.ColumnSchema], bool], negate=False) → merlin_standard_lib.schema.schema.Schema[source]
categorical_cardinalities() → Dict[str, int][source]
property column_names
property column_schemas
property item_id_column_name
from_json(value: Union[str, bytes]) → merlin_standard_lib.schema.schema.Schema[source]
to_proto_text() → str[source]
from_proto_text(path_or_proto_text: str) → merlin_standard_lib.schema.schema.Schema[source]
copy(**kwargs) → merlin_standard_lib.schema.schema.Schema[source]
add(other, allow_overlap=True) → merlin_standard_lib.schema.schema.Schema[source]

merlin_standard_lib.schema.tag module

class merlin_standard_lib.schema.tag.Tag(value)[source]

Bases: enum.Enum

An enumeration.

CATEGORICAL = 'categorical'
CONTINUOUS = 'continuous'
LIST = 'list'
TEXT = 'text'
TEXT_TOKENIZED = 'text_tokenized'
TIME = 'time'
USER = 'user'
USER_ID = 'user_id'
ITEM = 'item'
ITEM_ID = 'item_id'
SESSION = 'session'
SESSION_ID = 'session_id'
CONTEXT = 'context'
TARGETS = 'target'
BINARY_CLASSIFICATION = 'binary_classification'
MULTI_CLASS_CLASSIFICATION = 'multi_class'
REGRESSION = 'regression'

Module contents