merlin_standard_lib.schema package
Submodules
merlin_standard_lib.schema.schema module
-
class
merlin_standard_lib.schema.schema.
ColumnSchema
(name: str = <betterproto._PLACEHOLDER object>, deprecated: bool = <betterproto._PLACEHOLDER object>, presence: merlin_standard_lib.proto.schema_bp.FeaturePresence = <betterproto._PLACEHOLDER object>, group_presence: merlin_standard_lib.proto.schema_bp.FeaturePresenceWithinGroup = <betterproto._PLACEHOLDER object>, shape: merlin_standard_lib.proto.schema_bp.FixedShape = <betterproto._PLACEHOLDER object>, value_count: merlin_standard_lib.proto.schema_bp.ValueCount = <betterproto._PLACEHOLDER object>, value_counts: merlin_standard_lib.proto.schema_bp.ValueCountList = <betterproto._PLACEHOLDER object>, type: merlin_standard_lib.proto.schema_bp.FeatureType = <betterproto._PLACEHOLDER object>, domain: str = <betterproto._PLACEHOLDER object>, int_domain: merlin_standard_lib.proto.schema_bp.IntDomain = <betterproto._PLACEHOLDER object>, float_domain: merlin_standard_lib.proto.schema_bp.FloatDomain = <betterproto._PLACEHOLDER object>, string_domain: merlin_standard_lib.proto.schema_bp.StringDomain = <betterproto._PLACEHOLDER object>, bool_domain: merlin_standard_lib.proto.schema_bp.BoolDomain = <betterproto._PLACEHOLDER object>, struct_domain: merlin_standard_lib.proto.schema_bp.StructDomain = <betterproto._PLACEHOLDER object>, natural_language_domain: merlin_standard_lib.proto.schema_bp.NaturalLanguageDomain = <betterproto._PLACEHOLDER object>, image_domain: merlin_standard_lib.proto.schema_bp.ImageDomain = <betterproto._PLACEHOLDER object>, mid_domain: merlin_standard_lib.proto.schema_bp.MIDDomain = <betterproto._PLACEHOLDER object>, url_domain: merlin_standard_lib.proto.schema_bp.URLDomain = <betterproto._PLACEHOLDER object>, time_domain: merlin_standard_lib.proto.schema_bp.TimeDomain = <betterproto._PLACEHOLDER object>, time_of_day_domain: merlin_standard_lib.proto.schema_bp.TimeOfDayDomain = <betterproto._PLACEHOLDER object>, distribution_constraints: merlin_standard_lib.proto.schema_bp.DistributionConstraints = <betterproto._PLACEHOLDER object>, annotation: merlin_standard_lib.proto.schema_bp.Annotation = <betterproto._PLACEHOLDER object>, skew_comparator: merlin_standard_lib.proto.schema_bp.FeatureComparator = <betterproto._PLACEHOLDER object>, drift_comparator: merlin_standard_lib.proto.schema_bp.FeatureComparator = <betterproto._PLACEHOLDER object>, in_environment: List[str] = <betterproto._PLACEHOLDER object>, not_in_environment: List[str] = <betterproto._PLACEHOLDER object>, lifecycle_stage: merlin_standard_lib.proto.schema_bp.LifecycleStage = <betterproto._PLACEHOLDER object>, unique_constraints: merlin_standard_lib.proto.schema_bp.UniqueConstraints = <betterproto._PLACEHOLDER object>)[source] Bases:
merlin_standard_lib.proto.schema_bp.Feature
-
classmethod
create_categorical
(name: str, num_items: int, shape: Optional[Union[Tuple[int, ...], List[int]]] = None, value_count: Optional[Union[merlin_standard_lib.proto.schema_bp.ValueCount, merlin_standard_lib.proto.schema_bp.ValueCountList]] = None, min_index: int = 0, tags: Optional[Union[List[str], List[merlin_standard_lib.schema.tag.Tag], List[Union[merlin_standard_lib.schema.tag.Tag, str]]]] = None, **kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source]
-
classmethod
create_continuous
(name: str, is_float: bool = True, min_value: Optional[Union[int, float]] = None, max_value: Optional[Union[int, float]] = None, disallow_nan: bool = False, disallow_inf: bool = False, is_embedding: bool = False, shape: Optional[Union[Tuple[int, ...], List[int]]] = None, value_count: Optional[Union[merlin_standard_lib.proto.schema_bp.ValueCount, merlin_standard_lib.proto.schema_bp.ValueCountList]] = None, tags: Optional[Union[List[str], List[merlin_standard_lib.schema.tag.Tag], List[Union[merlin_standard_lib.schema.tag.Tag, str]]]] = None, **kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source]
-
copy
(**kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source]
-
with_properties
(properties: Dict[str, Union[str, int, float]]) → merlin_standard_lib.schema.schema.ColumnSchema[source]
-
property
properties
-
classmethod
-
class
merlin_standard_lib.schema.schema.
Schema
(feature: Sequence[merlin_standard_lib.proto.schema_bp.Feature] = <betterproto._PLACEHOLDER object>, sparse_feature: List[merlin_standard_lib.proto.schema_bp.SparseFeature] = <betterproto._PLACEHOLDER object>, weighted_feature: List[merlin_standard_lib.proto.schema_bp.WeightedFeature] = <betterproto._PLACEHOLDER object>, string_domain: List[merlin_standard_lib.proto.schema_bp.StringDomain] = <betterproto._PLACEHOLDER object>, float_domain: List[merlin_standard_lib.proto.schema_bp.FloatDomain] = <betterproto._PLACEHOLDER object>, int_domain: List[merlin_standard_lib.proto.schema_bp.IntDomain] = <betterproto._PLACEHOLDER object>, default_environment: List[str] = <betterproto._PLACEHOLDER object>, annotation: merlin_standard_lib.proto.schema_bp.Annotation = <betterproto._PLACEHOLDER object>, dataset_constraints: merlin_standard_lib.proto.schema_bp.DatasetConstraints = <betterproto._PLACEHOLDER object>, tensor_representation_group: Dict[str, merlin_standard_lib.proto.schema_bp.TensorRepresentationGroup] = <betterproto._PLACEHOLDER object>)[source] Bases:
merlin_standard_lib.proto.schema_bp._Schema
A collection of column schemas for a dataset.
-
feature
: List[merlin_standard_lib.schema.schema.ColumnSchema] = Field(name=None,type=None,default=<betterproto._PLACEHOLDER object>,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'betterproto': FieldMetadata(number=1, proto_type='message', map_types=None, group=None, wraps=None)}),_field_type=None)
-
classmethod
create
(column_schemas: Optional[Union[List[Union[merlin_standard_lib.schema.schema.ColumnSchema, str]], Dict[str, Union[merlin_standard_lib.schema.schema.ColumnSchema, str]]]] = None, **kwargs)[source]
-
apply
(selector) → merlin_standard_lib.schema.schema.Schema[source]
-
apply_inverse
(selector) → merlin_standard_lib.schema.schema.Schema[source]
-
select_by_type
(to_select) → merlin_standard_lib.schema.schema.Schema[source]
-
remove_by_type
(to_remove) → merlin_standard_lib.schema.schema.Schema[source]
-
select_by_tag
(to_select) → merlin_standard_lib.schema.schema.Schema[source]
-
remove_by_tag
(to_remove) → merlin_standard_lib.schema.schema.Schema[source]
-
select_by_name
(to_select) → merlin_standard_lib.schema.schema.Schema[source]
-
remove_by_name
(to_remove) → merlin_standard_lib.schema.schema.Schema[source]
-
map_column_schemas
(map_fn: Callable[[merlin_standard_lib.schema.schema.ColumnSchema], merlin_standard_lib.schema.schema.ColumnSchema]) → merlin_standard_lib.schema.schema.Schema[source]
-
filter_column_schemas
(filter_fn: Callable[[merlin_standard_lib.schema.schema.ColumnSchema], bool], negate=False) → merlin_standard_lib.schema.schema.Schema[source]
-
property
column_names
-
property
column_schemas
-
property
item_id_column_name
-
from_json
(value: Union[str, bytes]) → merlin_standard_lib.schema.schema.Schema[source]
-
from_proto_text
(path_or_proto_text: str) → merlin_standard_lib.schema.schema.Schema[source]
-
copy
(**kwargs) → merlin_standard_lib.schema.schema.Schema[source]
-
add
(other, allow_overlap=True) → merlin_standard_lib.schema.schema.Schema[source]
-
merlin_standard_lib.schema.tag module
-
class
merlin_standard_lib.schema.tag.
Tag
(value)[source] Bases:
enum.Enum
An enumeration.
-
CATEGORICAL
= 'categorical'
-
CONTINUOUS
= 'continuous'
-
LIST
= 'list'
-
TEXT
= 'text'
-
TEXT_TOKENIZED
= 'text_tokenized'
-
TIME
= 'time'
-
USER
= 'user'
-
USER_ID
= 'user_id'
-
ITEM
= 'item'
-
ITEM_ID
= 'item_id'
-
SESSION
= 'session'
-
SESSION_ID
= 'session_id'
-
CONTEXT
= 'context'
-
TARGETS
= 'target'
-
BINARY_CLASSIFICATION
= 'binary_classification'
-
MULTI_CLASS_CLASSIFICATION
= 'multi_class'
-
REGRESSION
= 'regression'
-