merlin_standard_lib.schema package
Submodules
merlin_standard_lib.schema.schema module
- 
class 
merlin_standard_lib.schema.schema.ColumnSchema(name: str = <betterproto._PLACEHOLDER object>, deprecated: bool = <betterproto._PLACEHOLDER object>, presence: merlin_standard_lib.proto.schema_bp.FeaturePresence = <betterproto._PLACEHOLDER object>, group_presence: merlin_standard_lib.proto.schema_bp.FeaturePresenceWithinGroup = <betterproto._PLACEHOLDER object>, shape: merlin_standard_lib.proto.schema_bp.FixedShape = <betterproto._PLACEHOLDER object>, value_count: merlin_standard_lib.proto.schema_bp.ValueCount = <betterproto._PLACEHOLDER object>, value_counts: merlin_standard_lib.proto.schema_bp.ValueCountList = <betterproto._PLACEHOLDER object>, type: merlin_standard_lib.proto.schema_bp.FeatureType = <betterproto._PLACEHOLDER object>, domain: str = <betterproto._PLACEHOLDER object>, int_domain: merlin_standard_lib.proto.schema_bp.IntDomain = <betterproto._PLACEHOLDER object>, float_domain: merlin_standard_lib.proto.schema_bp.FloatDomain = <betterproto._PLACEHOLDER object>, string_domain: merlin_standard_lib.proto.schema_bp.StringDomain = <betterproto._PLACEHOLDER object>, bool_domain: merlin_standard_lib.proto.schema_bp.BoolDomain = <betterproto._PLACEHOLDER object>, struct_domain: merlin_standard_lib.proto.schema_bp.StructDomain = <betterproto._PLACEHOLDER object>, natural_language_domain: merlin_standard_lib.proto.schema_bp.NaturalLanguageDomain = <betterproto._PLACEHOLDER object>, image_domain: merlin_standard_lib.proto.schema_bp.ImageDomain = <betterproto._PLACEHOLDER object>, mid_domain: merlin_standard_lib.proto.schema_bp.MIDDomain = <betterproto._PLACEHOLDER object>, url_domain: merlin_standard_lib.proto.schema_bp.URLDomain = <betterproto._PLACEHOLDER object>, time_domain: merlin_standard_lib.proto.schema_bp.TimeDomain = <betterproto._PLACEHOLDER object>, time_of_day_domain: merlin_standard_lib.proto.schema_bp.TimeOfDayDomain = <betterproto._PLACEHOLDER object>, distribution_constraints: merlin_standard_lib.proto.schema_bp.DistributionConstraints = 
<betterproto._PLACEHOLDER object>, annotation: merlin_standard_lib.proto.schema_bp.Annotation = <betterproto._PLACEHOLDER object>, skew_comparator: merlin_standard_lib.proto.schema_bp.FeatureComparator = <betterproto._PLACEHOLDER object>, drift_comparator: merlin_standard_lib.proto.schema_bp.FeatureComparator = <betterproto._PLACEHOLDER object>, in_environment: List[str] = <betterproto._PLACEHOLDER object>, not_in_environment: List[str] = <betterproto._PLACEHOLDER object>, lifecycle_stage: merlin_standard_lib.proto.schema_bp.LifecycleStage = <betterproto._PLACEHOLDER object>, unique_constraints: merlin_standard_lib.proto.schema_bp.UniqueConstraints = <betterproto._PLACEHOLDER object>)[source] Bases:
merlin_standard_lib.proto.schema_bp.Feature
- 
classmethod 
create_categorical(name: str, num_items: int, shape: Optional[Union[Tuple[int, ...], List[int]]] = None, value_count: Optional[Union[merlin_standard_lib.proto.schema_bp.ValueCount, merlin_standard_lib.proto.schema_bp.ValueCountList]] = None, min_index: int = 0, tags: Optional[Union[List[str], List[merlin_standard_lib.schema.tag.Tag], List[Union[merlin_standard_lib.schema.tag.Tag, str]]]] = None, **kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source] 
- 
classmethod 
create_continuous(name: str, is_float: bool = True, min_value: Optional[Union[int, float]] = None, max_value: Optional[Union[int, float]] = None, disallow_nan: bool = False, disallow_inf: bool = False, is_embedding: bool = False, shape: Optional[Union[Tuple[int, ...], List[int]]] = None, value_count: Optional[Union[merlin_standard_lib.proto.schema_bp.ValueCount, merlin_standard_lib.proto.schema_bp.ValueCountList]] = None, tags: Optional[Union[List[str], List[merlin_standard_lib.schema.tag.Tag], List[Union[merlin_standard_lib.schema.tag.Tag, str]]]] = None, **kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source] 
- 
copy(**kwargs) → merlin_standard_lib.schema.schema.ColumnSchema[source] 
- 
with_properties(properties: Dict[str, Union[str, int, float]]) → merlin_standard_lib.schema.schema.ColumnSchema[source] 
- 
property 
properties 
- 
classmethod 
 
- 
class 
merlin_standard_lib.schema.schema.Schema(feature: Sequence[merlin_standard_lib.proto.schema_bp.Feature] = <betterproto._PLACEHOLDER object>, sparse_feature: List[merlin_standard_lib.proto.schema_bp.SparseFeature] = <betterproto._PLACEHOLDER object>, weighted_feature: List[merlin_standard_lib.proto.schema_bp.WeightedFeature] = <betterproto._PLACEHOLDER object>, string_domain: List[merlin_standard_lib.proto.schema_bp.StringDomain] = <betterproto._PLACEHOLDER object>, float_domain: List[merlin_standard_lib.proto.schema_bp.FloatDomain] = <betterproto._PLACEHOLDER object>, int_domain: List[merlin_standard_lib.proto.schema_bp.IntDomain] = <betterproto._PLACEHOLDER object>, default_environment: List[str] = <betterproto._PLACEHOLDER object>, annotation: merlin_standard_lib.proto.schema_bp.Annotation = <betterproto._PLACEHOLDER object>, dataset_constraints: merlin_standard_lib.proto.schema_bp.DatasetConstraints = <betterproto._PLACEHOLDER object>, tensor_representation_group: Dict[str, merlin_standard_lib.proto.schema_bp.TensorRepresentationGroup] = <betterproto._PLACEHOLDER object>)[source] Bases:
merlin_standard_lib.proto.schema_bp._Schema
A collection of column schemas for a dataset.
- 
feature: List[merlin_standard_lib.schema.schema.ColumnSchema] = Field(name=None,type=None,default=<betterproto._PLACEHOLDER object>,default_factory=<dataclasses._MISSING_TYPE object>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'betterproto': FieldMetadata(number=1, proto_type='message', map_types=None, group=None, wraps=None)}),_field_type=None) 
- 
classmethod 
create(column_schemas: Optional[Union[List[Union[merlin_standard_lib.schema.schema.ColumnSchema, str]], Dict[str, Union[merlin_standard_lib.schema.schema.ColumnSchema, str]]]] = None, **kwargs)[source] 
- 
apply(selector) → merlin_standard_lib.schema.schema.Schema[source] 
- 
apply_inverse(selector) → merlin_standard_lib.schema.schema.Schema[source] 
- 
select_by_type(to_select) → merlin_standard_lib.schema.schema.Schema[source] 
- 
remove_by_type(to_remove) → merlin_standard_lib.schema.schema.Schema[source] 
- 
select_by_tag(to_select) → merlin_standard_lib.schema.schema.Schema[source] 
- 
remove_by_tag(to_remove) → merlin_standard_lib.schema.schema.Schema[source] 
- 
select_by_name(to_select) → merlin_standard_lib.schema.schema.Schema[source] 
- 
remove_by_name(to_remove) → merlin_standard_lib.schema.schema.Schema[source] 
- 
map_column_schemas(map_fn: Callable[[merlin_standard_lib.schema.schema.ColumnSchema], merlin_standard_lib.schema.schema.ColumnSchema]) → merlin_standard_lib.schema.schema.Schema[source] 
- 
filter_column_schemas(filter_fn: Callable[[merlin_standard_lib.schema.schema.ColumnSchema], bool], negate=False) → merlin_standard_lib.schema.schema.Schema[source] 
- 
property 
column_names 
- 
property 
column_schemas 
- 
property 
item_id_column_name 
- 
from_json(value: Union[str, bytes]) → merlin_standard_lib.schema.schema.Schema[source] 
- 
from_proto_text(path_or_proto_text: str) → merlin_standard_lib.schema.schema.Schema[source] 
- 
copy(**kwargs) → merlin_standard_lib.schema.schema.Schema[source] 
- 
add(other, allow_overlap=True) → merlin_standard_lib.schema.schema.Schema[source] 
- 
 
merlin_standard_lib.schema.tag module
- 
class 
merlin_standard_lib.schema.tag.Tag(value)[source] Bases:
enum.Enum
An enumeration.
- 
CATEGORICAL= 'categorical' 
- 
CONTINUOUS= 'continuous' 
- 
LIST= 'list' 
- 
TEXT= 'text' 
- 
TEXT_TOKENIZED= 'text_tokenized' 
- 
TIME= 'time' 
- 
USER= 'user' 
- 
USER_ID= 'user_id' 
- 
ITEM= 'item' 
- 
ITEM_ID= 'item_id' 
- 
SESSION= 'session' 
- 
SESSION_ID= 'session_id' 
- 
CONTEXT= 'context' 
- 
TARGETS= 'target' 
- 
BINARY_CLASSIFICATION= 'binary_classification' 
- 
MULTI_CLASS_CLASSIFICATION= 'multi_class' 
- 
REGRESSION= 'regression' 
-