Package sentencepiece

Class SentencepieceModel.NormalizerSpec

    • Nested Class Summary

      Nested Classes 
      Modifier and Type Class Description
      static class  SentencepieceModel.NormalizerSpec.Builder
      NormalizerSpec encodes various parameters for string normalization.
      • Nested classes/interfaces inherited from class com.google.protobuf.GeneratedMessageV3.ExtendableMessage

        com.google.protobuf.GeneratedMessageV3.ExtendableMessage.ExtensionWriter
      • Nested classes/interfaces inherited from class com.google.protobuf.GeneratedMessageV3

        com.google.protobuf.GeneratedMessageV3.BuilderParent, com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<MessageType extends com.google.protobuf.GeneratedMessageV3.ExtendableMessage,​BuilderType extends com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<MessageType,​BuilderType>>, com.google.protobuf.GeneratedMessageV3.ExtendableMessage<MessageType extends com.google.protobuf.GeneratedMessageV3.ExtendableMessage>, com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<MessageType extends com.google.protobuf.GeneratedMessageV3.ExtendableMessage>, com.google.protobuf.GeneratedMessageV3.FieldAccessorTable, com.google.protobuf.GeneratedMessageV3.UnusedPrivateParameter
      • Nested classes/interfaces inherited from class com.google.protobuf.AbstractMessageLite

        com.google.protobuf.AbstractMessageLite.InternalOneOfEnum
    • Field Detail

      • PRECOMPILED_CHARSMAP_FIELD_NUMBER

        public static final int PRECOMPILED_CHARSMAP_FIELD_NUMBER
        See Also:
        Constant Field Values
      • ADD_DUMMY_PREFIX_FIELD_NUMBER

        public static final int ADD_DUMMY_PREFIX_FIELD_NUMBER
        See Also:
        Constant Field Values
      • REMOVE_EXTRA_WHITESPACES_FIELD_NUMBER

        public static final int REMOVE_EXTRA_WHITESPACES_FIELD_NUMBER
        See Also:
        Constant Field Values
      • ESCAPE_WHITESPACES_FIELD_NUMBER

        public static final int ESCAPE_WHITESPACES_FIELD_NUMBER
        See Also:
        Constant Field Values
      • NORMALIZATION_RULE_TSV_FIELD_NUMBER

        public static final int NORMALIZATION_RULE_TSV_FIELD_NUMBER
        See Also:
        Constant Field Values
    • Method Detail

      • newInstance

        protected java.lang.Object newInstance​(com.google.protobuf.GeneratedMessageV3.UnusedPrivateParameter unused)
        Overrides:
        newInstance in class com.google.protobuf.GeneratedMessageV3
      • getUnknownFields

        public final com.google.protobuf.UnknownFieldSet getUnknownFields()
        Specified by:
        getUnknownFields in interface com.google.protobuf.MessageOrBuilder
        Overrides:
        getUnknownFields in class com.google.protobuf.GeneratedMessageV3
      • getDescriptor

        public static final com.google.protobuf.Descriptors.Descriptor getDescriptor()
      • internalGetFieldAccessorTable

        protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable()
        Specified by:
        internalGetFieldAccessorTable in class com.google.protobuf.GeneratedMessageV3
      • hasPrecompiledCharsmap

        public boolean hasPrecompiledCharsmap()
         Pre-compiled normalization rule created by the
         Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
         Usually this field is set by the Builder::GetNormalizerSpec() method.
         
        optional bytes precompiled_charsmap = 2;
        Specified by:
        hasPrecompiledCharsmap in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the precompiledCharsmap field is set.
      • getPrecompiledCharsmap

        public com.google.protobuf.ByteString getPrecompiledCharsmap()
         Pre-compiled normalization rule created by the
         Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
         Usually this field is set by the Builder::GetNormalizerSpec() method.
         
        optional bytes precompiled_charsmap = 2;
        Specified by:
        getPrecompiledCharsmap in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The precompiledCharsmap.
      • hasAddDummyPrefix

        public boolean hasAddDummyPrefix()
         Adds dummy whitespace at the beginning of text in order to
         treat "world" in "world" and "hello world" in the same way.
         
        optional bool add_dummy_prefix = 3 [default = true];
        Specified by:
        hasAddDummyPrefix in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the addDummyPrefix field is set.
      • getAddDummyPrefix

        public boolean getAddDummyPrefix()
         Adds dummy whitespace at the beginning of text in order to
         treat "world" in "world" and "hello world" in the same way.
         
        optional bool add_dummy_prefix = 3 [default = true];
        Specified by:
        getAddDummyPrefix in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The addDummyPrefix.
      • hasRemoveExtraWhitespaces

        public boolean hasRemoveExtraWhitespaces()
         Removes leading, trailing, and duplicate internal whitespace.
         
        optional bool remove_extra_whitespaces = 4 [default = true];
        Specified by:
        hasRemoveExtraWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
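        Whether the removeExtraWhitespaces field is set.
      • getRemoveExtraWhitespaces

        public boolean getRemoveExtraWhitespaces()
         Removes leading, trailing, and duplicate internal whitespace.
         
        optional bool remove_extra_whitespaces = 4 [default = true];
        Specified by:
        getRemoveExtraWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The removeExtraWhitespaces.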
      • hasEscapeWhitespaces

        public boolean hasEscapeWhitespaces()
         Replaces whitespace with meta symbol.
         This field must be true to train a SentencePiece model.
         
        optional bool escape_whitespaces = 5 [default = true];
        Specified by:
        hasEscapeWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the escapeWhitespaces field is set.
      • getEscapeWhitespaces

        public boolean getEscapeWhitespaces()
         Replaces whitespace with meta symbol.
         This field must be true to train a SentencePiece model.
         
        optional bool escape_whitespaces = 5 [default = true];
        Specified by:
        getEscapeWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The escapeWhitespaces.
      • hasNormalizationRuleTsv

        public boolean hasNormalizationRuleTsv()
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Specified by:
        hasNormalizationRuleTsv in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the normalizationRuleTsv field is set.
      • getNormalizationRuleTsv

        public java.lang.String getNormalizationRuleTsv()
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Specified by:
        getNormalizationRuleTsv in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The normalizationRuleTsv.
      • getNormalizationRuleTsvBytes

        public com.google.protobuf.ByteString getNormalizationRuleTsvBytes()
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Specified by:
        getNormalizationRuleTsvBytes in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The bytes for normalizationRuleTsv.
      • isInitialized

        public final boolean isInitialized()
        Specified by:
        isInitialized in interface com.google.protobuf.MessageLiteOrBuilder
        Overrides:
        isInitialized in class com.google.protobuf.GeneratedMessageV3.ExtendableMessage<SentencepieceModel.NormalizerSpec>
      • writeTo

        public void writeTo​(com.google.protobuf.CodedOutputStream output)
                     throws java.io.IOException
        Specified by:
        writeTo in interface com.google.protobuf.MessageLite
        Overrides:
        writeTo in class com.google.protobuf.GeneratedMessageV3
        Throws:
        java.io.IOException
      • getSerializedSize

        public int getSerializedSize()
        Specified by:
        getSerializedSize in interface com.google.protobuf.MessageLite
        Overrides:
        getSerializedSize in class com.google.protobuf.GeneratedMessageV3
      • equals

        public boolean equals​(java.lang.Object obj)
        Specified by:
        equals in interface com.google.protobuf.Message
        Overrides:
        equals in class com.google.protobuf.AbstractMessage
      • hashCode

        public int hashCode()
        Specified by:
        hashCode in interface com.google.protobuf.Message
        Overrides:
        hashCode in class com.google.protobuf.AbstractMessage
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(java.nio.ByteBuffer data)
                                                           throws com.google.protobuf.InvalidProtocolBufferException
        Throws:
        com.google.protobuf.InvalidProtocolBufferException
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(java.nio.ByteBuffer data,
                                                                  com.google.protobuf.ExtensionRegistryLite extensionRegistry)
                                                           throws com.google.protobuf.InvalidProtocolBufferException
        Throws:
        com.google.protobuf.InvalidProtocolBufferException
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(com.google.protobuf.ByteString data)
                                                           throws com.google.protobuf.InvalidProtocolBufferException
        Throws:
        com.google.protobuf.InvalidProtocolBufferException
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(com.google.protobuf.ByteString data,
                                                                  com.google.protobuf.ExtensionRegistryLite extensionRegistry)
                                                           throws com.google.protobuf.InvalidProtocolBufferException
        Throws:
        com.google.protobuf.InvalidProtocolBufferException
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(byte[] data)
                                                           throws com.google.protobuf.InvalidProtocolBufferException
        Throws:
        com.google.protobuf.InvalidProtocolBufferException
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(byte[] data,
                                                                  com.google.protobuf.ExtensionRegistryLite extensionRegistry)
                                                           throws com.google.protobuf.InvalidProtocolBufferException
        Throws:
        com.google.protobuf.InvalidProtocolBufferException
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(java.io.InputStream input,
                                                                  com.google.protobuf.ExtensionRegistryLite extensionRegistry)
                                                           throws java.io.IOException
        Throws:
        java.io.IOException
      • parseDelimitedFrom

        public static SentencepieceModel.NormalizerSpec parseDelimitedFrom​(java.io.InputStream input)
                                                                    throws java.io.IOException
        Throws:
        java.io.IOException
      • parseDelimitedFrom

        public static SentencepieceModel.NormalizerSpec parseDelimitedFrom​(java.io.InputStream input,
                                                                           com.google.protobuf.ExtensionRegistryLite extensionRegistry)
                                                                    throws java.io.IOException
        Throws:
        java.io.IOException
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(com.google.protobuf.CodedInputStream input)
                                                           throws java.io.IOException
        Throws:
        java.io.IOException
      • parseFrom

        public static SentencepieceModel.NormalizerSpec parseFrom​(com.google.protobuf.CodedInputStream input,
                                                                  com.google.protobuf.ExtensionRegistryLite extensionRegistry)
                                                           throws java.io.IOException
        Throws:
        java.io.IOException
      • newBuilderForType

        public SentencepieceModel.NormalizerSpec.Builder newBuilderForType()
        Specified by:
        newBuilderForType in interface com.google.protobuf.Message
        Specified by:
        newBuilderForType in interface com.google.protobuf.MessageLite
      • toBuilder

        public SentencepieceModel.NormalizerSpec.Builder toBuilder()
        Specified by:
        toBuilder in interface com.google.protobuf.Message
        Specified by:
        toBuilder in interface com.google.protobuf.MessageLite
      • newBuilderForType

        protected SentencepieceModel.NormalizerSpec.Builder newBuilderForType​(com.google.protobuf.GeneratedMessageV3.BuilderParent parent)
        Specified by:
        newBuilderForType in class com.google.protobuf.GeneratedMessageV3
      • getParserForType

        public com.google.protobuf.Parser<SentencepieceModel.NormalizerSpec> getParserForType()
        Specified by:
        getParserForType in interface com.google.protobuf.Message
        Specified by:
        getParserForType in interface com.google.protobuf.MessageLite
        Overrides:
        getParserForType in class com.google.protobuf.GeneratedMessageV3
      • getDefaultInstanceForType

        public SentencepieceModel.NormalizerSpec getDefaultInstanceForType()
        Specified by:
        getDefaultInstanceForType in interface com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<SentencepieceModel.NormalizerSpec>
        Specified by:
        getDefaultInstanceForType in interface com.google.protobuf.MessageLiteOrBuilder
        Specified by:
        getDefaultInstanceForType in interface com.google.protobuf.MessageOrBuilder