Package sentencepiece

Class SentencepieceModel.NormalizerSpec.Builder

    • Method Detail

      • getDescriptor

        public static final com.google.protobuf.Descriptors.Descriptor getDescriptor()
      • internalGetFieldAccessorTable

        protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable()
        Specified by:
        internalGetFieldAccessorTable in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.NormalizerSpec.Builder>
      • getDescriptorForType

        public com.google.protobuf.Descriptors.Descriptor getDescriptorForType()
        Specified by:
        getDescriptorForType in interface com.google.protobuf.Message.Builder
        Specified by:
        getDescriptorForType in interface com.google.protobuf.MessageOrBuilder
        Overrides:
        getDescriptorForType in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.NormalizerSpec.Builder>
      • getDefaultInstanceForType

        public SentencepieceModel.NormalizerSpec getDefaultInstanceForType()
        Specified by:
        getDefaultInstanceForType in interface com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<SentencepieceModel.NormalizerSpec>
        Specified by:
        getDefaultInstanceForType in interface com.google.protobuf.MessageLiteOrBuilder
        Specified by:
        getDefaultInstanceForType in interface com.google.protobuf.MessageOrBuilder
      • build

        public SentencepieceModel.NormalizerSpec build()
        Specified by:
        build in interface com.google.protobuf.Message.Builder
        Specified by:
        build in interface com.google.protobuf.MessageLite.Builder
      • buildPartial

        public SentencepieceModel.NormalizerSpec buildPartial()
        Specified by:
        buildPartial in interface com.google.protobuf.Message.Builder
        Specified by:
        buildPartial in interface com.google.protobuf.MessageLite.Builder
      • mergeFrom

        public SentencepieceModel.NormalizerSpec.Builder mergeFrom​(com.google.protobuf.CodedInputStream input,
                                                                   com.google.protobuf.ExtensionRegistryLite extensionRegistry)
                                                            throws java.io.IOException
        Specified by:
        mergeFrom in interface com.google.protobuf.Message.Builder
        Specified by:
        mergeFrom in interface com.google.protobuf.MessageLite.Builder
        Overrides:
        mergeFrom in class com.google.protobuf.AbstractMessage.Builder<SentencepieceModel.NormalizerSpec.Builder>
        Throws:
        java.io.IOException
      • setName

        public SentencepieceModel.NormalizerSpec.Builder setName​(java.lang.String value)
         Name of the normalization rule.
         
        optional string name = 1;
        Parameters:
        value - The name to set.
        Returns:
        This builder for chaining.
      • setNameBytes

        public SentencepieceModel.NormalizerSpec.Builder setNameBytes​(com.google.protobuf.ByteString value)
         Name of the normalization rule.
         
        optional string name = 1;
        Parameters:
        value - The bytes for name to set.
        Returns:
        This builder for chaining.
      • hasPrecompiledCharsmap

        public boolean hasPrecompiledCharsmap()
         Pre-compiled normalization rule created by the
         Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
         Usually this field is set by the Builder::GetNormalizerSpec() method.
         
        optional bytes precompiled_charsmap = 2;
        Specified by:
        hasPrecompiledCharsmap in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the precompiledCharsmap field is set.
      • getPrecompiledCharsmap

        public com.google.protobuf.ByteString getPrecompiledCharsmap()
         Pre-compiled normalization rule created by the
         Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
         Usually this field is set by the Builder::GetNormalizerSpec() method.
         
        optional bytes precompiled_charsmap = 2;
        Specified by:
        getPrecompiledCharsmap in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The precompiledCharsmap.
      • setPrecompiledCharsmap

        public SentencepieceModel.NormalizerSpec.Builder setPrecompiledCharsmap​(com.google.protobuf.ByteString value)
         Pre-compiled normalization rule created by the
         Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
         Usually this field is set by the Builder::GetNormalizerSpec() method.
         
        optional bytes precompiled_charsmap = 2;
        Parameters:
        value - The precompiledCharsmap to set.
        Returns:
        This builder for chaining.
      • clearPrecompiledCharsmap

        public SentencepieceModel.NormalizerSpec.Builder clearPrecompiledCharsmap()
         Pre-compiled normalization rule created by the
         Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
         Usually this field is set by the Builder::GetNormalizerSpec() method.
         
        optional bytes precompiled_charsmap = 2;
        Returns:
        This builder for chaining.
      • hasAddDummyPrefix

        public boolean hasAddDummyPrefix()
         Adds dummy whitespace at the beginning of text in order to
         treat "world" in "world" and "hello world" in the same way.
         
        optional bool add_dummy_prefix = 3 [default = true];
        Specified by:
        hasAddDummyPrefix in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the addDummyPrefix field is set.
      • getAddDummyPrefix

        public boolean getAddDummyPrefix()
         Adds dummy whitespace at the beginning of text in order to
         treat "world" in "world" and "hello world" in the same way.
         
        optional bool add_dummy_prefix = 3 [default = true];
        Specified by:
        getAddDummyPrefix in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The addDummyPrefix.
      • setAddDummyPrefix

        public SentencepieceModel.NormalizerSpec.Builder setAddDummyPrefix​(boolean value)
         Adds dummy whitespace at the beginning of text in order to
         treat "world" in "world" and "hello world" in the same way.
         
        optional bool add_dummy_prefix = 3 [default = true];
        Parameters:
        value - The addDummyPrefix to set.
        Returns:
        This builder for chaining.
      • clearAddDummyPrefix

        public SentencepieceModel.NormalizerSpec.Builder clearAddDummyPrefix()
         Adds dummy whitespace at the beginning of text in order to
         treat "world" in "world" and "hello world" in the same way.
         
        optional bool add_dummy_prefix = 3 [default = true];
        Returns:
        This builder for chaining.
      • hasRemoveExtraWhitespaces

        public boolean hasRemoveExtraWhitespaces()
         Removes leading, trailing, and duplicate internal whitespace.
         
        optional bool remove_extra_whitespaces = 4 [default = true];
        Specified by:
        hasRemoveExtraWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the removeExtraWhitespaces field is set.
      • setRemoveExtraWhitespaces

        public SentencepieceModel.NormalizerSpec.Builder setRemoveExtraWhitespaces​(boolean value)
         Removes leading, trailing, and duplicate internal whitespace.
         
        optional bool remove_extra_whitespaces = 4 [default = true];
        Parameters:
        value - The removeExtraWhitespaces to set.
        Returns:
        This builder for chaining.
      • clearRemoveExtraWhitespaces

        public SentencepieceModel.NormalizerSpec.Builder clearRemoveExtraWhitespaces()
         Removes leading, trailing, and duplicate internal whitespace.
         
        optional bool remove_extra_whitespaces = 4 [default = true];
        Returns:
        This builder for chaining.
      • hasEscapeWhitespaces

        public boolean hasEscapeWhitespaces()
         Replaces whitespace with the meta symbol.
         This field must be true to train a SentencePiece model.
         
        optional bool escape_whitespaces = 5 [default = true];
        Specified by:
        hasEscapeWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the escapeWhitespaces field is set.
      • getEscapeWhitespaces

        public boolean getEscapeWhitespaces()
         Replaces whitespace with the meta symbol.
         This field must be true to train a SentencePiece model.
         
        optional bool escape_whitespaces = 5 [default = true];
        Specified by:
        getEscapeWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The escapeWhitespaces.
      • setEscapeWhitespaces

        public SentencepieceModel.NormalizerSpec.Builder setEscapeWhitespaces​(boolean value)
         Replaces whitespace with the meta symbol.
         This field must be true to train a SentencePiece model.
         
        optional bool escape_whitespaces = 5 [default = true];
        Parameters:
        value - The escapeWhitespaces to set.
        Returns:
        This builder for chaining.
      • clearEscapeWhitespaces

        public SentencepieceModel.NormalizerSpec.Builder clearEscapeWhitespaces()
         Replaces whitespace with the meta symbol.
         This field must be true to train a SentencePiece model.
         
        optional bool escape_whitespaces = 5 [default = true];
        Returns:
        This builder for chaining.
      • hasNormalizationRuleTsv

        public boolean hasNormalizationRuleTsv()
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Specified by:
        hasNormalizationRuleTsv in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        Whether the normalizationRuleTsv field is set.
      • getNormalizationRuleTsv

        public java.lang.String getNormalizationRuleTsv()
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Specified by:
        getNormalizationRuleTsv in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The normalizationRuleTsv.
      • getNormalizationRuleTsvBytes

        public com.google.protobuf.ByteString getNormalizationRuleTsvBytes()
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Specified by:
        getNormalizationRuleTsvBytes in interface SentencepieceModel.NormalizerSpecOrBuilder
        Returns:
        The bytes for normalizationRuleTsv.
      • setNormalizationRuleTsv

        public SentencepieceModel.NormalizerSpec.Builder setNormalizationRuleTsv​(java.lang.String value)
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Parameters:
        value - The normalizationRuleTsv to set.
        Returns:
        This builder for chaining.
      • clearNormalizationRuleTsv

        public SentencepieceModel.NormalizerSpec.Builder clearNormalizationRuleTsv()
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Returns:
        This builder for chaining.
      • setNormalizationRuleTsvBytes

        public SentencepieceModel.NormalizerSpec.Builder setNormalizationRuleTsvBytes​(com.google.protobuf.ByteString value)
         Custom normalization rule file in TSV format.
         https://github.com/google/sentencepiece/blob/master/doc/normalization.md
         This field is only used in the SentencePieceTrainer::Train() method, which
         compiles the rule into the binary rule stored in `precompiled_charsmap`.
         
        optional string normalization_rule_tsv = 6;
        Parameters:
        value - The bytes for normalizationRuleTsv to set.
        Returns:
        This builder for chaining.