public final class StandardTokenizerImpl extends java.lang.Object implements StandardTokenizerInterface
Modifier and Type | Field and Description |
---|---|
static int |
HANGUL_TYPE |
static int |
HIRAGANA_TYPE |
static int |
IDEOGRAPHIC_TYPE |
static int |
KATAKANA_TYPE |
static int |
NUMERIC_TYPE
Numbers
|
static int |
SOUTH_EAST_ASIAN_TYPE
Chars in class \p{Line_Break = Complex_Context} are from South East Asian
scripts (Thai, Lao, Myanmar, Khmer, etc.).
|
static int |
WORD_TYPE
Alphanumeric sequences
|
static int |
YYEOF
This character denotes the end of file
|
static int |
YYINITIAL
lexical states
|
Constructor and Description |
---|
StandardTokenizerImpl(java.io.Reader in)
Creates a new scanner
|
Modifier and Type | Method and Description |
---|---|
char[] |
getArray() |
byte[] |
getBytes() |
int |
getNextToken()
Resumes scanning until the next regular expression is matched,
the end of input is encountered or an I/O-Error occurs.
|
java.lang.String |
getText() |
void |
yybegin(int newState)
Enters a new lexical state
|
int |
yychar()
Returns the current position.
|
char |
yycharat(int pos)
Returns the character at position pos from the
matched text.
|
void |
yyclose()
Closes the input stream.
|
int |
yylength()
Returns the length of the matched text region.
|
void |
yypushback(int number)
Pushes the specified amount of characters back into the input stream.
|
void |
yyreset(java.io.Reader reader)
Resets the scanner to read from a new input stream.
|
int |
yystate()
Returns the current lexical state.
|
java.lang.String |
yytext()
Returns the text matched by the current regular expression.
|
public static final int YYEOF
public static final int YYINITIAL
public static final int WORD_TYPE
public static final int NUMERIC_TYPE
public static final int SOUTH_EAST_ASIAN_TYPE
See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
public static final int IDEOGRAPHIC_TYPE
public static final int HIRAGANA_TYPE
public static final int KATAKANA_TYPE
public static final int HANGUL_TYPE
public StandardTokenizerImpl(java.io.Reader in)
in
- the java.io.Reader to read input from.
public final int yychar()
StandardTokenizerInterface
yychar
in interface StandardTokenizerInterface
public java.lang.String getText()
getText
in interface StandardTokenizerInterface
public char[] getArray()
getArray
in interface StandardTokenizerInterface
public byte[] getBytes()
getBytes
in interface StandardTokenizerInterface
public final void yyclose() throws java.io.IOException
java.io.IOException
public final void yyreset(java.io.Reader reader)
yyreset
in interface StandardTokenizerInterface
reader
- the new input stream
public final int yystate()
public final void yybegin(int newState)
newState
- the new lexical state
public final java.lang.String yytext()
public final char yycharat(int pos)
pos
- the position of the character to fetch.
A value from 0 to yylength()-1.
public final int yylength()
yylength
in interface StandardTokenizerInterface
public void yypushback(int number)
number
- the number of characters to be read again.
This number must not be greater than yylength()!
public int getNextToken() throws java.io.IOException
getNextToken
in interface StandardTokenizerInterface
java.io.IOException
- if any I/O-Error occurs.
Copyright © 2017 The Apache Software Foundation