Package org.jruby.lexer
Class LexingCommon
java.lang.Object
org.jruby.lexer.LexingCommon
Code and constants common to both ripper and main parser.
-
Field Summary
FieldsModifier and TypeFieldDescriptionprotected booleanstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final org.jcodings.Encodingstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic ByteListprotected intstatic final ByteListstatic final ByteListprotected StackStatestatic ByteListstatic final ByteListstatic final ByteListstatic final ByteListbooleanprotected StackStatestatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListlongstatic ByteListstatic ByteListstatic final intbooleanstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final ByteListstatic final ByteListstatic final ByteListprotected booleanprotected intprotected intprotected intstatic final ByteListprotected intprotected intstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListprotected ByteListintprotected intintprotected intprotected intprotected intstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListprotected intstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListstatic final ByteListprotected intstatic final ByteListstatic final ByteListprotected LexerSourcestatic final ByteListstatic final ByteListlongstatic final intstatic final intstatic final intstatic final intstatic final 
intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final intstatic final ByteListprotected intprotected booleanintintstatic final org.jcodings.Encodingstatic final org.jcodings.Encodingprotected Object -
Constructor Summary
Constructors -
Method Summary
Modifier and TypeMethodDescriptionprotected abstract voidambiguousOperator(String op, String syn) voidcheckRegexpFragment(Ruby runtime, ByteList value, RegexpOptions options) voidcheckRegexpSyntax(Ruby runtime, ByteList value, RegexpOptions options) intcolumn()protected booleanabstract voidcompile_error(String message) voidcompile_error_pos(String message) createAsEncodedString(byte[] bytes, int start, int length) createTokenByteList(int start) createTokenString(int start) static intdedent_string(ByteList string, int width) voidflush()intorg.jcodings.EncodinggetFile()intintintintintintintgetState()intid()intprotected booleanprotected booleanIS_ARG()protected booleanIS_BEG()protected booleanIS_END()protected booleanIS_LABEL_POSSIBLE(boolean commandState) booleanstatic booleanIS_lex_state(int state, int mask) protected booleanIS_lex_state_all(int state, int mask) protected booleanIS_SPCARG(int c, boolean spaceSeen) booleanisASCII()static booleanisASCII(int c) booleanbooleanisGlobalCharPunct(int c) static booleanisHexChar(int c) static booleanisIdentifierChar(int c) This is a valid character for an identifier?protected booleanbooleanprotected booleanstatic booleanisOctChar(int c) static booleanisSpace(int c) protected static booleanISSPACE(int c) voidintlineno()protected voidmagicCommentEncoding(ByteList encoding) static intmagicCommentMarker(ByteList str, int begin) protected abstract voidmismatchedRegexpEncodingError(org.jcodings.Encoding optionEncoding, org.jcodings.Encoding encoding) voidnewtok(boolean unreadOnce) abstract intnextc()protected intnumberLiteralSuffix(int mask) protected booleanonMagicComment(String name, ByteList value) protected charoptionsEncodingChar(org.jcodings.Encoding optionEncoding) intp(int offset) abstract voidparse_error(String message) booleanparser_magic_comment(ByteList magicLine) voidprotected abstract RegexpOptionsprotected RegexpOptionsparseRegexpFlags(StringBuilder unknownFlags) booleanpeek(int c) protected booleanpeek(int c, int 
n) intpeekVariableName(int tSTRING_DVAR, int tSTRING_DBEG) intvoidvoidpushback(int c) intprotected voidreadUTF8EscapeIntoBuffer(int codepoint, ByteList buffer, boolean stringLiteral, boolean[] encodingDetermined) intreadUTFEscape(ByteList buffer, boolean stringLiteral, boolean[] encodingDetermined) voidreadUTFEscapeRegexpLiteral(ByteList buffer) voidreset()voidprotected intRead up to count hexadecimal digits.protected charscanHexLiteral(ByteList buffer, int count, boolean strict, String errorMessage) Read up to count hexadecimal digits and store those digits in a token numberBuffer.protected charscanOct(int count) protected voidset_file_encoding(int str, int send) protected voidprotected voidset_yylval_name(ByteList name) voidsetBraceNest(int nest) protected abstract voidsetCompileOptionFlag(String name, ByteList value) voidsetCurrentArg(ByteList current_arg) voidsetCurrentEncoding(org.jcodings.Encoding encoding) voidsetEncoding(org.jcodings.Encoding encoding) protected abstract voidsetEncoding(ByteList name) voidsetHeredocIndent(int heredoc_indent) voidsetHeredocLineIndent(int heredoc_line_indent) voidsetLeftParenBegin(int value) voidsetLexContext(LexContext context) voidsetRegexpEncoding(Ruby runtime, ByteList value, RegexpOptions options) voidsetRubySourceline(int line) voidsetSource(LexerSource source) Allow the parser to set the source for its lexer.voidsetState(int state) protected abstract voidsetTokenInfo(String name, ByteList value) voidprotected booleanvoidbooleantokadd_ident(int c) booleantokadd_mbchar(int first_byte) This differs from MRI in a few ways.booleantokadd_mbchar(int first_byte, ByteList buffer) voidThis looks deceptively like tokadd_mbchar(int, ByteList) but it differs in that it uses the bytelists encoding and the first parameter is a full codepoint and not the first byte of a mbc sequence.voidinttoken()Last token read from the lexer at the end of a call to yylex()abstract inttokenize_ident(int result) protected 
intbooleanupdate_heredoc_indent(int c) protected voidupdateStartPosition(int column) protected voidvoidvalidateFormalIdentifier(String identifier) Deprecated.voidvalidateFormalIdentifier(ByteList identifier) value()Value of last token (if it is a token which has a value).protected intwarn_balanced(int c, boolean spaceSeen, int token, String op, String syn) booleanwas_bol()booleanwhole_match_p(ByteList eos, boolean indent)
-
Field Details
-
EXPR_BEG
public static final int EXPR_BEG- See Also:
-
EXPR_END
public static final int EXPR_END- See Also:
-
EXPR_ENDARG
public static final int EXPR_ENDARG- See Also:
-
EXPR_ENDFN
public static final int EXPR_ENDFN- See Also:
-
EXPR_ARG
public static final int EXPR_ARG- See Also:
-
EXPR_CMDARG
public static final int EXPR_CMDARG- See Also:
-
EXPR_MID
public static final int EXPR_MID- See Also:
-
EXPR_FNAME
public static final int EXPR_FNAME- See Also:
-
EXPR_DOT
public static final int EXPR_DOT- See Also:
-
EXPR_CLASS
public static final int EXPR_CLASS- See Also:
-
EXPR_LABEL
public static final int EXPR_LABEL- See Also:
-
EXPR_LABELED
public static final int EXPR_LABELED- See Also:
-
EXPR_FITEM
public static final int EXPR_FITEM- See Also:
-
EXPR_VALUE
public static final int EXPR_VALUE- See Also:
-
EXPR_BEG_ANY
public static final int EXPR_BEG_ANY- See Also:
-
EXPR_ARG_ANY
public static final int EXPR_ARG_ANY- See Also:
-
EXPR_END_ANY
public static final int EXPR_END_ANY- See Also:
-
braceNest
protected int braceNest -
commandStart
public boolean commandStart -
conditionState
-
cmdArgumentState
-
__end__seen
protected boolean __end__seen -
eofp
public boolean eofp -
has_shebang
protected boolean has_shebang -
heredoc_end
protected int heredoc_end -
heredoc_indent
protected int heredoc_indent -
heredoc_line_indent
protected int heredoc_line_indent -
last_cr_line
protected int last_cr_line -
last_state
protected int last_state -
lexb
-
lex_lastline
-
lex_nextline
-
lex_p
public int lex_p -
lex_pbeg
protected int lex_pbeg -
lex_pend
public int lex_pend -
lex_state
protected int lex_state -
line_count
protected int line_count -
line_offset
protected int line_offset -
parenNest
protected int parenNest -
ruby_sourceline
protected int ruby_sourceline -
src
-
token
protected int token -
tokenSeen
protected boolean tokenSeen -
tokline
public int tokline -
tokp
public int tokp -
yaccValue
-
start
public long start -
end
public long end -
AND_KEYWORD
-
BACKTICK
-
EQ_EQ_EQ
-
EQ_EQ
-
EQ_TILDE
-
EQ_GT
-
EQ
-
AMPERSAND_AMPERSAND
-
AMPERSAND
-
AMPERSAND_DOT
-
BANG
-
BANG_EQ
-
BANG_TILDE
-
CARET
-
COLON_COLON
-
COLON
-
COMMA
-
DOT_DOT_DOT
-
DOT_DOT
-
DOT
-
GT_EQ
-
GT_GT
-
GT
-
LBRACKET_RBRACKET_EQ
-
LBRACKET_RBRACKET
-
LBRACKET
-
LCURLY
-
LT_EQ_RT
-
LT_EQ
-
LT_LT
-
LT
-
MINUS_AT
-
MINUS
-
MINUS_GT
-
NIL
-
PERCENT
-
OR_OR
-
OR
-
OR_KEYWORD
-
PLUS_AT
-
PLUS
-
QUESTION
-
RBRACKET
-
RCURLY
-
RPAREN
-
Q
-
SLASH
-
STAR
-
STAR_STAR
-
TILDE
-
QQ
-
SEMICOLON
-
BACKSLASH
-
CALL
-
DOLLAR_BANG
-
DOLLAR_UNDERSCORE
-
DOLLAR_DOT
-
KWNOREST
-
TAB_WIDTH
public static final int TAB_WIDTH- See Also:
-
STR_FUNC_ESCAPE
public static final int STR_FUNC_ESCAPE- See Also:
-
STR_FUNC_EXPAND
public static final int STR_FUNC_EXPAND- See Also:
-
STR_FUNC_REGEXP
public static final int STR_FUNC_REGEXP- See Also:
-
STR_FUNC_QWORDS
public static final int STR_FUNC_QWORDS- See Also:
-
STR_FUNC_SYMBOL
public static final int STR_FUNC_SYMBOL- See Also:
-
STR_FUNC_INDENT
public static final int STR_FUNC_INDENT- See Also:
-
STR_FUNC_LABEL
public static final int STR_FUNC_LABEL- See Also:
-
STR_FUNC_LIST
public static final int STR_FUNC_LIST- See Also:
-
STR_FUNC_TERM
public static final int STR_FUNC_TERM- See Also:
-
str_label
public static final int str_label- See Also:
-
str_squote
public static final int str_squote- See Also:
-
str_dquote
public static final int str_dquote- See Also:
-
str_xquote
public static final int str_xquote- See Also:
-
str_regexp
public static final int str_regexp- See Also:
-
str_sword
public static final int str_sword- See Also:
-
str_dword
public static final int str_dword- See Also:
-
str_ssym
public static final int str_ssym- See Also:
-
str_dsym
public static final int str_dsym- See Also:
-
EOF
public static final int EOF- See Also:
-
END_MARKER
-
BEGIN_DOC_MARKER
-
END_DOC_MARKER
-
CODING
-
UTF8_ENCODING
public static final org.jcodings.Encoding UTF8_ENCODING -
USASCII_ENCODING
public static final org.jcodings.Encoding USASCII_ENCODING -
ASCII8BIT_ENCODING
public static final org.jcodings.Encoding ASCII8BIT_ENCODING -
SUFFIX_R
public static final int SUFFIX_R- See Also:
-
SUFFIX_I
public static final int SUFFIX_I- See Also:
-
SUFFIX_ALL
public static final int SUFFIX_ALL- See Also:
-
-
Constructor Details
-
LexingCommon
-
-
Method Details
-
column
public int column() -
set_yylval_id
-
set_yylval_name
-
id
-
updateTokenPosition
protected void updateTokenPosition() -
updateStartPosition
protected void updateStartPosition(int column) -
compile_error_pos
-
comment_at_top
protected boolean comment_at_top() -
getRubySourceline
public int getRubySourceline() -
setRubySourceline
public void setRubySourceline(int line) -
createTokenByteList
-
createTokenByteList
-
createTokenString
-
createAsEncodedString
-
createTokenString
-
dedent_string
-
flush
public void flush() -
getLexContext
-
setLexContext
-
getBraceNest
public int getBraceNest() -
getCmdArgumentState
-
getConditionState
-
getCurrentArg
-
getCurrentLine
-
getEncoding
public org.jcodings.Encoding getEncoding() -
getFile
-
getHeredocIndent
public int getHeredocIndent() -
getHeredocLineIndent
public int getHeredocLineIndent() -
getLeftParenBegin
public int getLeftParenBegin() -
isLambdaBeginning
protected boolean isLambdaBeginning() -
getLineOffset
public int getLineOffset() -
getState
public int getState() -
getTokenCR
public int getTokenCR() -
getParenNest
public int getParenNest() -
incrementParenNest
public int incrementParenNest() -
isEndSeen
public boolean isEndSeen() -
isLookingAtEOL
public boolean isLookingAtEOL() -
isASCII
public boolean isASCII() -
isASCII
public static boolean isASCII(int c) -
peekVariableName
- Throws:
IOException
-
isGlobalCharPunct
public boolean isGlobalCharPunct(int c) -
isIdentifierChar
public static boolean isIdentifierChar(int c) Is this a valid character for an identifier?- Parameters:
c- the character to be tested- Returns:
- whether c is an identifier character or not (MRI: is_identchar)
-
lex_goto_eol
public void lex_goto_eol() -
lineno
public int lineno() -
magicCommentEncoding
-
newtok
public void newtok(boolean unreadOnce) -
numberLiteralSuffix
protected int numberLiteralSuffix(int mask) -
parser_prepare
public void parser_prepare() -
p
public int p(int offset) -
peek
public boolean peek(int c) -
peek
protected boolean peek(int c, int n) -
precise_mbclen
public int precise_mbclen() -
printState
public void printState() -
pushback
public void pushback(int c) -
reset
public void reset() -
resetStacks
public void resetStacks() -
scanOct
- Throws:
IOException
-
setCurrentArg
-
setCurrentEncoding
public void setCurrentEncoding(org.jcodings.Encoding encoding) -
setEncoding
public void setEncoding(org.jcodings.Encoding encoding) -
set_file_encoding
protected void set_file_encoding(int str, int send) -
setHeredocLineIndent
public void setHeredocLineIndent(int heredoc_line_indent) -
setHeredocIndent
public void setHeredocIndent(int heredoc_indent) -
setBraceNest
public void setBraceNest(int nest) -
setLeftParenBegin
public void setLeftParenBegin(int value) -
setSource
Allow the parser to set the source for its lexer.- Parameters:
source- where the lexer gets raw data
-
setState
public void setState(int state) -
setValue
-
strncmp
-
tokAdd
-
tokCopy
-
tokadd_ident
public boolean tokadd_ident(int c) -
tokadd_mbchar
public boolean tokadd_mbchar(int first_byte) This differs from MRI in a few ways. This version does not apply value to a separate token buffer. It is for use when we know we will not be omitting or including any non-syntactical characters. Use tokadd_mbchar(int, ByteList) if the string differs from actual source. Secondly, this returns a boolean instead of the first byte passed. MRI only used the return value as a success/failure code to return EOF. Because this version does not use a separate token buffer we only increment lex_p. When we reach the end of the token it will just get the bytes directly from the source. -
tokadd_mbchar
-
tokaddmbc
This looks deceptively like tokadd_mbchar(int, ByteList) but it differs in that it uses the bytelist's encoding and the first parameter is a full codepoint and not the first byte of an mbc sequence. -
token
public int token()Last token read from the lexer at the end of a call to yylex()- Returns:
- last token read
-
update_heredoc_indent
public boolean update_heredoc_indent(int c) -
validateFormalIdentifier
-
validateFormalIdentifier
Deprecated. -
value
Value of last token (if it is a token which has a value).- Returns:
- value of last value-laden token
-
warn_balanced
-
was_bol
public boolean was_bol() -
whole_match_p
-
ambiguousOperator
-
compile_error
-
parse_error
-
nextc
public abstract int nextc() -
setCompileOptionFlag
-
setEncoding
-
setTokenInfo
-
tokenize_ident
public abstract int tokenize_ident(int result) -
isHexChar
public static boolean isHexChar(int c) - Parameters:
c- the character to test- Returns:
- true if character is a hex value (0-9a-f)
-
IS_lex_state
public static boolean IS_lex_state(int state, int mask) -
IS_lex_state_all
protected boolean IS_lex_state_all(int state, int mask) -
ISSPACE
protected static boolean ISSPACE(int c) -
IS_ARG
protected boolean IS_ARG() -
IS_END
protected boolean IS_END() -
IS_BEG
protected boolean IS_BEG() -
IS_SPCARG
protected boolean IS_SPCARG(int c, boolean spaceSeen) -
IS_LABEL_POSSIBLE
protected boolean IS_LABEL_POSSIBLE(boolean commandState) -
IS_LABEL_SUFFIX
public boolean IS_LABEL_SUFFIX() -
IS_AFTER_OPERATOR
protected boolean IS_AFTER_OPERATOR() -
isNext_identchar
- Throws:
IOException
-
isOctChar
public static boolean isOctChar(int c) - Parameters:
c- the character to test- Returns:
- true if character is an octal value (0-7)
-
isSpace
public static boolean isSpace(int c) -
magicCommentMarker
-
parser_magic_comment
-
onMagicComment
-
parseRegexpFlags
- Throws:
IOException
-
parseRegexpFlags
- Throws:
IOException
-
checkRegexpFragment
-
checkRegexpSyntax
-
mismatchedRegexpEncodingError
protected abstract void mismatchedRegexpEncodingError(org.jcodings.Encoding optionEncoding, org.jcodings.Encoding encoding) -
setRegexpEncoding
-
optionsEncodingChar
protected char optionsEncodingChar(org.jcodings.Encoding optionEncoding) -
scanHex
Read up to count hexadecimal digits. If strict is set then exactly count hex digits must be present. If no digits can be read a syntax exception will be thrown. -
readEscape
- Throws:
IOException
-
scanHexLiteral
Read up to count hexadecimal digits and store those digits in a token numberBuffer. If strict is set then exactly count hex digits must be present. If no digits can be read a syntax exception will be thrown. This will also return the codepoint as a value so codepoint ranges can be checked. -
tokHex
-
readUTF8EscapeIntoBuffer
protected void readUTF8EscapeIntoBuffer(int codepoint, ByteList buffer, boolean stringLiteral, boolean[] encodingDetermined) throws IOException - Throws:
IOException
-
readUTFEscape
public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean[] encodingDetermined) throws IOException - Throws:
IOException
-
readUTFEscapeRegexpLiteral
-