001    /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
002     *
003     * The contents of this file are subject to the Netscape Public
004     * License Version 1.1 (the "License"); you may not use this file
005     * except in compliance with the License. You may obtain a copy of
006     * the License at http://www.mozilla.org/NPL/
007     *
008     * Software distributed under the License is distributed on an "AS
009     * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
010     * implied. See the License for the specific language governing
011     * rights and limitations under the License.
012     *
013     * The Original Code is Rhino code, released
014     * May 6, 1999.
015     *
016     * The Initial Developer of the Original Code is Netscape
017     * Communications Corporation.  Portions created by Netscape are
018     * Copyright (C) 1997-1999 Netscape Communications Corporation. All
019     * Rights Reserved.
020     *
021     * Contributor(s):
022     * Roger Lawrence
023     * Mike McCabe
024     *
025     * Alternatively, the contents of this file may be used under the
026     * terms of the GNU Public License (the "GPL"), in which case the
027     * provisions of the GPL are applicable instead of those above.
028     * If you wish to allow use of your version of this file only
029     * under the terms of the GPL and not to allow others to use your
030     * version of this file under the NPL, indicate your decision by
031     * deleting the provisions above and replace them with the notice
032     * and other provisions required by the GPL.  If you do not delete
033     * the provisions above, a recipient may use your version of this
034     * file under either the NPL or the GPL.
035     */
036    // Modified by Google
037    
038    package com.google.gwt.dev.js.rhino;
039    
040    import java.io.*;
041    import java.util.HashMap;
042    import java.util.Map;
043    
044    /**
045     * This class implements the JavaScript scanner.
046     *
047     * It is based on the C source files jsscan.c and jsscan.h
048     * in the jsref package.
049     */
050    
051    public class TokenStream {
052    
    // Maps each reserved word to a packed int: the token type lives in the
    // low 8 bits and, for words that also carry an operator code, that op
    // is stored shifted left by 8.  Filled in by the static initializer and
    // decoded by stringToKeyword().
    private static final Map<String, Integer> KEYWORDS = new HashMap<String, Integer>();
054    
055        /*
056         * JSTokenStream flags, mirroring those in jsscan.h.  These are used
057         * by the parser to change/check the state of the scanner.
058         */
059    
    // Each flag is a distinct single bit, so flags are combined in the
    // scanner's 'flags' field with |= and cleared with &= ~FLAG.
    final static int
        TSF_NEWLINES    = 1 << 0,  // tokenize newlines
        TSF_FUNCTION    = 1 << 1,  // scanning inside function body
        TSF_RETURN_EXPR = 1 << 2,  // function has 'return expr;'
        TSF_RETURN_VOID = 1 << 3,  // function has 'return;'
        TSF_REGEXP      = 1 << 4,  // looking for a regular expression
        TSF_DIRTYLINE   = 1 << 5;  // stuff other than whitespace since
                                   // start of line
068    
069        /*
070         * For chars - because we need something out-of-range
071         * to check.  (And checking EOF by exception is annoying.)
072         * Note distinction from EOF token type!
073         */
    // Sentinel character value: results of in.read() are compared against
    // it to detect end of input.  Not to be confused with the EOF token.
    private final static int
        EOF_CHAR = -1;
076    
077        /**
078         * Token types.  These values correspond to JSTokenType values in
079         * jsscan.c.
080         */
081    
    // Token type and op code constants.  Keyword entries in KEYWORDS pack
    // a token type into 8 bits, so every type returned for a keyword must
    // stay below 256.
    public final static int
    // start enum
        ERROR       = -1, // well-known as the only code < EOF
        EOF         = 0,  // end of file token - (not EOF_CHAR)
        EOL         = 1,  // end of line
        // Beginning here are interpreter bytecodes. Their values
        // must not exceed 127.
        POPV        = 2,
        ENTERWITH   = 3,
        LEAVEWITH   = 4,
        RETURN      = 5,
        GOTO        = 6,
        IFEQ        = 7,
        IFNE        = 8,
        DUP         = 9,
        SETNAME     = 10,
        BITOR       = 11,
        BITXOR      = 12,
        BITAND      = 13,
        EQ          = 14,
        NE          = 15,
        LT          = 16,
        LE          = 17,
        GT          = 18,
        GE          = 19,
        LSH         = 20,
        RSH         = 21,
        URSH        = 22,
        ADD         = 23,
        SUB         = 24,
        MUL         = 25,
        DIV         = 26,
        MOD         = 27,
        BITNOT      = 28,
        NEG         = 29,
        NEW         = 30,
        DELPROP     = 31,
        TYPEOF      = 32,
        NAMEINC     = 33,
        PROPINC     = 34,
        ELEMINC     = 35,
        NAMEDEC     = 36,
        PROPDEC     = 37,
        ELEMDEC     = 38,
        GETPROP     = 39,
        SETPROP     = 40,
        GETELEM     = 41,
        SETELEM     = 42,
        CALL        = 43,
        NAME        = 44,
        NUMBER      = 45,
        STRING      = 46,
        ZERO        = 47,
        ONE         = 48,
        NULL        = 49,
        THIS        = 50,
        FALSE       = 51,
        TRUE        = 52,
        SHEQ        = 53,   // shallow equality (===)
        SHNE        = 54,   // shallow inequality (!==)
        CLOSURE     = 55,
        REGEXP      = 56,
        POP         = 57,
        POS         = 58,
        VARINC      = 59,
        VARDEC      = 60,
        BINDNAME    = 61,
        THROW       = 62,
        IN          = 63,
        INSTANCEOF  = 64,
        GOSUB       = 65,
        RETSUB      = 66,
        CALLSPECIAL = 67,
        GETTHIS     = 68,
        NEWTEMP     = 69,
        USETEMP     = 70,
        GETBASE     = 71,
        GETVAR      = 72,
        SETVAR      = 73,
        UNDEFINED   = 74,
        TRY         = 75,
        ENDTRY      = 76,
        NEWSCOPE    = 77,
        TYPEOFNAME  = 78,
        ENUMINIT    = 79,
        ENUMNEXT    = 80,
        GETPROTO    = 81,
        GETPARENT   = 82,
        SETPROTO    = 83,
        SETPARENT   = 84,
        SCOPE       = 85,
        GETSCOPEPARENT = 86,
        THISFN      = 87,
        JTHROW      = 88,
        // End of interpreter bytecodes
        SEMI        = 89,  // semicolon
        LB          = 90,  // left and right brackets
        RB          = 91,
        LC          = 92,  // left and right curlies (braces)
        RC          = 93,
        LP          = 94,  // left and right parentheses
        // NOTE(review): GWT occupies the slot right after LP, where the
        // "left and right parentheses" comment suggests a right-parenthesis
        // token - presumably that is its role; confirm against the scanner.
        GWT          = 95,
        COMMA       = 96,  // comma operator
        ASSIGN      = 97, // assignment ops (= += -= etc.)
        HOOK        = 98, // conditional (?:)
        COLON       = 99,
        OR          = 100, // logical or (||)
        AND         = 101, // logical and (&&)
        EQOP        = 102, // equality ops (== !=)
        RELOP       = 103, // relational ops (< <= > >=)
        SHOP        = 104, // shift ops (<< >> >>>)
        UNARYOP     = 105, // unary prefix operator
        INC         = 106, // increment/decrement (++ --)
        DEC         = 107,
        DOT         = 108, // member operator (.)
        PRIMARY     = 109, // true, false, null, this
        FUNCTION    = 110, // function keyword
        EXPORT      = 111, // export keyword
        IMPORT      = 112, // import keyword
        IF          = 113, // if keyword
        ELSE        = 114, // else keyword
        SWITCH      = 115, // switch keyword
        CASE        = 116, // case keyword
        DEFAULT     = 117, // default keyword
        WHILE       = 118, // while keyword
        DO          = 119, // do keyword
        FOR         = 120, // for keyword
        BREAK       = 121, // break keyword
        CONTINUE    = 122, // continue keyword
        VAR         = 123, // var keyword
        WITH        = 124, // with keyword
        CATCH       = 125, // catch keyword
        FINALLY     = 126, // finally keyword

        /** Added by Mike - these are JSOPs in the jsref, but I
         * don't have them yet in the java implementation...
         * so they go here.  Also whatever I needed.

         * Most of these go in the 'op' field when returning
         * more general token types, eg. 'DIV' as the op of 'ASSIGN'.
         */
        NOP         = 128, // NOP
        NOT         = 129, // etc.
        PRE         = 130, // for INC, DEC nodes.
        POST        = 131,

        /**
         * For JSOPs associated with keywords...
         * eg. op = THIS; token = PRIMARY
         */

        VOID        = 132,

        /* types used for the parse tree - these never get returned
         * by the scanner.
         */
        BLOCK       = 133, // statement block
        ARRAYLIT    = 134, // array literal
        OBJLIT      = 135, // object literal
        LABEL       = 136, // label
        TARGET      = 137,
        LOOP        = 138,
        ENUMDONE    = 139,
        EXPRSTMT    = 140,
        PARENT      = 141,
        CONVERT     = 142,
        JSR         = 143,
        NEWLOCAL    = 144,
        USELOCAL    = 145,
        DEBUGGER    = 146, // registered in KEYWORDS for the 'debugger' keyword
        SCRIPT      = 147,   // top-level node for entire script

        LAST_TOKEN  = 147, // same value as SCRIPT
        // NOTE(review): NUMBER_INT is numerically above LAST_TOKEN, so any
        // code that sizes tables by LAST_TOKEN will not cover it - confirm
        // that this is intended.
        NUMBER_INT  = 148,

        // This value is only used as a return value for getTokenHelper,
        // which is only called from getToken and exists to avoid an excessive
        // recursion problem if a number of lines in a row are comments.
        RETRY_TOKEN     = 65535;
261    
262        // end enum
263    
264    
265        public static String tokenToName(int token) {
266            if (Context.printTrees || Context.printICode) {
267                switch (token) {
268                    case ERROR:           return "error";
269                    case EOF:             return "eof";
270                    case EOL:             return "eol";
271                    case POPV:            return "popv";
272                    case ENTERWITH:       return "enterwith";
273                    case LEAVEWITH:       return "leavewith";
274                    case RETURN:          return "return";
275                    case GOTO:            return "goto";
276                    case IFEQ:            return "ifeq";
277                    case IFNE:            return "ifne";
278                    case DUP:             return "dup";
279                    case SETNAME:         return "setname";
280                    case BITOR:           return "bitor";
281                    case BITXOR:          return "bitxor";
282                    case BITAND:          return "bitand";
283                    case EQ:              return "eq";
284                    case NE:              return "ne";
285                    case LT:              return "lt";
286                    case LE:              return "le";
287                    case GT:              return "gt";
288                    case GE:              return "ge";
289                    case LSH:             return "lsh";
290                    case RSH:             return "rsh";
291                    case URSH:            return "ursh";
292                    case ADD:             return "add";
293                    case SUB:             return "sub";
294                    case MUL:             return "mul";
295                    case DIV:             return "div";
296                    case MOD:             return "mod";
297                    case BITNOT:          return "bitnot";
298                    case NEG:             return "neg";
299                    case NEW:             return "new";
300                    case DELPROP:         return "delprop";
301                    case TYPEOF:          return "typeof";
302                    case NAMEINC:         return "nameinc";
303                    case PROPINC:         return "propinc";
304                    case ELEMINC:         return "eleminc";
305                    case NAMEDEC:         return "namedec";
306                    case PROPDEC:         return "propdec";
307                    case ELEMDEC:         return "elemdec";
308                    case GETPROP:         return "getprop";
309                    case SETPROP:         return "setprop";
310                    case GETELEM:         return "getelem";
311                    case SETELEM:         return "setelem";
312                    case CALL:            return "call";
313                    case NAME:            return "name";
314                    case NUMBER_INT:      return "integer";
315                    case NUMBER:          return "double";
316                    case STRING:          return "string";
317                    case ZERO:            return "zero";
318                    case ONE:             return "one";
319                    case NULL:            return "null";
320                    case THIS:            return "this";
321                    case FALSE:           return "false";
322                    case TRUE:            return "true";
323                    case SHEQ:            return "sheq";
324                    case SHNE:            return "shne";
325                    case CLOSURE:         return "closure";
326                    case REGEXP:          return "object";
327                    case POP:             return "pop";
328                    case POS:             return "pos";
329                    case VARINC:          return "varinc";
330                    case VARDEC:          return "vardec";
331                    case BINDNAME:        return "bindname";
332                    case THROW:           return "throw";
333                    case IN:              return "in";
334                    case INSTANCEOF:      return "instanceof";
335                    case GOSUB:           return "gosub";
336                    case RETSUB:          return "retsub";
337                    case CALLSPECIAL:     return "callspecial";
338                    case GETTHIS:         return "getthis";
339                    case NEWTEMP:         return "newtemp";
340                    case USETEMP:         return "usetemp";
341                    case GETBASE:         return "getbase";
342                    case GETVAR:          return "getvar";
343                    case SETVAR:          return "setvar";
344                    case UNDEFINED:       return "undefined";
345                    case TRY:             return "try";
346                    case ENDTRY:          return "endtry";
347                    case NEWSCOPE:        return "newscope";
348                    case TYPEOFNAME:      return "typeofname";
349                    case ENUMINIT:        return "enuminit";
350                    case ENUMNEXT:        return "enumnext";
351                    case GETPROTO:        return "getproto";
352                    case GETPARENT:       return "getparent";
353                    case SETPROTO:        return "setproto";
354                    case SETPARENT:       return "setparent";
355                    case SCOPE:           return "scope";
356                    case GETSCOPEPARENT:  return "getscopeparent";
357                    case THISFN:          return "thisfn";
358                    case JTHROW:          return "jthrow";
359                    case SEMI:            return "semi";
360                    case LB:              return "lb";
361                    case RB:              return "rb";
362                    case LC:              return "lc";
363                    case RC:              return "rc";
364                    case LP:              return "lp";
365                    case GWT:              return "gwt";
366                    case COMMA:           return "comma";
367                    case ASSIGN:          return "assign";
368                    case HOOK:            return "hook";
369                    case COLON:           return "colon";
370                    case OR:              return "or";
371                    case AND:             return "and";
372                    case EQOP:            return "eqop";
373                    case RELOP:           return "relop";
374                    case SHOP:            return "shop";
375                    case UNARYOP:         return "unaryop";
376                    case INC:             return "inc";
377                    case DEC:             return "dec";
378                    case DOT:             return "dot";
379                    case PRIMARY:         return "primary";
380                    case FUNCTION:        return "function";
381                    case EXPORT:          return "export";
382                    case IMPORT:          return "import";
383                    case IF:              return "if";
384                    case ELSE:            return "else";
385                    case SWITCH:          return "switch";
386                    case CASE:            return "case";
387                    case DEFAULT:         return "default";
388                    case WHILE:           return "while";
389                    case DO:              return "do";
390                    case FOR:             return "for";
391                    case BREAK:           return "break";
392                    case CONTINUE:        return "continue";
393                    case VAR:             return "var";
394                    case WITH:            return "with";
395                    case CATCH:           return "catch";
396                    case FINALLY:         return "finally";
397                    case NOP:             return "nop";
398                    case NOT:             return "not";
399                    case PRE:             return "pre";
400                    case POST:            return "post";
401                    case VOID:            return "void";
402                    case BLOCK:           return "block";
403                    case ARRAYLIT:        return "arraylit";
404                    case OBJLIT:          return "objlit";
405                    case LABEL:           return "label";
406                    case TARGET:          return "target";
407                    case LOOP:            return "loop";
408                    case ENUMDONE:        return "enumdone";
409                    case EXPRSTMT:        return "exprstmt";
410                    case PARENT:          return "parent";
411                    case CONVERT:         return "convert";
412                    case JSR:             return "jsr";
413                    case NEWLOCAL:        return "newlocal";
414                    case USELOCAL:        return "uselocal";
415                    case SCRIPT:          return "script";
416                }
417                return "<unknown="+token+">";
418            }
419            return "";
420        }
421    
422        /* This function uses the cached op, string and number fields in
423         * TokenStream; if getToken has been called since the passed token
424         * was scanned, the op or string printed may be incorrect.
425         */
426        public String tokenToString(int token) {
427            if (Context.printTrees) {
428                String name = tokenToName(token);
429    
430                switch (token) {
431                    case UNARYOP:
432                    case ASSIGN:
433                    case PRIMARY:
434                    case EQOP:
435                    case SHOP:
436                    case RELOP:
437                        return name + " " + tokenToName(this.op);
438    
439                    case STRING:
440                    case REGEXP:
441                    case NAME:
442                        return name + " `" + this.string + "'";
443    
444                    case NUMBER_INT:
445                        return "NUMBER_INT " + (int) this.number;
446                    case NUMBER:
447                        return "NUMBER " + this.number;
448                }
449    
450                return name;
451            }
452            return "";
453        }
454    
455        static {
456            KEYWORDS.put("break", BREAK);
457            KEYWORDS.put("case", CASE);
458            KEYWORDS.put("continue", CONTINUE);
459            KEYWORDS.put("default", DEFAULT);
460            KEYWORDS.put("delete", DELPROP);
461            KEYWORDS.put("do", DO);
462            KEYWORDS.put("else", ELSE);
463            KEYWORDS.put("export", EXPORT);
464            KEYWORDS.put("false", PRIMARY | (FALSE << 8));
465            KEYWORDS.put("for", FOR);
466            KEYWORDS.put("function", FUNCTION);
467            KEYWORDS.put("if", IF);
468            KEYWORDS.put("in", RELOP | (IN << 8));
469            KEYWORDS.put("new", NEW);
470            KEYWORDS.put("null", PRIMARY | (NULL << 8));
471            KEYWORDS.put("return", RETURN);
472            KEYWORDS.put("switch", SWITCH);
473            KEYWORDS.put("this", PRIMARY | (THIS << 8));
474            KEYWORDS.put("true", PRIMARY | (TRUE << 8));
475            KEYWORDS.put("typeof", UNARYOP | (TYPEOF << 8));
476            KEYWORDS.put("var", VAR);
477            KEYWORDS.put("void", UNARYOP | (VOID << 8));
478            KEYWORDS.put("while", WHILE);
479            KEYWORDS.put("with", WITH);
480            KEYWORDS.put("catch", CATCH);
481            KEYWORDS.put("debugger", DEBUGGER);
482            KEYWORDS.put("finally", FINALLY);
483            KEYWORDS.put("import", IMPORT);
484            KEYWORDS.put("instanceof", RELOP | (INSTANCEOF << 8));
485            KEYWORDS.put("throw", THROW);
486            KEYWORDS.put("try", TRY);
487        }
488        
489        private int stringToKeyword(String name) {
490            Integer id = KEYWORDS.get(name);
491            if (id == null) return EOF;
492    
493            this.op = id >> 8;
494            return id & 0xff;
495        }
496    
497        public TokenStream(Reader in,
498                           String sourceName, int lineno)
499        {
500            this.in = new LineBuffer(in, lineno);
501            this.pushbackToken = EOF;
502            this.sourceName = sourceName;
503            flags = 0;
504            secondToLastPosition = new CodePosition(lineno, 0);
505            lastPosition = new CodePosition(lineno, 0);
506        }
507    
    /* Return true and consume the next token from the stream if it matches
     * toMatch; otherwise push the token back and return false.
     */
511        public boolean matchToken(int toMatch) throws IOException {
512            int token = getToken();
513            if (token == toMatch)
514                return true;
515    
516            // didn't match, push back token
517            tokenno--;
518            this.pushbackToken = token;
519            return false;
520        }
521    
522        public void ungetToken(int tt) {
523            if (this.pushbackToken != EOF && tt != ERROR) {
524                String message = Context.getMessage2("msg.token.replaces.pushback",
525                    tokenToString(tt), tokenToString(this.pushbackToken));
526                throw new RuntimeException(message);
527            }
528            this.pushbackToken = tt;
529            tokenno--;
530        }
531    
532        public int peekToken() throws IOException {
533            int result = getToken();
534    
535            this.pushbackToken = result;
536            tokenno--;
537            return result;
538        }
539    
540        public int peekTokenSameLine() throws IOException {
541            int result;
542    
543            flags |= TSF_NEWLINES;          // SCAN_NEWLINES from jsscan.h
544            result = peekToken();
545            flags &= ~TSF_NEWLINES;         // HIDE_NEWLINES from jsscan.h
546            if (this.pushbackToken == EOL)
547                this.pushbackToken = EOF;
548            return result;
549        }
550    
551        private static boolean isAlpha(int c) {
552            return ((c >= 'a' && c <= 'z')
553                    || (c >= 'A' && c <= 'Z'));
554        }
555    
556        static boolean isDigit(int c) {
557            return (c >= '0' && c <= '9');
558        }
559    
560        static int xDigitToInt(int c) {
561            if ('0' <= c && c <= '9') { return c - '0'; }
562            if ('a' <= c && c <= 'f') { return c - ('a' - 10); }
563            if ('A' <= c && c <= 'F') { return c - ('A' - 10); }
564            return -1;
565        }
566    
567        /* As defined in ECMA.  jsscan.c uses C isspace() (which allows
568         * \v, I think.)  note that code in in.read() implicitly accepts
569         * '\r' == \u000D as well.
570         */
571        public static boolean isJSSpace(int c) {
572            return (c == '\u0020' || c == '\u0009'
573                    || c == '\u000C' || c == '\u000B'
574                    || c == '\u00A0'
575                    || Character.getType((char)c) == Character.SPACE_SEPARATOR);
576        }
577    
578        private void skipLine() throws IOException {
579            // skip to end of line
580            int c;
581            while ((c = in.read()) != EOF_CHAR && c != '\n') { }
582            in.unread();
583        }
584    
585        public int getToken() throws IOException {
586          int c;
587          do {
588            c = getTokenHelper();
589          } while (c == RETRY_TOKEN);
590    
591          updatePosition();
592          return c;
593        }
594    
595        private int getTokenHelper() throws IOException {
596            int c;
597            tokenno++;
598    
599            // Check for pushed-back token
600            if (this.pushbackToken != EOF) {
601                int result = this.pushbackToken;
602                this.pushbackToken = EOF;
603                return result;
604            }
605    
606            // Eat whitespace, possibly sensitive to newlines.
607            do {
608                c = in.read();
609                if (c == '\n') {
610                    flags &= ~TSF_DIRTYLINE;
611                    if ((flags & TSF_NEWLINES) != 0)
612                        break;
613                }
614            } while (isJSSpace(c) || c == '\n');
615    
616            if (c == EOF_CHAR)
617                return EOF;
618            if (c != '-' && c != '\n')
619                flags |= TSF_DIRTYLINE;
620    
621            // identifier/keyword/instanceof?
622            // watch out for starting with a <backslash>
623            boolean identifierStart;
624            boolean isUnicodeEscapeStart = false;
625            if (c == '\\') {
626                c = in.read();
627                if (c == 'u') {
628                    identifierStart = true;
629                    isUnicodeEscapeStart = true;
630                    stringBufferTop = 0;
631                } else {
632                    identifierStart = false;
633                    c = '\\';
634                    in.unread();
635                }
636            } else {
637                identifierStart = Character.isJavaIdentifierStart((char)c);
638                if (identifierStart) {
639                    stringBufferTop = 0;
640                    addToString(c);
641                }
642                
643                // bruce: special handling of JSNI signatures
644                // - it would be nice to handle Unicode escapes in the future
645                //
646                if (c == '@') {
647                  stringBufferTop = 0;
648                  addToString(c);
649                  return jsniMatchReference();
650                }
651            }
652    
653            if (identifierStart) {
654                boolean containsEscape = isUnicodeEscapeStart;
655                for (;;) {
656                    if (isUnicodeEscapeStart) {
657                        // strictly speaking we should probably push-back
658                        // all the bad characters if the <backslash>uXXXX
659                        // sequence is malformed. But since there isn't a
660                        // correct context(is there?) for a bad Unicode
661                        // escape sequence in an identifier, we can report
662                        // an error here.
663                        int escapeVal = 0;
664                        for (int i = 0; i != 4; ++i) {
665                            c = in.read();
666                            escapeVal = (escapeVal << 4) | xDigitToInt(c);
667                            // Next check takes care about c < 0 and bad escape
668                            if (escapeVal < 0) { break; }
669                        }
670                        if (escapeVal < 0) {
671                            reportTokenError("msg.invalid.escape", null);
672                            return ERROR;
673                        }
674                        addToString(escapeVal);
675                        isUnicodeEscapeStart = false;
676                    } else {
677                        c = in.read();
678                        if (c == '\\') {
679                            c = in.read();
680                            if (c == 'u') {
681                                isUnicodeEscapeStart = true;
682                                containsEscape = true;
683                            } else {
684                                reportTokenError("msg.illegal.character", null);
685                                return ERROR;
686                            }
687                        } else {
688                            if (!Character.isJavaIdentifierPart((char)c)) {
689                                break;
690                            }
691                            addToString(c);
692                        }
693                    }
694                }
695                in.unread();
696    
697                   String str = getStringFromBuffer();
698                if (!containsEscape) {
699                    // OPT we shouldn't have to make a string (object!) to
700                    // check if it's a keyword.
701    
702                    // Return the corresponding token if it's a keyword
703                    int result = stringToKeyword(str);
704                    if (result != EOF) {
705                        return result;
706                    }
707                }
708                this.string = str;
709                return NAME;
710            }
711    
712            // is it a number?
713            if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {
714    
715                stringBufferTop = 0;
716                int base = 10;
717    
718                if (c == '0') {
719                    c = in.read();
720                    if (c == 'x' || c == 'X') {
721                        base = 16;
722                        c = in.read();
723                    } else if (isDigit(c)) {
724                        base = 8;
725                    } else {
726                        addToString('0');
727                    }
728                }
729    
730                if (base == 16) {
731                    while (0 <= xDigitToInt(c)) {
732                        addToString(c);
733                        c = in.read();
734                    }
735                } else {
736                    while ('0' <= c && c <= '9') {
737                        /*
738                         * We permit 08 and 09 as decimal numbers, which
739                         * makes our behavior a superset of the ECMA
740                         * numeric grammar.  We might not always be so
741                         * permissive, so we warn about it.
742                         */
743                        if (base == 8 && c >= '8') {
744                            Object[] errArgs = { c == '8' ? "8" : "9" };
745                            reportTokenWarning("msg.bad.octal.literal", errArgs);
746                            base = 10;
747                        }
748                        addToString(c);
749                        c = in.read();
750                    }
751                }
752    
753                boolean isInteger = true;
754    
755                if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
756                    isInteger = false;
757                    if (c == '.') {
758                        do {
759                            addToString(c);
760                            c = in.read();
761                        } while (isDigit(c));
762                    }
763                    if (c == 'e' || c == 'E') {
764                        addToString(c);
765                        c = in.read();
766                        if (c == '+' || c == '-') {
767                            addToString(c);
768                            c = in.read();
769                        }
770                        if (!isDigit(c)) {
771                            reportTokenError("msg.missing.exponent", null);
772                            return ERROR;
773                        }
774                        do {
775                            addToString(c);
776                            c = in.read();
777                        } while (isDigit(c));
778                    }
779                }
780                in.unread();
781                String numString = getStringFromBuffer();
782    
783                double dval;
784                if (base == 10 && !isInteger) {
785                    try {
786                        // Use Java conversion to number from string...
787                        dval = (Double.valueOf(numString)).doubleValue();
788                    }
789                    catch (NumberFormatException ex) {
790                        Object[] errArgs = { ex.getMessage() };
791                        reportTokenError("msg.caught.nfe", errArgs);
792                        return ERROR;
793                    }
794                } else {
795                    dval = ScriptRuntime.stringToNumber(numString, 0, base);
796                }
797    
798                this.number = dval;
799    
800                if (isInteger) {
801                    return NUMBER_INT;
802                }
803    
804                return NUMBER;
805            }
806    
807            // is it a string?
808            if (c == '"' || c == '\'') {
809                // We attempt to accumulate a string the fast way, by
810                // building it directly out of the reader.  But if there
811                // are any escaped characters in the string, we revert to
812                // building it out of a StringBuffer.
813    
814                int quoteChar = c;
815                int val = 0;
816                stringBufferTop = 0;
817    
818                c = in.read();
819            strLoop: while (c != quoteChar) {
820                    if (c == '\n' || c == EOF_CHAR) {
821                        in.unread();
822                        reportTokenError("msg.unterminated.string.lit", null);
823                        return ERROR;
824                    }
825    
826                    if (c == '\\') {
827                        // We've hit an escaped character
828    
829                        c = in.read();
830                        switch (c) {
831                            case 'b': c = '\b'; break;
832                            case 'f': c = '\f'; break;
833                            case 'n': c = '\n'; break;
834                            case 'r': c = '\r'; break;
835                            case 't': c = '\t'; break;
836    
837                            // \v a late addition to the ECMA spec,
838                            // it is not in Java, so use 0xb
839                            case 'v': c = 0xb; break;
840    
841                            case 'u': {
842                                /*
843                                 * Get 4 hex digits; if the u escape is not
844                                 * followed by 4 hex digits, use 'u' + the literal
845                                 * character sequence that follows.
846                                 */
847                                int escapeStart = stringBufferTop;
848                                addToString('u');
849                                int escapeVal = 0;
850                                for (int i = 0; i != 4; ++i) {
851                                    c = in.read();
852                                    escapeVal = (escapeVal << 4) | xDigitToInt(c);
853                                    if (escapeVal < 0) {
854                                        continue strLoop;
855                                    }
856                                    addToString(c);
857                                }
858                                // prepare for replace of stored 'u' sequence
859                                // by escape value
860                                stringBufferTop = escapeStart;
861                                c = escapeVal;
862                            } break;
863    
864                            case 'x': {
865                                /* Get 2 hex digits, defaulting to 'x' + literal
866                                 * sequence, as above.
867                                 */
868                                c = in.read();
869                                int escapeVal = xDigitToInt(c);
870                                if (escapeVal < 0) {
871                                    addToString('x');
872                                    continue strLoop;
873                                } else {
874                                    int c1 = c;
875                                    c = in.read();
876                                    escapeVal = (escapeVal << 4) | xDigitToInt(c);
877                                    if (escapeVal < 0) {
878                                        addToString('x');
879                                        addToString(c1);
880                                        continue strLoop;
881                                    } else {
882                                        // got 2 hex digits
883                                        c = escapeVal;
884                                    }
885                                }
886                            } break;
887    
888                            case '\n':
889                                // Remove line terminator
890                                c = in.read();
891                                continue strLoop;
892    
893                            default: if ('0' <= c && c < '8') {
894                                val = c - '0';
895                                c = in.read();
896                                if ('0' <= c && c < '8') {
897                                    val = 8 * val + c - '0';
898                                    c = in.read();
899                                    if ('0' <= c && c < '8' && val <= 037) {
900                                        // c is 3rd char of octal sequence only if
901                                        // the resulting val <= 0377
902                                        val = 8 * val + c - '0';
903                                        c = in.read();
904                                    }
905                                }
906                                in.unread();
907                                c = val;
908                            }
909                        }
910                    }
911                    addToString(c);
912                    c = in.read();
913                }
914    
915                this.string = getStringFromBuffer();
916                return STRING;
917            }
918    
919            switch (c)
920            {
921            case '\n': return EOL;
922            case ';': return SEMI;
923            case '[': return LB;
924            case ']': return RB;
925            case '{': return LC;
926            case '}': return RC;
927            case '(': return LP;
928            case ')': return GWT;
929            case ',': return COMMA;
930            case '?': return HOOK;
931            case ':': return COLON;
932            case '.': return DOT;
933    
934            case '|':
935                if (in.match('|')) {
936                    return OR;
937                } else if (in.match('=')) {
938                    this.op = BITOR;
939                    return ASSIGN;
940                } else {
941                    return BITOR;
942                }
943    
944            case '^':
945                if (in.match('=')) {
946                    this.op = BITXOR;
947                    return ASSIGN;
948                } else {
949                    return BITXOR;
950                }
951    
952            case '&':
953                if (in.match('&')) {
954                    return AND;
955                } else if (in.match('=')) {
956                    this.op = BITAND;
957                    return ASSIGN;
958                } else {
959                    return BITAND;
960                }
961    
962            case '=':
963                if (in.match('=')) {
964                    if (in.match('='))
965                        this.op = SHEQ;
966                    else
967                        this.op = EQ;
968                    return EQOP;
969                } else {
970                    this.op = NOP;
971                    return ASSIGN;
972                }
973    
974            case '!':
975                if (in.match('=')) {
976                    if (in.match('='))
977                        this.op = SHNE;
978                    else
979                        this.op = NE;
980                    return EQOP;
981                } else {
982                    this.op = NOT;
983                    return UNARYOP;
984                }
985    
986            case '<':
987                /* NB:treat HTML begin-comment as comment-till-eol */
988                if (in.match('!')) {
989                    if (in.match('-')) {
990                        if (in.match('-')) {
991                            skipLine();
992                            return RETRY_TOKEN;  // in place of 'goto retry'
993                        }
994                        in.unread();
995                    }
996                    in.unread();
997                }
998                if (in.match('<')) {
999                    if (in.match('=')) {
1000                        this.op = LSH;
1001                        return ASSIGN;
1002                    } else {
1003                        this.op = LSH;
1004                        return SHOP;
1005                    }
1006                } else {
1007                    if (in.match('=')) {
1008                        this.op = LE;
1009                        return RELOP;
1010                    } else {
1011                        this.op = LT;
1012                        return RELOP;
1013                    }
1014                }
1015    
1016            case '>':
1017                if (in.match('>')) {
1018                    if (in.match('>')) {
1019                        if (in.match('=')) {
1020                            this.op = URSH;
1021                            return ASSIGN;
1022                        } else {
1023                            this.op = URSH;
1024                            return SHOP;
1025                        }
1026                    } else {
1027                        if (in.match('=')) {
1028                            this.op = RSH;
1029                            return ASSIGN;
1030                        } else {
1031                            this.op = RSH;
1032                            return SHOP;
1033                        }
1034                    }
1035                } else {
1036                    if (in.match('=')) {
1037                        this.op = GE;
1038                        return RELOP;
1039                    } else {
1040                        this.op = GT;
1041                        return RELOP;
1042                    }
1043                }
1044    
1045            case '*':
1046                if (in.match('=')) {
1047                    this.op = MUL;
1048                    return ASSIGN;
1049                } else {
1050                    return MUL;
1051                }
1052    
1053            case '/':
1054                // is it a // comment?
1055                if (in.match('/')) {
1056                    skipLine();
1057                    return RETRY_TOKEN;
1058                }
1059                if (in.match('*')) {
1060                    while ((c = in.read()) != -1 &&
1061                           !(c == '*' && in.match('/'))) {
1062                        ; // empty loop body
1063                    }
1064                    if (c == EOF_CHAR) {
1065                        reportTokenError("msg.unterminated.comment", null);
1066                        return ERROR;
1067                    }
1068                    return RETRY_TOKEN;  // `goto retry'
1069                }
1070    
1071                // is it a regexp?
1072                if ((flags & TSF_REGEXP) != 0) {
1073                    stringBufferTop = 0;
1074                    while ((c = in.read()) != '/') {
1075                        if (c == '\n' || c == EOF_CHAR) {
1076                            in.unread();
1077                            reportTokenError("msg.unterminated.re.lit", null);
1078                            return ERROR;
1079                        }
1080                        if (c == '\\') {
1081                            addToString(c);
1082                            c = in.read();
1083                        }
1084    
1085                        addToString(c);
1086                    }
1087                    int reEnd = stringBufferTop;
1088    
1089                    while (true) {
1090                        if (in.match('g'))
1091                            addToString('g');
1092                        else if (in.match('i'))
1093                            addToString('i');
1094                        else if (in.match('m'))
1095                            addToString('m');
1096                        else
1097                            break;
1098                    }
1099    
1100                    if (isAlpha(in.peek())) {
1101                        reportTokenError("msg.invalid.re.flag", null);
1102                        return ERROR;
1103                    }
1104    
1105                    this.string = new String(stringBuffer, 0, reEnd);
1106                    this.regExpFlags = new String(stringBuffer, reEnd,
1107                                                  stringBufferTop - reEnd);
1108                    return REGEXP;
1109                }
1110    
1111    
1112                if (in.match('=')) {
1113                    this.op = DIV;
1114                    return ASSIGN;
1115                } else {
1116                    return DIV;
1117                }
1118    
1119            case '%':
1120                this.op = MOD;
1121                if (in.match('=')) {
1122                    return ASSIGN;
1123                } else {
1124                    return MOD;
1125                }
1126    
1127            case '~':
1128                this.op = BITNOT;
1129                return UNARYOP;
1130    
1131            case '+':
1132                if (in.match('=')) {
1133                    this.op = ADD;
1134                    return ASSIGN;
1135                } else if (in.match('+')) {
1136                    return INC;
1137                } else {
1138                    return ADD;
1139                }
1140    
1141            case '-':
1142                if (in.match('=')) {
1143                    this.op = SUB;
1144                    c = ASSIGN;
1145                } else if (in.match('-')) {
1146                    if (0 == (flags & TSF_DIRTYLINE)) {
1147                        // treat HTML end-comment after possible whitespace
1148                        // after line start as comment-utill-eol
1149                        if (in.match('>')) {
1150                            skipLine();
1151                            return RETRY_TOKEN;
1152                        }
1153                    }
1154                    c = DEC;
1155                } else {
1156                    c = SUB;
1157                }
1158                flags |= TSF_DIRTYLINE;
1159                return c;
1160    
1161            default:
1162                reportTokenError("msg.illegal.character", null);
1163                return ERROR;
1164            }
1165        }
1166    
1167        private void skipWhitespace() throws IOException {
1168          int tmp;
1169          do {
1170            tmp = in.read();
1171          } while (isJSSpace(tmp) || tmp == '\n');
1172          // Reposition back to first non whitespace char.
1173          in.unread();
1174        }
1175    
1176        private int jsniMatchReference() throws IOException {
1177    
1178          // First, read the type name whose member is being accessed. 
1179          if (!jsniMatchQualifiedTypeName('.', ':')) {
1180            return ERROR;
1181          }
1182    
1183          // Now we must the second colon.
1184          //
1185          int c = in.read();
1186          if (c != ':') {
1187              in.unread();
1188              reportTokenError("msg.jsni.expected.char", new String[] {":"});
1189              return ERROR;
1190          }
1191          addToString(c);
1192    
1193          // Skip whitespace starting after ::.
1194          skipWhitespace();
1195    
1196          // Finish by reading the field or method signature.
1197          if (!jsniMatchMethodSignatureOrFieldName()) {
1198            return ERROR;
1199          }
1200    
1201          this.string = new String(stringBuffer, 0, stringBufferTop);
1202          return NAME;
1203        }
1204    
1205        private boolean jsniMatchParamListSignature() throws IOException {
1206          // Assume the opening '(' has already been read.
1207          // Read param type signatures until we see a closing ')'.
1208    
1209          skipWhitespace();
1210    
1211          // First check for the special case of * as the parameter list, indicating
1212          // a wildcard
1213          if (in.peek() == '*') {
1214            addToString(in.read());
1215            if (in.peek() != ')') {
1216              reportTokenError("msg.jsni.expected.char", new String[] {")"});
1217            }
1218            addToString(in.read());
1219            return true;
1220          }
1221    
1222          // Otherwise, loop through reading one param type at a time
1223          do {
1224            // Skip whitespace between parameters.
1225            skipWhitespace();
1226    
1227            int c = in.read();
1228    
1229            if (c == ')') {
1230              // Finished successfully.
1231              //
1232              addToString(c);
1233              return true;
1234            }
1235    
1236            in.unread();
1237          } while (jsniMatchParamTypeSignature());
1238    
1239          // If we made it here, we can assume that there was an invalid type
1240          // signature that was already reported and that the offending char
1241          // was already unread.
1242          //
1243          return false;
1244        }
1245    
1246        private boolean jsniMatchParamTypeSignature() throws IOException {
1247          int c = in.read();
1248          switch (c) {
1249            case 'Z':
1250            case 'B':
1251            case 'C':
1252            case 'S':
1253            case 'I':
1254            case 'J':
1255            case 'F':
1256            case 'D':
1257              // Primitive type id.
1258              addToString(c);
1259              return true;
1260            case 'L':
1261              // Class/Interface type prefix.
1262              addToString(c);
1263              return jsniMatchQualifiedTypeName('/', ';');
1264            case '[':
1265              // Array type prefix.
1266              addToString(c);
1267              return jsniMatchParamArrayTypeSignature();
1268            default:
1269              in.unread();
1270              reportTokenError("msg.jsni.expected.param.type", null);
1271              return false;
1272          }
1273        }
1274    
1275        private boolean jsniMatchParamArrayTypeSignature() throws IOException {
1276          // Assume the leading '[' has already been read.
1277          // What follows must be another param type signature.
1278          //
1279          return jsniMatchParamTypeSignature();
1280        }
1281    
1282        private boolean jsniMatchMethodSignatureOrFieldName() throws IOException {
1283          int c = in.read();
1284    
1285    
1286          // We must see an ident start here.
1287          //
1288          if (!Character.isJavaIdentifierStart((char)c)) {
1289            in.unread();
1290            reportTokenError("msg.jsni.expected.identifier", null);
1291            return false;
1292          }
1293          
1294          addToString(c);
1295          
1296          for (;;) {
1297            c = in.read();
1298            if (Character.isJavaIdentifierPart((char)c)) {
1299              addToString(c);
1300            }
1301            else if (c == '(') {
1302              // This means we're starting a JSNI method signature.
1303              //
1304              addToString(c);
1305              if (jsniMatchParamListSignature()) {
1306                // Finished a method signature with success.
1307                // Assume the callee unread the last char.
1308                //
1309                return true;
1310              }
1311              else {
1312                // Assume the callee reported the error and unread the last char.
1313                //
1314                return false;
1315              }
1316            }
1317            else {
1318              // We don't know this char, so it finishes the token.
1319              //
1320              in.unread();
1321              return true;
1322            }
1323          }
1324        }
1325    
1326        /**
1327         * This method is called to match the fully-qualified type name that
1328         * should appear after the '@' in a JSNI reference.
1329         * @param sepChar the character that will separate the Java idents
1330         *        (either a '.' or '/')
1331         * @param endChar the character that indicates the end of the 
1332         */
1333        private boolean jsniMatchQualifiedTypeName(char sepChar, char endChar) 
1334            throws IOException {
1335          int c = in.read();
1336    
1337          // Whether nested or not, we must see an ident start here.
1338          //
1339          if (!Character.isJavaIdentifierStart((char)c)) {
1340            in.unread();
1341            reportTokenError("msg.jsni.expected.identifier", null);
1342            return false;
1343          }
1344          
1345          // Now actually add the first ident char.
1346          //
1347          addToString(c);
1348    
1349          // And append any other ident chars.
1350          //
1351          for (;;) {
1352            c = in.read();
1353            if (Character.isJavaIdentifierPart((char)c)) {
1354              addToString(c);
1355            }
1356            else {
1357              break;
1358            }
1359          }
1360          
1361          // Arrray-type reference
1362          while (c == '[') {
1363            if (']' == in.peek()) {
1364              addToString('[');
1365              addToString(in.read());
1366              c = in.read();
1367            } else {
1368              break;
1369            }
1370          }
1371    
1372          // We have a non-ident char to classify.
1373          //
1374          if (c == sepChar) {
1375            addToString(c);
1376            if (jsniMatchQualifiedTypeName(sepChar, endChar)) {
1377              // We consumed up to the endChar, so we finished with total success.
1378              //
1379              return true;
1380            } else {
1381              // Assume that the nested call reported the syntax error and
1382              // unread the last character.
1383              //
1384              return false;
1385            }
1386          } else if (c == endChar) {
1387            // Matched everything up to the specified end char.
1388            //
1389            addToString(c);
1390            return true;
1391          } else {
1392            // This is an unknown char that finishes the token.
1393            //
1394            in.unread();
1395            return true;
1396          }
1397        }
1398        
1399        private String getStringFromBuffer() {
1400            return new String(stringBuffer, 0, stringBufferTop);
1401        }
1402    
1403        private void addToString(int c) {
1404            if (stringBufferTop == stringBuffer.length) {
1405                char[] tmp = new char[stringBuffer.length * 2];
1406                System.arraycopy(stringBuffer, 0, tmp, 0, stringBufferTop);
1407                stringBuffer = tmp;
1408            }
1409            stringBuffer[stringBufferTop++] = (char)c;
1410        }
1411    
1412        /**
1413         * Positions hold offset of an corresponding token's end.
1414         * So lastPosition holds an offset of char that is next to last token.
1415         *
1416         * Use secondToLastPosition for error reporting outside of TokenStream, because
1417         * usually we want to report beginning of erroneous token,
1418         * which is end of second to last read token.
1419         */
1420        public void reportSyntaxError(String messageProperty, Object[] args) {
1421            String message = Context.getMessage(messageProperty, args);
1422            Context.reportError(message, getSourceName(), secondToLastPosition.getLine(), getLine(), secondToLastPosition.getOffset());
1423        }
1424    
1425        /**
1426         * Token errors are reported before tokes is read,
1427         * so use lastPosition for reporting.
1428         * @see #reportSyntaxError
1429         */
1430        private void reportTokenError(String messageProperty, Object[] args) {
1431            String message = Context.getMessage(messageProperty, args);
1432            Context.reportError(message, getSourceName(), lastPosition.getLine(), getLine(), lastPosition.getOffset());
1433        }
1434    
1435        private void reportTokenWarning(String messageProperty, Object[] args) {
1436            String message = Context.getMessage(messageProperty, args);
1437            Context.reportWarning(message, getSourceName(), lastPosition.getLine(), getLine(), lastPosition.getOffset());
1438        }
1439    
1440        /**
1441         * Updates last two known positions (for error reporting).
1442         */
1443        private void updatePosition() {
1444            CodePosition currentPosition = new CodePosition(getLineno(), getOffset());
1445            if (currentPosition.compareTo(lastPosition) > 0) {
1446                secondToLastPosition = lastPosition;
1447                lastPosition = currentPosition;
1448            }
1449        }
1450    
1451        public String getSourceName() { return sourceName; }
1452        public int getLineno() { return in.getLineno(); }
1453        public int getOp() { return op; }
1454        public String getString() { return string; }
1455        public double getNumber() { return number; }
1456        public String getLine() { return in.getLine(); }
1457        public int getOffset() { return in.getOffset(); }
1458        public int getTokenno() { return tokenno; }
1459        public boolean eof() { return in.eof(); }
1460    
    // instance variables

    // Character source for the scanner; all read/unread/peek/match calls and
    // the line, line-number and offset queries go through it.
    private LineBuffer in;


    /* Bit flags for the scanner, e.g. TSF_REGEXP and TSF_DIRTYLINE.
     * Should this be manipulated by getter/setter functions?
     * Should it be passed to getToken()?
     */
    int flags;
    // Flag characters (g, i, m) collected after the closing '/' of a regexp
    // literal.
    String regExpFlags;

    // Source name, exposed through getSourceName() and used in error reports.
    private String sourceName;
    // NOTE(review): not used in this part of the file; presumably holds a
    // token pushed back by the parser. Confirm against the rest of the class.
    private int pushbackToken;
    // Exposed through getTokenno(); NOTE(review): presumably a running token
    // count. Confirm where it is updated.
    private int tokenno;

    // The last two positions recorded by updatePosition(), used to locate
    // errors: secondToLastPosition for syntax errors, lastPosition for token
    // errors and warnings.
    CodePosition secondToLastPosition;
    CodePosition lastPosition;

    // Operator code accompanying tokens such as ASSIGN, EQOP, RELOP, SHOP and
    // UNARYOP.
    private int op;

    // Set this to an initial non-null value so that the Parser has
    // something to retrieve even if an error has occurred and no
    // string is found.  Fosters one class of error, but saves lots of
    // code.
    private String string = "";
    // Value of the most recently scanned number literal.
    private double number;

    // Scratch buffer in which the text of the current token is accumulated;
    // addToString() doubles it when full. stringBufferTop is the number of
    // characters currently in use.
    private char[] stringBuffer = new char[128];
    private int stringBufferTop;
1490    }