001 /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
002 *
003 * The contents of this file are subject to the Netscape Public
004 * License Version 1.1 (the "License"); you may not use this file
005 * except in compliance with the License. You may obtain a copy of
006 * the License at http://www.mozilla.org/NPL/
007 *
008 * Software distributed under the License is distributed on an "AS
009 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
010 * implied. See the License for the specific language governing
011 * rights and limitations under the License.
012 *
013 * The Original Code is Rhino code, released
014 * May 6, 1999.
015 *
016 * The Initial Developer of the Original Code is Netscape
017 * Communications Corporation. Portions created by Netscape are
018 * Copyright (C) 1997-1999 Netscape Communications Corporation. All
019 * Rights Reserved.
020 *
021 * Contributor(s):
022 * Roger Lawrence
023 * Mike McCabe
024 *
025 * Alternatively, the contents of this file may be used under the
026 * terms of the GNU Public License (the "GPL"), in which case the
027 * provisions of the GPL are applicable instead of those above.
028 * If you wish to allow use of your version of this file only
029 * under the terms of the GPL and not to allow others to use your
030 * version of this file under the NPL, indicate your decision by
031 * deleting the provisions above and replace them with the notice
032 * and other provisions required by the GPL. If you do not delete
033 * the provisions above, a recipient may use your version of this
034 * file under either the NPL or the GPL.
035 */
036 // Modified by Google
037
038 package com.google.gwt.dev.js.rhino;
039
040 import java.io.*;
041 import java.util.HashMap;
042 import java.util.Map;
043
044 /**
045 * This class implements the JavaScript scanner.
046 *
047 * It is based on the C source files jsscan.c and jsscan.h
048 * in the jsref package.
049 */
050
051 public class TokenStream {
052
053 private static final Map<String, Integer> KEYWORDS = new HashMap<String, Integer>();
054
055 /*
056 * JSTokenStream flags, mirroring those in jsscan.h. These are used
057 * by the parser to change/check the state of the scanner.
058 */
059
060 final static int
061 TSF_NEWLINES = 1 << 0, // tokenize newlines
062 TSF_FUNCTION = 1 << 1, // scanning inside function body
063 TSF_RETURN_EXPR = 1 << 2, // function has 'return expr;'
064 TSF_RETURN_VOID = 1 << 3, // function has 'return;'
065 TSF_REGEXP = 1 << 4, // looking for a regular expression
066 TSF_DIRTYLINE = 1 << 5; // stuff other than whitespace since
067 // start of line
068
069 /*
070 * For chars - because we need something out-of-range
071 * to check. (And checking EOF by exception is annoying.)
072 * Note distinction from EOF token type!
073 */
074 private final static int
075 EOF_CHAR = -1;
076
077 /**
078 * Token types. These values correspond to JSTokenType values in
079 * jsscan.c.
080 */
081
082 public final static int
083 // start enum
084 ERROR = -1, // well-known as the only code < EOF
085 EOF = 0, // end of file token - (not EOF_CHAR)
086 EOL = 1, // end of line
087 // Beginning here are interpreter bytecodes. Their values
088 // must not exceed 127.
089 POPV = 2,
090 ENTERWITH = 3,
091 LEAVEWITH = 4,
092 RETURN = 5,
093 GOTO = 6,
094 IFEQ = 7,
095 IFNE = 8,
096 DUP = 9,
097 SETNAME = 10,
098 BITOR = 11,
099 BITXOR = 12,
100 BITAND = 13,
101 EQ = 14,
102 NE = 15,
103 LT = 16,
104 LE = 17,
105 GT = 18,
106 GE = 19,
107 LSH = 20,
108 RSH = 21,
109 URSH = 22,
110 ADD = 23,
111 SUB = 24,
112 MUL = 25,
113 DIV = 26,
114 MOD = 27,
115 BITNOT = 28,
116 NEG = 29,
117 NEW = 30,
118 DELPROP = 31,
119 TYPEOF = 32,
120 NAMEINC = 33,
121 PROPINC = 34,
122 ELEMINC = 35,
123 NAMEDEC = 36,
124 PROPDEC = 37,
125 ELEMDEC = 38,
126 GETPROP = 39,
127 SETPROP = 40,
128 GETELEM = 41,
129 SETELEM = 42,
130 CALL = 43,
131 NAME = 44,
132 NUMBER = 45,
133 STRING = 46,
134 ZERO = 47,
135 ONE = 48,
136 NULL = 49,
137 THIS = 50,
138 FALSE = 51,
139 TRUE = 52,
140 SHEQ = 53, // shallow equality (===)
141 SHNE = 54, // shallow inequality (!==)
142 CLOSURE = 55,
143 REGEXP = 56,
144 POP = 57,
145 POS = 58,
146 VARINC = 59,
147 VARDEC = 60,
148 BINDNAME = 61,
149 THROW = 62,
150 IN = 63,
151 INSTANCEOF = 64,
152 GOSUB = 65,
153 RETSUB = 66,
154 CALLSPECIAL = 67,
155 GETTHIS = 68,
156 NEWTEMP = 69,
157 USETEMP = 70,
158 GETBASE = 71,
159 GETVAR = 72,
160 SETVAR = 73,
161 UNDEFINED = 74,
162 TRY = 75,
163 ENDTRY = 76,
164 NEWSCOPE = 77,
165 TYPEOFNAME = 78,
166 ENUMINIT = 79,
167 ENUMNEXT = 80,
168 GETPROTO = 81,
169 GETPARENT = 82,
170 SETPROTO = 83,
171 SETPARENT = 84,
172 SCOPE = 85,
173 GETSCOPEPARENT = 86,
174 THISFN = 87,
175 JTHROW = 88,
176 // End of interpreter bytecodes
177 SEMI = 89, // semicolon
178 LB = 90, // left and right brackets
179 RB = 91,
180 LC = 92, // left and right curlies (braces)
181 RC = 93,
182 LP = 94, // left and right parentheses
183 GWT = 95,
184 COMMA = 96, // comma operator
185 ASSIGN = 97, // assignment ops (= += -= etc.)
186 HOOK = 98, // conditional (?:)
187 COLON = 99,
188 OR = 100, // logical or (||)
189 AND = 101, // logical and (&&)
190 EQOP = 102, // equality ops (== !=)
191 RELOP = 103, // relational ops (< <= > >=)
192 SHOP = 104, // shift ops (<< >> >>>)
193 UNARYOP = 105, // unary prefix operator
194 INC = 106, // increment/decrement (++ --)
195 DEC = 107,
196 DOT = 108, // member operator (.)
197 PRIMARY = 109, // true, false, null, this
198 FUNCTION = 110, // function keyword
199 EXPORT = 111, // export keyword
200 IMPORT = 112, // import keyword
201 IF = 113, // if keyword
202 ELSE = 114, // else keyword
203 SWITCH = 115, // switch keyword
204 CASE = 116, // case keyword
205 DEFAULT = 117, // default keyword
206 WHILE = 118, // while keyword
207 DO = 119, // do keyword
208 FOR = 120, // for keyword
209 BREAK = 121, // break keyword
210 CONTINUE = 122, // continue keyword
211 VAR = 123, // var keyword
212 WITH = 124, // with keyword
213 CATCH = 125, // catch keyword
214 FINALLY = 126, // finally keyword
215
216 /** Added by Mike - these are JSOPs in the jsref, but I
217 * don't have them yet in the java implementation...
218 * so they go here. Also whatever I needed.
219
220 * Most of these go in the 'op' field when returning
221 * more general token types, eg. 'DIV' as the op of 'ASSIGN'.
222 */
223 NOP = 128, // NOP
224 NOT = 129, // etc.
225 PRE = 130, // for INC, DEC nodes.
226 POST = 131,
227
228 /**
229 * For JSOPs associated with keywords...
230 * eg. op = THIS; token = PRIMARY
231 */
232
233 VOID = 132,
234
235 /* types used for the parse tree - these never get returned
236 * by the scanner.
237 */
238 BLOCK = 133, // statement block
239 ARRAYLIT = 134, // array literal
240 OBJLIT = 135, // object literal
241 LABEL = 136, // label
242 TARGET = 137,
243 LOOP = 138,
244 ENUMDONE = 139,
245 EXPRSTMT = 140,
246 PARENT = 141,
247 CONVERT = 142,
248 JSR = 143,
249 NEWLOCAL = 144,
250 USELOCAL = 145,
251 DEBUGGER = 146,
252 SCRIPT = 147, // top-level node for entire script
253
254 LAST_TOKEN = 147,
255 NUMBER_INT = 148,
256
257 // This value is only used as a return value for getTokenHelper,
258 // which is only called from getToken and exists to avoid an excessive
259 // recursion problem if a number of lines in a row are comments.
260 RETRY_TOKEN = 65535;
261
262 // end enum
263
264
265 public static String tokenToName(int token) {
266 if (Context.printTrees || Context.printICode) {
267 switch (token) {
268 case ERROR: return "error";
269 case EOF: return "eof";
270 case EOL: return "eol";
271 case POPV: return "popv";
272 case ENTERWITH: return "enterwith";
273 case LEAVEWITH: return "leavewith";
274 case RETURN: return "return";
275 case GOTO: return "goto";
276 case IFEQ: return "ifeq";
277 case IFNE: return "ifne";
278 case DUP: return "dup";
279 case SETNAME: return "setname";
280 case BITOR: return "bitor";
281 case BITXOR: return "bitxor";
282 case BITAND: return "bitand";
283 case EQ: return "eq";
284 case NE: return "ne";
285 case LT: return "lt";
286 case LE: return "le";
287 case GT: return "gt";
288 case GE: return "ge";
289 case LSH: return "lsh";
290 case RSH: return "rsh";
291 case URSH: return "ursh";
292 case ADD: return "add";
293 case SUB: return "sub";
294 case MUL: return "mul";
295 case DIV: return "div";
296 case MOD: return "mod";
297 case BITNOT: return "bitnot";
298 case NEG: return "neg";
299 case NEW: return "new";
300 case DELPROP: return "delprop";
301 case TYPEOF: return "typeof";
302 case NAMEINC: return "nameinc";
303 case PROPINC: return "propinc";
304 case ELEMINC: return "eleminc";
305 case NAMEDEC: return "namedec";
306 case PROPDEC: return "propdec";
307 case ELEMDEC: return "elemdec";
308 case GETPROP: return "getprop";
309 case SETPROP: return "setprop";
310 case GETELEM: return "getelem";
311 case SETELEM: return "setelem";
312 case CALL: return "call";
313 case NAME: return "name";
314 case NUMBER_INT: return "integer";
315 case NUMBER: return "double";
316 case STRING: return "string";
317 case ZERO: return "zero";
318 case ONE: return "one";
319 case NULL: return "null";
320 case THIS: return "this";
321 case FALSE: return "false";
322 case TRUE: return "true";
323 case SHEQ: return "sheq";
324 case SHNE: return "shne";
325 case CLOSURE: return "closure";
326 case REGEXP: return "object";
327 case POP: return "pop";
328 case POS: return "pos";
329 case VARINC: return "varinc";
330 case VARDEC: return "vardec";
331 case BINDNAME: return "bindname";
332 case THROW: return "throw";
333 case IN: return "in";
334 case INSTANCEOF: return "instanceof";
335 case GOSUB: return "gosub";
336 case RETSUB: return "retsub";
337 case CALLSPECIAL: return "callspecial";
338 case GETTHIS: return "getthis";
339 case NEWTEMP: return "newtemp";
340 case USETEMP: return "usetemp";
341 case GETBASE: return "getbase";
342 case GETVAR: return "getvar";
343 case SETVAR: return "setvar";
344 case UNDEFINED: return "undefined";
345 case TRY: return "try";
346 case ENDTRY: return "endtry";
347 case NEWSCOPE: return "newscope";
348 case TYPEOFNAME: return "typeofname";
349 case ENUMINIT: return "enuminit";
350 case ENUMNEXT: return "enumnext";
351 case GETPROTO: return "getproto";
352 case GETPARENT: return "getparent";
353 case SETPROTO: return "setproto";
354 case SETPARENT: return "setparent";
355 case SCOPE: return "scope";
356 case GETSCOPEPARENT: return "getscopeparent";
357 case THISFN: return "thisfn";
358 case JTHROW: return "jthrow";
359 case SEMI: return "semi";
360 case LB: return "lb";
361 case RB: return "rb";
362 case LC: return "lc";
363 case RC: return "rc";
364 case LP: return "lp";
365 case GWT: return "gwt";
366 case COMMA: return "comma";
367 case ASSIGN: return "assign";
368 case HOOK: return "hook";
369 case COLON: return "colon";
370 case OR: return "or";
371 case AND: return "and";
372 case EQOP: return "eqop";
373 case RELOP: return "relop";
374 case SHOP: return "shop";
375 case UNARYOP: return "unaryop";
376 case INC: return "inc";
377 case DEC: return "dec";
378 case DOT: return "dot";
379 case PRIMARY: return "primary";
380 case FUNCTION: return "function";
381 case EXPORT: return "export";
382 case IMPORT: return "import";
383 case IF: return "if";
384 case ELSE: return "else";
385 case SWITCH: return "switch";
386 case CASE: return "case";
387 case DEFAULT: return "default";
388 case WHILE: return "while";
389 case DO: return "do";
390 case FOR: return "for";
391 case BREAK: return "break";
392 case CONTINUE: return "continue";
393 case VAR: return "var";
394 case WITH: return "with";
395 case CATCH: return "catch";
396 case FINALLY: return "finally";
397 case NOP: return "nop";
398 case NOT: return "not";
399 case PRE: return "pre";
400 case POST: return "post";
401 case VOID: return "void";
402 case BLOCK: return "block";
403 case ARRAYLIT: return "arraylit";
404 case OBJLIT: return "objlit";
405 case LABEL: return "label";
406 case TARGET: return "target";
407 case LOOP: return "loop";
408 case ENUMDONE: return "enumdone";
409 case EXPRSTMT: return "exprstmt";
410 case PARENT: return "parent";
411 case CONVERT: return "convert";
412 case JSR: return "jsr";
413 case NEWLOCAL: return "newlocal";
414 case USELOCAL: return "uselocal";
415 case SCRIPT: return "script";
416 }
417 return "<unknown="+token+">";
418 }
419 return "";
420 }
421
422 /* This function uses the cached op, string and number fields in
423 * TokenStream; if getToken has been called since the passed token
424 * was scanned, the op or string printed may be incorrect.
425 */
426 public String tokenToString(int token) {
427 if (Context.printTrees) {
428 String name = tokenToName(token);
429
430 switch (token) {
431 case UNARYOP:
432 case ASSIGN:
433 case PRIMARY:
434 case EQOP:
435 case SHOP:
436 case RELOP:
437 return name + " " + tokenToName(this.op);
438
439 case STRING:
440 case REGEXP:
441 case NAME:
442 return name + " `" + this.string + "'";
443
444 case NUMBER_INT:
445 return "NUMBER_INT " + (int) this.number;
446 case NUMBER:
447 return "NUMBER " + this.number;
448 }
449
450 return name;
451 }
452 return "";
453 }
454
// Populates the reserved-word table. A plain entry stores the token code
// itself; a combined entry stores a general token code in the low eight
// bits with the specific operator code shifted left by eight.
static {
    // Reserved words whose value is just the token code.
    KEYWORDS.put("break", BREAK);
    KEYWORDS.put("case", CASE);
    KEYWORDS.put("catch", CATCH);
    KEYWORDS.put("continue", CONTINUE);
    KEYWORDS.put("debugger", DEBUGGER);
    KEYWORDS.put("default", DEFAULT);
    KEYWORDS.put("delete", DELPROP);
    KEYWORDS.put("do", DO);
    KEYWORDS.put("else", ELSE);
    KEYWORDS.put("export", EXPORT);
    KEYWORDS.put("finally", FINALLY);
    KEYWORDS.put("for", FOR);
    KEYWORDS.put("function", FUNCTION);
    KEYWORDS.put("if", IF);
    KEYWORDS.put("import", IMPORT);
    KEYWORDS.put("new", NEW);
    KEYWORDS.put("return", RETURN);
    KEYWORDS.put("switch", SWITCH);
    KEYWORDS.put("throw", THROW);
    KEYWORDS.put("try", TRY);
    KEYWORDS.put("var", VAR);
    KEYWORDS.put("while", WHILE);
    KEYWORDS.put("with", WITH);

    // Literal words reported as PRIMARY.
    KEYWORDS.put("false", PRIMARY | (FALSE << 8));
    KEYWORDS.put("null", PRIMARY | (NULL << 8));
    KEYWORDS.put("this", PRIMARY | (THIS << 8));
    KEYWORDS.put("true", PRIMARY | (TRUE << 8));

    // Word operators reported as RELOP.
    KEYWORDS.put("in", RELOP | (IN << 8));
    KEYWORDS.put("instanceof", RELOP | (INSTANCEOF << 8));

    // Word operators reported as UNARYOP.
    KEYWORDS.put("typeof", UNARYOP | (TYPEOF << 8));
    KEYWORDS.put("void", UNARYOP | (VOID << 8));
}
488
489 private int stringToKeyword(String name) {
490 Integer id = KEYWORDS.get(name);
491 if (id == null) return EOF;
492
493 this.op = id >> 8;
494 return id & 0xff;
495 }
496
/**
 * Creates a scanner over the given reader.
 *
 * @param in         source of the characters to scan
 * @param sourceName name stored for this source
 * @param lineno     line number handed to the line buffer and used for
 *                   both initial code positions
 */
public TokenStream(Reader in,
                   String sourceName, int lineno)
{
    // Scanner state starts out empty: no flags, nothing pushed back.
    flags = 0;
    this.pushbackToken = EOF;
    this.sourceName = sourceName;

    this.in = new LineBuffer(in, lineno);

    // Both tracked positions begin at column 0 of the starting line.
    lastPosition = new CodePosition(lineno, 0);
    secondToLastPosition = new CodePosition(lineno, 0);
}
507
508 /* return and pop the token from the stream if it matches...
509 * otherwise return null
510 */
511 public boolean matchToken(int toMatch) throws IOException {
512 int token = getToken();
513 if (token == toMatch)
514 return true;
515
516 // didn't match, push back token
517 tokenno--;
518 this.pushbackToken = token;
519 return false;
520 }
521
522 public void ungetToken(int tt) {
523 if (this.pushbackToken != EOF && tt != ERROR) {
524 String message = Context.getMessage2("msg.token.replaces.pushback",
525 tokenToString(tt), tokenToString(this.pushbackToken));
526 throw new RuntimeException(message);
527 }
528 this.pushbackToken = tt;
529 tokenno--;
530 }
531
532 public int peekToken() throws IOException {
533 int result = getToken();
534
535 this.pushbackToken = result;
536 tokenno--;
537 return result;
538 }
539
540 public int peekTokenSameLine() throws IOException {
541 int result;
542
543 flags |= TSF_NEWLINES; // SCAN_NEWLINES from jsscan.h
544 result = peekToken();
545 flags &= ~TSF_NEWLINES; // HIDE_NEWLINES from jsscan.h
546 if (this.pushbackToken == EOL)
547 this.pushbackToken = EOF;
548 return result;
549 }
550
551 private static boolean isAlpha(int c) {
552 return ((c >= 'a' && c <= 'z')
553 || (c >= 'A' && c <= 'Z'));
554 }
555
556 static boolean isDigit(int c) {
557 return (c >= '0' && c <= '9');
558 }
559
560 static int xDigitToInt(int c) {
561 if ('0' <= c && c <= '9') { return c - '0'; }
562 if ('a' <= c && c <= 'f') { return c - ('a' - 10); }
563 if ('A' <= c && c <= 'F') { return c - ('A' - 10); }
564 return -1;
565 }
566
567 /* As defined in ECMA. jsscan.c uses C isspace() (which allows
568 * \v, I think.) note that code in in.read() implicitly accepts
569 * '\r' == \u000D as well.
570 */
571 public static boolean isJSSpace(int c) {
572 return (c == '\u0020' || c == '\u0009'
573 || c == '\u000C' || c == '\u000B'
574 || c == '\u00A0'
575 || Character.getType((char)c) == Character.SPACE_SEPARATOR);
576 }
577
578 private void skipLine() throws IOException {
579 // skip to end of line
580 int c;
581 while ((c = in.read()) != EOF_CHAR && c != '\n') { }
582 in.unread();
583 }
584
585 public int getToken() throws IOException {
586 int c;
587 do {
588 c = getTokenHelper();
589 } while (c == RETRY_TOKEN);
590
591 updatePosition();
592 return c;
593 }
594
/**
 * Scans a single token from the input, or returns the pushed-back token
 * if there is one.
 *
 * Besides the returned token code, the scan may store details in fields
 * of this object: this.op (operator for general tokens such as ASSIGN,
 * EQOP, RELOP, SHOP and UNARYOP), this.string (text of NAME, STRING and
 * REGEXP tokens), this.regExpFlags and this.number.
 *
 * @return the token code; EOF at end of input; ERROR after a scanning
 *         error has been reported; RETRY_TOKEN after a comment has been
 *         skipped, meaning the caller should simply call again
 * @throws IOException if reading from the line buffer fails
 */
private int getTokenHelper() throws IOException {
    int c;
    tokenno++;

    // Check for pushed-back token
    if (this.pushbackToken != EOF) {
        int result = this.pushbackToken;
        this.pushbackToken = EOF;
        return result;
    }

    // Eat whitespace, possibly sensitive to newlines.
    do {
        c = in.read();
        if (c == '\n') {
            // A new line starts clean.
            flags &= ~TSF_DIRTYLINE;
            // When newlines are tokens, stop here so EOL is returned below.
            if ((flags & TSF_NEWLINES) != 0)
                break;
        }
    } while (isJSSpace(c) || c == '\n');

    if (c == EOF_CHAR)
        return EOF;
    // '-' is left out here; the '-' case below marks the line dirty only
    // after it has checked for an HTML end-comment.
    if (c != '-' && c != '\n')
        flags |= TSF_DIRTYLINE;

    // identifier/keyword/instanceof?
    // watch out for starting with a <backslash>
    boolean identifierStart;
    boolean isUnicodeEscapeStart = false;
    if (c == '\\') {
        c = in.read();
        if (c == 'u') {
            identifierStart = true;
            isUnicodeEscapeStart = true;
            stringBufferTop = 0;
        } else {
            // Not an escape: restore the backslash and fall through to
            // the punctuation switch, which has no case for it.
            identifierStart = false;
            c = '\\';
            in.unread();
        }
    } else {
        identifierStart = Character.isJavaIdentifierStart((char)c);
        if (identifierStart) {
            stringBufferTop = 0;
            addToString(c);
        }

        // bruce: special handling of JSNI signatures
        // - it would be nice to handle Unicode escapes in the future
        //
        if (c == '@') {
            stringBufferTop = 0;
            addToString(c);
            return jsniMatchReference();
        }
    }

    if (identifierStart) {
        boolean containsEscape = isUnicodeEscapeStart;
        for (;;) {
            if (isUnicodeEscapeStart) {
                // strictly speaking we should probably push-back
                // all the bad characters if the <backslash>uXXXX
                // sequence is malformed. But since there isn't a
                // correct context(is there?) for a bad Unicode
                // escape sequence in an identifier, we can report
                // an error here.
                int escapeVal = 0;
                for (int i = 0; i != 4; ++i) {
                    c = in.read();
                    escapeVal = (escapeVal << 4) | xDigitToInt(c);
                    // Next check takes care about c < 0 and bad escape
                    if (escapeVal < 0) { break; }
                }
                if (escapeVal < 0) {
                    reportTokenError("msg.invalid.escape", null);
                    return ERROR;
                }
                addToString(escapeVal);
                isUnicodeEscapeStart = false;
            } else {
                c = in.read();
                if (c == '\\') {
                    c = in.read();
                    if (c == 'u') {
                        isUnicodeEscapeStart = true;
                        containsEscape = true;
                    } else {
                        reportTokenError("msg.illegal.character", null);
                        return ERROR;
                    }
                } else {
                    if (!Character.isJavaIdentifierPart((char)c)) {
                        break;
                    }
                    addToString(c);
                }
            }
        }
        // Give back the character that ended the identifier.
        in.unread();

        String str = getStringFromBuffer();
        // An identifier written with an escape is never looked up as a
        // keyword; it is always returned as a NAME.
        if (!containsEscape) {
            // OPT we shouldn't have to make a string (object!) to
            // check if it's a keyword.

            // Return the corresponding token if it's a keyword
            int result = stringToKeyword(str);
            if (result != EOF) {
                return result;
            }
        }
        this.string = str;
        return NAME;
    }

    // is it a number?
    if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {

        stringBufferTop = 0;
        int base = 10;

        // A leading 0 selects hex (0x / 0X) or octal (0 followed by a
        // digit); otherwise the 0 is simply the first decimal digit.
        if (c == '0') {
            c = in.read();
            if (c == 'x' || c == 'X') {
                base = 16;
                c = in.read();
            } else if (isDigit(c)) {
                base = 8;
            } else {
                addToString('0');
            }
        }

        if (base == 16) {
            while (0 <= xDigitToInt(c)) {
                addToString(c);
                c = in.read();
            }
        } else {
            while ('0' <= c && c <= '9') {
                /*
                 * We permit 08 and 09 as decimal numbers, which
                 * makes our behavior a superset of the ECMA
                 * numeric grammar.  We might not always be so
                 * permissive, so we warn about it.
                 */
                if (base == 8 && c >= '8') {
                    Object[] errArgs = { c == '8' ? "8" : "9" };
                    reportTokenWarning("msg.bad.octal.literal", errArgs);
                    base = 10;
                }
                addToString(c);
                c = in.read();
            }
        }

        boolean isInteger = true;

        // Fraction and exponent are only recognized for decimal literals.
        if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
            isInteger = false;
            if (c == '.') {
                do {
                    addToString(c);
                    c = in.read();
                } while (isDigit(c));
            }
            if (c == 'e' || c == 'E') {
                addToString(c);
                c = in.read();
                if (c == '+' || c == '-') {
                    addToString(c);
                    c = in.read();
                }
                if (!isDigit(c)) {
                    reportTokenError("msg.missing.exponent", null);
                    return ERROR;
                }
                do {
                    addToString(c);
                    c = in.read();
                } while (isDigit(c));
            }
        }
        // Give back the character that ended the literal.
        in.unread();
        String numString = getStringFromBuffer();

        double dval;
        if (base == 10 && !isInteger) {
            try {
                // Use Java conversion to number from string...
                dval = (Double.valueOf(numString)).doubleValue();
            }
            catch (NumberFormatException ex) {
                Object[] errArgs = { ex.getMessage() };
                reportTokenError("msg.caught.nfe", errArgs);
                return ERROR;
            }
        } else {
            dval = ScriptRuntime.stringToNumber(numString, 0, base);
        }

        this.number = dval;

        // Literals without fraction or exponent get their own token code.
        if (isInteger) {
            return NUMBER_INT;
        }

        return NUMBER;
    }

    // is it a string?
    if (c == '"' || c == '\'') {
        // The literal is accumulated character by character in the
        // string buffer, with escape sequences translated on the way.

        int quoteChar = c;
        int val = 0;
        stringBufferTop = 0;

        c = in.read();
    strLoop: while (c != quoteChar) {
            if (c == '\n' || c == EOF_CHAR) {
                in.unread();
                reportTokenError("msg.unterminated.string.lit", null);
                return ERROR;
            }

            if (c == '\\') {
                // We've hit an escaped character

                c = in.read();
                switch (c) {
                    case 'b': c = '\b'; break;
                    case 'f': c = '\f'; break;
                    case 'n': c = '\n'; break;
                    case 'r': c = '\r'; break;
                    case 't': c = '\t'; break;

                    // \v a late addition to the ECMA spec,
                    // it is not in Java, so use 0xb
                    case 'v': c = 0xb; break;

                    case 'u': {
                        /*
                         * Get 4 hex digits; if the u escape is not
                         * followed by 4 hex digits, use 'u' + the literal
                         * character sequence that follows.
                         */
                        int escapeStart = stringBufferTop;
                        addToString('u');
                        int escapeVal = 0;
                        for (int i = 0; i != 4; ++i) {
                            c = in.read();
                            escapeVal = (escapeVal << 4) | xDigitToInt(c);
                            if (escapeVal < 0) {
                                // c holds the offending character; the
                                // outer loop examines it next.
                                continue strLoop;
                            }
                            addToString(c);
                        }
                        // prepare for replace of stored 'u' sequence
                        // by escape value
                        stringBufferTop = escapeStart;
                        c = escapeVal;
                    } break;

                    case 'x': {
                        /* Get 2 hex digits, defaulting to 'x' + literal
                         * sequence, as above.
                         */
                        c = in.read();
                        int escapeVal = xDigitToInt(c);
                        if (escapeVal < 0) {
                            addToString('x');
                            continue strLoop;
                        } else {
                            int c1 = c;
                            c = in.read();
                            escapeVal = (escapeVal << 4) | xDigitToInt(c);
                            if (escapeVal < 0) {
                                addToString('x');
                                addToString(c1);
                                continue strLoop;
                            } else {
                                // got 2 hex digits
                                c = escapeVal;
                            }
                        }
                    } break;

                    case '\n':
                        // Remove line terminator
                        c = in.read();
                        continue strLoop;

                    // Octal escape of up to three digits; any other
                    // escaped character is kept as it is.
                    default: if ('0' <= c && c < '8') {
                        val = c - '0';
                        c = in.read();
                        if ('0' <= c && c < '8') {
                            val = 8 * val + c - '0';
                            c = in.read();
                            if ('0' <= c && c < '8' && val <= 037) {
                                // c is 3rd char of octal sequence only if
                                // the resulting val <= 0377
                                val = 8 * val + c - '0';
                                c = in.read();
                            }
                        }
                        // One character too many was read; give it back.
                        in.unread();
                        c = val;
                    }
                }
            }
            addToString(c);
            c = in.read();
        }

        this.string = getStringFromBuffer();
        return STRING;
    }

    // Everything else is punctuation or an operator.
    switch (c)
    {
    case '\n': return EOL;
    case ';': return SEMI;
    case '[': return LB;
    case ']': return RB;
    case '{': return LC;
    case '}': return RC;
    case '(': return LP;
    // GWT is the token code this class uses for a right parenthesis.
    case ')': return GWT;
    case ',': return COMMA;
    case '?': return HOOK;
    case ':': return COLON;
    case '.': return DOT;

    case '|':
        if (in.match('|')) {
            return OR;
        } else if (in.match('=')) {
            this.op = BITOR;
            return ASSIGN;
        } else {
            return BITOR;
        }

    case '^':
        if (in.match('=')) {
            this.op = BITXOR;
            return ASSIGN;
        } else {
            return BITXOR;
        }

    case '&':
        if (in.match('&')) {
            return AND;
        } else if (in.match('=')) {
            this.op = BITAND;
            return ASSIGN;
        } else {
            return BITAND;
        }

    case '=':
        if (in.match('=')) {
            if (in.match('='))
                this.op = SHEQ;
            else
                this.op = EQ;
            return EQOP;
        } else {
            // Plain assignment carries NOP as its operator.
            this.op = NOP;
            return ASSIGN;
        }

    case '!':
        if (in.match('=')) {
            if (in.match('='))
                this.op = SHNE;
            else
                this.op = NE;
            return EQOP;
        } else {
            this.op = NOT;
            return UNARYOP;
        }

    case '<':
        /* NB:treat HTML begin-comment as comment-till-eol */
        if (in.match('!')) {
            if (in.match('-')) {
                if (in.match('-')) {
                    skipLine();
                    return RETRY_TOKEN;  // in place of 'goto retry'
                }
                in.unread();
            }
            in.unread();
        }
        if (in.match('<')) {
            if (in.match('=')) {
                this.op = LSH;
                return ASSIGN;
            } else {
                this.op = LSH;
                return SHOP;
            }
        } else {
            if (in.match('=')) {
                this.op = LE;
                return RELOP;
            } else {
                this.op = LT;
                return RELOP;
            }
        }

    case '>':
        if (in.match('>')) {
            if (in.match('>')) {
                if (in.match('=')) {
                    this.op = URSH;
                    return ASSIGN;
                } else {
                    this.op = URSH;
                    return SHOP;
                }
            } else {
                if (in.match('=')) {
                    this.op = RSH;
                    return ASSIGN;
                } else {
                    this.op = RSH;
                    return SHOP;
                }
            }
        } else {
            if (in.match('=')) {
                this.op = GE;
                return RELOP;
            } else {
                this.op = GT;
                return RELOP;
            }
        }

    case '*':
        if (in.match('=')) {
            this.op = MUL;
            return ASSIGN;
        } else {
            return MUL;
        }

    case '/':
        // is it a // comment?
        if (in.match('/')) {
            skipLine();
            return RETRY_TOKEN;
        }
        // is it a block comment?
        if (in.match('*')) {
            while ((c = in.read()) != -1 &&
                   !(c == '*' && in.match('/'))) {
                ; // empty loop body
            }
            if (c == EOF_CHAR) {
                reportTokenError("msg.unterminated.comment", null);
                return ERROR;
            }
            return RETRY_TOKEN;  // `goto retry'
        }

        // is it a regexp?
        if ((flags & TSF_REGEXP) != 0) {
            stringBufferTop = 0;
            while ((c = in.read()) != '/') {
                if (c == '\n' || c == EOF_CHAR) {
                    in.unread();
                    reportTokenError("msg.unterminated.re.lit", null);
                    return ERROR;
                }
                // Keep a backslash together with the character after it,
                // so an escaped '/' does not end the literal.
                if (c == '\\') {
                    addToString(c);
                    c = in.read();
                }

                addToString(c);
            }
            // The flags are appended to the same buffer; reEnd marks
            // where the pattern text stops.
            int reEnd = stringBufferTop;

            while (true) {
                if (in.match('g'))
                    addToString('g');
                else if (in.match('i'))
                    addToString('i');
                else if (in.match('m'))
                    addToString('m');
                else
                    break;
            }

            if (isAlpha(in.peek())) {
                reportTokenError("msg.invalid.re.flag", null);
                return ERROR;
            }

            this.string = new String(stringBuffer, 0, reEnd);
            this.regExpFlags = new String(stringBuffer, reEnd,
                                          stringBufferTop - reEnd);
            return REGEXP;
        }


        if (in.match('=')) {
            this.op = DIV;
            return ASSIGN;
        } else {
            return DIV;
        }

    case '%':
        this.op = MOD;
        if (in.match('=')) {
            return ASSIGN;
        } else {
            return MOD;
        }

    case '~':
        this.op = BITNOT;
        return UNARYOP;

    case '+':
        if (in.match('=')) {
            this.op = ADD;
            return ASSIGN;
        } else if (in.match('+')) {
            return INC;
        } else {
            return ADD;
        }

    case '-':
        if (in.match('=')) {
            this.op = SUB;
            c = ASSIGN;
        } else if (in.match('-')) {
            if (0 == (flags & TSF_DIRTYLINE)) {
                // treat HTML end-comment after possible whitespace
                // after line start as comment-until-eol
                if (in.match('>')) {
                    skipLine();
                    return RETRY_TOKEN;
                }
            }
            c = DEC;
        } else {
            c = SUB;
        }
        // The line counts as dirty only now, after the end-comment check.
        flags |= TSF_DIRTYLINE;
        return c;

    default:
        reportTokenError("msg.illegal.character", null);
        return ERROR;
    }
}
1166
1167 private void skipWhitespace() throws IOException {
1168 int tmp;
1169 do {
1170 tmp = in.read();
1171 } while (isJSSpace(tmp) || tmp == '\n');
1172 // Reposition back to first non whitespace char.
1173 in.unread();
1174 }
1175
1176 private int jsniMatchReference() throws IOException {
1177
1178 // First, read the type name whose member is being accessed.
1179 if (!jsniMatchQualifiedTypeName('.', ':')) {
1180 return ERROR;
1181 }
1182
1183 // Now we must the second colon.
1184 //
1185 int c = in.read();
1186 if (c != ':') {
1187 in.unread();
1188 reportTokenError("msg.jsni.expected.char", new String[] {":"});
1189 return ERROR;
1190 }
1191 addToString(c);
1192
1193 // Skip whitespace starting after ::.
1194 skipWhitespace();
1195
1196 // Finish by reading the field or method signature.
1197 if (!jsniMatchMethodSignatureOrFieldName()) {
1198 return ERROR;
1199 }
1200
1201 this.string = new String(stringBuffer, 0, stringBufferTop);
1202 return NAME;
1203 }
1204
1205 private boolean jsniMatchParamListSignature() throws IOException {
1206 // Assume the opening '(' has already been read.
1207 // Read param type signatures until we see a closing ')'.
1208
1209 skipWhitespace();
1210
1211 // First check for the special case of * as the parameter list, indicating
1212 // a wildcard
1213 if (in.peek() == '*') {
1214 addToString(in.read());
1215 if (in.peek() != ')') {
1216 reportTokenError("msg.jsni.expected.char", new String[] {")"});
1217 }
1218 addToString(in.read());
1219 return true;
1220 }
1221
1222 // Otherwise, loop through reading one param type at a time
1223 do {
1224 // Skip whitespace between parameters.
1225 skipWhitespace();
1226
1227 int c = in.read();
1228
1229 if (c == ')') {
1230 // Finished successfully.
1231 //
1232 addToString(c);
1233 return true;
1234 }
1235
1236 in.unread();
1237 } while (jsniMatchParamTypeSignature());
1238
1239 // If we made it here, we can assume that there was an invalid type
1240 // signature that was already reported and that the offending char
1241 // was already unread.
1242 //
1243 return false;
1244 }
1245
1246 private boolean jsniMatchParamTypeSignature() throws IOException {
1247 int c = in.read();
1248 switch (c) {
1249 case 'Z':
1250 case 'B':
1251 case 'C':
1252 case 'S':
1253 case 'I':
1254 case 'J':
1255 case 'F':
1256 case 'D':
1257 // Primitive type id.
1258 addToString(c);
1259 return true;
1260 case 'L':
1261 // Class/Interface type prefix.
1262 addToString(c);
1263 return jsniMatchQualifiedTypeName('/', ';');
1264 case '[':
1265 // Array type prefix.
1266 addToString(c);
1267 return jsniMatchParamArrayTypeSignature();
1268 default:
1269 in.unread();
1270 reportTokenError("msg.jsni.expected.param.type", null);
1271 return false;
1272 }
1273 }
1274
1275 private boolean jsniMatchParamArrayTypeSignature() throws IOException {
1276 // Assume the leading '[' has already been read.
1277 // What follows must be another param type signature.
1278 //
1279 return jsniMatchParamTypeSignature();
1280 }
1281
1282 private boolean jsniMatchMethodSignatureOrFieldName() throws IOException {
1283 int c = in.read();
1284
1285
1286 // We must see an ident start here.
1287 //
1288 if (!Character.isJavaIdentifierStart((char)c)) {
1289 in.unread();
1290 reportTokenError("msg.jsni.expected.identifier", null);
1291 return false;
1292 }
1293
1294 addToString(c);
1295
1296 for (;;) {
1297 c = in.read();
1298 if (Character.isJavaIdentifierPart((char)c)) {
1299 addToString(c);
1300 }
1301 else if (c == '(') {
1302 // This means we're starting a JSNI method signature.
1303 //
1304 addToString(c);
1305 if (jsniMatchParamListSignature()) {
1306 // Finished a method signature with success.
1307 // Assume the callee unread the last char.
1308 //
1309 return true;
1310 }
1311 else {
1312 // Assume the callee reported the error and unread the last char.
1313 //
1314 return false;
1315 }
1316 }
1317 else {
1318 // We don't know this char, so it finishes the token.
1319 //
1320 in.unread();
1321 return true;
1322 }
1323 }
1324 }
1325
1326 /**
1327 * This method is called to match the fully-qualified type name that
1328 * should appear after the '@' in a JSNI reference.
1329 * @param sepChar the character that will separate the Java idents
1330 * (either a '.' or '/')
1331 * @param endChar the character that indicates the end of the
1332 */
1333 private boolean jsniMatchQualifiedTypeName(char sepChar, char endChar)
1334 throws IOException {
1335 int c = in.read();
1336
1337 // Whether nested or not, we must see an ident start here.
1338 //
1339 if (!Character.isJavaIdentifierStart((char)c)) {
1340 in.unread();
1341 reportTokenError("msg.jsni.expected.identifier", null);
1342 return false;
1343 }
1344
1345 // Now actually add the first ident char.
1346 //
1347 addToString(c);
1348
1349 // And append any other ident chars.
1350 //
1351 for (;;) {
1352 c = in.read();
1353 if (Character.isJavaIdentifierPart((char)c)) {
1354 addToString(c);
1355 }
1356 else {
1357 break;
1358 }
1359 }
1360
1361 // Arrray-type reference
1362 while (c == '[') {
1363 if (']' == in.peek()) {
1364 addToString('[');
1365 addToString(in.read());
1366 c = in.read();
1367 } else {
1368 break;
1369 }
1370 }
1371
1372 // We have a non-ident char to classify.
1373 //
1374 if (c == sepChar) {
1375 addToString(c);
1376 if (jsniMatchQualifiedTypeName(sepChar, endChar)) {
1377 // We consumed up to the endChar, so we finished with total success.
1378 //
1379 return true;
1380 } else {
1381 // Assume that the nested call reported the syntax error and
1382 // unread the last character.
1383 //
1384 return false;
1385 }
1386 } else if (c == endChar) {
1387 // Matched everything up to the specified end char.
1388 //
1389 addToString(c);
1390 return true;
1391 } else {
1392 // This is an unknown char that finishes the token.
1393 //
1394 in.unread();
1395 return true;
1396 }
1397 }
1398
1399 private String getStringFromBuffer() {
1400 return new String(stringBuffer, 0, stringBufferTop);
1401 }
1402
1403 private void addToString(int c) {
1404 if (stringBufferTop == stringBuffer.length) {
1405 char[] tmp = new char[stringBuffer.length * 2];
1406 System.arraycopy(stringBuffer, 0, tmp, 0, stringBufferTop);
1407 stringBuffer = tmp;
1408 }
1409 stringBuffer[stringBufferTop++] = (char)c;
1410 }
1411
1412 /**
1413 * Positions hold offset of an corresponding token's end.
1414 * So lastPosition holds an offset of char that is next to last token.
1415 *
1416 * Use secondToLastPosition for error reporting outside of TokenStream, because
1417 * usually we want to report beginning of erroneous token,
1418 * which is end of second to last read token.
1419 */
1420 public void reportSyntaxError(String messageProperty, Object[] args) {
1421 String message = Context.getMessage(messageProperty, args);
1422 Context.reportError(message, secondToLastPosition, lastPosition);
1423 }
1424
1425 /**
1426 * Token errors are reported before tokes is read,
1427 * so use lastPosition for reporting.
1428 * @see #reportSyntaxError
1429 */
1430 private void reportTokenError(String messageProperty, Object[] args) {
1431 String message = Context.getMessage(messageProperty, args);
1432 Context.reportError(message, lastPosition, new CodePosition(getLineno(), getOffset()));
1433 }
1434
1435 private void reportTokenWarning(String messageProperty, Object[] args) {
1436 String message = Context.getMessage(messageProperty, args);
1437 Context.reportWarning(message, lastPosition, new CodePosition(getLineno(), getOffset()));
1438 }
1439
1440 /**
1441 * Updates last two known positions (for error reporting).
1442 */
1443 private void updatePosition() {
1444 CodePosition currentPosition = new CodePosition(getLineno(), getOffset());
1445 if (currentPosition.compareTo(lastPosition) > 0) {
1446 secondToLastPosition = lastPosition;
1447 lastPosition = currentPosition;
1448 }
1449 }
1450
1451 public String getSourceName() { return sourceName; }
1452 public int getLineno() { return in.getLineno(); }
1453 public int getOp() { return op; }
1454 public String getString() { return string; }
1455 public double getNumber() { return number; }
1456 public String getLine() { return in.getLine(); }
1457 public int getOffset() { return in.getOffset(); }
1458 public int getTokenno() { return tokenno; }
1459 public boolean eof() { return in.eof(); }
1460
// instance variables

// Character source for the scanner: read(), peek() and unread() are called
// on it, and it supplies the line number, offset, current line and EOF
// state exposed by the accessors.
private LineBuffer in;


/* for TSF_REGEXP, etc.
 * should this be manipulated by getter/setter functions?
 * should it be passed to getToken()?
 */
int flags;
// NOTE(review): presumably the flags of the most recently scanned regexp
// literal; its use is not visible in this part of the file -- confirm.
String regExpFlags;

// Returned by getSourceName().
private String sourceName;
// NOTE(review): presumably a token that was pushed back for re-reading;
// its use is not visible in this part of the file -- confirm.
private int pushbackToken;
// Returned by getTokenno().
private int tokenno;

// The last two known positions, maintained by updatePosition() and used as
// the range boundaries when errors and warnings are reported.
CodePosition secondToLastPosition;
CodePosition lastPosition;

// Returned by getOp().
private int op;

// Set this to an initial non-null value so that the Parser has
// something to retrieve even if an error has occurred and no
// string is found. Fosters one class of error, but saves lots of
// code.
private String string = "";
// Returned by getNumber().
private double number;

// Scratch buffer that addToString() appends to, growing it by doubling;
// stringBufferTop is the number of chars currently in use.
private char[] stringBuffer = new char[128];
private int stringBufferTop;
1490 }