001 /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 002 * 003 * The contents of this file are subject to the Netscape Public 004 * License Version 1.1 (the "License"); you may not use this file 005 * except in compliance with the License. You may obtain a copy of 006 * the License at http://www.mozilla.org/NPL/ 007 * 008 * Software distributed under the License is distributed on an "AS 009 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 010 * implied. See the License for the specific language governing 011 * rights and limitations under the License. 012 * 013 * The Original Code is Rhino code, released 014 * May 6, 1999. 015 * 016 * The Initial Developer of the Original Code is Netscape 017 * Communications Corporation. Portions created by Netscape are 018 * Copyright (C) 1997-1999 Netscape Communications Corporation. All 019 * Rights Reserved. 020 * 021 * Contributor(s): 022 * Roger Lawrence 023 * Mike McCabe 024 * 025 * Alternatively, the contents of this file may be used under the 026 * terms of the GNU Public License (the "GPL"), in which case the 027 * provisions of the GPL are applicable instead of those above. 028 * If you wish to allow use of your version of this file only 029 * under the terms of the GPL and not to allow others to use your 030 * version of this file under the NPL, indicate your decision by 031 * deleting the provisions above and replace them with the notice 032 * and other provisions required by the GPL. If you do not delete 033 * the provisions above, a recipient may use your version of this 034 * file under either the NPL or the GPL. 035 */ 036 // Modified by Google 037 038 package com.google.gwt.dev.js.rhino; 039 040 import java.io.*; 041 import java.util.HashMap; 042 import java.util.Map; 043 044 /** 045 * This class implements the JavaScript scanner. 046 * 047 * It is based on the C source files jsscan.c and jsscan.h 048 * in the jsref package. 
*/

public class TokenStream {

    // Reserved word -> packed token code.  The low 8 bits hold the token
    // type and the bits above hold the op (see the static initializer and
    // stringToKeyword, which split the value with "& 0xff" and ">> 8").
    private static final Map<String, Integer> KEYWORDS = new HashMap<String, Integer>();

    /*
     * JSTokenStream flags, mirroring those in jsscan.h.  These are used
     * by the parser to change/check the state of the scanner.
     */

    final static int
        TSF_NEWLINES    = 1 << 0,  // tokenize newlines
        TSF_FUNCTION    = 1 << 1,  // scanning inside function body
        TSF_RETURN_EXPR = 1 << 2,  // function has 'return expr;'
        TSF_RETURN_VOID = 1 << 3,  // function has 'return;'
        TSF_REGEXP      = 1 << 4,  // looking for a regular expression
        TSF_DIRTYLINE   = 1 << 5;  // stuff other than whitespace since
                                   // start of line

    /*
     * For chars - because we need something out-of-range
     * to check.  (And checking EOF by exception is annoying.)
     * Note distinction from EOF token type!
     */
    private final static int
        EOF_CHAR = -1;

    /**
     * Token types.  These values correspond to JSTokenType values in
     * jsscan.c.
     */

    public final static int
    // start enum
        ERROR       = -1, // well-known as the only code < EOF
        EOF         = 0,  // end of file token - (not EOF_CHAR)
        EOL         = 1,  // end of line
        // Beginning here are interpreter bytecodes. Their values
        // must not exceed 127.
        POPV        = 2,
        ENTERWITH   = 3,
        LEAVEWITH   = 4,
        RETURN      = 5,
        GOTO        = 6,
        IFEQ        = 7,
        IFNE        = 8,
        DUP         = 9,
        SETNAME     = 10,
        BITOR       = 11,
        BITXOR      = 12,
        BITAND      = 13,
        EQ          = 14,
        NE          = 15,
        LT          = 16,
        LE          = 17,
        GT          = 18,
        GE          = 19,
        LSH         = 20,
        RSH         = 21,
        URSH        = 22,
        ADD         = 23,
        SUB         = 24,
        MUL         = 25,
        DIV         = 26,
        MOD         = 27,
        BITNOT      = 28,
        NEG         = 29,
        NEW         = 30,
        DELPROP     = 31,
        TYPEOF      = 32,
        NAMEINC     = 33,
        PROPINC     = 34,
        ELEMINC     = 35,
        NAMEDEC     = 36,
        PROPDEC     = 37,
        ELEMDEC     = 38,
        GETPROP     = 39,
        SETPROP     = 40,
        GETELEM     = 41,
        SETELEM     = 42,
        CALL        = 43,
        NAME        = 44,
        NUMBER      = 45,
        STRING      = 46,
        ZERO        = 47,
        ONE         = 48,
        NULL        = 49,
        THIS        = 50,
        FALSE       = 51,
        TRUE        = 52,
        SHEQ        = 53,   // shallow equality (===)
        SHNE        = 54,   // shallow inequality (!==)
        CLOSURE     = 55,
        REGEXP      = 56,
        POP         = 57,
        POS         = 58,
        VARINC      = 59,
        VARDEC      = 60,
        BINDNAME    = 61,
        THROW       = 62,
        IN          = 63,
        INSTANCEOF  = 64,
        GOSUB       = 65,
        RETSUB      = 66,
        CALLSPECIAL = 67,
        GETTHIS     = 68,
        NEWTEMP     = 69,
        USETEMP     = 70,
        GETBASE     = 71,
        GETVAR      = 72,
        SETVAR      = 73,
        UNDEFINED   = 74,
        TRY         = 75,
        ENDTRY      = 76,
        NEWSCOPE    = 77,
        TYPEOFNAME  = 78,
        ENUMINIT    = 79,
        ENUMNEXT    = 80,
        GETPROTO    = 81,
        GETPARENT   = 82,
        SETPROTO    = 83,
        SETPARENT   = 84,
        SCOPE       = 85,
        GETSCOPEPARENT = 86,
        THISFN      = 87,
        JTHROW      = 88,
        // End of interpreter bytecodes
        SEMI        = 89,  // semicolon
        LB          = 90,  // left and right brackets
        RB          = 91,
        LC          = 92,  // left and right curlies (braces)
        RC          = 93,
        LP          = 94,  // left and right parentheses
        // NOTE(review): GWT is the token the scanner returns for ')' (the
        // counterpart of LP).  The name looks like an accidental rename of
        // a right-paren constant - confirm against the parser before use.
        GWT         = 95,
        COMMA       = 96,  // comma operator
        ASSIGN      = 97,  // assignment ops (= += -= etc.)
        HOOK        = 98,  // conditional (?:)
        COLON       = 99,
        OR          = 100, // logical or (||)
        AND         = 101, // logical and (&&)
        EQOP        = 102, // equality ops (== !=)
        RELOP       = 103, // relational ops (< <= > >=)
        SHOP        = 104, // shift ops (<< >> >>>)
        UNARYOP     = 105, // unary prefix operator
        INC         = 106, // increment/decrement (++ --)
        DEC         = 107,
        DOT         = 108, // member operator (.)
        PRIMARY     = 109, // true, false, null, this
        FUNCTION    = 110, // function keyword
        EXPORT      = 111, // export keyword
        IMPORT      = 112, // import keyword
        IF          = 113, // if keyword
        ELSE        = 114, // else keyword
        SWITCH      = 115, // switch keyword
        CASE        = 116, // case keyword
        DEFAULT     = 117, // default keyword
        WHILE       = 118, // while keyword
        DO          = 119, // do keyword
        FOR         = 120, // for keyword
        BREAK       = 121, // break keyword
        CONTINUE    = 122, // continue keyword
        VAR         = 123, // var keyword
        WITH        = 124, // with keyword
        CATCH       = 125, // catch keyword
        FINALLY     = 126, // finally keyword

        /** Added by Mike - these are JSOPs in the jsref, but I
         * don't have them yet in the java implementation...
         * so they go here.  Also whatever I needed.

         * Most of these go in the 'op' field when returning
         * more general token types, eg. 'DIV' as the op of 'ASSIGN'.
         */
        NOP         = 128, // NOP
        NOT         = 129, // etc.
        PRE         = 130, // for INC, DEC nodes.
        POST        = 131,

        /**
         * For JSOPs associated with keywords...
         * eg. op = THIS; token = PRIMARY
         */

        VOID        = 132,

        /* types used for the parse tree - these never get returned
         * by the scanner.
237 */ 238 BLOCK = 133, // statement block 239 ARRAYLIT = 134, // array literal 240 OBJLIT = 135, // object literal 241 LABEL = 136, // label 242 TARGET = 137, 243 LOOP = 138, 244 ENUMDONE = 139, 245 EXPRSTMT = 140, 246 PARENT = 141, 247 CONVERT = 142, 248 JSR = 143, 249 NEWLOCAL = 144, 250 USELOCAL = 145, 251 DEBUGGER = 146, 252 SCRIPT = 147, // top-level node for entire script 253 254 LAST_TOKEN = 147, 255 NUMBER_INT = 148, 256 257 // This value is only used as a return value for getTokenHelper, 258 // which is only called from getToken and exists to avoid an excessive 259 // recursion problem if a number of lines in a row are comments. 260 RETRY_TOKEN = 65535; 261 262 // end enum 263 264 265 public static String tokenToName(int token) { 266 if (Context.printTrees || Context.printICode) { 267 switch (token) { 268 case ERROR: return "error"; 269 case EOF: return "eof"; 270 case EOL: return "eol"; 271 case POPV: return "popv"; 272 case ENTERWITH: return "enterwith"; 273 case LEAVEWITH: return "leavewith"; 274 case RETURN: return "return"; 275 case GOTO: return "goto"; 276 case IFEQ: return "ifeq"; 277 case IFNE: return "ifne"; 278 case DUP: return "dup"; 279 case SETNAME: return "setname"; 280 case BITOR: return "bitor"; 281 case BITXOR: return "bitxor"; 282 case BITAND: return "bitand"; 283 case EQ: return "eq"; 284 case NE: return "ne"; 285 case LT: return "lt"; 286 case LE: return "le"; 287 case GT: return "gt"; 288 case GE: return "ge"; 289 case LSH: return "lsh"; 290 case RSH: return "rsh"; 291 case URSH: return "ursh"; 292 case ADD: return "add"; 293 case SUB: return "sub"; 294 case MUL: return "mul"; 295 case DIV: return "div"; 296 case MOD: return "mod"; 297 case BITNOT: return "bitnot"; 298 case NEG: return "neg"; 299 case NEW: return "new"; 300 case DELPROP: return "delprop"; 301 case TYPEOF: return "typeof"; 302 case NAMEINC: return "nameinc"; 303 case PROPINC: return "propinc"; 304 case ELEMINC: return "eleminc"; 305 case NAMEDEC: return "namedec"; 306 
case PROPDEC: return "propdec"; 307 case ELEMDEC: return "elemdec"; 308 case GETPROP: return "getprop"; 309 case SETPROP: return "setprop"; 310 case GETELEM: return "getelem"; 311 case SETELEM: return "setelem"; 312 case CALL: return "call"; 313 case NAME: return "name"; 314 case NUMBER_INT: return "integer"; 315 case NUMBER: return "double"; 316 case STRING: return "string"; 317 case ZERO: return "zero"; 318 case ONE: return "one"; 319 case NULL: return "null"; 320 case THIS: return "this"; 321 case FALSE: return "false"; 322 case TRUE: return "true"; 323 case SHEQ: return "sheq"; 324 case SHNE: return "shne"; 325 case CLOSURE: return "closure"; 326 case REGEXP: return "object"; 327 case POP: return "pop"; 328 case POS: return "pos"; 329 case VARINC: return "varinc"; 330 case VARDEC: return "vardec"; 331 case BINDNAME: return "bindname"; 332 case THROW: return "throw"; 333 case IN: return "in"; 334 case INSTANCEOF: return "instanceof"; 335 case GOSUB: return "gosub"; 336 case RETSUB: return "retsub"; 337 case CALLSPECIAL: return "callspecial"; 338 case GETTHIS: return "getthis"; 339 case NEWTEMP: return "newtemp"; 340 case USETEMP: return "usetemp"; 341 case GETBASE: return "getbase"; 342 case GETVAR: return "getvar"; 343 case SETVAR: return "setvar"; 344 case UNDEFINED: return "undefined"; 345 case TRY: return "try"; 346 case ENDTRY: return "endtry"; 347 case NEWSCOPE: return "newscope"; 348 case TYPEOFNAME: return "typeofname"; 349 case ENUMINIT: return "enuminit"; 350 case ENUMNEXT: return "enumnext"; 351 case GETPROTO: return "getproto"; 352 case GETPARENT: return "getparent"; 353 case SETPROTO: return "setproto"; 354 case SETPARENT: return "setparent"; 355 case SCOPE: return "scope"; 356 case GETSCOPEPARENT: return "getscopeparent"; 357 case THISFN: return "thisfn"; 358 case JTHROW: return "jthrow"; 359 case SEMI: return "semi"; 360 case LB: return "lb"; 361 case RB: return "rb"; 362 case LC: return "lc"; 363 case RC: return "rc"; 364 case LP: return "lp"; 
365 case GWT: return "gwt"; 366 case COMMA: return "comma"; 367 case ASSIGN: return "assign"; 368 case HOOK: return "hook"; 369 case COLON: return "colon"; 370 case OR: return "or"; 371 case AND: return "and"; 372 case EQOP: return "eqop"; 373 case RELOP: return "relop"; 374 case SHOP: return "shop"; 375 case UNARYOP: return "unaryop"; 376 case INC: return "inc"; 377 case DEC: return "dec"; 378 case DOT: return "dot"; 379 case PRIMARY: return "primary"; 380 case FUNCTION: return "function"; 381 case EXPORT: return "export"; 382 case IMPORT: return "import"; 383 case IF: return "if"; 384 case ELSE: return "else"; 385 case SWITCH: return "switch"; 386 case CASE: return "case"; 387 case DEFAULT: return "default"; 388 case WHILE: return "while"; 389 case DO: return "do"; 390 case FOR: return "for"; 391 case BREAK: return "break"; 392 case CONTINUE: return "continue"; 393 case VAR: return "var"; 394 case WITH: return "with"; 395 case CATCH: return "catch"; 396 case FINALLY: return "finally"; 397 case NOP: return "nop"; 398 case NOT: return "not"; 399 case PRE: return "pre"; 400 case POST: return "post"; 401 case VOID: return "void"; 402 case BLOCK: return "block"; 403 case ARRAYLIT: return "arraylit"; 404 case OBJLIT: return "objlit"; 405 case LABEL: return "label"; 406 case TARGET: return "target"; 407 case LOOP: return "loop"; 408 case ENUMDONE: return "enumdone"; 409 case EXPRSTMT: return "exprstmt"; 410 case PARENT: return "parent"; 411 case CONVERT: return "convert"; 412 case JSR: return "jsr"; 413 case NEWLOCAL: return "newlocal"; 414 case USELOCAL: return "uselocal"; 415 case SCRIPT: return "script"; 416 } 417 return "<unknown="+token+">"; 418 } 419 return ""; 420 } 421 422 /* This function uses the cached op, string and number fields in 423 * TokenStream; if getToken has been called since the passed token 424 * was scanned, the op or string printed may be incorrect. 
425 */ 426 public String tokenToString(int token) { 427 if (Context.printTrees) { 428 String name = tokenToName(token); 429 430 switch (token) { 431 case UNARYOP: 432 case ASSIGN: 433 case PRIMARY: 434 case EQOP: 435 case SHOP: 436 case RELOP: 437 return name + " " + tokenToName(this.op); 438 439 case STRING: 440 case REGEXP: 441 case NAME: 442 return name + " `" + this.string + "'"; 443 444 case NUMBER_INT: 445 return "NUMBER_INT " + (int) this.number; 446 case NUMBER: 447 return "NUMBER " + this.number; 448 } 449 450 return name; 451 } 452 return ""; 453 } 454 455 static { 456 KEYWORDS.put("break", BREAK); 457 KEYWORDS.put("case", CASE); 458 KEYWORDS.put("continue", CONTINUE); 459 KEYWORDS.put("default", DEFAULT); 460 KEYWORDS.put("delete", DELPROP); 461 KEYWORDS.put("do", DO); 462 KEYWORDS.put("else", ELSE); 463 KEYWORDS.put("export", EXPORT); 464 KEYWORDS.put("false", PRIMARY | (FALSE << 8)); 465 KEYWORDS.put("for", FOR); 466 KEYWORDS.put("function", FUNCTION); 467 KEYWORDS.put("if", IF); 468 KEYWORDS.put("in", RELOP | (IN << 8)); 469 KEYWORDS.put("new", NEW); 470 KEYWORDS.put("null", PRIMARY | (NULL << 8)); 471 KEYWORDS.put("return", RETURN); 472 KEYWORDS.put("switch", SWITCH); 473 KEYWORDS.put("this", PRIMARY | (THIS << 8)); 474 KEYWORDS.put("true", PRIMARY | (TRUE << 8)); 475 KEYWORDS.put("typeof", UNARYOP | (TYPEOF << 8)); 476 KEYWORDS.put("var", VAR); 477 KEYWORDS.put("void", UNARYOP | (VOID << 8)); 478 KEYWORDS.put("while", WHILE); 479 KEYWORDS.put("with", WITH); 480 KEYWORDS.put("catch", CATCH); 481 KEYWORDS.put("debugger", DEBUGGER); 482 KEYWORDS.put("finally", FINALLY); 483 KEYWORDS.put("import", IMPORT); 484 KEYWORDS.put("instanceof", RELOP | (INSTANCEOF << 8)); 485 KEYWORDS.put("throw", THROW); 486 KEYWORDS.put("try", TRY); 487 } 488 489 private int stringToKeyword(String name) { 490 Integer id = KEYWORDS.get(name); 491 if (id == null) return EOF; 492 493 this.op = id >> 8; 494 return id & 0xff; 495 } 496 497 public TokenStream(Reader in, 498 String 
sourceName, int lineno) 499 { 500 this.in = new LineBuffer(in, lineno); 501 this.pushbackToken = EOF; 502 this.sourceName = sourceName; 503 flags = 0; 504 secondToLastPosition = new CodePosition(lineno, 0); 505 lastPosition = new CodePosition(lineno, 0); 506 } 507 508 /* return and pop the token from the stream if it matches... 509 * otherwise return null 510 */ 511 public boolean matchToken(int toMatch) throws IOException { 512 int token = getToken(); 513 if (token == toMatch) 514 return true; 515 516 // didn't match, push back token 517 tokenno--; 518 this.pushbackToken = token; 519 return false; 520 } 521 522 public void ungetToken(int tt) { 523 if (this.pushbackToken != EOF && tt != ERROR) { 524 String message = Context.getMessage2("msg.token.replaces.pushback", 525 tokenToString(tt), tokenToString(this.pushbackToken)); 526 throw new RuntimeException(message); 527 } 528 this.pushbackToken = tt; 529 tokenno--; 530 } 531 532 public int peekToken() throws IOException { 533 int result = getToken(); 534 535 this.pushbackToken = result; 536 tokenno--; 537 return result; 538 } 539 540 public int peekTokenSameLine() throws IOException { 541 int result; 542 543 flags |= TSF_NEWLINES; // SCAN_NEWLINES from jsscan.h 544 result = peekToken(); 545 flags &= ~TSF_NEWLINES; // HIDE_NEWLINES from jsscan.h 546 if (this.pushbackToken == EOL) 547 this.pushbackToken = EOF; 548 return result; 549 } 550 551 private static boolean isAlpha(int c) { 552 return ((c >= 'a' && c <= 'z') 553 || (c >= 'A' && c <= 'Z')); 554 } 555 556 static boolean isDigit(int c) { 557 return (c >= '0' && c <= '9'); 558 } 559 560 static int xDigitToInt(int c) { 561 if ('0' <= c && c <= '9') { return c - '0'; } 562 if ('a' <= c && c <= 'f') { return c - ('a' - 10); } 563 if ('A' <= c && c <= 'F') { return c - ('A' - 10); } 564 return -1; 565 } 566 567 /* As defined in ECMA. jsscan.c uses C isspace() (which allows 568 * \v, I think.) note that code in in.read() implicitly accepts 569 * '\r' == \u000D as well. 
*/
    public static boolean isJSSpace(int c) {
        return (c == '\u0020' || c == '\u0009'
                || c == '\u000C' || c == '\u000B'
                || c == '\u00A0'
                || Character.getType((char)c) == Character.SPACE_SEPARATOR);
    }

    /**
     * Reads characters until a '\n' or EOF_CHAR has been read, then calls
     * in.unread() once (presumably so the terminator is seen again by the
     * next read - confirm against LineBuffer).
     */
    private void skipLine() throws IOException {
        // skip to end of line
        int c;
        while ((c = in.read()) != EOF_CHAR && c != '\n') { }
        in.unread();
    }

    /**
     * Returns the next token.
     *
     * Calls getTokenHelper repeatedly while it answers RETRY_TOKEN (which it
     * does after skipping a comment), then calls updatePosition() before
     * returning the token.
     *
     * @return a token type constant, or ERROR after a reported scan error
     */
    public int getToken() throws IOException {
        int c;
        do {
            c = getTokenHelper();
        } while (c == RETRY_TOKEN);

        updatePosition();
        return c;
    }

    /**
     * Scans a single token, or returns RETRY_TOKEN after skipping a comment
     * so that getToken calls it again.
     *
     * Increments tokenno, hands back a pushed-back token if one is pending,
     * and otherwise skips whitespace and recognizes, in this order:
     * identifiers and keywords (including JSNI references starting with
     * '@'), numeric literals, string literals, and punctuation/operators.
     * Depending on the token it also sets this.op, this.string,
     * this.number or this.regExpFlags.
     *
     * @return a token type constant, ERROR after a reported scan error, or
     *         RETRY_TOKEN
     */
    private int getTokenHelper() throws IOException {
        int c;
        tokenno++;

        // Check for pushed-back token
        if (this.pushbackToken != EOF) {
            int result = this.pushbackToken;
            this.pushbackToken = EOF;
            return result;
        }

        // Eat whitespace, possibly sensitive to newlines.
        do {
            c = in.read();
            if (c == '\n') {
                flags &= ~TSF_DIRTYLINE;
                if ((flags & TSF_NEWLINES) != 0)
                    break;
            }
        } while (isJSSpace(c) || c == '\n');

        if (c == EOF_CHAR)
            return EOF;
        // '-' is excluded here; the '-' case at the end of the switch sets
        // TSF_DIRTYLINE itself after checking for an HTML end-comment.
        if (c != '-' && c != '\n')
            flags |= TSF_DIRTYLINE;

        // identifier/keyword/instanceof?
        // watch out for starting with a <backslash>
        boolean identifierStart;
        boolean isUnicodeEscapeStart = false;
        if (c == '\\') {
            c = in.read();
            if (c == 'u') {
                identifierStart = true;
                isUnicodeEscapeStart = true;
                stringBufferTop = 0;
            } else {
                identifierStart = false;
                c = '\\';
                in.unread();
            }
        } else {
            identifierStart = Character.isJavaIdentifierStart((char)c);
            if (identifierStart) {
                stringBufferTop = 0;
                addToString(c);
            }

            // bruce: special handling of JSNI signatures
            // - it would be nice to handle Unicode escapes in the future
            //
            if (c == '@') {
                stringBufferTop = 0;
                addToString(c);
                return jsniMatchReference();
            }
        }

        if (identifierStart) {
            boolean containsEscape = isUnicodeEscapeStart;
            for (;;) {
                if (isUnicodeEscapeStart) {
                    // strictly speaking we should probably push-back
                    // all the bad characters if the <backslash>uXXXX
                    // sequence is malformed. But since there isn't a
                    // correct context(is there?) for a bad Unicode
                    // escape sequence in an identifier, we can report
                    // an error here.
                    int escapeVal = 0;
                    for (int i = 0; i != 4; ++i) {
                        c = in.read();
                        escapeVal = (escapeVal << 4) | xDigitToInt(c);
                        // Next check takes care about c < 0 and bad escape
                        if (escapeVal < 0) { break; }
                    }
                    if (escapeVal < 0) {
                        reportTokenError("msg.invalid.escape", null);
                        return ERROR;
                    }
                    addToString(escapeVal);
                    isUnicodeEscapeStart = false;
                } else {
                    c = in.read();
                    if (c == '\\') {
                        c = in.read();
                        if (c == 'u') {
                            isUnicodeEscapeStart = true;
                            containsEscape = true;
                        } else {
                            reportTokenError("msg.illegal.character", null);
                            return ERROR;
                        }
                    } else {
                        if (!Character.isJavaIdentifierPart((char)c)) {
                            break;
                        }
                        addToString(c);
                    }
                }
            }
            in.unread();

            String str = getStringFromBuffer();
            // An identifier written with an escape is never treated as a
            // keyword: the keyword lookup is skipped for it.
            if (!containsEscape) {
                // OPT we shouldn't have to make a string (object!) to
                // check if it's a keyword.

                // Return the corresponding token if it's a keyword
                int result = stringToKeyword(str);
                if (result != EOF) {
                    return result;
                }
            }
            this.string = str;
            return NAME;
        }

        // is it a number?
        if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {

            stringBufferTop = 0;
            int base = 10;

            // A leading '0' selects hex ("0x"/"0X") or octal (followed by
            // another digit); otherwise the '0' is just the first digit.
            if (c == '0') {
                c = in.read();
                if (c == 'x' || c == 'X') {
                    base = 16;
                    c = in.read();
                } else if (isDigit(c)) {
                    base = 8;
                } else {
                    addToString('0');
                }
            }

            if (base == 16) {
                while (0 <= xDigitToInt(c)) {
                    addToString(c);
                    c = in.read();
                }
            } else {
                while ('0' <= c && c <= '9') {
                    /*
                     * We permit 08 and 09 as decimal numbers, which
                     * makes our behavior a superset of the ECMA
                     * numeric grammar.  We might not always be so
                     * permissive, so we warn about it.
                     */
                    if (base == 8 && c >= '8') {
                        Object[] errArgs = { c == '8' ? "8" : "9" };
                        reportTokenWarning("msg.bad.octal.literal", errArgs);
                        base = 10;
                    }
                    addToString(c);
                    c = in.read();
                }
            }

            boolean isInteger = true;

            // Fraction and exponent are only scanned for decimal literals.
            if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
                isInteger = false;
                if (c == '.') {
                    do {
                        addToString(c);
                        c = in.read();
                    } while (isDigit(c));
                }
                if (c == 'e' || c == 'E') {
                    addToString(c);
                    c = in.read();
                    if (c == '+' || c == '-') {
                        addToString(c);
                        c = in.read();
                    }
                    if (!isDigit(c)) {
                        reportTokenError("msg.missing.exponent", null);
                        return ERROR;
                    }
                    do {
                        addToString(c);
                        c = in.read();
                    } while (isDigit(c));
                }
            }
            in.unread();
            String numString = getStringFromBuffer();

            double dval;
            if (base == 10 && !isInteger) {
                try {
                    // Use Java conversion to number from string...
                    dval = (Double.valueOf(numString)).doubleValue();
                }
                catch (NumberFormatException ex) {
                    Object[] errArgs = { ex.getMessage() };
                    reportTokenError("msg.caught.nfe", errArgs);
                    return ERROR;
                }
            } else {
                dval = ScriptRuntime.stringToNumber(numString, 0, base);
            }

            this.number = dval;

            if (isInteger) {
                return NUMBER_INT;
            }

            return NUMBER;
        }

        // is it a string?
        if (c == '"' || c == '\'') {
            // We attempt to accumulate a string the fast way, by
            // building it directly out of the reader.  But if there
            // are any escaped characters in the string, we revert to
            // building it out of a StringBuffer.

            int quoteChar = c;
            int val = 0;
            stringBufferTop = 0;

            c = in.read();
        strLoop: while (c != quoteChar) {
                if (c == '\n' || c == EOF_CHAR) {
                    in.unread();
                    reportTokenError("msg.unterminated.string.lit", null);
                    return ERROR;
                }

                if (c == '\\') {
                    // We've hit an escaped character

                    c = in.read();
                    switch (c) {
                        case 'b': c = '\b'; break;
                        case 'f': c = '\f'; break;
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;

                        // \v a late addition to the ECMA spec,
                        // it is not in Java, so use 0xb
                        case 'v': c = 0xb; break;

                        case 'u': {
                            /*
                             * Get 4 hex digits; if the u escape is not
                             * followed by 4 hex digits, use 'u' + the literal
                             * character sequence that follows.
                             */
                            int escapeStart = stringBufferTop;
                            addToString('u');
                            int escapeVal = 0;
                            for (int i = 0; i != 4; ++i) {
                                c = in.read();
                                escapeVal = (escapeVal << 4) | xDigitToInt(c);
                                if (escapeVal < 0) {
                                    // c is left for the outer loop to handle
                                    continue strLoop;
                                }
                                addToString(c);
                            }
                            // prepare for replace of stored 'u' sequence
                            // by escape value
                            stringBufferTop = escapeStart;
                            c = escapeVal;
                        } break;

                        case 'x': {
                            /* Get 2 hex digits, defaulting to 'x' + literal
                             * sequence, as above.
                             */
                            c = in.read();
                            int escapeVal = xDigitToInt(c);
                            if (escapeVal < 0) {
                                addToString('x');
                                continue strLoop;
                            } else {
                                int c1 = c;
                                c = in.read();
                                escapeVal = (escapeVal << 4) | xDigitToInt(c);
                                if (escapeVal < 0) {
                                    addToString('x');
                                    addToString(c1);
                                    continue strLoop;
                                } else {
                                    // got 2 hex digits
                                    c = escapeVal;
                                }
                            }
                        } break;

                        case '\n':
                            // Remove line terminator
                            c = in.read();
                            continue strLoop;

                        // Up to three octal digits; any other escaped
                        // character falls through unchanged.
                        default: if ('0' <= c && c < '8') {
                            val = c - '0';
                            c = in.read();
                            if ('0' <= c && c < '8') {
                                val = 8 * val + c - '0';
                                c = in.read();
                                if ('0' <= c && c < '8' && val <= 037) {
                                    // c is 3rd char of octal sequence only if
                                    // the resulting val <= 0377
                                    val = 8 * val + c - '0';
                                    c = in.read();
                                }
                            }
                            in.unread();
                            c = val;
                        }
                    }
                }
                addToString(c);
                c = in.read();
            }

            this.string = getStringFromBuffer();
            return STRING;
        }

        switch (c)
        {
        case '\n': return EOL;
        case ';': return SEMI;
        case '[': return LB;
        case ']': return RB;
        case '{': return LC;
        case '}': return RC;
        case '(': return LP;
        case ')': return GWT;
        case ',': return COMMA;
        case '?': return HOOK;
        case ':': return COLON;
        case '.': return DOT;

        case '|':
            if (in.match('|')) {
                return OR;
            } else if (in.match('=')) {
                this.op = BITOR;
                return ASSIGN;
            } else {
                return BITOR;
            }

        case '^':
            if (in.match('=')) {
                this.op = BITXOR;
                return ASSIGN;
            } else {
                return BITXOR;
            }

        case '&':
            if (in.match('&')) {
                return AND;
            } else if (in.match('=')) {
                this.op = BITAND;
                return ASSIGN;
            } else {
                return BITAND;
            }

        case '=':
            if (in.match('=')) {
                if (in.match('='))
                    this.op = SHEQ;
                else
                    this.op = EQ;
                return EQOP;
            } else {
                this.op = NOP;
                return ASSIGN;
            }

        case '!':
            if (in.match('=')) {
                if (in.match('='))
                    this.op = SHNE;
                else
                    this.op = NE;
                return EQOP;
            } else {
                this.op = NOT;
                return UNARYOP;
            }

        case '<':
            /* NB:treat HTML begin-comment as comment-till-eol */
            if (in.match('!')) {
                if (in.match('-')) {
                    if (in.match('-')) {
                        skipLine();
                        return RETRY_TOKEN;  // in place of 'goto retry'
                    }
                    in.unread();
                }
                in.unread();
            }
            if (in.match('<')) {
                if (in.match('=')) {
                    this.op = LSH;
                    return ASSIGN;
                } else {
                    this.op = LSH;
                    return SHOP;
                }
            } else {
                if (in.match('=')) {
                    this.op = LE;
                    return RELOP;
                } else {
                    this.op = LT;
                    return RELOP;
                }
            }

        case '>':
            if (in.match('>')) {
                if (in.match('>')) {
                    if (in.match('=')) {
                        this.op = URSH;
                        return ASSIGN;
                    } else {
                        this.op = URSH;
                        return SHOP;
                    }
                } else {
                    if (in.match('=')) {
                        this.op = RSH;
                        return ASSIGN;
                    } else {
                        this.op = RSH;
                        return SHOP;
                    }
                }
            } else {
                if (in.match('=')) {
                    this.op = GE;
                    return RELOP;
                } else {
                    this.op = GT;
                    return RELOP;
                }
            }

        case '*':
            if (in.match('=')) {
                this.op = MUL;
                return ASSIGN;
            } else {
                return MUL;
            }

        case '/':
            // is it a // comment?
            if (in.match('/')) {
                skipLine();
                return RETRY_TOKEN;
            }
            if (in.match('*')) {
                while ((c = in.read()) != -1 &&
                       !(c == '*' && in.match('/'))) {
                    ; // empty loop body
                }
                if (c == EOF_CHAR) {
                    reportTokenError("msg.unterminated.comment", null);
                    return ERROR;
                }
                return RETRY_TOKEN;  // `goto retry'
            }

            // is it a regexp?
            if ((flags & TSF_REGEXP) != 0) {
                stringBufferTop = 0;
                while ((c = in.read()) != '/') {
                    if (c == '\n' || c == EOF_CHAR) {
                        in.unread();
                        reportTokenError("msg.unterminated.re.lit", null);
                        return ERROR;
                    }
                    // keep the backslash and the character it escapes
                    if (c == '\\') {
                        addToString(c);
                        c = in.read();
                    }

                    addToString(c);
                }
                // Pattern text ends here; flag letters are appended after
                // it in the same buffer and split off below.
                int reEnd = stringBufferTop;

                while (true) {
                    if (in.match('g'))
                        addToString('g');
                    else if (in.match('i'))
                        addToString('i');
                    else if (in.match('m'))
                        addToString('m');
                    else
                        break;
                }

                if (isAlpha(in.peek())) {
                    reportTokenError("msg.invalid.re.flag", null);
                    return ERROR;
                }

                this.string = new String(stringBuffer, 0, reEnd);
                this.regExpFlags = new String(stringBuffer, reEnd,
                                              stringBufferTop - reEnd);
                return REGEXP;
            }


            if (in.match('=')) {
                this.op = DIV;
                return ASSIGN;
            } else {
                return DIV;
            }

        case '%':
            this.op = MOD;
            if (in.match('=')) {
                return ASSIGN;
            } else {
                return MOD;
            }

        case '~':
            this.op = BITNOT;
            return UNARYOP;

        case '+':
            if (in.match('=')) {
                this.op = ADD;
                return ASSIGN;
            } else if (in.match('+')) {
                return INC;
            } else {
                return ADD;
            }

        case '-':
            if (in.match('=')) {
                this.op = SUB;
                c = ASSIGN;
            } else if (in.match('-')) {
                if (0 == (flags & TSF_DIRTYLINE)) {
                    // treat HTML end-comment after possible whitespace
                    // after line start as comment-until-eol
                    if (in.match('>')) {
                        skipLine();
                        return RETRY_TOKEN;
                    }
                }
                c = DEC;
            } else {
                c = SUB;
            }
            flags |= TSF_DIRTYLINE;
            return c;

        default:
            reportTokenError("msg.illegal.character", null);
            return ERROR;
        }
    }

    private void skipWhitespace() throws IOException {
        int tmp;
        do {
            tmp = in.read();
} while (isJSSpace(tmp) || tmp == '\n'); 1172 // Reposition back to first non whitespace char. 1173 in.unread(); 1174 } 1175 1176 private int jsniMatchReference() throws IOException { 1177 1178 // First, read the type name whose member is being accessed. 1179 if (!jsniMatchQualifiedTypeName('.', ':')) { 1180 return ERROR; 1181 } 1182 1183 // Now we must the second colon. 1184 // 1185 int c = in.read(); 1186 if (c != ':') { 1187 in.unread(); 1188 reportTokenError("msg.jsni.expected.char", new String[] {":"}); 1189 return ERROR; 1190 } 1191 addToString(c); 1192 1193 // Skip whitespace starting after ::. 1194 skipWhitespace(); 1195 1196 // Finish by reading the field or method signature. 1197 if (!jsniMatchMethodSignatureOrFieldName()) { 1198 return ERROR; 1199 } 1200 1201 this.string = new String(stringBuffer, 0, stringBufferTop); 1202 return NAME; 1203 } 1204 1205 private boolean jsniMatchParamListSignature() throws IOException { 1206 // Assume the opening '(' has already been read. 1207 // Read param type signatures until we see a closing ')'. 1208 1209 skipWhitespace(); 1210 1211 // First check for the special case of * as the parameter list, indicating 1212 // a wildcard 1213 if (in.peek() == '*') { 1214 addToString(in.read()); 1215 if (in.peek() != ')') { 1216 reportTokenError("msg.jsni.expected.char", new String[] {")"}); 1217 } 1218 addToString(in.read()); 1219 return true; 1220 } 1221 1222 // Otherwise, loop through reading one param type at a time 1223 do { 1224 // Skip whitespace between parameters. 1225 skipWhitespace(); 1226 1227 int c = in.read(); 1228 1229 if (c == ')') { 1230 // Finished successfully. 1231 // 1232 addToString(c); 1233 return true; 1234 } 1235 1236 in.unread(); 1237 } while (jsniMatchParamTypeSignature()); 1238 1239 // If we made it here, we can assume that there was an invalid type 1240 // signature that was already reported and that the offending char 1241 // was already unread. 
1242 // 1243 return false; 1244 } 1245 1246 private boolean jsniMatchParamTypeSignature() throws IOException { 1247 int c = in.read(); 1248 switch (c) { 1249 case 'Z': 1250 case 'B': 1251 case 'C': 1252 case 'S': 1253 case 'I': 1254 case 'J': 1255 case 'F': 1256 case 'D': 1257 // Primitive type id. 1258 addToString(c); 1259 return true; 1260 case 'L': 1261 // Class/Interface type prefix. 1262 addToString(c); 1263 return jsniMatchQualifiedTypeName('/', ';'); 1264 case '[': 1265 // Array type prefix. 1266 addToString(c); 1267 return jsniMatchParamArrayTypeSignature(); 1268 default: 1269 in.unread(); 1270 reportTokenError("msg.jsni.expected.param.type", null); 1271 return false; 1272 } 1273 } 1274 1275 private boolean jsniMatchParamArrayTypeSignature() throws IOException { 1276 // Assume the leading '[' has already been read. 1277 // What follows must be another param type signature. 1278 // 1279 return jsniMatchParamTypeSignature(); 1280 } 1281 1282 private boolean jsniMatchMethodSignatureOrFieldName() throws IOException { 1283 int c = in.read(); 1284 1285 1286 // We must see an ident start here. 1287 // 1288 if (!Character.isJavaIdentifierStart((char)c)) { 1289 in.unread(); 1290 reportTokenError("msg.jsni.expected.identifier", null); 1291 return false; 1292 } 1293 1294 addToString(c); 1295 1296 for (;;) { 1297 c = in.read(); 1298 if (Character.isJavaIdentifierPart((char)c)) { 1299 addToString(c); 1300 } 1301 else if (c == '(') { 1302 // This means we're starting a JSNI method signature. 1303 // 1304 addToString(c); 1305 if (jsniMatchParamListSignature()) { 1306 // Finished a method signature with success. 1307 // Assume the callee unread the last char. 1308 // 1309 return true; 1310 } 1311 else { 1312 // Assume the callee reported the error and unread the last char. 1313 // 1314 return false; 1315 } 1316 } 1317 else { 1318 // We don't know this char, so it finishes the token. 
1319 // 1320 in.unread(); 1321 return true; 1322 } 1323 } 1324 } 1325 1326 /** 1327 * This method is called to match the fully-qualified type name that 1328 * should appear after the '@' in a JSNI reference. 1329 * @param sepChar the character that will separate the Java idents 1330 * (either a '.' or '/') 1331 * @param endChar the character that indicates the end of the 1332 */ 1333 private boolean jsniMatchQualifiedTypeName(char sepChar, char endChar) 1334 throws IOException { 1335 int c = in.read(); 1336 1337 // Whether nested or not, we must see an ident start here. 1338 // 1339 if (!Character.isJavaIdentifierStart((char)c)) { 1340 in.unread(); 1341 reportTokenError("msg.jsni.expected.identifier", null); 1342 return false; 1343 } 1344 1345 // Now actually add the first ident char. 1346 // 1347 addToString(c); 1348 1349 // And append any other ident chars. 1350 // 1351 for (;;) { 1352 c = in.read(); 1353 if (Character.isJavaIdentifierPart((char)c)) { 1354 addToString(c); 1355 } 1356 else { 1357 break; 1358 } 1359 } 1360 1361 // Arrray-type reference 1362 while (c == '[') { 1363 if (']' == in.peek()) { 1364 addToString('['); 1365 addToString(in.read()); 1366 c = in.read(); 1367 } else { 1368 break; 1369 } 1370 } 1371 1372 // We have a non-ident char to classify. 1373 // 1374 if (c == sepChar) { 1375 addToString(c); 1376 if (jsniMatchQualifiedTypeName(sepChar, endChar)) { 1377 // We consumed up to the endChar, so we finished with total success. 1378 // 1379 return true; 1380 } else { 1381 // Assume that the nested call reported the syntax error and 1382 // unread the last character. 1383 // 1384 return false; 1385 } 1386 } else if (c == endChar) { 1387 // Matched everything up to the specified end char. 1388 // 1389 addToString(c); 1390 return true; 1391 } else { 1392 // This is an unknown char that finishes the token. 
1393 // 1394 in.unread(); 1395 return true; 1396 } 1397 } 1398 1399 private String getStringFromBuffer() { 1400 return new String(stringBuffer, 0, stringBufferTop); 1401 } 1402 1403 private void addToString(int c) { 1404 if (stringBufferTop == stringBuffer.length) { 1405 char[] tmp = new char[stringBuffer.length * 2]; 1406 System.arraycopy(stringBuffer, 0, tmp, 0, stringBufferTop); 1407 stringBuffer = tmp; 1408 } 1409 stringBuffer[stringBufferTop++] = (char)c; 1410 } 1411 1412 /** 1413 * Positions hold offset of an corresponding token's end. 1414 * So lastPosition holds an offset of char that is next to last token. 1415 * 1416 * Use secondToLastPosition for error reporting outside of TokenStream, because 1417 * usually we want to report beginning of erroneous token, 1418 * which is end of second to last read token. 1419 */ 1420 public void reportSyntaxError(String messageProperty, Object[] args) { 1421 String message = Context.getMessage(messageProperty, args); 1422 Context.reportError(message, getSourceName(), secondToLastPosition.getLine(), getLine(), secondToLastPosition.getOffset()); 1423 } 1424 1425 /** 1426 * Token errors are reported before tokes is read, 1427 * so use lastPosition for reporting. 1428 * @see #reportSyntaxError 1429 */ 1430 private void reportTokenError(String messageProperty, Object[] args) { 1431 String message = Context.getMessage(messageProperty, args); 1432 Context.reportError(message, getSourceName(), lastPosition.getLine(), getLine(), lastPosition.getOffset()); 1433 } 1434 1435 private void reportTokenWarning(String messageProperty, Object[] args) { 1436 String message = Context.getMessage(messageProperty, args); 1437 Context.reportWarning(message, getSourceName(), lastPosition.getLine(), getLine(), lastPosition.getOffset()); 1438 } 1439 1440 /** 1441 * Updates last two known positions (for error reporting). 
1442 */ 1443 private void updatePosition() { 1444 CodePosition currentPosition = new CodePosition(getLineno(), getOffset()); 1445 if (currentPosition.compareTo(lastPosition) > 0) { 1446 secondToLastPosition = lastPosition; 1447 lastPosition = currentPosition; 1448 } 1449 } 1450 1451 public String getSourceName() { return sourceName; } 1452 public int getLineno() { return in.getLineno(); } 1453 public int getOp() { return op; } 1454 public String getString() { return string; } 1455 public double getNumber() { return number; } 1456 public String getLine() { return in.getLine(); } 1457 public int getOffset() { return in.getOffset(); } 1458 public int getTokenno() { return tokenno; } 1459 public boolean eof() { return in.eof(); } 1460 1461 // instance variables 1462 private LineBuffer in; 1463 1464 1465 /* for TSF_REGEXP, etc. 1466 * should this be manipulated by gettor/settor functions? 1467 * should it be passed to getToken(); 1468 */ 1469 int flags; 1470 String regExpFlags; 1471 1472 private String sourceName; 1473 private int pushbackToken; 1474 private int tokenno; 1475 1476 CodePosition secondToLastPosition; 1477 CodePosition lastPosition; 1478 1479 private int op; 1480 1481 // Set this to an inital non-null value so that the Parser has 1482 // something to retrieve even if an error has occured and no 1483 // string is found. Fosters one class of error, but saves lots of 1484 // code. 1485 private String string = ""; 1486 private double number; 1487 1488 private char[] stringBuffer = new char[128]; 1489 private int stringBufferTop; 1490 }