001package org.hl7.fhir.r4.utils; 002 003import org.hl7.fhir.exceptions.FHIRException; 004 005/* 006 Copyright (c) 2011+, HL7, Inc. 007 All rights reserved. 008 009 Redistribution and use in source and binary forms, with or without modification, 010 are permitted provided that the following conditions are met: 011 012 * Redistributions of source code must retain the above copyright notice, this 013 list of conditions and the following disclaimer. 014 * Redistributions in binary form must reproduce the above copyright notice, 015 this list of conditions and the following disclaimer in the documentation 016 and/or other materials provided with the distribution. 017 * Neither the name of HL7 nor the names of its contributors may be used to 018 endorse or promote products derived from this software without specific 019 prior written permission. 020 021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 022 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 023 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 024 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 025 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 026 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 027 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 028 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 029 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 030 POSSIBILITY OF SUCH DAMAGE. 031 032 */ 033 034import org.hl7.fhir.r4.model.ExpressionNode; 035import org.hl7.fhir.utilities.SourceLocation; 036import org.hl7.fhir.utilities.Utilities; 037 038// shared lexer for concrete syntaxes 039// - FluentPath 040// - Mapping language 041 042public class FHIRLexer { 043 public class FHIRLexerException extends FHIRException { 044 045 public FHIRLexerException() { 046 super(); 047 } 048 049 public FHIRLexerException(String message, Throwable cause) { 050 super(message, cause); 051 } 052 053 public FHIRLexerException(String message) { 054 super(message); 055 } 056 057 public FHIRLexerException(Throwable cause) { 058 super(cause); 059 } 060 061 } 062 063 private String source; 064 private int cursor; 065 private int currentStart; 066 private String current; 067 private SourceLocation currentLocation; 068 private SourceLocation currentStartLocation; 069 private int id; 070 private String name; 071 072 public FHIRLexer(String source, String name) throws FHIRLexerException { 073 this.source = source; 074 this.name = name == null ? "??" : name; 075 currentLocation = new SourceLocation(1, 1); 076 next(); 077 } 078 079 public FHIRLexer(String source, int i) throws FHIRLexerException { 080 this.source = source; 081 this.cursor = i; 082 currentLocation = new SourceLocation(1, 1); 083 next(); 084 } 085 086 public String getCurrent() { 087 return current; 088 } 089 090 public SourceLocation getCurrentLocation() { 091 return currentLocation; 092 } 093 094 public boolean isConstant() { 095 return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' 096 || current.charAt(0) == '%' || current.charAt(0) == '-' || current.charAt(0) == '+' 097 || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || current.equals("true") || current.equals("false") 098 || current.equals("{}"); 099 } 100 101 public boolean isFixedName() { 102 return current != null && (current.charAt(0) == '`'); 103 } 104 105 public boolean isStringConstant() { 106 return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`'; 107 } 108 109 public String take() throws FHIRLexerException { 110 String s = current; 111 next(); 112 return s; 113 } 114 115 public int takeInt() throws FHIRLexerException { 116 String s = current; 117 if (!Utilities.isInteger(s)) 118 throw error("Found " + current + " expecting an integer"); 119 next(); 120 return Integer.parseInt(s); 121 } 122 123 public boolean isToken() { 124 if (Utilities.noString(current)) 125 return false; 126 127 if (current.startsWith("$")) 128 return true; 129 130 if (current.equals("*") || current.equals("**")) 131 return true; 132 133 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') 134 || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 135 for (int i = 1; i < current.length(); i++) 136 if (!((current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') 137 || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') 138 || (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 139 return false; 140 return true; 141 } 142 return false; 143 } 144 145 public FHIRLexerException error(String msg) { 146 return error(msg, currentLocation.toString()); 147 } 148 149 public FHIRLexerException error(String msg, String location) { 150 return new FHIRLexerException("Error in " + name + " at " + location + ": " + msg); 151 } 152 153 public void next() throws FHIRLexerException { 154 skipWhitespaceAndComments(); 155 current = null; 156 currentStart = cursor; 157 currentStartLocation = currentLocation; 158 if (cursor < source.length()) { 159 char ch = source.charAt(cursor); 160 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 161 cursor++; 162 if (cursor < source.length() 163 && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') 164 || (ch == '-' && source.charAt(cursor) == '>')) 165 cursor++; 166 current = source.substring(currentStart, cursor); 167 } else if (ch == '.') { 168 cursor++; 169 if (cursor < source.length() && (source.charAt(cursor) == '.')) 170 cursor++; 171 current = source.substring(currentStart, cursor); 172 } else if (ch >= '0' && ch <= '9') { 173 cursor++; 174 boolean dotted = false; 175 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') 176 || (source.charAt(cursor) == '.') && !dotted)) { 177 if (source.charAt(cursor) == '.') 178 dotted = true; 179 cursor++; 180 } 181 if (source.charAt(cursor - 1) == '.') 182 cursor--; 183 current = source.substring(currentStart, cursor); 184 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 185 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') 186 || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') 187 || (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 188 cursor++; 189 current = source.substring(currentStart, cursor); 190 } else if (ch == '%') { 191 cursor++; 192 if (cursor < source.length() && (source.charAt(cursor) == '`')) { 193 cursor++; 194 while (cursor < source.length() && (source.charAt(cursor) != '`')) 195 cursor++; 196 cursor++; 197 } else 198 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') 199 || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') 200 || (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' 201 || source.charAt(cursor) == '-')) 202 cursor++; 203 current = source.substring(currentStart, cursor); 204 } else if (ch == '/') { 205 cursor++; 206 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 207 // this is en error - should already have been skipped 208 error("This shoudn't happen?"); 209 } 210 current = source.substring(currentStart, cursor); 211 } else if (ch == '$') { 212 cursor++; 213 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 214 cursor++; 215 current = source.substring(currentStart, cursor); 216 } else if (ch == '{') { 217 cursor++; 218 ch = source.charAt(cursor); 219 if (ch == '}') 220 cursor++; 221 current = source.substring(currentStart, cursor); 222 } else if (ch == '"') { 223 cursor++; 224 boolean escape = false; 225 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 226 if (escape) 227 escape = false; 228 else 229 escape = (source.charAt(cursor) == '\\'); 230 cursor++; 231 } 232 if (cursor == source.length()) 233 throw error("Unterminated string"); 234 cursor++; 235 current = "\"" + source.substring(currentStart + 1, cursor - 1) + "\""; 236 } else if (ch == '`') { 237 cursor++; 238 boolean escape = false; 239 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 240 if (escape) 241 escape = false; 242 else 243 escape = (source.charAt(cursor) == '\\'); 244 cursor++; 245 } 246 if (cursor == source.length()) 247 throw error("Unterminated string"); 248 cursor++; 249 current = "`" + source.substring(currentStart + 1, cursor - 1) + "`"; 250 } else if (ch == '\'') { 251 cursor++; 252 char ech = ch; 253 boolean escape = false; 254 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 255 if (escape) 256 escape = false; 257 else 258 escape = (source.charAt(cursor) == '\\'); 259 cursor++; 260 } 261 if (cursor == source.length()) 262 throw error("Unterminated string"); 263 cursor++; 264 current = source.substring(currentStart, cursor); 265 if (ech == '\'') 266 current = "\'" + current.substring(1, current.length() - 1) + "\'"; 267 } else if (ch == '`') { 268 cursor++; 269 boolean escape = false; 270 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 271 if (escape) 272 escape = false; 273 else 274 escape = (source.charAt(cursor) == '\\'); 275 cursor++; 276 } 277 if (cursor == source.length()) 278 throw error("Unterminated string"); 279 cursor++; 280 current = "`" + source.substring(currentStart + 1, cursor - 1) + "`"; 281 } else if (ch == '@') { 282 int start = cursor; 283 cursor++; 284 while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) 285 cursor++; 286 current = source.substring(currentStart, cursor); 287 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 288 cursor++; 289 current = source.substring(currentStart, cursor); 290 } 291 } 292 } 293 294 private void skipWhitespaceAndComments() { 295 boolean last13 = false; 296 boolean done = false; 297 while (cursor < source.length() && !done) { 298 if (cursor < source.length() - 1 && "//".equals(source.substring(cursor, cursor + 2))) { 299 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 300 cursor++; 301 } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor + 2))) { 302 while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor + 2))) { 303 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 304 cursor++; 305 } 306 if (cursor >= source.length() - 1) { 307 error("Unfinished comment"); 308 } else { 309 cursor = cursor + 2; 310 } 311 } else if (Character.isWhitespace(source.charAt(cursor))) { 312 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 313 cursor++; 314 } else { 315 done = true; 316 } 317 } 318 } 319 320 private boolean isDateChar(char ch, int start) { 321 int eot = source.charAt(start + 1) == 'T' ? 10 : 20; 322 323 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) 324 || (cursor - start == eot && ch == '.' && cursor < source.length() - 1 325 && Character.isDigit(source.charAt(cursor + 1))); 326 } 327 328 public boolean isOp() { 329 return ExpressionNode.Operation.fromCode(current) != null; 330 } 331 332 public boolean done() { 333 return currentStart >= source.length(); 334 } 335 336 public int nextId() { 337 id++; 338 return id; 339 } 340 341 public SourceLocation getCurrentStartLocation() { 342 return currentStartLocation; 343 } 344 345 // special case use 346 public void setCurrent(String current) { 347 this.current = current; 348 } 349 350 public boolean hasComment() { 351 return !done() && current.startsWith("//"); 352 } 353 354 public boolean hasToken(String kw) { 355 return !done() && kw.equals(current); 356 } 357 358 public boolean hasToken(String... names) { 359 if (done()) 360 return false; 361 for (String s : names) 362 if (s.equals(current)) 363 return true; 364 return false; 365 } 366 367 public void token(String kw) throws FHIRLexerException { 368 if (!kw.equals(current)) 369 throw error("Found \"" + current + "\" expecting \"" + kw + "\""); 370 next(); 371 } 372 373 public String readConstant(String desc) throws FHIRLexerException { 374 if (!isStringConstant()) 375 throw error("Found " + current + " expecting \"[" + desc + "]\""); 376 377 return processConstant(take()); 378 } 379 380 public String readFixedName(String desc) throws FHIRLexerException { 381 if (!isFixedName()) 382 throw error("Found " + current + " expecting \"[" + desc + "]\""); 383 384 return processFixedName(take()); 385 } 386 387 public String processConstant(String s) throws FHIRLexerException { 388 StringBuilder b = new StringBuilder(); 389 int i = 1; 390 while (i < s.length() - 1) { 391 char ch = s.charAt(i); 392 if (ch == '\\') { 393 i++; 394 switch (s.charAt(i)) { 395 case 't': 396 b.append('\t'); 397 break; 398 case 'r': 399 b.append('\r'); 400 break; 401 case 'n': 402 b.append('\n'); 403 break; 404 case 'f': 405 b.append('\f'); 406 break; 407 case '\'': 408 b.append('\''); 409 break; 410 case '"': 411 b.append('"'); 412 break; 413 case '`': 414 b.append('`'); 415 break; 416 case '\\': 417 b.append('\\'); 418 break; 419 case '/': 420 b.append('/'); 421 break; 422 case 'u': 423 i++; 424 int uc = Integer.parseInt(s.substring(i, i + 4), 16); 425 b.append((char) uc); 426 i = i + 4; 427 break; 428 default: 429 throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i)); 430 } 431 } else { 432 b.append(ch); 433 i++; 434 } 435 } 436 return b.toString(); 437 } 438 439 public String processFixedName(String s) throws FHIRLexerException { 440 StringBuilder b = new StringBuilder(); 441 int i = 1; 442 while (i < s.length() - 1) { 443 char ch = s.charAt(i); 444 if (ch == '\\') { 445 i++; 446 switch (s.charAt(i)) { 447 case 't': 448 b.append('\t'); 449 break; 450 case 'r': 451 b.append('\r'); 452 break; 453 case 'n': 454 b.append('\n'); 455 break; 456 case 'f': 457 b.append('\f'); 458 break; 459 case '\'': 460 b.append('\''); 461 break; 462 case '"': 463 b.append('"'); 464 break; 465 case '\\': 466 b.append('\\'); 467 break; 468 case '/': 469 b.append('/'); 470 break; 471 case 'u': 472 i++; 473 int uc = Integer.parseInt(s.substring(i, i + 4), 16); 474 b.append((char) uc); 475 i = i + 4; 476 break; 477 default: 478 throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i)); 479 } 480 } else { 481 b.append(ch); 482 i++; 483 } 484 } 485 return b.toString(); 486 } 487 488 public void skipToken(String token) throws FHIRLexerException { 489 if (getCurrent().equals(token)) 490 next(); 491 492 } 493 494 public String takeDottedToken() throws FHIRLexerException { 495 StringBuilder b = new StringBuilder(); 496 b.append(take()); 497 while (!done() && getCurrent().equals(".")) { 498 b.append(take()); 499 b.append(take()); 500 } 501 return b.toString(); 502 } 503 504 void skipComments() throws FHIRLexerException { 505 while (!done() && hasComment()) 506 next(); 507 } 508 509 public int getCurrentStart() { 510 return currentStart; 511 } 512 513}