001package org.hl7.fhir.r4.utils;
002
003import org.hl7.fhir.exceptions.FHIRException;
004
005/*
006  Copyright (c) 2011+, HL7, Inc.
007  All rights reserved.
008  
009  Redistribution and use in source and binary forms, with or without modification, 
010  are permitted provided that the following conditions are met:
011    
012   * Redistributions of source code must retain the above copyright notice, this 
013     list of conditions and the following disclaimer.
014   * Redistributions in binary form must reproduce the above copyright notice, 
015     this list of conditions and the following disclaimer in the documentation 
016     and/or other materials provided with the distribution.
017   * Neither the name of HL7 nor the names of its contributors may be used to 
018     endorse or promote products derived from this software without specific 
019     prior written permission.
020  
021  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
022  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
023  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
024  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
025  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
026  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
027  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
028  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
029  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
030  POSSIBILITY OF SUCH DAMAGE.
031  
032 */
033
034import org.hl7.fhir.r4.model.ExpressionNode;
035import org.hl7.fhir.utilities.SourceLocation;
036import org.hl7.fhir.utilities.Utilities;
037
038// shared lexer for concrete syntaxes 
039// - FluentPath
040// - Mapping language
041
042public class FHIRLexer {
043  public class FHIRLexerException extends FHIRException {
044
045    public FHIRLexerException() {
046      super();
047    }
048
049    public FHIRLexerException(String message, Throwable cause) {
050      super(message, cause);
051    }
052
053    public FHIRLexerException(String message) {
054      super(message);
055    }
056
057    public FHIRLexerException(Throwable cause) {
058      super(cause);
059    }
060
061  }
062
063  private String source;
064  private int cursor;
065  private int currentStart;
066  private String current;
067  private SourceLocation currentLocation;
068  private SourceLocation currentStartLocation;
069  private int id;
070  private String name;
071
072  public FHIRLexer(String source, String name) throws FHIRLexerException {
073    this.source = source;
074    this.name = name == null ? "??" : name;
075    currentLocation = new SourceLocation(1, 1);
076    next();
077  }
078
079  public FHIRLexer(String source, int i) throws FHIRLexerException {
080    this.source = source;
081    this.cursor = i;
082    currentLocation = new SourceLocation(1, 1);
083    next();
084  }
085
086  public String getCurrent() {
087    return current;
088  }
089
090  public SourceLocation getCurrentLocation() {
091    return currentLocation;
092  }
093
094  public boolean isConstant() {
095    return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@'
096        || current.charAt(0) == '%' || current.charAt(0) == '-' || current.charAt(0) == '+'
097        || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || current.equals("true") || current.equals("false")
098        || current.equals("{}");
099  }
100
101  public boolean isFixedName() {
102    return current != null && (current.charAt(0) == '`');
103  }
104
105  public boolean isStringConstant() {
106    return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`';
107  }
108
109  public String take() throws FHIRLexerException {
110    String s = current;
111    next();
112    return s;
113  }
114
115  public int takeInt() throws FHIRLexerException {
116    String s = current;
117    if (!Utilities.isInteger(s))
118      throw error("Found " + current + " expecting an integer");
119    next();
120    return Integer.parseInt(s);
121  }
122
123  public boolean isToken() {
124    if (Utilities.noString(current))
125      return false;
126
127    if (current.startsWith("$"))
128      return true;
129
130    if (current.equals("*") || current.equals("**"))
131      return true;
132
133    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z')
134        || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
135      for (int i = 1; i < current.length(); i++)
136        if (!((current.charAt(1) >= 'A' && current.charAt(1) <= 'Z')
137            || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z')
138            || (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
139          return false;
140      return true;
141    }
142    return false;
143  }
144
145  public FHIRLexerException error(String msg) {
146    return error(msg, currentLocation.toString());
147  }
148
149  public FHIRLexerException error(String msg, String location) {
150    return new FHIRLexerException("Error in " + name + " at " + location + ": " + msg);
151  }
152
153  public void next() throws FHIRLexerException {
154    skipWhitespaceAndComments();
155    current = null;
156    currentStart = cursor;
157    currentStartLocation = currentLocation;
158    if (cursor < source.length()) {
159      char ch = source.charAt(cursor);
160      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') {
161        cursor++;
162        if (cursor < source.length()
163            && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-')
164            || (ch == '-' && source.charAt(cursor) == '>'))
165          cursor++;
166        current = source.substring(currentStart, cursor);
167      } else if (ch == '.') {
168        cursor++;
169        if (cursor < source.length() && (source.charAt(cursor) == '.'))
170          cursor++;
171        current = source.substring(currentStart, cursor);
172      } else if (ch >= '0' && ch <= '9') {
173        cursor++;
174        boolean dotted = false;
175        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9')
176            || (source.charAt(cursor) == '.') && !dotted)) {
177          if (source.charAt(cursor) == '.')
178            dotted = true;
179          cursor++;
180        }
181        if (source.charAt(cursor - 1) == '.')
182          cursor--;
183        current = source.substring(currentStart, cursor);
184      } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
185        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z')
186            || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')
187            || (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_'))
188          cursor++;
189        current = source.substring(currentStart, cursor);
190      } else if (ch == '%') {
191        cursor++;
192        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
193          cursor++;
194          while (cursor < source.length() && (source.charAt(cursor) != '`'))
195            cursor++;
196          cursor++;
197        } else
198          while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z')
199              || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')
200              || (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':'
201              || source.charAt(cursor) == '-'))
202            cursor++;
203        current = source.substring(currentStart, cursor);
204      } else if (ch == '/') {
205        cursor++;
206        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
207          // this is en error - should already have been skipped
208          error("This shoudn't happen?");
209        }
210        current = source.substring(currentStart, cursor);
211      } else if (ch == '$') {
212        cursor++;
213        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
214          cursor++;
215        current = source.substring(currentStart, cursor);
216      } else if (ch == '{') {
217        cursor++;
218        ch = source.charAt(cursor);
219        if (ch == '}')
220          cursor++;
221        current = source.substring(currentStart, cursor);
222      } else if (ch == '"') {
223        cursor++;
224        boolean escape = false;
225        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
226          if (escape)
227            escape = false;
228          else
229            escape = (source.charAt(cursor) == '\\');
230          cursor++;
231        }
232        if (cursor == source.length())
233          throw error("Unterminated string");
234        cursor++;
235        current = "\"" + source.substring(currentStart + 1, cursor - 1) + "\"";
236      } else if (ch == '`') {
237        cursor++;
238        boolean escape = false;
239        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
240          if (escape)
241            escape = false;
242          else
243            escape = (source.charAt(cursor) == '\\');
244          cursor++;
245        }
246        if (cursor == source.length())
247          throw error("Unterminated string");
248        cursor++;
249        current = "`" + source.substring(currentStart + 1, cursor - 1) + "`";
250      } else if (ch == '\'') {
251        cursor++;
252        char ech = ch;
253        boolean escape = false;
254        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
255          if (escape)
256            escape = false;
257          else
258            escape = (source.charAt(cursor) == '\\');
259          cursor++;
260        }
261        if (cursor == source.length())
262          throw error("Unterminated string");
263        cursor++;
264        current = source.substring(currentStart, cursor);
265        if (ech == '\'')
266          current = "\'" + current.substring(1, current.length() - 1) + "\'";
267      } else if (ch == '`') {
268        cursor++;
269        boolean escape = false;
270        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
271          if (escape)
272            escape = false;
273          else
274            escape = (source.charAt(cursor) == '\\');
275          cursor++;
276        }
277        if (cursor == source.length())
278          throw error("Unterminated string");
279        cursor++;
280        current = "`" + source.substring(currentStart + 1, cursor - 1) + "`";
281      } else if (ch == '@') {
282        int start = cursor;
283        cursor++;
284        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
285          cursor++;
286        current = source.substring(currentStart, cursor);
287      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
288        cursor++;
289        current = source.substring(currentStart, cursor);
290      }
291    }
292  }
293
294  private void skipWhitespaceAndComments() {
295    boolean last13 = false;
296    boolean done = false;
297    while (cursor < source.length() && !done) {
298      if (cursor < source.length() - 1 && "//".equals(source.substring(cursor, cursor + 2))) {
299        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n'))
300          cursor++;
301      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor + 2))) {
302        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor + 2))) {
303          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
304          cursor++;
305        }
306        if (cursor >= source.length() - 1) {
307          error("Unfinished comment");
308        } else {
309          cursor = cursor + 2;
310        }
311      } else if (Character.isWhitespace(source.charAt(cursor))) {
312        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
313        cursor++;
314      } else {
315        done = true;
316      }
317    }
318  }
319
320  private boolean isDateChar(char ch, int start) {
321    int eot = source.charAt(start + 1) == 'T' ? 10 : 20;
322
323    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch)
324        || (cursor - start == eot && ch == '.' && cursor < source.length() - 1
325            && Character.isDigit(source.charAt(cursor + 1)));
326  }
327
328  public boolean isOp() {
329    return ExpressionNode.Operation.fromCode(current) != null;
330  }
331
332  public boolean done() {
333    return currentStart >= source.length();
334  }
335
336  public int nextId() {
337    id++;
338    return id;
339  }
340
341  public SourceLocation getCurrentStartLocation() {
342    return currentStartLocation;
343  }
344
345  // special case use
346  public void setCurrent(String current) {
347    this.current = current;
348  }
349
350  public boolean hasComment() {
351    return !done() && current.startsWith("//");
352  }
353
354  public boolean hasToken(String kw) {
355    return !done() && kw.equals(current);
356  }
357
358  public boolean hasToken(String... names) {
359    if (done())
360      return false;
361    for (String s : names)
362      if (s.equals(current))
363        return true;
364    return false;
365  }
366
367  public void token(String kw) throws FHIRLexerException {
368    if (!kw.equals(current))
369      throw error("Found \"" + current + "\" expecting \"" + kw + "\"");
370    next();
371  }
372
373  public String readConstant(String desc) throws FHIRLexerException {
374    if (!isStringConstant())
375      throw error("Found " + current + " expecting \"[" + desc + "]\"");
376
377    return processConstant(take());
378  }
379
380  public String readFixedName(String desc) throws FHIRLexerException {
381    if (!isFixedName())
382      throw error("Found " + current + " expecting \"[" + desc + "]\"");
383
384    return processFixedName(take());
385  }
386
387  public String processConstant(String s) throws FHIRLexerException {
388    StringBuilder b = new StringBuilder();
389    int i = 1;
390    while (i < s.length() - 1) {
391      char ch = s.charAt(i);
392      if (ch == '\\') {
393        i++;
394        switch (s.charAt(i)) {
395        case 't':
396          b.append('\t');
397          break;
398        case 'r':
399          b.append('\r');
400          break;
401        case 'n':
402          b.append('\n');
403          break;
404        case 'f':
405          b.append('\f');
406          break;
407        case '\'':
408          b.append('\'');
409          break;
410        case '"':
411          b.append('"');
412          break;
413        case '`':
414          b.append('`');
415          break;
416        case '\\':
417          b.append('\\');
418          break;
419        case '/':
420          b.append('/');
421          break;
422        case 'u':
423          i++;
424          int uc = Integer.parseInt(s.substring(i, i + 4), 16);
425          b.append((char) uc);
426          i = i + 4;
427          break;
428        default:
429          throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i));
430        }
431      } else {
432        b.append(ch);
433        i++;
434      }
435    }
436    return b.toString();
437  }
438
439  public String processFixedName(String s) throws FHIRLexerException {
440    StringBuilder b = new StringBuilder();
441    int i = 1;
442    while (i < s.length() - 1) {
443      char ch = s.charAt(i);
444      if (ch == '\\') {
445        i++;
446        switch (s.charAt(i)) {
447        case 't':
448          b.append('\t');
449          break;
450        case 'r':
451          b.append('\r');
452          break;
453        case 'n':
454          b.append('\n');
455          break;
456        case 'f':
457          b.append('\f');
458          break;
459        case '\'':
460          b.append('\'');
461          break;
462        case '"':
463          b.append('"');
464          break;
465        case '\\':
466          b.append('\\');
467          break;
468        case '/':
469          b.append('/');
470          break;
471        case 'u':
472          i++;
473          int uc = Integer.parseInt(s.substring(i, i + 4), 16);
474          b.append((char) uc);
475          i = i + 4;
476          break;
477        default:
478          throw new FHIRLexerException("Unknown character escape \\" + s.charAt(i));
479        }
480      } else {
481        b.append(ch);
482        i++;
483      }
484    }
485    return b.toString();
486  }
487
488  public void skipToken(String token) throws FHIRLexerException {
489    if (getCurrent().equals(token))
490      next();
491
492  }
493
494  public String takeDottedToken() throws FHIRLexerException {
495    StringBuilder b = new StringBuilder();
496    b.append(take());
497    while (!done() && getCurrent().equals(".")) {
498      b.append(take());
499      b.append(take());
500    }
501    return b.toString();
502  }
503
504  void skipComments() throws FHIRLexerException {
505    while (!done() && hasComment())
506      next();
507  }
508
509  public int getCurrentStart() {
510    return currentStart;
511  }
512
513}