001package org.hl7.fhir.utilities;
002
003import java.util.List;
004import java.util.ArrayList;
005
/**
 * Helpers for detecting, validating and rendering Unicode bi-directional
 * (BiDi) control characters in text.
 */
public class UnicodeUtilities {

  /**
   * A control character that has been opened but not yet terminated,
   * together with the index in the source string at which it was seen.
   */
  public static class StateStackEntry {

    private final char c;
    private final int i;

    /**
     * @param c the control character
     * @param i the index of the character in the string being checked
     */
    public StateStackEntry(char c, int i) {
      this.c = c;
      this.i = i;
    }
  }

  /**
   * Stack of control characters that are currently open while a string is scanned.
   * The most recently pushed entry is the top of the stack.
   */
  public static class StateStack {

    private final List<StateStackEntry> list = new ArrayList<>();

    /** Discards every open entry. */
    public void clear() {
      list.clear();
    }

    /**
     * Records a newly opened control character.
     *
     * @param c the control character
     * @param i the index at which it was found
     */
    public void push(char c, int i) {
      list.add(new StateStackEntry(c, i));
    }

    /**
     * Removes the top entry if (and only if) its character belongs to {@code oneSet}.
     * Does nothing when the stack is empty or the top entry is not in the set.
     *
     * @param oneSet the characters that the current terminator is allowed to close
     */
    public void popJustOne(CharSet oneSet) {
      if (!list.isEmpty() && oneSet.contains(top().c)) {
        removeTop();
      }
    }

    /**
     * If any entry on the stack belongs to {@code oneSet}, removes entries from the top
     * for as long as they belong to either set, stopping immediately after the first
     * {@code oneSet} entry has been removed. Does nothing when no entry belongs to
     * {@code oneSet}.
     *
     * @param oneSet the characters the terminator matches
     * @param otherSet the characters that are closed implicitly on the way to the match
     */
    public void popOneAndOthers(CharSet oneSet, CharSet otherSet) {
      if (!containsAny(oneSet)) {
        return;
      }
      while (!list.isEmpty()) {
        char topChar = top().c;
        boolean matched = oneSet.contains(topChar);
        if (!matched && !otherSet.contains(topChar)) {
          // an entry that neither set covers blocks any further unwinding
          break;
        }
        removeTop();
        if (matched) {
          break;
        }
      }
    }

    /**
     * @return true if no control character is currently open
     */
    public boolean empty() {
      return list.isEmpty();
    }

    /**
     * Describes the most recently opened entry that is still on the stack.
     *
     * @return the problem description, or null if the stack is empty
     */
    public String summary() {
      if (list.isEmpty()) {
        // nothing is open, so there is nothing to report
        return null;
      }
      StateStackEntry last = top();
      return "Unicode Character "+describe(last.c)+" at index "+last.i+" has no terminating match";
    }

    private StateStackEntry top() {
      return list.get(list.size() - 1);
    }

    private void removeTop() {
      list.remove(list.size() - 1);
    }

    private boolean containsAny(CharSet set) {
      for (StateStackEntry entry : list) {
        if (set.contains(entry.c)) {
          return true;
        }
      }
      return false;
    }
  }

  /**
   * A small fixed set of characters with a linear membership test.
   */
  public static class CharSet {

    private final char[] chars;

    /**
     * @param chars the members of the set; a null array is treated as an empty set
     */
    public CharSet(char... chars) {
      // copy so that later changes to the caller's array cannot alter the set
      this.chars = chars == null ? new char[0] : chars.clone();
    }

    /**
     * @param c the character to look for
     * @return true if {@code c} is a member of this set
     */
    public boolean contains(char c) {
      for (char member : chars) {
        if (member == c) {
          return true;
        }
      }
      return false;
    }

  }

  public static final char LRE = '\u202a';
  public static final char RLE = '\u202b';
  public static final char PDF = '\u202c';
  public static final char LRO = '\u202d';
  public static final char RLO = '\u202e';
  public static final char LRI = '\u2066';
  public static final char RLI = '\u2067';
  public static final char FSI = '\u2068';
  public static final char PDI = '\u2069';
  public static final char LRM = '\u200E';
  public static final char RLM = '\u200F';
  public static final char ALM = '\u061C';
  public static final char PARA = '\n';

  // NOTE(review): PARA (newline) is a member, so hasBiDiChars and replaceBiDiChars both
  // treat a plain line break as a BiDi character - confirm that this is intended.
  private static final CharSet allBiDiChars = new CharSet(LRE, RLE, PDF, LRO, RLO, LRI, RLI, FSI, PDI, LRM, RLM, ALM, PARA);

  /** Characters that a PDF closes, and that a PDI closes implicitly while unwinding. */
  private static final CharSet CLOSED_BY_PDF = new CharSet(LRE, RLE, LRO, RLO, LRM, RLM, ALM);

  /** Characters that a PDI matches. */
  private static final CharSet CLOSED_BY_PDI = new CharSet(LRI, RLI, FSI);

  /**
   * Tests whether a string contains any of the characters this class tracks
   * (the BiDi control characters declared above, plus the newline {@link #PARA}).
   *
   * @param src the string to inspect; may be null
   * @return true if at least one tracked character is present, false for null
   */
  public static boolean hasBiDiChars(String src) {
    if (src == null) {
      return false;
    }
    for (int i = 0; i < src.length(); i++) {
      if (allBiDiChars.contains(src.charAt(i))) {
        return true;
      }
    }
    return false;
  }

  /**
   * Checks that every BiDi control character that opens a scope is terminated
   * before the end of its paragraph or the end of the string.
   *
   * @param src the string to check; may be null
   * @return null if src is null or well formed, otherwise a description of the
   *         last control character left without a terminating match
   */
  public static String checkUnicodeWellFormed(String src) {
    if (src == null) {
      return null;
    }
    StateStack ss = new StateStack();
    for (int i = 0; i < src.length(); i++) {
      char c = src.charAt(i);
      switch (c) {
      case PARA:
        // a new paragraph resets all open state
        ss.clear();
        break;
      case LRE:
      case RLE:
        // embeddings open a scope exactly like overrides; they were previously never
        // pushed, so an unterminated embedding went unreported
      case LRO:
      case RLO:
        ss.push(c, i);
        break;
      case PDF:
        ss.popJustOne(CLOSED_BY_PDF);
        break;
      case LRI:
      case RLI:
      case FSI:
        ss.push(c, i);
        break;
      case PDI:
        ss.popOneAndOthers(CLOSED_BY_PDI, CLOSED_BY_PDF);
        break;
      case LRM:
      case RLM:
      case ALM:
        // NOTE(review): marks are tracked as if they needed a terminator; this is the
        // existing behaviour and is kept as is - confirm it is intended
        ss.push(c, i);
        break;
      default:
        // not a character we track
        break;
      }
    }
    return ss.empty() ? null : ss.summary();
  }

  /**
   * Returns the short name of a tracked character.
   *
   * @param c the character to name
   * @return the abbreviation (for example "RLO"), or the character itself as a
   *         string when it is not one of the tracked characters
   */
  public static String describe(char c) {
    switch (c) {
    case LRE: return "LRE";
    case RLE: return "RLE";
    case PDF: return "PDF";
    case LRO: return "LRO";
    case RLO: return "RLO";
    case LRI: return "LRI";
    case RLI: return "RLI";
    case FSI: return "FSI";
    case PDI: return "PDI";
    case LRM: return "LRM";
    case RLM: return "RLM";
    case ALM: return "ALM";
    case PARA: return "PARA";
    default: return String.valueOf(c);
    }
  }

  /**
   * Makes tracked characters visible by replacing each one with its name wrapped
   * in vertical bars, for example {@code |RLO|}. All other characters are copied
   * unchanged.
   *
   * @param s the string to render; may be null
   * @return the rendered text as a String, or null if s is null
   */
  public static Object replaceBiDiChars(String s) {
    if (s == null) {
      return null;
    }
    StringBuilder b = new StringBuilder(s.length());
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      if (allBiDiChars.contains(c)) {
        b.append('|').append(describe(c)).append('|');
      } else {
        b.append(c);
      }
    }
    return b.toString();
  }
}