package org.hl7.fhir.utilities;

import java.util.ArrayList;
import java.util.List;

/**
 * Helpers for detecting, describing and structurally checking Unicode
 * bi-directional (BiDi) control characters in text.
 *
 * <p>The checker in {@link #checkUnicodeWellFormed(String)} keeps a stack of
 * "opening" control characters and removes them again when the matching
 * terminator (PDF or PDI) is seen; a paragraph separator resets the stack.
 * Anything left on the stack at the end of the input is reported.
 */
public class UnicodeUtilities {

  /**
   * One opening control character remembered by {@link StateStack},
   * together with the index in the source string at which it was found.
   */
  public static class StateStackEntry {

    private final char c;
    private final int i;

    /**
     * @param c the control character that was encountered
     * @param i the index of {@code c} in the string being checked
     */
    public StateStackEntry(char c, int i) {
      this.c = c;
      this.i = i;
    }
  }

  /**
   * Stack of currently open BiDi control characters. The most recently pushed
   * entry is the top of the stack.
   */
  public static class StateStack {

    private final List<StateStackEntry> list = new ArrayList<>();

    /** Discards every open entry. */
    public void clear() {
      list.clear();
    }

    /**
     * Records a newly opened control character.
     *
     * @param c the control character
     * @param i the index at which it occurred
     */
    public void push(char c, int i) {
      list.add(new StateStackEntry(c, i));
    }

    /**
     * Removes the top entry if, and only if, its character is in {@code oneSet}.
     * Does nothing when the stack is empty or the top entry does not match.
     *
     * @param oneSet the characters that may be closed by this pop
     */
    public void popJustOne(CharSet oneSet) {
      if (!list.isEmpty() && oneSet.contains(top().c)) {
        removeTop();
      }
    }

    /**
     * If any entry on the stack is in {@code oneSet}, pops entries from the top
     * while they belong to {@code oneSet} or {@code otherSet}, stopping right
     * after the first {@code oneSet} entry has been removed. If no entry is in
     * {@code oneSet} the stack is left untouched.
     *
     * @param oneSet   the characters this terminator actually matches
     * @param otherSet characters that are implicitly closed on the way down
     */
    public void popOneAndOthers(CharSet oneSet, CharSet otherSet) {
      if (!containsAnyOf(oneSet)) {
        return;
      }
      while (!list.isEmpty()) {
        char topChar = top().c;
        boolean matched = oneSet.contains(topChar);
        if (!matched && !otherSet.contains(topChar)) {
          // Top entry is neither the match nor implicitly closable: stop.
          break;
        }
        removeTop();
        if (matched) {
          break;
        }
      }
    }

    /** @return {@code true} when nothing is currently open */
    public boolean empty() {
      return list.isEmpty();
    }

    /**
     * Describes the top (most recently opened) entry as an unterminated
     * control character.
     *
     * @return a human-readable message naming the character and its index
     * @throws IllegalStateException if the stack is empty, i.e. there is
     *         nothing unterminated to describe
     */
    public String summary() {
      if (list.isEmpty()) {
        throw new IllegalStateException("summary() called on an empty StateStack");
      }
      StateStackEntry last = top();
      return "Unicode Character "+describe(last.c)+" at index "+last.i+" has no terminating match";
    }

    /** Returns the top entry; callers must ensure the stack is not empty. */
    private StateStackEntry top() {
      return list.get(list.size() - 1);
    }

    /** Removes the top entry; callers must ensure the stack is not empty. */
    private void removeTop() {
      list.remove(list.size() - 1);
    }

    /** Returns true if any entry on the stack has a character in the given set. */
    private boolean containsAnyOf(CharSet set) {
      for (StateStackEntry entry : list) {
        if (set.contains(entry.c)) {
          return true;
        }
      }
      return false;
    }
  }

  /**
   * A small fixed set of characters with a linear-scan membership test.
   */
  public static class CharSet {

    private final char[] chars;

    /**
     * @param chars the members of the set; the array is copied, and a
     *              {@code null} array is treated as the empty set
     */
    public CharSet(char... chars) {
      this.chars = chars == null ? new char[0] : chars.clone();
    }

    /**
     * @param c the character to look for
     * @return {@code true} if {@code c} is a member of this set
     */
    public boolean contains(char c) {
      for (char t : chars) {
        if (c == t) {
          return true;
        }
      }
      return false;
    }
  }

  /** U+202A LEFT-TO-RIGHT EMBEDDING. */
  public static final char LRE = '\u202a';
  /** U+202B RIGHT-TO-LEFT EMBEDDING. */
  public static final char RLE = '\u202b';
  /** U+202C POP DIRECTIONAL FORMATTING. */
  public static final char PDF = '\u202c';
  /** U+202D LEFT-TO-RIGHT OVERRIDE. */
  public static final char LRO = '\u202d';
  /** U+202E RIGHT-TO-LEFT OVERRIDE. */
  public static final char RLO = '\u202e';
  /** U+2066 LEFT-TO-RIGHT ISOLATE. */
  public static final char LRI = '\u2066';
  /** U+2067 RIGHT-TO-LEFT ISOLATE. */
  public static final char RLI = '\u2067';
  /** U+2068 FIRST STRONG ISOLATE. */
  public static final char FSI = '\u2068';
  /** U+2069 POP DIRECTIONAL ISOLATE. */
  public static final char PDI = '\u2069';
  /** U+200E LEFT-TO-RIGHT MARK. */
  public static final char LRM = '\u200E';
  /** U+200F RIGHT-TO-LEFT MARK. */
  public static final char RLM = '\u200F';
  /** U+061C ARABIC LETTER MARK. */
  public static final char ALM = '\u061C';
  /** Line feed, treated by this class as the paragraph separator. */
  public static final char PARA = '\n';

  /** Every character this class treats as BiDi-relevant, including {@link #PARA}. */
  private static final CharSet ALL_BIDI_CHARS =
      new CharSet(LRE, RLE, PDF, LRO, RLO, LRI, RLI, FSI, PDI, LRM, RLM, ALM, PARA);

  /** Characters a PDF may close, and that a PDI closes implicitly on its way down. */
  private static final CharSet PDF_CLOSABLE = new CharSet(LRE, RLE, LRO, RLO, LRM, RLM, ALM);

  /** Isolate initiators, which are closed by a PDI. */
  private static final CharSet ISOLATE_INITIATORS = new CharSet(LRI, RLI, FSI);

  /**
   * Reports whether the text contains any character this class treats as
   * BiDi-relevant. Note that {@link #PARA} (line feed) is part of that set, so
   * any text containing a line feed yields {@code true}.
   *
   * @param src the text to scan; {@code null} is treated as containing none
   * @return {@code true} if at least one such character is present
   */
  public static boolean hasBiDiChars(String src) {
    if (src == null) {
      return false;
    }
    for (int idx = 0; idx < src.length(); idx++) {
      if (ALL_BIDI_CHARS.contains(src.charAt(idx))) {
        return true;
      }
    }
    return false;
  }

  /**
   * Checks that bi-directional control characters in the text are properly
   * terminated.
   *
   * <p>Embeddings and overrides (LRE, RLE, LRO, RLO) are opened and expected
   * to be closed by PDF; isolates (LRI, RLI, FSI) are opened and closed by PDI,
   * which also closes anything PDF-closable opened inside the isolate. A
   * {@link #PARA} resets all open state.
   *
   * <p>NOTE(review): LRM, RLM and ALM are also pushed as if they were openers,
   * so a lone mark is reported as unterminated unless a PDF/PDI or line feed
   * follows. This matches the previous behaviour of this method; confirm it is
   * intended.
   *
   * @param src the text to check; {@code null} is treated as well formed
   * @return {@code null} if src is well formed, or a description of a
   *         structure problem with bi-directional characters
   */
  public static String checkUnicodeWellFormed(String src) {
    if (src == null) {
      return null;
    }
    StateStack open = new StateStack();
    for (int i = 0; i < src.length(); i++) {
      char c = src.charAt(i);
      switch (c) {
      case PARA:
        // A paragraph boundary terminates every open embedding/override/isolate.
        open.clear();
        break;
      case LRE:
      case RLE:
      case LRO:
      case RLO:
      case LRI:
      case RLI:
      case FSI:
      case LRM:
      case RLM:
      case ALM:
        open.push(c, i);
        break;
      case PDF:
        open.popJustOne(PDF_CLOSABLE);
        break;
      case PDI:
        open.popOneAndOthers(ISOLATE_INITIATORS, PDF_CLOSABLE);
        break;
      default:
        // Not a BiDi control character: nothing to track.
        break;
      }
    }
    return open.empty() ? null : open.summary();
  }

  /**
   * Gives the short conventional name of a BiDi control character.
   *
   * @param c the character to name
   * @return the abbreviation (for example {@code "RLO"}) for a character known
   *         to this class, otherwise the character itself as a string
   */
  public static String describe(char c) {
    switch (c) {
    case LRE: return "LRE";
    case RLE: return "RLE";
    case PDF: return "PDF";
    case LRO: return "LRO";
    case RLO: return "RLO";
    case LRI: return "LRI";
    case RLI: return "RLI";
    case FSI: return "FSI";
    case PDI: return "PDI";
    case LRM: return "LRM";
    case RLM: return "RLM";
    case ALM: return "ALM";
    case PARA: return "PARA";
    default: return String.valueOf(c);
    }
  }

  /**
   * Makes BiDi-relevant characters visible by replacing each with its
   * abbreviation wrapped in vertical bars, e.g. {@code |RLO|}. Line feeds are
   * replaced too, as {@code |PARA|}.
   *
   * <p>The declared return type is {@code Object} for compatibility with
   * existing callers; the value returned is always a {@code String} or
   * {@code null}.
   *
   * @param s the text to rewrite; may be {@code null}
   * @return the rewritten text, or {@code null} if {@code s} is {@code null}
   */
  public static Object replaceBiDiChars(String s) {
    if (s == null) {
      return null;
    }
    StringBuilder b = new StringBuilder(s.length());
    for (int idx = 0; idx < s.length(); idx++) {
      char c = s.charAt(idx);
      if (ALL_BIDI_CHARS.contains(c)) {
        b.append('|').append(describe(c)).append('|');
      } else {
        b.append(c);
      }
    }
    return b.toString();
  }
}