/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hdfs.util;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * General XML utilities: mangling of strings that contain code points which
 * cannot appear in an XML document, a small SAX helper, and a simple tree
 * structure ({@link Stanza}) for holding parsed XML content.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class XMLUtils {
  /**
   * Exception that reflects an invalid XML document.
   */
  public static class InvalidXmlException extends RuntimeException {
    private static final long serialVersionUID = 1L;

    public InvalidXmlException(String s) {
      super(s);
    }
  }

  /**
   * Exception that reflects a string that cannot be unmangled.
   */
  public static class UnmanglingError extends RuntimeException {
    private static final long serialVersionUID = 1L;

    public UnmanglingError(String str, Exception e) {
      super(str, e);
    }

    public UnmanglingError(String str) {
      super(str);
    }
  }

  /**
   * Given a code point, determine if it should be mangled before being
   * represented in an XML document.
   *
   * Any code point that isn't valid in XML must be mangled.
   * See http://en.wikipedia.org/wiki/Valid_characters_in_XML for a
   * quick reference, or the w3 standard for the authoritative reference.
   *
   * @param cp      The code point
   * @return        True if the code point should be mangled
   */
  private static boolean codePointMustBeMangled(int cp) {
    if (cp < 0x20) {
      // Of the C0 control characters only tab, line feed and carriage
      // return are accepted here.
      return ((cp != 0x9) && (cp != 0xa) && (cp != 0xd));
    } else if ((0xd7ff < cp) && (cp < 0xe000)) {
      // Surrogate range (an unpaired surrogate in the input string).
      return true;
    } else if ((cp == 0xfffe) || (cp == 0xffff)) {
      return true;
    } else if (cp == 0x5c) {
      // we mangle backslash to simplify decoding... it's
      // easier if backslashes always begin mangled sequences.
      return true;
    }
    return false;
  }

  /** Number of hexadecimal digits that follow the backslash in an escape. */
  private static final int NUM_SLASH_POSITIONS = 4;

  /**
   * Render a code point as a mangled escape sequence: a backslash, the
   * value as zero-padded lower-case hexadecimal, and a semicolon.
   *
   * @param cp      The code point to mangle
   * @return        The escape sequence
   */
  private static String mangleCodePoint(int cp) {
    return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp);
  }

  /**
   * Mangle a string so that it can be represented in an XML document.
   *
   * There are three kinds of code points in XML:
   * - Those that can be represented normally,
   * - Those that have to be escaped (for example, {@literal &} must be
   *     represented as {@code &amp;})
   * - Those that cannot be represented at all in XML.
   *
   * The built-in SAX functions will handle the first two types for us just
   * fine.  However, sometimes we come across a code point of the third type.
   * In this case, we have to mangle the string in order to represent it at
   * all.  We also mangle backslash to avoid confusing a backslash in the
   * string with part of our escape sequence.
   *
   * The encoding used here is as follows: an illegal code point is
   * represented as '\ABCD;', where ABCD is the hexadecimal value of
   * the code point.
   *
   * @param str     The input string.
   *
   * @return        The mangled string.
   */
  public static String mangleXmlString(String str) {
    final int length = str.length();
    final StringBuilder bld = new StringBuilder(length);
    int offset = 0;
    while (offset < length) {
      final int cp = str.codePointAt(offset);
      final int next = offset + Character.charCount(cp);
      if (codePointMustBeMangled(cp)) {
        bld.append(mangleCodePoint(cp));
      } else {
        // Copy the one or two chars of this code point unchanged.
        bld.append(str, offset, next);
      }
      offset = next;
    }
    return bld.toString();
  }

  /**
   * Demangle a string from an XML document.
   * See {@link #mangleXmlString(String)} for a description of the mangling
   * format.
   *
   * Every escape must consist of a backslash, exactly
   * {@code NUM_SLASH_POSITIONS} hexadecimal digits and a terminating
   * semicolon.  Anything else, including a sign character in place of a
   * digit, is reported as an {@link UnmanglingError}.
   *
   * @param str     The string to be demangled.
   *
   * @return        The unmangled string
   * @throws        UnmanglingError if the input is malformed.
   */
  public static String unmangleXmlString(String str)
        throws UnmanglingError {
    final int length = str.length();
    final StringBuilder bld = new StringBuilder(length);
    int i = 0;
    while (i < length) {
      final char ch = str.charAt(i);
      if (ch != '\\') {
        bld.append(ch);
        i++;
        continue;
      }
      // Index at which the terminating semicolon of this escape must sit.
      final int terminator = i + 1 + NUM_SLASH_POSITIONS;
      if (terminator >= length) {
        throw new UnmanglingError("unterminated code point escape: string " +
            "broke off in the middle");
      }
      if (str.charAt(terminator) != ';') {
        throw new UnmanglingError("unterminated code point escape: " +
            "expected semicolon at end.");
      }
      // Decode digit by digit instead of using Integer.parseInt, which
      // would accept a leading sign and could hand a negative value to
      // appendCodePoint (an IllegalArgumentException, not an
      // UnmanglingError).
      int cp = 0;
      for (int j = i + 1; j < terminator; j++) {
        final int digit = Character.digit(str.charAt(j), 16);
        if (digit < 0) {
          throw new UnmanglingError("error parsing unmangling escape code: " +
              "invalid hexadecimal digit at offset " + j);
        }
        cp = (cp << 4) | digit;
      }
      bld.appendCodePoint(cp);
      i = terminator + 1;
    }
    return bld.toString();
  }

  /**
   * Add a SAX tag with a string inside.  The string is mangled with
   * {@link #mangleXmlString(String)} before being handed to the handler.
   *
   * @param contentHandler     the SAX content handler
   * @param tag                the element tag to use
   * @param val                the string to put inside the tag
   * @throws SAXException      if the content handler reports an error
   */
  public static void addSaxString(ContentHandler contentHandler,
      String tag, String val) throws SAXException {
    contentHandler.startElement("", "", tag, new AttributesImpl());
    final char[] c = mangleXmlString(val).toCharArray();
    contentHandler.characters(c, 0, c.length);
    contentHandler.endElement("", "", tag);
  }

  /**
   * Represents a bag of key-value pairs encountered during parsing an XML
   * file.
   */
  public static class Stanza {
    /** Child stanzas grouped by name; each list keeps insertion order. */
    private final TreeMap<String, LinkedList<Stanza>> subtrees;

    /** The unmangled value of this stanza. */
    private String value;

    public Stanza() {
      subtrees = new TreeMap<String, LinkedList<Stanza>>();
      value = "";
    }

    public void setValue(String value) {
      this.value = value;
    }

    public String getValue() {
      return this.value;
    }

    /**
     * Discover if a stanza has a given entry.
     *
     * @param name        entry to look for
     *
     * @return            true if the entry was found
     */
    public boolean hasChildren(String name) {
      return subtrees.containsKey(name);
    }

    /**
     * Pull an entry from a stanza.
     *
     * @param name        entry to look for
     *
     * @return            the entry
     * @throws InvalidXmlException if there is no entry with that name
     */
    public List<Stanza> getChildren(String name) throws InvalidXmlException {
      LinkedList<Stanza> children = subtrees.get(name);
      if (children == null) {
        throw new InvalidXmlException("no entry found for " + name);
      }
      return children;
    }

    /**
     * Pull a string entry from a stanza.
     *
     * @param name        entry to look for
     *
     * @return            the entry
     * @throws InvalidXmlException if there is no entry with that name, or
     *                    if the name does not map to exactly one entry
     */
    public String getValue(String name) throws InvalidXmlException {
      String ret = getValueOrNull(name);
      if (ret == null) {
        throw new InvalidXmlException("no entry found for " + name);
      }
      return ret;
    }

    /**
     * Pull a string entry from a stanza, or null.
     *
     * @param name        entry to look for
     *
     * @return            the entry, or null if it was not found.
     * @throws InvalidXmlException if the name does not map to exactly one
     *                    entry
     */
    public String getValueOrNull(String name) throws InvalidXmlException {
      LinkedList<Stanza> l = subtrees.get(name);
      if (l == null) {
        return null;
      }
      if (l.size() != 1) {
        throw new InvalidXmlException("More than one value found for " + name);
      }
      return l.get(0).getValue();
    }

    /**
     * Add an entry to a stanza.
     *
     * @param name        name of the entry to add
     * @param child       the entry to add
     */
    public void addChild(String name, Stanza child) {
      LinkedList<Stanza> l = subtrees.get(name);
      if (l == null) {
        l = new LinkedList<Stanza>();
        subtrees.put(name, l);
      }
      l.add(child);
    }

    /**
     * Convert a stanza to a human-readable string.
     */
    @Override
    public String toString() {
      StringBuilder bld = new StringBuilder();
      bld.append("{");
      // A null value (possible through setValue) is treated like an empty one.
      if ((value != null) && (value.length() > 0)) {
        bld.append("\"").append(value).append("\"");
      }
      String prefix = "";
      for (Map.Entry<String, LinkedList<Stanza>> entry :
          subtrees.entrySet()) {
        String key = entry.getKey();
        for (Stanza child : entry.getValue()) {
          bld.append(prefix);
          bld.append("<").append(key).append(">");
          bld.append(child.toString());
          prefix = ", ";
        }
      }
      bld.append("}");
      return bld.toString();
    }
  }
}