001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.util; 018 019import java.io.IOException; 020import java.io.InputStream; 021import java.io.StringReader; 022import java.util.Stack; 023 024import javax.xml.XMLConstants; 025import javax.xml.parsers.DocumentBuilder; 026import javax.xml.parsers.DocumentBuilderFactory; 027import javax.xml.parsers.SAXParser; 028import javax.xml.parsers.SAXParserFactory; 029 030import org.w3c.dom.Document; 031import org.w3c.dom.Element; 032import org.w3c.dom.Node; 033 034import org.xml.sax.Attributes; 035import org.xml.sax.InputSource; 036import org.xml.sax.Locator; 037import org.xml.sax.SAXException; 038import org.xml.sax.helpers.DefaultHandler; 039 040/** 041 * An XML parser that uses SAX to include line and column number for each XML element in the parsed Document. 042 * <p> 043 * The line number and column number can be obtained from a Node/Element using 044 * <pre> 045 * String lineNumber = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER); 046 * String lineNumberEnd = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER_END); 047 * String columnNumber = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER); 048 * String columnNumberEnd = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER_END); 049 * </pre> 050 */ 051public final class XmlLineNumberParser { 052 053 public static final String LINE_NUMBER = "lineNumber"; 054 public static final String COLUMN_NUMBER = "colNumber"; 055 public static final String LINE_NUMBER_END = "lineNumberEnd"; 056 public static final String COLUMN_NUMBER_END = "colNumberEnd"; 057 058 /** 059 * Allows to plugin a custom text transformer in the parser, that can transform all the text content 060 */ 061 public interface XmlTextTransformer { 062 063 String transform(String text); 064 065 } 066 067 private XmlLineNumberParser() { 068 } 069 070 /** 071 * Parses the XML. 072 * 073 * @param is the XML content as an input stream 074 * @return the DOM model 075 * @throws Exception is thrown if error parsing 076 */ 077 public static Document parseXml(final InputStream is) throws Exception { 078 return parseXml(is, null); 079 } 080 081 /** 082 * Parses the XML. 083 * 084 * @param is the XML content as an input stream 085 * @param xmlTransformer the XML transformer 086 * @return the DOM model 087 * @throws Exception is thrown if error parsing 088 */ 089 public static Document parseXml(final InputStream is, final XmlTextTransformer xmlTransformer) throws Exception { 090 return parseXml(is, xmlTransformer, null, null); 091 } 092 093 /** 094 * Parses the XML. 095 * 096 * @param is the XML content as an input stream 097 * @param xmlTransformer the XML transformer 098 * @param rootNames one or more root names that is used as baseline for beginning the parsing, for example camelContext to start parsing 099 * when Camel is discovered. Multiple names can be defined separated by comma 100 * @param forceNamespace an optional namespaces to force assign to each node. This may be needed for JAXB unmarshalling from XML -> POJO. 101 * @return the DOM model 102 * @throws Exception is thrown if error parsing 103 */ 104 public static Document parseXml(final InputStream is, XmlTextTransformer xmlTransformer, String rootNames, final String forceNamespace) throws Exception { 105 ObjectHelper.notNull(is, "is"); 106 107 final XmlTextTransformer transformer = xmlTransformer == null ? new NoopTransformer() : xmlTransformer; 108 final Document doc; 109 SAXParser parser; 110 final SAXParserFactory factory = SAXParserFactory.newInstance(); 111 factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); 112 parser = factory.newSAXParser(); 113 final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 114 // turn off validator and loading external dtd 115 dbf.setValidating(false); 116 dbf.setNamespaceAware(true); 117 dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); 118 dbf.setFeature("http://xml.org/sax/features/namespaces", false); 119 dbf.setFeature("http://xml.org/sax/features/validation", false); 120 dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); 121 dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 122 dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 123 dbf.setFeature("http://xml.org/sax/features/external-general-entities", false); 124 dbf.setXIncludeAware(false); 125 dbf.setExpandEntityReferences(false); 126 final DocumentBuilder docBuilder = dbf.newDocumentBuilder(); 127 doc = docBuilder.newDocument(); 128 129 final Stack<Element> elementStack = new Stack<>(); 130 final StringBuilder textBuffer = new StringBuilder(); 131 final DefaultHandler handler = new DefaultHandler() { 132 private Locator locator; 133 private boolean found; 134 135 @Override 136 public void setDocumentLocator(final Locator locator) { 137 this.locator = locator; // Save the locator, so that it can be used later for line tracking when traversing nodes. 138 this.found = rootNames == null; 139 } 140 141 private boolean isRootName(String qName) { 142 for (String root : rootNames.split(",")) { 143 if (qName.equals(root)) { 144 return true; 145 } 146 } 147 return false; 148 } 149 150 @Override 151 public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException { 152 addTextIfNeeded(); 153 154 if (rootNames != null && !found) { 155 if (isRootName(qName)) { 156 found = true; 157 } 158 } 159 160 if (found) { 161 Element el; 162 if (forceNamespace != null) { 163 el = doc.createElementNS(forceNamespace, qName); 164 } else { 165 el = doc.createElement(qName); 166 } 167 168 for (int i = 0; i < attributes.getLength(); i++) { 169 el.setAttribute(transformer.transform(attributes.getQName(i)), transformer.transform(attributes.getValue(i))); 170 } 171 172 el.setUserData(LINE_NUMBER, String.valueOf(this.locator.getLineNumber()), null); 173 el.setUserData(COLUMN_NUMBER, String.valueOf(this.locator.getColumnNumber()), null); 174 elementStack.push(el); 175 } 176 } 177 178 @Override 179 public void endElement(final String uri, final String localName, final String qName) { 180 if (!found) { 181 return; 182 } 183 184 addTextIfNeeded(); 185 186 final Element closedEl = elementStack.isEmpty() ? null : elementStack.pop(); 187 if (closedEl != null) { 188 if (elementStack.isEmpty()) { 189 // Is this the root element? 190 doc.appendChild(closedEl); 191 } else { 192 final Element parentEl = elementStack.peek(); 193 parentEl.appendChild(closedEl); 194 } 195 196 closedEl.setUserData(LINE_NUMBER_END, String.valueOf(this.locator.getLineNumber()), null); 197 closedEl.setUserData(COLUMN_NUMBER_END, String.valueOf(this.locator.getColumnNumber()), null); 198 } 199 } 200 201 @Override 202 public void characters(final char ch[], final int start, final int length) throws SAXException { 203 char[] chars = new char[length]; 204 System.arraycopy(ch, start, chars, 0, length); 205 String s = new String(chars); 206 s = transformer.transform(s); 207 textBuffer.append(s); 208 } 209 210 @Override 211 public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException { 212 // do not resolve external dtd 213 return new InputSource(new StringReader("")); 214 } 215 216 // Outputs text accumulated under the current node 217 private void addTextIfNeeded() { 218 if (textBuffer.length() > 0) { 219 final Element el = elementStack.isEmpty() ? null : elementStack.peek(); 220 if (el != null) { 221 final Node textNode = doc.createTextNode(textBuffer.toString()); 222 el.appendChild(textNode); 223 textBuffer.delete(0, textBuffer.length()); 224 } 225 } 226 } 227 }; 228 parser.parse(is, handler); 229 230 return doc; 231 } 232 233 private static final class NoopTransformer implements XmlTextTransformer { 234 235 @Override 236 public String transform(String text) { 237 return text; 238 } 239 240 } 241 242}