001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.util; 018 019import java.io.IOException; 020import java.io.InputStream; 021import java.io.StringReader; 022import java.util.Stack; 023import javax.xml.parsers.DocumentBuilder; 024import javax.xml.parsers.DocumentBuilderFactory; 025import javax.xml.parsers.SAXParser; 026import javax.xml.parsers.SAXParserFactory; 027 028import org.w3c.dom.Document; 029import org.w3c.dom.Element; 030import org.w3c.dom.Node; 031import org.xml.sax.Attributes; 032import org.xml.sax.InputSource; 033import org.xml.sax.Locator; 034import org.xml.sax.SAXException; 035import org.xml.sax.helpers.DefaultHandler; 036 037/** 038 * An XML parser that uses SAX to include line and column number for each XML element in the parsed Document. 039 * <p> 040 * The line number and column number can be obtained from a Node/Element using 041 * <pre> 042 * String lineNumber = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER); 043 * String lineNumberEnd = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER_END); 044 * String columnNumber = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER); 045 * String columnNumberEnd = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER_END); 046 * </pre> 047 */ 048public final class XmlLineNumberParser { 049 050 public static final String LINE_NUMBER = "lineNumber"; 051 public static final String COLUMN_NUMBER = "colNumber"; 052 public static final String LINE_NUMBER_END = "lineNumberEnd"; 053 public static final String COLUMN_NUMBER_END = "colNumberEnd"; 054 055 /** 056 * Allows to plugin a custom text transformer in the parser, that can transform all the text content 057 */ 058 public interface XmlTextTransformer { 059 060 String transform(String text); 061 062 } 063 064 private XmlLineNumberParser() { 065 } 066 067 /** 068 * Parses the XML. 069 * 070 * @param is the XML content as an input stream 071 * @return the DOM model 072 * @throws Exception is thrown if error parsing 073 */ 074 public static Document parseXml(final InputStream is) throws Exception { 075 return parseXml(is, null); 076 } 077 078 /** 079 * Parses the XML. 080 * 081 * @param is the XML content as an input stream 082 * @param xmlTransformer the XML transformer 083 * @return the DOM model 084 * @throws Exception is thrown if error parsing 085 */ 086 public static Document parseXml(final InputStream is, final XmlTextTransformer xmlTransformer) throws Exception { 087 return parseXml(is, xmlTransformer, null, null); 088 } 089 090 /** 091 * Parses the XML. 092 * 093 * @param is the XML content as an input stream 094 * @param xmlTransformer the XML transformer 095 * @param rootNames one or more root names that is used as baseline for beginning the parsing, for example camelContext to start parsing 096 * when Camel is discovered. Multiple names can be defined separated by comma 097 * @param forceNamespace an optional namespaces to force assign to each node. This may be needed for JAXB unmarshalling from XML -> POJO. 098 * @return the DOM model 099 * @throws Exception is thrown if error parsing 100 */ 101 public static Document parseXml(final InputStream is, XmlTextTransformer xmlTransformer, String rootNames, final String forceNamespace) throws Exception { 102 ObjectHelper.notNull(is, "is"); 103 104 final XmlTextTransformer transformer = xmlTransformer == null ? new NoopTransformer() : xmlTransformer; 105 final Document doc; 106 SAXParser parser; 107 final SAXParserFactory factory = SAXParserFactory.newInstance(); 108 parser = factory.newSAXParser(); 109 final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 110 // turn off validator and loading external dtd 111 dbf.setValidating(false); 112 dbf.setNamespaceAware(true); 113 dbf.setFeature("http://xml.org/sax/features/namespaces", false); 114 dbf.setFeature("http://xml.org/sax/features/validation", false); 115 dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); 116 dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 117 dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 118 dbf.setFeature("http://xml.org/sax/features/external-general-entities", false); 119 final DocumentBuilder docBuilder = dbf.newDocumentBuilder(); 120 doc = docBuilder.newDocument(); 121 122 final Stack<Element> elementStack = new Stack<Element>(); 123 final StringBuilder textBuffer = new StringBuilder(); 124 final DefaultHandler handler = new DefaultHandler() { 125 private Locator locator; 126 private boolean found; 127 128 @Override 129 public void setDocumentLocator(final Locator locator) { 130 this.locator = locator; // Save the locator, so that it can be used later for line tracking when traversing nodes. 131 this.found = rootNames == null; 132 } 133 134 private boolean isRootName(String qName) { 135 for (String root : rootNames.split(",")) { 136 if (qName.equals(root)) { 137 return true; 138 } 139 } 140 return false; 141 } 142 143 @Override 144 public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException { 145 addTextIfNeeded(); 146 147 if (rootNames != null && !found) { 148 if (isRootName(qName)) { 149 found = true; 150 } 151 } 152 153 if (found) { 154 Element el; 155 if (forceNamespace != null) { 156 el = doc.createElementNS(forceNamespace, qName); 157 } else { 158 el = doc.createElement(qName); 159 } 160 161 for (int i = 0; i < attributes.getLength(); i++) { 162 el.setAttribute(transformer.transform(attributes.getQName(i)), transformer.transform(attributes.getValue(i))); 163 } 164 165 el.setUserData(LINE_NUMBER, String.valueOf(this.locator.getLineNumber()), null); 166 el.setUserData(COLUMN_NUMBER, String.valueOf(this.locator.getColumnNumber()), null); 167 elementStack.push(el); 168 } 169 } 170 171 @Override 172 public void endElement(final String uri, final String localName, final String qName) { 173 if (!found) { 174 return; 175 } 176 177 addTextIfNeeded(); 178 179 final Element closedEl = elementStack.isEmpty() ? null : elementStack.pop(); 180 if (closedEl != null) { 181 if (elementStack.isEmpty()) { 182 // Is this the root element? 183 doc.appendChild(closedEl); 184 } else { 185 final Element parentEl = elementStack.peek(); 186 parentEl.appendChild(closedEl); 187 } 188 189 closedEl.setUserData(LINE_NUMBER_END, String.valueOf(this.locator.getLineNumber()), null); 190 closedEl.setUserData(COLUMN_NUMBER_END, String.valueOf(this.locator.getColumnNumber()), null); 191 } 192 } 193 194 @Override 195 public void characters(final char ch[], final int start, final int length) throws SAXException { 196 char[] chars = new char[length]; 197 System.arraycopy(ch, start, chars, 0, length); 198 String s = new String(chars); 199 s = transformer.transform(s); 200 textBuffer.append(s); 201 } 202 203 @Override 204 public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException { 205 // do not resolve external dtd 206 return new InputSource(new StringReader("")); 207 } 208 209 // Outputs text accumulated under the current node 210 private void addTextIfNeeded() { 211 if (textBuffer.length() > 0) { 212 final Element el = elementStack.isEmpty() ? null : elementStack.peek(); 213 if (el != null) { 214 final Node textNode = doc.createTextNode(textBuffer.toString()); 215 el.appendChild(textNode); 216 textBuffer.delete(0, textBuffer.length()); 217 } 218 } 219 } 220 }; 221 parser.parse(is, handler); 222 223 return doc; 224 } 225 226 private static final class NoopTransformer implements XmlTextTransformer { 227 228 @Override 229 public String transform(String text) { 230 return text; 231 } 232 233 } 234 235}