001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.io.StringReader;
022import java.util.Stack;
023import javax.xml.parsers.DocumentBuilder;
024import javax.xml.parsers.DocumentBuilderFactory;
025import javax.xml.parsers.SAXParser;
026import javax.xml.parsers.SAXParserFactory;
027
028import org.w3c.dom.Document;
029import org.w3c.dom.Element;
030import org.w3c.dom.Node;
031import org.xml.sax.Attributes;
032import org.xml.sax.InputSource;
033import org.xml.sax.Locator;
034import org.xml.sax.SAXException;
035import org.xml.sax.helpers.DefaultHandler;
036
037/**
038 * An XML parser that uses SAX to include line and column number for each XML element in the parsed Document.
039 * <p>
040 * The line number and column number can be obtained from a Node/Element using
041 * <pre>
042 *   String lineNumber = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER);
043 *   String lineNumberEnd = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER_END);
044 *   String columnNumber = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER);
045 *   String columnNumberEnd = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER_END);
046 * </pre>
047 */
048public final class XmlLineNumberParser {
049
050    public static final String LINE_NUMBER = "lineNumber";
051    public static final String COLUMN_NUMBER = "colNumber";
052    public static final String LINE_NUMBER_END = "lineNumberEnd";
053    public static final String COLUMN_NUMBER_END = "colNumberEnd";
054
055    /**
056     * Allows to plugin a custom text transformer in the parser, that can transform all the text content
057     */
058    public interface XmlTextTransformer {
059
060        String transform(String text);
061
062    }
063
064    private XmlLineNumberParser() {
065    }
066
067    /**
068     * Parses the XML.
069     *
070     * @param is the XML content as an input stream
071     * @return the DOM model
072     * @throws Exception is thrown if error parsing
073     */
074    public static Document parseXml(final InputStream is) throws Exception {
075        return parseXml(is, null);
076    }
077
078    /**
079     * Parses the XML.
080     *
081     * @param is             the XML content as an input stream
082     * @param xmlTransformer the XML transformer
083     * @return the DOM model
084     * @throws Exception is thrown if error parsing
085     */
086    public static Document parseXml(final InputStream is, final XmlTextTransformer xmlTransformer) throws Exception {
087        return parseXml(is, xmlTransformer, null, null);
088    }
089
090    /**
091     * Parses the XML.
092     *
093     * @param is              the XML content as an input stream
094     * @param xmlTransformer  the XML transformer
095     * @param rootNames       one or more root names that is used as baseline for beginning the parsing, for example camelContext to start parsing
096     *                        when Camel is discovered. Multiple names can be defined separated by comma
097     * @param forceNamespace  an optional namespaces to force assign to each node. This may be needed for JAXB unmarshalling from XML -> POJO.
098     * @return the DOM model
099     * @throws Exception is thrown if error parsing
100     */
101    public static Document parseXml(final InputStream is, XmlTextTransformer xmlTransformer, String rootNames, final String forceNamespace) throws Exception {
102        ObjectHelper.notNull(is, "is");
103
104        final XmlTextTransformer transformer = xmlTransformer == null ? new NoopTransformer() : xmlTransformer;
105        final Document doc;
106        SAXParser parser;
107        final SAXParserFactory factory = SAXParserFactory.newInstance();
108        parser = factory.newSAXParser();
109        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
110        // turn off validator and loading external dtd
111        dbf.setValidating(false);
112        dbf.setNamespaceAware(true);
113        dbf.setFeature("http://xml.org/sax/features/namespaces", false);
114        dbf.setFeature("http://xml.org/sax/features/validation", false);
115        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
116        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
117        dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
118        dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
119        final DocumentBuilder docBuilder = dbf.newDocumentBuilder();
120        doc = docBuilder.newDocument();
121
122        final Stack<Element> elementStack = new Stack<Element>();
123        final StringBuilder textBuffer = new StringBuilder();
124        final DefaultHandler handler = new DefaultHandler() {
125            private Locator locator;
126            private boolean found;
127
128            @Override
129            public void setDocumentLocator(final Locator locator) {
130                this.locator = locator; // Save the locator, so that it can be used later for line tracking when traversing nodes.
131                this.found = rootNames == null;
132            }
133
134            private boolean isRootName(String qName) {
135                for (String root : rootNames.split(",")) {
136                    if (qName.equals(root)) {
137                        return true;
138                    }
139                }
140                return false;
141            }
142
143            @Override
144            public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
145                addTextIfNeeded();
146
147                if (rootNames != null && !found) {
148                    if (isRootName(qName)) {
149                        found = true;
150                    }
151                }
152
153                if (found) {
154                    Element el;
155                    if (forceNamespace != null) {
156                        el = doc.createElementNS(forceNamespace, qName);
157                    } else {
158                        el = doc.createElement(qName);
159                    }
160
161                    for (int i = 0; i < attributes.getLength(); i++) {
162                        el.setAttribute(transformer.transform(attributes.getQName(i)), transformer.transform(attributes.getValue(i)));
163                    }
164
165                    el.setUserData(LINE_NUMBER, String.valueOf(this.locator.getLineNumber()), null);
166                    el.setUserData(COLUMN_NUMBER, String.valueOf(this.locator.getColumnNumber()), null);
167                    elementStack.push(el);
168                }
169            }
170
171            @Override
172            public void endElement(final String uri, final String localName, final String qName) {
173                if (!found) {
174                    return;
175                }
176
177                addTextIfNeeded();
178
179                final Element closedEl = elementStack.isEmpty() ? null : elementStack.pop();
180                if (closedEl != null) {
181                    if (elementStack.isEmpty()) {
182                        // Is this the root element?
183                        doc.appendChild(closedEl);
184                    } else {
185                        final Element parentEl = elementStack.peek();
186                        parentEl.appendChild(closedEl);
187                    }
188
189                    closedEl.setUserData(LINE_NUMBER_END, String.valueOf(this.locator.getLineNumber()), null);
190                    closedEl.setUserData(COLUMN_NUMBER_END, String.valueOf(this.locator.getColumnNumber()), null);
191                }
192            }
193
194            @Override
195            public void characters(final char ch[], final int start, final int length) throws SAXException {
196                char[] chars = new char[length];
197                System.arraycopy(ch, start, chars, 0, length);
198                String s = new String(chars);
199                s = transformer.transform(s);
200                textBuffer.append(s);
201            }
202
203            @Override
204            public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
205                // do not resolve external dtd
206                return new InputSource(new StringReader(""));
207            }
208
209            // Outputs text accumulated under the current node
210            private void addTextIfNeeded() {
211                if (textBuffer.length() > 0) {
212                    final Element el = elementStack.isEmpty() ? null : elementStack.peek();
213                    if (el != null) {
214                        final Node textNode = doc.createTextNode(textBuffer.toString());
215                        el.appendChild(textNode);
216                        textBuffer.delete(0, textBuffer.length());
217                    }
218                }
219            }
220        };
221        parser.parse(is, handler);
222
223        return doc;
224    }
225
226    private static final class NoopTransformer implements XmlTextTransformer {
227
228        @Override
229        public String transform(String text) {
230            return text;
231        }
232
233    }
234
235}