001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.io.StringReader;
022import java.util.Stack;
023
024import javax.xml.XMLConstants;
025import javax.xml.parsers.DocumentBuilder;
026import javax.xml.parsers.DocumentBuilderFactory;
027import javax.xml.parsers.SAXParser;
028import javax.xml.parsers.SAXParserFactory;
029
030import org.w3c.dom.Document;
031import org.w3c.dom.Element;
032import org.w3c.dom.Node;
033
034import org.xml.sax.Attributes;
035import org.xml.sax.InputSource;
036import org.xml.sax.Locator;
037import org.xml.sax.SAXException;
038import org.xml.sax.helpers.DefaultHandler;
039
040/**
041 * An XML parser that uses SAX to include line and column number for each XML element in the parsed Document.
042 * <p>
043 * The line number and column number can be obtained from a Node/Element using
044 * <pre>
045 *   String lineNumber = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER);
046 *   String lineNumberEnd = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER_END);
047 *   String columnNumber = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER);
048 *   String columnNumberEnd = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER_END);
049 * </pre>
050 */
051public final class XmlLineNumberParser {
052
053    public static final String LINE_NUMBER = "lineNumber";
054    public static final String COLUMN_NUMBER = "colNumber";
055    public static final String LINE_NUMBER_END = "lineNumberEnd";
056    public static final String COLUMN_NUMBER_END = "colNumberEnd";
057
058    /**
059     * Allows to plugin a custom text transformer in the parser, that can transform all the text content
060     */
061    public interface XmlTextTransformer {
062
063        String transform(String text);
064
065    }
066
067    private XmlLineNumberParser() {
068    }
069
070    /**
071     * Parses the XML.
072     *
073     * @param is the XML content as an input stream
074     * @return the DOM model
075     * @throws Exception is thrown if error parsing
076     */
077    public static Document parseXml(final InputStream is) throws Exception {
078        return parseXml(is, null);
079    }
080
081    /**
082     * Parses the XML.
083     *
084     * @param is             the XML content as an input stream
085     * @param xmlTransformer the XML transformer
086     * @return the DOM model
087     * @throws Exception is thrown if error parsing
088     */
089    public static Document parseXml(final InputStream is, final XmlTextTransformer xmlTransformer) throws Exception {
090        return parseXml(is, xmlTransformer, null, null);
091    }
092
093    /**
094     * Parses the XML.
095     *
096     * @param is              the XML content as an input stream
097     * @param xmlTransformer  the XML transformer
098     * @param rootNames       one or more root names that is used as baseline for beginning the parsing, for example camelContext to start parsing
099     *                        when Camel is discovered. Multiple names can be defined separated by comma
100     * @param forceNamespace  an optional namespaces to force assign to each node. This may be needed for JAXB unmarshalling from XML -> POJO.
101     * @return the DOM model
102     * @throws Exception is thrown if error parsing
103     */
104    public static Document parseXml(final InputStream is, XmlTextTransformer xmlTransformer, String rootNames, final String forceNamespace) throws Exception {
105        ObjectHelper.notNull(is, "is");
106
107        final XmlTextTransformer transformer = xmlTransformer == null ? new NoopTransformer() : xmlTransformer;
108        final Document doc;
109        SAXParser parser;
110        final SAXParserFactory factory = SAXParserFactory.newInstance();
111        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
112        parser = factory.newSAXParser();
113        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
114        // turn off validator and loading external dtd
115        dbf.setValidating(false);
116        dbf.setNamespaceAware(true);
117        dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
118        dbf.setFeature("http://xml.org/sax/features/namespaces", false);
119        dbf.setFeature("http://xml.org/sax/features/validation", false);
120        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
121        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
122        dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
123        dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
124        dbf.setXIncludeAware(false);
125        dbf.setExpandEntityReferences(false);
126        final DocumentBuilder docBuilder = dbf.newDocumentBuilder();
127        doc = docBuilder.newDocument();
128
129        final Stack<Element> elementStack = new Stack<>();
130        final StringBuilder textBuffer = new StringBuilder();
131        final DefaultHandler handler = new DefaultHandler() {
132            private Locator locator;
133            private boolean found;
134
135            @Override
136            public void setDocumentLocator(final Locator locator) {
137                this.locator = locator; // Save the locator, so that it can be used later for line tracking when traversing nodes.
138                this.found = rootNames == null;
139            }
140
141            private boolean isRootName(String qName) {
142                for (String root : rootNames.split(",")) {
143                    if (qName.equals(root)) {
144                        return true;
145                    }
146                }
147                return false;
148            }
149
150            @Override
151            public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
152                addTextIfNeeded();
153
154                if (rootNames != null && !found) {
155                    if (isRootName(qName)) {
156                        found = true;
157                    }
158                }
159
160                if (found) {
161                    Element el;
162                    if (forceNamespace != null) {
163                        el = doc.createElementNS(forceNamespace, qName);
164                    } else {
165                        el = doc.createElement(qName);
166                    }
167
168                    for (int i = 0; i < attributes.getLength(); i++) {
169                        el.setAttribute(transformer.transform(attributes.getQName(i)), transformer.transform(attributes.getValue(i)));
170                    }
171
172                    el.setUserData(LINE_NUMBER, String.valueOf(this.locator.getLineNumber()), null);
173                    el.setUserData(COLUMN_NUMBER, String.valueOf(this.locator.getColumnNumber()), null);
174                    elementStack.push(el);
175                }
176            }
177
178            @Override
179            public void endElement(final String uri, final String localName, final String qName) {
180                if (!found) {
181                    return;
182                }
183
184                addTextIfNeeded();
185
186                final Element closedEl = elementStack.isEmpty() ? null : elementStack.pop();
187                if (closedEl != null) {
188                    if (elementStack.isEmpty()) {
189                        // Is this the root element?
190                        doc.appendChild(closedEl);
191                    } else {
192                        final Element parentEl = elementStack.peek();
193                        parentEl.appendChild(closedEl);
194                    }
195
196                    closedEl.setUserData(LINE_NUMBER_END, String.valueOf(this.locator.getLineNumber()), null);
197                    closedEl.setUserData(COLUMN_NUMBER_END, String.valueOf(this.locator.getColumnNumber()), null);
198                }
199            }
200
201            @Override
202            public void characters(final char ch[], final int start, final int length) throws SAXException {
203                char[] chars = new char[length];
204                System.arraycopy(ch, start, chars, 0, length);
205                String s = new String(chars);
206                s = transformer.transform(s);
207                textBuffer.append(s);
208            }
209
210            @Override
211            public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
212                // do not resolve external dtd
213                return new InputSource(new StringReader(""));
214            }
215
216            // Outputs text accumulated under the current node
217            private void addTextIfNeeded() {
218                if (textBuffer.length() > 0) {
219                    final Element el = elementStack.isEmpty() ? null : elementStack.peek();
220                    if (el != null) {
221                        final Node textNode = doc.createTextNode(textBuffer.toString());
222                        el.appendChild(textNode);
223                        textBuffer.delete(0, textBuffer.length());
224                    }
225                }
226            }
227        };
228        parser.parse(is, handler);
229
230        return doc;
231    }
232
233    private static final class NoopTransformer implements XmlTextTransformer {
234
235        @Override
236        public String transform(String text) {
237            return text;
238        }
239
240    }
241
242}