001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.text.MessageFormat;
023import java.util.ArrayList;
024import java.util.Iterator;
025import java.util.LinkedHashMap;
026import java.util.List;
027import java.util.Map;
028import java.util.Scanner;
029import java.util.regex.MatchResult;
030import java.util.regex.Matcher;
031import java.util.regex.Pattern;
032
033import org.apache.camel.Exchange;
034import org.apache.camel.InvalidPayloadException;
035import org.apache.camel.language.simple.SimpleLanguage;
036import org.apache.camel.util.IOHelper;
037import org.apache.camel.util.ObjectHelper;
038import org.apache.camel.util.StringHelper;
039
040/**
041 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
042 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token,
043 * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
044 * <p/>
045 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
046 * to access the message body.
047 * <p/>
048 * Can be used to split big XML files.
049 * <p/>
050 * This implementation supports inheriting namespaces from a parent/root tag.
051 */
052public class TokenXMLExpressionIterator extends ExpressionAdapter {
053    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
054    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
055    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>";
056    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
057    private static final String OPTION_WRAP_TOKEN = "<*>";
058
059    protected final String tagToken;
060    protected final String inheritNamespaceToken;
061
062    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
063        StringHelper.notEmpty(tagToken, "tagToken");
064        this.tagToken = tagToken;
065        // namespace token is optional
066        this.inheritNamespaceToken = inheritNamespaceToken;
067    }
068
069    protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) {
070        String tag = tagToken;
071        if (SimpleLanguage.hasSimpleFunction(tag)) {
072            tag = SimpleLanguage.expression(tag).evaluate(exchange, String.class);
073        }
074        String inherit = inheritNamespaceToken;
075        if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) {
076            inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class);
077        }
078
079        // must be XML tokens
080        if (!tag.startsWith("<")) {
081            tag = "<" + tag;
082        }
083        if (!tag.endsWith(">")) {
084            tag = tag + ">";
085        }
086
087        if (inherit != null) {
088            if (!inherit.startsWith("<")) {
089                inherit = "<" + inherit;
090            }
091            if (!inherit.endsWith(">")) {
092                inherit = inherit + ">";
093            }
094        }
095
096        // must be XML tokens
097        if (!tag.startsWith("<") || !tag.endsWith(">")) {
098            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tag);
099        }
100        if (inherit != null && (!inherit.startsWith("<") || !inherit.endsWith(">"))) {
101            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inherit);
102        }
103
104        XMLTokenIterator iterator = new XMLTokenIterator(tag, inherit, in, charset);
105        iterator.init();
106        return iterator;
107    }
108
109    @Override
110    public boolean matches(Exchange exchange) {
111        // as a predicate we must close the stream, as we do not return an iterator that can be used
112        // afterwards to iterate the input stream
113        Object value = doEvaluate(exchange, true);
114        return ObjectHelper.evaluateValuePredicate(value);
115    }
116
117    @Override
118    public Object evaluate(Exchange exchange) {
119        // as we return an iterator to access the input stream, we should not close it
120        return doEvaluate(exchange, false);
121    }
122
123    /**
124     * Strategy to evaluate the exchange
125     *
126     * @param exchange   the exchange
127     * @param closeStream whether to close the stream before returning from this method.
128     * @return the evaluated value
129     */
130    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
131        InputStream in = null;
132        try {
133            in = exchange.getIn().getMandatoryBody(InputStream.class);
134            // we may read from a file, and want to support custom charset defined on the exchange
135            String charset = IOHelper.getCharsetName(exchange);
136            return createIterator(exchange, in, charset);
137        } catch (InvalidPayloadException e) {
138            exchange.setException(e);
139            // must close input stream
140            IOHelper.close(in);
141            return null;
142        } finally {
143            if (closeStream) {
144                IOHelper.close(in);
145            }
146        }
147    }
148    
149    /**
150     * Iterator to walk the input stream
151     */
152    static class XMLTokenIterator implements Iterator<Object>, Closeable {
153        final String tagToken;
154        final InputStream in;
155        final String charset;
156        Scanner scanner;
157        Object image;
158
159        private final Pattern tagTokenPattern;
160        private final String inheritNamespaceToken;
161        private final boolean wrapToken;
162        private Pattern inheritNamespaceTokenPattern;
163        private String rootTokenNamespaces;
164        private String wrapHead;
165        private String wrapTail;
166
167        XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
168            this.tagToken = tagToken;
169            this.charset = charset;
170          
171            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
172            this.tagTokenPattern = 
173                Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, 
174                                                     SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), 
175                                                     Pattern.MULTILINE | Pattern.DOTALL);
176            
177            this.inheritNamespaceToken = inheritNamespaceToken;
178            if (inheritNamespaceToken != null && OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) {
179                this.wrapToken = true;
180                this.in = new RecordableInputStream(in, charset);
181            } else {
182                this.wrapToken = false;
183                this.in = in;
184                if (inheritNamespaceToken != null) {
185                    // the inherit namespace token may itself have a namespace prefix
186                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
187                    this.inheritNamespaceTokenPattern = 
188                        Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
189                                                             SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 
190                                                             Pattern.MULTILINE | Pattern.DOTALL);
191                }
192            }
193        }
194
195        void init() {
196            // use a scanner with the default delimiter
197            this.scanner = new Scanner(in, charset);
198            this.image = scanner.hasNext() ? (String) next(true) : null;
199        }
200
201        String getNext(boolean first) {
202            // initialize inherited namespaces on first
203            if (first && inheritNamespaceToken != null && !wrapToken) {
204                rootTokenNamespaces =  getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
205            }
206
207            String next = scanner.findWithinHorizon(tagTokenPattern, 0);
208            if (next == null) {
209                return null;
210            }
211            if (first && wrapToken) {
212                MatchResult mres = scanner.match();
213                wrapHead = ((RecordableInputStream)in).getText(mres.start());
214                wrapTail = buildXMLTail(wrapHead);
215            }
216
217            // build answer accordingly to whether namespaces should be inherited or not
218            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
219                // REVISIT should skip the prefixes that are declared within the child itself.
220                String head = StringHelper.before(next, ">");
221                boolean empty = false;
222                if (head.endsWith("/")) {
223                    head = head.substring(0, head.length() - 1);
224                    empty = true;
225                }
226                StringBuilder sb = new StringBuilder();
227                // append root namespaces to local start token
228                // grab the text
229                String tail = StringHelper.after(next, ">");
230                // build result with inherited namespaces
231                next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString();
232            } else if (wrapToken) {
233                // wrap the token
234                StringBuilder sb = new StringBuilder();
235                next = sb.append(wrapHead).append(next).append(wrapTail).toString();
236            }
237            
238            return next;
239        }
240
241        private String getNamespacesFromNamespaceToken(String text) {
242            if (text == null) {
243                return null;
244            }
245
246            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
247            Map<String, String> namespaces = new LinkedHashMap<>();
248            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
249            while (matcher.find()) {
250                String prefix = matcher.group(1);
251                String url = matcher.group(2);
252                if (ObjectHelper.isEmpty(prefix)) {
253                    prefix = "_DEFAULT_";
254                } else {
255                    // skip leading :
256                    prefix = prefix.substring(1);
257                }
258                namespaces.put(prefix, url);
259            }
260
261            // did we find any namespaces
262            if (namespaces.isEmpty()) {
263                return null;
264            }
265
266            // build namespace String
267            StringBuilder sb = new StringBuilder();
268            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
269                String key = entry.getKey();
270                // note the value is already quoted
271                String value = entry.getValue();
272                if ("_DEFAULT_".equals(key)) {
273                    sb.append(" xmlns=").append(value);
274                } else {
275                    sb.append(" xmlns:").append(key).append("=").append(value);
276                }
277            }
278
279            return sb.toString();
280        }
281        
282        @Override
283        public boolean hasNext() {
284            return image != null;
285        }
286
287        @Override
288        public Object next() {
289            return next(false);
290        }
291
292        Object next(boolean first) {
293            Object answer = image;
294            // calculate next
295            if (scanner.hasNext()) {
296                image = getNext(first);
297            } else {
298                image = null;
299            }
300
301            if (answer == null) {
302                // first time the image may be null
303                answer = image;
304            }
305            return answer;
306        }
307
308        @Override
309        public void remove() {
310            // noop
311        }
312
313        @Override
314        public void close() throws IOException {
315            scanner.close();
316        }
317
318    }
319
320    private static String buildXMLTail(String xmlhead) {
321        // assume the input text is a portion of a well-formed xml
322        List<String> tags = new ArrayList<>();
323        int p = 0;
324        while (p < xmlhead.length()) {
325            p = xmlhead.indexOf('<', p);
326            if (p < 0) {
327                break;
328            }
329            int nc = xmlhead.charAt(p + 1); 
330            if (nc == '?') {
331                p++;
332                continue;
333            } else if (nc == '/') {
334                p++;
335                tags.remove(tags.size() - 1);
336            } else {
337                final int ep = xmlhead.indexOf('>', p);
338                if (xmlhead.charAt(ep - 1) == '/') {
339                    p++;
340                    continue;
341                }
342                final int sp = xmlhead.substring(p, ep).indexOf(' ');
343                tags.add(xmlhead.substring(p + 1, sp > 0 ? p + sp : ep));
344                p = ep;
345            }
346        }
347        StringBuilder sb = new StringBuilder();
348        for (int i = tags.size() - 1; i >= 0; i--) {
349            sb.append("</").append(tags.get(i)).append(">");
350        }
351        return sb.toString();
352    }
353}