001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.InputStream;
020import java.util.Iterator;
021import java.util.LinkedHashMap;
022import java.util.Map;
023import java.util.regex.Matcher;
024import java.util.regex.Pattern;
025
026import org.apache.camel.Exchange;
027import org.apache.camel.language.simple.SimpleLanguage;
028import org.apache.camel.util.ObjectHelper;
029import org.apache.camel.util.Scanner;
030import org.apache.camel.util.StringHelper;
031
032/**
033 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
034 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token.
035 * <p/>
036 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
037 * to access the message body.
038 * <p/>
039 * Can be used to split big XML files.
040 * <p/>
041 * This implementation supports inheriting namespaces from a parent/root tag.
042 *
043 * @deprecated use {@link TokenXMLExpressionIterator} instead.
044 */
045@Deprecated
046public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator {
047
048    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\"");
049    private static final String SCAN_TOKEN_REGEX = "(\\s+.*?|)>";
050    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "(.{1,15}?:|)";
051    protected final String inheritNamespaceToken;
052
053    public TokenXMLPairExpressionIterator(String startToken, String endToken, String inheritNamespaceToken) {
054        super(startToken, endToken, true);
055        // namespace token is optional
056        this.inheritNamespaceToken = inheritNamespaceToken;
057    }
058
059    @Override
060    protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) {
061        String start = startToken;
062        if (SimpleLanguage.hasSimpleFunction(start)) {
063            start = SimpleLanguage.expression(start).evaluate(exchange, String.class);
064        }
065        String end = endToken;
066        if (SimpleLanguage.hasSimpleFunction(end)) {
067            end = SimpleLanguage.expression(end).evaluate(exchange, String.class);
068        }
069        String inherit = inheritNamespaceToken;
070        if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) {
071            inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class);
072        }
073
074        // must be XML tokens
075        if (!start.startsWith("<") || !start.endsWith(">")) {
076            throw new IllegalArgumentException("Start token must be a valid XML token, was: " + start);
077        }
078        if (!end.startsWith("<") || !end.endsWith(">")) {
079            throw new IllegalArgumentException("End token must be a valid XML token, was: " + end);
080        }
081        if (inherit != null && (!inherit.startsWith("<") || !inherit.endsWith(">"))) {
082            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inherit);
083        }
084
085        XMLTokenPairIterator iterator = new XMLTokenPairIterator(start, end, inherit, in, charset);
086        iterator.init();
087        return iterator;
088    }
089
090    /**
091     * Iterator to walk the input stream
092     */
093    static class XMLTokenPairIterator extends TokenPairIterator {
094
095        private final Pattern startTokenPattern;
096        private final String scanEndToken;
097        private final String inheritNamespaceToken;
098        private Pattern inheritNamespaceTokenPattern;
099        private String rootTokenNamespaces;
100
101        XMLTokenPairIterator(String startToken, String endToken, String inheritNamespaceToken, InputStream in, String charset) {
102            super(startToken, endToken, true, in, charset);
103
104            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
105            StringBuilder tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
106                                append(startToken.substring(1, startToken.length() - 1)).append(SCAN_TOKEN_REGEX);
107            this.startTokenPattern = Pattern.compile(tokenSb.toString());
108            
109            tokenSb = new StringBuilder("</").append(SCAN_TOKEN_NS_PREFIX_REGEX).
110                                append(endToken.substring(2, endToken.length() - 1)).append(SCAN_TOKEN_REGEX);
111            this.scanEndToken = tokenSb.toString();
112            
113            this.inheritNamespaceToken = inheritNamespaceToken;
114            if (inheritNamespaceToken != null) {
115                // the inherit namespace token may itself have a namespace prefix
116                tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
117                                append(inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)).append(SCAN_TOKEN_REGEX);  
118                // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
119                this.inheritNamespaceTokenPattern = Pattern.compile(tokenSb.toString(), Pattern.MULTILINE | Pattern.DOTALL);
120            }
121        }
122
123        @Override
124        void init() {
125            // use scan end token as delimiter which supports attributes/namespaces
126            this.scanner = new Scanner(in, charset, scanEndToken);
127            // this iterator will do look ahead as we may have data
128            // after the last end token, which the scanner would find
129            // so we need to be one step ahead of the scanner
130            this.image = scanner.hasNext() ? (String) next(true) : null;
131        }
132
133        @Override
134        String getNext(boolean first) {
135            String next = scanner.next();
136            if (next == null) {
137                return null;
138            }
139
140            // initialize inherited namespaces on first
141            if (first && inheritNamespaceToken != null) {
142                rootTokenNamespaces = getNamespacesFromNamespaceToken(next);
143            }
144
145            // make sure next is positioned at start token as we can have leading data
146            // or we reached EOL and there is no more start tags
147            Matcher matcher = startTokenPattern.matcher(next);
148            if (!matcher.find()) {
149                return null;
150            } else {
151                int index = matcher.start();
152                next = next.substring(index);
153            }
154
155            // make sure the end tag matches the begin tag if the tag has a namespace prefix
156            String tag = StringHelper.before(next, ">");
157            StringBuilder endTagSb = new StringBuilder("</");
158            int firstSpaceIndex = tag.indexOf(" ");
159            if (firstSpaceIndex > 0) {
160                endTagSb.append(tag.substring(1, firstSpaceIndex)).append(">");
161            } else {
162                endTagSb.append(tag.substring(1, tag.length())).append(">");
163            }
164            
165            // build answer accordingly to whether namespaces should be inherited or not
166            StringBuilder sb = new StringBuilder();
167            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
168                // append root namespaces to local start token
169                // grab the text
170                String text = StringHelper.after(next, ">");
171                // build result with inherited namespaces
172                next = sb.append(tag).append(rootTokenNamespaces).append(">").append(text).append(endTagSb.toString()).toString();
173            } else {
174                next = sb.append(next).append(endTagSb.toString()).toString();
175            }
176
177            return next;
178        }
179
180        private String getNamespacesFromNamespaceToken(String text) {
181            if (text == null) {
182                return null;
183            }
184
185            // grab the namespace tag
186            Matcher mat = inheritNamespaceTokenPattern.matcher(text);
187            if (mat.find()) {
188                text = mat.group(0);
189            } else {
190                // cannot find namespace tag
191                return null;
192            }
193
194            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
195            Map<String, String> namespaces = new LinkedHashMap<>();
196            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
197            while (matcher.find()) {
198                String prefix = matcher.group(1);
199                String url = matcher.group(2);
200                if (ObjectHelper.isEmpty(prefix)) {
201                    prefix = "_DEFAULT_";
202                } else {
203                    // skip leading :
204                    prefix = prefix.substring(1);
205                }
206                namespaces.put(prefix, url);
207            }
208
209            // did we find any namespaces
210            if (namespaces.isEmpty()) {
211                return null;
212            }
213
214            // build namespace String
215            StringBuilder sb = new StringBuilder();
216            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
217                String key = entry.getKey();
218                String value = entry.getValue();
219                if ("_DEFAULT_".equals(key)) {
220                    sb.append(" xmlns=\"").append(value).append("\"");
221                } else {
222                    sb.append(" xmlns:").append(key).append("=\"").append(value).append("\"");
223                }
224            }
225
226            return sb.toString();
227        }
228    }
229
230}