001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.InputStream;
020import java.util.Iterator;
021import java.util.LinkedHashMap;
022import java.util.Map;
023import java.util.Scanner;
024import java.util.regex.Matcher;
025import java.util.regex.Pattern;
026
027import org.apache.camel.util.ObjectHelper;
028
029/**
030 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
031 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token.
032 * <p/>
033 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
034 * to access the message body.
035 * <p/>
036 * Can be used to split big XML files.
037 * <p/>
038 * This implementation supports inheriting namespaces from a parent/root tag.
039 *
040 * @deprecated use {@link TokenXMLExpressionIterator} instead.
041 */
042@Deprecated
043public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator {
044
045    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\"");
046    private static final String SCAN_TOKEN_REGEX = "(\\s+.*?|)>";
047    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "(.{1,15}?:|)";
048    protected final String inheritNamespaceToken;
049
050    public TokenXMLPairExpressionIterator(String startToken, String endToken, String inheritNamespaceToken) {
051        super(startToken, endToken, true);
052        // namespace token is optional
053        this.inheritNamespaceToken = inheritNamespaceToken;
054
055        // must be XML tokens
056        if (!startToken.startsWith("<") || !startToken.endsWith(">")) {
057            throw new IllegalArgumentException("Start token must be a valid XML token, was: " + startToken);
058        }
059        if (!endToken.startsWith("<") || !endToken.endsWith(">")) {
060            throw new IllegalArgumentException("End token must be a valid XML token, was: " + endToken);
061        }
062        if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
063            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
064        }
065    }
066
067    @Override
068    protected Iterator<?> createIterator(InputStream in, String charset) {
069        XMLTokenPairIterator iterator = new XMLTokenPairIterator(startToken, endToken, inheritNamespaceToken, in, charset);
070        iterator.init();
071        return iterator;
072    }
073
074    /**
075     * Iterator to walk the input stream
076     */
077    static class XMLTokenPairIterator extends TokenPairIterator {
078
079        private final Pattern startTokenPattern;
080        private final String scanEndToken;
081        private final String inheritNamespaceToken;
082        private Pattern inheritNamespaceTokenPattern;
083        private String rootTokenNamespaces;
084
085        XMLTokenPairIterator(String startToken, String endToken, String inheritNamespaceToken, InputStream in, String charset) {
086            super(startToken, endToken, true, in, charset);
087
088            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
089            StringBuilder tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
090                                append(startToken.substring(1, startToken.length() - 1)).append(SCAN_TOKEN_REGEX);
091            this.startTokenPattern = Pattern.compile(tokenSb.toString());
092            
093            tokenSb = new StringBuilder("</").append(SCAN_TOKEN_NS_PREFIX_REGEX).
094                                append(endToken.substring(2, endToken.length() - 1)).append(SCAN_TOKEN_REGEX);
095            this.scanEndToken = tokenSb.toString();
096            
097            this.inheritNamespaceToken = inheritNamespaceToken;
098            if (inheritNamespaceToken != null) {
099                // the inherit namespace token may itself have a namespace prefix
100                tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
101                                append(inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)).append(SCAN_TOKEN_REGEX);  
102                // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
103                this.inheritNamespaceTokenPattern = Pattern.compile(tokenSb.toString(), Pattern.MULTILINE | Pattern.DOTALL);
104            }
105        }
106
107        @Override
108        void init() {
109            // use scan end token as delimiter which supports attributes/namespaces
110            this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken);
111            // this iterator will do look ahead as we may have data
112            // after the last end token, which the scanner would find
113            // so we need to be one step ahead of the scanner
114            this.image = scanner.hasNext() ? (String) next(true) : null;
115        }
116
117        @Override
118        String getNext(boolean first) {
119            String next = scanner.next();
120            if (next == null) {
121                return null;
122            }
123
124            // initialize inherited namespaces on first
125            if (first && inheritNamespaceToken != null) {
126                rootTokenNamespaces = getNamespacesFromNamespaceToken(next);
127            }
128
129            // make sure next is positioned at start token as we can have leading data
130            // or we reached EOL and there is no more start tags
131            Matcher matcher = startTokenPattern.matcher(next);
132            if (!matcher.find()) {
133                return null;
134            } else {
135                int index = matcher.start();
136                next = next.substring(index);
137            }
138
139            // make sure the end tag matches the begin tag if the tag has a namespace prefix
140            String tag = ObjectHelper.before(next, ">");
141            StringBuilder endTagSb = new StringBuilder("</");
142            int firstSpaceIndex = tag.indexOf(" ");
143            if (firstSpaceIndex > 0) {
144                endTagSb.append(tag.substring(1, firstSpaceIndex)).append(">");
145            } else {
146                endTagSb.append(tag.substring(1, tag.length())).append(">");
147            }
148            
149            // build answer accordingly to whether namespaces should be inherited or not
150            StringBuilder sb = new StringBuilder();
151            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
152                // append root namespaces to local start token
153                // grab the text
154                String text = ObjectHelper.after(next, ">");
155                // build result with inherited namespaces
156                next = sb.append(tag).append(rootTokenNamespaces).append(">").append(text).append(endTagSb.toString()).toString();
157            } else {
158                next = sb.append(next).append(endTagSb.toString()).toString();
159            }
160
161            return next;
162        }
163
164        private String getNamespacesFromNamespaceToken(String text) {
165            if (text == null) {
166                return null;
167            }
168
169            // grab the namespace tag
170            Matcher mat = inheritNamespaceTokenPattern.matcher(text);
171            if (mat.find()) {
172                text = mat.group(0);
173            } else {
174                // cannot find namespace tag
175                return null;
176            }
177
178            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
179            Map<String, String> namespaces = new LinkedHashMap<String, String>();
180            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
181            while (matcher.find()) {
182                String prefix = matcher.group(1);
183                String url = matcher.group(2);
184                if (ObjectHelper.isEmpty(prefix)) {
185                    prefix = "_DEFAULT_";
186                } else {
187                    // skip leading :
188                    prefix = prefix.substring(1);
189                }
190                namespaces.put(prefix, url);
191            }
192
193            // did we find any namespaces
194            if (namespaces.isEmpty()) {
195                return null;
196            }
197
198            // build namespace String
199            StringBuilder sb = new StringBuilder();
200            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
201                String key = entry.getKey();
202                String value = entry.getValue();
203                if ("_DEFAULT_".equals(key)) {
204                    sb.append(" xmlns=\"").append(value).append("\"");
205                } else {
206                    sb.append(" xmlns:").append(key).append("=\"").append(value).append("\"");
207                }
208            }
209
210            return sb.toString();
211        }
212    }
213
214}