001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.support; 018 019import java.io.InputStream; 020import java.util.Iterator; 021import java.util.LinkedHashMap; 022import java.util.Map; 023import java.util.regex.Matcher; 024import java.util.regex.Pattern; 025 026import org.apache.camel.Exchange; 027import org.apache.camel.language.simple.SimpleLanguage; 028import org.apache.camel.util.ObjectHelper; 029import org.apache.camel.util.Scanner; 030import org.apache.camel.util.StringHelper; 031 032/** 033 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body 034 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token. 035 * <p/> 036 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream 037 * to access the message body. 038 * <p/> 039 * Can be used to split big XML files. 040 * <p/> 041 * This implementation supports inheriting namespaces from a parent/root tag. 042 * 043 * @deprecated use {@link TokenXMLExpressionIterator} instead. 044 */ 045@Deprecated 046public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator { 047 048 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\""); 049 private static final String SCAN_TOKEN_REGEX = "(\\s+.*?|)>"; 050 private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "(.{1,15}?:|)"; 051 protected final String inheritNamespaceToken; 052 053 public TokenXMLPairExpressionIterator(String startToken, String endToken, String inheritNamespaceToken) { 054 super(startToken, endToken, true); 055 // namespace token is optional 056 this.inheritNamespaceToken = inheritNamespaceToken; 057 } 058 059 @Override 060 protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) { 061 String start = startToken; 062 if (SimpleLanguage.hasSimpleFunction(start)) { 063 start = SimpleLanguage.expression(start).evaluate(exchange, String.class); 064 } 065 String end = endToken; 066 if (SimpleLanguage.hasSimpleFunction(end)) { 067 end = SimpleLanguage.expression(end).evaluate(exchange, String.class); 068 } 069 String inherit = inheritNamespaceToken; 070 if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) { 071 inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class); 072 } 073 074 // must be XML tokens 075 if (!start.startsWith("<") || !start.endsWith(">")) { 076 throw new IllegalArgumentException("Start token must be a valid XML token, was: " + start); 077 } 078 if (!end.startsWith("<") || !end.endsWith(">")) { 079 throw new IllegalArgumentException("End token must be a valid XML token, was: " + end); 080 } 081 if (inherit != null && (!inherit.startsWith("<") || !inherit.endsWith(">"))) { 082 throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inherit); 083 } 084 085 XMLTokenPairIterator iterator = new XMLTokenPairIterator(start, end, inherit, in, charset); 086 iterator.init(); 087 return iterator; 088 } 089 090 /** 091 * Iterator to walk the input stream 092 */ 093 static class XMLTokenPairIterator extends TokenPairIterator { 094 095 private final Pattern startTokenPattern; 096 private final String scanEndToken; 097 private final String inheritNamespaceToken; 098 private Pattern inheritNamespaceTokenPattern; 099 private String rootTokenNamespaces; 100 101 XMLTokenPairIterator(String startToken, String endToken, String inheritNamespaceToken, InputStream in, String charset) { 102 super(startToken, endToken, true, in, charset); 103 104 // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns 105 StringBuilder tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX). 106 append(startToken.substring(1, startToken.length() - 1)).append(SCAN_TOKEN_REGEX); 107 this.startTokenPattern = Pattern.compile(tokenSb.toString()); 108 109 tokenSb = new StringBuilder("</").append(SCAN_TOKEN_NS_PREFIX_REGEX). 110 append(endToken.substring(2, endToken.length() - 1)).append(SCAN_TOKEN_REGEX); 111 this.scanEndToken = tokenSb.toString(); 112 113 this.inheritNamespaceToken = inheritNamespaceToken; 114 if (inheritNamespaceToken != null) { 115 // the inherit namespace token may itself have a namespace prefix 116 tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX). 117 append(inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)).append(SCAN_TOKEN_REGEX); 118 // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines 119 this.inheritNamespaceTokenPattern = Pattern.compile(tokenSb.toString(), Pattern.MULTILINE | Pattern.DOTALL); 120 } 121 } 122 123 @Override 124 void init() { 125 // use scan end token as delimiter which supports attributes/namespaces 126 this.scanner = new Scanner(in, charset, scanEndToken); 127 // this iterator will do look ahead as we may have data 128 // after the last end token, which the scanner would find 129 // so we need to be one step ahead of the scanner 130 this.image = scanner.hasNext() ? (String) next(true) : null; 131 } 132 133 @Override 134 String getNext(boolean first) { 135 String next = scanner.next(); 136 if (next == null) { 137 return null; 138 } 139 140 // initialize inherited namespaces on first 141 if (first && inheritNamespaceToken != null) { 142 rootTokenNamespaces = getNamespacesFromNamespaceToken(next); 143 } 144 145 // make sure next is positioned at start token as we can have leading data 146 // or we reached EOL and there is no more start tags 147 Matcher matcher = startTokenPattern.matcher(next); 148 if (!matcher.find()) { 149 return null; 150 } else { 151 int index = matcher.start(); 152 next = next.substring(index); 153 } 154 155 // make sure the end tag matches the begin tag if the tag has a namespace prefix 156 String tag = StringHelper.before(next, ">"); 157 StringBuilder endTagSb = new StringBuilder("</"); 158 int firstSpaceIndex = tag.indexOf(" "); 159 if (firstSpaceIndex > 0) { 160 endTagSb.append(tag.substring(1, firstSpaceIndex)).append(">"); 161 } else { 162 endTagSb.append(tag.substring(1, tag.length())).append(">"); 163 } 164 165 // build answer accordingly to whether namespaces should be inherited or not 166 StringBuilder sb = new StringBuilder(); 167 if (inheritNamespaceToken != null && rootTokenNamespaces != null) { 168 // append root namespaces to local start token 169 // grab the text 170 String text = StringHelper.after(next, ">"); 171 // build result with inherited namespaces 172 next = sb.append(tag).append(rootTokenNamespaces).append(">").append(text).append(endTagSb.toString()).toString(); 173 } else { 174 next = sb.append(next).append(endTagSb.toString()).toString(); 175 } 176 177 return next; 178 } 179 180 private String getNamespacesFromNamespaceToken(String text) { 181 if (text == null) { 182 return null; 183 } 184 185 // grab the namespace tag 186 Matcher mat = inheritNamespaceTokenPattern.matcher(text); 187 if (mat.find()) { 188 text = mat.group(0); 189 } else { 190 // cannot find namespace tag 191 return null; 192 } 193 194 // find namespaces (there can be attributes mixed, so we should only grab the namespaces) 195 Map<String, String> namespaces = new LinkedHashMap<>(); 196 Matcher matcher = NAMESPACE_PATTERN.matcher(text); 197 while (matcher.find()) { 198 String prefix = matcher.group(1); 199 String url = matcher.group(2); 200 if (ObjectHelper.isEmpty(prefix)) { 201 prefix = "_DEFAULT_"; 202 } else { 203 // skip leading : 204 prefix = prefix.substring(1); 205 } 206 namespaces.put(prefix, url); 207 } 208 209 // did we find any namespaces 210 if (namespaces.isEmpty()) { 211 return null; 212 } 213 214 // build namespace String 215 StringBuilder sb = new StringBuilder(); 216 for (Map.Entry<String, String> entry : namespaces.entrySet()) { 217 String key = entry.getKey(); 218 String value = entry.getValue(); 219 if ("_DEFAULT_".equals(key)) { 220 sb.append(" xmlns=\"").append(value).append("\""); 221 } else { 222 sb.append(" xmlns:").append(key).append("=\"").append(value).append("\""); 223 } 224 } 225 226 return sb.toString(); 227 } 228 } 229 230}