001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.support; 018 019import java.io.Closeable; 020import java.io.IOException; 021import java.io.InputStream; 022import java.text.MessageFormat; 023import java.util.ArrayList; 024import java.util.Iterator; 025import java.util.LinkedHashMap; 026import java.util.List; 027import java.util.Map; 028import java.util.Scanner; 029import java.util.regex.MatchResult; 030import java.util.regex.Matcher; 031import java.util.regex.Pattern; 032 033import org.apache.camel.Exchange; 034import org.apache.camel.InvalidPayloadException; 035import org.apache.camel.language.simple.SimpleLanguage; 036import org.apache.camel.util.IOHelper; 037import org.apache.camel.util.ObjectHelper; 038import org.apache.camel.util.StringHelper; 039 040/** 041 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body 042 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token, 043 * where the end token corresponds implicitly to either the end tag or the self-closing start tag. 044 * <p/> 045 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream 046 * to access the message body. 047 * <p/> 048 * Can be used to split big XML files. 049 * <p/> 050 * This implementation supports inheriting namespaces from a parent/root tag. 051 */ 052public class TokenXMLExpressionIterator extends ExpressionAdapter { 053 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")"); 054 private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)"; 055 private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>"; 056 private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>"; 057 private static final String OPTION_WRAP_TOKEN = "<*>"; 058 059 protected final String tagToken; 060 protected final String inheritNamespaceToken; 061 062 public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) { 063 StringHelper.notEmpty(tagToken, "tagToken"); 064 this.tagToken = tagToken; 065 // namespace token is optional 066 this.inheritNamespaceToken = inheritNamespaceToken; 067 } 068 069 protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) { 070 String tag = tagToken; 071 if (SimpleLanguage.hasSimpleFunction(tag)) { 072 tag = SimpleLanguage.expression(tag).evaluate(exchange, String.class); 073 } 074 String inherit = inheritNamespaceToken; 075 if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) { 076 inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class); 077 } 078 079 // must be XML tokens 080 if (!tag.startsWith("<")) { 081 tag = "<" + tag; 082 } 083 if (!tag.endsWith(">")) { 084 tag = tag + ">"; 085 } 086 087 if (inherit != null) { 088 if (!inherit.startsWith("<")) { 089 inherit = "<" + inherit; 090 } 091 if (!inherit.endsWith(">")) { 092 inherit = inherit + ">"; 093 } 094 } 095 096 // must be XML tokens 097 if (!tag.startsWith("<") || !tag.endsWith(">")) { 098 throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tag); 099 } 100 if (inherit != null && (!inherit.startsWith("<") || !inherit.endsWith(">"))) { 101 throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inherit); 102 } 103 104 XMLTokenIterator iterator = new XMLTokenIterator(tag, inherit, in, charset); 105 iterator.init(); 106 return iterator; 107 } 108 109 @Override 110 public boolean matches(Exchange exchange) { 111 // as a predicate we must close the stream, as we do not return an iterator that can be used 112 // afterwards to iterate the input stream 113 Object value = doEvaluate(exchange, true); 114 return ObjectHelper.evaluateValuePredicate(value); 115 } 116 117 @Override 118 public Object evaluate(Exchange exchange) { 119 // as we return an iterator to access the input stream, we should not close it 120 return doEvaluate(exchange, false); 121 } 122 123 /** 124 * Strategy to evaluate the exchange 125 * 126 * @param exchange the exchange 127 * @param closeStream whether to close the stream before returning from this method. 128 * @return the evaluated value 129 */ 130 protected Object doEvaluate(Exchange exchange, boolean closeStream) { 131 InputStream in = null; 132 try { 133 in = exchange.getIn().getMandatoryBody(InputStream.class); 134 // we may read from a file, and want to support custom charset defined on the exchange 135 String charset = IOHelper.getCharsetName(exchange); 136 return createIterator(exchange, in, charset); 137 } catch (InvalidPayloadException e) { 138 exchange.setException(e); 139 // must close input stream 140 IOHelper.close(in); 141 return null; 142 } finally { 143 if (closeStream) { 144 IOHelper.close(in); 145 } 146 } 147 } 148 149 /** 150 * Iterator to walk the input stream 151 */ 152 static class XMLTokenIterator implements Iterator<Object>, Closeable { 153 final String tagToken; 154 final InputStream in; 155 final String charset; 156 Scanner scanner; 157 Object image; 158 159 private final Pattern tagTokenPattern; 160 private final String inheritNamespaceToken; 161 private final boolean wrapToken; 162 private Pattern inheritNamespaceTokenPattern; 163 private String rootTokenNamespaces; 164 private String wrapHead; 165 private String wrapTail; 166 167 XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) { 168 this.tagToken = tagToken; 169 this.charset = charset; 170 171 // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns 172 this.tagTokenPattern = 173 Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, 174 SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), 175 Pattern.MULTILINE | Pattern.DOTALL); 176 177 this.inheritNamespaceToken = inheritNamespaceToken; 178 if (inheritNamespaceToken != null && OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) { 179 this.wrapToken = true; 180 this.in = new RecordableInputStream(in, charset); 181 } else { 182 this.wrapToken = false; 183 this.in = in; 184 if (inheritNamespaceToken != null) { 185 // the inherit namespace token may itself have a namespace prefix 186 // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines 187 this.inheritNamespaceTokenPattern = 188 Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE, 189 SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 190 Pattern.MULTILINE | Pattern.DOTALL); 191 } 192 } 193 } 194 195 void init() { 196 // use a scanner with the default delimiter 197 this.scanner = new Scanner(in, charset); 198 this.image = scanner.hasNext() ? (String) next(true) : null; 199 } 200 201 String getNext(boolean first) { 202 // initialize inherited namespaces on first 203 if (first && inheritNamespaceToken != null && !wrapToken) { 204 rootTokenNamespaces = getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0)); 205 } 206 207 String next = scanner.findWithinHorizon(tagTokenPattern, 0); 208 if (next == null) { 209 return null; 210 } 211 if (first && wrapToken) { 212 MatchResult mres = scanner.match(); 213 wrapHead = ((RecordableInputStream)in).getText(mres.start()); 214 wrapTail = buildXMLTail(wrapHead); 215 } 216 217 // build answer accordingly to whether namespaces should be inherited or not 218 if (inheritNamespaceToken != null && rootTokenNamespaces != null) { 219 // REVISIT should skip the prefixes that are declared within the child itself. 220 String head = StringHelper.before(next, ">"); 221 boolean empty = false; 222 if (head.endsWith("/")) { 223 head = head.substring(0, head.length() - 1); 224 empty = true; 225 } 226 StringBuilder sb = new StringBuilder(); 227 // append root namespaces to local start token 228 // grab the text 229 String tail = StringHelper.after(next, ">"); 230 // build result with inherited namespaces 231 next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString(); 232 } else if (wrapToken) { 233 // wrap the token 234 StringBuilder sb = new StringBuilder(); 235 next = sb.append(wrapHead).append(next).append(wrapTail).toString(); 236 } 237 238 return next; 239 } 240 241 private String getNamespacesFromNamespaceToken(String text) { 242 if (text == null) { 243 return null; 244 } 245 246 // find namespaces (there can be attributes mixed, so we should only grab the namespaces) 247 Map<String, String> namespaces = new LinkedHashMap<>(); 248 Matcher matcher = NAMESPACE_PATTERN.matcher(text); 249 while (matcher.find()) { 250 String prefix = matcher.group(1); 251 String url = matcher.group(2); 252 if (ObjectHelper.isEmpty(prefix)) { 253 prefix = "_DEFAULT_"; 254 } else { 255 // skip leading : 256 prefix = prefix.substring(1); 257 } 258 namespaces.put(prefix, url); 259 } 260 261 // did we find any namespaces 262 if (namespaces.isEmpty()) { 263 return null; 264 } 265 266 // build namespace String 267 StringBuilder sb = new StringBuilder(); 268 for (Map.Entry<String, String> entry : namespaces.entrySet()) { 269 String key = entry.getKey(); 270 // note the value is already quoted 271 String value = entry.getValue(); 272 if ("_DEFAULT_".equals(key)) { 273 sb.append(" xmlns=").append(value); 274 } else { 275 sb.append(" xmlns:").append(key).append("=").append(value); 276 } 277 } 278 279 return sb.toString(); 280 } 281 282 @Override 283 public boolean hasNext() { 284 return image != null; 285 } 286 287 @Override 288 public Object next() { 289 return next(false); 290 } 291 292 Object next(boolean first) { 293 Object answer = image; 294 // calculate next 295 if (scanner.hasNext()) { 296 image = getNext(first); 297 } else { 298 image = null; 299 } 300 301 if (answer == null) { 302 // first time the image may be null 303 answer = image; 304 } 305 return answer; 306 } 307 308 @Override 309 public void remove() { 310 // noop 311 } 312 313 @Override 314 public void close() throws IOException { 315 scanner.close(); 316 } 317 318 } 319 320 private static String buildXMLTail(String xmlhead) { 321 // assume the input text is a portion of a well-formed xml 322 List<String> tags = new ArrayList<>(); 323 int p = 0; 324 while (p < xmlhead.length()) { 325 p = xmlhead.indexOf('<', p); 326 if (p < 0) { 327 break; 328 } 329 int nc = xmlhead.charAt(p + 1); 330 if (nc == '?') { 331 p++; 332 continue; 333 } else if (nc == '/') { 334 p++; 335 tags.remove(tags.size() - 1); 336 } else { 337 final int ep = xmlhead.indexOf('>', p); 338 if (xmlhead.charAt(ep - 1) == '/') { 339 p++; 340 continue; 341 } 342 final int sp = xmlhead.substring(p, ep).indexOf(' '); 343 tags.add(xmlhead.substring(p + 1, sp > 0 ? p + sp : ep)); 344 p = ep; 345 } 346 } 347 StringBuilder sb = new StringBuilder(); 348 for (int i = tags.size() - 1; i >= 0; i--) { 349 sb.append("</").append(tags.get(i)).append(">"); 350 } 351 return sb.toString(); 352 } 353}