001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.support; 018 019import java.io.Closeable; 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.InputStreamReader; 023import java.io.Reader; 024import java.io.UnsupportedEncodingException; 025import java.util.ArrayList; 026import java.util.HashMap; 027import java.util.HashSet; 028import java.util.Iterator; 029import java.util.List; 030import java.util.Map; 031import java.util.Map.Entry; 032import java.util.Set; 033import java.util.regex.Matcher; 034import java.util.regex.Pattern; 035 036import javax.xml.namespace.QName; 037import javax.xml.stream.XMLStreamConstants; 038import javax.xml.stream.XMLStreamException; 039import javax.xml.stream.XMLStreamReader; 040 041import org.apache.camel.Exchange; 042import org.apache.camel.InvalidPayloadException; 043import org.apache.camel.converter.jaxp.StaxConverter; 044import org.apache.camel.spi.NamespaceAware; 045import org.apache.camel.util.IOHelper; 046import org.apache.camel.util.ObjectHelper; 047import org.slf4j.Logger; 048import org.slf4j.LoggerFactory; 049 050/** 051 * An {@link org.apache.camel.language.tokenizer.XMLTokenizeLanguage} based iterator. 052 */ 053public class XMLTokenExpressionIterator extends ExpressionAdapter implements NamespaceAware { 054 protected final String path; 055 protected char mode; 056 protected int group; 057 protected Map<String, String> nsmap; 058 059 public XMLTokenExpressionIterator(String path, char mode) { 060 this(path, mode, 1); 061 } 062 063 public XMLTokenExpressionIterator(String path, char mode, int group) { 064 ObjectHelper.notEmpty(path, "path"); 065 this.path = path; 066 this.mode = mode; 067 this.group = group > 1 ? group : 1; 068 } 069 070 @Override 071 public void setNamespaces(Map<String, String> nsmap) { 072 this.nsmap = nsmap; 073 } 074 075 @Override 076 public Map<String, String> getNamespaces() { 077 return nsmap; 078 } 079 080 public void setMode(char mode) { 081 this.mode = mode; 082 } 083 084 public void setMode(String mode) { 085 this.mode = mode != null ? mode.charAt(0) : 0; 086 } 087 088 public int getGroup() { 089 return group; 090 } 091 092 public void setGroup(int group) { 093 this.group = group; 094 } 095 096 protected Iterator<?> createIterator(InputStream in, String charset) throws XMLStreamException, UnsupportedEncodingException { 097 return new XMLTokenIterator(path, nsmap, mode, group, in, charset); 098 } 099 100 protected Iterator<?> createIterator(Reader in) throws XMLStreamException { 101 return new XMLTokenIterator(path, nsmap, mode, group, in); 102 } 103 104 @Override 105 public boolean matches(Exchange exchange) { 106 // as a predicate we must close the stream, as we do not return an iterator that can be used 107 // afterwards to iterate the input stream 108 Object value = doEvaluate(exchange, true); 109 return ObjectHelper.evaluateValuePredicate(value); 110 } 111 112 @Override 113 public Object evaluate(Exchange exchange) { 114 // as we return an iterator to access the input stream, we should not close it 115 return doEvaluate(exchange, false); 116 } 117 118 /** 119 * Strategy to evaluate the exchange 120 * 121 * @param exchange the exchange 122 * @param closeStream whether to close the stream before returning from this method. 123 * @return the evaluated value 124 */ 125 protected Object doEvaluate(Exchange exchange, boolean closeStream) { 126 InputStream in = null; 127 try { 128 in = exchange.getIn().getMandatoryBody(InputStream.class); 129 String charset = IOHelper.getCharsetName(exchange); 130 return createIterator(in, charset); 131 } catch (InvalidPayloadException e) { 132 exchange.setException(e); 133 // must close input stream 134 IOHelper.close(in); 135 return null; 136 } catch (XMLStreamException e) { 137 exchange.setException(e); 138 // must close input stream 139 IOHelper.close(in); 140 return null; 141 } catch (UnsupportedEncodingException e) { 142 exchange.setException(e); 143 // must close input stream 144 IOHelper.close(in); 145 return null; 146 } finally { 147 if (closeStream) { 148 IOHelper.close(in); 149 } 150 } 151 } 152 153 154 static class XMLTokenIterator implements Iterator<Object>, Closeable { 155 private static final Logger LOG = LoggerFactory.getLogger(XMLTokenIterator.class); 156 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']*'|\"[^\"]*\")"); 157 158 private transient InputStream originalInputStream; 159 160 private AttributedQName[] splitpath; 161 private int index; 162 private char mode; 163 private int group; 164 private RecordableReader in; 165 private XMLStreamReader reader; 166 private List<QName> path; 167 private List<Map<String, String>> namespaces; 168 private List<String> segments; 169 private List<QName> segmentlog; 170 private List<String> tokens; 171 private int code; 172 private int consumed; 173 private boolean backtrack; 174 private int trackdepth = -1; 175 private int depth; 176 private boolean compliant; 177 178 private Object nextToken; 179 180 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, InputStream in, String charset) 181 throws XMLStreamException, UnsupportedEncodingException { 182 // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead. 183 this(path, nsmap, mode, 1, new InputStreamReader(in, charset)); 184 this.originalInputStream = in; 185 } 186 187 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, InputStream in, String charset) 188 throws XMLStreamException, UnsupportedEncodingException { 189 // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead. 190 this(path, nsmap, mode, group, new InputStreamReader(in, charset)); 191 this.originalInputStream = in; 192 } 193 194 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, Reader in) throws XMLStreamException { 195 this(path, nsmap, mode, 1, in); 196 } 197 198 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, Reader in) throws XMLStreamException { 199 final String[] sl = path.substring(1).split("/"); 200 this.splitpath = new AttributedQName[sl.length]; 201 for (int i = 0; i < sl.length; i++) { 202 String s = sl[i]; 203 if (s.length() > 0) { 204 int d = s.indexOf(':'); 205 String pfx = d > 0 ? s.substring(0, d) : ""; 206 this.splitpath[i] = 207 new AttributedQName( 208 "*".equals(pfx) ? "*" : nsmap == null ? "" : nsmap.get(pfx), d > 0 ? s.substring(d + 1) : s, pfx); 209 } 210 } 211 212 this.mode = mode != 0 ? mode : 'i'; 213 this.group = group > 0 ? group : 1; 214 this.in = new RecordableReader(in); 215 this.reader = new StaxConverter().createXMLStreamReader(this.in); 216 217 LOG.trace("reader.class: {}", reader.getClass()); 218 // perform the first offset compliance test 219 int coff = reader.getLocation().getCharacterOffset(); 220 if (coff != 0) { 221 LOG.error("XMLStreamReader {} not supporting Location"); 222 throw new XMLStreamException("reader not supporting Location"); 223 } 224 225 this.path = new ArrayList<QName>(); 226 227 // wrapped mode needs the segments and the injected mode needs the namespaces 228 if (this.mode == 'w') { 229 this.segments = new ArrayList<String>(); 230 this.segmentlog = new ArrayList<QName>(); 231 } else if (this.mode == 'i') { 232 this.namespaces = new ArrayList<Map<String, String>>(); 233 } 234 // when grouping the tokens, allocate the storage to temporarily store tokens. 235 if (this.group > 1) { 236 this.tokens = new ArrayList<String>(); 237 } 238 this.nextToken = getNextToken(); 239 } 240 241 private boolean isDoS() { 242 return splitpath[index] == null; 243 } 244 245 private AttributedQName current() { 246 return splitpath[index + (isDoS() ? 1 : 0)]; 247 } 248 249 private AttributedQName ancestor() { 250 return index == 0 ? null : splitpath[index - 1]; 251 } 252 253 private void down() { 254 if (isDoS()) { 255 index++; 256 } 257 index++; 258 } 259 260 private void up() { 261 index--; 262 } 263 264 private boolean isBottom() { 265 return index == splitpath.length - (isDoS() ? 2 : 1); 266 } 267 268 private boolean isTop() { 269 return index == 0; 270 } 271 272 private int readNext() throws XMLStreamException { 273 int c = code; 274 if (c > 0) { 275 code = 0; 276 } else { 277 c = reader.next(); 278 } 279 return c; 280 } 281 282 private String getCurrentText() { 283 int pos = reader.getLocation().getCharacterOffset(); 284 String txt = in.getText(pos - consumed); 285 consumed = pos; 286 // keep recording 287 in.record(); 288 return txt; 289 } 290 291 private void pushName(QName name) { 292 path.add(name); 293 } 294 295 private QName popName() { 296 return path.remove(path.size() - 1); 297 } 298 299 private void pushSegment(QName qname, String token) { 300 segments.add(token); 301 segmentlog.add(qname); 302 } 303 304 private String popSegment() { 305 return segments.remove(segments.size() - 1); 306 } 307 308 private QName peekLog() { 309 return segmentlog.get(segmentlog.size() - 1); 310 } 311 312 private QName popLog() { 313 return segmentlog.remove(segmentlog.size() - 1); 314 } 315 316 private void pushNamespaces(XMLStreamReader reader) { 317 Map<String, String> m = new HashMap<String, String>(); 318 if (namespaces.size() > 0) { 319 m.putAll(namespaces.get(namespaces.size() - 1)); 320 } 321 for (int i = 0; i < reader.getNamespaceCount(); i++) { 322 m.put(reader.getNamespacePrefix(i), reader.getNamespaceURI(i)); 323 } 324 namespaces.add(m); 325 } 326 327 private void popNamespaces() { 328 namespaces.remove(namespaces.size() - 1); 329 } 330 331 private Map<String, String> getCurrentNamespaceBindings() { 332 return namespaces.get(namespaces.size() - 1); 333 } 334 335 private void readCurrent(boolean incl) throws XMLStreamException { 336 int d = depth; 337 while (d <= depth) { 338 int code = reader.next(); 339 if (code == XMLStreamConstants.START_ELEMENT) { 340 depth++; 341 } else if (code == XMLStreamConstants.END_ELEMENT) { 342 depth--; 343 } 344 } 345 // either look ahead to the next token or stay at the end element token 346 if (incl) { 347 code = reader.next(); 348 } else { 349 code = reader.getEventType(); 350 if (code == XMLStreamConstants.END_ELEMENT) { 351 // revert the depth count to avoid double counting the up event 352 depth++; 353 } 354 } 355 } 356 357 private String getCurrentToken() throws XMLStreamException { 358 readCurrent(true); 359 popName(); 360 361 String token = createContextualToken(getCurrentText()); 362 if (mode == 'i') { 363 popNamespaces(); 364 } 365 366 return token; 367 } 368 369 private String createContextualToken(String token) { 370 StringBuilder sb = new StringBuilder(); 371 if (mode == 'w' && group == 1) { 372 for (int i = 0; i < segments.size(); i++) { 373 sb.append(segments.get(i)); 374 } 375 sb.append(token); 376 for (int i = path.size() - 1; i >= 0; i--) { 377 QName q = path.get(i); 378 sb.append("</").append(makeName(q)).append(">"); 379 } 380 381 } else if (mode == 'i') { 382 final String stag = token.substring(0, token.indexOf('>') + 1); 383 Set<String> skip = new HashSet<String>(); 384 Matcher matcher = NAMESPACE_PATTERN.matcher(stag); 385 char quote = 0; 386 while (matcher.find()) { 387 String prefix = matcher.group(1); 388 if (prefix.length() > 0) { 389 prefix = prefix.substring(1); 390 } 391 skip.add(prefix); 392 if (quote == 0) { 393 quote = matcher.group(2).charAt(0); 394 } 395 } 396 if (quote == 0) { 397 quote = '"'; 398 } 399 boolean empty = stag.endsWith("/>"); 400 sb.append(token.substring(0, stag.length() - (empty ? 2 : 1))); 401 for (Entry<String, String> e : getCurrentNamespaceBindings().entrySet()) { 402 if (!skip.contains(e.getKey())) { 403 sb.append(e.getKey().length() == 0 ? " xmlns" : " xmlns:") 404 .append(e.getKey()).append("=").append(quote).append(e.getValue()).append(quote); 405 } 406 } 407 sb.append(token.substring(stag.length() - (empty ? 2 : 1))); 408 } else if (mode == 'u') { 409 int bp = token.indexOf(">"); 410 int ep = token.lastIndexOf("</"); 411 if (bp > 0 && ep > 0) { 412 sb.append(token.substring(bp + 1, ep)); 413 } 414 } else if (mode == 't') { 415 int bp = 0; 416 for (;;) { 417 int ep = token.indexOf('>', bp); 418 bp = token.indexOf('<', ep); 419 if (bp < 0) { 420 break; 421 } 422 sb.append(token.substring(ep + 1, bp)); 423 } 424 } else { 425 return token; 426 } 427 428 return sb.toString(); 429 } 430 431 private String getGroupedToken() { 432 StringBuilder sb = new StringBuilder(); 433 if (mode == 'w') { 434 // for wrapped 435 for (int i = 0; i < segments.size(); i++) { 436 sb.append(segments.get(i)); 437 } 438 for (String s : tokens) { 439 sb.append(s); 440 } 441 for (int i = path.size() - 1; i >= 0; i--) { 442 QName q = path.get(i); 443 sb.append("</").append(makeName(q)).append(">"); 444 } 445 } else { 446 // for injected, unwrapped, text 447 sb.append("<group>"); 448 for (String s : tokens) { 449 sb.append(s); 450 } 451 sb.append("</group>"); 452 } 453 tokens.clear(); 454 return sb.toString(); 455 } 456 457 private String getNextToken() throws XMLStreamException { 458 int xcode = 0; 459 while (xcode != XMLStreamConstants.END_DOCUMENT) { 460 xcode = readNext(); 461 462 switch (xcode) { 463 case XMLStreamConstants.START_ELEMENT: 464 depth++; 465 QName name = reader.getName(); 466 if (LOG.isTraceEnabled()) { 467 LOG.trace("se={}; depth={}; trackdepth={}", new Object[]{name, depth, trackdepth}); 468 } 469 470 String token = getCurrentText(); 471 // perform the second compliance test 472 if (!compliant) { 473 if (token != null && token.startsWith("<") && !token.startsWith("<?")) { 474 LOG.error("XMLStreamReader {} not supporting Location"); 475 throw new XMLStreamException("reader not supporting Location"); 476 } 477 compliant = true; 478 } 479 480 LOG.trace("token={}", token); 481 if (!backtrack && mode == 'w') { 482 pushSegment(name, token); 483 } 484 pushName(name); 485 if (mode == 'i') { 486 pushNamespaces(reader); 487 } 488 backtrack = false; 489 if (current().matches(name)) { 490 // mark the position of the match in the segments list 491 if (isBottom()) { 492 // final match 493 token = getCurrentToken(); 494 backtrack = true; 495 trackdepth = depth; 496 if (group > 1) { 497 tokens.add(token); 498 if (group == tokens.size()) { 499 return getGroupedToken(); 500 } 501 } else { 502 return token; 503 } 504 } else { 505 // intermediary match 506 down(); 507 } 508 } else if (isDoS()) { 509 // continue 510 } else { 511 // skip 512 readCurrent(false); 513 } 514 break; 515 case XMLStreamConstants.END_ELEMENT: 516 if ((backtrack || (trackdepth > 0 && depth == trackdepth)) 517 && (mode == 'w' && group > 1 && tokens.size() > 0)) { 518 // flush the left over using the current context 519 code = XMLStreamConstants.END_ELEMENT; 520 return getGroupedToken(); 521 } 522 523 depth--; 524 QName endname = reader.getName(); 525 LOG.trace("ee={}", endname); 526 527 popName(); 528 if (mode == 'i') { 529 popNamespaces(); 530 } 531 532 int pc = 0; 533 if (backtrack || (trackdepth > 0 && depth == trackdepth - 1)) { 534 // reactive backtrack if not backtracking and update the track depth 535 backtrack = true; 536 trackdepth--; 537 if (mode == 'w') { 538 while (!endname.equals(peekLog())) { 539 pc++; 540 popLog(); 541 } 542 } 543 } 544 545 if (backtrack) { 546 if (mode == 'w') { 547 for (int i = 0; i < pc; i++) { 548 popSegment(); 549 } 550 } 551 552 if ((ancestor() == null && !isTop()) 553 || (ancestor() != null && ancestor().matches(endname))) { 554 up(); 555 } 556 } 557 break; 558 case XMLStreamConstants.END_DOCUMENT: 559 LOG.trace("depth={}", depth); 560 if (group > 1 && tokens.size() > 0) { 561 // flush the left over before really going EoD 562 code = XMLStreamConstants.END_DOCUMENT; 563 return getGroupedToken(); 564 } 565 break; 566 default: 567 break; 568 } 569 } 570 return null; 571 } 572 573 private static String makeName(QName qname) { 574 String pfx = qname.getPrefix(); 575 return pfx.length() == 0 ? qname.getLocalPart() : qname.getPrefix() + ":" + qname.getLocalPart(); 576 } 577 578 @Override 579 public boolean hasNext() { 580 return nextToken != null; 581 } 582 583 @Override 584 public Object next() { 585 Object o = nextToken; 586 try { 587 nextToken = getNextToken(); 588 } catch (XMLStreamException e) { 589 nextToken = null; 590 throw new RuntimeException(e); 591 } 592 return o; 593 } 594 595 @Override 596 public void remove() { 597 // noop 598 } 599 600 @Override 601 public void close() throws IOException { 602 try { 603 reader.close(); 604 } catch (Exception e) { 605 // ignore 606 } 607 // need to close the original input stream as well as the reader do not delegate close it 608 if (originalInputStream != null) { 609 IOHelper.close(originalInputStream); 610 } 611 } 612 } 613 614 static class AttributedQName extends QName { 615 private static final long serialVersionUID = 9878370226894144L; 616 private Pattern lcpattern; 617 private boolean nsany; 618 619 AttributedQName(String localPart) { 620 super(localPart); 621 checkWildcard("", localPart); 622 } 623 624 AttributedQName(String namespaceURI, String localPart, String prefix) { 625 super(namespaceURI, localPart, prefix); 626 checkWildcard(namespaceURI, localPart); 627 } 628 629 AttributedQName(String namespaceURI, String localPart) { 630 super(namespaceURI, localPart); 631 checkWildcard(namespaceURI, localPart); 632 } 633 634 public boolean matches(QName qname) { 635 return (nsany || getNamespaceURI().equals(qname.getNamespaceURI())) 636 && (lcpattern != null 637 ? lcpattern.matcher(qname.getLocalPart()).matches() 638 : getLocalPart().equals(qname.getLocalPart())); 639 } 640 641 private void checkWildcard(String nsa, String lcp) { 642 nsany = "*".equals(nsa); 643 boolean wc = false; 644 for (int i = 0; i < lcp.length(); i++) { 645 char c = lcp.charAt(i); 646 if (c == '?' || c == '*') { 647 wc = true; 648 break; 649 } 650 } 651 if (wc) { 652 StringBuilder sb = new StringBuilder(); 653 for (int i = 0; i < lcp.length(); i++) { 654 char c = lcp.charAt(i); 655 switch (c) { 656 case '.': 657 sb.append("\\."); 658 break; 659 case '*': 660 sb.append(".*"); 661 break; 662 case '?': 663 sb.append('.'); 664 break; 665 default: 666 sb.append(c); 667 break; 668 } 669 } 670 lcpattern = Pattern.compile(sb.toString()); 671 } 672 } 673 } 674}