001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.support; 018 019import java.io.Closeable; 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.InputStreamReader; 023import java.io.Reader; 024import java.io.UnsupportedEncodingException; 025import java.util.ArrayList; 026import java.util.HashMap; 027import java.util.HashSet; 028import java.util.Iterator; 029import java.util.List; 030import java.util.Map; 031import java.util.Map.Entry; 032import java.util.Set; 033import java.util.regex.Matcher; 034import java.util.regex.Pattern; 035 036import javax.xml.namespace.QName; 037import javax.xml.stream.XMLStreamConstants; 038import javax.xml.stream.XMLStreamException; 039import javax.xml.stream.XMLStreamReader; 040 041import org.apache.camel.Exchange; 042import org.apache.camel.InvalidPayloadException; 043import org.apache.camel.converter.jaxp.StaxConverter; 044import org.apache.camel.spi.NamespaceAware; 045import org.apache.camel.util.IOHelper; 046import org.apache.camel.util.ObjectHelper; 047import org.apache.camel.util.StringHelper; 048import org.slf4j.Logger; 049import org.slf4j.LoggerFactory; 050 051/** 052 * An {@link org.apache.camel.language.tokenizer.XMLTokenizeLanguage} based iterator. 053 */ 054public class XMLTokenExpressionIterator extends ExpressionAdapter implements NamespaceAware { 055 protected final String path; 056 protected char mode; 057 protected int group; 058 protected Map<String, String> nsmap; 059 060 public XMLTokenExpressionIterator(String path, char mode) { 061 this(path, mode, 1); 062 } 063 064 public XMLTokenExpressionIterator(String path, char mode, int group) { 065 StringHelper.notEmpty(path, "path"); 066 this.path = path; 067 this.mode = mode; 068 this.group = group > 1 ? group : 1; 069 } 070 071 @Override 072 public void setNamespaces(Map<String, String> nsmap) { 073 this.nsmap = nsmap; 074 } 075 076 @Override 077 public Map<String, String> getNamespaces() { 078 return nsmap; 079 } 080 081 public void setMode(char mode) { 082 this.mode = mode; 083 } 084 085 public void setMode(String mode) { 086 this.mode = mode != null ? mode.charAt(0) : 0; 087 } 088 089 public int getGroup() { 090 return group; 091 } 092 093 public void setGroup(int group) { 094 this.group = group; 095 } 096 097 protected Iterator<?> createIterator(InputStream in, String charset) throws XMLStreamException, UnsupportedEncodingException { 098 return new XMLTokenIterator(path, nsmap, mode, group, in, charset); 099 } 100 101 protected Iterator<?> createIterator(Reader in) throws XMLStreamException { 102 return new XMLTokenIterator(path, nsmap, mode, group, in); 103 } 104 105 @Override 106 public boolean matches(Exchange exchange) { 107 // as a predicate we must close the stream, as we do not return an iterator that can be used 108 // afterwards to iterate the input stream 109 Object value = doEvaluate(exchange, true); 110 return ObjectHelper.evaluateValuePredicate(value); 111 } 112 113 @Override 114 public Object evaluate(Exchange exchange) { 115 // as we return an iterator to access the input stream, we should not close it 116 return doEvaluate(exchange, false); 117 } 118 119 /** 120 * Strategy to evaluate the exchange 121 * 122 * @param exchange the exchange 123 * @param closeStream whether to close the stream before returning from this method. 124 * @return the evaluated value 125 */ 126 protected Object doEvaluate(Exchange exchange, boolean closeStream) { 127 InputStream in = null; 128 try { 129 in = exchange.getIn().getMandatoryBody(InputStream.class); 130 String charset = IOHelper.getCharsetName(exchange); 131 return createIterator(in, charset); 132 } catch (InvalidPayloadException e) { 133 exchange.setException(e); 134 // must close input stream 135 IOHelper.close(in); 136 return null; 137 } catch (XMLStreamException e) { 138 exchange.setException(e); 139 // must close input stream 140 IOHelper.close(in); 141 return null; 142 } catch (UnsupportedEncodingException e) { 143 exchange.setException(e); 144 // must close input stream 145 IOHelper.close(in); 146 return null; 147 } finally { 148 if (closeStream) { 149 IOHelper.close(in); 150 } 151 } 152 } 153 154 155 static class XMLTokenIterator implements Iterator<Object>, Closeable { 156 private static final Logger LOG = LoggerFactory.getLogger(XMLTokenIterator.class); 157 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']*'|\"[^\"]*\")"); 158 159 private transient InputStream originalInputStream; 160 161 private AttributedQName[] splitpath; 162 private int index; 163 private char mode; 164 private int group; 165 private RecordableReader in; 166 private XMLStreamReader reader; 167 private List<QName> path; 168 private List<Map<String, String>> namespaces; 169 private List<String> segments; 170 private List<QName> segmentlog; 171 private List<String> tokens; 172 private int code; 173 private int consumed; 174 private boolean backtrack; 175 private int trackdepth = -1; 176 private int depth; 177 private boolean compliant; 178 179 private Object nextToken; 180 181 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, InputStream in, String charset) 182 throws XMLStreamException, UnsupportedEncodingException { 183 // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead. 184 this(path, nsmap, mode, 1, new InputStreamReader(in, charset)); 185 this.originalInputStream = in; 186 } 187 188 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, InputStream in, String charset) 189 throws XMLStreamException, UnsupportedEncodingException { 190 // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead. 191 this(path, nsmap, mode, group, new InputStreamReader(in, charset)); 192 this.originalInputStream = in; 193 } 194 195 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, Reader in) throws XMLStreamException { 196 this(path, nsmap, mode, 1, in); 197 } 198 199 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, Reader in) throws XMLStreamException { 200 final String[] sl = path.substring(1).split("/"); 201 this.splitpath = new AttributedQName[sl.length]; 202 for (int i = 0; i < sl.length; i++) { 203 String s = sl[i]; 204 if (s.length() > 0) { 205 int d = s.indexOf(':'); 206 String pfx = d > 0 ? s.substring(0, d) : ""; 207 this.splitpath[i] = 208 new AttributedQName( 209 "*".equals(pfx) ? "*" : nsmap == null ? "" : nsmap.get(pfx), d > 0 ? s.substring(d + 1) : s, pfx); 210 } 211 } 212 213 this.mode = mode != 0 ? mode : 'i'; 214 this.group = group > 0 ? group : 1; 215 this.in = new RecordableReader(in); 216 this.reader = new StaxConverter().createXMLStreamReader(this.in); 217 218 LOG.trace("reader.class: {}", reader.getClass()); 219 // perform the first offset compliance test 220 int coff = reader.getLocation().getCharacterOffset(); 221 if (coff != 0) { 222 LOG.error("XMLStreamReader {} not supporting Location"); 223 throw new XMLStreamException("reader not supporting Location"); 224 } 225 226 this.path = new ArrayList<>(); 227 228 // wrapped mode needs the segments and the injected mode needs the namespaces 229 if (this.mode == 'w') { 230 this.segments = new ArrayList<>(); 231 this.segmentlog = new ArrayList<>(); 232 } else if (this.mode == 'i') { 233 this.namespaces = new ArrayList<>(); 234 } 235 // when grouping the tokens, allocate the storage to temporarily store tokens. 236 if (this.group > 1) { 237 this.tokens = new ArrayList<>(); 238 } 239 this.nextToken = getNextToken(); 240 } 241 242 private boolean isDoS() { 243 return splitpath[index] == null; 244 } 245 246 private AttributedQName current() { 247 return splitpath[index + (isDoS() ? 1 : 0)]; 248 } 249 250 private AttributedQName ancestor() { 251 return index == 0 ? null : splitpath[index - 1]; 252 } 253 254 private void down() { 255 if (isDoS()) { 256 index++; 257 } 258 index++; 259 } 260 261 private void up() { 262 index--; 263 } 264 265 private boolean isBottom() { 266 return index == splitpath.length - (isDoS() ? 2 : 1); 267 } 268 269 private boolean isTop() { 270 return index == 0; 271 } 272 273 private int readNext() throws XMLStreamException { 274 int c = code; 275 if (c > 0) { 276 code = 0; 277 } else { 278 c = reader.next(); 279 } 280 return c; 281 } 282 283 private String getCurrentText() { 284 int pos = reader.getLocation().getCharacterOffset(); 285 String txt = in.getText(pos - consumed); 286 consumed = pos; 287 // keep recording 288 in.record(); 289 return txt; 290 } 291 292 private void pushName(QName name) { 293 path.add(name); 294 } 295 296 private QName popName() { 297 return path.remove(path.size() - 1); 298 } 299 300 private void pushSegment(QName qname, String token) { 301 segments.add(token); 302 segmentlog.add(qname); 303 } 304 305 private String popSegment() { 306 return segments.remove(segments.size() - 1); 307 } 308 309 private QName peekLog() { 310 return segmentlog.get(segmentlog.size() - 1); 311 } 312 313 private QName popLog() { 314 return segmentlog.remove(segmentlog.size() - 1); 315 } 316 317 private void pushNamespaces(XMLStreamReader reader) { 318 Map<String, String> m = new HashMap<>(); 319 if (namespaces.size() > 0) { 320 m.putAll(namespaces.get(namespaces.size() - 1)); 321 } 322 for (int i = 0; i < reader.getNamespaceCount(); i++) { 323 m.put(reader.getNamespacePrefix(i), reader.getNamespaceURI(i)); 324 } 325 namespaces.add(m); 326 } 327 328 private void popNamespaces() { 329 namespaces.remove(namespaces.size() - 1); 330 } 331 332 private Map<String, String> getCurrentNamespaceBindings() { 333 return namespaces.get(namespaces.size() - 1); 334 } 335 336 private void readCurrent(boolean incl) throws XMLStreamException { 337 int d = depth; 338 while (d <= depth) { 339 int code = reader.next(); 340 if (code == XMLStreamConstants.START_ELEMENT) { 341 depth++; 342 } else if (code == XMLStreamConstants.END_ELEMENT) { 343 depth--; 344 } 345 } 346 // either look ahead to the next token or stay at the end element token 347 if (incl) { 348 code = reader.next(); 349 } else { 350 code = reader.getEventType(); 351 if (code == XMLStreamConstants.END_ELEMENT) { 352 // revert the depth count to avoid double counting the up event 353 depth++; 354 } 355 } 356 } 357 358 private String getCurrentToken() throws XMLStreamException { 359 readCurrent(true); 360 popName(); 361 362 String token = createContextualToken(getCurrentText()); 363 if (mode == 'i') { 364 popNamespaces(); 365 } 366 367 return token; 368 } 369 370 private String createContextualToken(String token) { 371 StringBuilder sb = new StringBuilder(); 372 if (mode == 'w' && group == 1) { 373 for (int i = 0; i < segments.size(); i++) { 374 sb.append(segments.get(i)); 375 } 376 sb.append(token); 377 for (int i = path.size() - 1; i >= 0; i--) { 378 QName q = path.get(i); 379 sb.append("</").append(makeName(q)).append(">"); 380 } 381 382 } else if (mode == 'i') { 383 final String stag = token.substring(0, token.indexOf('>') + 1); 384 Set<String> skip = new HashSet<>(); 385 Matcher matcher = NAMESPACE_PATTERN.matcher(stag); 386 char quote = 0; 387 while (matcher.find()) { 388 String prefix = matcher.group(1); 389 if (prefix.length() > 0) { 390 prefix = prefix.substring(1); 391 } 392 skip.add(prefix); 393 if (quote == 0) { 394 quote = matcher.group(2).charAt(0); 395 } 396 } 397 if (quote == 0) { 398 quote = '"'; 399 } 400 boolean empty = stag.endsWith("/>"); 401 sb.append(token.substring(0, stag.length() - (empty ? 2 : 1))); 402 for (Entry<String, String> e : getCurrentNamespaceBindings().entrySet()) { 403 if (!skip.contains(e.getKey())) { 404 sb.append(e.getKey().length() == 0 ? " xmlns" : " xmlns:") 405 .append(e.getKey()).append("=").append(quote).append(e.getValue()).append(quote); 406 } 407 } 408 sb.append(token.substring(stag.length() - (empty ? 2 : 1))); 409 } else if (mode == 'u') { 410 int bp = token.indexOf(">"); 411 int ep = token.lastIndexOf("</"); 412 if (bp > 0 && ep > 0) { 413 sb.append(token.substring(bp + 1, ep)); 414 } 415 } else if (mode == 't') { 416 int bp = 0; 417 for (;;) { 418 int ep = token.indexOf('>', bp); 419 bp = token.indexOf('<', ep); 420 if (bp < 0) { 421 break; 422 } 423 sb.append(token.substring(ep + 1, bp)); 424 } 425 } else { 426 return token; 427 } 428 429 return sb.toString(); 430 } 431 432 private String getGroupedToken() { 433 StringBuilder sb = new StringBuilder(); 434 if (mode == 'w') { 435 // for wrapped 436 for (int i = 0; i < segments.size(); i++) { 437 sb.append(segments.get(i)); 438 } 439 for (String s : tokens) { 440 sb.append(s); 441 } 442 for (int i = path.size() - 1; i >= 0; i--) { 443 QName q = path.get(i); 444 sb.append("</").append(makeName(q)).append(">"); 445 } 446 } else { 447 // for injected, unwrapped, text 448 sb.append("<group>"); 449 for (String s : tokens) { 450 sb.append(s); 451 } 452 sb.append("</group>"); 453 } 454 tokens.clear(); 455 return sb.toString(); 456 } 457 458 private String getNextToken() throws XMLStreamException { 459 int xcode = 0; 460 while (xcode != XMLStreamConstants.END_DOCUMENT) { 461 xcode = readNext(); 462 463 switch (xcode) { 464 case XMLStreamConstants.START_ELEMENT: 465 depth++; 466 QName name = reader.getName(); 467 if (LOG.isTraceEnabled()) { 468 LOG.trace("se={}; depth={}; trackdepth={}", name, depth, trackdepth); 469 } 470 471 String token = getCurrentText(); 472 // perform the second compliance test 473 if (!compliant) { 474 if (token != null && token.startsWith("<") && !token.startsWith("<?")) { 475 LOG.error("XMLStreamReader {} not supporting Location"); 476 throw new XMLStreamException("reader not supporting Location"); 477 } 478 compliant = true; 479 } 480 481 LOG.trace("token={}", token); 482 if (!backtrack && mode == 'w') { 483 pushSegment(name, token); 484 } 485 pushName(name); 486 if (mode == 'i') { 487 pushNamespaces(reader); 488 } 489 backtrack = false; 490 if (current().matches(name)) { 491 // mark the position of the match in the segments list 492 if (isBottom()) { 493 // final match 494 token = getCurrentToken(); 495 backtrack = true; 496 trackdepth = depth; 497 if (group > 1) { 498 tokens.add(token); 499 if (group == tokens.size()) { 500 return getGroupedToken(); 501 } 502 } else { 503 return token; 504 } 505 } else { 506 // intermediary match 507 down(); 508 } 509 } else if (isDoS()) { 510 // continue 511 } else { 512 // skip 513 readCurrent(false); 514 } 515 break; 516 case XMLStreamConstants.END_ELEMENT: 517 if ((backtrack || (trackdepth > 0 && depth == trackdepth)) 518 && (mode == 'w' && group > 1 && tokens.size() > 0)) { 519 // flush the left over using the current context 520 code = XMLStreamConstants.END_ELEMENT; 521 return getGroupedToken(); 522 } 523 524 depth--; 525 QName endname = reader.getName(); 526 LOG.trace("ee={}", endname); 527 528 popName(); 529 if (mode == 'i') { 530 popNamespaces(); 531 } 532 533 int pc = 0; 534 if (backtrack || (trackdepth > 0 && depth == trackdepth - 1)) { 535 // reactive backtrack if not backtracking and update the track depth 536 backtrack = true; 537 trackdepth--; 538 if (mode == 'w') { 539 while (!endname.equals(peekLog())) { 540 pc++; 541 popLog(); 542 } 543 } 544 } 545 546 if (backtrack) { 547 if (mode == 'w') { 548 for (int i = 0; i < pc; i++) { 549 popSegment(); 550 } 551 } 552 553 if ((ancestor() == null && !isTop()) 554 || (ancestor() != null && ancestor().matches(endname))) { 555 up(); 556 } 557 } 558 break; 559 case XMLStreamConstants.END_DOCUMENT: 560 LOG.trace("depth={}", depth); 561 if (group > 1 && tokens.size() > 0) { 562 // flush the left over before really going EoD 563 code = XMLStreamConstants.END_DOCUMENT; 564 return getGroupedToken(); 565 } 566 break; 567 default: 568 break; 569 } 570 } 571 return null; 572 } 573 574 private static String makeName(QName qname) { 575 String pfx = qname.getPrefix(); 576 return pfx.length() == 0 ? qname.getLocalPart() : qname.getPrefix() + ":" + qname.getLocalPart(); 577 } 578 579 @Override 580 public boolean hasNext() { 581 return nextToken != null; 582 } 583 584 @Override 585 public Object next() { 586 Object o = nextToken; 587 try { 588 nextToken = getNextToken(); 589 } catch (XMLStreamException e) { 590 nextToken = null; 591 throw new RuntimeException(e); 592 } 593 return o; 594 } 595 596 @Override 597 public void remove() { 598 // noop 599 } 600 601 @Override 602 public void close() throws IOException { 603 try { 604 reader.close(); 605 } catch (Exception e) { 606 // ignore 607 } 608 // need to close the original input stream as well as the reader do not delegate close it 609 if (originalInputStream != null) { 610 IOHelper.close(originalInputStream); 611 } 612 } 613 } 614 615 static class AttributedQName extends QName { 616 private static final long serialVersionUID = 9878370226894144L; 617 private Pattern lcpattern; 618 private boolean nsany; 619 620 AttributedQName(String localPart) { 621 super(localPart); 622 checkWildcard("", localPart); 623 } 624 625 AttributedQName(String namespaceURI, String localPart, String prefix) { 626 super(namespaceURI, localPart, prefix); 627 checkWildcard(namespaceURI, localPart); 628 } 629 630 AttributedQName(String namespaceURI, String localPart) { 631 super(namespaceURI, localPart); 632 checkWildcard(namespaceURI, localPart); 633 } 634 635 public boolean matches(QName qname) { 636 return (nsany || getNamespaceURI().equals(qname.getNamespaceURI())) 637 && (lcpattern != null 638 ? lcpattern.matcher(qname.getLocalPart()).matches() 639 : getLocalPart().equals(qname.getLocalPart())); 640 } 641 642 private void checkWildcard(String nsa, String lcp) { 643 nsany = "*".equals(nsa); 644 boolean wc = false; 645 for (int i = 0; i < lcp.length(); i++) { 646 char c = lcp.charAt(i); 647 if (c == '?' || c == '*') { 648 wc = true; 649 break; 650 } 651 } 652 if (wc) { 653 StringBuilder sb = new StringBuilder(); 654 for (int i = 0; i < lcp.length(); i++) { 655 char c = lcp.charAt(i); 656 switch (c) { 657 case '.': 658 sb.append("\\."); 659 break; 660 case '*': 661 sb.append(".*"); 662 break; 663 case '?': 664 sb.append('.'); 665 break; 666 default: 667 sb.append(c); 668 break; 669 } 670 } 671 lcpattern = Pattern.compile(sb.toString()); 672 } 673 } 674 } 675}