001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.InputStreamReader;
023import java.io.Reader;
024import java.io.UnsupportedEncodingException;
025import java.util.ArrayList;
026import java.util.HashMap;
027import java.util.HashSet;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031import java.util.Map.Entry;
032import java.util.Set;
033import java.util.regex.Matcher;
034import java.util.regex.Pattern;
035
036import javax.xml.namespace.QName;
037import javax.xml.stream.XMLStreamConstants;
038import javax.xml.stream.XMLStreamException;
039import javax.xml.stream.XMLStreamReader;
040
041import org.apache.camel.Exchange;
042import org.apache.camel.InvalidPayloadException;
043import org.apache.camel.converter.jaxp.StaxConverter;
044import org.apache.camel.spi.NamespaceAware;
045import org.apache.camel.util.IOHelper;
046import org.apache.camel.util.ObjectHelper;
047import org.slf4j.Logger;
048import org.slf4j.LoggerFactory;
049
050/**
051 * An {@link org.apache.camel.language.tokenizer.XMLTokenizeLanguage} based iterator.
052 */
053public class XMLTokenExpressionIterator extends ExpressionAdapter implements NamespaceAware {
054    protected final String path;
055    protected char mode;
056    protected int group;
057    protected Map<String, String> nsmap;
058
059    public XMLTokenExpressionIterator(String path, char mode) {
060        this(path, mode, 1);
061    }
062
063    public XMLTokenExpressionIterator(String path, char mode, int group) {
064        ObjectHelper.notEmpty(path, "path");
065        this.path = path;
066        this.mode = mode;
067        this.group = group > 1 ? group : 1;
068    }
069
070    @Override
071    public void setNamespaces(Map<String, String> nsmap) {
072        this.nsmap = nsmap;
073    }
074
075    @Override
076    public Map<String, String> getNamespaces() {
077        return nsmap;
078    }
079
080    public void setMode(char mode) {
081        this.mode = mode;
082    }
083
084    public void setMode(String mode) {
085        this.mode = mode != null ? mode.charAt(0) : 0;
086    }
087    
088    public int getGroup() {
089        return group;
090    }
091
092    public void setGroup(int group) {
093        this.group = group;
094    }
095
096    protected Iterator<?> createIterator(InputStream in, String charset) throws XMLStreamException, UnsupportedEncodingException {
097        return new XMLTokenIterator(path, nsmap, mode, group, in, charset);
098    }
099
100    protected Iterator<?> createIterator(Reader in) throws XMLStreamException {
101        return new XMLTokenIterator(path, nsmap, mode, group, in);
102    }
103
104    @Override
105    public boolean matches(Exchange exchange) {
106        // as a predicate we must close the stream, as we do not return an iterator that can be used
107        // afterwards to iterate the input stream
108        Object value = doEvaluate(exchange, true);
109        return ObjectHelper.evaluateValuePredicate(value);
110    }
111
112    @Override
113    public Object evaluate(Exchange exchange) {
114        // as we return an iterator to access the input stream, we should not close it
115        return doEvaluate(exchange, false);
116    }
117
118    /**
119     * Strategy to evaluate the exchange
120     *
121     * @param exchange   the exchange
122     * @param closeStream whether to close the stream before returning from this method.
123     * @return the evaluated value
124     */
125    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
126        InputStream in = null;
127        try {
128            in = exchange.getIn().getMandatoryBody(InputStream.class);
129            String charset = IOHelper.getCharsetName(exchange);
130            return createIterator(in, charset);
131        } catch (InvalidPayloadException e) {
132            exchange.setException(e);
133            // must close input stream
134            IOHelper.close(in);
135            return null;
136        } catch (XMLStreamException e) {
137            exchange.setException(e);
138            // must close input stream
139            IOHelper.close(in);
140            return null;
141        } catch (UnsupportedEncodingException e) {
142            exchange.setException(e);
143            // must close input stream
144            IOHelper.close(in);
145            return null;
146        } finally {
147            if (closeStream) {
148                IOHelper.close(in);
149            }
150        }
151    }
152    
153
154    static class XMLTokenIterator implements Iterator<Object>, Closeable {
155        private static final Logger LOG = LoggerFactory.getLogger(XMLTokenIterator.class);
156        private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']*'|\"[^\"]*\")");
157
158        private transient InputStream originalInputStream;
159
160        private AttributedQName[] splitpath;
161        private int index;
162        private char mode;
163        private int group;
164        private RecordableReader in;
165        private XMLStreamReader reader;
166        private List<QName> path;
167        private List<Map<String, String>> namespaces;
168        private List<String> segments;
169        private List<QName> segmentlog;
170        private List<String> tokens;
171        private int code;
172        private int consumed;
173        private boolean backtrack;
174        private int trackdepth = -1;
175        private int depth;
176        private boolean compliant;
177
178        private Object nextToken;
179        
180        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, InputStream in, String charset) 
181            throws XMLStreamException, UnsupportedEncodingException {
182            // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead.
183            this(path, nsmap, mode, 1, new InputStreamReader(in, charset));
184            this.originalInputStream = in;
185        }
186
187        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, InputStream in, String charset) 
188            throws XMLStreamException, UnsupportedEncodingException {
189            // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead.
190            this(path, nsmap, mode, group, new InputStreamReader(in, charset));
191            this.originalInputStream = in;
192        }
193
194        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, Reader in) throws XMLStreamException {
195            this(path, nsmap, mode, 1, in);
196        }
197
198        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, Reader in) throws XMLStreamException {
199            final String[] sl = path.substring(1).split("/");
200            this.splitpath = new AttributedQName[sl.length];
201            for (int i = 0; i < sl.length; i++) {
202                String s = sl[i];
203                if (s.length() > 0) {
204                    int d = s.indexOf(':');
205                    String pfx = d > 0 ? s.substring(0, d) : "";
206                    this.splitpath[i] = 
207                        new AttributedQName(
208                            "*".equals(pfx) ? "*" : nsmap == null ? "" : nsmap.get(pfx), d > 0 ? s.substring(d + 1) : s, pfx);
209                }
210            }
211            
212            this.mode = mode != 0 ? mode : 'i';
213            this.group = group > 0 ? group : 1;
214            this.in = new RecordableReader(in);
215            this.reader = new StaxConverter().createXMLStreamReader(this.in);
216
217            LOG.trace("reader.class: {}", reader.getClass());
218            // perform the first offset compliance test
219            int coff = reader.getLocation().getCharacterOffset();
220            if (coff != 0) {
221                LOG.error("XMLStreamReader {} not supporting Location");
222                throw new XMLStreamException("reader not supporting Location");
223            }
224
225            this.path = new ArrayList<QName>();
226            
227            // wrapped mode needs the segments and the injected mode needs the namespaces
228            if (this.mode == 'w') {
229                this.segments = new ArrayList<String>();
230                this.segmentlog = new ArrayList<QName>();
231            } else if (this.mode == 'i') {
232                this.namespaces = new ArrayList<Map<String, String>>();
233            }
234            // when grouping the tokens, allocate the storage to temporarily store tokens. 
235            if (this.group > 1) {
236                this.tokens = new ArrayList<String>();
237            }       
238            this.nextToken = getNextToken();
239        }
240        
241        private boolean isDoS() {
242            return splitpath[index] == null;
243        }
244        
245        private AttributedQName current() {
246            return splitpath[index + (isDoS() ? 1 : 0)];
247        }
248        
249        private AttributedQName ancestor() {
250            return index == 0 ? null : splitpath[index - 1];
251        }
252
253        private void down() {
254            if (isDoS()) {
255                index++;
256            }
257            index++;
258        }
259        
260        private void up() {
261            index--;
262        }
263        
264        private boolean isBottom() {
265            return index == splitpath.length - (isDoS() ? 2 : 1);
266        }
267        
268        private boolean isTop() {
269            return index == 0;
270        }
271        
272        private int readNext() throws XMLStreamException {
273            int c = code;
274            if (c > 0) {
275                code = 0;
276            } else {
277                c = reader.next();
278            }
279            return c;
280        }
281        
282        private String getCurrentText() {
283            int pos = reader.getLocation().getCharacterOffset();
284            String txt = in.getText(pos - consumed);
285            consumed = pos;
286            // keep recording
287            in.record();
288            return txt;
289        }
290
291        private void pushName(QName name) {
292            path.add(name);
293        }
294
295        private QName popName() {
296            return path.remove(path.size() - 1);
297        }
298
299        private void pushSegment(QName qname, String token) {
300            segments.add(token);
301            segmentlog.add(qname);
302        }
303
304        private String popSegment() {
305            return segments.remove(segments.size() - 1);
306        }
307        
308        private QName peekLog() {
309            return segmentlog.get(segmentlog.size() - 1);
310        }
311        
312        private QName popLog() {
313            return segmentlog.remove(segmentlog.size() - 1);
314        }
315
316        private void pushNamespaces(XMLStreamReader reader) {
317            Map<String, String> m = new HashMap<String, String>();
318            if (namespaces.size() > 0) {
319                m.putAll(namespaces.get(namespaces.size() - 1));
320            }
321            for (int i = 0; i < reader.getNamespaceCount(); i++) {
322                m.put(reader.getNamespacePrefix(i), reader.getNamespaceURI(i));
323            }
324            namespaces.add(m);
325        }
326
327        private void popNamespaces() {
328            namespaces.remove(namespaces.size() - 1);
329        }
330
331        private Map<String, String> getCurrentNamespaceBindings() {
332            return namespaces.get(namespaces.size() - 1);
333        }
334
335        private void readCurrent(boolean incl) throws XMLStreamException {
336            int d = depth;
337            while (d <= depth) {
338                int code = reader.next();
339                if (code == XMLStreamConstants.START_ELEMENT) {
340                    depth++;
341                } else if (code == XMLStreamConstants.END_ELEMENT) {
342                    depth--;
343                }
344            }
345            // either look ahead to the next token or stay at the end element token
346            if (incl) {
347                code = reader.next();
348            } else {
349                code = reader.getEventType();
350                if (code == XMLStreamConstants.END_ELEMENT) {
351                    // revert the depth count to avoid double counting the up event
352                    depth++;
353                }
354            }
355        }
356
357        private String getCurrentToken() throws XMLStreamException {
358            readCurrent(true);
359            popName();
360            
361            String token = createContextualToken(getCurrentText());
362            if (mode == 'i') {
363                popNamespaces();
364            }
365            
366            return token;
367        }
368
369        private String createContextualToken(String token) {
370            StringBuilder sb = new StringBuilder();
371            if (mode == 'w' && group == 1) {
372                for (int i = 0; i < segments.size(); i++) {
373                    sb.append(segments.get(i));
374                }
375                sb.append(token);
376                for (int i = path.size() - 1; i >= 0; i--) {
377                    QName q = path.get(i);
378                    sb.append("</").append(makeName(q)).append(">");
379                }
380
381            } else if (mode == 'i') {
382                final String stag = token.substring(0, token.indexOf('>') + 1);
383                Set<String> skip = new HashSet<String>();
384                Matcher matcher = NAMESPACE_PATTERN.matcher(stag);
385                char quote = 0;
386                while (matcher.find()) {
387                    String prefix = matcher.group(1);
388                    if (prefix.length() > 0) {
389                        prefix = prefix.substring(1);
390                    }
391                    skip.add(prefix);
392                    if (quote == 0) {
393                        quote = matcher.group(2).charAt(0);
394                    }
395                }
396                if (quote == 0) {
397                    quote = '"';
398                }
399                boolean empty = stag.endsWith("/>"); 
400                sb.append(token.substring(0, stag.length() - (empty ? 2 : 1)));
401                for (Entry<String, String> e : getCurrentNamespaceBindings().entrySet()) {
402                    if (!skip.contains(e.getKey())) {
403                        sb.append(e.getKey().length() == 0 ? " xmlns" : " xmlns:")
404                            .append(e.getKey()).append("=").append(quote).append(e.getValue()).append(quote);
405                    }
406                }
407                sb.append(token.substring(stag.length() - (empty ? 2 : 1)));
408            } else if (mode == 'u') {
409                int bp = token.indexOf(">");
410                int ep = token.lastIndexOf("</");
411                if (bp > 0 && ep > 0) {
412                    sb.append(token.substring(bp + 1, ep));
413                }
414            } else if (mode == 't') {
415                int bp = 0;
416                for (;;) {
417                    int ep = token.indexOf('>', bp);
418                    bp = token.indexOf('<', ep);
419                    if (bp < 0) {
420                        break;
421                    }
422                    sb.append(token.substring(ep + 1, bp));
423                }
424            } else {
425                return token;
426            }
427
428            return sb.toString();
429        }
430
431        private String getGroupedToken() {
432            StringBuilder sb = new StringBuilder();
433            if (mode == 'w') {
434                 // for wrapped
435                for (int i = 0; i < segments.size(); i++) {
436                    sb.append(segments.get(i));
437                }
438                for (String s : tokens) {
439                    sb.append(s);
440                }
441                for (int i = path.size() - 1; i >= 0; i--) {
442                    QName q = path.get(i);
443                    sb.append("</").append(makeName(q)).append(">");
444                }
445            } else {
446                // for injected, unwrapped, text
447                sb.append("<group>");
448                for (String s : tokens) {
449                    sb.append(s);
450                }
451                sb.append("</group>");
452            }
453            tokens.clear();
454            return sb.toString();
455        }
456        
457        private String getNextToken() throws XMLStreamException {
458            int xcode = 0;
459            while (xcode != XMLStreamConstants.END_DOCUMENT) {
460                xcode = readNext();
461
462                switch (xcode) {
463                case XMLStreamConstants.START_ELEMENT:
464                    depth++;
465                    QName name = reader.getName();
466                    if (LOG.isTraceEnabled()) {
467                        LOG.trace("se={}; depth={}; trackdepth={}", new Object[]{name, depth, trackdepth});
468                    }
469                    
470                    String token = getCurrentText();
471                    // perform the second compliance test
472                    if (!compliant) {
473                        if (token != null && token.startsWith("<") && !token.startsWith("<?")) {
474                            LOG.error("XMLStreamReader {} not supporting Location");
475                            throw new XMLStreamException("reader not supporting Location");
476                        }
477                        compliant = true;
478                    }
479
480                    LOG.trace("token={}", token);
481                    if (!backtrack && mode == 'w') {
482                        pushSegment(name, token);
483                    }
484                    pushName(name);
485                    if (mode == 'i') {
486                        pushNamespaces(reader);
487                    }
488                    backtrack = false;
489                    if (current().matches(name)) {
490                        // mark the position of the match in the segments list
491                        if (isBottom()) {
492                            // final match
493                            token = getCurrentToken();
494                            backtrack = true;
495                            trackdepth = depth;
496                            if (group > 1) {
497                                tokens.add(token);
498                                if (group == tokens.size()) {
499                                    return getGroupedToken();
500                                }
501                            } else {
502                                return token;    
503                            }
504                        } else {
505                            // intermediary match
506                            down();
507                        }
508                    } else if (isDoS()) {
509                        // continue
510                    } else {
511                        // skip
512                        readCurrent(false);
513                    }
514                    break;
515                case XMLStreamConstants.END_ELEMENT:
516                    if ((backtrack || (trackdepth > 0 && depth == trackdepth))
517                        && (mode == 'w' && group > 1 && tokens.size() > 0)) {
518                        // flush the left over using the current context
519                        code = XMLStreamConstants.END_ELEMENT;
520                        return getGroupedToken();
521                    }
522
523                    depth--;
524                    QName endname = reader.getName();
525                    LOG.trace("ee={}", endname);
526                    
527                    popName();
528                    if (mode == 'i') {
529                        popNamespaces();
530                    }
531                    
532                    int pc = 0;
533                    if (backtrack || (trackdepth > 0 && depth == trackdepth - 1)) {
534                        // reactive backtrack if not backtracking and update the track depth
535                        backtrack = true;
536                        trackdepth--;
537                        if (mode == 'w') {
538                            while (!endname.equals(peekLog())) {
539                                pc++;
540                                popLog();
541                            }
542                        }
543                    }
544
545                    if (backtrack) {
546                        if (mode == 'w') {
547                            for (int i = 0; i < pc; i++) {
548                                popSegment();
549                            }
550                        }
551
552                        if ((ancestor() == null && !isTop())
553                            || (ancestor() != null && ancestor().matches(endname))) {
554                            up();
555                        }
556                    }
557                    break;
558                case XMLStreamConstants.END_DOCUMENT:
559                    LOG.trace("depth={}", depth);
560                    if (group > 1 && tokens.size() > 0) {
561                        // flush the left over before really going EoD
562                        code = XMLStreamConstants.END_DOCUMENT;
563                        return getGroupedToken();
564                    }
565                    break;
566                default:
567                    break;
568                }
569            }
570            return null;
571        }
572
573        private static String makeName(QName qname) {
574            String pfx = qname.getPrefix();
575            return pfx.length() == 0 ? qname.getLocalPart() : qname.getPrefix() + ":" + qname.getLocalPart();
576        }
577
578        @Override
579        public boolean hasNext() {
580            return nextToken != null;
581        }
582
583        @Override
584        public Object next() {
585            Object o = nextToken;
586            try {
587                nextToken = getNextToken();
588            } catch (XMLStreamException e) {
589                nextToken = null;
590                throw new RuntimeException(e);
591            }
592            return o;
593        }
594
595        @Override
596        public void remove() {
597            // noop
598        }
599
600        @Override
601        public void close() throws IOException {
602            try {
603                reader.close();
604            } catch (Exception e) {
605                // ignore
606            }
607            // need to close the original input stream as well as the reader do not delegate close it
608            if (originalInputStream != null) {
609                IOHelper.close(originalInputStream);
610            }
611        }
612    }
613
614    static class AttributedQName extends QName {
615        private static final long serialVersionUID = 9878370226894144L;
616        private Pattern lcpattern;
617        private boolean nsany;
618        
619        AttributedQName(String localPart) {
620            super(localPart);
621            checkWildcard("", localPart);
622        }
623
624        AttributedQName(String namespaceURI, String localPart, String prefix) {
625            super(namespaceURI, localPart, prefix);
626            checkWildcard(namespaceURI, localPart);
627        }
628
629        AttributedQName(String namespaceURI, String localPart) {
630            super(namespaceURI, localPart);
631            checkWildcard(namespaceURI, localPart);
632        }
633
634        public boolean matches(QName qname) {
635            return (nsany || getNamespaceURI().equals(qname.getNamespaceURI()))
636                && (lcpattern != null 
637                ? lcpattern.matcher(qname.getLocalPart()).matches() 
638                : getLocalPart().equals(qname.getLocalPart()));
639        }
640        
641        private void checkWildcard(String nsa, String lcp) {
642            nsany = "*".equals(nsa);
643            boolean wc = false;
644            for (int i = 0; i < lcp.length(); i++) {
645                char c = lcp.charAt(i);
646                if (c == '?' || c == '*') {
647                    wc = true;
648                    break;
649                }
650            }
651            if (wc) {
652                StringBuilder sb = new StringBuilder();
653                for (int i = 0; i < lcp.length(); i++) {
654                    char c = lcp.charAt(i);
655                    switch (c) {
656                    case '.':
657                        sb.append("\\.");
658                        break;
659                    case '*':
660                        sb.append(".*");
661                        break;
662                    case '?':
663                        sb.append('.');
664                        break;
665                    default:
666                        sb.append(c);
667                        break;
668                    }
669                }
670                lcpattern = Pattern.compile(sb.toString());
671            }
672        }
673    }
674}