001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.InputStreamReader;
023import java.io.Reader;
024import java.io.UnsupportedEncodingException;
025import java.util.ArrayList;
026import java.util.HashMap;
027import java.util.HashSet;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031import java.util.Map.Entry;
032import java.util.Set;
033import java.util.regex.Matcher;
034import java.util.regex.Pattern;
035
036import javax.xml.namespace.QName;
037import javax.xml.stream.XMLStreamConstants;
038import javax.xml.stream.XMLStreamException;
039import javax.xml.stream.XMLStreamReader;
040
041import org.apache.camel.Exchange;
042import org.apache.camel.InvalidPayloadException;
043import org.apache.camel.converter.jaxp.StaxConverter;
044import org.apache.camel.spi.NamespaceAware;
045import org.apache.camel.util.IOHelper;
046import org.apache.camel.util.ObjectHelper;
047import org.apache.camel.util.StringHelper;
048import org.slf4j.Logger;
049import org.slf4j.LoggerFactory;
050
051/**
052 * An {@link org.apache.camel.language.tokenizer.XMLTokenizeLanguage} based iterator.
053 */
054public class XMLTokenExpressionIterator extends ExpressionAdapter implements NamespaceAware {
055    protected final String path;
056    protected char mode;
057    protected int group;
058    protected Map<String, String> nsmap;
059
060    public XMLTokenExpressionIterator(String path, char mode) {
061        this(path, mode, 1);
062    }
063
064    public XMLTokenExpressionIterator(String path, char mode, int group) {
065        StringHelper.notEmpty(path, "path");
066        this.path = path;
067        this.mode = mode;
068        this.group = group > 1 ? group : 1;
069    }
070
071    @Override
072    public void setNamespaces(Map<String, String> nsmap) {
073        this.nsmap = nsmap;
074    }
075
076    @Override
077    public Map<String, String> getNamespaces() {
078        return nsmap;
079    }
080
081    public void setMode(char mode) {
082        this.mode = mode;
083    }
084
085    public void setMode(String mode) {
086        this.mode = mode != null ? mode.charAt(0) : 0;
087    }
088    
089    public int getGroup() {
090        return group;
091    }
092
093    public void setGroup(int group) {
094        this.group = group;
095    }
096
097    protected Iterator<?> createIterator(InputStream in, String charset) throws XMLStreamException, UnsupportedEncodingException {
098        return new XMLTokenIterator(path, nsmap, mode, group, in, charset);
099    }
100
101    protected Iterator<?> createIterator(Reader in) throws XMLStreamException {
102        return new XMLTokenIterator(path, nsmap, mode, group, in);
103    }
104
105    @Override
106    public boolean matches(Exchange exchange) {
107        // as a predicate we must close the stream, as we do not return an iterator that can be used
108        // afterwards to iterate the input stream
109        Object value = doEvaluate(exchange, true);
110        return ObjectHelper.evaluateValuePredicate(value);
111    }
112
113    @Override
114    public Object evaluate(Exchange exchange) {
115        // as we return an iterator to access the input stream, we should not close it
116        return doEvaluate(exchange, false);
117    }
118
119    /**
120     * Strategy to evaluate the exchange
121     *
122     * @param exchange   the exchange
123     * @param closeStream whether to close the stream before returning from this method.
124     * @return the evaluated value
125     */
126    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
127        InputStream in = null;
128        try {
129            in = exchange.getIn().getMandatoryBody(InputStream.class);
130            String charset = IOHelper.getCharsetName(exchange);
131            return createIterator(in, charset);
132        } catch (InvalidPayloadException e) {
133            exchange.setException(e);
134            // must close input stream
135            IOHelper.close(in);
136            return null;
137        } catch (XMLStreamException e) {
138            exchange.setException(e);
139            // must close input stream
140            IOHelper.close(in);
141            return null;
142        } catch (UnsupportedEncodingException e) {
143            exchange.setException(e);
144            // must close input stream
145            IOHelper.close(in);
146            return null;
147        } finally {
148            if (closeStream) {
149                IOHelper.close(in);
150            }
151        }
152    }
153    
154
155    static class XMLTokenIterator implements Iterator<Object>, Closeable {
156        private static final Logger LOG = LoggerFactory.getLogger(XMLTokenIterator.class);
157        private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']*'|\"[^\"]*\")");
158
159        private transient InputStream originalInputStream;
160
161        private AttributedQName[] splitpath;
162        private int index;
163        private char mode;
164        private int group;
165        private RecordableReader in;
166        private XMLStreamReader reader;
167        private List<QName> path;
168        private List<Map<String, String>> namespaces;
169        private List<String> segments;
170        private List<QName> segmentlog;
171        private List<String> tokens;
172        private int code;
173        private int consumed;
174        private boolean backtrack;
175        private int trackdepth = -1;
176        private int depth;
177        private boolean compliant;
178
179        private Object nextToken;
180        
181        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, InputStream in, String charset) 
182            throws XMLStreamException, UnsupportedEncodingException {
183            // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead.
184            this(path, nsmap, mode, 1, new InputStreamReader(in, charset));
185            this.originalInputStream = in;
186        }
187
188        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, InputStream in, String charset) 
189            throws XMLStreamException, UnsupportedEncodingException {
190            // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead.
191            this(path, nsmap, mode, group, new InputStreamReader(in, charset));
192            this.originalInputStream = in;
193        }
194
195        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, Reader in) throws XMLStreamException {
196            this(path, nsmap, mode, 1, in);
197        }
198
199        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, Reader in) throws XMLStreamException {
200            final String[] sl = path.substring(1).split("/");
201            this.splitpath = new AttributedQName[sl.length];
202            for (int i = 0; i < sl.length; i++) {
203                String s = sl[i];
204                if (s.length() > 0) {
205                    int d = s.indexOf(':');
206                    String pfx = d > 0 ? s.substring(0, d) : "";
207                    this.splitpath[i] = 
208                        new AttributedQName(
209                            "*".equals(pfx) ? "*" : nsmap == null ? "" : nsmap.get(pfx), d > 0 ? s.substring(d + 1) : s, pfx);
210                }
211            }
212            
213            this.mode = mode != 0 ? mode : 'i';
214            this.group = group > 0 ? group : 1;
215            this.in = new RecordableReader(in);
216            this.reader = new StaxConverter().createXMLStreamReader(this.in);
217
218            LOG.trace("reader.class: {}", reader.getClass());
219            // perform the first offset compliance test
220            int coff = reader.getLocation().getCharacterOffset();
221            if (coff != 0) {
222                LOG.error("XMLStreamReader {} not supporting Location");
223                throw new XMLStreamException("reader not supporting Location");
224            }
225
226            this.path = new ArrayList<>();
227            
228            // wrapped mode needs the segments and the injected mode needs the namespaces
229            if (this.mode == 'w') {
230                this.segments = new ArrayList<>();
231                this.segmentlog = new ArrayList<>();
232            } else if (this.mode == 'i') {
233                this.namespaces = new ArrayList<>();
234            }
235            // when grouping the tokens, allocate the storage to temporarily store tokens. 
236            if (this.group > 1) {
237                this.tokens = new ArrayList<>();
238            }       
239            this.nextToken = getNextToken();
240        }
241        
242        private boolean isDoS() {
243            return splitpath[index] == null;
244        }
245        
246        private AttributedQName current() {
247            return splitpath[index + (isDoS() ? 1 : 0)];
248        }
249        
250        private AttributedQName ancestor() {
251            return index == 0 ? null : splitpath[index - 1];
252        }
253
254        private void down() {
255            if (isDoS()) {
256                index++;
257            }
258            index++;
259        }
260        
261        private void up() {
262            index--;
263        }
264        
265        private boolean isBottom() {
266            return index == splitpath.length - (isDoS() ? 2 : 1);
267        }
268        
269        private boolean isTop() {
270            return index == 0;
271        }
272        
273        private int readNext() throws XMLStreamException {
274            int c = code;
275            if (c > 0) {
276                code = 0;
277            } else {
278                c = reader.next();
279            }
280            return c;
281        }
282        
283        private String getCurrentText() {
284            int pos = reader.getLocation().getCharacterOffset();
285            String txt = in.getText(pos - consumed);
286            consumed = pos;
287            // keep recording
288            in.record();
289            return txt;
290        }
291
292        private void pushName(QName name) {
293            path.add(name);
294        }
295
296        private QName popName() {
297            return path.remove(path.size() - 1);
298        }
299
300        private void pushSegment(QName qname, String token) {
301            segments.add(token);
302            segmentlog.add(qname);
303        }
304
305        private String popSegment() {
306            return segments.remove(segments.size() - 1);
307        }
308        
309        private QName peekLog() {
310            return segmentlog.get(segmentlog.size() - 1);
311        }
312        
313        private QName popLog() {
314            return segmentlog.remove(segmentlog.size() - 1);
315        }
316
317        private void pushNamespaces(XMLStreamReader reader) {
318            Map<String, String> m = new HashMap<>();
319            if (namespaces.size() > 0) {
320                m.putAll(namespaces.get(namespaces.size() - 1));
321            }
322            for (int i = 0; i < reader.getNamespaceCount(); i++) {
323                m.put(reader.getNamespacePrefix(i), reader.getNamespaceURI(i));
324            }
325            namespaces.add(m);
326        }
327
328        private void popNamespaces() {
329            namespaces.remove(namespaces.size() - 1);
330        }
331
332        private Map<String, String> getCurrentNamespaceBindings() {
333            return namespaces.get(namespaces.size() - 1);
334        }
335
336        private void readCurrent(boolean incl) throws XMLStreamException {
337            int d = depth;
338            while (d <= depth) {
339                int code = reader.next();
340                if (code == XMLStreamConstants.START_ELEMENT) {
341                    depth++;
342                } else if (code == XMLStreamConstants.END_ELEMENT) {
343                    depth--;
344                }
345            }
346            // either look ahead to the next token or stay at the end element token
347            if (incl) {
348                code = reader.next();
349            } else {
350                code = reader.getEventType();
351                if (code == XMLStreamConstants.END_ELEMENT) {
352                    // revert the depth count to avoid double counting the up event
353                    depth++;
354                }
355            }
356        }
357
358        private String getCurrentToken() throws XMLStreamException {
359            readCurrent(true);
360            popName();
361            
362            String token = createContextualToken(getCurrentText());
363            if (mode == 'i') {
364                popNamespaces();
365            }
366            
367            return token;
368        }
369
370        private String createContextualToken(String token) {
371            StringBuilder sb = new StringBuilder();
372            if (mode == 'w' && group == 1) {
373                for (int i = 0; i < segments.size(); i++) {
374                    sb.append(segments.get(i));
375                }
376                sb.append(token);
377                for (int i = path.size() - 1; i >= 0; i--) {
378                    QName q = path.get(i);
379                    sb.append("</").append(makeName(q)).append(">");
380                }
381
382            } else if (mode == 'i') {
383                final String stag = token.substring(0, token.indexOf('>') + 1);
384                Set<String> skip = new HashSet<>();
385                Matcher matcher = NAMESPACE_PATTERN.matcher(stag);
386                char quote = 0;
387                while (matcher.find()) {
388                    String prefix = matcher.group(1);
389                    if (prefix.length() > 0) {
390                        prefix = prefix.substring(1);
391                    }
392                    skip.add(prefix);
393                    if (quote == 0) {
394                        quote = matcher.group(2).charAt(0);
395                    }
396                }
397                if (quote == 0) {
398                    quote = '"';
399                }
400                boolean empty = stag.endsWith("/>"); 
401                sb.append(token.substring(0, stag.length() - (empty ? 2 : 1)));
402                for (Entry<String, String> e : getCurrentNamespaceBindings().entrySet()) {
403                    if (!skip.contains(e.getKey())) {
404                        sb.append(e.getKey().length() == 0 ? " xmlns" : " xmlns:")
405                            .append(e.getKey()).append("=").append(quote).append(e.getValue()).append(quote);
406                    }
407                }
408                sb.append(token.substring(stag.length() - (empty ? 2 : 1)));
409            } else if (mode == 'u') {
410                int bp = token.indexOf(">");
411                int ep = token.lastIndexOf("</");
412                if (bp > 0 && ep > 0) {
413                    sb.append(token.substring(bp + 1, ep));
414                }
415            } else if (mode == 't') {
416                int bp = 0;
417                for (;;) {
418                    int ep = token.indexOf('>', bp);
419                    bp = token.indexOf('<', ep);
420                    if (bp < 0) {
421                        break;
422                    }
423                    sb.append(token.substring(ep + 1, bp));
424                }
425            } else {
426                return token;
427            }
428
429            return sb.toString();
430        }
431
432        private String getGroupedToken() {
433            StringBuilder sb = new StringBuilder();
434            if (mode == 'w') {
435                 // for wrapped
436                for (int i = 0; i < segments.size(); i++) {
437                    sb.append(segments.get(i));
438                }
439                for (String s : tokens) {
440                    sb.append(s);
441                }
442                for (int i = path.size() - 1; i >= 0; i--) {
443                    QName q = path.get(i);
444                    sb.append("</").append(makeName(q)).append(">");
445                }
446            } else {
447                // for injected, unwrapped, text
448                sb.append("<group>");
449                for (String s : tokens) {
450                    sb.append(s);
451                }
452                sb.append("</group>");
453            }
454            tokens.clear();
455            return sb.toString();
456        }
457        
458        private String getNextToken() throws XMLStreamException {
459            int xcode = 0;
460            while (xcode != XMLStreamConstants.END_DOCUMENT) {
461                xcode = readNext();
462
463                switch (xcode) {
464                case XMLStreamConstants.START_ELEMENT:
465                    depth++;
466                    QName name = reader.getName();
467                    if (LOG.isTraceEnabled()) {
468                        LOG.trace("se={}; depth={}; trackdepth={}", name, depth, trackdepth);
469                    }
470                    
471                    String token = getCurrentText();
472                    // perform the second compliance test
473                    if (!compliant) {
474                        if (token != null && token.startsWith("<") && !token.startsWith("<?")) {
475                            LOG.error("XMLStreamReader {} not supporting Location");
476                            throw new XMLStreamException("reader not supporting Location");
477                        }
478                        compliant = true;
479                    }
480
481                    LOG.trace("token={}", token);
482                    if (!backtrack && mode == 'w') {
483                        pushSegment(name, token);
484                    }
485                    pushName(name);
486                    if (mode == 'i') {
487                        pushNamespaces(reader);
488                    }
489                    backtrack = false;
490                    if (current().matches(name)) {
491                        // mark the position of the match in the segments list
492                        if (isBottom()) {
493                            // final match
494                            token = getCurrentToken();
495                            backtrack = true;
496                            trackdepth = depth;
497                            if (group > 1) {
498                                tokens.add(token);
499                                if (group == tokens.size()) {
500                                    return getGroupedToken();
501                                }
502                            } else {
503                                return token;    
504                            }
505                        } else {
506                            // intermediary match
507                            down();
508                        }
509                    } else if (isDoS()) {
510                        // continue
511                    } else {
512                        // skip
513                        readCurrent(false);
514                    }
515                    break;
516                case XMLStreamConstants.END_ELEMENT:
517                    if ((backtrack || (trackdepth > 0 && depth == trackdepth))
518                        && (mode == 'w' && group > 1 && tokens.size() > 0)) {
519                        // flush the left over using the current context
520                        code = XMLStreamConstants.END_ELEMENT;
521                        return getGroupedToken();
522                    }
523
524                    depth--;
525                    QName endname = reader.getName();
526                    LOG.trace("ee={}", endname);
527                    
528                    popName();
529                    if (mode == 'i') {
530                        popNamespaces();
531                    }
532                    
533                    int pc = 0;
534                    if (backtrack || (trackdepth > 0 && depth == trackdepth - 1)) {
535                        // reactive backtrack if not backtracking and update the track depth
536                        backtrack = true;
537                        trackdepth--;
538                        if (mode == 'w') {
539                            while (!endname.equals(peekLog())) {
540                                pc++;
541                                popLog();
542                            }
543                        }
544                    }
545
546                    if (backtrack) {
547                        if (mode == 'w') {
548                            for (int i = 0; i < pc; i++) {
549                                popSegment();
550                            }
551                        }
552
553                        if ((ancestor() == null && !isTop())
554                            || (ancestor() != null && ancestor().matches(endname))) {
555                            up();
556                        }
557                    }
558                    break;
559                case XMLStreamConstants.END_DOCUMENT:
560                    LOG.trace("depth={}", depth);
561                    if (group > 1 && tokens.size() > 0) {
562                        // flush the left over before really going EoD
563                        code = XMLStreamConstants.END_DOCUMENT;
564                        return getGroupedToken();
565                    }
566                    break;
567                default:
568                    break;
569                }
570            }
571            return null;
572        }
573
574        private static String makeName(QName qname) {
575            String pfx = qname.getPrefix();
576            return pfx.length() == 0 ? qname.getLocalPart() : qname.getPrefix() + ":" + qname.getLocalPart();
577        }
578
579        @Override
580        public boolean hasNext() {
581            return nextToken != null;
582        }
583
584        @Override
585        public Object next() {
586            Object o = nextToken;
587            try {
588                nextToken = getNextToken();
589            } catch (XMLStreamException e) {
590                nextToken = null;
591                throw new RuntimeException(e);
592            }
593            return o;
594        }
595
596        @Override
597        public void remove() {
598            // noop
599        }
600
601        @Override
602        public void close() throws IOException {
603            try {
604                reader.close();
605            } catch (Exception e) {
606                // ignore
607            }
608            // need to close the original input stream as well as the reader do not delegate close it
609            if (originalInputStream != null) {
610                IOHelper.close(originalInputStream);
611            }
612        }
613    }
614
615    static class AttributedQName extends QName {
616        private static final long serialVersionUID = 9878370226894144L;
617        private Pattern lcpattern;
618        private boolean nsany;
619        
620        AttributedQName(String localPart) {
621            super(localPart);
622            checkWildcard("", localPart);
623        }
624
625        AttributedQName(String namespaceURI, String localPart, String prefix) {
626            super(namespaceURI, localPart, prefix);
627            checkWildcard(namespaceURI, localPart);
628        }
629
630        AttributedQName(String namespaceURI, String localPart) {
631            super(namespaceURI, localPart);
632            checkWildcard(namespaceURI, localPart);
633        }
634
635        public boolean matches(QName qname) {
636            return (nsany || getNamespaceURI().equals(qname.getNamespaceURI()))
637                && (lcpattern != null 
638                ? lcpattern.matcher(qname.getLocalPart()).matches() 
639                : getLocalPart().equals(qname.getLocalPart()));
640        }
641        
642        private void checkWildcard(String nsa, String lcp) {
643            nsany = "*".equals(nsa);
644            boolean wc = false;
645            for (int i = 0; i < lcp.length(); i++) {
646                char c = lcp.charAt(i);
647                if (c == '?' || c == '*') {
648                    wc = true;
649                    break;
650                }
651            }
652            if (wc) {
653                StringBuilder sb = new StringBuilder();
654                for (int i = 0; i < lcp.length(); i++) {
655                    char c = lcp.charAt(i);
656                    switch (c) {
657                    case '.':
658                        sb.append("\\.");
659                        break;
660                    case '*':
661                        sb.append(".*");
662                        break;
663                    case '?':
664                        sb.append('.');
665                        break;
666                    default:
667                        sb.append(c);
668                        break;
669                    }
670                }
671                lcpattern = Pattern.compile(sb.toString());
672            }
673        }
674    }
675}