/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.camel.util;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.FileInputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.StringReader;
027import java.nio.CharBuffer;
028import java.nio.channels.Channels;
029import java.nio.channels.ReadableByteChannel;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.IllegalCharsetNameException;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.InputMismatchException;
035import java.util.Iterator;
036import java.util.LinkedHashMap;
037import java.util.Map;
038import java.util.Map.Entry;
039import java.util.NoSuchElementException;
040import java.util.Objects;
041import java.util.regex.Matcher;
042import java.util.regex.Pattern;
043
044import static org.apache.camel.util.BufferCaster.cast;
045
046public final class Scanner implements Iterator<String>, Closeable {
047
048    static {
049        WHITESPACE_PATTERN = Pattern.compile("\\s+");
050        FIND_ANY_PATTERN = Pattern.compile("(?s).*");
051    }
052
053    private static final Map<String, Pattern> CACHE = new LinkedHashMap<String, Pattern>() {
054        @Override
055        protected boolean removeEldestEntry(Entry<String, Pattern> eldest) {
056            return size() >= 7;
057        }
058    };
059
060    private static final Pattern WHITESPACE_PATTERN;
061
062    private static final Pattern FIND_ANY_PATTERN;
063
064    private static final int BUFFER_SIZE = 1024;
065
066    private Readable source;
067    private Pattern delimPattern;
068    private Matcher matcher;
069    private CharBuffer buf;
070    private int position;
071    private boolean inputExhausted;
072    private boolean needInput;
073    private boolean skipped;
074    private int savedPosition = -1;
075    private boolean closed;
076    private IOException lastIOException;
077
078    public Scanner(InputStream source, String charsetName, String pattern) {
079        this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), cachePattern(pattern));
080    }
081
082    public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException {
083        this(new FileInputStream(Objects.requireNonNull(source, "source")).getChannel(), charsetName, pattern);
084    }
085
086    public Scanner(String source, String pattern) {
087        this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern));
088    }
089
090    public Scanner(String source, Pattern pattern) {
091        this(new StringReader(Objects.requireNonNull(source, "source")), pattern);
092    }
093
094    public Scanner(ReadableByteChannel source, String charsetName, String pattern) {
095        this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1), cachePattern(pattern));
096    }
097
098    public Scanner(Readable source, String pattern) {
099        this(Objects.requireNonNull(source, "source"), cachePattern(pattern));
100    }
101
102    private Scanner(Readable source, Pattern pattern) {
103        this.source = source;
104        delimPattern = pattern != null ? pattern : WHITESPACE_PATTERN;
105        buf = CharBuffer.allocate(BUFFER_SIZE);
106        cast(buf).limit(0);
107        matcher = delimPattern.matcher(buf);
108        matcher.useTransparentBounds(true);
109        matcher.useAnchoringBounds(false);
110    }
111
112    private static CharsetDecoder toDecoder(String charsetName) {
113        try {
114            Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset();
115            return cs.newDecoder();
116        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
117            throw new IllegalArgumentException(e);
118        }
119    }
120
121    @Override
122    public boolean hasNext() {
123        if (closed) {
124            return false;
125        }
126        saveState();
127        while (!inputExhausted) {
128            if (hasTokenInBuffer()) {
129                revertState();
130                return true;
131            }
132            readMore();
133        }
134        boolean result = hasTokenInBuffer();
135        revertState();
136        return result;
137    }
138
139    @Override
140    public String next() {
141        checkClosed();
142        while (true) {
143            String token = getCompleteTokenInBuffer();
144            if (token != null) {
145                skipped = false;
146                return token;
147            }
148            if (needInput) {
149                readMore();
150            } else {
151                throwFor();
152            }
153        }
154    }
155
156    public String getDelim() {
157        return delimPattern.pattern();
158    }
159
160    private void saveState() {
161        savedPosition = position;
162    }
163
164    private void revertState() {
165        position = savedPosition;
166        savedPosition = -1;
167        skipped = false;
168    }
169
170    private void readMore() {
171        if (buf.limit() == buf.capacity()) {
172            expandBuffer();
173        }
174        int p = buf.position();
175        cast(buf).position(buf.limit());
176        cast(buf).limit(buf.capacity());
177        int n;
178        try {
179            n = source.read(buf);
180        } catch (IOException ioe) {
181            lastIOException = ioe;
182            n = -1;
183        }
184        if (n == -1) {
185            inputExhausted = true;
186            needInput = false;
187        } else if (n > 0) {
188            needInput = false;
189        }
190        cast(buf).limit(buf.position());
191        cast(buf).position(p);
192    }
193
194    private void expandBuffer() {
195        int offset = savedPosition == -1 ? position : savedPosition;
196        cast(buf).position(offset);
197        if (offset > 0) {
198            buf.compact();
199            translateSavedIndexes(offset);
200            position -= offset;
201            cast(buf).flip();
202        } else {
203            int newSize = buf.capacity() * 2;
204            CharBuffer newBuf = CharBuffer.allocate(newSize);
205            newBuf.put(buf);
206            cast(newBuf).flip();
207            translateSavedIndexes(offset);
208            position -= offset;
209            buf = newBuf;
210            matcher.reset(buf);
211        }
212    }
213
214    private void translateSavedIndexes(int offset) {
215        if (savedPosition != -1) {
216            savedPosition -= offset;
217        }
218    }
219
220    private void throwFor() {
221        skipped = false;
222        if (inputExhausted && position == buf.limit()) {
223            throw new NoSuchElementException();
224        } else {
225            throw new InputMismatchException();
226        }
227    }
228
229    private boolean hasTokenInBuffer() {
230        matcher.usePattern(delimPattern);
231        matcher.region(position, buf.limit());
232        if (matcher.lookingAt()) {
233            position = matcher.end();
234        }
235        return position != buf.limit();
236    }
237
238    private String getCompleteTokenInBuffer() {
239        matcher.usePattern(delimPattern);
240        if (!skipped) {
241            matcher.region(position, buf.limit());
242            if (matcher.lookingAt()) {
243                if (matcher.hitEnd() && !inputExhausted) {
244                    needInput = true;
245                    return null;
246                }
247                skipped = true;
248                position = matcher.end();
249            }
250        }
251        if (position == buf.limit()) {
252            if (inputExhausted) {
253                return null;
254            }
255            needInput = true;
256            return null;
257        }
258        matcher.region(position, buf.limit());
259        boolean foundNextDelim = matcher.find();
260        if (foundNextDelim && matcher.end() == position) {
261            foundNextDelim = matcher.find();
262        }
263        if (foundNextDelim) {
264            if (matcher.requireEnd() && !inputExhausted) {
265                needInput = true;
266                return null;
267            }
268            int tokenEnd = matcher.start();
269            matcher.usePattern(FIND_ANY_PATTERN);
270            matcher.region(position, tokenEnd);
271            if (matcher.matches()) {
272                String s = matcher.group();
273                position = matcher.end();
274                return s;
275            } else {
276                return null;
277            }
278        }
279        if (inputExhausted) {
280            matcher.usePattern(FIND_ANY_PATTERN);
281            matcher.region(position, buf.limit());
282            if (matcher.matches()) {
283                String s = matcher.group();
284                position = matcher.end();
285                return s;
286            }
287            return null;
288        }
289        needInput = true;
290        return null;
291    }
292
293    private void checkClosed() {
294        if (closed) {
295            throw new IllegalStateException();
296        }
297    }
298
299    @Override
300    public void close() throws IOException {
301        if (!closed) {
302            closed = true;
303            if (source instanceof Closeable) {
304                try {
305                    ((Closeable) source).close();
306                } catch (IOException e) {
307                    lastIOException = e;
308                }
309            }
310        }
311        if (lastIOException != null) {
312            throw lastIOException;
313        }
314    }
315
316    private static Pattern cachePattern(String pattern) {
317        if (pattern == null) {
318            return null;
319        }
320        synchronized (CACHE) {
321            return CACHE.computeIfAbsent(pattern, Pattern::compile);
322        }
323    }
324
325}