/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.util;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.nio.CharBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.InputMismatchException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.apache.camel.util.BufferCaster.cast;

/**
 * Splits character input into tokens separated by a delimiter regular expression.
 * <p>
 * Input is pulled from a {@link Readable} into an internal {@link CharBuffer} that grows on demand.
 * Tokens are handed out through the {@link Iterator} methods. When no delimiter pattern is given,
 * tokens are separated by runs of whitespace.
 * <p>
 * An {@link IOException} raised while reading is not thrown from {@link #hasNext()} or
 * {@link #next()}; it ends the input and is reported by {@link #close()}.
 */
public final class Scanner implements Iterator<String>, Closeable {

    /** Default delimiter, used when the caller supplies no pattern. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");

    /** Matches any character sequence (including line terminators); used to extract a token region. */
    private static final Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*");

    /** Small shared cache of compiled delimiter patterns; all access is synchronized on the map itself. */
    private static final Map<String, Pattern> CACHE = new LinkedHashMap<String, Pattern>() {
        @Override
        protected boolean removeEldestEntry(Entry<String, Pattern> eldest) {
            // evaluated after each insertion: once the 7th entry goes in, the eldest one is evicted
            return size() >= 7;
        }
    };

    /** Initial capacity, in chars, of the read buffer. */
    private static final int BUFFER_SIZE = 1024;

    private final Readable source;
    private final Pattern delimPattern;
    private final Matcher matcher;
    private CharBuffer buf;
    // index in buf of the next character that has not been consumed yet
    private int position;
    // set once the source reported end of input (or failed with an IOException)
    private boolean inputExhausted;
    // set when the buffered data is not enough to decide where the current token ends
    private boolean needInput;
    // set when the delimiters preceding the current token have already been skipped
    private boolean skipped;
    // position remembered by hasNext() so that it can be restored; -1 when nothing is saved
    private int savedPosition = -1;
    private boolean closed;
    // read failure kept until close() reports it
    private IOException lastIOException;

    /**
     * Creates a scanner reading from the given stream.
     *
     * @param  source                   the stream to read, must not be {@code null}
     * @param  charsetName              the charset used to decode the stream, or {@code null} for the platform default
     * @param  pattern                  the delimiter regular expression, or {@code null} to split on whitespace
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(InputStream source, String charsetName, String pattern) {
        this(new InputStreamReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), cachePattern(pattern));
    }

    /**
     * Creates a scanner reading from the given file.
     * <p>
     * The charset and the delimiter pattern are resolved before the file is opened, so that an invalid
     * charset name or regular expression cannot leave an open file handle behind.
     *
     * @param  source                   the file to read, must not be {@code null}
     * @param  charsetName              the charset used to decode the file, or {@code null} for the platform default
     * @param  pattern                  the delimiter regular expression, or {@code null} to split on whitespace
     * @throws FileNotFoundException    if the file cannot be opened for reading
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(File source, String charsetName, String pattern) throws FileNotFoundException {
        this(Objects.requireNonNull(source, "source"), toDecoder(charsetName), cachePattern(pattern));
    }

    /**
     * Creates a scanner reading from the given string.
     *
     * @param source  the text to scan, must not be {@code null}
     * @param pattern the delimiter regular expression, or {@code null} to split on whitespace
     */
    public Scanner(String source, String pattern) {
        this(new StringReader(Objects.requireNonNull(source, "source")), cachePattern(pattern));
    }

    /**
     * Creates a scanner reading from the given string, using an already compiled delimiter.
     *
     * @param source  the text to scan, must not be {@code null}
     * @param pattern the delimiter pattern, or {@code null} to split on whitespace
     */
    public Scanner(String source, Pattern pattern) {
        this(new StringReader(Objects.requireNonNull(source, "source")), pattern);
    }

    /**
     * Creates a scanner reading from the given channel.
     *
     * @param  source                   the channel to read, must not be {@code null}
     * @param  charsetName              the charset used to decode the channel, or {@code null} for the platform default
     * @param  pattern                  the delimiter regular expression, or {@code null} to split on whitespace
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    public Scanner(ReadableByteChannel source, String charsetName, String pattern) {
        this(Channels.newReader(Objects.requireNonNull(source, "source"), toDecoder(charsetName), -1), cachePattern(pattern));
    }

    /**
     * Creates a scanner reading from the given character source.
     *
     * @param source  the character source, must not be {@code null}
     * @param pattern the delimiter regular expression, or {@code null} to split on whitespace
     */
    public Scanner(Readable source, String pattern) {
        this(Objects.requireNonNull(source, "source"), cachePattern(pattern));
    }

    /**
     * Opens the file only after the decoder and the pattern have been resolved by the caller.
     */
    private Scanner(File source, CharsetDecoder decoder, Pattern pattern) throws FileNotFoundException {
        this(Channels.newReader(new FileInputStream(source).getChannel(), decoder, -1), pattern);
    }

    private Scanner(Readable source, Pattern pattern) {
        this.source = source;
        delimPattern = pattern != null ? pattern : WHITESPACE_PATTERN;
        buf = CharBuffer.allocate(BUFFER_SIZE);
        // start with an empty readable window; readMore() extends the limit as data arrives
        cast(buf).limit(0);
        matcher = delimPattern.matcher(buf);
        matcher.useTransparentBounds(true);
        matcher.useAnchoringBounds(false);
    }

    /**
     * Creates a decoder for the named charset, or for the platform default charset when the name is {@code null}.
     *
     * @throws IllegalArgumentException if the charset name is illegal or unsupported
     */
    private static CharsetDecoder toDecoder(String charsetName) {
        try {
            Charset cs = charsetName != null ? Charset.forName(charsetName) : Charset.defaultCharset();
            return cs.newDecoder();
        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Tells whether another token is available, reading more input if needed. The scan position is
     * restored before returning, so no input is consumed.
     *
     * @return {@code true} if a token is available; {@code false} if there is none or the scanner is closed
     */
    @Override
    public boolean hasNext() {
        if (closed) {
            return false;
        }
        saveState();
        while (!inputExhausted) {
            if (hasTokenInBuffer()) {
                revertState();
                return true;
            }
            readMore();
        }
        boolean result = hasTokenInBuffer();
        revertState();
        return result;
    }

    /**
     * Returns the next token, reading more input until the token is complete.
     *
     * @return                        the next token
     * @throws IllegalStateException  if the scanner is closed
     * @throws NoSuchElementException if the input is exhausted and no characters remain
     * @throws InputMismatchException if no token can be produced although the input is not used up
     */
    @Override
    public String next() {
        checkClosed();
        while (true) {
            String token = getCompleteTokenInBuffer();
            if (token != null) {
                skipped = false;
                return token;
            }
            if (needInput) {
                readMore();
            } else {
                throwFor();
            }
        }
    }

    /**
     * Returns the delimiter regular expression used by this scanner.
     */
    public String getDelim() {
        return delimPattern.pattern();
    }

    /** Remembers the current position so that {@link #revertState()} can restore it. */
    private void saveState() {
        savedPosition = position;
    }

    /** Restores the position remembered by {@link #saveState()} and clears the saved state. */
    private void revertState() {
        position = savedPosition;
        savedPosition = -1;
        skipped = false;
    }

    /**
     * Appends more characters from the source after the data already buffered, growing or compacting
     * the buffer first when it is full. End of input and read failures both mark the input as exhausted.
     */
    private void readMore() {
        if (buf.limit() == buf.capacity()) {
            expandBuffer();
        }
        // temporarily expose the free space [limit, capacity) as the writable window
        int p = buf.position();
        cast(buf).position(buf.limit());
        cast(buf).limit(buf.capacity());
        int n;
        try {
            n = source.read(buf);
        } catch (IOException ioe) {
            // keep the failure for close() and treat it like end of input
            lastIOException = ioe;
            n = -1;
        }
        if (n == -1) {
            inputExhausted = true;
            needInput = false;
        } else if (n > 0) {
            needInput = false;
        }
        // restore the readable window, now including the characters just read
        cast(buf).limit(buf.position());
        cast(buf).position(p);
    }

    /**
     * Makes room in a full buffer: drops the characters before the current (or saved) position when
     * there are any, otherwise replaces the buffer by one of twice the capacity.
     */
    private void expandBuffer() {
        // the saved position, when present, must survive, so it marks the start of the data to keep
        int offset = savedPosition == -1 ? position : savedPosition;
        cast(buf).position(offset);
        if (offset > 0) {
            buf.compact();
            translateSavedIndexes(offset);
            position -= offset;
            cast(buf).flip();
        } else {
            int newSize = buf.capacity() * 2;
            CharBuffer newBuf = CharBuffer.allocate(newSize);
            newBuf.put(buf);
            cast(newBuf).flip();
            translateSavedIndexes(offset);
            position -= offset;
            buf = newBuf;
            // the matcher must look at the new buffer from now on
            matcher.reset(buf);
        }
    }

    /** Shifts the saved position, if any, by the number of characters dropped from the buffer. */
    private void translateSavedIndexes(int offset) {
        if (savedPosition != -1) {
            savedPosition -= offset;
        }
    }

    /**
     * Always throws: {@link NoSuchElementException} when the input is exhausted and fully consumed,
     * {@link InputMismatchException} otherwise.
     */
    private void throwFor() {
        skipped = false;
        if (inputExhausted && position == buf.limit()) {
            throw new NoSuchElementException();
        } else {
            throw new InputMismatchException();
        }
    }

    /**
     * Skips a delimiter match located at the current position, if there is one, and tells whether any
     * buffered characters remain after it.
     */
    private boolean hasTokenInBuffer() {
        matcher.usePattern(delimPattern);
        matcher.region(position, buf.limit());
        if (matcher.lookingAt()) {
            position = matcher.end();
        }
        return position != buf.limit();
    }

    /**
     * Extracts the next token from the buffered data when it is known to be complete.
     *
     * @return the token, or {@code null} when none can be returned yet; in that case {@code needInput}
     *         is set if reading more input may help
     */
    private String getCompleteTokenInBuffer() {
        matcher.usePattern(delimPattern);
        if (!skipped) {
            // skip the delimiters in front of the token
            matcher.region(position, buf.limit());
            if (matcher.lookingAt()) {
                if (matcher.hitEnd() && !inputExhausted) {
                    // the delimiter match reached the end of the buffer and could extend with more input
                    needInput = true;
                    return null;
                }
                skipped = true;
                position = matcher.end();
            }
        }
        if (position == buf.limit()) {
            // nothing left in the buffer
            if (inputExhausted) {
                return null;
            }
            needInput = true;
            return null;
        }
        // look for the delimiter that ends the token
        matcher.region(position, buf.limit());
        boolean foundNextDelim = matcher.find();
        if (foundNextDelim && matcher.end() == position) {
            // a match ending right at the current position cannot terminate a token; try the next one
            foundNextDelim = matcher.find();
        }
        if (foundNextDelim) {
            if (matcher.requireEnd() && !inputExhausted) {
                // more input could turn this match into a non-match
                needInput = true;
                return null;
            }
            int tokenEnd = matcher.start();
            matcher.usePattern(FIND_ANY_PATTERN);
            matcher.region(position, tokenEnd);
            if (matcher.matches()) {
                String s = matcher.group();
                position = matcher.end();
                return s;
            } else {
                return null;
            }
        }
        if (inputExhausted) {
            // no further delimiter and no further input: the rest of the buffer is the last token
            matcher.usePattern(FIND_ANY_PATTERN);
            matcher.region(position, buf.limit());
            if (matcher.matches()) {
                String s = matcher.group();
                position = matcher.end();
                return s;
            }
            return null;
        }
        needInput = true;
        return null;
    }

    private void checkClosed() {
        if (closed) {
            throw new IllegalStateException("Scanner closed");
        }
    }

    /**
     * Closes this scanner and, when the source is {@link Closeable}, the source as well.
     * <p>
     * A read failure recorded earlier is reported here. If closing the source fails too, that failure is
     * thrown and the read failure is attached to it as a suppressed exception. Closing an already closed
     * scanner has no effect.
     *
     * @throws IOException if reading failed earlier or closing the source fails
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        closed = true;
        // report a pending read failure exactly once
        IOException failure = lastIOException;
        lastIOException = null;
        if (source instanceof Closeable) {
            try {
                ((Closeable) source).close();
            } catch (IOException e) {
                if (failure != null && failure != e) {
                    e.addSuppressed(failure);
                }
                failure = e;
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Returns the compiled form of the given regular expression, taken from the shared cache when present.
     *
     * @return the compiled pattern, or {@code null} when {@code pattern} is {@code null}
     */
    private static Pattern cachePattern(String pattern) {
        if (pattern == null) {
            return null;
        }
        synchronized (CACHE) {
            return CACHE.computeIfAbsent(pattern, Pattern::compile);
        }
    }

}