001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.hdfs;
020    
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.net.HttpURLConnection;
024    import java.net.URL;
025    import java.util.List;
026    import java.util.Map;
027    import java.util.StringTokenizer;
028    
029    import org.apache.commons.io.input.BoundedInputStream;
030    import org.apache.hadoop.fs.FSInputStream;
031    
032    import com.google.common.annotations.VisibleForTesting;
033    import com.google.common.net.HttpHeaders;
034    
035    /**
036     * To support HTTP byte streams, a new connection to an HTTP server needs to be
037     * created each time. This class hides the complexity of those multiple 
038     * connections from the client. Whenever seek() is called, a new connection
039     * is made on the successive read(). The normal input stream functions are 
040     * connected to the currently active input stream. 
041     */
042    public abstract class ByteRangeInputStream extends FSInputStream {
043      
044      /**
045       * This class wraps a URL and provides method to open connection.
046       * It can be overridden to change how a connection is opened.
047       */
048      public static abstract class URLOpener {
049        protected URL url;
050      
051        public URLOpener(URL u) {
052          url = u;
053        }
054      
055        public void setURL(URL u) {
056          url = u;
057        }
058      
059        public URL getURL() {
060          return url;
061        }
062    
063        protected abstract HttpURLConnection openConnection() throws IOException;
064    
065        protected abstract HttpURLConnection openConnection(final long offset) throws IOException;
066      }
067    
068      enum StreamStatus {
069        NORMAL, SEEK, CLOSED
070      }
071      protected InputStream in;
072      protected URLOpener originalURL;
073      protected URLOpener resolvedURL;
074      protected long startPos = 0;
075      protected long currentPos = 0;
076      protected Long fileLength = null;
077    
078      StreamStatus status = StreamStatus.SEEK;
079    
080      /**
081       * Create with the specified URLOpeners. Original url is used to open the 
082       * stream for the first time. Resolved url is used in subsequent requests.
083       * @param o Original url
084       * @param r Resolved url
085       */
086      public ByteRangeInputStream(URLOpener o, URLOpener r) {
087        this.originalURL = o;
088        this.resolvedURL = r;
089      }
090      
091      protected abstract void checkResponseCode(final HttpURLConnection connection
092          ) throws IOException;
093      
094      protected abstract URL getResolvedUrl(final HttpURLConnection connection
095          ) throws IOException;
096    
097      @VisibleForTesting
098      protected InputStream getInputStream() throws IOException {
099        switch (status) {
100          case NORMAL:
101            break;
102          case SEEK:
103            if (in != null) {
104              in.close();
105            }
106            in = openInputStream();
107            status = StreamStatus.NORMAL;
108            break;
109          case CLOSED:
110            throw new IOException("Stream closed");
111        }
112        return in;
113      }
114      
115      @VisibleForTesting
116      protected InputStream openInputStream() throws IOException {
117        // Use the original url if no resolved url exists, eg. if
118        // it's the first time a request is made.
119        final URLOpener opener =
120          (resolvedURL.getURL() == null) ? originalURL : resolvedURL;
121    
122        final HttpURLConnection connection = opener.openConnection(startPos);
123        connection.connect();
124        checkResponseCode(connection);
125    
126        resolvedURL.setURL(getResolvedUrl(connection));
127    
128        InputStream in = connection.getInputStream();
129        final Map<String, List<String>> headers = connection.getHeaderFields();
130        if (isChunkedTransferEncoding(headers)) {
131          // file length is not known
132          fileLength = null;
133        } else {
134          // for non-chunked transfer-encoding, get content-length
135          final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
136          if (cl == null) {
137            throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
138                + headers);
139          }
140          final long streamlength = Long.parseLong(cl);
141          fileLength = startPos + streamlength;
142    
143          // Java has a bug with >2GB request streams.  It won't bounds check
144          // the reads so the transfer blocks until the server times out
145          in = new BoundedInputStream(in, streamlength);
146        }
147    
148        return in;
149      }
150      
151      private static boolean isChunkedTransferEncoding(
152          final Map<String, List<String>> headers) {
153        return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
154            || contains(headers, HttpHeaders.TE, "chunked");
155      }
156    
157      /** Does the HTTP header map contain the given key, value pair? */
158      private static boolean contains(final Map<String, List<String>> headers,
159          final String key, final String value) {
160        final List<String> values = headers.get(key);
161        if (values != null) {
162          for(String v : values) {
163            for(final StringTokenizer t = new StringTokenizer(v, ",");
164                t.hasMoreTokens(); ) {
165              if (value.equalsIgnoreCase(t.nextToken())) {
166                return true;
167              }
168            }
169          }
170        }
171        return false;
172      }
173    
174      private int update(final int n) throws IOException {
175        if (n != -1) {
176          currentPos += n;
177        } else if (fileLength != null && currentPos < fileLength) {
178          throw new IOException("Got EOF but currentPos = " + currentPos
179              + " < filelength = " + fileLength);
180        }
181        return n;
182      }
183    
184      @Override
185      public int read() throws IOException {
186        final int b = getInputStream().read();
187        update((b == -1) ? -1 : 1);
188        return b;
189      }
190    
191      @Override
192      public int read(byte b[], int off, int len) throws IOException {
193        return update(getInputStream().read(b, off, len));
194      }
195      
196      /**
197       * Seek to the given offset from the start of the file.
198       * The next read() will be from that location.  Can't
199       * seek past the end of the file.
200       */
201      @Override
202      public void seek(long pos) throws IOException {
203        if (pos != currentPos) {
204          startPos = pos;
205          currentPos = pos;
206          if (status != StreamStatus.CLOSED) {
207            status = StreamStatus.SEEK;
208          }
209        }
210      }
211    
212      /**
213       * Return the current offset from the start of the file
214       */
215      @Override
216      public long getPos() throws IOException {
217        return currentPos;
218      }
219    
220      /**
221       * Seeks a different copy of the data.  Returns true if
222       * found a new source, false otherwise.
223       */
224      @Override
225      public boolean seekToNewSource(long targetPos) throws IOException {
226        return false;
227      }
228      
229      @Override
230      public void close() throws IOException {
231        if (in != null) {
232          in.close();
233          in = null;
234        }
235        status = StreamStatus.CLOSED;
236      }
237    }