001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.hdfs.web;
020    
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.net.HttpURLConnection;
024    import java.net.URL;
025    import java.util.List;
026    import java.util.Map;
027    import java.util.StringTokenizer;
028    
029    import org.apache.commons.io.input.BoundedInputStream;
030    import org.apache.hadoop.fs.FSInputStream;
031    
032    import com.google.common.annotations.VisibleForTesting;
033    import com.google.common.net.HttpHeaders;
034    
035    /**
036     * To support HTTP byte streams, a new connection to an HTTP server needs to be
037     * created each time. This class hides the complexity of those multiple
038     * connections from the client. Whenever seek() is called, a new connection
039     * is made on the successive read(). The normal input stream functions are
040     * connected to the currently active input stream.
041     */
042    public abstract class ByteRangeInputStream extends FSInputStream {
043    
044      /**
045       * This class wraps a URL and provides method to open connection.
046       * It can be overridden to change how a connection is opened.
047       */
048      public static abstract class URLOpener {
049        protected URL url;
050    
051        public URLOpener(URL u) {
052          url = u;
053        }
054    
055        public void setURL(URL u) {
056          url = u;
057        }
058    
059        public URL getURL() {
060          return url;
061        }
062    
063        /** Connect to server with a data offset. */
064        protected abstract HttpURLConnection connect(final long offset,
065            final boolean resolved) throws IOException;
066      }
067    
068      enum StreamStatus {
069        NORMAL, SEEK, CLOSED
070      }
071      protected InputStream in;
072      protected final URLOpener originalURL;
073      protected final URLOpener resolvedURL;
074      protected long startPos = 0;
075      protected long currentPos = 0;
076      protected Long fileLength = null;
077    
078      StreamStatus status = StreamStatus.SEEK;
079    
080      /**
081       * Create with the specified URLOpeners. Original url is used to open the
082       * stream for the first time. Resolved url is used in subsequent requests.
083       * @param o Original url
084       * @param r Resolved url
085       */
086      public ByteRangeInputStream(URLOpener o, URLOpener r) throws IOException {
087        this.originalURL = o;
088        this.resolvedURL = r;
089        getInputStream();
090      }
091    
092      protected abstract URL getResolvedUrl(final HttpURLConnection connection
093          ) throws IOException;
094    
095      @VisibleForTesting
096      protected InputStream getInputStream() throws IOException {
097        switch (status) {
098          case NORMAL:
099            break;
100          case SEEK:
101            if (in != null) {
102              in.close();
103            }
104            in = openInputStream();
105            status = StreamStatus.NORMAL;
106            break;
107          case CLOSED:
108            throw new IOException("Stream closed");
109        }
110        return in;
111      }
112    
113      @VisibleForTesting
114      protected InputStream openInputStream() throws IOException {
115        // Use the original url if no resolved url exists, eg. if
116        // it's the first time a request is made.
117        final boolean resolved = resolvedURL.getURL() != null;
118        final URLOpener opener = resolved? resolvedURL: originalURL;
119    
120        final HttpURLConnection connection = opener.connect(startPos, resolved);
121        resolvedURL.setURL(getResolvedUrl(connection));
122    
123        InputStream in = connection.getInputStream();
124        final Map<String, List<String>> headers = connection.getHeaderFields();
125        if (isChunkedTransferEncoding(headers)) {
126          // file length is not known
127          fileLength = null;
128        } else {
129          // for non-chunked transfer-encoding, get content-length
130          final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
131          if (cl == null) {
132            throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
133                + headers);
134          }
135          final long streamlength = Long.parseLong(cl);
136          fileLength = startPos + streamlength;
137    
138          // Java has a bug with >2GB request streams.  It won't bounds check
139          // the reads so the transfer blocks until the server times out
140          in = new BoundedInputStream(in, streamlength);
141        }
142    
143        return in;
144      }
145    
146      private static boolean isChunkedTransferEncoding(
147          final Map<String, List<String>> headers) {
148        return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
149            || contains(headers, HttpHeaders.TE, "chunked");
150      }
151    
152      /** Does the HTTP header map contain the given key, value pair? */
153      private static boolean contains(final Map<String, List<String>> headers,
154          final String key, final String value) {
155        final List<String> values = headers.get(key);
156        if (values != null) {
157          for(String v : values) {
158            for(final StringTokenizer t = new StringTokenizer(v, ",");
159                t.hasMoreTokens(); ) {
160              if (value.equalsIgnoreCase(t.nextToken())) {
161                return true;
162              }
163            }
164          }
165        }
166        return false;
167      }
168    
169      private int update(final int n) throws IOException {
170        if (n != -1) {
171          currentPos += n;
172        } else if (fileLength != null && currentPos < fileLength) {
173          throw new IOException("Got EOF but currentPos = " + currentPos
174              + " < filelength = " + fileLength);
175        }
176        return n;
177      }
178    
179      @Override
180      public int read() throws IOException {
181        final int b = getInputStream().read();
182        update((b == -1) ? -1 : 1);
183        return b;
184      }
185    
186      @Override
187      public int read(byte b[], int off, int len) throws IOException {
188        return update(getInputStream().read(b, off, len));
189      }
190    
191      /**
192       * Seek to the given offset from the start of the file.
193       * The next read() will be from that location.  Can't
194       * seek past the end of the file.
195       */
196      @Override
197      public void seek(long pos) throws IOException {
198        if (pos != currentPos) {
199          startPos = pos;
200          currentPos = pos;
201          if (status != StreamStatus.CLOSED) {
202            status = StreamStatus.SEEK;
203          }
204        }
205      }
206    
207      /**
208       * Return the current offset from the start of the file
209       */
210      @Override
211      public long getPos() throws IOException {
212        return currentPos;
213      }
214    
215      /**
216       * Seeks a different copy of the data.  Returns true if
217       * found a new source, false otherwise.
218       */
219      @Override
220      public boolean seekToNewSource(long targetPos) throws IOException {
221        return false;
222      }
223    
224      @Override
225      public void close() throws IOException {
226        if (in != null) {
227          in.close();
228          in = null;
229        }
230        status = StreamStatus.CLOSED;
231      }
232    }