001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.hdfs.web;
020    
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.net.HttpURLConnection;
024    import java.net.URL;
025    import java.util.List;
026    import java.util.Map;
027    import java.util.StringTokenizer;
028    
029    import org.apache.commons.io.input.BoundedInputStream;
030    import org.apache.hadoop.fs.FSInputStream;
031    
032    import com.google.common.annotations.VisibleForTesting;
033    import com.google.common.net.HttpHeaders;
034    
035    /**
036     * To support HTTP byte streams, a new connection to an HTTP server needs to be
037     * created each time. This class hides the complexity of those multiple
038     * connections from the client. Whenever seek() is called, a new connection
039     * is made on the successive read(). The normal input stream functions are
040     * connected to the currently active input stream.
041     */
042    public abstract class ByteRangeInputStream extends FSInputStream {
043    
044      /**
045       * This class wraps a URL and provides method to open connection.
046       * It can be overridden to change how a connection is opened.
047       */
048      public static abstract class URLOpener {
049        protected URL url;
050    
051        public URLOpener(URL u) {
052          url = u;
053        }
054    
055        public void setURL(URL u) {
056          url = u;
057        }
058    
059        public URL getURL() {
060          return url;
061        }
062    
063        /** Connect to server with a data offset. */
064        protected abstract HttpURLConnection connect(final long offset,
065            final boolean resolved) throws IOException;
066      }
067    
068      enum StreamStatus {
069        NORMAL, SEEK, CLOSED
070      }
071      protected InputStream in;
072      protected URLOpener originalURL;
073      protected URLOpener resolvedURL;
074      protected long startPos = 0;
075      protected long currentPos = 0;
076      protected Long fileLength = null;
077    
078      StreamStatus status = StreamStatus.SEEK;
079    
080      /**
081       * Create with the specified URLOpeners. Original url is used to open the
082       * stream for the first time. Resolved url is used in subsequent requests.
083       * @param o Original url
084       * @param r Resolved url
085       */
086      public ByteRangeInputStream(URLOpener o, URLOpener r) {
087        this.originalURL = o;
088        this.resolvedURL = r;
089      }
090    
091      protected abstract URL getResolvedUrl(final HttpURLConnection connection
092          ) throws IOException;
093    
094      @VisibleForTesting
095      protected InputStream getInputStream() throws IOException {
096        switch (status) {
097          case NORMAL:
098            break;
099          case SEEK:
100            if (in != null) {
101              in.close();
102            }
103            in = openInputStream();
104            status = StreamStatus.NORMAL;
105            break;
106          case CLOSED:
107            throw new IOException("Stream closed");
108        }
109        return in;
110      }
111    
112      @VisibleForTesting
113      protected InputStream openInputStream() throws IOException {
114        // Use the original url if no resolved url exists, eg. if
115        // it's the first time a request is made.
116        final boolean resolved = resolvedURL.getURL() != null;
117        final URLOpener opener = resolved? resolvedURL: originalURL;
118    
119        final HttpURLConnection connection = opener.connect(startPos, resolved);
120        resolvedURL.setURL(getResolvedUrl(connection));
121    
122        InputStream in = connection.getInputStream();
123        final Map<String, List<String>> headers = connection.getHeaderFields();
124        if (isChunkedTransferEncoding(headers)) {
125          // file length is not known
126          fileLength = null;
127        } else {
128          // for non-chunked transfer-encoding, get content-length
129          final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
130          if (cl == null) {
131            throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
132                + headers);
133          }
134          final long streamlength = Long.parseLong(cl);
135          fileLength = startPos + streamlength;
136    
137          // Java has a bug with >2GB request streams.  It won't bounds check
138          // the reads so the transfer blocks until the server times out
139          in = new BoundedInputStream(in, streamlength);
140        }
141    
142        return in;
143      }
144    
145      private static boolean isChunkedTransferEncoding(
146          final Map<String, List<String>> headers) {
147        return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
148            || contains(headers, HttpHeaders.TE, "chunked");
149      }
150    
151      /** Does the HTTP header map contain the given key, value pair? */
152      private static boolean contains(final Map<String, List<String>> headers,
153          final String key, final String value) {
154        final List<String> values = headers.get(key);
155        if (values != null) {
156          for(String v : values) {
157            for(final StringTokenizer t = new StringTokenizer(v, ",");
158                t.hasMoreTokens(); ) {
159              if (value.equalsIgnoreCase(t.nextToken())) {
160                return true;
161              }
162            }
163          }
164        }
165        return false;
166      }
167    
168      private int update(final int n) throws IOException {
169        if (n != -1) {
170          currentPos += n;
171        } else if (fileLength != null && currentPos < fileLength) {
172          throw new IOException("Got EOF but currentPos = " + currentPos
173              + " < filelength = " + fileLength);
174        }
175        return n;
176      }
177    
178      @Override
179      public int read() throws IOException {
180        final int b = getInputStream().read();
181        update((b == -1) ? -1 : 1);
182        return b;
183      }
184    
185      @Override
186      public int read(byte b[], int off, int len) throws IOException {
187        return update(getInputStream().read(b, off, len));
188      }
189    
190      /**
191       * Seek to the given offset from the start of the file.
192       * The next read() will be from that location.  Can't
193       * seek past the end of the file.
194       */
195      @Override
196      public void seek(long pos) throws IOException {
197        if (pos != currentPos) {
198          startPos = pos;
199          currentPos = pos;
200          if (status != StreamStatus.CLOSED) {
201            status = StreamStatus.SEEK;
202          }
203        }
204      }
205    
206      /**
207       * Return the current offset from the start of the file
208       */
209      @Override
210      public long getPos() throws IOException {
211        return currentPos;
212      }
213    
214      /**
215       * Seeks a different copy of the data.  Returns true if
216       * found a new source, false otherwise.
217       */
218      @Override
219      public boolean seekToNewSource(long targetPos) throws IOException {
220        return false;
221      }
222    
223      @Override
224      public void close() throws IOException {
225        if (in != null) {
226          in.close();
227          in = null;
228        }
229        status = StreamStatus.CLOSED;
230      }
231    }