001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hdfs.web;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.net.HttpURLConnection;
024import java.net.URL;
025import java.util.List;
026import java.util.Map;
027import java.util.StringTokenizer;
028
029import org.apache.commons.io.input.BoundedInputStream;
030import org.apache.hadoop.fs.FSInputStream;
031
032import com.google.common.annotations.VisibleForTesting;
033import com.google.common.net.HttpHeaders;
034
035/**
036 * To support HTTP byte streams, a new connection to an HTTP server needs to be
037 * created each time. This class hides the complexity of those multiple
038 * connections from the client. Whenever seek() is called, a new connection
039 * is made on the successive read(). The normal input stream functions are
040 * connected to the currently active input stream.
041 */
042public abstract class ByteRangeInputStream extends FSInputStream {
043
044  /**
045   * This class wraps a URL and provides method to open connection.
046   * It can be overridden to change how a connection is opened.
047   */
048  public static abstract class URLOpener {
049    protected URL url;
050
051    public URLOpener(URL u) {
052      url = u;
053    }
054
055    public void setURL(URL u) {
056      url = u;
057    }
058
059    public URL getURL() {
060      return url;
061    }
062
063    /** Connect to server with a data offset. */
064    protected abstract HttpURLConnection connect(final long offset,
065        final boolean resolved) throws IOException;
066  }
067
068  enum StreamStatus {
069    NORMAL, SEEK, CLOSED
070  }
071  protected InputStream in;
072  protected final URLOpener originalURL;
073  protected final URLOpener resolvedURL;
074  protected long startPos = 0;
075  protected long currentPos = 0;
076  protected Long fileLength = null;
077
078  StreamStatus status = StreamStatus.SEEK;
079
080  /**
081   * Create with the specified URLOpeners. Original url is used to open the
082   * stream for the first time. Resolved url is used in subsequent requests.
083   * @param o Original url
084   * @param r Resolved url
085   */
086  public ByteRangeInputStream(URLOpener o, URLOpener r) throws IOException {
087    this.originalURL = o;
088    this.resolvedURL = r;
089    getInputStream();
090  }
091
092  protected abstract URL getResolvedUrl(final HttpURLConnection connection
093      ) throws IOException;
094
095  @VisibleForTesting
096  protected InputStream getInputStream() throws IOException {
097    switch (status) {
098      case NORMAL:
099        break;
100      case SEEK:
101        if (in != null) {
102          in.close();
103        }
104        in = openInputStream();
105        status = StreamStatus.NORMAL;
106        break;
107      case CLOSED:
108        throw new IOException("Stream closed");
109    }
110    return in;
111  }
112
113  @VisibleForTesting
114  protected InputStream openInputStream() throws IOException {
115    // Use the original url if no resolved url exists, eg. if
116    // it's the first time a request is made.
117    final boolean resolved = resolvedURL.getURL() != null;
118    final URLOpener opener = resolved? resolvedURL: originalURL;
119
120    final HttpURLConnection connection = opener.connect(startPos, resolved);
121    resolvedURL.setURL(getResolvedUrl(connection));
122
123    InputStream in = connection.getInputStream();
124    final Map<String, List<String>> headers = connection.getHeaderFields();
125    if (isChunkedTransferEncoding(headers)) {
126      // file length is not known
127      fileLength = null;
128    } else {
129      // for non-chunked transfer-encoding, get content-length
130      final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
131      if (cl == null) {
132        throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
133            + headers);
134      }
135      final long streamlength = Long.parseLong(cl);
136      fileLength = startPos + streamlength;
137
138      // Java has a bug with >2GB request streams.  It won't bounds check
139      // the reads so the transfer blocks until the server times out
140      in = new BoundedInputStream(in, streamlength);
141    }
142
143    return in;
144  }
145
146  private static boolean isChunkedTransferEncoding(
147      final Map<String, List<String>> headers) {
148    return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
149        || contains(headers, HttpHeaders.TE, "chunked");
150  }
151
152  /** Does the HTTP header map contain the given key, value pair? */
153  private static boolean contains(final Map<String, List<String>> headers,
154      final String key, final String value) {
155    final List<String> values = headers.get(key);
156    if (values != null) {
157      for(String v : values) {
158        for(final StringTokenizer t = new StringTokenizer(v, ",");
159            t.hasMoreTokens(); ) {
160          if (value.equalsIgnoreCase(t.nextToken())) {
161            return true;
162          }
163        }
164      }
165    }
166    return false;
167  }
168
169  private int update(final int n) throws IOException {
170    if (n != -1) {
171      currentPos += n;
172    } else if (fileLength != null && currentPos < fileLength) {
173      throw new IOException("Got EOF but currentPos = " + currentPos
174          + " < filelength = " + fileLength);
175    }
176    return n;
177  }
178
179  @Override
180  public int read() throws IOException {
181    final int b = getInputStream().read();
182    update((b == -1) ? -1 : 1);
183    return b;
184  }
185
186  @Override
187  public int read(byte b[], int off, int len) throws IOException {
188    return update(getInputStream().read(b, off, len));
189  }
190
191  /**
192   * Seek to the given offset from the start of the file.
193   * The next read() will be from that location.  Can't
194   * seek past the end of the file.
195   */
196  @Override
197  public void seek(long pos) throws IOException {
198    if (pos != currentPos) {
199      startPos = pos;
200      currentPos = pos;
201      if (status != StreamStatus.CLOSED) {
202        status = StreamStatus.SEEK;
203      }
204    }
205  }
206
207  /**
208   * Return the current offset from the start of the file
209   */
210  @Override
211  public long getPos() throws IOException {
212    return currentPos;
213  }
214
215  /**
216   * Seeks a different copy of the data.  Returns true if
217   * found a new source, false otherwise.
218   */
219  @Override
220  public boolean seekToNewSource(long targetPos) throws IOException {
221    return false;
222  }
223
224  @Override
225  public void close() throws IOException {
226    if (in != null) {
227      in.close();
228      in = null;
229    }
230    status = StreamStatus.CLOSED;
231  }
232}