001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hdfs;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.net.HttpURLConnection;
024import java.net.URL;
025import java.util.List;
026import java.util.Map;
027import java.util.StringTokenizer;
028
029import org.apache.commons.io.input.BoundedInputStream;
030import org.apache.hadoop.fs.FSInputStream;
031
032import com.google.common.annotations.VisibleForTesting;
033import com.google.common.net.HttpHeaders;
034
035/**
036 * To support HTTP byte streams, a new connection to an HTTP server needs to be
037 * created each time. This class hides the complexity of those multiple 
038 * connections from the client. Whenever seek() is called, a new connection
039 * is made on the successive read(). The normal input stream functions are 
040 * connected to the currently active input stream. 
041 */
042public abstract class ByteRangeInputStream extends FSInputStream {
043  
044  /**
045   * This class wraps a URL and provides method to open connection.
046   * It can be overridden to change how a connection is opened.
047   */
048  public static abstract class URLOpener {
049    protected URL url;
050  
051    public URLOpener(URL u) {
052      url = u;
053    }
054  
055    public void setURL(URL u) {
056      url = u;
057    }
058  
059    public URL getURL() {
060      return url;
061    }
062
063    protected abstract HttpURLConnection openConnection() throws IOException;
064
065    protected abstract HttpURLConnection openConnection(final long offset) throws IOException;
066  }
067
068  enum StreamStatus {
069    NORMAL, SEEK, CLOSED
070  }
071  protected InputStream in;
072  protected URLOpener originalURL;
073  protected URLOpener resolvedURL;
074  protected long startPos = 0;
075  protected long currentPos = 0;
076  protected Long fileLength = null;
077
078  StreamStatus status = StreamStatus.SEEK;
079
080  /**
081   * Create with the specified URLOpeners. Original url is used to open the 
082   * stream for the first time. Resolved url is used in subsequent requests.
083   * @param o Original url
084   * @param r Resolved url
085   */
086  public ByteRangeInputStream(URLOpener o, URLOpener r) {
087    this.originalURL = o;
088    this.resolvedURL = r;
089  }
090  
091  protected abstract void checkResponseCode(final HttpURLConnection connection
092      ) throws IOException;
093  
094  protected abstract URL getResolvedUrl(final HttpURLConnection connection
095      ) throws IOException;
096
097  @VisibleForTesting
098  protected InputStream getInputStream() throws IOException {
099    switch (status) {
100      case NORMAL:
101        break;
102      case SEEK:
103        if (in != null) {
104          in.close();
105        }
106        in = openInputStream();
107        status = StreamStatus.NORMAL;
108        break;
109      case CLOSED:
110        throw new IOException("Stream closed");
111    }
112    return in;
113  }
114  
115  @VisibleForTesting
116  protected InputStream openInputStream() throws IOException {
117    // Use the original url if no resolved url exists, eg. if
118    // it's the first time a request is made.
119    final URLOpener opener =
120      (resolvedURL.getURL() == null) ? originalURL : resolvedURL;
121
122    final HttpURLConnection connection = opener.openConnection(startPos);
123    connection.connect();
124    checkResponseCode(connection);
125
126    resolvedURL.setURL(getResolvedUrl(connection));
127
128    InputStream in = connection.getInputStream();
129    final Map<String, List<String>> headers = connection.getHeaderFields();
130    if (isChunkedTransferEncoding(headers)) {
131      // file length is not known
132      fileLength = null;
133    } else {
134      // for non-chunked transfer-encoding, get content-length
135      final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
136      if (cl == null) {
137        throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
138            + headers);
139      }
140      final long streamlength = Long.parseLong(cl);
141      fileLength = startPos + streamlength;
142
143      // Java has a bug with >2GB request streams.  It won't bounds check
144      // the reads so the transfer blocks until the server times out
145      in = new BoundedInputStream(in, streamlength);
146    }
147
148    return in;
149  }
150  
151  private static boolean isChunkedTransferEncoding(
152      final Map<String, List<String>> headers) {
153    return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
154        || contains(headers, HttpHeaders.TE, "chunked");
155  }
156
157  /** Does the HTTP header map contain the given key, value pair? */
158  private static boolean contains(final Map<String, List<String>> headers,
159      final String key, final String value) {
160    final List<String> values = headers.get(key);
161    if (values != null) {
162      for(String v : values) {
163        for(final StringTokenizer t = new StringTokenizer(v, ",");
164            t.hasMoreTokens(); ) {
165          if (value.equalsIgnoreCase(t.nextToken())) {
166            return true;
167          }
168        }
169      }
170    }
171    return false;
172  }
173
174  private int update(final int n) throws IOException {
175    if (n != -1) {
176      currentPos += n;
177    } else if (fileLength != null && currentPos < fileLength) {
178      throw new IOException("Got EOF but currentPos = " + currentPos
179          + " < filelength = " + fileLength);
180    }
181    return n;
182  }
183
184  @Override
185  public int read() throws IOException {
186    final int b = getInputStream().read();
187    update((b == -1) ? -1 : 1);
188    return b;
189  }
190
191  @Override
192  public int read(byte b[], int off, int len) throws IOException {
193    return update(getInputStream().read(b, off, len));
194  }
195  
196  /**
197   * Seek to the given offset from the start of the file.
198   * The next read() will be from that location.  Can't
199   * seek past the end of the file.
200   */
201  @Override
202  public void seek(long pos) throws IOException {
203    if (pos != currentPos) {
204      startPos = pos;
205      currentPos = pos;
206      if (status != StreamStatus.CLOSED) {
207        status = StreamStatus.SEEK;
208      }
209    }
210  }
211
212  /**
213   * Return the current offset from the start of the file
214   */
215  @Override
216  public long getPos() throws IOException {
217    return currentPos;
218  }
219
220  /**
221   * Seeks a different copy of the data.  Returns true if
222   * found a new source, false otherwise.
223   */
224  @Override
225  public boolean seekToNewSource(long targetPos) throws IOException {
226    return false;
227  }
228  
229  @Override
230  public void close() throws IOException {
231    if (in != null) {
232      in.close();
233      in = null;
234    }
235    status = StreamStatus.CLOSED;
236  }
237}