001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.hdfs;
020    
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.net.HttpURLConnection;
024    import java.net.URL;
025    
026    import org.apache.commons.io.input.BoundedInputStream;
027    import org.apache.hadoop.fs.FSInputStream;
028    import org.apache.hadoop.hdfs.server.namenode.StreamFile;
029    
030    import com.google.common.annotations.VisibleForTesting;
031    
032    /**
033     * To support HTTP byte streams, a new connection to an HTTP server needs to be
034     * created each time. This class hides the complexity of those multiple 
035     * connections from the client. Whenever seek() is called, a new connection
036     * is made on the successive read(). The normal input stream functions are 
037     * connected to the currently active input stream. 
038     */
039    public abstract class ByteRangeInputStream extends FSInputStream {
040      
041      /**
042       * This class wraps a URL and provides method to open connection.
043       * It can be overridden to change how a connection is opened.
044       */
045      public static abstract class URLOpener {
046        protected URL url;
047      
048        public URLOpener(URL u) {
049          url = u;
050        }
051      
052        public void setURL(URL u) {
053          url = u;
054        }
055      
056        public URL getURL() {
057          return url;
058        }
059    
060        protected abstract HttpURLConnection openConnection() throws IOException;
061    
062        protected abstract HttpURLConnection openConnection(final long offset) throws IOException;
063      }
064    
065      enum StreamStatus {
066        NORMAL, SEEK, CLOSED
067      }
068      protected InputStream in;
069      protected URLOpener originalURL;
070      protected URLOpener resolvedURL;
071      protected long startPos = 0;
072      protected long currentPos = 0;
073      protected long filelength;
074    
075      StreamStatus status = StreamStatus.SEEK;
076    
077      /**
078       * Create with the specified URLOpeners. Original url is used to open the 
079       * stream for the first time. Resolved url is used in subsequent requests.
080       * @param o Original url
081       * @param r Resolved url
082       */
083      public ByteRangeInputStream(URLOpener o, URLOpener r) {
084        this.originalURL = o;
085        this.resolvedURL = r;
086      }
087      
088      protected abstract void checkResponseCode(final HttpURLConnection connection
089          ) throws IOException;
090      
091      protected abstract URL getResolvedUrl(final HttpURLConnection connection
092          ) throws IOException;
093    
094      @VisibleForTesting
095      protected InputStream getInputStream() throws IOException {
096        switch (status) {
097          case NORMAL:
098            break;
099          case SEEK:
100            if (in != null) {
101              in.close();
102            }
103            in = openInputStream();
104            status = StreamStatus.NORMAL;
105            break;
106          case CLOSED:
107            throw new IOException("Stream closed");
108        }
109        return in;
110      }
111      
112      @VisibleForTesting
113      protected InputStream openInputStream() throws IOException {
114        // Use the original url if no resolved url exists, eg. if
115        // it's the first time a request is made.
116        final URLOpener opener =
117          (resolvedURL.getURL() == null) ? originalURL : resolvedURL;
118    
119        final HttpURLConnection connection = opener.openConnection(startPos);
120        connection.connect();
121        checkResponseCode(connection);
122    
123        final String cl = connection.getHeaderField(StreamFile.CONTENT_LENGTH);
124        if (cl == null) {
125          throw new IOException(StreamFile.CONTENT_LENGTH+" header is missing");
126        }
127        final long streamlength = Long.parseLong(cl);
128        filelength = startPos + streamlength;
129        // Java has a bug with >2GB request streams.  It won't bounds check
130        // the reads so the transfer blocks until the server times out
131        InputStream is =
132            new BoundedInputStream(connection.getInputStream(), streamlength);
133    
134        resolvedURL.setURL(getResolvedUrl(connection));
135        
136        return is;
137      }
138      
139      private int update(final int n) throws IOException {
140        if (n != -1) {
141          currentPos += n;
142        } else if (currentPos < filelength) {
143          throw new IOException("Got EOF but currentPos = " + currentPos
144              + " < filelength = " + filelength);
145        }
146        return n;
147      }
148    
149      @Override
150      public int read() throws IOException {
151        final int b = getInputStream().read();
152        update((b == -1) ? -1 : 1);
153        return b;
154      }
155    
156      @Override
157      public int read(byte b[], int off, int len) throws IOException {
158        return update(getInputStream().read(b, off, len));
159      }
160      
161      /**
162       * Seek to the given offset from the start of the file.
163       * The next read() will be from that location.  Can't
164       * seek past the end of the file.
165       */
166      @Override
167      public void seek(long pos) throws IOException {
168        if (pos != currentPos) {
169          startPos = pos;
170          currentPos = pos;
171          if (status != StreamStatus.CLOSED) {
172            status = StreamStatus.SEEK;
173          }
174        }
175      }
176    
177      /**
178       * Return the current offset from the start of the file
179       */
180      @Override
181      public long getPos() throws IOException {
182        return currentPos;
183      }
184    
185      /**
186       * Seeks a different copy of the data.  Returns true if
187       * found a new source, false otherwise.
188       */
189      @Override
190      public boolean seekToNewSource(long targetPos) throws IOException {
191        return false;
192      }
193      
194      @Override
195      public void close() throws IOException {
196        if (in != null) {
197          in.close();
198          in = null;
199        }
200        status = StreamStatus.CLOSED;
201      }
202    }