001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.hdfs; 020 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.net.HttpURLConnection; 024 import java.net.URL; 025 026 import org.apache.commons.io.input.BoundedInputStream; 027 import org.apache.hadoop.fs.FSInputStream; 028 import org.apache.hadoop.hdfs.server.namenode.StreamFile; 029 030 import com.google.common.annotations.VisibleForTesting; 031 032 /** 033 * To support HTTP byte streams, a new connection to an HTTP server needs to be 034 * created each time. This class hides the complexity of those multiple 035 * connections from the client. Whenever seek() is called, a new connection 036 * is made on the successive read(). The normal input stream functions are 037 * connected to the currently active input stream. 038 */ 039 public abstract class ByteRangeInputStream extends FSInputStream { 040 041 /** 042 * This class wraps a URL and provides method to open connection. 043 * It can be overridden to change how a connection is opened. 044 */ 045 public static abstract class URLOpener { 046 protected URL url; 047 048 public URLOpener(URL u) { 049 url = u; 050 } 051 052 public void setURL(URL u) { 053 url = u; 054 } 055 056 public URL getURL() { 057 return url; 058 } 059 060 protected abstract HttpURLConnection openConnection() throws IOException; 061 062 protected abstract HttpURLConnection openConnection(final long offset) throws IOException; 063 } 064 065 enum StreamStatus { 066 NORMAL, SEEK, CLOSED 067 } 068 protected InputStream in; 069 protected URLOpener originalURL; 070 protected URLOpener resolvedURL; 071 protected long startPos = 0; 072 protected long currentPos = 0; 073 protected long filelength; 074 075 StreamStatus status = StreamStatus.SEEK; 076 077 /** 078 * Create with the specified URLOpeners. Original url is used to open the 079 * stream for the first time. Resolved url is used in subsequent requests. 080 * @param o Original url 081 * @param r Resolved url 082 */ 083 public ByteRangeInputStream(URLOpener o, URLOpener r) { 084 this.originalURL = o; 085 this.resolvedURL = r; 086 } 087 088 protected abstract void checkResponseCode(final HttpURLConnection connection 089 ) throws IOException; 090 091 protected abstract URL getResolvedUrl(final HttpURLConnection connection 092 ) throws IOException; 093 094 @VisibleForTesting 095 protected InputStream getInputStream() throws IOException { 096 switch (status) { 097 case NORMAL: 098 break; 099 case SEEK: 100 if (in != null) { 101 in.close(); 102 } 103 in = openInputStream(); 104 status = StreamStatus.NORMAL; 105 break; 106 case CLOSED: 107 throw new IOException("Stream closed"); 108 } 109 return in; 110 } 111 112 @VisibleForTesting 113 protected InputStream openInputStream() throws IOException { 114 // Use the original url if no resolved url exists, eg. if 115 // it's the first time a request is made. 116 final URLOpener opener = 117 (resolvedURL.getURL() == null) ? originalURL : resolvedURL; 118 119 final HttpURLConnection connection = opener.openConnection(startPos); 120 connection.connect(); 121 checkResponseCode(connection); 122 123 final String cl = connection.getHeaderField(StreamFile.CONTENT_LENGTH); 124 if (cl == null) { 125 throw new IOException(StreamFile.CONTENT_LENGTH+" header is missing"); 126 } 127 final long streamlength = Long.parseLong(cl); 128 filelength = startPos + streamlength; 129 // Java has a bug with >2GB request streams. It won't bounds check 130 // the reads so the transfer blocks until the server times out 131 InputStream is = 132 new BoundedInputStream(connection.getInputStream(), streamlength); 133 134 resolvedURL.setURL(getResolvedUrl(connection)); 135 136 return is; 137 } 138 139 private int update(final int n) throws IOException { 140 if (n != -1) { 141 currentPos += n; 142 } else if (currentPos < filelength) { 143 throw new IOException("Got EOF but currentPos = " + currentPos 144 + " < filelength = " + filelength); 145 } 146 return n; 147 } 148 149 @Override 150 public int read() throws IOException { 151 final int b = getInputStream().read(); 152 update((b == -1) ? -1 : 1); 153 return b; 154 } 155 156 @Override 157 public int read(byte b[], int off, int len) throws IOException { 158 return update(getInputStream().read(b, off, len)); 159 } 160 161 /** 162 * Seek to the given offset from the start of the file. 163 * The next read() will be from that location. Can't 164 * seek past the end of the file. 165 */ 166 @Override 167 public void seek(long pos) throws IOException { 168 if (pos != currentPos) { 169 startPos = pos; 170 currentPos = pos; 171 if (status != StreamStatus.CLOSED) { 172 status = StreamStatus.SEEK; 173 } 174 } 175 } 176 177 /** 178 * Return the current offset from the start of the file 179 */ 180 @Override 181 public long getPos() throws IOException { 182 return currentPos; 183 } 184 185 /** 186 * Seeks a different copy of the data. Returns true if 187 * found a new source, false otherwise. 188 */ 189 @Override 190 public boolean seekToNewSource(long targetPos) throws IOException { 191 return false; 192 } 193 194 @Override 195 public void close() throws IOException { 196 if (in != null) { 197 in.close(); 198 in = null; 199 } 200 status = StreamStatus.CLOSED; 201 } 202 }