001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.hdfs; 020 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.net.HttpURLConnection; 024 import java.net.URL; 025 import java.util.List; 026 import java.util.Map; 027 import java.util.StringTokenizer; 028 029 import org.apache.commons.io.input.BoundedInputStream; 030 import org.apache.hadoop.fs.FSInputStream; 031 032 import com.google.common.annotations.VisibleForTesting; 033 import com.google.common.net.HttpHeaders; 034 035 /** 036 * To support HTTP byte streams, a new connection to an HTTP server needs to be 037 * created each time. This class hides the complexity of those multiple 038 * connections from the client. Whenever seek() is called, a new connection 039 * is made on the successive read(). The normal input stream functions are 040 * connected to the currently active input stream. 041 */ 042 public abstract class ByteRangeInputStream extends FSInputStream { 043 044 /** 045 * This class wraps a URL and provides method to open connection. 046 * It can be overridden to change how a connection is opened. 047 */ 048 public static abstract class URLOpener { 049 protected URL url; 050 051 public URLOpener(URL u) { 052 url = u; 053 } 054 055 public void setURL(URL u) { 056 url = u; 057 } 058 059 public URL getURL() { 060 return url; 061 } 062 063 protected abstract HttpURLConnection openConnection() throws IOException; 064 065 protected abstract HttpURLConnection openConnection(final long offset) throws IOException; 066 } 067 068 enum StreamStatus { 069 NORMAL, SEEK, CLOSED 070 } 071 protected InputStream in; 072 protected URLOpener originalURL; 073 protected URLOpener resolvedURL; 074 protected long startPos = 0; 075 protected long currentPos = 0; 076 protected Long fileLength = null; 077 078 StreamStatus status = StreamStatus.SEEK; 079 080 /** 081 * Create with the specified URLOpeners. Original url is used to open the 082 * stream for the first time. Resolved url is used in subsequent requests. 083 * @param o Original url 084 * @param r Resolved url 085 */ 086 public ByteRangeInputStream(URLOpener o, URLOpener r) { 087 this.originalURL = o; 088 this.resolvedURL = r; 089 } 090 091 protected abstract void checkResponseCode(final HttpURLConnection connection 092 ) throws IOException; 093 094 protected abstract URL getResolvedUrl(final HttpURLConnection connection 095 ) throws IOException; 096 097 @VisibleForTesting 098 protected InputStream getInputStream() throws IOException { 099 switch (status) { 100 case NORMAL: 101 break; 102 case SEEK: 103 if (in != null) { 104 in.close(); 105 } 106 in = openInputStream(); 107 status = StreamStatus.NORMAL; 108 break; 109 case CLOSED: 110 throw new IOException("Stream closed"); 111 } 112 return in; 113 } 114 115 @VisibleForTesting 116 protected InputStream openInputStream() throws IOException { 117 // Use the original url if no resolved url exists, eg. if 118 // it's the first time a request is made. 119 final URLOpener opener = 120 (resolvedURL.getURL() == null) ? originalURL : resolvedURL; 121 122 final HttpURLConnection connection = opener.openConnection(startPos); 123 connection.connect(); 124 checkResponseCode(connection); 125 126 resolvedURL.setURL(getResolvedUrl(connection)); 127 128 InputStream in = connection.getInputStream(); 129 final Map<String, List<String>> headers = connection.getHeaderFields(); 130 if (isChunkedTransferEncoding(headers)) { 131 // file length is not known 132 fileLength = null; 133 } else { 134 // for non-chunked transfer-encoding, get content-length 135 final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH); 136 if (cl == null) { 137 throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: " 138 + headers); 139 } 140 final long streamlength = Long.parseLong(cl); 141 fileLength = startPos + streamlength; 142 143 // Java has a bug with >2GB request streams. It won't bounds check 144 // the reads so the transfer blocks until the server times out 145 in = new BoundedInputStream(in, streamlength); 146 } 147 148 return in; 149 } 150 151 private static boolean isChunkedTransferEncoding( 152 final Map<String, List<String>> headers) { 153 return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked") 154 || contains(headers, HttpHeaders.TE, "chunked"); 155 } 156 157 /** Does the HTTP header map contain the given key, value pair? */ 158 private static boolean contains(final Map<String, List<String>> headers, 159 final String key, final String value) { 160 final List<String> values = headers.get(key); 161 if (values != null) { 162 for(String v : values) { 163 for(final StringTokenizer t = new StringTokenizer(v, ","); 164 t.hasMoreTokens(); ) { 165 if (value.equalsIgnoreCase(t.nextToken())) { 166 return true; 167 } 168 } 169 } 170 } 171 return false; 172 } 173 174 private int update(final int n) throws IOException { 175 if (n != -1) { 176 currentPos += n; 177 } else if (fileLength != null && currentPos < fileLength) { 178 throw new IOException("Got EOF but currentPos = " + currentPos 179 + " < filelength = " + fileLength); 180 } 181 return n; 182 } 183 184 @Override 185 public int read() throws IOException { 186 final int b = getInputStream().read(); 187 update((b == -1) ? -1 : 1); 188 return b; 189 } 190 191 @Override 192 public int read(byte b[], int off, int len) throws IOException { 193 return update(getInputStream().read(b, off, len)); 194 } 195 196 /** 197 * Seek to the given offset from the start of the file. 198 * The next read() will be from that location. Can't 199 * seek past the end of the file. 200 */ 201 @Override 202 public void seek(long pos) throws IOException { 203 if (pos != currentPos) { 204 startPos = pos; 205 currentPos = pos; 206 if (status != StreamStatus.CLOSED) { 207 status = StreamStatus.SEEK; 208 } 209 } 210 } 211 212 /** 213 * Return the current offset from the start of the file 214 */ 215 @Override 216 public long getPos() throws IOException { 217 return currentPos; 218 } 219 220 /** 221 * Seeks a different copy of the data. Returns true if 222 * found a new source, false otherwise. 223 */ 224 @Override 225 public boolean seekToNewSource(long targetPos) throws IOException { 226 return false; 227 } 228 229 @Override 230 public void close() throws IOException { 231 if (in != null) { 232 in.close(); 233 in = null; 234 } 235 status = StreamStatus.CLOSED; 236 } 237 }