/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hdfs.web;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.io.input.BoundedInputStream;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.http.HttpStatus;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.net.HttpHeaders;

/**
 * To support HTTP byte streams, a new connection to an HTTP server needs to be
 * created each time. This class hides the complexity of those multiple
 * connections from the client. Whenever seek() is called, a new connection
 * is made on the next read(). The normal input stream functions are
 * connected to the currently active input stream.
 */
public abstract class ByteRangeInputStream extends FSInputStream {

  /**
   * This class wraps a URL and provides a method to open a connection.
   * It can be overridden to change how a connection is opened.
   */
  public static abstract class URLOpener {
    protected URL url;

    public URLOpener(URL u) {
      url = u;
    }

    public void setURL(URL u) {
      url = u;
    }

    public URL getURL() {
      return url;
    }

    /** Connect to the server at the given data offset. */
    protected abstract HttpURLConnection connect(final long offset,
        final boolean resolved) throws IOException;
  }
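
  /*
   * Illustrative sketch (not part of this class): a URLOpener subclass could
   * implement connect() by requesting a byte range that starts at the given
   * offset, for example with an HTTP Range header. Other implementations may
   * instead pass the offset as a URL query parameter; the exact mechanism is
   * left to the subclass.
   *
   *   protected HttpURLConnection connect(long offset, boolean resolved)
   *       throws IOException {
   *     HttpURLConnection conn = (HttpURLConnection) url.openConnection();
   *     if (offset != 0L) {
   *       // ask for the suffix of the resource beginning at the offset
   *       conn.setRequestProperty("Range", "bytes=" + offset + "-");
   *     }
   *     conn.connect();
   *     return conn;
   *   }
   */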

  enum StreamStatus {
    NORMAL, SEEK, CLOSED
  }

  protected InputStream in;
  protected final URLOpener originalURL;
  protected final URLOpener resolvedURL;
  protected long startPos = 0;
  protected long currentPos = 0;
  protected Long fileLength = null;

  StreamStatus status = StreamStatus.SEEK;

  /**
   * Create with the specified URLOpeners. The original URL is used to open
   * the stream for the first time; the resolved URL is used for subsequent
   * requests.
   * @param o original URL
   * @param r resolved URL
   */
  public ByteRangeInputStream(URLOpener o, URLOpener r) throws IOException {
    this.originalURL = o;
    this.resolvedURL = r;
    getInputStream();
  }

  protected abstract URL getResolvedUrl(final HttpURLConnection connection)
      throws IOException;

  @VisibleForTesting
  protected InputStream getInputStream() throws IOException {
    switch (status) {
    case NORMAL:
      break;
    case SEEK:
      if (in != null) {
        in.close();
      }
      in = openInputStream();
      status = StreamStatus.NORMAL;
      break;
    case CLOSED:
      throw new IOException("Stream closed");
    }
    return in;
  }

  @VisibleForTesting
  protected InputStream openInputStream() throws IOException {
    // Use the original URL if no resolved URL exists yet, e.g. the first
    // time a request is made.
    final boolean resolved = resolvedURL.getURL() != null;
    final URLOpener opener = resolved ? resolvedURL : originalURL;

    final HttpURLConnection connection = opener.connect(startPos, resolved);
    resolvedURL.setURL(getResolvedUrl(connection));

    InputStream in = connection.getInputStream();
    final Map<String, List<String>> headers = connection.getHeaderFields();
    if (isChunkedTransferEncoding(headers)) {
      // file length is not known
      fileLength = null;
    } else {
      // for non-chunked transfer-encoding, get the content length
      long streamlength = getStreamLength(connection, headers);
      fileLength = startPos + streamlength;

      // Java has a bug with >2GB request streams. It won't bounds-check
      // the reads, so the transfer blocks until the server times out.
      in = new BoundedInputStream(in, streamlength);
    }

    return in;
  }

  private static long getStreamLength(HttpURLConnection connection,
      Map<String, List<String>> headers) throws IOException {
    String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
    if (cl == null) {
      // Try to get the content length by parsing the content range,
      // because HftpFileSystem does not return the content length
      // if the content is partial.
      if (connection.getResponseCode() == HttpStatus.SC_PARTIAL_CONTENT) {
        cl = connection.getHeaderField(HttpHeaders.CONTENT_RANGE);
        return getLengthFromRange(cl);
      } else {
        throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
            + headers);
      }
    }
    return Long.parseLong(cl);
  }
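
  /**
   * Parse the length of the returned byte range from a Content-Range header
   * value of the form "bytes first-last/length": the leading "bytes " prefix
   * (six characters) is skipped and the remainder is split on '-' and '/'.
   * For example, with the illustrative value "bytes 1024-2047/8192" (not
   * taken from this code base) the result is 2047 - 1024 + 1 = 1024 bytes.
   */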
  private static long getLengthFromRange(String cl) throws IOException {
    try {
      String[] str = cl.substring(6).split("[-/]");
      return Long.parseLong(str[1]) - Long.parseLong(str[0]) + 1;
    } catch (Exception e) {
      throw new IOException(
          "failed to get content length by parsing the content range: " + cl
          + " " + e.getMessage());
    }
  }

  private static boolean isChunkedTransferEncoding(
      final Map<String, List<String>> headers) {
    return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
        || contains(headers, HttpHeaders.TE, "chunked");
  }

  /** Does the HTTP header map contain the given key, value pair? */
  private static boolean contains(final Map<String, List<String>> headers,
      final String key, final String value) {
    final List<String> values = headers.get(key);
    if (values != null) {
      for (String v : values) {
        for (final StringTokenizer t = new StringTokenizer(v, ",");
            t.hasMoreTokens(); ) {
          if (value.equalsIgnoreCase(t.nextToken())) {
            return true;
          }
        }
      }
    }
    return false;
  }

  private int update(final int n) throws IOException {
    if (n != -1) {
      currentPos += n;
    } else if (fileLength != null && currentPos < fileLength) {
      throw new IOException("Got EOF but currentPos = " + currentPos
          + " < fileLength = " + fileLength);
    }
    return n;
  }

  @Override
  public int read() throws IOException {
    final int b = getInputStream().read();
    update((b == -1) ? -1 : 1);
    return b;
  }

  @Override
  public int read(byte b[], int off, int len) throws IOException {
    return update(getInputStream().read(b, off, len));
  }

  /**
   * Seek to the given offset from the start of the file.
   * The next read() will be from that location. Cannot
   * seek past the end of the file.
   */
  @Override
  public void seek(long pos) throws IOException {
    if (pos != currentPos) {
      startPos = pos;
      currentPos = pos;
      if (status != StreamStatus.CLOSED) {
        status = StreamStatus.SEEK;
      }
    }
  }

  /**
   * Return the current offset from the start of the file.
   */
  @Override
  public long getPos() throws IOException {
    return currentPos;
  }

  /**
   * Seek to a different copy of the data. Returns true if
   * a new source is found, false otherwise.
   */
  @Override
  public boolean seekToNewSource(long targetPos) throws IOException {
    return false;
  }

  @Override
  public void close() throws IOException {
    if (in != null) {
      in.close();
      in = null;
    }
    status = StreamStatus.CLOSED;
  }
}
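
/*
 * Illustrative usage sketch (hypothetical; assumes some concrete subclass of
 * ByteRangeInputStream, which is not defined in this file):
 *
 *   ByteRangeInputStream in = ...;  // obtained from a concrete subclass
 *   in.seek(1024L);                 // no connection is opened yet
 *   int b = in.read();              // reconnects with startPos == 1024 and
 *                                   // reads the first byte at that offset
 */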