/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hdfs.web;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.io.input.BoundedInputStream;
import org.apache.hadoop.fs.FSInputStream;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.net.HttpHeaders;

/**
 * To support HTTP byte streams, a new connection to an HTTP server needs to be
 * created each time. This class hides the complexity of those multiple
 * connections from the client. Whenever seek() is called, a new connection
 * is made on the subsequent read(). The normal input stream functions are
 * connected to the currently active input stream.
 */
public abstract class ByteRangeInputStream extends FSInputStream {

  /**
   * This class wraps a URL and provides a method to open a connection.
   * It can be overridden to change how a connection is opened.
   */
  public static abstract class URLOpener {
    protected URL url;

    public URLOpener(URL u) {
      url = u;
    }

    public void setURL(URL u) {
      url = u;
    }

    public URL getURL() {
      return url;
    }

    /** Connect to the server with a data offset. */
    protected abstract HttpURLConnection connect(final long offset,
        final boolean resolved) throws IOException;
  }

  enum StreamStatus {
    NORMAL, SEEK, CLOSED
  }

  protected InputStream in;
  protected URLOpener originalURL;
  protected URLOpener resolvedURL;
  protected long startPos = 0;
  protected long currentPos = 0;
  protected Long fileLength = null;

  StreamStatus status = StreamStatus.SEEK;

  /**
   * Create with the specified URLOpeners. The original URL is used to open the
   * stream for the first time; the resolved URL is used in subsequent requests.
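   * The resolved opener may be created with a null URL; it is populated after
   * the first connection, via {@link #getResolvedUrl(HttpURLConnection)}, so
   * that subsequent requests reuse the resolved location.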
   * @param o Original URL
   * @param r Resolved URL
   */
  public ByteRangeInputStream(URLOpener o, URLOpener r) {
    this.originalURL = o;
    this.resolvedURL = r;
  }

  /**
   * Get the URL to use for subsequent requests, as resolved from an open
   * connection.
   */
  protected abstract URL getResolvedUrl(final HttpURLConnection connection)
      throws IOException;

  @VisibleForTesting
  protected InputStream getInputStream() throws IOException {
    switch (status) {
    case NORMAL:
      break;
    case SEEK:
      if (in != null) {
        in.close();
      }
      in = openInputStream();
      status = StreamStatus.NORMAL;
      break;
    case CLOSED:
      throw new IOException("Stream closed");
    }
    return in;
  }

  @VisibleForTesting
  protected InputStream openInputStream() throws IOException {
    // Use the original url if no resolved url exists, e.g. if
    // it's the first time a request is made.
    final boolean resolved = resolvedURL.getURL() != null;
    final URLOpener opener = resolved ? resolvedURL : originalURL;

    final HttpURLConnection connection = opener.connect(startPos, resolved);
    resolvedURL.setURL(getResolvedUrl(connection));

    InputStream in = connection.getInputStream();
    final Map<String, List<String>> headers = connection.getHeaderFields();
    if (isChunkedTransferEncoding(headers)) {
      // file length is not known
      fileLength = null;
    } else {
      // for non-chunked transfer-encoding, get content-length
      final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
      if (cl == null) {
        throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
            + headers);
      }
      final long streamlength = Long.parseLong(cl);
      fileLength = startPos + streamlength;

      // Java has a bug with >2GB request streams. It won't bounds check
      // the reads so the transfer blocks until the server times out
      in = new BoundedInputStream(in, streamlength);
    }

    return in;
  }

  private static boolean isChunkedTransferEncoding(
      final Map<String, List<String>> headers) {
    return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
        || contains(headers, HttpHeaders.TE, "chunked");
  }

  /** Does the HTTP header map contain the given key, value pair? */
  private static boolean contains(final Map<String, List<String>> headers,
      final String key, final String value) {
    final List<String> values = headers.get(key);
    if (values != null) {
      for (String v : values) {
        for (final StringTokenizer t = new StringTokenizer(v, ",");
            t.hasMoreTokens(); ) {
          if (value.equalsIgnoreCase(t.nextToken())) {
            return true;
          }
        }
      }
    }
    return false;
  }

  /**
   * Update the current position with the result of a read. An EOF (-1) seen
   * before the expected file length, when that length is known, is reported
   * as an error.
   */
  private int update(final int n) throws IOException {
    if (n != -1) {
      currentPos += n;
    } else if (fileLength != null && currentPos < fileLength) {
      throw new IOException("Got EOF but currentPos = " + currentPos
          + " < filelength = " + fileLength);
    }
    return n;
  }

  @Override
  public int read() throws IOException {
    final int b = getInputStream().read();
    update((b == -1) ? -1 : 1);
    return b;
  }

  @Override
  public int read(byte b[], int off, int len) throws IOException {
    return update(getInputStream().read(b, off, len));
  }

  /**
   * Seek to the given offset from the start of the file.
   * The next read() will be from that location; seeking past the end of the
   * file is not allowed.
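   * Seeking is lazy: this call only records the new position and marks the
   * stream for reconnection; the connection at the new offset is opened by
   * the next read(). Seeking to the current position is a no-op.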
   */
  @Override
  public void seek(long pos) throws IOException {
    if (pos != currentPos) {
      startPos = pos;
      currentPos = pos;
      if (status != StreamStatus.CLOSED) {
        status = StreamStatus.SEEK;
      }
    }
  }

  /**
   * Return the current offset from the start of the file.
   */
  @Override
  public long getPos() throws IOException {
    return currentPos;
  }

  /**
   * Seeks a different copy of the data. Returns true if it found a new
   * source, false otherwise. This implementation never switches sources and
   * always returns false.
   */
  @Override
  public boolean seekToNewSource(long targetPos) throws IOException {
    return false;
  }

  @Override
  public void close() throws IOException {
    if (in != null) {
      in.close();
      in = null;
    }
    status = StreamStatus.CLOSED;
  }
}
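
// A minimal usage sketch (the host, port, and path below are assumed, for
// illustration only). Subclasses of ByteRangeInputStream back the streams
// returned by HTTP-based file systems such as WebHdfsFileSystem, so the
// seek-then-reconnect behaviour above is normally exercised through
// FSDataInputStream:
//
//   Configuration conf = new Configuration();
//   FileSystem fs = FileSystem.get(URI.create("webhdfs://namenode:50070"), conf);
//   FSDataInputStream in = fs.open(new Path("/data/file.bin"));
//   in.seek(1024L);        // lazy: only marks the stream for reconnection
//   int first = in.read(); // opens a new HTTP connection starting at offset 1024
//   in.close();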