/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hdfs.web;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.io.input.BoundedInputStream;
import org.apache.hadoop.fs.FSInputStream;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.net.HttpHeaders;

/**
 * To support HTTP byte streams, a new connection to an HTTP server needs to be
 * created each time. This class hides the complexity of those multiple
 * connections from the client. Whenever seek() moves the position, a new
 * connection is made on the successive read(). The normal input stream
 * functions are connected to the currently active input stream.
 */
public abstract class ByteRangeInputStream extends FSInputStream {

  /**
   * This class wraps a URL and provides method to open connection.
   * It can be overridden to change how a connection is opened.
   */
  public static abstract class URLOpener {
    /** The URL this opener connects to; may be replaced via setURL. */
    protected URL url;

    /**
     * @param u the URL to wrap
     */
    public URLOpener(URL u) {
      url = u;
    }

    /**
     * Replace the wrapped URL.
     * @param u the new URL
     */
    public void setURL(URL u) {
      url = u;
    }

    /**
     * @return the currently wrapped URL
     */
    public URL getURL() {
      return url;
    }

    /**
     * Connect to server with a data offset.
     * @param offset position in the file at which the data should start
     * @param resolved true when this opener holds the resolved url,
     *                 false when it holds the original url
     * @return the opened connection
     * @throws IOException if the connection cannot be established
     */
    protected abstract HttpURLConnection connect(final long offset,
        final boolean resolved) throws IOException;
  }

  /**
   * State of this stream: NORMAL means {@link #in} is usable as-is, SEEK means
   * the position changed and the stream must be (re)opened before the next
   * read, CLOSED means close() has been called.
   */
  enum StreamStatus {
    NORMAL, SEEK, CLOSED
  }

  /** The currently active underlying stream, or null if none is open. */
  protected InputStream in;
  protected final URLOpener originalURL;
  protected final URLOpener resolvedURL;
  /** File offset at which the currently active connection was opened. */
  protected long startPos = 0;
  /** File offset of the next byte to be read. */
  protected long currentPos = 0;
  /** Total file length, or null when it is not known (chunked transfer). */
  protected Long fileLength = null;

  StreamStatus status = StreamStatus.SEEK;

  /**
   * Create with the specified URLOpeners. Original url is used to open the
   * stream for the first time. Resolved url is used in subsequent requests.
   * The stream is opened immediately.
   * @param o Original url
   * @param r Resolved url
   * @throws IOException if the initial connection cannot be opened
   */
  public ByteRangeInputStream(URLOpener o, URLOpener r) throws IOException {
    this.originalURL = o;
    this.resolvedURL = r;
    getInputStream();
  }

  /**
   * Determine the url to use for subsequent requests from a connection that
   * has just been opened.
   * @param connection the connection that was just opened
   * @return the url to store in the resolved URLOpener
   * @throws IOException on failure
   */
  protected abstract URL getResolvedUrl(final HttpURLConnection connection
      ) throws IOException;

  /**
   * Return the active underlying stream, (re)opening it first if a seek has
   * invalidated the current one.
   * @return the stream positioned at the current offset
   * @throws IOException if this stream is closed or a connection cannot be
   *         opened
   */
  @VisibleForTesting
  protected InputStream getInputStream() throws IOException {
    switch (status) {
      case NORMAL:
        break;
      case SEEK:
        if (in != null) {
          in.close();
        }
        in = openInputStream();
        status = StreamStatus.NORMAL;
        break;
      case CLOSED:
        throw new IOException("Stream closed");
    }
    return in;
  }

  /**
   * Open a new connection starting at {@link #startPos} and return its body
   * stream. Updates the resolved url and {@link #fileLength} as a side effect.
   * @return the response body; bounded to Content-Length when that is known
   * @throws IOException if the connection fails, or if the response is not
   *         chunked and its Content-Length is missing or not a number
   */
  @VisibleForTesting
  protected InputStream openInputStream() throws IOException {
    // Use the original url if no resolved url exists, eg. if
    // it's the first time a request is made.
    final boolean resolved = resolvedURL.getURL() != null;
    final URLOpener opener = resolved ? resolvedURL : originalURL;

    final HttpURLConnection connection = opener.connect(startPos, resolved);
    resolvedURL.setURL(getResolvedUrl(connection));

    final InputStream raw = connection.getInputStream();
    boolean success = false;
    try {
      final Map<String, List<String>> headers = connection.getHeaderFields();
      final InputStream result;
      if (isChunkedTransferEncoding(headers)) {
        // file length is not known
        fileLength = null;
        result = raw;
      } else {
        // for non-chunked transfer-encoding, get content-length
        final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
        if (cl == null) {
          throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
              + headers);
        }
        final long streamlength = parseContentLength(cl);
        fileLength = startPos + streamlength;

        // Java has a bug with >2GB request streams.  It won't bounds check
        // the reads so the transfer blocks until the server times out
        result = new BoundedInputStream(raw, streamlength);
      }
      success = true;
      return result;
    } finally {
      // Do not leak the already-opened response stream when header
      // validation fails.
      if (!success && raw != null) {
        try {
          raw.close();
        } catch (IOException ignored) {
          // The validation failure being propagated is the interesting error.
        }
      }
    }
  }

  /**
   * Parse a Content-Length header value, reporting a malformed value as an
   * IOException rather than an unchecked NumberFormatException.
   */
  private static long parseContentLength(final String cl) throws IOException {
    try {
      return Long.parseLong(cl.trim());
    } catch (NumberFormatException e) {
      throw new IOException("Invalid " + HttpHeaders.CONTENT_LENGTH
          + " header: " + cl, e);
    }
  }

  /** Do the response headers declare a chunked transfer encoding? */
  private static boolean isChunkedTransferEncoding(
      final Map<String, List<String>> headers) {
    return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
        || contains(headers, HttpHeaders.TE, "chunked");
  }

  /**
   * Does the HTTP header map contain the given key, value pair?
   * Header names are matched case-insensitively, and each header value is
   * treated as a comma-separated list whose elements may be surrounded by
   * whitespace (e.g. "gzip, chunked").
   */
  private static boolean contains(final Map<String, List<String>> headers,
      final String key, final String value) {
    for (Map.Entry<String, List<String>> header : headers.entrySet()) {
      // A null key (some implementations use one for the status line) never
      // matches: equalsIgnoreCase(null) is false.
      final List<String> values = header.getValue();
      if (values == null || !key.equalsIgnoreCase(header.getKey())) {
        continue;
      }
      for (String v : values) {
        if (v == null) {
          continue;
        }
        for (final StringTokenizer t = new StringTokenizer(v, ",");
            t.hasMoreTokens(); ) {
          if (value.equalsIgnoreCase(t.nextToken().trim())) {
            return true;
          }
        }
      }
    }
    return false;
  }

  /**
   * Advance the current position by the result of a read, and detect a
   * premature end of stream.
   * @param n number of bytes just read, or -1 for end of stream
   * @return n, unchanged
   * @throws IOException if end of stream was reached before the known file
   *         length
   */
  private int update(final int n) throws IOException {
    if (n != -1) {
      currentPos += n;
    } else if (fileLength != null && currentPos < fileLength) {
      throw new IOException("Got EOF but currentPos = " + currentPos
          + " < filelength = " + fileLength);
    }
    return n;
  }

  @Override
  public int read() throws IOException {
    final int b = getInputStream().read();
    update((b == -1) ? -1 : 1);
    return b;
  }

  @Override
  public int read(byte b[], int off, int len) throws IOException {
    return update(getInputStream().read(b, off, len));
  }

  /**
   * Seek to the given offset from the start of the file.
   * The next read() will be from that location, over a new connection.
   * Seeking to the current position is a no-op. The offset is not validated
   * here.
   */
  @Override
  public void seek(long pos) throws IOException {
    if (pos != currentPos) {
      startPos = pos;
      currentPos = pos;
      // A closed stream stays closed; otherwise force a reopen on next read.
      if (status != StreamStatus.CLOSED) {
        status = StreamStatus.SEEK;
      }
    }
  }

  /**
   * Return the current offset from the start of the file
   */
  @Override
  public long getPos() throws IOException {
    return currentPos;
  }

  /**
   * Seeks a different copy of the data.  Returns true if
   * found a new source, false otherwise. This implementation always
   * returns false.
   */
  @Override
  public boolean seekToNewSource(long targetPos) throws IOException {
    return false;
  }

  /**
   * Close the active underlying stream, if any, and mark this stream closed.
   * The stream is marked closed even if closing the underlying stream fails.
   */
  @Override
  public void close() throws IOException {
    try {
      if (in != null) {
        in.close();
      }
    } finally {
      in = null;
      status = StreamStatus.CLOSED;
    }
  }
}