001/** 002res * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hdfs.web; 020 021import java.io.BufferedInputStream; 022import java.io.BufferedOutputStream; 023import java.io.EOFException; 024import java.io.FileNotFoundException; 025import java.io.IOException; 026import java.io.InputStream; 027import java.lang.reflect.InvocationTargetException; 028import java.net.HttpURLConnection; 029import java.net.InetSocketAddress; 030import java.net.MalformedURLException; 031import java.net.URI; 032import java.net.URL; 033import java.security.PrivilegedExceptionAction; 034import java.util.ArrayList; 035import java.util.EnumSet; 036import java.util.List; 037import java.util.Map; 038import java.util.StringTokenizer; 039 040import javax.ws.rs.core.HttpHeaders; 041import javax.ws.rs.core.MediaType; 042 043import org.apache.commons.io.IOUtils; 044import org.apache.commons.io.input.BoundedInputStream; 045import org.apache.commons.logging.Log; 046import org.apache.commons.logging.LogFactory; 047import org.apache.hadoop.conf.Configuration; 048import org.apache.hadoop.fs.BlockLocation; 049import org.apache.hadoop.fs.CommonConfigurationKeys; 050import org.apache.hadoop.fs.ContentSummary; 051import org.apache.hadoop.fs.DelegationTokenRenewer; 052import org.apache.hadoop.fs.FSDataInputStream; 053import org.apache.hadoop.fs.FSDataOutputStream; 054import org.apache.hadoop.fs.FSInputStream; 055import org.apache.hadoop.fs.FileStatus; 056import org.apache.hadoop.fs.FileSystem; 057import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; 058import org.apache.hadoop.fs.Options; 059import org.apache.hadoop.fs.Path; 060import org.apache.hadoop.fs.XAttrCodec; 061import org.apache.hadoop.fs.XAttrSetFlag; 062import org.apache.hadoop.fs.permission.AclEntry; 063import org.apache.hadoop.fs.permission.AclStatus; 064import org.apache.hadoop.fs.permission.FsAction; 065import org.apache.hadoop.fs.permission.FsPermission; 066import org.apache.hadoop.hdfs.DFSConfigKeys; 067import org.apache.hadoop.hdfs.DFSUtil; 068import org.apache.hadoop.hdfs.HAUtil; 069import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 070import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; 071import org.apache.hadoop.hdfs.server.namenode.SafeModeException; 072import org.apache.hadoop.hdfs.web.resources.*; 073import org.apache.hadoop.hdfs.web.resources.HttpOpParam.Op; 074import org.apache.hadoop.io.Text; 075import org.apache.hadoop.io.retry.RetryPolicies; 076import org.apache.hadoop.io.retry.RetryPolicy; 077import org.apache.hadoop.io.retry.RetryUtils; 078import org.apache.hadoop.ipc.RemoteException; 079import org.apache.hadoop.net.NetUtils; 080import org.apache.hadoop.security.AccessControlException; 081import org.apache.hadoop.security.SecurityUtil; 082import org.apache.hadoop.security.UserGroupInformation; 083import org.apache.hadoop.security.token.SecretManager.InvalidToken; 084import org.apache.hadoop.security.token.Token; 085import org.apache.hadoop.security.token.TokenIdentifier; 086import org.apache.hadoop.security.token.TokenSelector; 087import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; 088import org.apache.hadoop.util.Progressable; 089import org.apache.hadoop.util.StringUtils; 090import org.codehaus.jackson.map.ObjectMapper; 091import org.codehaus.jackson.map.ObjectReader; 092 093import com.google.common.annotations.VisibleForTesting; 094import com.google.common.base.Preconditions; 095import com.google.common.collect.Lists; 096 097/** A FileSystem for HDFS over the web. */ 098public class WebHdfsFileSystem extends FileSystem 099 implements DelegationTokenRenewer.Renewable, TokenAspect.TokenManagementDelegator { 100 public static final Log LOG = LogFactory.getLog(WebHdfsFileSystem.class); 101 /** File System URI: {SCHEME}://namenode:port/path/to/file */ 102 public static final String SCHEME = "webhdfs"; 103 /** WebHdfs version. */ 104 public static final int VERSION = 1; 105 /** Http URI: http://namenode:port/{PATH_PREFIX}/path/to/file */ 106 public static final String PATH_PREFIX = "/" + SCHEME + "/v" + VERSION; 107 108 /** Default connection factory may be overridden in tests to use smaller timeout values */ 109 protected URLConnectionFactory connectionFactory; 110 111 /** Delegation token kind */ 112 public static final Text TOKEN_KIND = new Text("WEBHDFS delegation"); 113 114 @VisibleForTesting 115 public static final String CANT_FALLBACK_TO_INSECURE_MSG = 116 "The client is configured to only allow connecting to secure cluster"; 117 118 private boolean canRefreshDelegationToken; 119 120 private UserGroupInformation ugi; 121 private URI uri; 122 private Token<?> delegationToken; 123 protected Text tokenServiceName; 124 private RetryPolicy retryPolicy = null; 125 private Path workingDir; 126 private InetSocketAddress nnAddrs[]; 127 private int currentNNAddrIndex; 128 private boolean disallowFallbackToInsecureCluster; 129 private static final ObjectReader READER = 130 new ObjectMapper().reader(Map.class); 131 132 /** 133 * Return the protocol scheme for the FileSystem. 134 * <p/> 135 * 136 * @return <code>webhdfs</code> 137 */ 138 @Override 139 public String getScheme() { 140 return SCHEME; 141 } 142 143 /** 144 * return the underlying transport protocol (http / https). 145 */ 146 protected String getTransportScheme() { 147 return "http"; 148 } 149 150 protected Text getTokenKind() { 151 return TOKEN_KIND; 152 } 153 154 @Override 155 public synchronized void initialize(URI uri, Configuration conf 156 ) throws IOException { 157 super.initialize(uri, conf); 158 setConf(conf); 159 /** set user pattern based on configuration file */ 160 UserParam.setUserPattern(conf.get( 161 DFSConfigKeys.DFS_WEBHDFS_USER_PATTERN_KEY, 162 DFSConfigKeys.DFS_WEBHDFS_USER_PATTERN_DEFAULT)); 163 164 connectionFactory = URLConnectionFactory 165 .newDefaultURLConnectionFactory(conf); 166 167 ugi = UserGroupInformation.getCurrentUser(); 168 this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); 169 this.nnAddrs = resolveNNAddr(); 170 171 boolean isHA = HAUtil.isClientFailoverConfigured(conf, this.uri); 172 boolean isLogicalUri = isHA && HAUtil.isLogicalUri(conf, this.uri); 173 // In non-HA or non-logical URI case, the code needs to call 174 // getCanonicalUri() in order to handle the case where no port is 175 // specified in the URI 176 this.tokenServiceName = isLogicalUri ? 177 HAUtil.buildTokenServiceForLogicalUri(uri, getScheme()) 178 : SecurityUtil.buildTokenService(getCanonicalUri()); 179 180 if (!isHA) { 181 this.retryPolicy = 182 RetryUtils.getDefaultRetryPolicy( 183 conf, 184 DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY, 185 DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_DEFAULT, 186 DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY, 187 DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT, 188 SafeModeException.class); 189 } else { 190 191 int maxFailoverAttempts = conf.getInt( 192 DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, 193 DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT); 194 int maxRetryAttempts = conf.getInt( 195 DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_KEY, 196 DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT); 197 int failoverSleepBaseMillis = conf.getInt( 198 DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY, 199 DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT); 200 int failoverSleepMaxMillis = conf.getInt( 201 DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY, 202 DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT); 203 204 this.retryPolicy = RetryPolicies 205 .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, 206 maxFailoverAttempts, maxRetryAttempts, failoverSleepBaseMillis, 207 failoverSleepMaxMillis); 208 } 209 210 this.workingDir = getHomeDirectory(); 211 this.canRefreshDelegationToken = UserGroupInformation.isSecurityEnabled(); 212 this.disallowFallbackToInsecureCluster = !conf.getBoolean( 213 CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, 214 CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT); 215 this.delegationToken = null; 216 } 217 218 @Override 219 public URI getCanonicalUri() { 220 return super.getCanonicalUri(); 221 } 222 223 /** Is WebHDFS enabled in conf? */ 224 public static boolean isEnabled(final Configuration conf, final Log log) { 225 final boolean b = conf.getBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, 226 DFSConfigKeys.DFS_WEBHDFS_ENABLED_DEFAULT); 227 return b; 228 } 229 230 TokenSelector<DelegationTokenIdentifier> tokenSelector = 231 new AbstractDelegationTokenSelector<DelegationTokenIdentifier>(getTokenKind()){}; 232 233 // the first getAuthParams() for a non-token op will either get the 234 // internal token from the ugi or lazy fetch one 235 protected synchronized Token<?> getDelegationToken() throws IOException { 236 if (canRefreshDelegationToken && delegationToken == null) { 237 Token<?> token = tokenSelector.selectToken( 238 new Text(getCanonicalServiceName()), ugi.getTokens()); 239 // ugi tokens are usually indicative of a task which can't 240 // refetch tokens. even if ugi has credentials, don't attempt 241 // to get another token to match hdfs/rpc behavior 242 if (token != null) { 243 LOG.debug("Using UGI token: " + token); 244 canRefreshDelegationToken = false; 245 } else { 246 token = getDelegationToken(null); 247 if (token != null) { 248 LOG.debug("Fetched new token: " + token); 249 } else { // security is disabled 250 canRefreshDelegationToken = false; 251 } 252 } 253 setDelegationToken(token); 254 } 255 return delegationToken; 256 } 257 258 @VisibleForTesting 259 synchronized boolean replaceExpiredDelegationToken() throws IOException { 260 boolean replaced = false; 261 if (canRefreshDelegationToken) { 262 Token<?> token = getDelegationToken(null); 263 LOG.debug("Replaced expired token: " + token); 264 setDelegationToken(token); 265 replaced = (token != null); 266 } 267 return replaced; 268 } 269 270 @Override 271 @VisibleForTesting 272 public int getDefaultPort() { 273 return getConf().getInt(DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_KEY, 274 DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT); 275 } 276 277 @Override 278 public URI getUri() { 279 return this.uri; 280 } 281 282 @Override 283 protected URI canonicalizeUri(URI uri) { 284 return NetUtils.getCanonicalUri(uri, getDefaultPort()); 285 } 286 287 /** @return the home directory. */ 288 public static String getHomeDirectoryString(final UserGroupInformation ugi) { 289 return "/user/" + ugi.getShortUserName(); 290 } 291 292 @Override 293 public Path getHomeDirectory() { 294 return makeQualified(new Path(getHomeDirectoryString(ugi))); 295 } 296 297 @Override 298 public synchronized Path getWorkingDirectory() { 299 return workingDir; 300 } 301 302 @Override 303 public synchronized void setWorkingDirectory(final Path dir) { 304 String result = makeAbsolute(dir).toUri().getPath(); 305 if (!DFSUtil.isValidName(result)) { 306 throw new IllegalArgumentException("Invalid DFS directory name " + 307 result); 308 } 309 workingDir = makeAbsolute(dir); 310 } 311 312 private Path makeAbsolute(Path f) { 313 return f.isAbsolute()? f: new Path(workingDir, f); 314 } 315 316 static Map<?, ?> jsonParse(final HttpURLConnection c, final boolean useErrorStream 317 ) throws IOException { 318 if (c.getContentLength() == 0) { 319 return null; 320 } 321 final InputStream in = useErrorStream? c.getErrorStream(): c.getInputStream(); 322 if (in == null) { 323 throw new IOException("The " + (useErrorStream? "error": "input") + " stream is null."); 324 } 325 try { 326 final String contentType = c.getContentType(); 327 if (contentType != null) { 328 final MediaType parsed = MediaType.valueOf(contentType); 329 if (!MediaType.APPLICATION_JSON_TYPE.isCompatible(parsed)) { 330 throw new IOException("Content-Type \"" + contentType 331 + "\" is incompatible with \"" + MediaType.APPLICATION_JSON 332 + "\" (parsed=\"" + parsed + "\")"); 333 } 334 } 335 return READER.readValue(in); 336 } finally { 337 in.close(); 338 } 339 } 340 341 private static Map<?, ?> validateResponse(final HttpOpParam.Op op, 342 final HttpURLConnection conn, boolean unwrapException) throws IOException { 343 final int code = conn.getResponseCode(); 344 // server is demanding an authentication we don't support 345 if (code == HttpURLConnection.HTTP_UNAUTHORIZED) { 346 // match hdfs/rpc exception 347 throw new AccessControlException(conn.getResponseMessage()); 348 } 349 if (code != op.getExpectedHttpResponseCode()) { 350 final Map<?, ?> m; 351 try { 352 m = jsonParse(conn, true); 353 } catch(Exception e) { 354 throw new IOException("Unexpected HTTP response: code=" + code + " != " 355 + op.getExpectedHttpResponseCode() + ", " + op.toQueryString() 356 + ", message=" + conn.getResponseMessage(), e); 357 } 358 359 if (m == null) { 360 throw new IOException("Unexpected HTTP response: code=" + code + " != " 361 + op.getExpectedHttpResponseCode() + ", " + op.toQueryString() 362 + ", message=" + conn.getResponseMessage()); 363 } else if (m.get(RemoteException.class.getSimpleName()) == null) { 364 return m; 365 } 366 367 IOException re = JsonUtil.toRemoteException(m); 368 // extract UGI-related exceptions and unwrap InvalidToken 369 // the NN mangles these exceptions but the DN does not and may need 370 // to re-fetch a token if either report the token is expired 371 if (re.getMessage() != null && re.getMessage().startsWith( 372 SecurityUtil.FAILED_TO_GET_UGI_MSG_HEADER)) { 373 String[] parts = re.getMessage().split(":\\s+", 3); 374 re = new RemoteException(parts[1], parts[2]); 375 re = ((RemoteException)re).unwrapRemoteException(InvalidToken.class); 376 } 377 throw unwrapException? toIOException(re): re; 378 } 379 return null; 380 } 381 382 /** 383 * Covert an exception to an IOException. 384 * 385 * For a non-IOException, wrap it with IOException. 386 * For a RemoteException, unwrap it. 387 * For an IOException which is not a RemoteException, return it. 388 */ 389 private static IOException toIOException(Exception e) { 390 if (!(e instanceof IOException)) { 391 return new IOException(e); 392 } 393 394 final IOException ioe = (IOException)e; 395 if (!(ioe instanceof RemoteException)) { 396 return ioe; 397 } 398 399 return ((RemoteException)ioe).unwrapRemoteException(); 400 } 401 402 private synchronized InetSocketAddress getCurrentNNAddr() { 403 return nnAddrs[currentNNAddrIndex]; 404 } 405 406 /** 407 * Reset the appropriate state to gracefully fail over to another name node 408 */ 409 private synchronized void resetStateToFailOver() { 410 currentNNAddrIndex = (currentNNAddrIndex + 1) % nnAddrs.length; 411 } 412 413 /** 414 * Return a URL pointing to given path on the namenode. 415 * 416 * @param path to obtain the URL for 417 * @param query string to append to the path 418 * @return namenode URL referring to the given path 419 * @throws IOException on error constructing the URL 420 */ 421 private URL getNamenodeURL(String path, String query) throws IOException { 422 InetSocketAddress nnAddr = getCurrentNNAddr(); 423 final URL url = new URL(getTransportScheme(), nnAddr.getHostName(), 424 nnAddr.getPort(), path + '?' + query); 425 if (LOG.isTraceEnabled()) { 426 LOG.trace("url=" + url); 427 } 428 return url; 429 } 430 431 Param<?,?>[] getAuthParameters(final HttpOpParam.Op op) throws IOException { 432 List<Param<?,?>> authParams = Lists.newArrayList(); 433 // Skip adding delegation token for token operations because these 434 // operations require authentication. 435 Token<?> token = null; 436 if (!op.getRequireAuth()) { 437 token = getDelegationToken(); 438 } 439 if (token != null) { 440 authParams.add(new DelegationParam(token.encodeToUrlString())); 441 } else { 442 UserGroupInformation userUgi = ugi; 443 UserGroupInformation realUgi = userUgi.getRealUser(); 444 if (realUgi != null) { // proxy user 445 authParams.add(new DoAsParam(userUgi.getShortUserName())); 446 userUgi = realUgi; 447 } 448 authParams.add(new UserParam(userUgi.getShortUserName())); 449 } 450 return authParams.toArray(new Param<?,?>[0]); 451 } 452 453 URL toUrl(final HttpOpParam.Op op, final Path fspath, 454 final Param<?,?>... parameters) throws IOException { 455 //initialize URI path and query 456 final String path = PATH_PREFIX 457 + (fspath == null? "/": makeQualified(fspath).toUri().getRawPath()); 458 final String query = op.toQueryString() 459 + Param.toSortedString("&", getAuthParameters(op)) 460 + Param.toSortedString("&", parameters); 461 final URL url = getNamenodeURL(path, query); 462 if (LOG.isTraceEnabled()) { 463 LOG.trace("url=" + url); 464 } 465 return url; 466 } 467 468 /** 469 * This class is for initialing a HTTP connection, connecting to server, 470 * obtaining a response, and also handling retry on failures. 471 */ 472 abstract class AbstractRunner<T> { 473 abstract protected URL getUrl() throws IOException; 474 475 protected final HttpOpParam.Op op; 476 private final boolean redirected; 477 protected ExcludeDatanodesParam excludeDatanodes = new ExcludeDatanodesParam(""); 478 479 private boolean checkRetry; 480 private String redirectHost; 481 482 protected AbstractRunner(final HttpOpParam.Op op, boolean redirected) { 483 this.op = op; 484 this.redirected = redirected; 485 } 486 487 T run() throws IOException { 488 UserGroupInformation connectUgi = ugi.getRealUser(); 489 if (connectUgi == null) { 490 connectUgi = ugi; 491 } 492 if (op.getRequireAuth()) { 493 connectUgi.checkTGTAndReloginFromKeytab(); 494 } 495 try { 496 // the entire lifecycle of the connection must be run inside the 497 // doAs to ensure authentication is performed correctly 498 return connectUgi.doAs( 499 new PrivilegedExceptionAction<T>() { 500 @Override 501 public T run() throws IOException { 502 return runWithRetry(); 503 } 504 }); 505 } catch (InterruptedException e) { 506 throw new IOException(e); 507 } 508 } 509 510 /** 511 * Two-step requests redirected to a DN 512 * 513 * Create/Append: 514 * Step 1) Submit a Http request with neither auto-redirect nor data. 515 * Step 2) Submit another Http request with the URL from the Location header with data. 516 * 517 * The reason of having two-step create/append is for preventing clients to 518 * send out the data before the redirect. This issue is addressed by the 519 * "Expect: 100-continue" header in HTTP/1.1; see RFC 2616, Section 8.2.3. 520 * Unfortunately, there are software library bugs (e.g. Jetty 6 http server 521 * and Java 6 http client), which do not correctly implement "Expect: 522 * 100-continue". The two-step create/append is a temporary workaround for 523 * the software library bugs. 524 * 525 * Open/Checksum 526 * Also implements two-step connects for other operations redirected to 527 * a DN such as open and checksum 528 */ 529 protected HttpURLConnection connect(URL url) throws IOException { 530 //redirect hostname and port 531 redirectHost = null; 532 533 534 // resolve redirects for a DN operation unless already resolved 535 if (op.getRedirect() && !redirected) { 536 final HttpOpParam.Op redirectOp = 537 HttpOpParam.TemporaryRedirectOp.valueOf(op); 538 final HttpURLConnection conn = connect(redirectOp, url); 539 // application level proxy like httpfs might not issue a redirect 540 if (conn.getResponseCode() == op.getExpectedHttpResponseCode()) { 541 return conn; 542 } 543 try { 544 validateResponse(redirectOp, conn, false); 545 url = new URL(conn.getHeaderField("Location")); 546 redirectHost = url.getHost() + ":" + url.getPort(); 547 } finally { 548 conn.disconnect(); 549 } 550 } 551 try { 552 return connect(op, url); 553 } catch (IOException ioe) { 554 if (redirectHost != null) { 555 if (excludeDatanodes.getValue() != null) { 556 excludeDatanodes = new ExcludeDatanodesParam(redirectHost + "," 557 + excludeDatanodes.getValue()); 558 } else { 559 excludeDatanodes = new ExcludeDatanodesParam(redirectHost); 560 } 561 } 562 throw ioe; 563 } 564 } 565 566 private HttpURLConnection connect(final HttpOpParam.Op op, final URL url) 567 throws IOException { 568 final HttpURLConnection conn = 569 (HttpURLConnection)connectionFactory.openConnection(url); 570 final boolean doOutput = op.getDoOutput(); 571 conn.setRequestMethod(op.getType().toString()); 572 conn.setInstanceFollowRedirects(false); 573 switch (op.getType()) { 574 // if not sending a message body for a POST or PUT operation, need 575 // to ensure the server/proxy knows this 576 case POST: 577 case PUT: { 578 conn.setDoOutput(true); 579 if (!doOutput) { 580 // explicitly setting content-length to 0 won't do spnego!! 581 // opening and closing the stream will send "Content-Length: 0" 582 conn.getOutputStream().close(); 583 } else { 584 conn.setRequestProperty("Content-Type", 585 MediaType.APPLICATION_OCTET_STREAM); 586 conn.setChunkedStreamingMode(32 << 10); //32kB-chunk 587 } 588 break; 589 } 590 default: { 591 conn.setDoOutput(doOutput); 592 break; 593 } 594 } 595 conn.connect(); 596 return conn; 597 } 598 599 private T runWithRetry() throws IOException { 600 /** 601 * Do the real work. 602 * 603 * There are three cases that the code inside the loop can throw an 604 * IOException: 605 * 606 * <ul> 607 * <li>The connection has failed (e.g., ConnectException, 608 * @see FailoverOnNetworkExceptionRetry for more details)</li> 609 * <li>The namenode enters the standby state (i.e., StandbyException).</li> 610 * <li>The server returns errors for the command (i.e., RemoteException)</li> 611 * </ul> 612 * 613 * The call to shouldRetry() will conduct the retry policy. The policy 614 * examines the exception and swallows it if it decides to rerun the work. 615 */ 616 for(int retry = 0; ; retry++) { 617 checkRetry = !redirected; 618 final URL url = getUrl(); 619 try { 620 final HttpURLConnection conn = connect(url); 621 // output streams will validate on close 622 if (!op.getDoOutput()) { 623 validateResponse(op, conn, false); 624 } 625 return getResponse(conn); 626 } catch (AccessControlException ace) { 627 // no retries for auth failures 628 throw ace; 629 } catch (InvalidToken it) { 630 // try to replace the expired token with a new one. the attempt 631 // to acquire a new token must be outside this operation's retry 632 // so if it fails after its own retries, this operation fails too. 633 if (op.getRequireAuth() || !replaceExpiredDelegationToken()) { 634 throw it; 635 } 636 } catch (IOException ioe) { 637 // Attempt to include the redirected node in the exception. If the 638 // attempt to recreate the exception fails, just use the original. 639 String node = redirectHost; 640 if (node == null) { 641 node = url.getAuthority(); 642 } 643 try { 644 IOException newIoe = ioe.getClass().getConstructor(String.class) 645 .newInstance(node + ": " + ioe.getMessage()); 646 newIoe.setStackTrace(ioe.getStackTrace()); 647 ioe = newIoe; 648 } catch (NoSuchMethodException | SecurityException 649 | InstantiationException | IllegalAccessException 650 | IllegalArgumentException | InvocationTargetException e) { 651 } 652 shouldRetry(ioe, retry); 653 } 654 } 655 } 656 657 private void shouldRetry(final IOException ioe, final int retry 658 ) throws IOException { 659 InetSocketAddress nnAddr = getCurrentNNAddr(); 660 if (checkRetry) { 661 try { 662 final RetryPolicy.RetryAction a = retryPolicy.shouldRetry( 663 ioe, retry, 0, true); 664 665 boolean isRetry = a.action == RetryPolicy.RetryAction.RetryDecision.RETRY; 666 boolean isFailoverAndRetry = 667 a.action == RetryPolicy.RetryAction.RetryDecision.FAILOVER_AND_RETRY; 668 669 if (isRetry || isFailoverAndRetry) { 670 LOG.info("Retrying connect to namenode: " + nnAddr 671 + ". Already tried " + retry + " time(s); retry policy is " 672 + retryPolicy + ", delay " + a.delayMillis + "ms."); 673 674 if (isFailoverAndRetry) { 675 resetStateToFailOver(); 676 } 677 678 Thread.sleep(a.delayMillis); 679 return; 680 } 681 } catch(Exception e) { 682 LOG.warn("Original exception is ", ioe); 683 throw toIOException(e); 684 } 685 } 686 throw toIOException(ioe); 687 } 688 689 abstract T getResponse(HttpURLConnection conn) throws IOException; 690 } 691 692 /** 693 * Abstract base class to handle path-based operations with params 694 */ 695 abstract class AbstractFsPathRunner<T> extends AbstractRunner<T> { 696 private final Path fspath; 697 private Param<?,?>[] parameters; 698 699 AbstractFsPathRunner(final HttpOpParam.Op op, final Path fspath, 700 Param<?,?>... parameters) { 701 super(op, false); 702 this.fspath = fspath; 703 this.parameters = parameters; 704 } 705 706 AbstractFsPathRunner(final HttpOpParam.Op op, Param<?,?>[] parameters, 707 final Path fspath) { 708 super(op, false); 709 this.fspath = fspath; 710 this.parameters = parameters; 711 } 712 713 protected void updateURLParameters(Param<?, ?>... p) { 714 this.parameters = p; 715 } 716 717 @Override 718 protected URL getUrl() throws IOException { 719 if (excludeDatanodes.getValue() != null) { 720 Param<?, ?>[] tmpParam = new Param<?, ?>[parameters.length + 1]; 721 System.arraycopy(parameters, 0, tmpParam, 0, parameters.length); 722 tmpParam[parameters.length] = excludeDatanodes; 723 return toUrl(op, fspath, tmpParam); 724 } else { 725 return toUrl(op, fspath, parameters); 726 } 727 } 728 } 729 730 /** 731 * Default path-based implementation expects no json response 732 */ 733 class FsPathRunner extends AbstractFsPathRunner<Void> { 734 FsPathRunner(Op op, Path fspath, Param<?,?>... parameters) { 735 super(op, fspath, parameters); 736 } 737 738 @Override 739 Void getResponse(HttpURLConnection conn) throws IOException { 740 return null; 741 } 742 } 743 744 /** 745 * Handle path-based operations with a json response 746 */ 747 abstract class FsPathResponseRunner<T> extends AbstractFsPathRunner<T> { 748 FsPathResponseRunner(final HttpOpParam.Op op, final Path fspath, 749 Param<?,?>... parameters) { 750 super(op, fspath, parameters); 751 } 752 753 FsPathResponseRunner(final HttpOpParam.Op op, Param<?,?>[] parameters, 754 final Path fspath) { 755 super(op, parameters, fspath); 756 } 757 758 @Override 759 final T getResponse(HttpURLConnection conn) throws IOException { 760 try { 761 final Map<?,?> json = jsonParse(conn, false); 762 if (json == null) { 763 // match exception class thrown by parser 764 throw new IllegalStateException("Missing response"); 765 } 766 return decodeResponse(json); 767 } catch (IOException ioe) { 768 throw ioe; 769 } catch (Exception e) { // catch json parser errors 770 final IOException ioe = 771 new IOException("Response decoding failure: "+e.toString(), e); 772 if (LOG.isDebugEnabled()) { 773 LOG.debug(ioe); 774 } 775 throw ioe; 776 } finally { 777 conn.disconnect(); 778 } 779 } 780 781 abstract T decodeResponse(Map<?,?> json) throws IOException; 782 } 783 784 /** 785 * Handle path-based operations with json boolean response 786 */ 787 class FsPathBooleanRunner extends FsPathResponseRunner<Boolean> { 788 FsPathBooleanRunner(Op op, Path fspath, Param<?,?>... parameters) { 789 super(op, fspath, parameters); 790 } 791 792 @Override 793 Boolean decodeResponse(Map<?,?> json) throws IOException { 794 return (Boolean)json.get("boolean"); 795 } 796 } 797 798 /** 799 * Handle create/append output streams 800 */ 801 class FsPathOutputStreamRunner extends AbstractFsPathRunner<FSDataOutputStream> { 802 private final int bufferSize; 803 804 FsPathOutputStreamRunner(Op op, Path fspath, int bufferSize, 805 Param<?,?>... parameters) { 806 super(op, fspath, parameters); 807 this.bufferSize = bufferSize; 808 } 809 810 @Override 811 FSDataOutputStream getResponse(final HttpURLConnection conn) 812 throws IOException { 813 return new FSDataOutputStream(new BufferedOutputStream( 814 conn.getOutputStream(), bufferSize), statistics) { 815 @Override 816 public void close() throws IOException { 817 try { 818 super.close(); 819 } finally { 820 try { 821 validateResponse(op, conn, true); 822 } finally { 823 conn.disconnect(); 824 } 825 } 826 } 827 }; 828 } 829 } 830 831 class FsPathConnectionRunner extends AbstractFsPathRunner<HttpURLConnection> { 832 FsPathConnectionRunner(Op op, Path fspath, Param<?,?>... parameters) { 833 super(op, fspath, parameters); 834 } 835 @Override 836 HttpURLConnection getResponse(final HttpURLConnection conn) 837 throws IOException { 838 return conn; 839 } 840 } 841 842 /** 843 * Used by open() which tracks the resolved url itself 844 */ 845 final class URLRunner extends AbstractRunner<HttpURLConnection> { 846 private final URL url; 847 @Override 848 protected URL getUrl() { 849 return url; 850 } 851 852 protected URLRunner(final HttpOpParam.Op op, final URL url, boolean redirected) { 853 super(op, redirected); 854 this.url = url; 855 } 856 857 @Override 858 HttpURLConnection getResponse(HttpURLConnection conn) throws IOException { 859 return conn; 860 } 861 } 862 863 private FsPermission applyUMask(FsPermission permission) { 864 if (permission == null) { 865 permission = FsPermission.getDefault(); 866 } 867 return permission.applyUMask(FsPermission.getUMask(getConf())); 868 } 869 870 private HdfsFileStatus getHdfsFileStatus(Path f) throws IOException { 871 final HttpOpParam.Op op = GetOpParam.Op.GETFILESTATUS; 872 HdfsFileStatus status = new FsPathResponseRunner<HdfsFileStatus>(op, f) { 873 @Override 874 HdfsFileStatus decodeResponse(Map<?,?> json) { 875 return JsonUtil.toFileStatus(json, true); 876 } 877 }.run(); 878 if (status == null) { 879 throw new FileNotFoundException("File does not exist: " + f); 880 } 881 return status; 882 } 883 884 @Override 885 public FileStatus getFileStatus(Path f) throws IOException { 886 statistics.incrementReadOps(1); 887 return makeQualified(getHdfsFileStatus(f), f); 888 } 889 890 private FileStatus makeQualified(HdfsFileStatus f, Path parent) { 891 return new FileStatus(f.getLen(), f.isDir(), f.getReplication(), 892 f.getBlockSize(), f.getModificationTime(), f.getAccessTime(), 893 f.getPermission(), f.getOwner(), f.getGroup(), 894 f.isSymlink() ? new Path(f.getSymlink()) : null, 895 f.getFullPath(parent).makeQualified(getUri(), getWorkingDirectory())); 896 } 897 898 @Override 899 public AclStatus getAclStatus(Path f) throws IOException { 900 final HttpOpParam.Op op = GetOpParam.Op.GETACLSTATUS; 901 AclStatus status = new FsPathResponseRunner<AclStatus>(op, f) { 902 @Override 903 AclStatus decodeResponse(Map<?,?> json) { 904 return JsonUtil.toAclStatus(json); 905 } 906 }.run(); 907 if (status == null) { 908 throw new FileNotFoundException("File does not exist: " + f); 909 } 910 return status; 911 } 912 913 @Override 914 public boolean mkdirs(Path f, FsPermission permission) throws IOException { 915 statistics.incrementWriteOps(1); 916 final HttpOpParam.Op op = PutOpParam.Op.MKDIRS; 917 return new FsPathBooleanRunner(op, f, 918 new PermissionParam(applyUMask(permission)) 919 ).run(); 920 } 921 922 /** 923 * Create a symlink pointing to the destination path. 924 * @see org.apache.hadoop.fs.Hdfs#createSymlink(Path, Path, boolean) 925 */ 926 public void createSymlink(Path destination, Path f, boolean createParent 927 ) throws IOException { 928 statistics.incrementWriteOps(1); 929 final HttpOpParam.Op op = PutOpParam.Op.CREATESYMLINK; 930 new FsPathRunner(op, f, 931 new DestinationParam(makeQualified(destination).toUri().getPath()), 932 new CreateParentParam(createParent) 933 ).run(); 934 } 935 936 @Override 937 public boolean rename(final Path src, final Path dst) throws IOException { 938 statistics.incrementWriteOps(1); 939 final HttpOpParam.Op op = PutOpParam.Op.RENAME; 940 return new FsPathBooleanRunner(op, src, 941 new DestinationParam(makeQualified(dst).toUri().getPath()) 942 ).run(); 943 } 944 945 @SuppressWarnings("deprecation") 946 @Override 947 public void rename(final Path src, final Path dst, 948 final Options.Rename... options) throws IOException { 949 statistics.incrementWriteOps(1); 950 final HttpOpParam.Op op = PutOpParam.Op.RENAME; 951 new FsPathRunner(op, src, 952 new DestinationParam(makeQualified(dst).toUri().getPath()), 953 new RenameOptionSetParam(options) 954 ).run(); 955 } 956 957 @Override 958 public void setXAttr(Path p, String name, byte[] value, 959 EnumSet<XAttrSetFlag> flag) throws IOException { 960 statistics.incrementWriteOps(1); 961 final HttpOpParam.Op op = PutOpParam.Op.SETXATTR; 962 if (value != null) { 963 new FsPathRunner(op, p, new XAttrNameParam(name), new XAttrValueParam( 964 XAttrCodec.encodeValue(value, XAttrCodec.HEX)), 965 new XAttrSetFlagParam(flag)).run(); 966 } else { 967 new FsPathRunner(op, p, new XAttrNameParam(name), 968 new XAttrSetFlagParam(flag)).run(); 969 } 970 } 971 972 @Override 973 public byte[] getXAttr(Path p, final String name) throws IOException { 974 final HttpOpParam.Op op = GetOpParam.Op.GETXATTRS; 975 return new FsPathResponseRunner<byte[]>(op, p, new XAttrNameParam(name), 976 new XAttrEncodingParam(XAttrCodec.HEX)) { 977 @Override 978 byte[] decodeResponse(Map<?, ?> json) throws IOException { 979 return JsonUtil.getXAttr(json, name); 980 } 981 }.run(); 982 } 983 984 @Override 985 public Map<String, byte[]> getXAttrs(Path p) throws IOException { 986 final HttpOpParam.Op op = GetOpParam.Op.GETXATTRS; 987 return new FsPathResponseRunner<Map<String, byte[]>>(op, p, 988 new XAttrEncodingParam(XAttrCodec.HEX)) { 989 @Override 990 Map<String, byte[]> decodeResponse(Map<?, ?> json) throws IOException { 991 return JsonUtil.toXAttrs(json); 992 } 993 }.run(); 994 } 995 996 @Override 997 public Map<String, byte[]> getXAttrs(Path p, final List<String> names) 998 throws IOException { 999 Preconditions.checkArgument(names != null && !names.isEmpty(), 1000 "XAttr names cannot be null or empty."); 1001 Param<?,?>[] parameters = new Param<?,?>[names.size() + 1]; 1002 for (int i = 0; i < parameters.length - 1; i++) { 1003 parameters[i] = new XAttrNameParam(names.get(i)); 1004 } 1005 parameters[parameters.length - 1] = new XAttrEncodingParam(XAttrCodec.HEX); 1006 1007 final HttpOpParam.Op op = GetOpParam.Op.GETXATTRS; 1008 return new FsPathResponseRunner<Map<String, byte[]>>(op, parameters, p) { 1009 @Override 1010 Map<String, byte[]> decodeResponse(Map<?, ?> json) throws IOException { 1011 return JsonUtil.toXAttrs(json); 1012 } 1013 }.run(); 1014 } 1015 1016 @Override 1017 public List<String> listXAttrs(Path p) throws IOException { 1018 final HttpOpParam.Op op = GetOpParam.Op.LISTXATTRS; 1019 return new FsPathResponseRunner<List<String>>(op, p) { 1020 @Override 1021 List<String> decodeResponse(Map<?, ?> json) throws IOException { 1022 return JsonUtil.toXAttrNames(json); 1023 } 1024 }.run(); 1025 } 1026 1027 @Override 1028 public void removeXAttr(Path p, String name) throws IOException { 1029 statistics.incrementWriteOps(1); 1030 final HttpOpParam.Op op = PutOpParam.Op.REMOVEXATTR; 1031 new FsPathRunner(op, p, new XAttrNameParam(name)).run(); 1032 } 1033 1034 @Override 1035 public void setOwner(final Path p, final String owner, final String group 1036 ) throws IOException { 1037 if (owner == null && group == null) { 1038 throw new IOException("owner == null && group == null"); 1039 } 1040 1041 statistics.incrementWriteOps(1); 1042 final HttpOpParam.Op op = PutOpParam.Op.SETOWNER; 1043 new FsPathRunner(op, p, 1044 new OwnerParam(owner), new GroupParam(group) 1045 ).run(); 1046 } 1047 1048 @Override 1049 public void setPermission(final Path p, final FsPermission permission 1050 ) throws IOException { 1051 statistics.incrementWriteOps(1); 1052 final HttpOpParam.Op op = PutOpParam.Op.SETPERMISSION; 1053 new FsPathRunner(op, p,new PermissionParam(permission)).run(); 1054 } 1055 1056 @Override 1057 public void modifyAclEntries(Path path, List<AclEntry> aclSpec) 1058 throws IOException { 1059 statistics.incrementWriteOps(1); 1060 final HttpOpParam.Op op = PutOpParam.Op.MODIFYACLENTRIES; 1061 new FsPathRunner(op, path, new AclPermissionParam(aclSpec)).run(); 1062 } 1063 1064 @Override 1065 public void removeAclEntries(Path path, List<AclEntry> aclSpec) 1066 throws IOException { 1067 statistics.incrementWriteOps(1); 1068 final HttpOpParam.Op op = PutOpParam.Op.REMOVEACLENTRIES; 1069 new FsPathRunner(op, path, new AclPermissionParam(aclSpec)).run(); 1070 } 1071 1072 @Override 1073 public void removeDefaultAcl(Path path) throws IOException { 1074 statistics.incrementWriteOps(1); 1075 final HttpOpParam.Op op = PutOpParam.Op.REMOVEDEFAULTACL; 1076 new FsPathRunner(op, path).run(); 1077 } 1078 1079 @Override 1080 public void removeAcl(Path path) throws IOException { 1081 statistics.incrementWriteOps(1); 1082 final HttpOpParam.Op op = PutOpParam.Op.REMOVEACL; 1083 new FsPathRunner(op, path).run(); 1084 } 1085 1086 @Override 1087 public void setAcl(final Path p, final List<AclEntry> aclSpec) 1088 throws IOException { 1089 statistics.incrementWriteOps(1); 1090 final HttpOpParam.Op op = PutOpParam.Op.SETACL; 1091 new FsPathRunner(op, p, new AclPermissionParam(aclSpec)).run(); 1092 } 1093 1094 @Override 1095 public Path createSnapshot(final Path path, final String snapshotName) 1096 throws IOException { 1097 statistics.incrementWriteOps(1); 1098 final HttpOpParam.Op op = PutOpParam.Op.CREATESNAPSHOT; 1099 Path spath = new FsPathResponseRunner<Path>(op, path, 1100 new SnapshotNameParam(snapshotName)) { 1101 @Override 1102 Path decodeResponse(Map<?,?> json) { 1103 return new Path((String) json.get(Path.class.getSimpleName())); 1104 } 1105 }.run(); 1106 return spath; 1107 } 1108 1109 @Override 1110 public void deleteSnapshot(final Path path, final String snapshotName) 1111 throws IOException { 1112 statistics.incrementWriteOps(1); 1113 final HttpOpParam.Op op = DeleteOpParam.Op.DELETESNAPSHOT; 1114 new FsPathRunner(op, path, new SnapshotNameParam(snapshotName)).run(); 1115 } 1116 1117 @Override 1118 public void renameSnapshot(final Path path, final String snapshotOldName, 1119 final String snapshotNewName) throws IOException { 1120 statistics.incrementWriteOps(1); 1121 final HttpOpParam.Op op = PutOpParam.Op.RENAMESNAPSHOT; 1122 new FsPathRunner(op, path, new OldSnapshotNameParam(snapshotOldName), 1123 new SnapshotNameParam(snapshotNewName)).run(); 1124 } 1125 1126 @Override 1127 public boolean setReplication(final Path p, final short replication 1128 ) throws IOException { 1129 statistics.incrementWriteOps(1); 1130 final HttpOpParam.Op op = PutOpParam.Op.SETREPLICATION; 1131 return new FsPathBooleanRunner(op, p, 1132 new ReplicationParam(replication) 1133 ).run(); 1134 } 1135 1136 @Override 1137 public void setTimes(final Path p, final long mtime, final long atime 1138 ) throws IOException { 1139 statistics.incrementWriteOps(1); 1140 final HttpOpParam.Op op = PutOpParam.Op.SETTIMES; 1141 new FsPathRunner(op, p, 1142 new ModificationTimeParam(mtime), 1143 new AccessTimeParam(atime) 1144 ).run(); 1145 } 1146 1147 @Override 1148 public long getDefaultBlockSize() { 1149 return getConf().getLongBytes(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1150 DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT); 1151 } 1152 1153 @Override 1154 public short getDefaultReplication() { 1155 return (short)getConf().getInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1156 DFSConfigKeys.DFS_REPLICATION_DEFAULT); 1157 } 1158 1159 @Override 1160 public void concat(final Path trg, final Path [] srcs) throws IOException { 1161 statistics.incrementWriteOps(1); 1162 final HttpOpParam.Op op = PostOpParam.Op.CONCAT; 1163 new FsPathRunner(op, trg, new ConcatSourcesParam(srcs)).run(); 1164 } 1165 1166 @Override 1167 public FSDataOutputStream create(final Path f, final FsPermission permission, 1168 final boolean overwrite, final int bufferSize, final short replication, 1169 final long blockSize, final Progressable progress) throws IOException { 1170 statistics.incrementWriteOps(1); 1171 1172 final HttpOpParam.Op op = PutOpParam.Op.CREATE; 1173 return new FsPathOutputStreamRunner(op, f, bufferSize, 1174 new PermissionParam(applyUMask(permission)), 1175 new OverwriteParam(overwrite), 1176 new BufferSizeParam(bufferSize), 1177 new ReplicationParam(replication), 1178 new BlockSizeParam(blockSize) 1179 ).run(); 1180 } 1181 1182 @Override 1183 public FSDataOutputStream append(final Path f, final int bufferSize, 1184 final Progressable progress) throws IOException { 1185 statistics.incrementWriteOps(1); 1186 1187 final HttpOpParam.Op op = PostOpParam.Op.APPEND; 1188 return new FsPathOutputStreamRunner(op, f, bufferSize, 1189 new BufferSizeParam(bufferSize) 1190 ).run(); 1191 } 1192 1193 @Override 1194 public boolean truncate(Path f, long newLength) throws IOException { 1195 statistics.incrementWriteOps(1); 1196 1197 final HttpOpParam.Op op = PostOpParam.Op.TRUNCATE; 1198 return new FsPathBooleanRunner(op, f, new NewLengthParam(newLength)).run(); 1199 } 1200 1201 @Override 1202 public boolean delete(Path f, boolean recursive) throws IOException { 1203 final HttpOpParam.Op op = DeleteOpParam.Op.DELETE; 1204 return new FsPathBooleanRunner(op, f, 1205 new RecursiveParam(recursive) 1206 ).run(); 1207 } 1208 1209 @Override 1210 public FSDataInputStream open(final Path f, final int bufferSize 1211 ) throws IOException { 1212 statistics.incrementReadOps(1); 1213 return new FSDataInputStream(new WebHdfsInputStream(f, bufferSize)); 1214 } 1215 1216 @Override 1217 public synchronized void close() throws IOException { 1218 try { 1219 if (canRefreshDelegationToken && delegationToken != null) { 1220 cancelDelegationToken(delegationToken); 1221 } 1222 } catch (IOException ioe) { 1223 LOG.debug("Token cancel failed: "+ioe); 1224 } finally { 1225 super.close(); 1226 } 1227 } 1228 1229 // use FsPathConnectionRunner to ensure retries for InvalidTokens 1230 class UnresolvedUrlOpener extends ByteRangeInputStream.URLOpener { 1231 private final FsPathConnectionRunner runner; 1232 UnresolvedUrlOpener(FsPathConnectionRunner runner) { 1233 super(null); 1234 this.runner = runner; 1235 } 1236 1237 @Override 1238 protected HttpURLConnection connect(long offset, boolean resolved) 1239 throws IOException { 1240 assert offset == 0; 1241 HttpURLConnection conn = runner.run(); 1242 setURL(conn.getURL()); 1243 return conn; 1244 } 1245 } 1246 1247 class OffsetUrlOpener extends ByteRangeInputStream.URLOpener { 1248 OffsetUrlOpener(final URL url) { 1249 super(url); 1250 } 1251 1252 /** Setup offset url and connect. */ 1253 @Override 1254 protected HttpURLConnection connect(final long offset, 1255 final boolean resolved) throws IOException { 1256 final URL offsetUrl = offset == 0L? url 1257 : new URL(url + "&" + new OffsetParam(offset)); 1258 return new URLRunner(GetOpParam.Op.OPEN, offsetUrl, resolved).run(); 1259 } 1260 } 1261 1262 private static final String OFFSET_PARAM_PREFIX = OffsetParam.NAME + "="; 1263 1264 /** Remove offset parameter, if there is any, from the url */ 1265 static URL removeOffsetParam(final URL url) throws MalformedURLException { 1266 String query = url.getQuery(); 1267 if (query == null) { 1268 return url; 1269 } 1270 final String lower = StringUtils.toLowerCase(query); 1271 if (!lower.startsWith(OFFSET_PARAM_PREFIX) 1272 && !lower.contains("&" + OFFSET_PARAM_PREFIX)) { 1273 return url; 1274 } 1275 1276 //rebuild query 1277 StringBuilder b = null; 1278 for(final StringTokenizer st = new StringTokenizer(query, "&"); 1279 st.hasMoreTokens();) { 1280 final String token = st.nextToken(); 1281 if (!StringUtils.toLowerCase(token).startsWith(OFFSET_PARAM_PREFIX)) { 1282 if (b == null) { 1283 b = new StringBuilder("?").append(token); 1284 } else { 1285 b.append('&').append(token); 1286 } 1287 } 1288 } 1289 query = b == null? "": b.toString(); 1290 1291 final String urlStr = url.toString(); 1292 return new URL(urlStr.substring(0, urlStr.indexOf('?')) + query); 1293 } 1294 1295 static class OffsetUrlInputStream extends ByteRangeInputStream { 1296 OffsetUrlInputStream(UnresolvedUrlOpener o, OffsetUrlOpener r) 1297 throws IOException { 1298 super(o, r); 1299 } 1300 1301 /** Remove offset parameter before returning the resolved url. */ 1302 @Override 1303 protected URL getResolvedUrl(final HttpURLConnection connection 1304 ) throws MalformedURLException { 1305 return removeOffsetParam(connection.getURL()); 1306 } 1307 } 1308 1309 @Override 1310 public FileStatus[] listStatus(final Path f) throws IOException { 1311 statistics.incrementReadOps(1); 1312 1313 final HttpOpParam.Op op = GetOpParam.Op.LISTSTATUS; 1314 return new FsPathResponseRunner<FileStatus[]>(op, f) { 1315 @Override 1316 FileStatus[] decodeResponse(Map<?,?> json) { 1317 final Map<?, ?> rootmap = (Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es"); 1318 final List<?> array = JsonUtil.getList( 1319 rootmap, FileStatus.class.getSimpleName()); 1320 1321 //convert FileStatus 1322 final FileStatus[] statuses = new FileStatus[array.size()]; 1323 int i = 0; 1324 for (Object object : array) { 1325 final Map<?, ?> m = (Map<?, ?>) object; 1326 statuses[i++] = makeQualified(JsonUtil.toFileStatus(m, false), f); 1327 } 1328 return statuses; 1329 } 1330 }.run(); 1331 } 1332 1333 @Override 1334 public Token<DelegationTokenIdentifier> getDelegationToken( 1335 final String renewer) throws IOException { 1336 final HttpOpParam.Op op = GetOpParam.Op.GETDELEGATIONTOKEN; 1337 Token<DelegationTokenIdentifier> token = 1338 new FsPathResponseRunner<Token<DelegationTokenIdentifier>>( 1339 op, null, new RenewerParam(renewer)) { 1340 @Override 1341 Token<DelegationTokenIdentifier> decodeResponse(Map<?,?> json) 1342 throws IOException { 1343 return JsonUtil.toDelegationToken(json); 1344 } 1345 }.run(); 1346 if (token != null) { 1347 token.setService(tokenServiceName); 1348 } else { 1349 if (disallowFallbackToInsecureCluster) { 1350 throw new AccessControlException(CANT_FALLBACK_TO_INSECURE_MSG); 1351 } 1352 } 1353 return token; 1354 } 1355 1356 @Override 1357 public synchronized Token<?> getRenewToken() { 1358 return delegationToken; 1359 } 1360 1361 @Override 1362 public <T extends TokenIdentifier> void setDelegationToken( 1363 final Token<T> token) { 1364 synchronized (this) { 1365 delegationToken = token; 1366 } 1367 } 1368 1369 @Override 1370 public synchronized long renewDelegationToken(final Token<?> token 1371 ) throws IOException { 1372 final HttpOpParam.Op op = PutOpParam.Op.RENEWDELEGATIONTOKEN; 1373 return new FsPathResponseRunner<Long>(op, null, 1374 new TokenArgumentParam(token.encodeToUrlString())) { 1375 @Override 1376 Long decodeResponse(Map<?,?> json) throws IOException { 1377 return ((Number) json.get("long")).longValue(); 1378 } 1379 }.run(); 1380 } 1381 1382 @Override 1383 public synchronized void cancelDelegationToken(final Token<?> token 1384 ) throws IOException { 1385 final HttpOpParam.Op op = PutOpParam.Op.CANCELDELEGATIONTOKEN; 1386 new FsPathRunner(op, null, 1387 new TokenArgumentParam(token.encodeToUrlString()) 1388 ).run(); 1389 } 1390 1391 @Override 1392 public BlockLocation[] getFileBlockLocations(final FileStatus status, 1393 final long offset, final long length) throws IOException { 1394 if (status == null) { 1395 return null; 1396 } 1397 return getFileBlockLocations(status.getPath(), offset, length); 1398 } 1399 1400 @Override 1401 public BlockLocation[] getFileBlockLocations(final Path p, 1402 final long offset, final long length) throws IOException { 1403 statistics.incrementReadOps(1); 1404 1405 final HttpOpParam.Op op = GetOpParam.Op.GET_BLOCK_LOCATIONS; 1406 return new FsPathResponseRunner<BlockLocation[]>(op, p, 1407 new OffsetParam(offset), new LengthParam(length)) { 1408 @Override 1409 BlockLocation[] decodeResponse(Map<?,?> json) throws IOException { 1410 return DFSUtil.locatedBlocks2Locations( 1411 JsonUtil.toLocatedBlocks(json)); 1412 } 1413 }.run(); 1414 } 1415 1416 @Override 1417 public void access(final Path path, final FsAction mode) throws IOException { 1418 final HttpOpParam.Op op = GetOpParam.Op.CHECKACCESS; 1419 new FsPathRunner(op, path, new FsActionParam(mode)).run(); 1420 } 1421 1422 @Override 1423 public ContentSummary getContentSummary(final Path p) throws IOException { 1424 statistics.incrementReadOps(1); 1425 1426 final HttpOpParam.Op op = GetOpParam.Op.GETCONTENTSUMMARY; 1427 return new FsPathResponseRunner<ContentSummary>(op, p) { 1428 @Override 1429 ContentSummary decodeResponse(Map<?,?> json) { 1430 return JsonUtil.toContentSummary(json); 1431 } 1432 }.run(); 1433 } 1434 1435 @Override 1436 public MD5MD5CRC32FileChecksum getFileChecksum(final Path p 1437 ) throws IOException { 1438 statistics.incrementReadOps(1); 1439 1440 final HttpOpParam.Op op = GetOpParam.Op.GETFILECHECKSUM; 1441 return new FsPathResponseRunner<MD5MD5CRC32FileChecksum>(op, p) { 1442 @Override 1443 MD5MD5CRC32FileChecksum decodeResponse(Map<?,?> json) throws IOException { 1444 return JsonUtil.toMD5MD5CRC32FileChecksum(json); 1445 } 1446 }.run(); 1447 } 1448 1449 /** 1450 * Resolve an HDFS URL into real INetSocketAddress. It works like a DNS 1451 * resolver when the URL points to an non-HA cluster. When the URL points to 1452 * an HA cluster with its logical name, the resolver further resolves the 1453 * logical name(i.e., the authority in the URL) into real namenode addresses. 1454 */ 1455 private InetSocketAddress[] resolveNNAddr() throws IOException { 1456 Configuration conf = getConf(); 1457 final String scheme = uri.getScheme(); 1458 1459 ArrayList<InetSocketAddress> ret = new ArrayList<InetSocketAddress>(); 1460 1461 if (!HAUtil.isLogicalUri(conf, uri)) { 1462 InetSocketAddress addr = NetUtils.createSocketAddr(uri.getAuthority(), 1463 getDefaultPort()); 1464 ret.add(addr); 1465 1466 } else { 1467 Map<String, Map<String, InetSocketAddress>> addresses = DFSUtil 1468 .getHaNnWebHdfsAddresses(conf, scheme); 1469 1470 // Extract the entry corresponding to the logical name. 1471 Map<String, InetSocketAddress> addrs = addresses.get(uri.getHost()); 1472 for (InetSocketAddress addr : addrs.values()) { 1473 ret.add(addr); 1474 } 1475 } 1476 1477 InetSocketAddress[] r = new InetSocketAddress[ret.size()]; 1478 return ret.toArray(r); 1479 } 1480 1481 @Override 1482 public String getCanonicalServiceName() { 1483 return tokenServiceName == null ? super.getCanonicalServiceName() 1484 : tokenServiceName.toString(); 1485 } 1486 1487 @VisibleForTesting 1488 InetSocketAddress[] getResolvedNNAddr() { 1489 return nnAddrs; 1490 } 1491 1492 @VisibleForTesting 1493 public void setRetryPolicy(RetryPolicy rp) { 1494 this.retryPolicy = rp; 1495 } 1496 1497 /** 1498 * This class is used for opening, reading, and seeking files while using the 1499 * WebHdfsFileSystem. This class will invoke the retry policy when performing 1500 * any of these actions. 1501 */ 1502 @VisibleForTesting 1503 public class WebHdfsInputStream extends FSInputStream { 1504 private ReadRunner readRunner = null; 1505 1506 WebHdfsInputStream(Path path, int buffersize) throws IOException { 1507 // Only create the ReadRunner once. Each read's byte array and position 1508 // will be updated within the ReadRunner object before every read. 1509 readRunner = new ReadRunner(path, buffersize); 1510 } 1511 1512 @Override 1513 public int read() throws IOException { 1514 final byte[] b = new byte[1]; 1515 return (read(b, 0, 1) == -1) ? -1 : (b[0] & 0xff); 1516 } 1517 1518 @Override 1519 public int read(byte b[], int off, int len) throws IOException { 1520 return readRunner.read(b, off, len); 1521 } 1522 1523 @Override 1524 public void seek(long newPos) throws IOException { 1525 readRunner.seek(newPos); 1526 } 1527 1528 @Override 1529 public long getPos() throws IOException { 1530 return readRunner.getPos(); 1531 } 1532 1533 protected int getBufferSize() throws IOException { 1534 return readRunner.getBufferSize(); 1535 } 1536 1537 protected Path getPath() throws IOException { 1538 return readRunner.getPath(); 1539 } 1540 1541 @Override 1542 public boolean seekToNewSource(long targetPos) throws IOException { 1543 return false; 1544 } 1545 1546 @Override 1547 public void close() throws IOException { 1548 readRunner.close(); 1549 } 1550 1551 public void setFileLength(long len) { 1552 readRunner.setFileLength(len); 1553 } 1554 1555 public long getFileLength() { 1556 return readRunner.getFileLength(); 1557 } 1558 1559 @VisibleForTesting 1560 ReadRunner getReadRunner() { 1561 return readRunner; 1562 } 1563 1564 @VisibleForTesting 1565 void setReadRunner(ReadRunner rr) { 1566 this.readRunner = rr; 1567 } 1568 } 1569 1570 enum RunnerState { 1571 DISCONNECTED, // Connection is closed programmatically by ReadRunner 1572 OPEN, // Connection has been established by ReadRunner 1573 SEEK, // Calling code has explicitly called seek() 1574 CLOSED // Calling code has explicitly called close() 1575 } 1576 1577 /** 1578 * This class will allow retries to occur for both open and read operations. 1579 * The first WebHdfsFileSystem#open creates a new WebHdfsInputStream object, 1580 * which creates a new ReadRunner object that will be used to open a 1581 * connection and read or seek into the input stream. 1582 * 1583 * ReadRunner is a subclass of the AbstractRunner class, which will run the 1584 * ReadRunner#getUrl(), ReadRunner#connect(URL), and ReadRunner#getResponse 1585 * methods within a retry loop, based on the configured retry policy. 1586 * ReadRunner#connect will create a connection if one has not already been 1587 * created. Otherwise, it will return the previously created connection 1588 * object. This is necessary because a new connection should not be created 1589 * for every read. 1590 * Likewise, ReadRunner#getUrl will construct a new URL object only if the 1591 * connection has not previously been established. Otherwise, it will return 1592 * the previously created URL object. 1593 * ReadRunner#getResponse will initialize the input stream if it has not 1594 * already been initialized and read the requested data from the specified 1595 * input stream. 1596 */ 1597 @VisibleForTesting 1598 protected class ReadRunner extends AbstractFsPathRunner<Integer> { 1599 private InputStream in = null; 1600 private HttpURLConnection cachedConnection = null; 1601 private byte[] readBuffer; 1602 private int readOffset; 1603 private int readLength; 1604 private RunnerState runnerState = RunnerState.DISCONNECTED; 1605 private URL originalUrl = null; 1606 private URL resolvedUrl = null; 1607 1608 private final Path path; 1609 private final int bufferSize; 1610 private long pos = 0; 1611 private long fileLength = 0; 1612 1613 /* The following methods are WebHdfsInputStream helpers. */ 1614 1615 ReadRunner(Path p, int bs) throws IOException { 1616 super(GetOpParam.Op.OPEN, p, new BufferSizeParam(bs)); 1617 this.path = p; 1618 this.bufferSize = bs; 1619 } 1620 1621 int read(byte[] b, int off, int len) throws IOException { 1622 if (runnerState == RunnerState.CLOSED) { 1623 throw new IOException("Stream closed"); 1624 } 1625 1626 // Before the first read, pos and fileLength will be 0 and readBuffer 1627 // will all be null. They will be initialized once the first connection 1628 // is made. Only after that it makes sense to compare pos and fileLength. 1629 if (pos >= fileLength && readBuffer != null) { 1630 return -1; 1631 } 1632 1633 // If a seek is occurring, the input stream will have been closed, so it 1634 // needs to be reopened. Use the URLRunner to call AbstractRunner#connect 1635 // with the previously-cached resolved URL and with the 'redirected' flag 1636 // set to 'true'. The resolved URL contains the URL of the previously 1637 // opened DN as opposed to the NN. It is preferable to use the resolved 1638 // URL when creating a connection because it does not hit the NN or every 1639 // seek, nor does it open a connection to a new DN after every seek. 1640 // The redirect flag is needed so that AbstractRunner#connect knows the 1641 // URL is already resolved. 1642 // Note that when the redirected flag is set, retries are not attempted. 1643 // So, if the connection fails using URLRunner, clear out the connection 1644 // and fall through to establish the connection using ReadRunner. 1645 if (runnerState == RunnerState.SEEK) { 1646 try { 1647 final URL rurl = new URL(resolvedUrl + "&" + new OffsetParam(pos)); 1648 cachedConnection = new URLRunner(GetOpParam.Op.OPEN, rurl, true).run(); 1649 } catch (IOException ioe) { 1650 closeInputStream(RunnerState.DISCONNECTED); 1651 } 1652 } 1653 1654 readBuffer = b; 1655 readOffset = off; 1656 readLength = len; 1657 1658 int count = -1; 1659 count = this.run(); 1660 if (count >= 0) { 1661 statistics.incrementBytesRead(count); 1662 pos += count; 1663 } else if (pos < fileLength) { 1664 throw new EOFException( 1665 "Premature EOF: pos=" + pos + " < filelength=" + fileLength); 1666 } 1667 return count; 1668 } 1669 1670 void seek(long newPos) throws IOException { 1671 if (pos != newPos) { 1672 pos = newPos; 1673 closeInputStream(RunnerState.SEEK); 1674 } 1675 } 1676 1677 public void close() throws IOException { 1678 closeInputStream(RunnerState.CLOSED); 1679 } 1680 1681 /* The following methods are overriding AbstractRunner methods, 1682 * to be called within the retry policy context by runWithRetry. 1683 */ 1684 1685 @Override 1686 protected URL getUrl() throws IOException { 1687 // This method is called every time either a read is executed. 1688 // The check for connection == null is to ensure that a new URL is only 1689 // created upon a new connection and not for every read. 1690 if (cachedConnection == null) { 1691 // Update URL with current offset. BufferSize doesn't change, but it 1692 // still must be included when creating the new URL. 1693 updateURLParameters(new BufferSizeParam(bufferSize), 1694 new OffsetParam(pos)); 1695 originalUrl = super.getUrl(); 1696 } 1697 return originalUrl; 1698 } 1699 1700 /* Only make the connection if it is not already open. Don't cache the 1701 * connection here. After this method is called, runWithRetry will call 1702 * validateResponse, and then call the below ReadRunner#getResponse. If 1703 * the code path makes it that far, then we can cache the connection. 1704 */ 1705 @Override 1706 protected HttpURLConnection connect(URL url) throws IOException { 1707 HttpURLConnection conn = cachedConnection; 1708 if (conn == null) { 1709 try { 1710 conn = super.connect(url); 1711 } catch (IOException e) { 1712 closeInputStream(RunnerState.DISCONNECTED); 1713 throw e; 1714 } 1715 } 1716 return conn; 1717 } 1718 1719 /* 1720 * This method is used to perform reads within the retry policy context. 1721 * This code is relying on runWithRetry to always call the above connect 1722 * method and the verifyResponse method prior to calling getResponse. 1723 */ 1724 @Override 1725 Integer getResponse(final HttpURLConnection conn) 1726 throws IOException { 1727 try { 1728 // In the "open-then-read" use case, runWithRetry will have executed 1729 // ReadRunner#connect to make the connection and then executed 1730 // validateResponse to validate the response code. Only then do we want 1731 // to cache the connection. 1732 // In the "read-after-seek" use case, the connection is made and the 1733 // response is validated by the URLRunner. ReadRunner#read then caches 1734 // the connection and the ReadRunner#connect will pass on the cached 1735 // connection 1736 // In either case, stream initialization is done here if necessary. 1737 cachedConnection = conn; 1738 if (in == null) { 1739 in = initializeInputStream(conn); 1740 } 1741 1742 int count = in.read(readBuffer, readOffset, readLength); 1743 if (count < 0 && pos < fileLength) { 1744 throw new EOFException( 1745 "Premature EOF: pos=" + pos + " < filelength=" + fileLength); 1746 } 1747 return Integer.valueOf(count); 1748 } catch (IOException e) { 1749 String redirectHost = resolvedUrl.getAuthority(); 1750 if (excludeDatanodes.getValue() != null) { 1751 excludeDatanodes = new ExcludeDatanodesParam(redirectHost + "," 1752 + excludeDatanodes.getValue()); 1753 } else { 1754 excludeDatanodes = new ExcludeDatanodesParam(redirectHost); 1755 } 1756 1757 // If an exception occurs, close the input stream and null it out so 1758 // that if the abstract runner decides to retry, it will reconnect. 1759 closeInputStream(RunnerState.DISCONNECTED); 1760 throw e; 1761 } 1762 } 1763 1764 @VisibleForTesting 1765 InputStream initializeInputStream(HttpURLConnection conn) 1766 throws IOException { 1767 // Cache the resolved URL so that it can be used in the event of 1768 // a future seek operation. 1769 resolvedUrl = removeOffsetParam(conn.getURL()); 1770 final String cl = conn.getHeaderField(HttpHeaders.CONTENT_LENGTH); 1771 InputStream inStream = conn.getInputStream(); 1772 if (LOG.isDebugEnabled()) { 1773 LOG.debug("open file: " + conn.getURL()); 1774 } 1775 if (cl != null) { 1776 long streamLength = Long.parseLong(cl); 1777 fileLength = pos + streamLength; 1778 // Java has a bug with >2GB request streams. It won't bounds check 1779 // the reads so the transfer blocks until the server times out 1780 inStream = new BoundedInputStream(inStream, streamLength); 1781 } else { 1782 fileLength = getHdfsFileStatus(path).getLen(); 1783 } 1784 // Wrapping in BufferedInputStream because it is more performant than 1785 // BoundedInputStream by itself. 1786 runnerState = RunnerState.OPEN; 1787 return new BufferedInputStream(inStream, bufferSize); 1788 } 1789 1790 // Close both the InputStream and the connection. 1791 @VisibleForTesting 1792 void closeInputStream(RunnerState rs) throws IOException { 1793 if (in != null) { 1794 IOUtils.close(cachedConnection); 1795 in = null; 1796 } 1797 cachedConnection = null; 1798 runnerState = rs; 1799 } 1800 1801 /* Getters and Setters */ 1802 1803 @VisibleForTesting 1804 protected InputStream getInputStream() { 1805 return in; 1806 } 1807 1808 @VisibleForTesting 1809 protected void setInputStream(InputStream inStream) { 1810 in = inStream; 1811 } 1812 1813 Path getPath() { 1814 return path; 1815 } 1816 1817 int getBufferSize() { 1818 return bufferSize; 1819 } 1820 1821 long getFileLength() { 1822 return fileLength; 1823 } 1824 1825 void setFileLength(long len) { 1826 fileLength = len; 1827 } 1828 1829 long getPos() { 1830 return pos; 1831 } 1832 } 1833}