001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.fs; 019 020 import java.io.FileNotFoundException; 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.io.OutputStream; 024 import java.net.URI; 025 import java.security.PrivilegedExceptionAction; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.EnumSet; 029 import java.util.HashSet; 030 import java.util.IdentityHashMap; 031 import java.util.List; 032 import java.util.Map; 033 import java.util.Set; 034 import java.util.Stack; 035 import java.util.TreeSet; 036 import java.util.Map.Entry; 037 038 import org.apache.commons.logging.Log; 039 import org.apache.commons.logging.LogFactory; 040 import org.apache.hadoop.HadoopIllegalArgumentException; 041 import org.apache.hadoop.classification.InterfaceAudience; 042 import org.apache.hadoop.classification.InterfaceStability; 043 import org.apache.hadoop.conf.Configuration; 044 import org.apache.hadoop.fs.FileSystem.Statistics; 045 import org.apache.hadoop.fs.Options.CreateOpts; 046 import org.apache.hadoop.fs.permission.FsPermission; 047 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 048 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT; 049 import org.apache.hadoop.io.IOUtils; 050 import org.apache.hadoop.ipc.RpcClientException; 051 import org.apache.hadoop.ipc.RpcServerException; 052 import org.apache.hadoop.ipc.UnexpectedServerException; 053 import org.apache.hadoop.fs.InvalidPathException; 054 import org.apache.hadoop.security.AccessControlException; 055 import org.apache.hadoop.security.UserGroupInformation; 056 import org.apache.hadoop.security.token.Token; 057 import org.apache.hadoop.util.ShutdownHookManager; 058 059 /** 060 * The FileContext class provides an interface to the application writer for 061 * using the Hadoop file system. 062 * It provides a set of methods for the usual operation: create, open, 063 * list, etc 064 * 065 * <p> 066 * <b> *** Path Names *** </b> 067 * <p> 068 * 069 * The Hadoop file system supports a URI name space and URI names. 070 * It offers a forest of file systems that can be referenced using fully 071 * qualified URIs. 072 * Two common Hadoop file systems implementations are 073 * <ul> 074 * <li> the local file system: file:///path 075 * <li> the hdfs file system hdfs://nnAddress:nnPort/path 076 * </ul> 077 * 078 * While URI names are very flexible, it requires knowing the name or address 079 * of the server. For convenience one often wants to access the default system 080 * in one's environment without knowing its name/address. This has an 081 * additional benefit that it allows one to change one's default fs 082 * (e.g. admin moves application from cluster1 to cluster2). 083 * <p> 084 * 085 * To facilitate this, Hadoop supports a notion of a default file system. 086 * The user can set his default file system, although this is 087 * typically set up for you in your environment via your default config. 
* A default file system implies a default scheme and authority; slash-relative
 * names (such as /foo/bar) are resolved relative to that default FS.
 * Similarly a user can also have working-directory-relative names (i.e. names
 * not starting with a slash). While the working directory is generally in the
 * same default FS, the wd can be in a different FS.
 * <p>
 * Hence Hadoop path names can be one of:
 *  <ul>
 *  <li> fully qualified URI: scheme://authority/path
 *  <li> slash relative names: /path relative to the default file system
 *  <li> wd-relative names: path relative to the working dir
 *  </ul>
 * Relative paths with scheme (scheme:foo/bar) are illegal.
 *
 * <p>
 * <b>****The Role of the FileContext and configuration defaults****</b>
 * <p>
 * The FileContext provides file namespace context for resolving file names;
 * it also contains the umask for permissions. In that sense it is like the
 * per-process file-related state in a Unix system.
 * These two properties
 * <ul>
 * <li> default file system (i.e. your slash)
 * <li> umask
 * </ul>
 * are, in general, obtained from the default configuration file
 * in your environment (see {@link Configuration}).
 *
 * No other configuration parameters are obtained from the default config as
 * far as the file context layer is concerned. All file system instances
 * (i.e. deployments of file systems) have default properties; we call these
 * server side (SS) defaults. Operations like create allow one to select many
 * properties: either pass them in as explicit parameters or use
 * the SS properties.
 * <p>
 * The file system related SS defaults are
 * <ul>
 * <li> the home directory (default is "/user/userName")
 * <li> the initial wd (only for local fs)
 * <li> replication factor
 * <li> block size
 * <li> buffer size
 * <li> encryptDataTransfer
 * <li> checksum option.
(checksumType and bytesPerChecksum)
 * </ul>
 *
 * <p>
 * <b> *** Usage Model for the FileContext class *** </b>
 * <p>
 * Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
 *   Unspecified values come from core-defaults.xml in the release jar.
 * <ul>
 * <li> myFContext = FileContext.getFileContext(); // uses the default config
 *                                                // which has your default FS
 * <li> myFContext.create(path, ...);
 * <li> myFContext.setWorkingDirectory(path)
 * <li> myFContext.open (path, ...);
 * </ul>
 * Example 2: Get a FileContext with a specific URI as the default FS
 * <ul>
 * <li> myFContext = FileContext.getFileContext(URI)
 * <li> myFContext.create(path, ...);
 * <li> ...
 * </ul>
 * Example 3: FileContext with local file system as the default
 * <ul>
 * <li> myFContext = FileContext.getLocalFSFileContext()
 * <li> myFContext.create(path, ...);
 * <li> ...
 * </ul>
 * Example 4: Use a specific config, ignoring $HADOOP_CONFIG.
 *  Generally you should not need to use a specific config unless one has
 *  been passed to you, as in:
 * <ul>
 * <li> configX = someConfigSomeOnePassedToYou.
 * <li> myFContext = getFileContext(configX); // configX is not changed,
 *                                           // is passed down
 * <li> myFContext.create(path, ...);
 * <li>...
 * </ul>
 *
 */

@InterfaceAudience.Public
@InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
public final class FileContext {

  public static final Log LOG = LogFactory.getLog(FileContext.class);
  /**
   * Default permission for directory and symlink.
   * In previous versions, this default permission was also used to
   * create files, so files created end up with ugo+x permission.
   * See HADOOP-9155 for detail.
   * Two new constants are added to solve this, please use
   * {@link FileContext#DIR_DEFAULT_PERM} for directory, and use
   * {@link FileContext#FILE_DEFAULT_PERM} for file.
183 * This constant is kept for compatibility. 184 */ 185 public static final FsPermission DEFAULT_PERM = FsPermission.getDefault(); 186 /** 187 * Default permission for directory 188 */ 189 public static final FsPermission DIR_DEFAULT_PERM = FsPermission.getDirDefault(); 190 /** 191 * Default permission for file 192 */ 193 public static final FsPermission FILE_DEFAULT_PERM = FsPermission.getFileDefault(); 194 195 /** 196 * Priority of the FileContext shutdown hook. 197 */ 198 public static final int SHUTDOWN_HOOK_PRIORITY = 20; 199 200 /** 201 * List of files that should be deleted on JVM shutdown. 202 */ 203 static final Map<FileContext, Set<Path>> DELETE_ON_EXIT = 204 new IdentityHashMap<FileContext, Set<Path>>(); 205 206 /** JVM shutdown hook thread. */ 207 static final FileContextFinalizer FINALIZER = 208 new FileContextFinalizer(); 209 210 private static final PathFilter DEFAULT_FILTER = new PathFilter() { 211 public boolean accept(final Path file) { 212 return true; 213 } 214 }; 215 216 /** 217 * The FileContext is defined by. 218 * 1) defaultFS (slash) 219 * 2) wd 220 * 3) umask 221 */ 222 private final AbstractFileSystem defaultFS; //default FS for this FileContext. 223 private Path workingDir; // Fully qualified 224 private FsPermission umask; 225 private final Configuration conf; 226 private final UserGroupInformation ugi; 227 228 private FileContext(final AbstractFileSystem defFs, 229 final FsPermission theUmask, final Configuration aConf) { 230 defaultFS = defFs; 231 umask = FsPermission.getUMask(aConf); 232 conf = aConf; 233 try { 234 ugi = UserGroupInformation.getCurrentUser(); 235 } catch (IOException e) { 236 LOG.error("Exception in getCurrentUser: ",e); 237 throw new RuntimeException("Failed to get the current user " + 238 "while creating a FileContext", e); 239 } 240 /* 241 * Init the wd. 242 * WorkingDir is implemented at the FileContext layer 243 * NOT at the AbstractFileSystem layer. 
244 * If the DefaultFS, such as localFilesystem has a notion of 245 * builtin WD, we use that as the initial WD. 246 * Otherwise the WD is initialized to the home directory. 247 */ 248 workingDir = defaultFS.getInitialWorkingDirectory(); 249 if (workingDir == null) { 250 workingDir = defaultFS.getHomeDirectory(); 251 } 252 util = new Util(); // for the inner class 253 } 254 255 /* 256 * Remove relative part - return "absolute": 257 * If input is relative path ("foo/bar") add wd: ie "/<workingDir>/foo/bar" 258 * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path 259 * ("/foo/bar") are returned unchanged. 260 * 261 * Applications that use FileContext should use #makeQualified() since 262 * they really want a fully qualified URI. 263 * Hence this method is not called makeAbsolute() and 264 * has been deliberately declared private. 265 */ 266 private Path fixRelativePart(Path p) { 267 if (p.isUriPathAbsolute()) { 268 return p; 269 } else { 270 return new Path(workingDir, p); 271 } 272 } 273 274 /** 275 * Delete all the paths that were marked as delete-on-exit. 276 */ 277 static void processDeleteOnExit() { 278 synchronized (DELETE_ON_EXIT) { 279 Set<Entry<FileContext, Set<Path>>> set = DELETE_ON_EXIT.entrySet(); 280 for (Entry<FileContext, Set<Path>> entry : set) { 281 FileContext fc = entry.getKey(); 282 Set<Path> paths = entry.getValue(); 283 for (Path path : paths) { 284 try { 285 fc.delete(path, true); 286 } catch (IOException e) { 287 LOG.warn("Ignoring failure to deleteOnExit for path " + path); 288 } 289 } 290 } 291 DELETE_ON_EXIT.clear(); 292 } 293 } 294 295 /** 296 * Pathnames with scheme and relative path are illegal. 
297 * @param path to be checked 298 */ 299 private static void checkNotSchemeWithRelative(final Path path) { 300 if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) { 301 throw new HadoopIllegalArgumentException( 302 "Unsupported name: has scheme but relative path-part"); 303 } 304 } 305 306 /** 307 * Get the file system of supplied path. 308 * 309 * @param absOrFqPath - absolute or fully qualified path 310 * @return the file system of the path 311 * 312 * @throws UnsupportedFileSystemException If the file system for 313 * <code>absOrFqPath</code> is not supported. 314 * @throws IOExcepton If the file system for <code>absOrFqPath</code> could 315 * not be instantiated. 316 */ 317 private AbstractFileSystem getFSofPath(final Path absOrFqPath) 318 throws UnsupportedFileSystemException, IOException { 319 checkNotSchemeWithRelative(absOrFqPath); 320 if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) { 321 throw new HadoopIllegalArgumentException( 322 "FileContext Bug: path is relative"); 323 } 324 325 try { 326 // Is it the default FS for this FileContext? 327 defaultFS.checkPath(absOrFqPath); 328 return defaultFS; 329 } catch (Exception e) { // it is different FileSystem 330 return getAbstractFileSystem(ugi, absOrFqPath.toUri(), conf); 331 } 332 } 333 334 private static AbstractFileSystem getAbstractFileSystem( 335 UserGroupInformation user, final URI uri, final Configuration conf) 336 throws UnsupportedFileSystemException, IOException { 337 try { 338 return user.doAs(new PrivilegedExceptionAction<AbstractFileSystem>() { 339 public AbstractFileSystem run() throws UnsupportedFileSystemException { 340 return AbstractFileSystem.get(uri, conf); 341 } 342 }); 343 } catch (InterruptedException ex) { 344 LOG.error(ex); 345 throw new IOException("Failed to get the AbstractFileSystem for path: " 346 + uri, ex); 347 } 348 } 349 350 /** 351 * Protected Static Factory methods for getting a FileContexts 352 * that take a AbstractFileSystem as input. 
To be used for testing. 353 */ 354 355 /** 356 * Create a FileContext with specified FS as default using the specified 357 * config. 358 * 359 * @param defFS 360 * @param aConf 361 * @return new FileContext with specifed FS as default. 362 */ 363 public static FileContext getFileContext(final AbstractFileSystem defFS, 364 final Configuration aConf) { 365 return new FileContext(defFS, FsPermission.getUMask(aConf), aConf); 366 } 367 368 /** 369 * Create a FileContext for specified file system using the default config. 370 * 371 * @param defaultFS 372 * @return a FileContext with the specified AbstractFileSystem 373 * as the default FS. 374 */ 375 protected static FileContext getFileContext( 376 final AbstractFileSystem defaultFS) { 377 return getFileContext(defaultFS, new Configuration()); 378 } 379 380 /** 381 * Static Factory methods for getting a FileContext. 382 * Note new file contexts are created for each call. 383 * The only singleton is the local FS context using the default config. 384 * 385 * Methods that use the default config: the default config read from the 386 * $HADOOP_CONFIG/core.xml, 387 * Unspecified key-values for config are defaulted from core-defaults.xml 388 * in the release jar. 389 * 390 * The keys relevant to the FileContext layer are extracted at time of 391 * construction. Changes to the config after the call are ignore 392 * by the FileContext layer. 393 * The conf is passed to lower layers like AbstractFileSystem and HDFS which 394 * pick up their own config variables. 395 */ 396 397 /** 398 * Create a FileContext using the default config read from the 399 * $HADOOP_CONFIG/core.xml, Unspecified key-values for config are defaulted 400 * from core-defaults.xml in the release jar. 
401 * 402 * @throws UnsupportedFileSystemException If the file system from the default 403 * configuration is not supported 404 */ 405 public static FileContext getFileContext() 406 throws UnsupportedFileSystemException { 407 return getFileContext(new Configuration()); 408 } 409 410 /** 411 * @return a FileContext for the local file system using the default config. 412 * @throws UnsupportedFileSystemException If the file system for 413 * {@link FsConstants#LOCAL_FS_URI} is not supported. 414 */ 415 public static FileContext getLocalFSFileContext() 416 throws UnsupportedFileSystemException { 417 return getFileContext(FsConstants.LOCAL_FS_URI); 418 } 419 420 /** 421 * Create a FileContext for specified URI using the default config. 422 * 423 * @param defaultFsUri 424 * @return a FileContext with the specified URI as the default FS. 425 * 426 * @throws UnsupportedFileSystemException If the file system for 427 * <code>defaultFsUri</code> is not supported 428 */ 429 public static FileContext getFileContext(final URI defaultFsUri) 430 throws UnsupportedFileSystemException { 431 return getFileContext(defaultFsUri, new Configuration()); 432 } 433 434 /** 435 * Create a FileContext for specified default URI using the specified config. 436 * 437 * @param defaultFsUri 438 * @param aConf 439 * @return new FileContext for specified uri 440 * @throws UnsupportedFileSystemException If the file system with specified is 441 * not supported 442 * @throws RuntimeException If the file system specified is supported but 443 * could not be instantiated, or if login fails. 
444 */ 445 public static FileContext getFileContext(final URI defaultFsUri, 446 final Configuration aConf) throws UnsupportedFileSystemException { 447 UserGroupInformation currentUser = null; 448 AbstractFileSystem defaultAfs = null; 449 try { 450 currentUser = UserGroupInformation.getCurrentUser(); 451 defaultAfs = getAbstractFileSystem(currentUser, defaultFsUri, aConf); 452 } catch (UnsupportedFileSystemException ex) { 453 throw ex; 454 } catch (IOException ex) { 455 LOG.error(ex); 456 throw new RuntimeException(ex); 457 } 458 return getFileContext(defaultAfs, aConf); 459 } 460 461 /** 462 * Create a FileContext using the passed config. Generally it is better to use 463 * {@link #getFileContext(URI, Configuration)} instead of this one. 464 * 465 * 466 * @param aConf 467 * @return new FileContext 468 * @throws UnsupportedFileSystemException If file system in the config 469 * is not supported 470 */ 471 public static FileContext getFileContext(final Configuration aConf) 472 throws UnsupportedFileSystemException { 473 return getFileContext( 474 URI.create(aConf.get(FS_DEFAULT_NAME_KEY, FS_DEFAULT_NAME_DEFAULT)), 475 aConf); 476 } 477 478 /** 479 * @param aConf - from which the FileContext is configured 480 * @return a FileContext for the local file system using the specified config. 481 * 482 * @throws UnsupportedFileSystemException If default file system in the config 483 * is not supported 484 * 485 */ 486 public static FileContext getLocalFSFileContext(final Configuration aConf) 487 throws UnsupportedFileSystemException { 488 return getFileContext(FsConstants.LOCAL_FS_URI, aConf); 489 } 490 491 /* This method is needed for tests. */ 492 @InterfaceAudience.Private 493 @InterfaceStability.Unstable /* return type will change to AFS once 494 HADOOP-6223 is completed */ 495 public AbstractFileSystem getDefaultFileSystem() { 496 return defaultFS; 497 } 498 499 /** 500 * Set the working directory for wd-relative names (such a "foo/bar"). 
Working 501 * directory feature is provided by simply prefixing relative names with the 502 * working dir. Note this is different from Unix where the wd is actually set 503 * to the inode. Hence setWorkingDir does not follow symlinks etc. This works 504 * better in a distributed environment that has multiple independent roots. 505 * {@link #getWorkingDirectory()} should return what setWorkingDir() set. 506 * 507 * @param newWDir new working directory 508 * @throws IOException 509 * <br> 510 * NewWdir can be one of: 511 * <ul> 512 * <li>relative path: "foo/bar";</li> 513 * <li>absolute without scheme: "/foo/bar"</li> 514 * <li>fully qualified with scheme: "xx://auth/foo/bar"</li> 515 * </ul> 516 * <br> 517 * Illegal WDs: 518 * <ul> 519 * <li>relative with scheme: "xx:foo/bar"</li> 520 * <li>non existent directory</li> 521 * </ul> 522 */ 523 public void setWorkingDirectory(final Path newWDir) throws IOException { 524 checkNotSchemeWithRelative(newWDir); 525 /* wd is stored as a fully qualified path. We check if the given 526 * path is not relative first since resolve requires and returns 527 * an absolute path. 528 */ 529 final Path newWorkingDir = new Path(workingDir, newWDir); 530 FileStatus status = getFileStatus(newWorkingDir); 531 if (status.isFile()) { 532 throw new FileNotFoundException("Cannot setWD to a file"); 533 } 534 workingDir = newWorkingDir; 535 } 536 537 /** 538 * Gets the working directory for wd-relative names (such a "foo/bar"). 539 */ 540 public Path getWorkingDirectory() { 541 return workingDir; 542 } 543 544 /** 545 * Gets the ugi in the file-context 546 * @return UserGroupInformation 547 */ 548 public UserGroupInformation getUgi() { 549 return ugi; 550 } 551 552 /** 553 * Return the current user's home directory in this file system. 554 * The default implementation returns "/user/$USER/". 
555 * @return the home directory 556 */ 557 public Path getHomeDirectory() { 558 return defaultFS.getHomeDirectory(); 559 } 560 561 /** 562 * 563 * @return the umask of this FileContext 564 */ 565 public FsPermission getUMask() { 566 return umask; 567 } 568 569 /** 570 * Set umask to the supplied parameter. 571 * @param newUmask the new umask 572 */ 573 public void setUMask(final FsPermission newUmask) { 574 umask = newUmask; 575 } 576 577 578 /** 579 * Resolve the path following any symlinks or mount points 580 * @param f to be resolved 581 * @return fully qualified resolved path 582 * 583 * @throws FileNotFoundException If <code>f</code> does not exist 584 * @throws AccessControlException if access denied 585 * @throws IOException If an IO Error occurred 586 * 587 * Exceptions applicable to file systems accessed over RPC: 588 * @throws RpcClientException If an exception occurred in the RPC client 589 * @throws RpcServerException If an exception occurred in the RPC server 590 * @throws UnexpectedServerException If server implementation throws 591 * undeclared exception to RPC server 592 * 593 * RuntimeExceptions: 594 * @throws InvalidPathException If path <code>f</code> is not valid 595 */ 596 public Path resolvePath(final Path f) throws FileNotFoundException, 597 UnresolvedLinkException, AccessControlException, IOException { 598 return resolve(f); 599 } 600 601 /** 602 * Make the path fully qualified if it is isn't. 603 * A Fully-qualified path has scheme and authority specified and an absolute 604 * path. 605 * Use the default file system and working dir in this FileContext to qualify. 606 * @param path 607 * @return qualified path 608 */ 609 public Path makeQualified(final Path path) { 610 return path.makeQualified(defaultFS.getUri(), getWorkingDirectory()); 611 } 612 613 /** 614 * Create or overwrite file on indicated path and returns an output stream for 615 * writing into the file. 
616 * 617 * @param f the file name to open 618 * @param createFlag gives the semantics of create; see {@link CreateFlag} 619 * @param opts file creation options; see {@link Options.CreateOpts}. 620 * <ul> 621 * <li>Progress - to report progress on the operation - default null 622 * <li>Permission - umask is applied against permisssion: default is 623 * FsPermissions:getDefault() 624 * 625 * <li>CreateParent - create missing parent path; default is to not 626 * to create parents 627 * <li>The defaults for the following are SS defaults of the file 628 * server implementing the target path. Not all parameters make sense 629 * for all kinds of file system - eg. localFS ignores Blocksize, 630 * replication, checksum 631 * <ul> 632 * <li>BufferSize - buffersize used in FSDataOutputStream 633 * <li>Blocksize - block size for file blocks 634 * <li>ReplicationFactor - replication for blocks 635 * <li>ChecksumParam - Checksum parameters. server default is used 636 * if not specified. 637 * </ul> 638 * </ul> 639 * 640 * @return {@link FSDataOutputStream} for created file 641 * 642 * @throws AccessControlException If access is denied 643 * @throws FileAlreadyExistsException If file <code>f</code> already exists 644 * @throws FileNotFoundException If parent of <code>f</code> does not exist 645 * and <code>createParent</code> is false 646 * @throws ParentNotDirectoryException If parent of <code>f</code> is not a 647 * directory. 
648 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 649 * not supported 650 * @throws IOException If an I/O error occurred 651 * 652 * Exceptions applicable to file systems accessed over RPC: 653 * @throws RpcClientException If an exception occurred in the RPC client 654 * @throws RpcServerException If an exception occurred in the RPC server 655 * @throws UnexpectedServerException If server implementation throws 656 * undeclared exception to RPC server 657 * 658 * RuntimeExceptions: 659 * @throws InvalidPathException If path <code>f</code> is not valid 660 */ 661 public FSDataOutputStream create(final Path f, 662 final EnumSet<CreateFlag> createFlag, Options.CreateOpts... opts) 663 throws AccessControlException, FileAlreadyExistsException, 664 FileNotFoundException, ParentNotDirectoryException, 665 UnsupportedFileSystemException, IOException { 666 Path absF = fixRelativePart(f); 667 668 // If one of the options is a permission, extract it & apply umask 669 // If not, add a default Perms and apply umask; 670 // AbstractFileSystem#create 671 672 CreateOpts.Perms permOpt = 673 (CreateOpts.Perms) CreateOpts.getOpt(CreateOpts.Perms.class, opts); 674 FsPermission permission = (permOpt != null) ? permOpt.getValue() : 675 FILE_DEFAULT_PERM; 676 permission = permission.applyUMask(umask); 677 678 final CreateOpts[] updatedOpts = 679 CreateOpts.setOpt(CreateOpts.perms(permission), opts); 680 return new FSLinkResolver<FSDataOutputStream>() { 681 public FSDataOutputStream next(final AbstractFileSystem fs, final Path p) 682 throws IOException { 683 return fs.create(p, createFlag, updatedOpts); 684 } 685 }.resolve(this, absF); 686 } 687 688 /** 689 * Make(create) a directory and all the non-existent parents. 
690 * 691 * @param dir - the dir to make 692 * @param permission - permissions is set permission&~umask 693 * @param createParent - if true then missing parent dirs are created if false 694 * then parent must exist 695 * 696 * @throws AccessControlException If access is denied 697 * @throws FileAlreadyExistsException If directory <code>dir</code> already 698 * exists 699 * @throws FileNotFoundException If parent of <code>dir</code> does not exist 700 * and <code>createParent</code> is false 701 * @throws ParentNotDirectoryException If parent of <code>dir</code> is not a 702 * directory 703 * @throws UnsupportedFileSystemException If file system for <code>dir</code> 704 * is not supported 705 * @throws IOException If an I/O error occurred 706 * 707 * Exceptions applicable to file systems accessed over RPC: 708 * @throws RpcClientException If an exception occurred in the RPC client 709 * @throws UnexpectedServerException If server implementation throws 710 * undeclared exception to RPC server 711 * 712 * RuntimeExceptions: 713 * @throws InvalidPathException If path <code>dir</code> is not valid 714 */ 715 public void mkdir(final Path dir, final FsPermission permission, 716 final boolean createParent) throws AccessControlException, 717 FileAlreadyExistsException, FileNotFoundException, 718 ParentNotDirectoryException, UnsupportedFileSystemException, 719 IOException { 720 final Path absDir = fixRelativePart(dir); 721 final FsPermission absFerms = (permission == null ? 722 FsPermission.getDirDefault() : permission).applyUMask(umask); 723 new FSLinkResolver<Void>() { 724 public Void next(final AbstractFileSystem fs, final Path p) 725 throws IOException, UnresolvedLinkException { 726 fs.mkdir(p, absFerms, createParent); 727 return null; 728 } 729 }.resolve(this, absDir); 730 } 731 732 /** 733 * Delete a file. 734 * @param f the path to delete. 735 * @param recursive if path is a directory and set to 736 * true, the directory is deleted else throws an exception. 
In 737 * case of a file the recursive can be set to either true or false. 738 * 739 * @throws AccessControlException If access is denied 740 * @throws FileNotFoundException If <code>f</code> does not exist 741 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 742 * not supported 743 * @throws IOException If an I/O error occurred 744 * 745 * Exceptions applicable to file systems accessed over RPC: 746 * @throws RpcClientException If an exception occurred in the RPC client 747 * @throws RpcServerException If an exception occurred in the RPC server 748 * @throws UnexpectedServerException If server implementation throws 749 * undeclared exception to RPC server 750 * 751 * RuntimeExceptions: 752 * @throws InvalidPathException If path <code>f</code> is invalid 753 */ 754 public boolean delete(final Path f, final boolean recursive) 755 throws AccessControlException, FileNotFoundException, 756 UnsupportedFileSystemException, IOException { 757 Path absF = fixRelativePart(f); 758 return new FSLinkResolver<Boolean>() { 759 public Boolean next(final AbstractFileSystem fs, final Path p) 760 throws IOException, UnresolvedLinkException { 761 return Boolean.valueOf(fs.delete(p, recursive)); 762 } 763 }.resolve(this, absF); 764 } 765 766 /** 767 * Opens an FSDataInputStream at the indicated Path using 768 * default buffersize. 
769 * @param f the file name to open 770 * 771 * @throws AccessControlException If access is denied 772 * @throws FileNotFoundException If file <code>f</code> does not exist 773 * @throws UnsupportedFileSystemException If file system for <code>f</code> 774 * is not supported 775 * @throws IOException If an I/O error occurred 776 * 777 * Exceptions applicable to file systems accessed over RPC: 778 * @throws RpcClientException If an exception occurred in the RPC client 779 * @throws RpcServerException If an exception occurred in the RPC server 780 * @throws UnexpectedServerException If server implementation throws 781 * undeclared exception to RPC server 782 */ 783 public FSDataInputStream open(final Path f) throws AccessControlException, 784 FileNotFoundException, UnsupportedFileSystemException, IOException { 785 final Path absF = fixRelativePart(f); 786 return new FSLinkResolver<FSDataInputStream>() { 787 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 788 throws IOException, UnresolvedLinkException { 789 return fs.open(p); 790 } 791 }.resolve(this, absF); 792 } 793 794 /** 795 * Opens an FSDataInputStream at the indicated Path. 796 * 797 * @param f the file name to open 798 * @param bufferSize the size of the buffer to be used. 
799 * 800 * @throws AccessControlException If access is denied 801 * @throws FileNotFoundException If file <code>f</code> does not exist 802 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 803 * not supported 804 * @throws IOException If an I/O error occurred 805 * 806 * Exceptions applicable to file systems accessed over RPC: 807 * @throws RpcClientException If an exception occurred in the RPC client 808 * @throws RpcServerException If an exception occurred in the RPC server 809 * @throws UnexpectedServerException If server implementation throws 810 * undeclared exception to RPC server 811 */ 812 public FSDataInputStream open(final Path f, final int bufferSize) 813 throws AccessControlException, FileNotFoundException, 814 UnsupportedFileSystemException, IOException { 815 final Path absF = fixRelativePart(f); 816 return new FSLinkResolver<FSDataInputStream>() { 817 public FSDataInputStream next(final AbstractFileSystem fs, final Path p) 818 throws IOException, UnresolvedLinkException { 819 return fs.open(p, bufferSize); 820 } 821 }.resolve(this, absF); 822 } 823 824 /** 825 * Set replication for an existing file. 
826 * 827 * @param f file name 828 * @param replication new replication 829 * 830 * @return true if successful 831 * 832 * @throws AccessControlException If access is denied 833 * @throws FileNotFoundException If file <code>f</code> does not exist 834 * @throws IOException If an I/O error occurred 835 * 836 * Exceptions applicable to file systems accessed over RPC: 837 * @throws RpcClientException If an exception occurred in the RPC client 838 * @throws RpcServerException If an exception occurred in the RPC server 839 * @throws UnexpectedServerException If server implementation throws 840 * undeclared exception to RPC server 841 */ 842 public boolean setReplication(final Path f, final short replication) 843 throws AccessControlException, FileNotFoundException, 844 IOException { 845 final Path absF = fixRelativePart(f); 846 return new FSLinkResolver<Boolean>() { 847 public Boolean next(final AbstractFileSystem fs, final Path p) 848 throws IOException, UnresolvedLinkException { 849 return Boolean.valueOf(fs.setReplication(p, replication)); 850 } 851 }.resolve(this, absF); 852 } 853 854 /** 855 * Renames Path src to Path dst 856 * <ul> 857 * <li 858 * <li>Fails if src is a file and dst is a directory. 859 * <li>Fails if src is a directory and dst is a file. 860 * <li>Fails if the parent of dst does not exist or is a file. 861 * </ul> 862 * <p> 863 * If OVERWRITE option is not passed as an argument, rename fails if the dst 864 * already exists. 865 * <p> 866 * If OVERWRITE option is passed as an argument, rename overwrites the dst if 867 * it is a file or an empty directory. Rename fails if dst is a non-empty 868 * directory. 869 * <p> 870 * Note that atomicity of rename is dependent on the file system 871 * implementation. 
Please refer to the file system documentation for details 872 * <p> 873 * 874 * @param src path to be renamed 875 * @param dst new path after rename 876 * 877 * @throws AccessControlException If access is denied 878 * @throws FileAlreadyExistsException If <code>dst</code> already exists and 879 * <code>options</options> has {@link Options.Rename#OVERWRITE} 880 * option false. 881 * @throws FileNotFoundException If <code>src</code> does not exist 882 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not a 883 * directory 884 * @throws UnsupportedFileSystemException If file system for <code>src</code> 885 * and <code>dst</code> is not supported 886 * @throws IOException If an I/O error occurred 887 * 888 * Exceptions applicable to file systems accessed over RPC: 889 * @throws RpcClientException If an exception occurred in the RPC client 890 * @throws RpcServerException If an exception occurred in the RPC server 891 * @throws UnexpectedServerException If server implementation throws 892 * undeclared exception to RPC server 893 */ 894 public void rename(final Path src, final Path dst, 895 final Options.Rename... options) throws AccessControlException, 896 FileAlreadyExistsException, FileNotFoundException, 897 ParentNotDirectoryException, UnsupportedFileSystemException, 898 IOException { 899 final Path absSrc = fixRelativePart(src); 900 final Path absDst = fixRelativePart(dst); 901 AbstractFileSystem srcFS = getFSofPath(absSrc); 902 AbstractFileSystem dstFS = getFSofPath(absDst); 903 if(!srcFS.getUri().equals(dstFS.getUri())) { 904 throw new IOException("Renames across AbstractFileSystems not supported"); 905 } 906 try { 907 srcFS.rename(absSrc, absDst, options); 908 } catch (UnresolvedLinkException e) { 909 /* We do not know whether the source or the destination path 910 * was unresolved. Resolve the source path up until the final 911 * path component, then fully resolve the destination. 
912 */ 913 final Path source = resolveIntermediate(absSrc); 914 new FSLinkResolver<Void>() { 915 public Void next(final AbstractFileSystem fs, final Path p) 916 throws IOException, UnresolvedLinkException { 917 fs.rename(source, p, options); 918 return null; 919 } 920 }.resolve(this, absDst); 921 } 922 } 923 924 /** 925 * Set permission of a path. 926 * @param f 927 * @param permission - the new absolute permission (umask is not applied) 928 * 929 * @throws AccessControlException If access is denied 930 * @throws FileNotFoundException If <code>f</code> does not exist 931 * @throws UnsupportedFileSystemException If file system for <code>f</code> 932 * is not supported 933 * @throws IOException If an I/O error occurred 934 * 935 * Exceptions applicable to file systems accessed over RPC: 936 * @throws RpcClientException If an exception occurred in the RPC client 937 * @throws RpcServerException If an exception occurred in the RPC server 938 * @throws UnexpectedServerException If server implementation throws 939 * undeclared exception to RPC server 940 */ 941 public void setPermission(final Path f, final FsPermission permission) 942 throws AccessControlException, FileNotFoundException, 943 UnsupportedFileSystemException, IOException { 944 final Path absF = fixRelativePart(f); 945 new FSLinkResolver<Void>() { 946 public Void next(final AbstractFileSystem fs, final Path p) 947 throws IOException, UnresolvedLinkException { 948 fs.setPermission(p, permission); 949 return null; 950 } 951 }.resolve(this, absF); 952 } 953 954 /** 955 * Set owner of a path (i.e. a file or a directory). The parameters username 956 * and groupname cannot both be null. 957 * 958 * @param f The path 959 * @param username If it is null, the original username remains unchanged. 960 * @param groupname If it is null, the original groupname remains unchanged. 
961 * 962 * @throws AccessControlException If access is denied 963 * @throws FileNotFoundException If <code>f</code> does not exist 964 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 965 * not supported 966 * @throws IOException If an I/O error occurred 967 * 968 * Exceptions applicable to file systems accessed over RPC: 969 * @throws RpcClientException If an exception occurred in the RPC client 970 * @throws RpcServerException If an exception occurred in the RPC server 971 * @throws UnexpectedServerException If server implementation throws 972 * undeclared exception to RPC server 973 * 974 * RuntimeExceptions: 975 * @throws HadoopIllegalArgumentException If <code>username</code> or 976 * <code>groupname</code> is invalid. 977 */ 978 public void setOwner(final Path f, final String username, 979 final String groupname) throws AccessControlException, 980 UnsupportedFileSystemException, FileNotFoundException, 981 IOException { 982 if ((username == null) && (groupname == null)) { 983 throw new HadoopIllegalArgumentException( 984 "username and groupname cannot both be null"); 985 } 986 final Path absF = fixRelativePart(f); 987 new FSLinkResolver<Void>() { 988 public Void next(final AbstractFileSystem fs, final Path p) 989 throws IOException, UnresolvedLinkException { 990 fs.setOwner(p, username, groupname); 991 return null; 992 } 993 }.resolve(this, absF); 994 } 995 996 /** 997 * Set access time of a file. 998 * @param f The path 999 * @param mtime Set the modification time of this file. 1000 * The number of milliseconds since epoch (Jan 1, 1970). 1001 * A value of -1 means that this call should not set modification time. 1002 * @param atime Set the access time of this file. 1003 * The number of milliseconds since Jan 1, 1970. 1004 * A value of -1 means that this call should not set access time. 
1005 * 1006 * @throws AccessControlException If access is denied 1007 * @throws FileNotFoundException If <code>f</code> does not exist 1008 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1009 * not supported 1010 * @throws IOException If an I/O error occurred 1011 * 1012 * Exceptions applicable to file systems accessed over RPC: 1013 * @throws RpcClientException If an exception occurred in the RPC client 1014 * @throws RpcServerException If an exception occurred in the RPC server 1015 * @throws UnexpectedServerException If server implementation throws 1016 * undeclared exception to RPC server 1017 */ 1018 public void setTimes(final Path f, final long mtime, final long atime) 1019 throws AccessControlException, FileNotFoundException, 1020 UnsupportedFileSystemException, IOException { 1021 final Path absF = fixRelativePart(f); 1022 new FSLinkResolver<Void>() { 1023 public Void next(final AbstractFileSystem fs, final Path p) 1024 throws IOException, UnresolvedLinkException { 1025 fs.setTimes(p, mtime, atime); 1026 return null; 1027 } 1028 }.resolve(this, absF); 1029 } 1030 1031 /** 1032 * Get the checksum of a file. 1033 * 1034 * @param f file path 1035 * 1036 * @return The file checksum. The default return value is null, 1037 * which indicates that no checksum algorithm is implemented 1038 * in the corresponding FileSystem. 
1039 * 1040 * @throws AccessControlException If access is denied 1041 * @throws FileNotFoundException If <code>f</code> does not exist 1042 * @throws IOException If an I/O error occurred 1043 * 1044 * Exceptions applicable to file systems accessed over RPC: 1045 * @throws RpcClientException If an exception occurred in the RPC client 1046 * @throws RpcServerException If an exception occurred in the RPC server 1047 * @throws UnexpectedServerException If server implementation throws 1048 * undeclared exception to RPC server 1049 */ 1050 public FileChecksum getFileChecksum(final Path f) 1051 throws AccessControlException, FileNotFoundException, 1052 IOException { 1053 final Path absF = fixRelativePart(f); 1054 return new FSLinkResolver<FileChecksum>() { 1055 public FileChecksum next(final AbstractFileSystem fs, final Path p) 1056 throws IOException, UnresolvedLinkException { 1057 return fs.getFileChecksum(p); 1058 } 1059 }.resolve(this, absF); 1060 } 1061 1062 /** 1063 * Set the verify checksum flag for the file system denoted by the path. 1064 * This is only applicable if the 1065 * corresponding FileSystem supports checksum. By default doesn't do anything. 
1066 * @param verifyChecksum 1067 * @param f set the verifyChecksum for the Filesystem containing this path 1068 * 1069 * @throws AccessControlException If access is denied 1070 * @throws FileNotFoundException If <code>f</code> does not exist 1071 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1072 * not supported 1073 * @throws IOException If an I/O error occurred 1074 * 1075 * Exceptions applicable to file systems accessed over RPC: 1076 * @throws RpcClientException If an exception occurred in the RPC client 1077 * @throws RpcServerException If an exception occurred in the RPC server 1078 * @throws UnexpectedServerException If server implementation throws 1079 * undeclared exception to RPC server 1080 */ 1081 public void setVerifyChecksum(final boolean verifyChecksum, final Path f) 1082 throws AccessControlException, FileNotFoundException, 1083 UnsupportedFileSystemException, IOException { 1084 final Path absF = resolve(fixRelativePart(f)); 1085 getFSofPath(absF).setVerifyChecksum(verifyChecksum); 1086 } 1087 1088 /** 1089 * Return a file status object that represents the path. 
1090 * @param f The path we want information from 1091 * 1092 * @return a FileStatus object 1093 * 1094 * @throws AccessControlException If access is denied 1095 * @throws FileNotFoundException If <code>f</code> does not exist 1096 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1097 * not supported 1098 * @throws IOException If an I/O error occurred 1099 * 1100 * Exceptions applicable to file systems accessed over RPC: 1101 * @throws RpcClientException If an exception occurred in the RPC client 1102 * @throws RpcServerException If an exception occurred in the RPC server 1103 * @throws UnexpectedServerException If server implementation throws 1104 * undeclared exception to RPC server 1105 */ 1106 public FileStatus getFileStatus(final Path f) throws AccessControlException, 1107 FileNotFoundException, UnsupportedFileSystemException, IOException { 1108 final Path absF = fixRelativePart(f); 1109 return new FSLinkResolver<FileStatus>() { 1110 public FileStatus next(final AbstractFileSystem fs, final Path p) 1111 throws IOException, UnresolvedLinkException { 1112 return fs.getFileStatus(p); 1113 } 1114 }.resolve(this, absF); 1115 } 1116 1117 /** 1118 * Return a fully qualified version of the given symlink target if it 1119 * has no scheme and authority. Partially and fully qualified paths 1120 * are returned unmodified. 1121 * @param pathFS The AbstractFileSystem of the path 1122 * @param pathWithLink Path that contains the symlink 1123 * @param target The symlink's absolute target 1124 * @return Fully qualified version of the target. 1125 */ 1126 private Path qualifySymlinkTarget(final AbstractFileSystem pathFS, 1127 Path pathWithLink, Path target) { 1128 // NB: makeQualified uses the target's scheme and authority, if 1129 // specified, and the scheme and authority of pathFS, if not. 
1130 final String scheme = target.toUri().getScheme(); 1131 final String auth = target.toUri().getAuthority(); 1132 return (scheme == null && auth == null) 1133 ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent()) 1134 : target; 1135 } 1136 1137 /** 1138 * Return a file status object that represents the path. If the path 1139 * refers to a symlink then the FileStatus of the symlink is returned. 1140 * The behavior is equivalent to #getFileStatus() if the underlying 1141 * file system does not support symbolic links. 1142 * @param f The path we want information from. 1143 * @return A FileStatus object 1144 * 1145 * @throws AccessControlException If access is denied 1146 * @throws FileNotFoundException If <code>f</code> does not exist 1147 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1148 * not supported 1149 * @throws IOException If an I/O error occurred 1150 */ 1151 public FileStatus getFileLinkStatus(final Path f) 1152 throws AccessControlException, FileNotFoundException, 1153 UnsupportedFileSystemException, IOException { 1154 final Path absF = fixRelativePart(f); 1155 return new FSLinkResolver<FileStatus>() { 1156 public FileStatus next(final AbstractFileSystem fs, final Path p) 1157 throws IOException, UnresolvedLinkException { 1158 FileStatus fi = fs.getFileLinkStatus(p); 1159 if (fi.isSymlink()) { 1160 fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink())); 1161 } 1162 return fi; 1163 } 1164 }.resolve(this, absF); 1165 } 1166 1167 /** 1168 * Returns the target of the given symbolic link as it was specified 1169 * when the link was created. Links in the path leading up to the 1170 * final path component are resolved transparently. 1171 * 1172 * @param f the path to return the target of 1173 * @return The un-interpreted target of the symbolic link. 
1174 * 1175 * @throws AccessControlException If access is denied 1176 * @throws FileNotFoundException If path <code>f</code> does not exist 1177 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1178 * not supported 1179 * @throws IOException If the given path does not refer to a symlink 1180 * or an I/O error occurred 1181 */ 1182 public Path getLinkTarget(final Path f) throws AccessControlException, 1183 FileNotFoundException, UnsupportedFileSystemException, IOException { 1184 final Path absF = fixRelativePart(f); 1185 return new FSLinkResolver<Path>() { 1186 public Path next(final AbstractFileSystem fs, final Path p) 1187 throws IOException, UnresolvedLinkException { 1188 FileStatus fi = fs.getFileLinkStatus(p); 1189 return fi.getSymlink(); 1190 } 1191 }.resolve(this, absF); 1192 } 1193 1194 /** 1195 * Return blockLocation of the given file for the given offset and len. 1196 * For a nonexistent file or regions, null will be returned. 1197 * 1198 * This call is most helpful with DFS, where it returns 1199 * hostnames of machines that contain the given file. 
1200 * 1201 * @param f - get blocklocations of this file 1202 * @param start position (byte offset) 1203 * @param len (in bytes) 1204 * 1205 * @return block locations for given file at specified offset of len 1206 * 1207 * @throws AccessControlException If access is denied 1208 * @throws FileNotFoundException If <code>f</code> does not exist 1209 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1210 * not supported 1211 * @throws IOException If an I/O error occurred 1212 * 1213 * Exceptions applicable to file systems accessed over RPC: 1214 * @throws RpcClientException If an exception occurred in the RPC client 1215 * @throws RpcServerException If an exception occurred in the RPC server 1216 * @throws UnexpectedServerException If server implementation throws 1217 * undeclared exception to RPC server 1218 * 1219 * RuntimeExceptions: 1220 * @throws InvalidPathException If path <code>f</code> is invalid 1221 */ 1222 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 1223 @InterfaceStability.Evolving 1224 public BlockLocation[] getFileBlockLocations(final Path f, final long start, 1225 final long len) throws AccessControlException, FileNotFoundException, 1226 UnsupportedFileSystemException, IOException { 1227 final Path absF = fixRelativePart(f); 1228 return new FSLinkResolver<BlockLocation[]>() { 1229 public BlockLocation[] next(final AbstractFileSystem fs, final Path p) 1230 throws IOException, UnresolvedLinkException { 1231 return fs.getFileBlockLocations(p, start, len); 1232 } 1233 }.resolve(this, absF); 1234 } 1235 1236 /** 1237 * Returns a status object describing the use and capacity of the 1238 * file system denoted by the Parh argument p. 1239 * If the file system has multiple partitions, the 1240 * use and capacity of the partition pointed to by the specified 1241 * path is reflected. 1242 * 1243 * @param f Path for which status should be obtained. null means the 1244 * root partition of the default file system. 
1245 * 1246 * @return a FsStatus object 1247 * 1248 * @throws AccessControlException If access is denied 1249 * @throws FileNotFoundException If <code>f</code> does not exist 1250 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1251 * not supported 1252 * @throws IOException If an I/O error occurred 1253 * 1254 * Exceptions applicable to file systems accessed over RPC: 1255 * @throws RpcClientException If an exception occurred in the RPC client 1256 * @throws RpcServerException If an exception occurred in the RPC server 1257 * @throws UnexpectedServerException If server implementation throws 1258 * undeclared exception to RPC server 1259 */ 1260 public FsStatus getFsStatus(final Path f) throws AccessControlException, 1261 FileNotFoundException, UnsupportedFileSystemException, IOException { 1262 if (f == null) { 1263 return defaultFS.getFsStatus(); 1264 } 1265 final Path absF = fixRelativePart(f); 1266 return new FSLinkResolver<FsStatus>() { 1267 public FsStatus next(final AbstractFileSystem fs, final Path p) 1268 throws IOException, UnresolvedLinkException { 1269 return fs.getFsStatus(p); 1270 } 1271 }.resolve(this, absF); 1272 } 1273 1274 /** 1275 * Creates a symbolic link to an existing file. An exception is thrown if 1276 * the symlink exits, the user does not have permission to create symlink, 1277 * or the underlying file system does not support symlinks. 1278 * 1279 * Symlink permissions are ignored, access to a symlink is determined by 1280 * the permissions of the symlink target. 1281 * 1282 * Symlinks in paths leading up to the final path component are resolved 1283 * transparently. If the final path component refers to a symlink some 1284 * functions operate on the symlink itself, these are: 1285 * - delete(f) and deleteOnExit(f) - Deletes the symlink. 1286 * - rename(src, dst) - If src refers to a symlink, the symlink is 1287 * renamed. If dst refers to a symlink, the symlink is over-written. 
* - getLinkTarget(f) - Returns the target of the symlink.
   * - getFileLinkStatus(f) - Returns a FileStatus object describing
   *   the symlink.
   * Some functions, create() and mkdir(), expect the final path component
   * does not exist. If they are given a path that refers to a symlink that
   * does exist they behave as if the path referred to an existing file or
   * directory. All other functions fully resolve, ie follow, the symlink.
   * These are: open, setReplication, setOwner, setTimes, setWorkingDirectory,
   * setPermission, getFileChecksum, setVerifyChecksum, getFileBlockLocations,
   * getFsStatus, getFileStatus, exists, and listStatus.
   *
   * Symlink targets are stored as given to createSymlink, assuming the
   * underlying file system is capable of storing a fully qualified URI.
   * Dangling symlinks are permitted. FileContext supports four types of
   * symlink targets, and resolves them as follows:
   * <pre>
   * Given a path referring to a symlink of form:
   *
   *   &lt;---X---&gt;
   *   fs://host/A/B/link
   *   &lt;-----Y-----&gt;
   *
   * In this path X is the scheme and authority that identify the file system,
   * and Y is the path leading up to the final path component "link". If Y is
   * a symlink itself then let Y' be the target of Y and X' be the scheme and
   * authority of Y'. Symlink targets may be:
   *
   * 1. Fully qualified URIs
   *
   * fs://hostX/A/B/file  Resolved according to the target file system.
   *
   * 2. Partially qualified URIs (eg scheme but no host)
   *
   * fs:///A/B/file  Resolved according to the target file system. Eg resolving
   *                 a symlink to hdfs:///A results in an exception because
   *                 HDFS URIs must be fully qualified, while a symlink to
   *                 file:///A will not since Hadoop's local file systems
   *                 require partially qualified URIs.
   *
   * 3. Relative paths
   *
   * path  Resolves to [Y'][path].
Eg if Y resolves to hdfs://host/A and path 1330 * is "../B/file" then [Y'][path] is hdfs://host/B/file 1331 * 1332 * 4. Absolute paths 1333 * 1334 * path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path 1335 * is "/file" then [X][path] is hdfs://host/file 1336 * </pre> 1337 * 1338 * @param target the target of the symbolic link 1339 * @param link the path to be created that points to target 1340 * @param createParent if true then missing parent dirs are created if 1341 * false then parent must exist 1342 * 1343 * 1344 * @throws AccessControlException If access is denied 1345 * @throws FileAlreadyExistsException If file <code>linkcode> already exists 1346 * @throws FileNotFoundException If <code>target</code> does not exist 1347 * @throws ParentNotDirectoryException If parent of <code>link</code> is not a 1348 * directory. 1349 * @throws UnsupportedFileSystemException If file system for 1350 * <code>target</code> or <code>link</code> is not supported 1351 * @throws IOException If an I/O error occurred 1352 */ 1353 public void createSymlink(final Path target, final Path link, 1354 final boolean createParent) throws AccessControlException, 1355 FileAlreadyExistsException, FileNotFoundException, 1356 ParentNotDirectoryException, UnsupportedFileSystemException, 1357 IOException { 1358 final Path nonRelLink = fixRelativePart(link); 1359 new FSLinkResolver<Void>() { 1360 public Void next(final AbstractFileSystem fs, final Path p) 1361 throws IOException, UnresolvedLinkException { 1362 fs.createSymlink(target, p, createParent); 1363 return null; 1364 } 1365 }.resolve(this, nonRelLink); 1366 } 1367 1368 /** 1369 * List the statuses of the files/directories in the given path if the path is 1370 * a directory. 
1371 * 1372 * @param f is the path 1373 * 1374 * @return an iterator that traverses statuses of the files/directories 1375 * in the given path 1376 * 1377 * @throws AccessControlException If access is denied 1378 * @throws FileNotFoundException If <code>f</code> does not exist 1379 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1380 * not supported 1381 * @throws IOException If an I/O error occurred 1382 * 1383 * Exceptions applicable to file systems accessed over RPC: 1384 * @throws RpcClientException If an exception occurred in the RPC client 1385 * @throws RpcServerException If an exception occurred in the RPC server 1386 * @throws UnexpectedServerException If server implementation throws 1387 * undeclared exception to RPC server 1388 */ 1389 public RemoteIterator<FileStatus> listStatus(final Path f) throws 1390 AccessControlException, FileNotFoundException, 1391 UnsupportedFileSystemException, IOException { 1392 final Path absF = fixRelativePart(f); 1393 return new FSLinkResolver<RemoteIterator<FileStatus>>() { 1394 public RemoteIterator<FileStatus> next( 1395 final AbstractFileSystem fs, final Path p) 1396 throws IOException, UnresolvedLinkException { 1397 return fs.listStatusIterator(p); 1398 } 1399 }.resolve(this, absF); 1400 } 1401 1402 /** 1403 * @return an iterator over the corrupt files under the given path 1404 * (may contain duplicates if a file has more than one corrupt block) 1405 * @throws IOException 1406 */ 1407 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1408 throws IOException { 1409 final Path absF = fixRelativePart(path); 1410 return new FSLinkResolver<RemoteIterator<Path>>() { 1411 @Override 1412 public RemoteIterator<Path> next(final AbstractFileSystem fs, 1413 final Path p) 1414 throws IOException, UnresolvedLinkException { 1415 return fs.listCorruptFileBlocks(p); 1416 } 1417 }.resolve(this, absF); 1418 } 1419 1420 /** 1421 * List the statuses of the files/directories in the given path if the 
path is 1422 * a directory. 1423 * Return the file's status and block locations If the path is a file. 1424 * 1425 * If a returned status is a file, it contains the file's block locations. 1426 * 1427 * @param f is the path 1428 * 1429 * @return an iterator that traverses statuses of the files/directories 1430 * in the given path 1431 * If any IO exception (for example the input directory gets deleted while 1432 * listing is being executed), next() or hasNext() of the returned iterator 1433 * may throw a RuntimeException with the io exception as the cause. 1434 * 1435 * @throws AccessControlException If access is denied 1436 * @throws FileNotFoundException If <code>f</code> does not exist 1437 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1438 * not supported 1439 * @throws IOException If an I/O error occurred 1440 * 1441 * Exceptions applicable to file systems accessed over RPC: 1442 * @throws RpcClientException If an exception occurred in the RPC client 1443 * @throws RpcServerException If an exception occurred in the RPC server 1444 * @throws UnexpectedServerException If server implementation throws 1445 * undeclared exception to RPC server 1446 */ 1447 public RemoteIterator<LocatedFileStatus> listLocatedStatus( 1448 final Path f) throws 1449 AccessControlException, FileNotFoundException, 1450 UnsupportedFileSystemException, IOException { 1451 final Path absF = fixRelativePart(f); 1452 return new FSLinkResolver<RemoteIterator<LocatedFileStatus>>() { 1453 public RemoteIterator<LocatedFileStatus> next( 1454 final AbstractFileSystem fs, final Path p) 1455 throws IOException, UnresolvedLinkException { 1456 return fs.listLocatedStatus(p); 1457 } 1458 }.resolve(this, absF); 1459 } 1460 1461 /** 1462 * Mark a path to be deleted on JVM shutdown. 1463 * 1464 * @param f the existing path to delete. 1465 * 1466 * @return true if deleteOnExit is successful, otherwise false. 
1467 * 1468 * @throws AccessControlException If access is denied 1469 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1470 * not supported 1471 * @throws IOException If an I/O error occurred 1472 * 1473 * Exceptions applicable to file systems accessed over RPC: 1474 * @throws RpcClientException If an exception occurred in the RPC client 1475 * @throws RpcServerException If an exception occurred in the RPC server 1476 * @throws UnexpectedServerException If server implementation throws 1477 * undeclared exception to RPC server 1478 */ 1479 public boolean deleteOnExit(Path f) throws AccessControlException, 1480 IOException { 1481 if (!this.util().exists(f)) { 1482 return false; 1483 } 1484 synchronized (DELETE_ON_EXIT) { 1485 if (DELETE_ON_EXIT.isEmpty()) { 1486 ShutdownHookManager.get().addShutdownHook(FINALIZER, SHUTDOWN_HOOK_PRIORITY); 1487 } 1488 1489 Set<Path> set = DELETE_ON_EXIT.get(this); 1490 if (set == null) { 1491 set = new TreeSet<Path>(); 1492 DELETE_ON_EXIT.put(this, set); 1493 } 1494 set.add(f); 1495 } 1496 return true; 1497 } 1498 1499 private final Util util; 1500 public Util util() { 1501 return util; 1502 } 1503 1504 1505 /** 1506 * Utility/library methods built over the basic FileContext methods. 1507 * Since this are library functions, the oprtation are not atomic 1508 * and some of them may partially complete if other threads are making 1509 * changes to the same part of the name space. 1510 */ 1511 public class Util { 1512 /** 1513 * Does the file exist? 1514 * Note: Avoid using this method if you already have FileStatus in hand. 
1515 * Instead reuse the FileStatus 1516 * @param f the file or dir to be checked 1517 * 1518 * @throws AccessControlException If access is denied 1519 * @throws IOException If an I/O error occurred 1520 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1521 * not supported 1522 * 1523 * Exceptions applicable to file systems accessed over RPC: 1524 * @throws RpcClientException If an exception occurred in the RPC client 1525 * @throws RpcServerException If an exception occurred in the RPC server 1526 * @throws UnexpectedServerException If server implementation throws 1527 * undeclared exception to RPC server 1528 */ 1529 public boolean exists(final Path f) throws AccessControlException, 1530 UnsupportedFileSystemException, IOException { 1531 try { 1532 FileStatus fs = FileContext.this.getFileStatus(f); 1533 assert fs != null; 1534 return true; 1535 } catch (FileNotFoundException e) { 1536 return false; 1537 } 1538 } 1539 1540 /** 1541 * Return a list of file status objects that corresponds to supplied paths 1542 * excluding those non-existent paths. 
1543 * 1544 * @param paths list of paths we want information from 1545 * 1546 * @return a list of FileStatus objects 1547 * 1548 * @throws AccessControlException If access is denied 1549 * @throws IOException If an I/O error occurred 1550 * 1551 * Exceptions applicable to file systems accessed over RPC: 1552 * @throws RpcClientException If an exception occurred in the RPC client 1553 * @throws RpcServerException If an exception occurred in the RPC server 1554 * @throws UnexpectedServerException If server implementation throws 1555 * undeclared exception to RPC server 1556 */ 1557 private FileStatus[] getFileStatus(Path[] paths) 1558 throws AccessControlException, IOException { 1559 if (paths == null) { 1560 return null; 1561 } 1562 ArrayList<FileStatus> results = new ArrayList<FileStatus>(paths.length); 1563 for (int i = 0; i < paths.length; i++) { 1564 try { 1565 results.add(FileContext.this.getFileStatus(paths[i])); 1566 } catch (FileNotFoundException fnfe) { 1567 // ignoring 1568 } 1569 } 1570 return results.toArray(new FileStatus[results.size()]); 1571 } 1572 1573 1574 /** 1575 * Return the {@link ContentSummary} of path f. 1576 * @param f path 1577 * 1578 * @return the {@link ContentSummary} of path f. 
1579 * 1580 * @throws AccessControlException If access is denied 1581 * @throws FileNotFoundException If <code>f</code> does not exist 1582 * @throws UnsupportedFileSystemException If file system for 1583 * <code>f</code> is not supported 1584 * @throws IOException If an I/O error occurred 1585 * 1586 * Exceptions applicable to file systems accessed over RPC: 1587 * @throws RpcClientException If an exception occurred in the RPC client 1588 * @throws RpcServerException If an exception occurred in the RPC server 1589 * @throws UnexpectedServerException If server implementation throws 1590 * undeclared exception to RPC server 1591 */ 1592 public ContentSummary getContentSummary(Path f) 1593 throws AccessControlException, FileNotFoundException, 1594 UnsupportedFileSystemException, IOException { 1595 FileStatus status = FileContext.this.getFileStatus(f); 1596 if (status.isFile()) { 1597 return new ContentSummary(status.getLen(), 1, 0); 1598 } 1599 long[] summary = {0, 0, 1}; 1600 RemoteIterator<FileStatus> statusIterator = 1601 FileContext.this.listStatus(f); 1602 while(statusIterator.hasNext()) { 1603 FileStatus s = statusIterator.next(); 1604 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1605 new ContentSummary(s.getLen(), 1, 0); 1606 summary[0] += c.getLength(); 1607 summary[1] += c.getFileCount(); 1608 summary[2] += c.getDirectoryCount(); 1609 } 1610 return new ContentSummary(summary[0], summary[1], summary[2]); 1611 } 1612 1613 /** 1614 * See {@link #listStatus(Path[], PathFilter)} 1615 */ 1616 public FileStatus[] listStatus(Path[] files) throws AccessControlException, 1617 FileNotFoundException, IOException { 1618 return listStatus(files, DEFAULT_FILTER); 1619 } 1620 1621 /** 1622 * Filter files/directories in the given path using the user-supplied path 1623 * filter. 
1624 * 1625 * @param f is the path name 1626 * @param filter is the user-supplied path filter 1627 * 1628 * @return an array of FileStatus objects for the files under the given path 1629 * after applying the filter 1630 * 1631 * @throws AccessControlException If access is denied 1632 * @throws FileNotFoundException If <code>f</code> does not exist 1633 * @throws UnsupportedFileSystemException If file system for 1634 * <code>pathPattern</code> is not supported 1635 * @throws IOException If an I/O error occurred 1636 * 1637 * Exceptions applicable to file systems accessed over RPC: 1638 * @throws RpcClientException If an exception occurred in the RPC client 1639 * @throws RpcServerException If an exception occurred in the RPC server 1640 * @throws UnexpectedServerException If server implementation throws 1641 * undeclared exception to RPC server 1642 */ 1643 public FileStatus[] listStatus(Path f, PathFilter filter) 1644 throws AccessControlException, FileNotFoundException, 1645 UnsupportedFileSystemException, IOException { 1646 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1647 listStatus(results, f, filter); 1648 return results.toArray(new FileStatus[results.size()]); 1649 } 1650 1651 /** 1652 * Filter files/directories in the given list of paths using user-supplied 1653 * path filter. 
1654 * 1655 * @param files is a list of paths 1656 * @param filter is the filter 1657 * 1658 * @return a list of statuses for the files under the given paths after 1659 * applying the filter 1660 * 1661 * @throws AccessControlException If access is denied 1662 * @throws FileNotFoundException If a file in <code>files</code> does not 1663 * exist 1664 * @throws IOException If an I/O error occurred 1665 * 1666 * Exceptions applicable to file systems accessed over RPC: 1667 * @throws RpcClientException If an exception occurred in the RPC client 1668 * @throws RpcServerException If an exception occurred in the RPC server 1669 * @throws UnexpectedServerException If server implementation throws 1670 * undeclared exception to RPC server 1671 */ 1672 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1673 throws AccessControlException, FileNotFoundException, IOException { 1674 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1675 for (int i = 0; i < files.length; i++) { 1676 listStatus(results, files[i], filter); 1677 } 1678 return results.toArray(new FileStatus[results.size()]); 1679 } 1680 1681 /* 1682 * Filter files/directories in the given path using the user-supplied path 1683 * filter. Results are added to the given array <code>results</code>. 1684 */ 1685 private void listStatus(ArrayList<FileStatus> results, Path f, 1686 PathFilter filter) throws AccessControlException, 1687 FileNotFoundException, IOException { 1688 FileStatus[] listing = listStatus(f); 1689 if (listing != null) { 1690 for (int i = 0; i < listing.length; i++) { 1691 if (filter.accept(listing[i].getPath())) { 1692 results.add(listing[i]); 1693 } 1694 } 1695 } 1696 } 1697 1698 /** 1699 * List the statuses of the files/directories in the given path 1700 * if the path is a directory. 
1701 * 1702 * @param f is the path 1703 * 1704 * @return an array that contains statuses of the files/directories 1705 * in the given path 1706 * 1707 * @throws AccessControlException If access is denied 1708 * @throws FileNotFoundException If <code>f</code> does not exist 1709 * @throws UnsupportedFileSystemException If file system for <code>f</code> is 1710 * not supported 1711 * @throws IOException If an I/O error occurred 1712 * 1713 * Exceptions applicable to file systems accessed over RPC: 1714 * @throws RpcClientException If an exception occurred in the RPC client 1715 * @throws RpcServerException If an exception occurred in the RPC server 1716 * @throws UnexpectedServerException If server implementation throws 1717 * undeclared exception to RPC server 1718 */ 1719 public FileStatus[] listStatus(final Path f) throws AccessControlException, 1720 FileNotFoundException, UnsupportedFileSystemException, 1721 IOException { 1722 final Path absF = fixRelativePart(f); 1723 return new FSLinkResolver<FileStatus[]>() { 1724 public FileStatus[] next(final AbstractFileSystem fs, final Path p) 1725 throws IOException, UnresolvedLinkException { 1726 return fs.listStatus(p); 1727 } 1728 }.resolve(FileContext.this, absF); 1729 } 1730 1731 /** 1732 * List the statuses and block locations of the files in the given path. 1733 * 1734 * If the path is a directory, 1735 * if recursive is false, returns files in the directory; 1736 * if recursive is true, return files in the subtree rooted at the path. 1737 * The subtree is traversed in the depth-first order. 1738 * If the path is a file, return the file's status and block locations. 1739 * Files across symbolic links are also returned. 
1740 * 1741 * @param f is the path 1742 * @param recursive if the subdirectories need to be traversed recursively 1743 * 1744 * @return an iterator that traverses statuses of the files 1745 * If any IO exception (for example a sub-directory gets deleted while 1746 * listing is being executed), next() or hasNext() of the returned iterator 1747 * may throw a RuntimeException with the IO exception as the cause. 1748 * 1749 * @throws AccessControlException If access is denied 1750 * @throws FileNotFoundException If <code>f</code> does not exist 1751 * @throws UnsupportedFileSystemException If file system for <code>f</code> 1752 * is not supported 1753 * @throws IOException If an I/O error occurred 1754 * 1755 * Exceptions applicable to file systems accessed over RPC: 1756 * @throws RpcClientException If an exception occurred in the RPC client 1757 * @throws RpcServerException If an exception occurred in the RPC server 1758 * @throws UnexpectedServerException If server implementation throws 1759 * undeclared exception to RPC server 1760 */ 1761 public RemoteIterator<LocatedFileStatus> listFiles( 1762 final Path f, final boolean recursive) throws AccessControlException, 1763 FileNotFoundException, UnsupportedFileSystemException, 1764 IOException { 1765 return new RemoteIterator<LocatedFileStatus>() { 1766 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1767 new Stack<RemoteIterator<LocatedFileStatus>>(); 1768 RemoteIterator<LocatedFileStatus> curItor = listLocatedStatus(f); 1769 LocatedFileStatus curFile; 1770 1771 /** 1772 * Returns <tt>true</tt> if the iterator has more files. 1773 * 1774 * @return <tt>true</tt> if the iterator has more files. 
1775 * @throws AccessControlException if not allowed to access next 1776 * file's status or locations 1777 * @throws FileNotFoundException if next file does not exist any more 1778 * @throws UnsupportedFileSystemException if next file's 1779 * fs is unsupported 1780 * @throws IOException for all other IO errors 1781 * for example, NameNode is not avaialbe or 1782 * NameNode throws IOException due to an error 1783 * while getting the status or block locations 1784 */ 1785 @Override 1786 public boolean hasNext() throws IOException { 1787 while (curFile == null) { 1788 if (curItor.hasNext()) { 1789 handleFileStat(curItor.next()); 1790 } else if (!itors.empty()) { 1791 curItor = itors.pop(); 1792 } else { 1793 return false; 1794 } 1795 } 1796 return true; 1797 } 1798 1799 /** 1800 * Process the input stat. 1801 * If it is a file, return the file stat. 1802 * If it is a directory, traverse the directory if recursive is true; 1803 * ignore it if recursive is false. 1804 * If it is a symlink, resolve the symlink first and then process it 1805 * depending on if it is a file or directory. 
1806 * @param stat input status 1807 * @throws AccessControlException if access is denied 1808 * @throws FileNotFoundException if file is not found 1809 * @throws UnsupportedFileSystemException if fs is not supported 1810 * @throws IOException for all other IO errors 1811 */ 1812 private void handleFileStat(LocatedFileStatus stat) 1813 throws IOException { 1814 if (stat.isFile()) { // file 1815 curFile = stat; 1816 } else if (stat.isSymlink()) { // symbolic link 1817 // resolve symbolic link 1818 FileStatus symstat = FileContext.this.getFileStatus( 1819 stat.getSymlink()); 1820 if (symstat.isFile() || (recursive && symstat.isDirectory())) { 1821 itors.push(curItor); 1822 curItor = listLocatedStatus(stat.getPath()); 1823 } 1824 } else if (recursive) { // directory 1825 itors.push(curItor); 1826 curItor = listLocatedStatus(stat.getPath()); 1827 } 1828 } 1829 1830 /** 1831 * Returns the next file's status with its block locations 1832 * 1833 * @throws AccessControlException if not allowed to access next 1834 * file's status or locations 1835 * @throws FileNotFoundException if next file does not exist any more 1836 * @throws UnsupportedFileSystemException if next file's 1837 * fs is unsupported 1838 * @throws IOException for all other IO errors 1839 * for example, NameNode is not avaialbe or 1840 * NameNode throws IOException due to an error 1841 * while getting the status or block locations 1842 */ 1843 @Override 1844 public LocatedFileStatus next() throws IOException { 1845 if (hasNext()) { 1846 LocatedFileStatus result = curFile; 1847 curFile = null; 1848 return result; 1849 } 1850 throw new java.util.NoSuchElementException("No more entry in " + f); 1851 } 1852 }; 1853 } 1854 1855 /** 1856 * <p>Return all the files that match filePattern and are not checksum 1857 * files. Results are sorted by their names. 
*
     * <p>
     * A filename pattern is composed of <i>regular</i> characters and
     * <i>special pattern matching</i> characters, which are:
     *
     * <dl>
     *  <dd>
     *   <dl>
     *    <p>
     *    <dt> <tt> ? </tt>
     *    <dd> Matches any single character.
     *
     *    <p>
     *    <dt> <tt> * </tt>
     *    <dd> Matches zero or more characters.
     *
     *    <p>
     *    <dt> <tt> [<i>abc</i>] </tt>
     *    <dd> Matches a single character from character set
     *     <tt>{<i>a,b,c</i>}</tt>.
     *
     *    <p>
     *    <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
     *    <dd> Matches a single character from the character range
     *     <tt>{<i>a...b</i>}</tt>. Note: character <tt><i>a</i></tt> must be
     *     lexicographically less than or equal to character <tt><i>b</i></tt>.
     *
     *    <p>
     *    <dt> <tt> [^<i>a</i>] </tt>
     *    <dd> Matches a single char that is not from character set or range
     *     <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
     *     immediately to the right of the opening bracket.
     *
     *    <p>
     *    <dt> <tt> \<i>c</i> </tt>
     *    <dd> Removes (escapes) any special meaning of character <i>c</i>.
1894 * 1895 * <p> 1896 * <dt> <tt> {ab,cd} </tt> 1897 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1898 * 1899 * <p> 1900 * <dt> <tt> {ab,c{de,fh}} </tt> 1901 * <dd> Matches a string from string set <tt>{<i>ab, cde, cfh</i>}</tt> 1902 * 1903 * </dl> 1904 * </dd> 1905 * </dl> 1906 * 1907 * @param pathPattern a regular expression specifying a pth pattern 1908 * 1909 * @return an array of paths that match the path pattern 1910 * 1911 * @throws AccessControlException If access is denied 1912 * @throws UnsupportedFileSystemException If file system for 1913 * <code>pathPattern</code> is not supported 1914 * @throws IOException If an I/O error occurred 1915 * 1916 * Exceptions applicable to file systems accessed over RPC: 1917 * @throws RpcClientException If an exception occurred in the RPC client 1918 * @throws RpcServerException If an exception occurred in the RPC server 1919 * @throws UnexpectedServerException If server implementation throws 1920 * undeclared exception to RPC server 1921 */ 1922 public FileStatus[] globStatus(Path pathPattern) 1923 throws AccessControlException, UnsupportedFileSystemException, 1924 IOException { 1925 return globStatus(pathPattern, DEFAULT_FILTER); 1926 } 1927 1928 /** 1929 * Return an array of FileStatus objects whose path names match pathPattern 1930 * and is accepted by the user-supplied path filter. Results are sorted by 1931 * their path names. 1932 * Return null if pathPattern has no glob and the path does not exist. 1933 * Return an empty array if pathPattern has a glob and no path matches it. 
1934 * 1935 * @param pathPattern regular expression specifying the path pattern 1936 * @param filter user-supplied path filter 1937 * 1938 * @return an array of FileStatus objects 1939 * 1940 * @throws AccessControlException If access is denied 1941 * @throws UnsupportedFileSystemException If file system for 1942 * <code>pathPattern</code> is not supported 1943 * @throws IOException If an I/O error occurred 1944 * 1945 * Exceptions applicable to file systems accessed over RPC: 1946 * @throws RpcClientException If an exception occurred in the RPC client 1947 * @throws RpcServerException If an exception occurred in the RPC server 1948 * @throws UnexpectedServerException If server implementation throws 1949 * undeclared exception to RPC server 1950 */ 1951 public FileStatus[] globStatus(final Path pathPattern, 1952 final PathFilter filter) throws AccessControlException, 1953 UnsupportedFileSystemException, IOException { 1954 URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri(); 1955 1956 String filename = pathPattern.toUri().getPath(); 1957 1958 List<String> filePatterns = GlobExpander.expand(filename); 1959 if (filePatterns.size() == 1) { 1960 Path absPathPattern = fixRelativePart(pathPattern); 1961 return globStatusInternal(uri, new Path(absPathPattern.toUri() 1962 .getPath()), filter); 1963 } else { 1964 List<FileStatus> results = new ArrayList<FileStatus>(); 1965 for (String iFilePattern : filePatterns) { 1966 Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern)); 1967 FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter); 1968 for (FileStatus file : files) { 1969 results.add(file); 1970 } 1971 } 1972 return results.toArray(new FileStatus[results.size()]); 1973 } 1974 } 1975 1976 /** 1977 * 1978 * @param uri for all the inPathPattern 1979 * @param inPathPattern - without the scheme & authority (take from uri) 1980 * @param filter 1981 * 1982 * @return an array of FileStatus objects 1983 * 1984 * @throws AccessControlException If 
access is denied 1985 * @throws IOException If an I/O error occurred 1986 */ 1987 private FileStatus[] globStatusInternal(final URI uri, 1988 final Path inPathPattern, final PathFilter filter) 1989 throws AccessControlException, IOException 1990 { 1991 Path[] parents = new Path[1]; 1992 int level = 0; 1993 1994 assert(inPathPattern.toUri().getScheme() == null && 1995 inPathPattern.toUri().getAuthority() == null && 1996 inPathPattern.isUriPathAbsolute()); 1997 1998 1999 String filename = inPathPattern.toUri().getPath(); 2000 2001 // path has only zero component 2002 if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { 2003 Path p = inPathPattern.makeQualified(uri, null); 2004 return getFileStatus(new Path[]{p}); 2005 } 2006 2007 // path has at least one component 2008 String[] components = filename.split(Path.SEPARATOR); 2009 2010 // Path is absolute, first component is "/" hence first component 2011 // is the uri root 2012 parents[0] = new Path(new Path(uri), new Path("/")); 2013 level = 1; 2014 2015 // glob the paths that match the parent path, ie. 
[0, components.length-1] 2016 boolean[] hasGlob = new boolean[]{false}; 2017 Path[] relParentPaths = 2018 globPathsLevel(parents, components, level, hasGlob); 2019 FileStatus[] results; 2020 2021 if (relParentPaths == null || relParentPaths.length == 0) { 2022 results = null; 2023 } else { 2024 // fix the pathes to be abs 2025 Path[] parentPaths = new Path [relParentPaths.length]; 2026 for(int i=0; i<relParentPaths.length; i++) { 2027 parentPaths[i] = relParentPaths[i].makeQualified(uri, null); 2028 } 2029 2030 // Now work on the last component of the path 2031 GlobFilter fp = 2032 new GlobFilter(components[components.length - 1], filter); 2033 if (fp.hasPattern()) { // last component has a pattern 2034 // list parent directories and then glob the results 2035 try { 2036 results = listStatus(parentPaths, fp); 2037 } catch (FileNotFoundException e) { 2038 results = null; 2039 } 2040 hasGlob[0] = true; 2041 } else { // last component does not have a pattern 2042 // get all the path names 2043 ArrayList<Path> filteredPaths = 2044 new ArrayList<Path>(parentPaths.length); 2045 for (int i = 0; i < parentPaths.length; i++) { 2046 parentPaths[i] = new Path(parentPaths[i], 2047 components[components.length - 1]); 2048 if (fp.accept(parentPaths[i])) { 2049 filteredPaths.add(parentPaths[i]); 2050 } 2051 } 2052 // get all their statuses 2053 results = getFileStatus( 2054 filteredPaths.toArray(new Path[filteredPaths.size()])); 2055 } 2056 } 2057 2058 // Decide if the pathPattern contains a glob or not 2059 if (results == null) { 2060 if (hasGlob[0]) { 2061 results = new FileStatus[0]; 2062 } 2063 } else { 2064 if (results.length == 0) { 2065 if (!hasGlob[0]) { 2066 results = null; 2067 } 2068 } else { 2069 Arrays.sort(results); 2070 } 2071 } 2072 return results; 2073 } 2074 2075 /* 2076 * For a path of N components, return a list of paths that match the 2077 * components [<code>level</code>, <code>N-1</code>]. 
2078 */ 2079 private Path[] globPathsLevel(Path[] parents, String[] filePattern, 2080 int level, boolean[] hasGlob) throws AccessControlException, 2081 FileNotFoundException, IOException { 2082 if (level == filePattern.length - 1) { 2083 return parents; 2084 } 2085 if (parents == null || parents.length == 0) { 2086 return null; 2087 } 2088 GlobFilter fp = new GlobFilter(filePattern[level]); 2089 if (fp.hasPattern()) { 2090 try { 2091 parents = FileUtil.stat2Paths(listStatus(parents, fp)); 2092 } catch (FileNotFoundException e) { 2093 parents = null; 2094 } 2095 hasGlob[0] = true; 2096 } else { 2097 for (int i = 0; i < parents.length; i++) { 2098 parents[i] = new Path(parents[i], filePattern[level]); 2099 } 2100 } 2101 return globPathsLevel(parents, filePattern, level + 1, hasGlob); 2102 } 2103 2104 /** 2105 * Copy file from src to dest. See 2106 * {@link #copy(Path, Path, boolean, boolean)} 2107 */ 2108 public boolean copy(final Path src, final Path dst) 2109 throws AccessControlException, FileAlreadyExistsException, 2110 FileNotFoundException, ParentNotDirectoryException, 2111 UnsupportedFileSystemException, IOException { 2112 return copy(src, dst, false, false); 2113 } 2114 2115 /** 2116 * Copy from src to dst, optionally deleting src and overwriting dst. 2117 * @param src 2118 * @param dst 2119 * @param deleteSource - delete src if true 2120 * @param overwrite overwrite dst if true; throw IOException if dst exists 2121 * and overwrite is false. 
2122 * 2123 * @return true if copy is successful 2124 * 2125 * @throws AccessControlException If access is denied 2126 * @throws FileAlreadyExistsException If <code>dst</code> already exists 2127 * @throws FileNotFoundException If <code>src</code> does not exist 2128 * @throws ParentNotDirectoryException If parent of <code>dst</code> is not 2129 * a directory 2130 * @throws UnsupportedFileSystemException If file system for 2131 * <code>src</code> or <code>dst</code> is not supported 2132 * @throws IOException If an I/O error occurred 2133 * 2134 * Exceptions applicable to file systems accessed over RPC: 2135 * @throws RpcClientException If an exception occurred in the RPC client 2136 * @throws RpcServerException If an exception occurred in the RPC server 2137 * @throws UnexpectedServerException If server implementation throws 2138 * undeclared exception to RPC server 2139 * 2140 * RuntimeExceptions: 2141 * @throws InvalidPathException If path <code>dst</code> is invalid 2142 */ 2143 public boolean copy(final Path src, final Path dst, boolean deleteSource, 2144 boolean overwrite) throws AccessControlException, 2145 FileAlreadyExistsException, FileNotFoundException, 2146 ParentNotDirectoryException, UnsupportedFileSystemException, 2147 IOException { 2148 checkNotSchemeWithRelative(src); 2149 checkNotSchemeWithRelative(dst); 2150 Path qSrc = makeQualified(src); 2151 Path qDst = makeQualified(dst); 2152 checkDest(qSrc.getName(), qDst, overwrite); 2153 FileStatus fs = FileContext.this.getFileStatus(qSrc); 2154 if (fs.isDirectory()) { 2155 checkDependencies(qSrc, qDst); 2156 mkdir(qDst, FsPermission.getDirDefault(), true); 2157 FileStatus[] contents = listStatus(qSrc); 2158 for (FileStatus content : contents) { 2159 copy(makeQualified(content.getPath()), makeQualified(new Path(qDst, 2160 content.getPath().getName())), deleteSource, overwrite); 2161 } 2162 } else { 2163 InputStream in=null; 2164 OutputStream out = null; 2165 try { 2166 in = open(qSrc); 2167 
EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of( 2168 CreateFlag.CREATE, CreateFlag.OVERWRITE) : 2169 EnumSet.of(CreateFlag.CREATE); 2170 out = create(qDst, createFlag); 2171 IOUtils.copyBytes(in, out, conf, true); 2172 } catch (IOException e) { 2173 IOUtils.closeStream(out); 2174 IOUtils.closeStream(in); 2175 throw e; 2176 } 2177 } 2178 if (deleteSource) { 2179 return delete(qSrc, true); 2180 } else { 2181 return true; 2182 } 2183 } 2184 } 2185 2186 /** 2187 * Check if copying srcName to dst would overwrite an existing 2188 * file or directory. 2189 * @param srcName File or directory to be copied. 2190 * @param dst Destination to copy srcName to. 2191 * @param overwrite Whether it's ok to overwrite an existing file. 2192 * @throws AccessControlException If access is denied. 2193 * @throws IOException If dst is an existing directory, or dst is an 2194 * existing file and the overwrite option is not passed. 2195 */ 2196 private void checkDest(String srcName, Path dst, boolean overwrite) 2197 throws AccessControlException, IOException { 2198 try { 2199 FileStatus dstFs = getFileStatus(dst); 2200 if (dstFs.isDirectory()) { 2201 if (null == srcName) { 2202 throw new IOException("Target " + dst + " is a directory"); 2203 } 2204 // Recurse to check if dst/srcName exists. 2205 checkDest(null, new Path(dst, srcName), overwrite); 2206 } else if (!overwrite) { 2207 throw new IOException("Target " + new Path(dst, srcName) 2208 + " already exists"); 2209 } 2210 } catch (FileNotFoundException e) { 2211 // dst does not exist - OK to copy. 
2212 } 2213 } 2214 2215 // 2216 // If the destination is a subdirectory of the source, then 2217 // generate exception 2218 // 2219 private static void checkDependencies(Path qualSrc, Path qualDst) 2220 throws IOException { 2221 if (isSameFS(qualSrc, qualDst)) { 2222 String srcq = qualSrc.toString() + Path.SEPARATOR; 2223 String dstq = qualDst.toString() + Path.SEPARATOR; 2224 if (dstq.startsWith(srcq)) { 2225 if (srcq.length() == dstq.length()) { 2226 throw new IOException("Cannot copy " + qualSrc + " to itself."); 2227 } else { 2228 throw new IOException("Cannot copy " + qualSrc + 2229 " to its subdirectory " + qualDst); 2230 } 2231 } 2232 } 2233 } 2234 2235 /** 2236 * Are qualSrc and qualDst of the same file system? 2237 * @param qualPath1 - fully qualified path 2238 * @param qualPath2 - fully qualified path 2239 * @return 2240 */ 2241 private static boolean isSameFS(Path qualPath1, Path qualPath2) { 2242 URI srcUri = qualPath1.toUri(); 2243 URI dstUri = qualPath2.toUri(); 2244 return (srcUri.getScheme().equals(dstUri.getScheme()) && 2245 !(srcUri.getAuthority() != null && dstUri.getAuthority() != null && srcUri 2246 .getAuthority().equals(dstUri.getAuthority()))); 2247 } 2248 2249 /** 2250 * Deletes all the paths in deleteOnExit on JVM shutdown. 2251 */ 2252 static class FileContextFinalizer implements Runnable { 2253 public synchronized void run() { 2254 processDeleteOnExit(); 2255 } 2256 } 2257 2258 /** 2259 * Resolves all symbolic links in the specified path. 2260 * Returns the new path object. 
2261 */ 2262 protected Path resolve(final Path f) throws FileNotFoundException, 2263 UnresolvedLinkException, AccessControlException, IOException { 2264 return new FSLinkResolver<Path>() { 2265 public Path next(final AbstractFileSystem fs, final Path p) 2266 throws IOException, UnresolvedLinkException { 2267 return fs.resolvePath(p); 2268 } 2269 }.resolve(this, f); 2270 } 2271 2272 /** 2273 * Resolves all symbolic links in the specified path leading up 2274 * to, but not including the final path component. 2275 * @param f path to resolve 2276 * @return the new path object. 2277 */ 2278 protected Path resolveIntermediate(final Path f) throws IOException { 2279 return new FSLinkResolver<FileStatus>() { 2280 public FileStatus next(final AbstractFileSystem fs, final Path p) 2281 throws IOException, UnresolvedLinkException { 2282 return fs.getFileLinkStatus(p); 2283 } 2284 }.resolve(this, f).getPath(); 2285 } 2286 2287 /** 2288 * Returns the list of AbstractFileSystems accessed in the path. The list may 2289 * contain more than one AbstractFileSystems objects in case of symlinks. 2290 * 2291 * @param f 2292 * Path which needs to be resolved 2293 * @return List of AbstractFileSystems accessed in the path 2294 * @throws IOException 2295 */ 2296 Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f) 2297 throws IOException { 2298 final Path absF = fixRelativePart(f); 2299 final HashSet<AbstractFileSystem> result 2300 = new HashSet<AbstractFileSystem>(); 2301 new FSLinkResolver<Void>() { 2302 public Void next(final AbstractFileSystem fs, final Path p) 2303 throws IOException, UnresolvedLinkException { 2304 result.add(fs); 2305 fs.getFileStatus(p); 2306 return null; 2307 } 2308 }.resolve(this, absF); 2309 return result; 2310 } 2311 2312 /** 2313 * Class used to perform an operation on and resolve symlinks in a 2314 * path. The operation may potentially span multiple file systems. 
2315 */ 2316 protected abstract class FSLinkResolver<T> { 2317 // The maximum number of symbolic link components in a path 2318 private static final int MAX_PATH_LINKS = 32; 2319 2320 /** 2321 * Generic helper function overridden on instantiation to perform a 2322 * specific operation on the given file system using the given path 2323 * which may result in an UnresolvedLinkException. 2324 * @param fs AbstractFileSystem to perform the operation on. 2325 * @param p Path given the file system. 2326 * @return Generic type determined by the specific implementation. 2327 * @throws UnresolvedLinkException If symbolic link <code>path</code> could 2328 * not be resolved 2329 * @throws IOException an I/O error occured 2330 */ 2331 public abstract T next(final AbstractFileSystem fs, final Path p) 2332 throws IOException, UnresolvedLinkException; 2333 2334 /** 2335 * Performs the operation specified by the next function, calling it 2336 * repeatedly until all symlinks in the given path are resolved. 2337 * @param fc FileContext used to access file systems. 2338 * @param p The path to resolve symlinks in. 2339 * @return Generic type determined by the implementation of next. 2340 * @throws IOException 2341 */ 2342 public T resolve(final FileContext fc, Path p) throws IOException { 2343 int count = 0; 2344 T in = null; 2345 Path first = p; 2346 // NB: More than one AbstractFileSystem can match a scheme, eg 2347 // "file" resolves to LocalFs but could have come by RawLocalFs. 
2348 AbstractFileSystem fs = fc.getFSofPath(p); 2349 2350 // Loop until all symlinks are resolved or the limit is reached 2351 for (boolean isLink = true; isLink;) { 2352 try { 2353 in = next(fs, p); 2354 isLink = false; 2355 } catch (UnresolvedLinkException e) { 2356 if (count++ > MAX_PATH_LINKS) { 2357 throw new IOException("Possible cyclic loop while " + 2358 "following symbolic link " + first); 2359 } 2360 // Resolve the first unresolved path component 2361 p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p)); 2362 fs = fc.getFSofPath(p); 2363 } 2364 } 2365 return in; 2366 } 2367 } 2368 2369 /** 2370 * Get the statistics for a particular file system 2371 * 2372 * @param uri 2373 * the uri to lookup the statistics. Only scheme and authority part 2374 * of the uri are used as the key to store and lookup. 2375 * @return a statistics object 2376 */ 2377 public static Statistics getStatistics(URI uri) { 2378 return AbstractFileSystem.getStatistics(uri); 2379 } 2380 2381 /** 2382 * Clears all the statistics stored in AbstractFileSystem, for all the file 2383 * systems. 2384 */ 2385 public static void clearStatistics() { 2386 AbstractFileSystem.clearStatistics(); 2387 } 2388 2389 /** 2390 * Prints the statistics to standard output. File System is identified by the 2391 * scheme and authority. 2392 */ 2393 public static void printStatistics() { 2394 AbstractFileSystem.printStatistics(); 2395 } 2396 2397 /** 2398 * @return Map of uri and statistics for each filesystem instantiated. The uri 2399 * consists of scheme and authority for the filesystem. 2400 */ 2401 public static Map<URI, Statistics> getAllStatistics() { 2402 return AbstractFileSystem.getAllStatistics(); 2403 } 2404 2405 /** 2406 * Get delegation tokens for the file systems accessed for a given 2407 * path. 2408 * @param p Path for which delegations tokens are requested. 2409 * @param renewer the account name that is allowed to renew the token. 2410 * @return List of delegation tokens. 
2411 * @throws IOException 2412 */ 2413 @InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" }) 2414 public List<Token<?>> getDelegationTokens( 2415 Path p, String renewer) throws IOException { 2416 Set<AbstractFileSystem> afsSet = resolveAbstractFileSystems(p); 2417 List<Token<?>> tokenList = 2418 new ArrayList<Token<?>>(); 2419 for (AbstractFileSystem afs : afsSet) { 2420 List<Token<?>> afsTokens = afs.getDelegationTokens(renewer); 2421 tokenList.addAll(afsTokens); 2422 } 2423 return tokenList; 2424 } 2425 }