001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.fs;
020
021 import java.io.BufferedReader;
022 import java.io.File;
023 import java.io.FileNotFoundException;
024 import java.io.IOException;
025 import java.io.InputStreamReader;
026 import java.util.Arrays;
027
028 import org.apache.hadoop.util.Shell;
029
/**
 * Class for creating hardlinks.
 * Supports Unix/Linux, Solaris, FreeBSD, Mac OS X, and Windows (via winutils).
 *
 * The HardLink class was formerly a static inner class of FSUtil,
 * and the methods provided were blatantly non-thread-safe.
 * To enable volume-parallel Update snapshots, we now provide static
 * thread-safe methods that allocate new buffer string arrays
 * upon each call. We also provide an API to hardlink all files in a
 * directory with a single command, which is up to 128 times more
 * efficient - and minimizes the impact of the extra buffer creations.
 */
042 public class HardLink {
043
/**
 * Operating-system families that HardLink tells apart when deciding which
 * shell commands to build. The value for the running JVM is computed by
 * getOSType().
 */
public enum OSType {
  /** Fallback used when none of the other families is recognized. */
  OS_TYPE_UNIX,

  /** Used when Shell.WINDOWS is set. */
  OS_TYPE_WIN,

  /** Used when the "os.name" property contains "SunOS" or "Solaris". */
  OS_TYPE_SOLARIS,

  /** Used when the "os.name" property contains "Mac". */
  OS_TYPE_MAC,

  /** Used when the "os.name" property contains "FreeBSD". */
  OS_TYPE_FREEBSD;
}
051
/** OS family of the running JVM; assigned once in the static initializer. */
public static OSType osType;

/** OS-specific builder of shell commands; assigned in the static initializer. */
private static HardLinkCommandGetter getHardLinkCommand;

/** Statistics counters owned by this instance (deliberately not static). */
public final LinkStats linkStats;

// Choose the command "getter" statically, so that the static methods of this
// class can be used without instantiating a HardLink object.
static {
  osType = getOSType();
  if (osType == OSType.OS_TYPE_WIN) {
    // Windows
    getHardLinkCommand = new HardLinkCGWin();
  } else {
    // Unix and its variants
    getHardLinkCommand = new HardLinkCGUnix();

    // Linux is already the default link-count command - {"stat","-c%h", null}.
    // Only the variants below need a different template.
    String[] variantTemplate = null;
    if (osType == OSType.OS_TYPE_MAC || osType == OSType.OS_TYPE_FREEBSD) {
      variantTemplate = new String[] {"/usr/bin/stat", "-f%l", null};
    } else if (osType == OSType.OS_TYPE_SOLARIS) {
      variantTemplate = new String[] {"ls", "-l", null};
    }
    if (variantTemplate != null) {
      HardLinkCGUnix.setLinkCountCmdTemplate(variantTemplate);
    }
  }
}
078
/**
 * Creates a HardLink instance with its own, freshly zeroed statistics
 * counters.
 */
public HardLink() {
  this.linkStats = new LinkStats();
}
082
083 static private OSType getOSType() {
084 String osName = System.getProperty("os.name");
085 if (Shell.WINDOWS) {
086 return OSType.OS_TYPE_WIN;
087 }
088 else if (osName.contains("SunOS")
089 || osName.contains("Solaris")) {
090 return OSType.OS_TYPE_SOLARIS;
091 }
092 else if (osName.contains("Mac")) {
093 return OSType.OS_TYPE_MAC;
094 }
095 else if (osName.contains("FreeBSD")) {
096 return OSType.OS_TYPE_FREEBSD;
097 }
098 else {
099 return OSType.OS_TYPE_UNIX;
100 }
101 }
102
103 /**
104 * This abstract class bridges the OS-dependent implementations of the
105 * needed functionality for creating hardlinks and querying link counts.
106 * The particular implementation class is chosen during
107 * static initialization phase of the HardLink class.
108 * The "getter" methods construct shell command strings for various purposes.
109 */
110 private static abstract class HardLinkCommandGetter {
111
112 /**
113 * Get the command string needed to hardlink a bunch of files from
114 * a single source directory into a target directory. The source directory
115 * is not specified here, but the command will be executed using the source
116 * directory as the "current working directory" of the shell invocation.
117 *
118 * @param fileBaseNames - array of path-less file names, relative
119 * to the source directory
120 * @param linkDir - target directory where the hardlinks will be put
121 * @return - an array of Strings suitable for use as a single shell command
122 * with {@link Runtime.exec()}
123 * @throws IOException - if any of the file or path names misbehave
124 */
125 abstract String[] linkMult(String[] fileBaseNames, File linkDir)
126 throws IOException;
127
128 /**
129 * Get the command string needed to hardlink a single file
130 */
131 abstract String[] linkOne(File file, File linkName) throws IOException;
132
133 /**
134 * Get the command string to query the hardlink count of a file
135 */
136 abstract String[] linkCount(File file) throws IOException;
137
138 /**
139 * Calculate the total string length of the shell command
140 * resulting from execution of linkMult, plus the length of the
141 * source directory name (which will also be provided to the shell)
142 *
143 * @param fileDir - source directory, parent of fileBaseNames
144 * @param fileBaseNames - array of path-less file names, relative
145 * to the source directory
146 * @param linkDir - target directory where the hardlinks will be put
147 * @return - total data length (must not exceed maxAllowedCmdArgLength)
148 * @throws IOException
149 */
150 abstract int getLinkMultArgLength(
151 File fileDir, String[] fileBaseNames, File linkDir)
152 throws IOException;
153
154 /**
155 * Get the maximum allowed string length of a shell command on this OS,
156 * which is just the documented minimum guaranteed supported command
157 * length - aprx. 32KB for Unix, and 8KB for Windows.
158 */
159 abstract int getMaxAllowedCmdArgLength();
160 }
161
162 /**
163 * Implementation of HardLinkCommandGetter class for Unix
164 */
165 static class HardLinkCGUnix extends HardLinkCommandGetter {
166 private static String[] hardLinkCommand = {"ln", null, null};
167 private static String[] hardLinkMultPrefix = {"ln"};
168 private static String[] hardLinkMultSuffix = {null};
169 private static String[] getLinkCountCommand = {"stat","-c%h", null};
170 //Unix guarantees at least 32K bytes cmd length.
171 //Subtract another 64b to allow for Java 'exec' overhead
172 private static final int maxAllowedCmdArgLength = 32*1024 - 65;
173
174 private static synchronized
175 void setLinkCountCmdTemplate(String[] template) {
176 //May update this for specific unix variants,
177 //after static initialization phase
178 getLinkCountCommand = template;
179 }
180
181 /*
182 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
183 */
184 @Override
185 String[] linkOne(File file, File linkName)
186 throws IOException {
187 String[] buf = new String[hardLinkCommand.length];
188 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
189 //unix wants argument order: "ln <existing> <new>"
190 buf[1] = FileUtil.makeShellPath(file, true);
191 buf[2] = FileUtil.makeShellPath(linkName, true);
192 return buf;
193 }
194
195 /*
196 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
197 */
198 @Override
199 String[] linkMult(String[] fileBaseNames, File linkDir)
200 throws IOException {
201 String[] buf = new String[fileBaseNames.length
202 + hardLinkMultPrefix.length
203 + hardLinkMultSuffix.length];
204 int mark=0;
205 System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
206 hardLinkMultPrefix.length);
207 mark += hardLinkMultPrefix.length;
208 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
209 mark += fileBaseNames.length;
210 buf[mark] = FileUtil.makeShellPath(linkDir, true);
211 return buf;
212 }
213
214 /*
215 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
216 */
217 @Override
218 String[] linkCount(File file)
219 throws IOException {
220 String[] buf = new String[getLinkCountCommand.length];
221 System.arraycopy(getLinkCountCommand, 0, buf, 0,
222 getLinkCountCommand.length);
223 buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
224 return buf;
225 }
226
227 /*
228 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
229 */
230 @Override
231 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
232 throws IOException{
233 int sum = 0;
234 for (String x : fileBaseNames) {
235 // add 1 to account for terminal null or delimiter space
236 sum += 1 + ((x == null) ? 0 : x.length());
237 }
238 sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
239 + FileUtil.makeShellPath(linkDir, true).length();
240 //add the fixed overhead of the hardLinkMult prefix and suffix
241 sum += 3; //length("ln") + 1
242 return sum;
243 }
244
245 /*
246 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
247 */
248 @Override
249 int getMaxAllowedCmdArgLength() {
250 return maxAllowedCmdArgLength;
251 }
252 }
253
254
255 /**
256 * Implementation of HardLinkCommandGetter class for Windows
257 */
258 static class HardLinkCGWin extends HardLinkCommandGetter {
259 //The Windows command getter impl class and its member fields are
260 //package-private ("default") access instead of "private" to assist
261 //unit testing (sort of) on non-Win servers
262
263 static String[] hardLinkCommand = {
264 Shell.WINUTILS,"hardlink","create", null, null};
265 static String[] hardLinkMultPrefix = {
266 "cmd","/q","/c","for", "%f", "in", "("};
267 static String hardLinkMultDir = "\\%f";
268 static String[] hardLinkMultSuffix = {
269 ")", "do", Shell.WINUTILS, "hardlink", "create", null,
270 "%f", "1>NUL"};
271 static String[] getLinkCountCommand = {
272 Shell.WINUTILS, "hardlink",
273 "stat", null};
274 //Windows guarantees only 8K - 1 bytes cmd length.
275 //Subtract another 64b to allow for Java 'exec' overhead
276 static final int maxAllowedCmdArgLength = 8*1024 - 65;
277
278 /*
279 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
280 */
281 @Override
282 String[] linkOne(File file, File linkName)
283 throws IOException {
284 String[] buf = new String[hardLinkCommand.length];
285 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
286 //windows wants argument order: "create <new> <existing>"
287 buf[4] = file.getCanonicalPath();
288 buf[3] = linkName.getCanonicalPath();
289 return buf;
290 }
291
292 /*
293 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
294 */
295 @Override
296 String[] linkMult(String[] fileBaseNames, File linkDir)
297 throws IOException {
298 String[] buf = new String[fileBaseNames.length
299 + hardLinkMultPrefix.length
300 + hardLinkMultSuffix.length];
301 String td = linkDir.getCanonicalPath() + hardLinkMultDir;
302 int mark=0;
303 System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
304 hardLinkMultPrefix.length);
305 mark += hardLinkMultPrefix.length;
306 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
307 mark += fileBaseNames.length;
308 System.arraycopy(hardLinkMultSuffix, 0, buf, mark,
309 hardLinkMultSuffix.length);
310 mark += hardLinkMultSuffix.length;
311 buf[mark - 3] = td;
312 return buf;
313 }
314
315 /*
316 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
317 */
318 @Override
319 String[] linkCount(File file)
320 throws IOException {
321 String[] buf = new String[getLinkCountCommand.length];
322 System.arraycopy(getLinkCountCommand, 0, buf, 0,
323 getLinkCountCommand.length);
324 buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
325 return buf;
326 }
327
328 /*
329 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
330 */
331 @Override
332 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
333 throws IOException {
334 int sum = 0;
335 for (String x : fileBaseNames) {
336 // add 1 to account for terminal null or delimiter space
337 sum += 1 + ((x == null) ? 0 : x.length());
338 }
339 sum += 2 + fileDir.getCanonicalPath().length() +
340 linkDir.getCanonicalPath().length();
341 //add the fixed overhead of the hardLinkMult command
342 //(prefix, suffix, and Dir suffix)
343 sum += ("cmd.exe /q /c for %f in ( ) do "
344 + Shell.WINUTILS + " hardlink create \\%f %f 1>NUL ").length();
345 return sum;
346 }
347
348 /*
349 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
350 */
351 @Override
352 int getMaxAllowedCmdArgLength() {
353 return maxAllowedCmdArgLength;
354 }
355 }
356
357
358 /**
359 * Calculate the nominal length of all contributors to the total
360 * commandstring length, including fixed overhead of the OS-dependent
361 * command. It's protected rather than private, to assist unit testing,
362 * but real clients are not expected to need it -- see the way
363 * createHardLinkMult() uses it internally so the user doesn't need to worry
364 * about it.
365 *
366 * @param fileDir - source directory, parent of fileBaseNames
367 * @param fileBaseNames - array of path-less file names, relative
368 * to the source directory
369 * @param linkDir - target directory where the hardlinks will be put
370 * @return - total data length (must not exceed maxAllowedCmdArgLength)
371 * @throws IOException
372 */
373 protected static int getLinkMultArgLength(
374 File fileDir, String[] fileBaseNames, File linkDir)
375 throws IOException {
376 return getHardLinkCommand.getLinkMultArgLength(fileDir,
377 fileBaseNames, linkDir);
378 }
379
380 /**
381 * Return this private value for use by unit tests.
382 * Shell commands are not allowed to have a total string length
383 * exceeding this size.
384 */
385 protected static int getMaxAllowedCmdArgLength() {
386 return getHardLinkCommand.getMaxAllowedCmdArgLength();
387 }
388
389 /*
390 * ****************************************************
391 * Complexity is above. User-visible functionality is below
392 * ****************************************************
393 */
394
395 /**
396 * Creates a hardlink
397 * @param file - existing source file
398 * @param linkName - desired target link file
399 */
400 public static void createHardLink(File file, File linkName)
401 throws IOException {
402 if (file == null) {
403 throw new IOException(
404 "invalid arguments to createHardLink: source file is null");
405 }
406 if (linkName == null) {
407 throw new IOException(
408 "invalid arguments to createHardLink: link name is null");
409 }
410 // construct and execute shell command
411 String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
412 Process process = Runtime.getRuntime().exec(hardLinkCommand);
413 try {
414 if (process.waitFor() != 0) {
415 String errMsg = new BufferedReader(new InputStreamReader(
416 process.getInputStream())).readLine();
417 if (errMsg == null) errMsg = "";
418 String inpMsg = new BufferedReader(new InputStreamReader(
419 process.getErrorStream())).readLine();
420 if (inpMsg == null) inpMsg = "";
421 throw new IOException(errMsg + inpMsg);
422 }
423 } catch (InterruptedException e) {
424 throw new IOException(e);
425 } finally {
426 process.destroy();
427 }
428 }
429
430 /**
431 * Creates hardlinks from multiple existing files within one parent
432 * directory, into one target directory.
433 * @param parentDir - directory containing source files
434 * @param fileBaseNames - list of path-less file names, as returned by
435 * parentDir.list()
436 * @param linkDir - where the hardlinks should be put. It must already exist.
437 *
438 * If the list of files is too long (overflows maxAllowedCmdArgLength),
439 * we will automatically split it into multiple invocations of the
440 * underlying method.
441 */
442 public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
443 File linkDir) throws IOException {
444 //This is the public method all non-test clients are expected to use.
445 //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
446 createHardLinkMult(parentDir, fileBaseNames, linkDir,
447 getHardLinkCommand.getMaxAllowedCmdArgLength());
448 }
449
450 /*
451 * Implements {@link createHardLinkMult} with added variable "maxLength",
452 * to ease unit testing of the auto-splitting feature for long lists.
453 * Likewise why it returns "callCount", the number of sub-arrays that
454 * the file list had to be split into.
455 * Non-test clients are expected to call the public method instead.
456 */
457 protected static int createHardLinkMult(File parentDir,
458 String[] fileBaseNames, File linkDir, int maxLength)
459 throws IOException {
460 if (parentDir == null) {
461 throw new IOException(
462 "invalid arguments to createHardLinkMult: parent directory is null");
463 }
464 if (linkDir == null) {
465 throw new IOException(
466 "invalid arguments to createHardLinkMult: link directory is null");
467 }
468 if (fileBaseNames == null) {
469 throw new IOException(
470 "invalid arguments to createHardLinkMult: "
471 + "filename list can be empty but not null");
472 }
473 if (fileBaseNames.length == 0) {
474 //the OS cmds can't handle empty list of filenames,
475 //but it's legal, so just return.
476 return 0;
477 }
478 if (!linkDir.exists()) {
479 throw new FileNotFoundException(linkDir + " not found.");
480 }
481
482 //if the list is too long, split into multiple invocations
483 int callCount = 0;
484 if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
485 && fileBaseNames.length > 1) {
486 String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
487 callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
488 String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
489 fileBaseNames.length);
490 callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);
491 return callCount;
492 } else {
493 callCount = 1;
494 }
495
496 // construct and execute shell command
497 String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames,
498 linkDir);
499 Process process = Runtime.getRuntime().exec(hardLinkCommand, null,
500 parentDir);
501 try {
502 if (process.waitFor() != 0) {
503 String errMsg = new BufferedReader(new InputStreamReader(
504 process.getInputStream())).readLine();
505 if (errMsg == null) errMsg = "";
506 String inpMsg = new BufferedReader(new InputStreamReader(
507 process.getErrorStream())).readLine();
508 if (inpMsg == null) inpMsg = "";
509 throw new IOException(errMsg + inpMsg);
510 }
511 } catch (InterruptedException e) {
512 throw new IOException(e);
513 } finally {
514 process.destroy();
515 }
516 return callCount;
517 }
518
519 /**
520 * Retrieves the number of links to the specified file.
521 */
522 public static int getLinkCount(File fileName) throws IOException {
523 if (fileName == null) {
524 throw new IOException(
525 "invalid argument to getLinkCount: file name is null");
526 }
527 if (!fileName.exists()) {
528 throw new FileNotFoundException(fileName + " not found.");
529 }
530
531 // construct and execute shell command
532 String[] cmd = getHardLinkCommand.linkCount(fileName);
533 String inpMsg = null;
534 String errMsg = null;
535 int exitValue = -1;
536 BufferedReader in = null;
537 BufferedReader err = null;
538
539 Process process = Runtime.getRuntime().exec(cmd);
540 try {
541 exitValue = process.waitFor();
542 in = new BufferedReader(new InputStreamReader(
543 process.getInputStream()));
544 inpMsg = in.readLine();
545 err = new BufferedReader(new InputStreamReader(
546 process.getErrorStream()));
547 errMsg = err.readLine();
548 if (inpMsg == null || exitValue != 0) {
549 throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
550 }
551 if (osType == OSType.OS_TYPE_SOLARIS) {
552 String[] result = inpMsg.split("\\s+");
553 return Integer.parseInt(result[1]);
554 } else {
555 return Integer.parseInt(inpMsg);
556 }
557 } catch (NumberFormatException e) {
558 throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
559 } catch (InterruptedException e) {
560 throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
561 } finally {
562 process.destroy();
563 if (in != null) in.close();
564 if (err != null) err.close();
565 }
566 }
567
568 /* Create an IOException for failing to get link count. */
569 private static IOException createIOException(File f, String message,
570 String error, int exitvalue, Exception cause) {
571
572 final String s = "Failed to get link count on file " + f
573 + ": message=" + message
574 + "; error=" + error
575 + "; exit value=" + exitvalue;
576 return (cause == null) ? new IOException(s) : new IOException(s, cause);
577 }
578
579
580 /**
581 * HardLink statistics counters and methods.
582 * Not multi-thread safe, obviously.
583 * Init is called during HardLink instantiation, above.
584 *
585 * These are intended for use by knowledgeable clients, not internally,
586 * because many of the internal methods are static and can't update these
587 * per-instance counters.
588 */
589 public static class LinkStats {
590 public int countDirs = 0;
591 public int countSingleLinks = 0;
592 public int countMultLinks = 0;
593 public int countFilesMultLinks = 0;
594 public int countEmptyDirs = 0;
595 public int countPhysicalFileCopies = 0;
596
597 public void clear() {
598 countDirs = 0;
599 countSingleLinks = 0;
600 countMultLinks = 0;
601 countFilesMultLinks = 0;
602 countEmptyDirs = 0;
603 countPhysicalFileCopies = 0;
604 }
605
606 public String report() {
607 return "HardLinkStats: " + countDirs + " Directories, including "
608 + countEmptyDirs + " Empty Directories, "
609 + countSingleLinks
610 + " single Link operations, " + countMultLinks
611 + " multi-Link operations, linking " + countFilesMultLinks
612 + " files, total " + (countSingleLinks + countFilesMultLinks)
613 + " linkable files. Also physically copied "
614 + countPhysicalFileCopies + " other files.";
615 }
616 }
617 }
618