001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.fs;
020
021 import java.io.BufferedReader;
022 import java.io.File;
023 import java.io.FileNotFoundException;
024 import java.io.IOException;
025 import java.io.StringReader;
026 import java.util.Arrays;
027
028 import org.apache.hadoop.io.IOUtils;
029 import org.apache.hadoop.util.Shell;
030 import org.apache.hadoop.util.Shell.ExitCodeException;
031 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
032
033 /**
034 * Class for creating hardlinks.
 * Supports Unix/Linux, Windows via winutils, and Mac OS X.
036 *
037 * The HardLink class was formerly a static inner class of FSUtil,
038 * and the methods provided were blatantly non-thread-safe.
039 * To enable volume-parallel Update snapshots, we now provide static
040 * threadsafe methods that allocate new buffer string arrays
041 * upon each call. We also provide an API to hardlink all files in a
042 * directory with a single command, which is up to 128 times more
043 * efficient - and minimizes the impact of the extra buffer creations.
044 */
045 public class HardLink {
046
// OS-specific builder of the shell commands; chosen once in the static
// initializer below and shared by all static methods of this class.
private static HardLinkCommandGetter getHardLinkCommand;

public final LinkStats linkStats; //not static

// Select the command "getter" at class-load time, so the static methods
// can be used without ever instantiating a HardLink object.
static {
  if (!Shell.WINDOWS) {
    // Unix or Linux
    getHardLinkCommand = new HardLinkCGUnix();
    // The default link-count template is the Linux one,
    // {"stat","-c%h", null}; replace it for the other Unix variants.
    final boolean bsdStyleStat = Shell.MAC || Shell.FREEBSD;
    if (bsdStyleStat) {
      HardLinkCGUnix.setLinkCountCmdTemplate(
          new String[] {"/usr/bin/stat", "-f%l", null});
    } else if (Shell.SOLARIS) {
      HardLinkCGUnix.setLinkCountCmdTemplate(
          new String[] {"ls", "-l", null});
    }
  } else {
    // Windows
    getHardLinkCommand = new HardLinkCGWin();
  }
}
071
/**
 * Creates a HardLink instance holding its own, freshly created
 * {@link LinkStats} counters.
 */
public HardLink() {
  this.linkStats = new LinkStats();
}
075
076 /**
077 * This abstract class bridges the OS-dependent implementations of the
078 * needed functionality for creating hardlinks and querying link counts.
079 * The particular implementation class is chosen during
080 * static initialization phase of the HardLink class.
081 * The "getter" methods construct shell command strings for various purposes.
082 */
083 private static abstract class HardLinkCommandGetter {
084
085 /**
086 * Get the command string needed to hardlink a bunch of files from
087 * a single source directory into a target directory. The source directory
088 * is not specified here, but the command will be executed using the source
089 * directory as the "current working directory" of the shell invocation.
090 *
091 * @param fileBaseNames - array of path-less file names, relative
092 * to the source directory
093 * @param linkDir - target directory where the hardlinks will be put
094 * @return - an array of Strings suitable for use as a single shell command
095 * @throws IOException - if any of the file or path names misbehave
096 */
097 abstract String[] linkMult(String[] fileBaseNames, File linkDir)
098 throws IOException;
099
100 /**
101 * Get the command string needed to hardlink a single file
102 */
103 abstract String[] linkOne(File file, File linkName) throws IOException;
104
105 /**
106 * Get the command string to query the hardlink count of a file
107 */
108 abstract String[] linkCount(File file) throws IOException;
109
110 /**
111 * Calculate the total string length of the shell command
112 * resulting from execution of linkMult, plus the length of the
113 * source directory name (which will also be provided to the shell)
114 *
115 * @param fileDir - source directory, parent of fileBaseNames
116 * @param fileBaseNames - array of path-less file names, relative
117 * to the source directory
118 * @param linkDir - target directory where the hardlinks will be put
119 * @return - total data length (must not exceed maxAllowedCmdArgLength)
120 * @throws IOException
121 */
122 abstract int getLinkMultArgLength(
123 File fileDir, String[] fileBaseNames, File linkDir)
124 throws IOException;
125
126 /**
127 * Get the maximum allowed string length of a shell command on this OS,
128 * which is just the documented minimum guaranteed supported command
129 * length - aprx. 32KB for Unix, and 8KB for Windows.
130 */
131 abstract int getMaxAllowedCmdArgLength();
132 }
133
134 /**
135 * Implementation of HardLinkCommandGetter class for Unix
136 */
137 static class HardLinkCGUnix extends HardLinkCommandGetter {
138 private static String[] hardLinkCommand = {"ln", null, null};
139 private static String[] hardLinkMultPrefix = {"ln"};
140 private static String[] hardLinkMultSuffix = {null};
141 private static String[] getLinkCountCommand = {"stat","-c%h", null};
142 //Unix guarantees at least 32K bytes cmd length.
143 //Subtract another 64b to allow for Java 'exec' overhead
144 private static final int maxAllowedCmdArgLength = 32*1024 - 65;
145
146 private static synchronized
147 void setLinkCountCmdTemplate(String[] template) {
148 //May update this for specific unix variants,
149 //after static initialization phase
150 getLinkCountCommand = template;
151 }
152
153 /*
154 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
155 */
156 @Override
157 String[] linkOne(File file, File linkName)
158 throws IOException {
159 String[] buf = new String[hardLinkCommand.length];
160 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
161 //unix wants argument order: "ln <existing> <new>"
162 buf[1] = FileUtil.makeShellPath(file, true);
163 buf[2] = FileUtil.makeShellPath(linkName, true);
164 return buf;
165 }
166
167 /*
168 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
169 */
170 @Override
171 String[] linkMult(String[] fileBaseNames, File linkDir)
172 throws IOException {
173 String[] buf = new String[fileBaseNames.length
174 + hardLinkMultPrefix.length
175 + hardLinkMultSuffix.length];
176 int mark=0;
177 System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
178 hardLinkMultPrefix.length);
179 mark += hardLinkMultPrefix.length;
180 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
181 mark += fileBaseNames.length;
182 buf[mark] = FileUtil.makeShellPath(linkDir, true);
183 return buf;
184 }
185
186 /*
187 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
188 */
189 @Override
190 String[] linkCount(File file)
191 throws IOException {
192 String[] buf = new String[getLinkCountCommand.length];
193 System.arraycopy(getLinkCountCommand, 0, buf, 0,
194 getLinkCountCommand.length);
195 buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
196 return buf;
197 }
198
199 /*
200 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
201 */
202 @Override
203 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
204 throws IOException{
205 int sum = 0;
206 for (String x : fileBaseNames) {
207 // add 1 to account for terminal null or delimiter space
208 sum += 1 + ((x == null) ? 0 : x.length());
209 }
210 sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
211 + FileUtil.makeShellPath(linkDir, true).length();
212 //add the fixed overhead of the hardLinkMult prefix and suffix
213 sum += 3; //length("ln") + 1
214 return sum;
215 }
216
217 /*
218 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
219 */
220 @Override
221 int getMaxAllowedCmdArgLength() {
222 return maxAllowedCmdArgLength;
223 }
224 }
225
226
227 /**
228 * Implementation of HardLinkCommandGetter class for Windows
229 */
230 static class HardLinkCGWin extends HardLinkCommandGetter {
231 //The Windows command getter impl class and its member fields are
232 //package-private ("default") access instead of "private" to assist
233 //unit testing (sort of) on non-Win servers
234
235 static String CMD_EXE = "cmd.exe";
236 static String[] hardLinkCommand = {
237 Shell.WINUTILS,"hardlink","create", null, null};
238 static String[] hardLinkMultPrefix = {
239 CMD_EXE, "/q", "/c", "for", "%f", "in", "("};
240 static String hardLinkMultDir = "\\%f";
241 static String[] hardLinkMultSuffix = {
242 ")", "do", Shell.WINUTILS, "hardlink", "create", null,
243 "%f"};
244 static String[] getLinkCountCommand = {
245 Shell.WINUTILS, "hardlink", "stat", null};
246 //Windows guarantees only 8K - 1 bytes cmd length.
247 //Subtract another 64b to allow for Java 'exec' overhead
248 static final int maxAllowedCmdArgLength = 8*1024 - 65;
249
250 /*
251 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
252 */
253 @Override
254 String[] linkOne(File file, File linkName)
255 throws IOException {
256 String[] buf = new String[hardLinkCommand.length];
257 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
258 //windows wants argument order: "create <new> <existing>"
259 buf[4] = file.getCanonicalPath();
260 buf[3] = linkName.getCanonicalPath();
261 return buf;
262 }
263
264 /*
265 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
266 */
267 @Override
268 String[] linkMult(String[] fileBaseNames, File linkDir)
269 throws IOException {
270 String[] buf = new String[fileBaseNames.length
271 + hardLinkMultPrefix.length
272 + hardLinkMultSuffix.length];
273 String td = linkDir.getCanonicalPath() + hardLinkMultDir;
274 int mark=0;
275 System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
276 hardLinkMultPrefix.length);
277 mark += hardLinkMultPrefix.length;
278 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
279 mark += fileBaseNames.length;
280 System.arraycopy(hardLinkMultSuffix, 0, buf, mark,
281 hardLinkMultSuffix.length);
282 mark += hardLinkMultSuffix.length;
283 buf[mark - 2] = td;
284 return buf;
285 }
286
287 /*
288 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
289 */
290 @Override
291 String[] linkCount(File file)
292 throws IOException {
293 String[] buf = new String[getLinkCountCommand.length];
294 System.arraycopy(getLinkCountCommand, 0, buf, 0,
295 getLinkCountCommand.length);
296 buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
297 return buf;
298 }
299
300 /*
301 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
302 */
303 @Override
304 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
305 throws IOException {
306 int sum = 0;
307 for (String x : fileBaseNames) {
308 // add 1 to account for terminal null or delimiter space
309 sum += 1 + ((x == null) ? 0 : x.length());
310 }
311 sum += 2 + fileDir.getCanonicalPath().length() +
312 linkDir.getCanonicalPath().length();
313 //add the fixed overhead of the hardLinkMult command
314 //(prefix, suffix, and Dir suffix)
315 sum += (CMD_EXE + " /q /c for %f in ( ) do "
316 + Shell.WINUTILS + " hardlink create \\%f %f").length();
317 return sum;
318 }
319
320 /*
321 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
322 */
323 @Override
324 int getMaxAllowedCmdArgLength() {
325 return maxAllowedCmdArgLength;
326 }
327 }
328
329
330 /**
331 * Calculate the nominal length of all contributors to the total
332 * commandstring length, including fixed overhead of the OS-dependent
333 * command. It's protected rather than private, to assist unit testing,
334 * but real clients are not expected to need it -- see the way
335 * createHardLinkMult() uses it internally so the user doesn't need to worry
336 * about it.
337 *
338 * @param fileDir - source directory, parent of fileBaseNames
339 * @param fileBaseNames - array of path-less file names, relative
340 * to the source directory
341 * @param linkDir - target directory where the hardlinks will be put
342 * @return - total data length (must not exceed maxAllowedCmdArgLength)
343 * @throws IOException
344 */
345 protected static int getLinkMultArgLength(
346 File fileDir, String[] fileBaseNames, File linkDir)
347 throws IOException {
348 return getHardLinkCommand.getLinkMultArgLength(fileDir,
349 fileBaseNames, linkDir);
350 }
351
352 /**
353 * Return this private value for use by unit tests.
354 * Shell commands are not allowed to have a total string length
355 * exceeding this size.
356 */
357 protected static int getMaxAllowedCmdArgLength() {
358 return getHardLinkCommand.getMaxAllowedCmdArgLength();
359 }
360
361 /*
362 * ****************************************************
363 * Complexity is above. User-visible functionality is below
364 * ****************************************************
365 */
366
367 /**
368 * Creates a hardlink
369 * @param file - existing source file
370 * @param linkName - desired target link file
371 */
372 public static void createHardLink(File file, File linkName)
373 throws IOException {
374 if (file == null) {
375 throw new IOException(
376 "invalid arguments to createHardLink: source file is null");
377 }
378 if (linkName == null) {
379 throw new IOException(
380 "invalid arguments to createHardLink: link name is null");
381 }
382 // construct and execute shell command
383 String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
384 ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand);
385 try {
386 shexec.execute();
387 } catch (ExitCodeException e) {
388 throw new IOException("Failed to execute command " +
389 Arrays.toString(hardLinkCommand) +
390 "; command output: \"" + shexec.getOutput() + "\"" +
391 "; WrappedException: \"" + e.getMessage() + "\"");
392 }
393 }
394
395 /**
396 * Creates hardlinks from multiple existing files within one parent
397 * directory, into one target directory.
398 * @param parentDir - directory containing source files
399 * @param fileBaseNames - list of path-less file names, as returned by
400 * parentDir.list()
401 * @param linkDir - where the hardlinks should be put. It must already exist.
402 *
403 * If the list of files is too long (overflows maxAllowedCmdArgLength),
404 * we will automatically split it into multiple invocations of the
405 * underlying method.
406 */
407 public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
408 File linkDir) throws IOException {
409 //This is the public method all non-test clients are expected to use.
410 //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
411 createHardLinkMult(parentDir, fileBaseNames, linkDir,
412 getHardLinkCommand.getMaxAllowedCmdArgLength());
413 }
414
415 /*
416 * Implements {@link createHardLinkMult} with added variable "maxLength",
417 * to ease unit testing of the auto-splitting feature for long lists.
418 * Likewise why it returns "callCount", the number of sub-arrays that
419 * the file list had to be split into.
420 * Non-test clients are expected to call the public method instead.
421 */
422 protected static int createHardLinkMult(File parentDir,
423 String[] fileBaseNames, File linkDir, int maxLength)
424 throws IOException {
425 if (parentDir == null) {
426 throw new IOException(
427 "invalid arguments to createHardLinkMult: parent directory is null");
428 }
429 if (linkDir == null) {
430 throw new IOException(
431 "invalid arguments to createHardLinkMult: link directory is null");
432 }
433 if (fileBaseNames == null) {
434 throw new IOException(
435 "invalid arguments to createHardLinkMult: "
436 + "filename list can be empty but not null");
437 }
438 if (fileBaseNames.length == 0) {
439 //the OS cmds can't handle empty list of filenames,
440 //but it's legal, so just return.
441 return 0;
442 }
443 if (!linkDir.exists()) {
444 throw new FileNotFoundException(linkDir + " not found.");
445 }
446
447 //if the list is too long, split into multiple invocations
448 int callCount = 0;
449 if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
450 && fileBaseNames.length > 1) {
451 String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
452 callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
453 String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
454 fileBaseNames.length);
455 callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);
456 return callCount;
457 } else {
458 callCount = 1;
459 }
460
461 // construct and execute shell command
462 String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames,
463 linkDir);
464 ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand,
465 parentDir, null, 0L);
466 try {
467 shexec.execute();
468 } catch (ExitCodeException e) {
469 throw new IOException(shexec.getOutput() + e.getMessage());
470 }
471 return callCount;
472 }
473
474 /**
475 * Retrieves the number of links to the specified file.
476 */
477 public static int getLinkCount(File fileName) throws IOException {
478 if (fileName == null) {
479 throw new IOException(
480 "invalid argument to getLinkCount: file name is null");
481 }
482 if (!fileName.exists()) {
483 throw new FileNotFoundException(fileName + " not found.");
484 }
485
486 // construct and execute shell command
487 String[] cmd = getHardLinkCommand.linkCount(fileName);
488 String inpMsg = null;
489 String errMsg = null;
490 int exitValue = -1;
491 BufferedReader in = null;
492
493 ShellCommandExecutor shexec = new ShellCommandExecutor(cmd);
494 try {
495 shexec.execute();
496 in = new BufferedReader(new StringReader(shexec.getOutput()));
497 inpMsg = in.readLine();
498 exitValue = shexec.getExitCode();
499 if (inpMsg == null || exitValue != 0) {
500 throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
501 }
502 if (Shell.SOLARIS) {
503 String[] result = inpMsg.split("\\s+");
504 return Integer.parseInt(result[1]);
505 } else {
506 return Integer.parseInt(inpMsg);
507 }
508 } catch (ExitCodeException e) {
509 inpMsg = shexec.getOutput();
510 errMsg = e.getMessage();
511 exitValue = e.getExitCode();
512 throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
513 } catch (NumberFormatException e) {
514 throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
515 } finally {
516 IOUtils.closeStream(in);
517 }
518 }
519
520 /* Create an IOException for failing to get link count. */
521 private static IOException createIOException(File f, String message,
522 String error, int exitvalue, Exception cause) {
523
524 final String s = "Failed to get link count on file " + f
525 + ": message=" + message
526 + "; error=" + error
527 + "; exit value=" + exitvalue;
528 return (cause == null) ? new IOException(s) : new IOException(s, cause);
529 }
530
531
532 /**
533 * HardLink statistics counters and methods.
534 * Not multi-thread safe, obviously.
535 * Init is called during HardLink instantiation, above.
536 *
537 * These are intended for use by knowledgeable clients, not internally,
538 * because many of the internal methods are static and can't update these
539 * per-instance counters.
540 */
541 public static class LinkStats {
542 public int countDirs = 0;
543 public int countSingleLinks = 0;
544 public int countMultLinks = 0;
545 public int countFilesMultLinks = 0;
546 public int countEmptyDirs = 0;
547 public int countPhysicalFileCopies = 0;
548
549 public void clear() {
550 countDirs = 0;
551 countSingleLinks = 0;
552 countMultLinks = 0;
553 countFilesMultLinks = 0;
554 countEmptyDirs = 0;
555 countPhysicalFileCopies = 0;
556 }
557
558 public String report() {
559 return "HardLinkStats: " + countDirs + " Directories, including "
560 + countEmptyDirs + " Empty Directories, "
561 + countSingleLinks
562 + " single Link operations, " + countMultLinks
563 + " multi-Link operations, linking " + countFilesMultLinks
564 + " files, total " + (countSingleLinks + countFilesMultLinks)
565 + " linkable files. Also physically copied "
566 + countPhysicalFileCopies + " other files.";
567 }
568 }
569 }
570