/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.util.Time;
import org.apache.http.client.utils.URIBuilder;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;


/**
 * This class handles fetching a specified file from the NameNode.
 */
@InterfaceAudience.Private
public class TransferFsImage {

  public final static String CONTENT_LENGTH = "Content-Length";
  public final static String FILE_LENGTH = "File-Length";
  public final static String MD5_HEADER = "X-MD5-Digest";

  private final static String CONTENT_TYPE = "Content-Type";
  private final static String CONTENT_TRANSFER_ENCODING =
      "Content-Transfer-Encoding";

  @VisibleForTesting
  static int timeout = 0;
  private static final URLConnectionFactory connectionFactory;
  private static final boolean isSpnegoEnabled;

  static {
    Configuration conf = new Configuration();
    connectionFactory = URLConnectionFactory
        .newDefaultURLConnectionFactory(conf);
    isSpnegoEnabled = UserGroupInformation.isSecurityEnabled();
  }

  private static final Log LOG = LogFactory.getLog(TransferFsImage.class);

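  /**
   * Downloads the most recent fsimage from the NameNode at the given
   * HTTP address into the given local directory.
   *
   * <p>A minimal usage sketch; the info-server address and target
   * directory below are only illustrative examples, not values this
   * class defines:
   * <pre>
   *   URL infoServer = new URL("http://nn.example.com:50070");
   *   TransferFsImage.downloadMostRecentImageToDirectory(
   *       infoServer, new File("/tmp/image-backup"));
   * </pre>
   */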
  public static void downloadMostRecentImageToDirectory(URL infoServer,
      File dir) throws IOException {
    String fileId = ImageServlet.getParamStringForMostRecentImage();
    getFileClient(infoServer, fileId, Lists.newArrayList(dir),
        null, false);
  }

  public static MD5Hash downloadImageToStorage(URL fsName, long imageTxId,
      Storage dstStorage, boolean needDigest) throws IOException {
    String fileid = ImageServlet.getParamStringForImage(null,
        imageTxId, dstStorage);
    String fileName = NNStorage.getCheckpointImageFileName(imageTxId);

    List<File> dstFiles = dstStorage.getFiles(
        NameNodeDirType.IMAGE, fileName);
    if (dstFiles.isEmpty()) {
      throw new IOException("No targets in destination storage!");
    }

    MD5Hash hash = getFileClient(fsName, fileid, dstFiles, dstStorage,
        needDigest);
    LOG.info("Downloaded file " + dstFiles.get(0).getName() + " size " +
        dstFiles.get(0).length() + " bytes.");
    return hash;
  }

  static MD5Hash handleUploadImageRequest(HttpServletRequest request,
      long imageTxId, Storage dstStorage, InputStream stream,
      long advertisedSize, DataTransferThrottler throttler)
      throws IOException {

    String fileName = NNStorage.getCheckpointImageFileName(imageTxId);

    List<File> dstFiles = dstStorage.getFiles(NameNodeDirType.IMAGE,
        fileName);
    if (dstFiles.isEmpty()) {
      throw new IOException("No targets in destination storage!");
    }

    MD5Hash advertisedDigest = parseMD5Header(request);
    MD5Hash hash = receiveFile(fileName, dstFiles, dstStorage, true,
        advertisedSize, advertisedDigest, fileName, stream, throttler);
    LOG.info("Downloaded file " + dstFiles.get(0).getName() + " size "
        + dstFiles.get(0).length() + " bytes.");
    return hash;
  }

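  /**
   * Downloads the finalized edit log segment described by the given
   * {@link RemoteEditLog} into every EDITS directory of dstStorage.
   * The download is skipped if a readable finalized copy already exists
   * locally; otherwise the segment is fetched into temporary files and
   * renamed into place.
   */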
  static void downloadEditsToStorage(URL fsName, RemoteEditLog log,
      NNStorage dstStorage) throws IOException {
    assert log.getStartTxId() > 0 && log.getEndTxId() > 0 :
        "bad log: " + log;
    String fileid = ImageServlet.getParamStringForLog(
        log, dstStorage);
    String finalFileName = NNStorage.getFinalizedEditsFileName(
        log.getStartTxId(), log.getEndTxId());

    List<File> finalFiles = dstStorage.getFiles(NameNodeDirType.EDITS,
        finalFileName);
    assert !finalFiles.isEmpty() : "No checkpoint targets.";

    for (File f : finalFiles) {
      if (f.exists() && FileUtil.canRead(f)) {
        LOG.info("Skipping download of remote edit log " +
            log + " since it already is stored locally at " + f);
        return;
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Dest file: " + f);
      }
    }

    final long milliTime = System.currentTimeMillis();
    String tmpFileName = NNStorage.getTemporaryEditsFileName(
        log.getStartTxId(), log.getEndTxId(), milliTime);
    List<File> tmpFiles = dstStorage.getFiles(NameNodeDirType.EDITS,
        tmpFileName);
    getFileClient(fsName, fileid, tmpFiles, dstStorage, false);
    // Log the length of the temporary file that was actually downloaded;
    // the finalized file does not exist until the rename below.
    LOG.info("Downloaded file " + tmpFiles.get(0).getName() + " size " +
        tmpFiles.get(0).length() + " bytes.");

    CheckpointFaultInjector.getInstance().beforeEditsRename();

    for (StorageDirectory sd : dstStorage.dirIterable(NameNodeDirType.EDITS)) {
      File tmpFile = NNStorage.getTemporaryEditsFile(sd,
          log.getStartTxId(), log.getEndTxId(), milliTime);
      File finalizedFile = NNStorage.getFinalizedEditsFile(sd,
          log.getStartTxId(), log.getEndTxId());
      if (LOG.isDebugEnabled()) {
        LOG.debug("Renaming " + tmpFile + " to " + finalizedFile);
      }
      boolean success = tmpFile.renameTo(finalizedFile);
      if (!success) {
        LOG.warn("Unable to rename edits file from " + tmpFile
            + " to " + finalizedFile);
      }
    }
  }

  /**
   * Requests that the NameNode download an image from this node.
   *
   * @param fsName the http address for the remote NN
   * @param conf Configuration
   * @param storage the storage directory to transfer the image from
   * @param nnf the NameNodeFile type of the image
   * @param txid the transaction ID of the image to be uploaded
   */
  public static void uploadImageFromStorage(URL fsName, Configuration conf,
      NNStorage storage, NameNodeFile nnf, long txid) throws IOException {

    URL url = new URL(fsName, ImageServlet.PATH_SPEC);
    long startTime = Time.monotonicNow();
    try {
      uploadImage(url, conf, storage, nnf, txid);
    } catch (HttpPutFailedException e) {
      if (e.getResponseCode() == HttpServletResponse.SC_CONFLICT) {
        // This is OK: it means a previous attempt to upload this
        // checkpoint succeeded even though we thought it failed.
        LOG.info("Image upload with txid " + txid +
            " conflicted with a previous image upload to the " +
            "same NameNode. Continuing...", e);
        return;
      } else {
        throw e;
      }
    }
    double xferSec = Math.max(
        ((float) (Time.monotonicNow() - startTime)) / 1000.0, 0.001);
    LOG.info("Uploaded image with txid " + txid + " to namenode at " + fsName
        + " in " + xferSec + " seconds");
  }

  /*
   * Uploads the image file using the HTTP PUT method.
   */
  private static void uploadImage(URL url, Configuration conf,
      NNStorage storage, NameNodeFile nnf, long txId) throws IOException {

    File imageFile = storage.findImageFile(nnf, txId);
    if (imageFile == null) {
      throw new IOException("Could not find image with txid " + txId);
    }

    HttpURLConnection connection = null;
    try {
      URIBuilder uriBuilder = new URIBuilder(url.toURI());

      // Write all params for the image upload request as the query string;
      // the request body contains the image to be uploaded.
      Map<String, String> params = ImageServlet.getParamsForPutImage(storage,
          txId, imageFile.length(), nnf);
      for (Entry<String, String> entry : params.entrySet()) {
        uriBuilder.addParameter(entry.getKey(), entry.getValue());
      }

      URL urlWithParams = uriBuilder.build().toURL();
      connection = (HttpURLConnection) connectionFactory.openConnection(
          urlWithParams, UserGroupInformation.isSecurityEnabled());
      // Set the request method to PUT.
      connection.setRequestMethod("PUT");
      connection.setDoOutput(true);

      int chunkSize = conf.getInt(
          DFSConfigKeys.DFS_IMAGE_TRANSFER_CHUNKSIZE_KEY,
          DFSConfigKeys.DFS_IMAGE_TRANSFER_CHUNKSIZE_DEFAULT);
      if (imageFile.length() > chunkSize) {
        // Use chunked streaming mode to support uploads of 2GB+ files and
        // to avoid internal buffering. This mode should only be used when
        // more than chunkSize of data is being uploaded; otherwise the
        // upload may intermittently fail.
        connection.setChunkedStreamingMode(chunkSize);
      }

      setTimeout(connection);

      // Set headers for verification.
      ImageServlet.setVerificationHeadersForPut(connection, imageFile);

      // Write the file to the output stream.
      writeFileToPutRequest(conf, connection, imageFile);

      int responseCode = connection.getResponseCode();
      if (responseCode != HttpURLConnection.HTTP_OK) {
        throw new HttpPutFailedException(connection.getResponseMessage(),
            responseCode);
      }
    } catch (AuthenticationException e) {
      throw new IOException(e);
    } catch (URISyntaxException e) {
      throw new IOException(e);
    } finally {
      if (connection != null) {
        connection.disconnect();
      }
    }
  }

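  /**
   * Writes the given image file as the body of the PUT request on the
   * given connection, setting the content-type and transfer-encoding
   * headers and applying the transfer throttler from the configuration.
   */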
  private static void writeFileToPutRequest(Configuration conf,
      HttpURLConnection connection, File imageFile)
      throws FileNotFoundException, IOException {
    connection.setRequestProperty(CONTENT_TYPE, "application/octet-stream");
    connection.setRequestProperty(CONTENT_TRANSFER_ENCODING, "binary");
    OutputStream output = connection.getOutputStream();
    FileInputStream input = new FileInputStream(imageFile);
    try {
      copyFileToStream(output, imageFile, input,
          ImageServlet.getThrottler(conf));
    } finally {
      IOUtils.closeStream(input);
      IOUtils.closeStream(output);
    }
  }

  /**
   * A server-side method to respond to a getfile HTTP request.
   * Copies the contents of the local file into the output stream.
   */
  public static void copyFileToStream(OutputStream out, File localfile,
      FileInputStream infile, DataTransferThrottler throttler)
      throws IOException {
    byte[] buf = new byte[HdfsConstants.IO_FILE_BUFFER_SIZE];
    try {
      CheckpointFaultInjector.getInstance()
          .aboutToSendFile(localfile);

      if (CheckpointFaultInjector.getInstance()
          .shouldSendShortFile(localfile)) {
        // Test sending an image shorter than localfile.
        long len = localfile.length();
        buf = new byte[(int) Math.min(len / 2,
            HdfsConstants.IO_FILE_BUFFER_SIZE)];
        // This will read at most half of the image, and the rest of the
        // image will be sent over the wire.
        infile.read(buf);
      }
      int num = 1;
      while (num > 0) {
        num = infile.read(buf);
        if (num <= 0) {
          break;
        }
        if (CheckpointFaultInjector.getInstance()
            .shouldCorruptAByte(localfile)) {
          // Simulate a corrupted byte on the wire.
          LOG.warn("SIMULATING A CORRUPT BYTE IN IMAGE TRANSFER!");
          buf[0]++;
        }

        out.write(buf, 0, num);
        if (throttler != null) {
          throttler.throttle(num);
        }
      }
    } finally {
      if (out != null) {
        out.close();
      }
    }
  }

  /**
   * Client-side method to fetch a file from a server.
   * Copies the response from the URL to a list of local files.
   * @param dstStorage if an error occurs writing to one of the files,
   *                   this storage object will be notified.
   * @return a digest of the received file if getChecksum is true
   */
  static MD5Hash getFileClient(URL infoServer,
      String queryString, List<File> localPaths,
      Storage dstStorage, boolean getChecksum) throws IOException {
    URL url = new URL(infoServer, ImageServlet.PATH_SPEC + "?" + queryString);
    LOG.info("Opening connection to " + url);
    return doGetUrl(url, localPaths, dstStorage, getChecksum);
  }

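  /**
   * Performs an HTTP GET of the given URL, validates the response code
   * and Content-Length header, and streams the response body into the
   * given local paths, optionally computing its MD5 digest.
   */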
  public static MD5Hash doGetUrl(URL url, List<File> localPaths,
      Storage dstStorage, boolean getChecksum) throws IOException {
    HttpURLConnection connection;
    try {
      connection = (HttpURLConnection)
          connectionFactory.openConnection(url, isSpnegoEnabled);
    } catch (AuthenticationException e) {
      throw new IOException(e);
    }

    setTimeout(connection);

    if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
      throw new HttpGetFailedException(
          "Image transfer servlet at " + url +
          " failed with status code " + connection.getResponseCode() +
          "\nResponse message:\n" + connection.getResponseMessage(),
          connection);
    }

    long advertisedSize;
    String contentLength = connection.getHeaderField(CONTENT_LENGTH);
    if (contentLength != null) {
      advertisedSize = Long.parseLong(contentLength);
    } else {
      throw new IOException(CONTENT_LENGTH + " header is not provided " +
          "by the namenode when trying to fetch " + url);
    }
    MD5Hash advertisedDigest = parseMD5Header(connection);
    String fsImageName = connection
        .getHeaderField(ImageServlet.HADOOP_IMAGE_EDITS_HEADER);
    InputStream stream = connection.getInputStream();

    return receiveFile(url.toExternalForm(), localPaths, dstStorage,
        getChecksum, advertisedSize, advertisedDigest, fsImageName, stream,
        null);
  }

  private static void setTimeout(HttpURLConnection connection) {
    if (timeout <= 0) {
      Configuration conf = new HdfsConfiguration();
      timeout = conf.getInt(DFSConfigKeys.DFS_IMAGE_TRANSFER_TIMEOUT_KEY,
          DFSConfigKeys.DFS_IMAGE_TRANSFER_TIMEOUT_DEFAULT);
      LOG.info("Image Transfer timeout configured to " + timeout
          + " milliseconds");
    }

    if (timeout > 0) {
      connection.setConnectTimeout(timeout);
      connection.setReadTimeout(timeout);
    }
  }

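  /**
   * Reads the given stream to completion, writing the bytes to every
   * local path (resolving directories against the server-provided file
   * name), and verifies that the number of bytes received matches the
   * advertised size. If getChecksum is true, also computes the MD5
   * digest of the stream and checks it against the advertised digest.
   */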
  private static MD5Hash receiveFile(String url, List<File> localPaths,
      Storage dstStorage, boolean getChecksum, long advertisedSize,
      MD5Hash advertisedDigest, String fsImageName, InputStream stream,
      DataTransferThrottler throttler) throws IOException {
    long startTime = Time.monotonicNow();
    if (localPaths != null) {
      // If the local paths refer to directories, use the server-provided
      // header as the filename within that directory.
      List<File> newLocalPaths = new ArrayList<File>();
      for (File localPath : localPaths) {
        if (localPath.isDirectory()) {
          if (fsImageName == null) {
            throw new IOException("No filename header provided by server");
          }
          newLocalPaths.add(new File(localPath, fsImageName));
        } else {
          newLocalPaths.add(localPath);
        }
      }
      localPaths = newLocalPaths;
    }

    long received = 0;
    MessageDigest digester = null;
    if (getChecksum) {
      digester = MD5Hash.getDigester();
      stream = new DigestInputStream(stream, digester);
    }
    boolean finishedReceiving = false;

    List<FileOutputStream> outputStreams = Lists.newArrayList();

    try {
      if (localPaths != null) {
        for (File f : localPaths) {
          try {
            if (f.exists()) {
              LOG.warn("Overwriting existing file " + f
                  + " with file downloaded from " + url);
            }
            outputStreams.add(new FileOutputStream(f));
          } catch (IOException ioe) {
            LOG.warn("Unable to download file " + f, ioe);
            // dstStorage will be null if we're downloading the fsimage to
            // a file outside of an NNStorage directory.
            if (dstStorage != null &&
                (dstStorage instanceof StorageErrorReporter)) {
              ((StorageErrorReporter) dstStorage).reportErrorOnFile(f);
            }
          }
        }

        if (outputStreams.isEmpty()) {
          throw new IOException(
              "Unable to download to any storage directory");
        }
      }

      int num = 1;
      byte[] buf = new byte[HdfsConstants.IO_FILE_BUFFER_SIZE];
      while (num > 0) {
        num = stream.read(buf);
        if (num > 0) {
          received += num;
          for (FileOutputStream fos : outputStreams) {
            fos.write(buf, 0, num);
          }
          if (throttler != null) {
            throttler.throttle(num);
          }
        }
      }
      finishedReceiving = true;
    } finally {
      stream.close();
      for (FileOutputStream fos : outputStreams) {
        fos.getChannel().force(true);
        fos.close();
      }
      if (finishedReceiving && received != advertisedSize) {
        // Only throw this exception if we think we read all of it on our
        // end -- otherwise a client-side IOException would be masked by
        // this exception, making it look like a server-side problem!
        throw new IOException("File " + url + " received length " + received +
            " is not of the advertised size " +
            advertisedSize);
      }
    }
    double xferSec = Math.max(
        ((float) (Time.monotonicNow() - startTime)) / 1000.0, 0.001);
    long xferKb = received / 1024;
    LOG.info(String.format("Transfer took %.2fs at %.2f KB/s",
        xferSec, xferKb / xferSec));

    if (digester != null) {
      MD5Hash computedDigest = new MD5Hash(digester.digest());

      if (advertisedDigest != null &&
          !computedDigest.equals(advertisedDigest)) {
        throw new IOException("File " + url + " computed digest " +
            computedDigest + " does not match advertised digest " +
            advertisedDigest);
      }
      return computedDigest;
    } else {
      return null;
    }
  }

  private static MD5Hash parseMD5Header(HttpURLConnection connection) {
    String header = connection.getHeaderField(MD5_HEADER);
    return (header != null) ? new MD5Hash(header) : null;
  }

  private static MD5Hash parseMD5Header(HttpServletRequest request) {
    String header = request.getHeader(MD5_HEADER);
    return (header != null) ? new MD5Hash(header) : null;
  }

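  /**
   * Thrown when the GET request to the image servlet fails; exposes the
   * HTTP response code of the failed request.
   */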
  public static class HttpGetFailedException extends IOException {
    private static final long serialVersionUID = 1L;
    private final int responseCode;

    HttpGetFailedException(String msg, HttpURLConnection connection)
        throws IOException {
      super(msg);
      this.responseCode = connection.getResponseCode();
    }

    public int getResponseCode() {
      return responseCode;
    }
  }

  /**
   * Thrown when the PUT request to the image servlet fails; exposes the
   * HTTP response code of the failed request.
   */
  public static class HttpPutFailedException extends IOException {
    private static final long serialVersionUID = 1L;
    private final int responseCode;

    HttpPutFailedException(String msg, int responseCode) throws IOException {
      super(msg);
      this.responseCode = responseCode;
    }

    public int getResponseCode() {
      return responseCode;
    }
  }

}