001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.hadoop.hdfs.util; 019 020 import java.io.BufferedReader; 021 import java.io.File; 022 import java.io.FileInputStream; 023 import java.io.FileReader; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.security.DigestInputStream; 027 import java.security.MessageDigest; 028 import java.util.regex.Matcher; 029 import java.util.regex.Pattern; 030 031 import org.apache.commons.logging.Log; 032 import org.apache.commons.logging.LogFactory; 033 import org.apache.hadoop.io.IOUtils; 034 import org.apache.hadoop.io.MD5Hash; 035 import org.apache.hadoop.util.StringUtils; 036 037 /** 038 * Static functions for dealing with files of the same format 039 * that the Unix "md5sum" utility writes. 040 */ 041 public abstract class MD5FileUtils { 042 private static final Log LOG = LogFactory.getLog( 043 MD5FileUtils.class); 044 045 private static final String MD5_SUFFIX = ".md5"; 046 private static final Pattern LINE_REGEX = 047 Pattern.compile("([0-9a-f]{32}) [ \\*](.+)"); 048 049 /** 050 * Verify that the previously saved md5 for the given file matches 051 * expectedMd5. 052 * @throws IOException 053 */ 054 public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5) 055 throws IOException { 056 MD5Hash storedHash = readStoredMd5ForFile(dataFile); 057 // Check the hash itself 058 if (!expectedMD5.equals(storedHash)) { 059 throw new IOException( 060 "File " + dataFile + " did not match stored MD5 checksum " + 061 " (stored: " + storedHash + ", computed: " + expectedMD5); 062 } 063 } 064 065 /** 066 * Read the md5 checksum stored alongside the given file, or null 067 * if no md5 is stored. 068 * @param dataFile the file containing data 069 * @return the checksum stored in dataFile.md5 070 */ 071 public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException { 072 File md5File = getDigestFileForFile(dataFile); 073 074 String md5Line; 075 076 if (!md5File.exists()) { 077 return null; 078 } 079 080 BufferedReader reader = 081 new BufferedReader(new FileReader(md5File)); 082 try { 083 md5Line = reader.readLine(); 084 if (md5Line == null) { md5Line = ""; } 085 md5Line = md5Line.trim(); 086 } catch (IOException ioe) { 087 throw new IOException("Error reading md5 file at " + md5File, ioe); 088 } finally { 089 IOUtils.cleanup(LOG, reader); 090 } 091 092 Matcher matcher = LINE_REGEX.matcher(md5Line); 093 if (!matcher.matches()) { 094 throw new IOException("Invalid MD5 file at " + md5File 095 + " (does not match expected pattern)"); 096 } 097 String storedHash = matcher.group(1); 098 File referencedFile = new File(matcher.group(2)); 099 100 // Sanity check: Make sure that the file referenced in the .md5 file at 101 // least has the same name as the file we expect 102 if (!referencedFile.getName().equals(dataFile.getName())) { 103 throw new IOException( 104 "MD5 file at " + md5File + " references file named " + 105 referencedFile.getName() + " but we expected it to reference " + 106 dataFile); 107 } 108 return new MD5Hash(storedHash); 109 } 110 111 /** 112 * Read dataFile and compute its MD5 checksum. 113 */ 114 public static MD5Hash computeMd5ForFile(File dataFile) throws IOException { 115 InputStream in = new FileInputStream(dataFile); 116 try { 117 MessageDigest digester = MD5Hash.getDigester(); 118 DigestInputStream dis = new DigestInputStream(in, digester); 119 IOUtils.copyBytes(dis, new IOUtils.NullOutputStream(), 128*1024); 120 121 return new MD5Hash(digester.digest()); 122 } finally { 123 IOUtils.closeStream(in); 124 } 125 } 126 127 /** 128 * Save the ".md5" file that lists the md5sum of another file. 129 * @param dataFile the original file whose md5 was computed 130 * @param digest the computed digest 131 * @throws IOException 132 */ 133 public static void saveMD5File(File dataFile, MD5Hash digest) 134 throws IOException { 135 File md5File = getDigestFileForFile(dataFile); 136 String digestString = StringUtils.byteToHexString( 137 digest.getDigest()); 138 String md5Line = digestString + " *" + dataFile.getName() + "\n"; 139 140 AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File); 141 afos.write(md5Line.getBytes()); 142 afos.close(); 143 LOG.debug("Saved MD5 " + digest + " to " + md5File); 144 } 145 146 /** 147 * @return a reference to the file with .md5 suffix that will 148 * contain the md5 checksum for the given data file. 149 */ 150 public static File getDigestFileForFile(File file) { 151 return new File(file.getParentFile(), file.getName() + MD5_SUFFIX); 152 } 153 }