/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaOutputStreams;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.StringUtils;

/**
 * This class defines a replica in a pipeline: either a persistent replica
 * that is being written to by a dfs client, or a temporary replica that is
 * being replicated by a source datanode or copied for balancing.
 *
 * The base class implements a temporary replica.
 */
public class ReplicaInPipeline extends ReplicaInfo
    implements ReplicaInPipelineInterface {
  private long bytesAcked;     // bytes acknowledged by the downstream pipeline
  private long bytesOnDisk;    // bytes persisted to the block file so far
  private byte[] lastChecksum; // checksum of the last, possibly partial, chunk
  private Thread writer;       // the thread currently writing to this replica

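  /*
   * A hedged sketch of the typical write-pipeline lifecycle around this
   * class. The methods called on the replica are real; the surrounding
   * receiver logic and variable names are illustrative only:
   *
   *   ReplicaInPipeline replica =
   *       new ReplicaInPipeline(blockId, genStamp, volume, dir);
   *   ReplicaOutputStreams streams =
   *       replica.createStreams(true, requestedChecksum);
   *   // for each packet received from the client:
   *   //   write data and checksums to the streams, then record progress
   *   replica.setLastChecksumAndDataLen(newDataLen, lastChunkChecksum);
   *   //   once downstream datanodes have acknowledged the packet:
   *   replica.setBytesAcked(newAckedLen);
   */
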
  /**
   * Constructor for a zero-length replica. The calling thread is
   * registered as the writer.
   * @param blockId block id
   * @param genStamp replica generation stamp
   * @param vol volume where replica is located
   * @param dir directory path where block and meta files are located
   */
  public ReplicaInPipeline(long blockId, long genStamp,
      FsVolumeSpi vol, File dir) {
    this(blockId, 0L, genStamp, vol, dir, Thread.currentThread());
  }

  /**
   * Constructor
   * @param block a block
   * @param vol volume where replica is located
   * @param dir directory path where block and meta files are located
   * @param writer a thread that is writing to this replica
   */
  ReplicaInPipeline(Block block,
      FsVolumeSpi vol, File dir, Thread writer) {
    this(block.getBlockId(), block.getNumBytes(), block.getGenerationStamp(),
        vol, dir, writer);
  }

  /**
   * Constructor
   * @param blockId block id
   * @param len replica length
   * @param genStamp replica generation stamp
   * @param vol volume where replica is located
   * @param dir directory path where block and meta files are located
   * @param writer a thread that is writing to this replica
   */
  ReplicaInPipeline(long blockId, long len, long genStamp,
      FsVolumeSpi vol, File dir, Thread writer) {
    super(blockId, len, genStamp, vol, dir);
    this.bytesAcked = len;
    this.bytesOnDisk = len;
    this.writer = writer;
  }

  /**
   * Copy constructor.
   * @param from the replica to copy from
   */
  public ReplicaInPipeline(ReplicaInPipeline from) {
    super(from);
    this.bytesAcked = from.getBytesAcked();
    this.bytesOnDisk = from.getBytesOnDisk();
    this.writer = from.writer;
  }

  @Override
  public long getVisibleLength() {
    return -1;  // no bytes are visible to readers while the replica is temporary
  }

  @Override  // ReplicaInfo
  public ReplicaState getState() {
    return ReplicaState.TEMPORARY;
  }
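
  /*
   * Note: subclasses refine the state reported above. In the codebase,
   * ReplicaBeingWritten extends this class for client-written replicas and
   * reports ReplicaState.RBW instead of TEMPORARY, roughly:
   *
   *   @Override  // ReplicaInfo
   *   public ReplicaState getState() {
   *     return ReplicaState.RBW;
   *   }
   */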

  @Override // ReplicaInPipelineInterface
  public long getBytesAcked() {
    return bytesAcked;
  }

  @Override // ReplicaInPipelineInterface
  public void setBytesAcked(long bytesAcked) {
    this.bytesAcked = bytesAcked;
  }

  @Override // ReplicaInPipelineInterface
  public long getBytesOnDisk() {
    return bytesOnDisk;
  }

  @Override // ReplicaInPipelineInterface
  public synchronized void setLastChecksumAndDataLen(long dataLength,
      byte[] lastChecksum) {
    this.bytesOnDisk = dataLength;
    this.lastChecksum = lastChecksum;
  }

  @Override // ReplicaInPipelineInterface
  public synchronized ChunkChecksum getLastChecksumAndDataLen() {
    return new ChunkChecksum(getBytesOnDisk(), lastChecksum);
  }
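
  /*
   * The two synchronized methods above make (bytesOnDisk, lastChecksum) a
   * single atomic snapshot. A hedged sketch of why a reader needs the pair
   * together (the reader-side names here are illustrative, not the actual
   * sender code):
   *
   *   ChunkChecksum last = replica.getLastChecksumAndDataLen();
   *   long safeLen = last.getDataLength();  // bytes whose checksum is known
   *   // serve reads only up to safeLen, using last.getChecksum() for the
   *   // final partial chunk; reading length and checksum separately could
   *   // pair a new length with a stale checksum.
   */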

  /**
   * Set the thread that is writing to this replica
   * @param writer a thread writing to this replica
   */
  public void setWriter(Thread writer) {
    this.writer = writer;
  }

  @Override  // Object
  public boolean equals(Object o) {
    return super.equals(o);
  }

  /**
   * Interrupt the writing thread and wait until it dies.
   * @param xceiverStopTimeout the maximum time, in milliseconds, to wait
   *        for the writer thread to die
   * @throws IOException if the wait times out or is interrupted
   */
  public void stopWriter(long xceiverStopTimeout) throws IOException {
    if (writer != null && writer != Thread.currentThread() && writer.isAlive()) {
      writer.interrupt();
      try {
        writer.join(xceiverStopTimeout);
        if (writer.isAlive()) {
          final String msg = "Join on writer thread " + writer + " timed out";
          DataNode.LOG.warn(msg + "\n" + StringUtils.getStackTrace(writer));
          throw new IOException(msg);
        }
      } catch (InterruptedException e) {
        throw new IOException("Interrupted while waiting for the writer thread to die.");
      }
    }
  }
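
  /*
   * A hedged sketch of how recovery paths use stopWriter: before changing a
   * replica's state, the dataset implementation stops the old writer so no
   * thread keeps appending to files it is about to reuse. Only the calls on
   * this class are real; the surrounding names are illustrative:
   *
   *   ReplicaInPipeline rip = ...;  // replica found in the volume map
   *   rip.stopWriter(xceiverStopTimeoutMillis);
   *   rip.setWriter(Thread.currentThread());  // take over as the writer
   *   // ... proceed with append/recovery on the replica's files
   */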

  @Override  // Object
  public int hashCode() {
    return super.hashCode();
  }

  @Override // ReplicaInPipelineInterface
  public ReplicaOutputStreams createStreams(boolean isCreate,
      DataChecksum requestedChecksum) throws IOException {
    File blockFile = getBlockFile();
    File metaFile = getMetaFile();
    if (DataNode.LOG.isDebugEnabled()) {
      DataNode.LOG.debug("writeTo blockfile is " + blockFile +
                         " of size " + blockFile.length());
      DataNode.LOG.debug("writeTo metafile is " + metaFile +
                         " of size " + metaFile.length());
    }
    long blockDiskSize = 0L;
    long crcDiskSize = 0L;

    // the checksum that should actually be used -- this
    // may differ from requestedChecksum for appends.
    DataChecksum checksum;

    RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");

    if (!isCreate) {
      // For append or recovery, we must enforce the existing checksum.
      // Also, verify that the file has correct lengths, etc.
      boolean checkedMeta = false;
      try {
        BlockMetadataHeader header = BlockMetadataHeader.readHeader(metaRAF);
        checksum = header.getChecksum();

        if (checksum.getBytesPerChecksum() !=
            requestedChecksum.getBytesPerChecksum()) {
          throw new IOException("Client requested checksum " +
              requestedChecksum + " when appending to an existing block " +
              "with different chunk size: " + checksum);
        }

        int bytesPerChunk = checksum.getBytesPerChecksum();
        int checksumSize = checksum.getChecksumSize();

        blockDiskSize = bytesOnDisk;
        crcDiskSize = BlockMetadataHeader.getHeaderSize() +
            (blockDiskSize + bytesPerChunk - 1) / bytesPerChunk * checksumSize;
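        // Worked example (illustrative numbers): with bytesPerChunk = 512
        // and a 4-byte CRC32 checksum, a replica with bytesOnDisk = 1000
        // spans ceil(1000/512) = 2 chunks, so the meta file should hold the
        // header (7 bytes in this version of the format) plus 2 * 4 = 8
        // checksum bytes: crcDiskSize = 7 + 8 = 15.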
        if (blockDiskSize > 0 &&
            (blockDiskSize > blockFile.length() ||
             crcDiskSize > metaFile.length())) {
          throw new IOException("Corrupted block: " + this);
        }
        checkedMeta = true;
      } finally {
        if (!checkedMeta) {
          // clean up in case of exceptions.
          IOUtils.closeStream(metaRAF);
        }
      }
    } else {
      // for create, we can use the requested checksum
      checksum = requestedChecksum;
    }

    FileOutputStream blockOut = null;
    FileOutputStream crcOut = null;
    try {
      blockOut = new FileOutputStream(
          new RandomAccessFile(blockFile, "rw").getFD());
      crcOut = new FileOutputStream(metaRAF.getFD());
      if (!isCreate) {
        blockOut.getChannel().position(blockDiskSize);
        crcOut.getChannel().position(crcDiskSize);
      }
      return new ReplicaOutputStreams(blockOut, crcOut, checksum);
    } catch (IOException e) {
      IOUtils.closeStream(blockOut);
      IOUtils.closeStream(metaRAF);
      throw e;
    }
  }
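
  /*
   * A hedged sketch of the write path around createStreams (receiver-side
   * names are illustrative; only the calls on this class and on
   * ReplicaOutputStreams are real):
   *
   *   ReplicaOutputStreams streams =
   *       replica.createStreams(isCreate, requestedChecksum);
   *   try {
   *     streams.getDataOut().write(packetData);      // block file
   *     streams.getChecksumOut().write(packetCrcs);  // meta file
   *     replica.setLastChecksumAndDataLen(newLen, lastChunkCrc);
   *   } finally {
   *     streams.close();
   *   }
   *
   * For appends (isCreate == false) the streams are positioned at
   * blockDiskSize/crcDiskSize above, so new bytes continue exactly where
   * the on-disk data ends.
   */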

  @Override
  public String toString() {
    return super.toString()
        + "\n  bytesAcked=" + bytesAcked
        + "\n  bytesOnDisk=" + bytesOnDisk;
  }
}