/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaOutputStreams;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.StringUtils;

/**
 * This class represents a replica in a write pipeline: either a persistent
 * replica being written to by a DFS client, or a temporary replica being
 * replicated by a source datanode or copied for balancing purposes.
 *
 * The base class implements a temporary replica.
 */
public class ReplicaInPipeline extends ReplicaInfo
                        implements ReplicaInPipelineInterface {
  private long bytesAcked;
  private long bytesOnDisk;
  private byte[] lastChecksum;
  private Thread writer;

  /**
   * Constructor for a zero length replica. The writer is set to the
   * current thread.
   * @param blockId block id
   * @param genStamp replica generation stamp
   * @param vol volume where replica is located
   * @param dir directory path where block and meta files are located
   */
  public ReplicaInPipeline(long blockId, long genStamp,
      FsVolumeSpi vol, File dir) {
    this(blockId, 0L, genStamp, vol, dir, Thread.currentThread());
  }
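
  /*
   * Illustrative usage sketch (an assumption, not code from this class):
   * creating a zero-length temporary replica and opening its streams.
   * "vol" and "dir" stand in for the FsVolumeSpi and block directory
   * provided by the dataset implementation; the checksum parameters are
   * hypothetical.
   *
   *   ReplicaInPipeline replica =
   *       new ReplicaInPipeline(blockId, genStamp, vol, dir);
   *   DataChecksum checksum = DataChecksum.newDataChecksum(
   *       DataChecksum.Type.CRC32C, 512);
   *   ReplicaOutputStreams streams = replica.createStreams(true, checksum);
   */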

  /**
   * Constructor
   * @param block a block
   * @param vol volume where replica is located
   * @param dir directory path where block and meta files are located
   * @param writer a thread that is writing to this replica
   */
  ReplicaInPipeline(Block block,
      FsVolumeSpi vol, File dir, Thread writer) {
    this(block.getBlockId(), block.getNumBytes(), block.getGenerationStamp(),
        vol, dir, writer);
  }

  /**
   * Constructor
   * @param blockId block id
   * @param len replica length
   * @param genStamp replica generation stamp
   * @param vol volume where replica is located
   * @param dir directory path where block and meta files are located
   * @param writer a thread that is writing to this replica
   */
  ReplicaInPipeline(long blockId, long len, long genStamp,
      FsVolumeSpi vol, File dir, Thread writer) {
    super(blockId, len, genStamp, vol, dir);
    this.bytesAcked = len;
    this.bytesOnDisk = len;
    this.writer = writer;
  }

  /**
   * Copy constructor.
   * @param from the replica to copy from
   */
  public ReplicaInPipeline(ReplicaInPipeline from) {
    super(from);
    this.bytesAcked = from.getBytesAcked();
    this.bytesOnDisk = from.getBytesOnDisk();
    this.writer = from.writer;
  }

  @Override
  public long getVisibleLength() {
    return -1;  // no bytes are visible to readers for a temporary replica
  }

  @Override  //ReplicaInfo
  public ReplicaState getState() {
    return ReplicaState.TEMPORARY;
  }

  @Override // ReplicaInPipelineInterface
  public long getBytesAcked() {
    return bytesAcked;
  }

  @Override // ReplicaInPipelineInterface
  public void setBytesAcked(long bytesAcked) {
    this.bytesAcked = bytesAcked;
  }

  @Override // ReplicaInPipelineInterface
  public long getBytesOnDisk() {
    return bytesOnDisk;
  }

  @Override // ReplicaInPipelineInterface
  public synchronized void setLastChecksumAndDataLen(long dataLength,
      byte[] lastChecksum) {
    this.bytesOnDisk = dataLength;
    this.lastChecksum = lastChecksum;
  }

  @Override // ReplicaInPipelineInterface
  public synchronized ChunkChecksum getLastChecksumAndDataLen() {
    return new ChunkChecksum(getBytesOnDisk(), lastChecksum);
  }
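
  /*
   * Illustrative sketch (an assumption about how callers use these methods):
   * a receiver thread typically flushes a packet to disk, records the new
   * on-disk length together with the checksum of the last partial chunk,
   * and advances bytesAcked only after downstream datanodes acknowledge.
   *
   *   replica.setLastChecksumAndDataLen(newLength, lastChunkChecksum);
   *   // ... once acks arrive from the rest of the pipeline:
   *   replica.setBytesAcked(newLength);
   */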

  /**
   * Set the thread that is writing to this replica
   * @param writer a thread writing to this replica
   */
  public void setWriter(Thread writer) {
    this.writer = writer;
  }

  @Override  // Object
  public boolean equals(Object o) {
    return super.equals(o);
  }

  /**
   * Interrupt the writing thread and wait until it dies.
   * @param xceiverStopTimeout maximum time in milliseconds to wait for
   *        the writer thread to die
   * @throws IOException if the wait times out or is interrupted
   */
  public void stopWriter(long xceiverStopTimeout) throws IOException {
    if (writer != null && writer != Thread.currentThread() && writer.isAlive()) {
      writer.interrupt();
      try {
        writer.join(xceiverStopTimeout);
        if (writer.isAlive()) {
          final String msg = "Join on writer thread " + writer + " timed out";
          DataNode.LOG.warn(msg + "\n" + StringUtils.getStackTrace(writer));
          throw new IOException(msg);
        }
      } catch (InterruptedException e) {
        // preserve the interrupt status and the original cause
        Thread.currentThread().interrupt();
        throw new IOException("Waiting for writer thread is interrupted.", e);
      }
    }
  }
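
  /*
   * Illustrative sketch (an assumption): during pipeline recovery a datanode
   * stops the current writer before taking over the replica. The timeout
   * shown is a hypothetical value; real callers pass the configured
   * xceiver stop timeout.
   *
   *   replica.stopWriter(60000L);  // interrupt and join, waiting up to 60s
   *   replica.setWriter(Thread.currentThread());
   */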

  @Override  // Object
  public int hashCode() {
    return super.hashCode();
  }

  @Override // ReplicaInPipelineInterface
  public ReplicaOutputStreams createStreams(boolean isCreate,
      DataChecksum requestedChecksum) throws IOException {
    File blockFile = getBlockFile();
    File metaFile = getMetaFile();
    if (DataNode.LOG.isDebugEnabled()) {
      DataNode.LOG.debug("writeTo blockfile is " + blockFile +
                         " of size " + blockFile.length());
      DataNode.LOG.debug("writeTo metafile is " + metaFile +
                         " of size " + metaFile.length());
    }
    long blockDiskSize = 0L;
    long crcDiskSize = 0L;

    // the checksum that should actually be used -- this
    // may differ from requestedChecksum for appends.
    DataChecksum checksum;

    RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");

    if (!isCreate) {
      // For append or recovery, we must enforce the existing checksum.
      // Also, verify that the file has correct lengths, etc.
      boolean checkedMeta = false;
      try {
        BlockMetadataHeader header = BlockMetadataHeader.readHeader(metaRAF);
        checksum = header.getChecksum();

        if (checksum.getBytesPerChecksum() !=
            requestedChecksum.getBytesPerChecksum()) {
          throw new IOException("Client requested checksum " +
              requestedChecksum + " when appending to an existing block " +
              "with different chunk size: " + checksum);
        }

        int bytesPerChunk = checksum.getBytesPerChecksum();
        int checksumSize = checksum.getChecksumSize();

        blockDiskSize = bytesOnDisk;
        crcDiskSize = BlockMetadataHeader.getHeaderSize() +
            (blockDiskSize + bytesPerChunk - 1) / bytesPerChunk * checksumSize;
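        // Worked example with illustrative numbers: for bytesPerChunk = 512,
        // checksumSize = 4 and blockDiskSize = 1000 bytes, the data spans
        // ceil(1000/512) = 2 chunks, so the meta file must be at least
        // getHeaderSize() + 2 * 4 bytes long for the replica to be intact.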
        if (blockDiskSize > 0 &&
            (blockDiskSize > blockFile.length() ||
             crcDiskSize > metaFile.length())) {
          throw new IOException("Corrupted block: " + this);
        }
        checkedMeta = true;
      } finally {
        if (!checkedMeta) {
          // clean up in case of exceptions.
          IOUtils.closeStream(metaRAF);
        }
      }
    } else {
      // for create, we can use the requested checksum
      checksum = requestedChecksum;
    }

    FileOutputStream blockOut = null;
    FileOutputStream crcOut = null;
    try {
      blockOut = new FileOutputStream(
          new RandomAccessFile(blockFile, "rw").getFD());
      crcOut = new FileOutputStream(metaRAF.getFD());
      if (!isCreate) {
        blockOut.getChannel().position(blockDiskSize);
        crcOut.getChannel().position(crcDiskSize);
      }
      return new ReplicaOutputStreams(blockOut, crcOut, checksum);
    } catch (IOException e) {
      IOUtils.closeStream(blockOut);
      IOUtils.closeStream(metaRAF);
      throw e;
    }
  }
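
  /*
   * Illustrative usage sketch (an assumption, not code from this class):
   * writing through the streams returned above. ReplicaOutputStreams exposes
   * the underlying data and checksum streams; "dataBuf" and "checksumBuf"
   * are hypothetical placeholders for a packet's payload and its checksums.
   *
   *   ReplicaOutputStreams streams = replica.createStreams(true, checksum);
   *   try {
   *     streams.getDataOut().write(dataBuf);
   *     streams.getChecksumOut().write(checksumBuf);
   *   } finally {
   *     streams.close();
   *   }
   */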

  @Override
  public String toString() {
    return super.toString()
        + "\n  bytesAcked=" + bytesAcked
        + "\n  bytesOnDisk=" + bytesOnDisk;
  }
}