/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.protocol.datatransfer;

import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

/**
 * The setting of the replace-datanode-on-failure feature.
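 * <p>
 * A minimal usage sketch; {@code conf} is assumed to be the client's
 * {@link Configuration}, and the replication, pipeline and flush-state
 * arguments shown are placeholders:
 * <pre>{@code
 * ReplaceDatanodeOnFailure rdof = ReplaceDatanodeOnFailure.get(conf);
 * rdof.checkEnabled();
 * boolean addDatanode = rdof.satisfy(replication, existingDatanodes,
 *     isAppend, isHflushed);
 * }</pre>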
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class ReplaceDatanodeOnFailure {
  /** The replacement policies. */
  public enum Policy {
    /** The feature is disabled in the entire site. */
    DISABLE(Condition.FALSE),
    /** Never add a new datanode. */
    NEVER(Condition.FALSE),
    /** @see ReplaceDatanodeOnFailure.Condition#DEFAULT */
    DEFAULT(Condition.DEFAULT),
    /** Always add a new datanode when an existing datanode is removed. */
    ALWAYS(Condition.TRUE);

    private final Condition condition;

    private Policy(Condition condition) {
      this.condition = condition;
    }

    Condition getCondition() {
      return condition;
    }
  }

  /** Datanode replacement condition. */
  private static interface Condition {
    /** Return true unconditionally. */
    static final Condition TRUE = new Condition() {
      @Override
      public boolean satisfy(short replication, DatanodeInfo[] existings,
          int nExistings, boolean isAppend, boolean isHflushed) {
        return true;
      }
    };

    /** Return false unconditionally. */
    static final Condition FALSE = new Condition() {
      @Override
      public boolean satisfy(short replication, DatanodeInfo[] existings,
          int nExistings, boolean isAppend, boolean isHflushed) {
        return false;
      }
    };

    /**
     * DEFAULT condition:
     *   Let r be the replication number.
     *   Let n be the number of existing datanodes.
     *   Add a new datanode only if r >= 3 and either
     *   (1) floor(r/2) >= n; or
     *   (2) r > n and the block is hflushed/appended.
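     *
     *   For example, with r = 3:
     *     n = 1: floor(3/2) = 1 >= 1, so a new datanode is added;
     *     n = 2: floor(3/2) = 1 < 2, so a new datanode is added only when
     *            the block is hflushed/appended.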
     */
    static final Condition DEFAULT = new Condition() {
      @Override
      public boolean satisfy(final short replication,
          final DatanodeInfo[] existings, final int n, final boolean isAppend,
          final boolean isHflushed) {
        if (replication < 3) {
          return false;
        } else {
          if (n <= (replication/2)) {
            return true;
          } else {
            return isAppend || isHflushed;
          }
        }
      }
    };

    /** Is the condition satisfied? */
    public boolean satisfy(short replication, DatanodeInfo[] existings,
        int nExistings, boolean isAppend, boolean isHflushed);
  }

  private final Policy policy;
  private final boolean bestEffort;

  public ReplaceDatanodeOnFailure(Policy policy, boolean bestEffort) {
    this.policy = policy;
    this.bestEffort = bestEffort;
  }

  /** Check if the feature is enabled. */
  public void checkEnabled() {
    if (policy == Policy.DISABLE) {
      throw new UnsupportedOperationException(
          "This feature is disabled.  Please refer to "
          + DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_ENABLE_KEY
          + " configuration property.");
    }
  }

  /**
   * Best effort means that the client will try to replace the failed datanode
   * (provided that the policy is satisfied); however, it will continue the
   * write operation even if the datanode replacement fails.
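   *
   * <p>
   * A hypothetical caller sketch ({@code addReplacementDatanode} and
   * {@code replaceDatanodeOnFailure} are placeholder names, not members of
   * this class):
   * <pre>{@code
   * try {
   *   addReplacementDatanode();
   * } catch (IOException e) {
   *   if (!replaceDatanodeOnFailure.isBestEffort()) {
   *     throw e;  // not best effort: fail the write
   *   }
   *   // best effort: continue the write with the remaining datanodes
   * }
   * }</pre>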
   *
   * @return Suppose the datanode replacement fails.
   *     false: An exception should be thrown so that the write will fail.
   *     true : The write should be resumed with the remaining datanodes.
   */
  public boolean isBestEffort() {
    return bestEffort;
  }

  /** Is a datanode replacement needed according to the policy? */
  public boolean satisfy(
      final short replication, final DatanodeInfo[] existings,
      final boolean isAppend, final boolean isHflushed) {
    final int n = existings == null? 0: existings.length;
    if (n == 0 || n >= replication) {
      // No need to add a datanode for any policy.
      return false;
    } else {
      return policy.getCondition().satisfy(
          replication, existings, n, isAppend, isHflushed);
    }
  }

  @Override
  public String toString() {
    return policy.toString();
  }

  /** Get the setting from configuration. */
  public static ReplaceDatanodeOnFailure get(final Configuration conf) {
    final Policy policy = getPolicy(conf);
    final boolean bestEffort = conf.getBoolean(
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_BEST_EFFORT_KEY,
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_BEST_EFFORT_DEFAULT);

    return new ReplaceDatanodeOnFailure(policy, bestEffort);
  }

  private static Policy getPolicy(final Configuration conf) {
    final boolean enabled = conf.getBoolean(
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_ENABLE_KEY,
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_ENABLE_DEFAULT);
    if (!enabled) {
      return Policy.DISABLE;
    }

    final String policy = conf.get(
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_KEY,
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_DEFAULT);
    // Start at index 1 to skip DISABLE, which is selected only via the enable key.
    for (int i = 1; i < Policy.values().length; i++) {
      final Policy p = Policy.values()[i];
      if (p.name().equalsIgnoreCase(policy)) {
        return p;
      }
    }
    throw new HadoopIllegalArgumentException("Illegal configuration value for "
        + DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_KEY
        + ": " + policy);
  }

  /** Write the setting to configuration. */
  public static void write(final Policy policy,
      final boolean bestEffort, final Configuration conf) {
    conf.setBoolean(
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_ENABLE_KEY,
        policy != Policy.DISABLE);
    conf.set(
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_KEY,
        policy.name());
    conf.setBoolean(
        DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_BEST_EFFORT_KEY,
        bestEffort);
  }
}