001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2023 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.coding; 021 022import java.util.ArrayList; 023import java.util.BitSet; 024import java.util.HashMap; 025import java.util.List; 026import java.util.Map; 027import java.util.regex.Pattern; 028 029import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 030import com.puppycrawl.tools.checkstyle.PropertyType; 031import com.puppycrawl.tools.checkstyle.XdocsPropertyType; 032import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 033import com.puppycrawl.tools.checkstyle.api.DetailAST; 034import com.puppycrawl.tools.checkstyle.api.TokenTypes; 035import com.puppycrawl.tools.checkstyle.utils.CheckUtil; 036import com.puppycrawl.tools.checkstyle.utils.TokenUtil; 037 038/** 039 * <p> 040 * Checks for multiple occurrences of the same string literal within a single file. 041 * </p> 042 * <p> 043 * Rationale: Code duplication makes maintenance more difficult, so it can be better 044 * to replace the multiple occurrences with a constant. 045 * </p> 046 * <ul> 047 * <li> 048 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences 049 * to allow without generating a warning. 050 * Type is {@code int}. 051 * Default value is {@code 1}. 052 * </li> 053 * <li> 054 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks). 055 * Type is {@code java.util.regex.Pattern}. 056 * Default value is {@code "^""$"}. 057 * </li> 058 * <li> 059 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate 060 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to 061 * exclude syntactical contexts like annotations or static initializers from the check. 062 * Type is {@code java.lang.String[]}. 063 * Validation type is {@code tokenTypesSet}. 064 * Default value is {@code ANNOTATION}. 065 * </li> 066 * </ul> 067 * <p> 068 * To configure the check: 069 * </p> 070 * <pre> 071 * <module name="MultipleStringLiterals"/> 072 * </pre> 073 * <p> 074 * Example: 075 * </p> 076 * <pre> 077 * public class MyClass { 078 * String a = "StringContents"; 079 * String a1 = "unchecked"; 080 * @SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations 081 * public void myTest() { 082 * String a2 = "StringContents"; // violation, "StringContents" occurs twice 083 * String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice 084 * String a4 = "SingleString"; // OK 085 * String a5 = ", " + ", " + ", "; // violation, ", " occurs three times 086 * } 087 * } 088 * </pre> 089 * <p> 090 * To configure the check so that it allows two occurrences of each string: 091 * </p> 092 * <pre> 093 * <module name="MultipleStringLiterals"> 094 * <property name="allowedDuplicates" value="2"/> 095 * </module> 096 * </pre> 097 * <p> 098 * Example: 099 * </p> 100 * <pre> 101 * public class MyClass { 102 * String a = "StringContents"; 103 * String a1 = "unchecked"; 104 * @SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations 105 * public void myTest() { 106 * String a2 = "StringContents"; // OK, two occurrences are allowed 107 * String a3 = "DoubleString" + "DoubleString"; // OK, two occurrences are allowed 108 * String a4 = "SingleString"; // OK 109 * String a5 = ", " + ", " + ", "; // violation, three occurrences are NOT allowed 110 * } 111 * } 112 * </pre> 113 * <p> 114 * To configure the check so that it ignores ", " and empty strings: 115 * </p> 116 * <pre> 117 * <module name="MultipleStringLiterals"> 118 * <property name="ignoreStringsRegexp" 119 * value='^(("")|(", "))$'/> 120 * </module> 121 * </pre> 122 * <p> 123 * Example: 124 * </p> 125 * <pre> 126 * public class MyClass { 127 * String a = "StringContents"; 128 * String a1 = "unchecked"; 129 * @SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations 130 * public void myTest() { 131 * String a2 = "StringContents"; // violation, "StringContents" occurs twice 132 * String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice 133 * String a4 = "SingleString"; // OK 134 * String a5 = ", " + ", " + ", "; // OK, multiple occurrences of ", " are allowed 135 * } 136 * } 137 * </pre> 138 * <p> 139 * To configure the check so that it flags duplicate strings in all syntactical contexts, 140 * even in annotations like {@code @SuppressWarnings("unchecked")}: 141 * </p> 142 * <pre> 143 * <module name="MultipleStringLiterals"> 144 * <property name="ignoreOccurrenceContext" value=""/> 145 * </module> 146 * </pre> 147 * <p> 148 * Example: 149 * </p> 150 * <pre> 151 * public class MyClass { 152 * String a = "StringContents"; 153 * String a1 = "unchecked"; 154 * @SuppressWarnings("unchecked") // violation, "unchecked" occurs twice 155 * public void myTest() { 156 * String a2 = "StringContents"; // violation, "StringContents" occurs twice 157 * String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice 158 * String a4 = "SingleString"; // OK 159 * String a5 = ", " + ", " + ", "; // violation, ", " occurs three times 160 * } 161 * } 162 * </pre> 163 * <p> 164 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker} 165 * </p> 166 * <p> 167 * Violation Message Keys: 168 * </p> 169 * <ul> 170 * <li> 171 * {@code multiple.string.literal} 172 * </li> 173 * </ul> 174 * 175 * @since 3.5 176 */ 177@FileStatefulCheck 178public class MultipleStringLiteralsCheck extends AbstractCheck { 179 180 /** 181 * A key is pointing to the warning message text in "messages.properties" 182 * file. 183 */ 184 public static final String MSG_KEY = "multiple.string.literal"; 185 186 /** 187 * Compiled pattern for all system newlines. 188 */ 189 private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R"); 190 191 /** 192 * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL. 193 */ 194 private static final String QUOTE = "\""; 195 196 /** 197 * The found strings and their tokens. 198 */ 199 private final Map<String, List<DetailAST>> stringMap = new HashMap<>(); 200 201 /** 202 * Specify token type names where duplicate strings are ignored even if they 203 * don't match ignoredStringsRegexp. This allows you to exclude syntactical 204 * contexts like annotations or static initializers from the check. 205 */ 206 @XdocsPropertyType(PropertyType.TOKEN_ARRAY) 207 private final BitSet ignoreOccurrenceContext = new BitSet(); 208 209 /** 210 * Specify the maximum number of occurrences to allow without generating a warning. 211 */ 212 private int allowedDuplicates = 1; 213 214 /** 215 * Specify RegExp for ignored strings (with quotation marks). 216 */ 217 private Pattern ignoreStringsRegexp; 218 219 /** 220 * Construct an instance with default values. 221 */ 222 public MultipleStringLiteralsCheck() { 223 setIgnoreStringsRegexp(Pattern.compile("^\"\"$")); 224 ignoreOccurrenceContext.set(TokenTypes.ANNOTATION); 225 } 226 227 /** 228 * Setter to specify the maximum number of occurrences to allow without generating a warning. 229 * 230 * @param allowedDuplicates The maximum number of duplicates. 231 */ 232 public void setAllowedDuplicates(int allowedDuplicates) { 233 this.allowedDuplicates = allowedDuplicates; 234 } 235 236 /** 237 * Setter to specify RegExp for ignored strings (with quotation marks). 238 * 239 * @param ignoreStringsRegexp 240 * regular expression pattern for ignored strings 241 * @noinspection WeakerAccess 242 * @noinspectionreason WeakerAccess - we avoid 'protected' when possible 243 */ 244 public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) { 245 if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) { 246 this.ignoreStringsRegexp = null; 247 } 248 else { 249 this.ignoreStringsRegexp = ignoreStringsRegexp; 250 } 251 } 252 253 /** 254 * Setter to specify token type names where duplicate strings are ignored even 255 * if they don't match ignoredStringsRegexp. This allows you to exclude 256 * syntactical contexts like annotations or static initializers from the check. 257 * 258 * @param strRep the string representation of the tokens interested in 259 */ 260 public final void setIgnoreOccurrenceContext(String... strRep) { 261 ignoreOccurrenceContext.clear(); 262 for (final String s : strRep) { 263 final int type = TokenUtil.getTokenId(s); 264 ignoreOccurrenceContext.set(type); 265 } 266 } 267 268 @Override 269 public int[] getDefaultTokens() { 270 return getRequiredTokens(); 271 } 272 273 @Override 274 public int[] getAcceptableTokens() { 275 return getRequiredTokens(); 276 } 277 278 @Override 279 public int[] getRequiredTokens() { 280 return new int[] { 281 TokenTypes.STRING_LITERAL, 282 TokenTypes.TEXT_BLOCK_CONTENT, 283 }; 284 } 285 286 @Override 287 public void visitToken(DetailAST ast) { 288 if (!isInIgnoreOccurrenceContext(ast)) { 289 final String currentString; 290 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) { 291 final String strippedString = 292 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText()); 293 // We need to add quotes here to be consistent with STRING_LITERAL text. 294 currentString = QUOTE + strippedString + QUOTE; 295 } 296 else { 297 currentString = ast.getText(); 298 } 299 if (ignoreStringsRegexp == null 300 || !ignoreStringsRegexp.matcher(currentString).find()) { 301 stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast); 302 } 303 } 304 } 305 306 /** 307 * Analyses the path from the AST root to a given AST for occurrences 308 * of the token types in {@link #ignoreOccurrenceContext}. 309 * 310 * @param ast the node from where to start searching towards the root node 311 * @return whether the path from the root node to ast contains one of the 312 * token type in {@link #ignoreOccurrenceContext}. 313 */ 314 private boolean isInIgnoreOccurrenceContext(DetailAST ast) { 315 boolean isInIgnoreOccurrenceContext = false; 316 for (DetailAST token = ast; 317 token.getParent() != null; 318 token = token.getParent()) { 319 final int type = token.getType(); 320 if (ignoreOccurrenceContext.get(type)) { 321 isInIgnoreOccurrenceContext = true; 322 break; 323 } 324 } 325 return isInIgnoreOccurrenceContext; 326 } 327 328 @Override 329 public void beginTree(DetailAST rootAST) { 330 stringMap.clear(); 331 } 332 333 @Override 334 public void finishTree(DetailAST rootAST) { 335 for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) { 336 final List<DetailAST> hits = stringListEntry.getValue(); 337 if (hits.size() > allowedDuplicates) { 338 final DetailAST firstFinding = hits.get(0); 339 final String recurringString = 340 ALL_NEW_LINES.matcher( 341 stringListEntry.getKey()).replaceAll("\\\\n"); 342 log(firstFinding, MSG_KEY, recurringString, hits.size()); 343 } 344 } 345 } 346} 347