001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2023 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import java.util.ArrayList;
023import java.util.BitSet;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.regex.Pattern;
028
029import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
030import com.puppycrawl.tools.checkstyle.PropertyType;
031import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
032import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
033import com.puppycrawl.tools.checkstyle.api.DetailAST;
034import com.puppycrawl.tools.checkstyle.api.TokenTypes;
035import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
036import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
037
038/**
039 * <p>
040 * Checks for multiple occurrences of the same string literal within a single file.
041 * </p>
042 * <p>
043 * Rationale: Code duplication makes maintenance more difficult, so it can be better
044 * to replace the multiple occurrences with a constant.
045 * </p>
046 * <ul>
047 * <li>
048 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
049 * to allow without generating a warning.
050 * Type is {@code int}.
051 * Default value is {@code 1}.
052 * </li>
053 * <li>
054 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
055 * Type is {@code java.util.regex.Pattern}.
056 * Default value is {@code "^""$"}.
057 * </li>
058 * <li>
059 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
060 * strings are ignored even if they don't match ignoreStringsRegexp. This allows you to
061 * exclude syntactical contexts like annotations or static initializers from the check.
062 * Type is {@code java.lang.String[]}.
063 * Validation type is {@code tokenTypesSet}.
064 * Default value is {@code ANNOTATION}.
065 * </li>
066 * </ul>
067 * <p>
068 * To configure the check:
069 * </p>
070 * <pre>
071 * &lt;module name=&quot;MultipleStringLiterals&quot;/&gt;
072 * </pre>
073 * <p>
074 * Example:
075 * </p>
076 * <pre>
077 * public class MyClass {
078 *   String a = "StringContents";
079 *   String a1 = "unchecked";
080 *   &#64;SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations
081 *   public void myTest() {
082 *     String a2 = "StringContents"; // violation, "StringContents" occurs twice
083 *     String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice
084 *     String a4 = "SingleString"; // OK
085 *     String a5 = ", " + ", " + ", "; // violation, ", " occurs three times
086 *   }
087 * }
088 * </pre>
089 * <p>
090 * To configure the check so that it allows two occurrences of each string:
091 * </p>
092 * <pre>
093 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
094 *   &lt;property name=&quot;allowedDuplicates&quot; value=&quot;2&quot;/&gt;
095 * &lt;/module&gt;
096 * </pre>
097 * <p>
098 * Example:
099 * </p>
100 * <pre>
101 * public class MyClass {
102 *   String a = "StringContents";
103 *   String a1 = "unchecked";
104 *   &#64;SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations
105 *   public void myTest() {
106 *     String a2 = "StringContents"; // OK, two occurrences are allowed
107 *     String a3 = "DoubleString" + "DoubleString"; // OK, two occurrences are allowed
108 *     String a4 = "SingleString"; // OK
109 *     String a5 = ", " + ", " + ", "; // violation, three occurrences are NOT allowed
110 *   }
111 * }
112 * </pre>
113 * <p>
114 * To configure the check so that it ignores ", " and empty strings:
115 * </p>
116 * <pre>
117 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
118 *   &lt;property name=&quot;ignoreStringsRegexp&quot;
119 *     value='^((&quot;&quot;)|(&quot;, &quot;))$'/&gt;
120 * &lt;/module&gt;
121 * </pre>
122 * <p>
123 * Example:
124 * </p>
125 * <pre>
126 * public class MyClass {
127 *   String a = "StringContents";
128 *   String a1 = "unchecked";
129 *   &#64;SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations
130 *   public void myTest() {
131 *     String a2 = "StringContents"; // violation, "StringContents" occurs twice
132 *     String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice
133 *     String a4 = "SingleString"; // OK
134 *     String a5 = ", " + ", " + ", "; // OK, multiple occurrences of ", " are allowed
135 *   }
136 * }
137 * </pre>
138 * <p>
139 * To configure the check so that it flags duplicate strings in all syntactical contexts,
140 * even in annotations like {@code @SuppressWarnings("unchecked")}:
141 * </p>
142 * <pre>
143 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
144 *   &lt;property name=&quot;ignoreOccurrenceContext&quot; value=&quot;&quot;/&gt;
145 * &lt;/module&gt;
146 * </pre>
147 * <p>
148 * Example:
149 * </p>
150 * <pre>
151 * public class MyClass {
152 *   String a = "StringContents";
153 *   String a1 = "unchecked";
154 *   &#64;SuppressWarnings("unchecked") // violation, "unchecked" occurs twice
155 *   public void myTest() {
156 *     String a2 = "StringContents"; // violation, "StringContents" occurs twice
157 *     String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice
158 *     String a4 = "SingleString"; // OK
159 *     String a5 = ", " + ", " + ", "; // violation, ", " occurs three times
160 *   }
161 * }
162 * </pre>
163 * <p>
164 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
165 * </p>
166 * <p>
167 * Violation Message Keys:
168 * </p>
169 * <ul>
170 * <li>
171 * {@code multiple.string.literal}
172 * </li>
173 * </ul>
174 *
175 * @since 3.5
176 */
177@FileStatefulCheck
178public class MultipleStringLiteralsCheck extends AbstractCheck {
179
180    /**
181     * A key is pointing to the warning message text in "messages.properties"
182     * file.
183     */
184    public static final String MSG_KEY = "multiple.string.literal";
185
186    /**
187     * Compiled pattern for all system newlines.
188     */
189    private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
190
191    /**
192     * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
193     */
194    private static final String QUOTE = "\"";
195
196    /**
197     * The found strings and their tokens.
198     */
199    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
200
201    /**
202     * Specify token type names where duplicate strings are ignored even if they
203     * don't match ignoredStringsRegexp. This allows you to exclude syntactical
204     * contexts like annotations or static initializers from the check.
205     */
206    @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
207    private final BitSet ignoreOccurrenceContext = new BitSet();
208
209    /**
210     * Specify the maximum number of occurrences to allow without generating a warning.
211     */
212    private int allowedDuplicates = 1;
213
214    /**
215     * Specify RegExp for ignored strings (with quotation marks).
216     */
217    private Pattern ignoreStringsRegexp;
218
219    /**
220     * Construct an instance with default values.
221     */
222    public MultipleStringLiteralsCheck() {
223        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
224        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
225    }
226
227    /**
228     * Setter to specify the maximum number of occurrences to allow without generating a warning.
229     *
230     * @param allowedDuplicates The maximum number of duplicates.
231     */
232    public void setAllowedDuplicates(int allowedDuplicates) {
233        this.allowedDuplicates = allowedDuplicates;
234    }
235
236    /**
237     * Setter to specify RegExp for ignored strings (with quotation marks).
238     *
239     * @param ignoreStringsRegexp
240     *        regular expression pattern for ignored strings
241     * @noinspection WeakerAccess
242     * @noinspectionreason WeakerAccess - we avoid 'protected' when possible
243     */
244    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
245        if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
246            this.ignoreStringsRegexp = null;
247        }
248        else {
249            this.ignoreStringsRegexp = ignoreStringsRegexp;
250        }
251    }
252
253    /**
254     * Setter to specify token type names where duplicate strings are ignored even
255     * if they don't match ignoredStringsRegexp. This allows you to exclude
256     * syntactical contexts like annotations or static initializers from the check.
257     *
258     * @param strRep the string representation of the tokens interested in
259     */
260    public final void setIgnoreOccurrenceContext(String... strRep) {
261        ignoreOccurrenceContext.clear();
262        for (final String s : strRep) {
263            final int type = TokenUtil.getTokenId(s);
264            ignoreOccurrenceContext.set(type);
265        }
266    }
267
268    @Override
269    public int[] getDefaultTokens() {
270        return getRequiredTokens();
271    }
272
273    @Override
274    public int[] getAcceptableTokens() {
275        return getRequiredTokens();
276    }
277
278    @Override
279    public int[] getRequiredTokens() {
280        return new int[] {
281            TokenTypes.STRING_LITERAL,
282            TokenTypes.TEXT_BLOCK_CONTENT,
283        };
284    }
285
286    @Override
287    public void visitToken(DetailAST ast) {
288        if (!isInIgnoreOccurrenceContext(ast)) {
289            final String currentString;
290            if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
291                final String strippedString =
292                    CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
293                // We need to add quotes here to be consistent with STRING_LITERAL text.
294                currentString = QUOTE + strippedString + QUOTE;
295            }
296            else {
297                currentString = ast.getText();
298            }
299            if (ignoreStringsRegexp == null
300                    || !ignoreStringsRegexp.matcher(currentString).find()) {
301                stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
302            }
303        }
304    }
305
306    /**
307     * Analyses the path from the AST root to a given AST for occurrences
308     * of the token types in {@link #ignoreOccurrenceContext}.
309     *
310     * @param ast the node from where to start searching towards the root node
311     * @return whether the path from the root node to ast contains one of the
312     *     token type in {@link #ignoreOccurrenceContext}.
313     */
314    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
315        boolean isInIgnoreOccurrenceContext = false;
316        for (DetailAST token = ast;
317             token.getParent() != null;
318             token = token.getParent()) {
319            final int type = token.getType();
320            if (ignoreOccurrenceContext.get(type)) {
321                isInIgnoreOccurrenceContext = true;
322                break;
323            }
324        }
325        return isInIgnoreOccurrenceContext;
326    }
327
328    @Override
329    public void beginTree(DetailAST rootAST) {
330        stringMap.clear();
331    }
332
333    @Override
334    public void finishTree(DetailAST rootAST) {
335        for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
336            final List<DetailAST> hits = stringListEntry.getValue();
337            if (hits.size() > allowedDuplicates) {
338                final DetailAST firstFinding = hits.get(0);
339                final String recurringString =
340                    ALL_NEW_LINES.matcher(
341                        stringListEntry.getKey()).replaceAll("\\\\n");
342                log(firstFinding, MSG_KEY, recurringString, hits.size());
343            }
344        }
345    }
346}
347