Source code

001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2023 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.LinkedList;
023import java.util.List;
024
025/**
026 * <p>
027 * Helper class used to parse HTML tags or generic type identifiers
028 * from a single-line of text. Just the beginning of the HTML tag
029 * is located.  No attempt is made to parse out the complete tag,
030 * particularly since some of the tag parameters could be located
031 * on the following line of text.  The {@code hasNextTag} and
032 * {@code nextTag} methods are used to iterate through the HTML
033 * tags or generic type identifiers that were found on the line of text.
034 * </p>
035 *
036 * <p>
037 * This class isn't really specific to HTML tags. Currently, the only HTML
038 * tag that this class looks specifically for is the HTML comment tag.
039 * This class helps figure out if a tag exists and if it is well-formed.
040 * It does not know whether it is valid HTML.  This class is also used for
041 * generics types which looks like opening HTML tags ex: {@code <T>, <E>, <V>,
042 * <MY_FOO_TYPE>}, etc. According to this class they are valid tags.
043 * </p>
044 *
045 */
046class TagParser {
047
048    /** HtmlTags found on the input line of text. */
049    private final List<HtmlTag> tags = new LinkedList<>();
050
051    /**
052     * Constructs a TagParser and finds the first tag if any.
053     *
054     * @param text the line of text to parse.
055     * @param lineNo the source line number.
056     */
057    /* package */ TagParser(String[] text, int lineNo) {
058        parseTags(text, lineNo);
059    }
060
061    /**
062     * Returns the next available HtmlTag.
063     *
064     * @return a HtmlTag or {@code null} if none available.
065     * @throws IndexOutOfBoundsException if there are no HtmlTags
066     *         left to return.
067     */
068    public HtmlTag nextTag() {
069        return tags.remove(0);
070    }
071
072    /**
073     * Indicates if there are any more HtmlTag to retrieve.
074     *
075     * @return {@code true} if there are more tags.
076     */
077    public boolean hasNextTag() {
078        return !tags.isEmpty();
079    }
080
081    /**
082     * Performs lazy initialization on the internal tags List
083     * and adds the tag.
084     *
085     * @param tag the HtmlTag to add.
086     */
087    private void add(HtmlTag tag) {
088        tags.add(tag);
089    }
090
091    /**
092     * Parses the text line for any HTML tags and adds them to the internal
093     * List of tags.
094     *
095     * @param text the source line to parse.
096     * @param lineNo the source line number.
097     */
098    private void parseTags(String[] text, int lineNo) {
099        final int nLines = text.length;
100        Point position = findChar(text, '<', new Point(0, 0));
101        while (position.getLineNo() < nLines) {
102            // if this is html comment then skip it
103            if (isCommentTag(text, position)) {
104                position = skipHtmlComment(text, position);
105            }
106            else if (isTag(text, position)) {
107                position = parseTag(text, lineNo, nLines, position);
108            }
109            else {
110                position = getNextPoint(text, position);
111            }
112            position = findChar(text, '<', position);
113        }
114    }
115
116    /**
117     * Parses the tag and return position after it.
118     *
119     * @param text the source line to parse.
120     * @param lineNo the source line number.
121     * @param nLines line length
122     * @param position start position for parsing
123     * @return position after tag
124     */
125    private Point parseTag(String[] text, int lineNo, final int nLines, Point position) {
126        // find end of tag
127        final Point endTag = findChar(text, '>', position);
128        final boolean incompleteTag = endTag.getLineNo() >= nLines;
129        // get tag id (one word)
130        final String tagId;
131
132        if (incompleteTag) {
133            tagId = "";
134        }
135        else {
136            tagId = getTagId(text, position);
137        }
138        // is this closed tag
139        final boolean closedTag =
140                endTag.getLineNo() < nLines
141                 && text[endTag.getLineNo()]
142                 .charAt(endTag.getColumnNo() - 1) == '/';
143        // add new tag
144        add(new HtmlTag(tagId,
145                        position.getLineNo() + lineNo,
146                        position.getColumnNo(),
147                        closedTag,
148                        incompleteTag,
149                        text[position.getLineNo()]));
150        return endTag;
151    }
152
153    /**
154     * Checks if the given position is start one for HTML tag.
155     *
156     * @param javadocText text of javadoc comments.
157     * @param pos position to check.
158     * @return {@code true} some HTML tag starts from given position.
159     */
160    private static boolean isTag(String[] javadocText, Point pos) {
161        final int column = pos.getColumnNo() + 1;
162        final String text = javadocText[pos.getLineNo()];
163
164        // Character.isJavaIdentifier... may not be a valid HTML
165        // identifier but is valid for generics
166        return column >= text.length()
167                || Character.isJavaIdentifierStart(text.charAt(column))
168                    || text.charAt(column) == '/';
169    }
170
171    /**
172     * Parse tag id.
173     *
174     * @param javadocText text of javadoc comments.
175     * @param tagStart start position of the tag
176     * @return id for given tag
177     */
178    private static String getTagId(String[] javadocText, Point tagStart) {
179        String tagId = "";
180        int column = tagStart.getColumnNo() + 1;
181        String text = javadocText[tagStart.getLineNo()];
182        if (column < text.length()) {
183            if (text.charAt(column) == '/') {
184                column++;
185            }
186
187            text = text.substring(column).trim();
188            int position = 0;
189
190            // Character.isJavaIdentifier... may not be a valid HTML
191            // identifier but is valid for generics
192            while (position < text.length()
193                    && (Character.isJavaIdentifierStart(text.charAt(position))
194                        || Character.isJavaIdentifierPart(text.charAt(position)))) {
195                position++;
196            }
197
198            tagId = text.substring(0, position);
199        }
200        return tagId;
201    }
202
203    /**
204     * If this is a HTML-comments.
205     *
206     * @param text text of javadoc comments
207     * @param pos position to check
208     * @return {@code true} if HTML-comments
209     *         starts form given position.
210     */
211    private static boolean isCommentTag(String[] text, Point pos) {
212        return text[pos.getLineNo()].startsWith("<!--", pos.getColumnNo());
213    }
214
215    /**
216     * Skips HTML comments.
217     *
218     * @param text text of javadoc comments.
219     * @param fromPoint start position of HTML-comments
220     * @return position after HTML-comments
221     */
222    private static Point skipHtmlComment(String[] text, Point fromPoint) {
223        Point toPoint = fromPoint;
224        toPoint = findChar(text, '>', toPoint);
225        while (toPoint.getLineNo() < text.length && !text[toPoint.getLineNo()]
226                .substring(0, toPoint.getColumnNo() + 1).endsWith("-->")) {
227            toPoint = findChar(text, '>', getNextPoint(text, toPoint));
228        }
229        return toPoint;
230    }
231
232    /**
233     * Finds next occurrence of given character.
234     *
235     * @param text text to search
236     * @param character character to search
237     * @param from position to start search
238     * @return position of next occurrence of given character
239     */
240    private static Point findChar(String[] text, char character, Point from) {
241        Point curr = new Point(from.getLineNo(), from.getColumnNo());
242        while (curr.getLineNo() < text.length
243               && text[curr.getLineNo()].charAt(curr.getColumnNo()) != character) {
244            curr = getNextPoint(text, curr);
245        }
246
247        return curr;
248    }
249
250    /**
251     * Increments column number to be examined, moves onto the next line when no
252     * more characters are available.
253     *
254     * @param text to search.
255     * @param from location to search from
256     * @return next point to be examined
257     */
258    private static Point getNextPoint(String[] text, Point from) {
259        int line = from.getLineNo();
260        int column = from.getColumnNo() + 1;
261        while (line < text.length && column >= text[line].length()) {
262            // go to the next line
263            line++;
264            column = 0;
265        }
266        return new Point(line, column);
267    }
268
269    /**
270     * Represents current position in the text.
271     */
272    private static final class Point {
273
274        /** Line number. */
275        private final int lineNo;
276        /** Column number.*/
277        private final int columnNo;
278
279        /**
280         * Creates new {@code Point} instance.
281         *
282         * @param lineNo line number
283         * @param columnNo column number
284         */
285        private Point(int lineNo, int columnNo) {
286            this.lineNo = lineNo;
287            this.columnNo = columnNo;
288        }
289
290        /**
291         * Getter for line number.
292         *
293         * @return line number of the position.
294         */
295        public int getLineNo() {
296            return lineNo;
297        }
298
299        /**
300         * Getter for column number.
301         *
302         * @return column number of the position.
303         */
304        public int getColumnNo() {
305            return columnNo;
306        }
307
308    }
309
310}