001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2023 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.javadoc; 021 022import java.util.LinkedList; 023import java.util.List; 024 025/** 026 * <p> 027 * Helper class used to parse HTML tags or generic type identifiers 028 * from a single-line of text. Just the beginning of the HTML tag 029 * is located. No attempt is made to parse out the complete tag, 030 * particularly since some of the tag parameters could be located 031 * on the following line of text. The {@code hasNextTag} and 032 * {@code nextTag} methods are used to iterate through the HTML 033 * tags or generic type identifiers that were found on the line of text. 034 * </p> 035 * 036 * <p> 037 * This class isn't really specific to HTML tags. Currently, the only HTML 038 * tag that this class looks specifically for is the HTML comment tag. 039 * This class helps figure out if a tag exists and if it is well-formed. 040 * It does not know whether it is valid HTML. This class is also used for 041 * generics types which looks like opening HTML tags ex: {@code <T>, <E>, <V>, 042 * <MY_FOO_TYPE>}, etc. According to this class they are valid tags. 043 * </p> 044 * 045 */ 046class TagParser { 047 048 /** HtmlTags found on the input line of text. */ 049 private final List<HtmlTag> tags = new LinkedList<>(); 050 051 /** 052 * Constructs a TagParser and finds the first tag if any. 053 * 054 * @param text the line of text to parse. 055 * @param lineNo the source line number. 056 */ 057 /* package */ TagParser(String[] text, int lineNo) { 058 parseTags(text, lineNo); 059 } 060 061 /** 062 * Returns the next available HtmlTag. 063 * 064 * @return a HtmlTag or {@code null} if none available. 065 * @throws IndexOutOfBoundsException if there are no HtmlTags 066 * left to return. 067 */ 068 public HtmlTag nextTag() { 069 return tags.remove(0); 070 } 071 072 /** 073 * Indicates if there are any more HtmlTag to retrieve. 074 * 075 * @return {@code true} if there are more tags. 076 */ 077 public boolean hasNextTag() { 078 return !tags.isEmpty(); 079 } 080 081 /** 082 * Performs lazy initialization on the internal tags List 083 * and adds the tag. 084 * 085 * @param tag the HtmlTag to add. 086 */ 087 private void add(HtmlTag tag) { 088 tags.add(tag); 089 } 090 091 /** 092 * Parses the text line for any HTML tags and adds them to the internal 093 * List of tags. 094 * 095 * @param text the source line to parse. 096 * @param lineNo the source line number. 097 */ 098 private void parseTags(String[] text, int lineNo) { 099 final int nLines = text.length; 100 Point position = findChar(text, '<', new Point(0, 0)); 101 while (position.getLineNo() < nLines) { 102 // if this is html comment then skip it 103 if (isCommentTag(text, position)) { 104 position = skipHtmlComment(text, position); 105 } 106 else if (isTag(text, position)) { 107 position = parseTag(text, lineNo, nLines, position); 108 } 109 else { 110 position = getNextPoint(text, position); 111 } 112 position = findChar(text, '<', position); 113 } 114 } 115 116 /** 117 * Parses the tag and return position after it. 118 * 119 * @param text the source line to parse. 120 * @param lineNo the source line number. 121 * @param nLines line length 122 * @param position start position for parsing 123 * @return position after tag 124 */ 125 private Point parseTag(String[] text, int lineNo, final int nLines, Point position) { 126 // find end of tag 127 final Point endTag = findChar(text, '>', position); 128 final boolean incompleteTag = endTag.getLineNo() >= nLines; 129 // get tag id (one word) 130 final String tagId; 131 132 if (incompleteTag) { 133 tagId = ""; 134 } 135 else { 136 tagId = getTagId(text, position); 137 } 138 // is this closed tag 139 final boolean closedTag = 140 endTag.getLineNo() < nLines 141 && text[endTag.getLineNo()] 142 .charAt(endTag.getColumnNo() - 1) == '/'; 143 // add new tag 144 add(new HtmlTag(tagId, 145 position.getLineNo() + lineNo, 146 position.getColumnNo(), 147 closedTag, 148 incompleteTag, 149 text[position.getLineNo()])); 150 return endTag; 151 } 152 153 /** 154 * Checks if the given position is start one for HTML tag. 155 * 156 * @param javadocText text of javadoc comments. 157 * @param pos position to check. 158 * @return {@code true} some HTML tag starts from given position. 159 */ 160 private static boolean isTag(String[] javadocText, Point pos) { 161 final int column = pos.getColumnNo() + 1; 162 final String text = javadocText[pos.getLineNo()]; 163 164 // Character.isJavaIdentifier... may not be a valid HTML 165 // identifier but is valid for generics 166 return column >= text.length() 167 || Character.isJavaIdentifierStart(text.charAt(column)) 168 || text.charAt(column) == '/'; 169 } 170 171 /** 172 * Parse tag id. 173 * 174 * @param javadocText text of javadoc comments. 175 * @param tagStart start position of the tag 176 * @return id for given tag 177 */ 178 private static String getTagId(String[] javadocText, Point tagStart) { 179 String tagId = ""; 180 int column = tagStart.getColumnNo() + 1; 181 String text = javadocText[tagStart.getLineNo()]; 182 if (column < text.length()) { 183 if (text.charAt(column) == '/') { 184 column++; 185 } 186 187 text = text.substring(column).trim(); 188 int position = 0; 189 190 // Character.isJavaIdentifier... may not be a valid HTML 191 // identifier but is valid for generics 192 while (position < text.length() 193 && (Character.isJavaIdentifierStart(text.charAt(position)) 194 || Character.isJavaIdentifierPart(text.charAt(position)))) { 195 position++; 196 } 197 198 tagId = text.substring(0, position); 199 } 200 return tagId; 201 } 202 203 /** 204 * If this is a HTML-comments. 205 * 206 * @param text text of javadoc comments 207 * @param pos position to check 208 * @return {@code true} if HTML-comments 209 * starts form given position. 210 */ 211 private static boolean isCommentTag(String[] text, Point pos) { 212 return text[pos.getLineNo()].startsWith("<!--", pos.getColumnNo()); 213 } 214 215 /** 216 * Skips HTML comments. 217 * 218 * @param text text of javadoc comments. 219 * @param fromPoint start position of HTML-comments 220 * @return position after HTML-comments 221 */ 222 private static Point skipHtmlComment(String[] text, Point fromPoint) { 223 Point toPoint = fromPoint; 224 toPoint = findChar(text, '>', toPoint); 225 while (toPoint.getLineNo() < text.length && !text[toPoint.getLineNo()] 226 .substring(0, toPoint.getColumnNo() + 1).endsWith("-->")) { 227 toPoint = findChar(text, '>', getNextPoint(text, toPoint)); 228 } 229 return toPoint; 230 } 231 232 /** 233 * Finds next occurrence of given character. 234 * 235 * @param text text to search 236 * @param character character to search 237 * @param from position to start search 238 * @return position of next occurrence of given character 239 */ 240 private static Point findChar(String[] text, char character, Point from) { 241 Point curr = new Point(from.getLineNo(), from.getColumnNo()); 242 while (curr.getLineNo() < text.length 243 && text[curr.getLineNo()].charAt(curr.getColumnNo()) != character) { 244 curr = getNextPoint(text, curr); 245 } 246 247 return curr; 248 } 249 250 /** 251 * Increments column number to be examined, moves onto the next line when no 252 * more characters are available. 253 * 254 * @param text to search. 255 * @param from location to search from 256 * @return next point to be examined 257 */ 258 private static Point getNextPoint(String[] text, Point from) { 259 int line = from.getLineNo(); 260 int column = from.getColumnNo() + 1; 261 while (line < text.length && column >= text[line].length()) { 262 // go to the next line 263 line++; 264 column = 0; 265 } 266 return new Point(line, column); 267 } 268 269 /** 270 * Represents current position in the text. 271 */ 272 private static final class Point { 273 274 /** Line number. */ 275 private final int lineNo; 276 /** Column number.*/ 277 private final int columnNo; 278 279 /** 280 * Creates new {@code Point} instance. 281 * 282 * @param lineNo line number 283 * @param columnNo column number 284 */ 285 private Point(int lineNo, int columnNo) { 286 this.lineNo = lineNo; 287 this.columnNo = columnNo; 288 } 289 290 /** 291 * Getter for line number. 292 * 293 * @return line number of the position. 294 */ 295 public int getLineNo() { 296 return lineNo; 297 } 298 299 /** 300 * Getter for column number. 301 * 302 * @return column number of the position. 303 */ 304 public int getColumnNo() { 305 return columnNo; 306 } 307 308 } 309 310}