001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.util.string; 018 019import java.io.UnsupportedEncodingException; 020import java.nio.charset.Charset; 021import java.util.ArrayList; 022import java.util.List; 023import java.util.Locale; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import org.apache.wicket.util.lang.Args; 028 029/** 030 * A variety of static String utility methods. 031 * <p> 032 * The escapeMarkup() and toMultilineMarkup() methods are useful for turning normal Java Strings 033 * into HTML strings. 034 * <p> 035 * The lastPathComponent(), firstPathComponent(), afterFirstPathComponent() and 036 * beforeLastPathComponent() methods can chop up a String into path components using a separator 037 * character. If the separator cannot be found the original String is returned. 038 * <p> 039 * Similarly, the beforeLast(), beforeFirst(), afterFirst() and afterLast() methods return sections 040 * before and after a separator character. But if the separator cannot be found, an empty string is 041 * returned. 
 * <p>
 * Some other miscellaneous methods will strip a given ending off a String if it can be found
 * (stripEnding()), replace all occurrences of one String with another (replaceAll), do type
 * conversions (toBoolean(), toChar(), toString()), check a String for emptiness (isEmpty()),
 * convert a Throwable to a String (toString(Throwable)) or capitalize a String (capitalize()).
 *
 * @author Jonathan Locke
 */
public final class Strings
{
    /** A table of hex digits, indexed by nibble value (0-15). */
    private static final char[] HEX_DIGIT = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'A', 'B', 'C', 'D', 'E', 'F' };

    /** Matches a decimal HTML numeric character reference: "&amp;#", one or more digits, ";". */
    private static final Pattern HTML_NUMBER_REGEX = Pattern.compile("&#\\d+;");

    /** Shared zero-length array. */
    private static final String[] NO_STRINGS = new String[0];

    /**
     * The name of the parameter used to keep the session id.
     * The Servlet specification mandates <em>jsessionid</em> but the web containers
     * provide ways to set a custom one, e.g. <em>sid</em>.
     * Since Wicket doesn't have access to the web container internals the name should be set explicitly.
     * <p>
     * The value is read once, at class initialization, from the system property
     * <em>wicket.jsessionid.name</em> and defaults to <em>jsessionid</em>.
     */
    public static final String SESSION_ID_PARAM_NAME = System.getProperty("wicket.jsessionid.name", "jsessionid");

    /**
     * Constructs something like <em>;jsessionid=</em>. This is what {@linkplain Strings#stripJSessionId(String)}
     * actually uses.
     */
    // the field is not 'final' because we need to modify it in a unit test
    // see https://github.com/openjdk/jdk/pull/5027#issuecomment-968177213
    private static String SESSION_ID_PARAM = ';' + SESSION_ID_PARAM_NAME + '=';

    /**
     * Private constructor prevents construction.
     */
    private Strings()
    {
    }

    /**
     * Returns everything after the first occurrence of the given character in s.
     *
     * @param s
     *            The string
     * @param c
     *            The character
     * @return Everything after the first occurrence of the given character in s.
If the character 091 * cannot be found, an empty string is returned. 092 */ 093 public static String afterFirst(final String s, final char c) 094 { 095 if (s == null) 096 { 097 return null; 098 } 099 final int index = s.indexOf(c); 100 101 if (index == -1) 102 { 103 return ""; 104 } 105 106 return s.substring(index + 1); 107 } 108 109 /** 110 * Gets everything after the first path component of a path using a given separator. If the 111 * separator cannot be found, an empty String is returned. 112 * <p> 113 * For example, afterFirstPathComponent("foo:bar:baz", ':') would return "bar:baz" and 114 * afterFirstPathComponent("foo", ':') would return "". 115 * 116 * @param path 117 * The path to parse 118 * @param separator 119 * The path separator character 120 * @return Everything after the first component in the path 121 */ 122 public static String afterFirstPathComponent(final String path, final char separator) 123 { 124 return afterFirst(path, separator); 125 } 126 127 /** 128 * Returns everything after the last occurrence of the given character in s. 129 * 130 * @param s 131 * The string 132 * @param c 133 * The character 134 * @return Everything after the last occurrence of the given character in s. If the character 135 * cannot be found, an empty string is returned. 136 */ 137 public static String afterLast(final String s, final char c) 138 { 139 if (s == null) 140 { 141 return null; 142 } 143 final int index = s.lastIndexOf(c); 144 145 if (index == -1) 146 { 147 return ""; 148 } 149 150 return s.substring(index + 1); 151 } 152 153 /** 154 * Returns everything before the first occurrence of the given character in s. 155 * 156 * @param s 157 * The string 158 * @param c 159 * The character 160 * @return Everything before the first occurrence of the given character in s. If the character 161 * cannot be found, an empty string is returned. 
162 */ 163 public static String beforeFirst(final String s, final char c) 164 { 165 if (s == null) 166 { 167 return null; 168 } 169 final int index = s.indexOf(c); 170 171 if (index == -1) 172 { 173 return ""; 174 } 175 176 return s.substring(0, index); 177 } 178 179 /** 180 * Returns everything before the last occurrence of the given character in s. 181 * 182 * @param s 183 * The string 184 * @param c 185 * The character 186 * @return Everything before the last occurrence of the given character in s. If the character 187 * cannot be found, an empty string is returned. 188 */ 189 public static String beforeLast(final String s, final char c) 190 { 191 if (s == null) 192 { 193 return null; 194 } 195 final int index = s.lastIndexOf(c); 196 197 if (index == -1) 198 { 199 return ""; 200 } 201 202 return s.substring(0, index); 203 } 204 205 /** 206 * Gets everything before the last path component of a path using a given separator. If the 207 * separator cannot be found, the path itself is returned. 208 * <p> 209 * For example, beforeLastPathComponent("foo.bar.baz", '.') would return "foo.bar" and 210 * beforeLastPathComponent("foo", '.') would return "". 211 * 212 * @param path 213 * The path to parse 214 * @param separator 215 * The path separator character 216 * @return Everything before the last component in the path 217 */ 218 public static String beforeLastPathComponent(final String path, final char separator) 219 { 220 return beforeLast(path, separator); 221 } 222 223 /** 224 * Capitalizes a string. 
225 * 226 * @param s 227 * The string 228 * @return The capitalized string 229 */ 230 public static String capitalize(final String s) 231 { 232 if (s == null) 233 { 234 return null; 235 } 236 final char[] chars = s.toCharArray(); 237 238 if (chars.length > 0) 239 { 240 chars[0] = Character.toUpperCase(chars[0]); 241 } 242 243 return new String(chars); 244 } 245 246 /** 247 * Converts a Java String to an HTML markup string, but does not convert normal spaces to 248 * non-breaking space entities (<nbsp>). 249 * 250 * @param s 251 * The characters to escape 252 * @see Strings#escapeMarkup(CharSequence, boolean) 253 * @return The escaped string 254 */ 255 public static CharSequence escapeMarkup(final CharSequence s) 256 { 257 return escapeMarkup(s, false); 258 } 259 260 /** 261 * Converts a Java String to an HTML markup String by replacing illegal characters with HTML 262 * entities where appropriate. Spaces are converted to non-breaking spaces (<nbsp>) if 263 * escapeSpaces is true, tabs are converted to four non-breaking spaces, less than signs are 264 * converted to &lt; entities and greater than signs to &gt; entities. 265 * 266 * @param s 267 * The characters to escape 268 * @param escapeSpaces 269 * True to replace ' ' with nonbreaking space 270 * @return The escaped string 271 */ 272 public static CharSequence escapeMarkup(final CharSequence s, final boolean escapeSpaces) 273 { 274 return escapeMarkup(s, escapeSpaces, false); 275 } 276 277 /** 278 * Converts a Java String to an HTML markup String by replacing illegal characters with HTML 279 * entities where appropriate. Spaces are converted to non-breaking spaces (<nbsp>) if 280 * escapeSpaces is true, tabs are converted to four non-breaking spaces, less than signs are 281 * converted to &lt; entities and greater than signs to &gt; entities. 
282 * 283 * @param s 284 * The characters to escape 285 * @param escapeSpaces 286 * True to replace ' ' with nonbreaking space 287 * @param convertToHtmlUnicodeEscapes 288 * True to convert non-7 bit characters to unicode HTML (&#...) 289 * @return The escaped string 290 */ 291 public static CharSequence escapeMarkup(final CharSequence s, final boolean escapeSpaces, 292 final boolean convertToHtmlUnicodeEscapes) 293 { 294 if (s == null) 295 { 296 return null; 297 } 298 299 final int len = s.length(); 300 if (len == 0) 301 { 302 return s; 303 } 304 305 final AppendingStringBuffer buffer = new AppendingStringBuffer((int)(len * 1.1)); 306 307 for (int i = 0; i < len; i++) 308 { 309 final char c = s.charAt(i); 310 311 if (Character.getType(c) == Character.UNASSIGNED) 312 { 313 continue; 314 } 315 switch (c) 316 { 317 case '\t' : 318 if (escapeSpaces) 319 { 320 // Assumption is four space tabs (sorry, but that's 321 // just how it is!) 322 buffer.append(" "); 323 } 324 else 325 { 326 buffer.append(c); 327 } 328 break; 329 330 case ' ' : 331 if (escapeSpaces) 332 { 333 buffer.append(" "); 334 } 335 else 336 { 337 buffer.append(c); 338 } 339 break; 340 341 case '<' : 342 buffer.append("<"); 343 break; 344 345 case '>' : 346 buffer.append(">"); 347 break; 348 349 case '&' : 350 351 buffer.append("&"); 352 break; 353 354 case '"' : 355 buffer.append("""); 356 break; 357 358 case '\'' : 359 buffer.append("'"); 360 break; 361 362 default : 363 364 int ci = 0xffff & c; 365 366 if ( 367 // if this is non-printable and not whitespace (TAB, LF, CR) 368 ((ci < 32) && (ci != 9) && (ci != 10) && (ci != 13)) || 369 // or non-ASCII (XXX: why 160+ ?!) 
and need to UNICODE escape it 370 (convertToHtmlUnicodeEscapes && (ci > 159))) 371 { 372 buffer.append("&#"); 373 buffer.append(Integer.toString(ci)); 374 buffer.append(';'); 375 } 376 else 377 { 378 // ASCII or whitespace 379 buffer.append(c); 380 } 381 break; 382 } 383 } 384 385 return buffer; 386 } 387 388 /** 389 * Unescapes the escaped entities in the <code>markup</code> passed. 390 * 391 * @param markup 392 * The source <code>String</code> to unescape. 393 * @return the unescaped markup or <code>null</null> if the input is <code>null</code> 394 */ 395 public static CharSequence unescapeMarkup(final String markup) 396 { 397 String unescapedMarkup = StringEscapeUtils.unescapeHtml(markup); 398 return unescapedMarkup; 399 } 400 401 /** 402 * Gets the first path component of a path using a given separator. If the separator cannot be 403 * found, the path itself is returned. 404 * <p> 405 * For example, firstPathComponent("foo.bar", '.') would return "foo" and 406 * firstPathComponent("foo", '.') would return "foo". 407 * 408 * @param path 409 * The path to parse 410 * @param separator 411 * The path separator character 412 * @return The first component in the path or path itself if no separator characters exist. 413 */ 414 public static String firstPathComponent(final String path, final char separator) 415 { 416 if (path == null) 417 { 418 return null; 419 } 420 final int index = path.indexOf(separator); 421 422 if (index == -1) 423 { 424 return path; 425 } 426 427 return path.substring(0, index); 428 } 429 430 /** 431 * Converts encoded \uxxxx to unicode chars and changes special saved chars to their 432 * original forms. 433 * 434 * @param escapedUnicodeString 435 * escaped unicode string, like '\u4F60\u597D'. 436 * 437 * @return The actual unicode. 
Can be used for instance with message bundles 438 */ 439 public static String fromEscapedUnicode(final String escapedUnicodeString) 440 { 441 int off = 0; 442 char[] in = escapedUnicodeString.toCharArray(); 443 int len = in.length; 444 char[] out = new char[len]; 445 char aChar; 446 int outLen = 0; 447 int end = off + len; 448 449 while (off < end) 450 { 451 aChar = in[off++]; 452 if (aChar == '\\') 453 { 454 aChar = in[off++]; 455 if (aChar == 'u') 456 { 457 // Read the xxxx 458 int value = 0; 459 for (int i = 0; i < 4; i++) 460 { 461 aChar = in[off++]; 462 switch (aChar) 463 { 464 case '0' : 465 case '1' : 466 case '2' : 467 case '3' : 468 case '4' : 469 case '5' : 470 case '6' : 471 case '7' : 472 case '8' : 473 case '9' : 474 value = (value << 4) + aChar - '0'; 475 break; 476 case 'a' : 477 case 'b' : 478 case 'c' : 479 case 'd' : 480 case 'e' : 481 case 'f' : 482 value = (value << 4) + 10 + aChar - 'a'; 483 break; 484 case 'A' : 485 case 'B' : 486 case 'C' : 487 case 'D' : 488 case 'E' : 489 case 'F' : 490 value = (value << 4) + 10 + aChar - 'A'; 491 break; 492 default : 493 throw new IllegalArgumentException("Malformed \\uxxxx encoding."); 494 } 495 } 496 out[outLen++] = (char)value; 497 } 498 else 499 { 500 if (aChar == 't') 501 { 502 aChar = '\t'; 503 } 504 else if (aChar == 'r') 505 { 506 aChar = '\r'; 507 } 508 else if (aChar == 'n') 509 { 510 aChar = '\n'; 511 } 512 else if (aChar == 'f') 513 { 514 aChar = '\f'; 515 } 516 out[outLen++] = aChar; 517 } 518 } 519 else 520 { 521 out[outLen++] = aChar; 522 } 523 } 524 return new String(out, 0, outLen); 525 } 526 527 /** 528 * Checks whether the <code>string</code> is considered empty. Empty means that the string may 529 * contain whitespace, but no visible characters. 530 * 531 * "\n\t " is considered empty, while " a" is not. 
532 * 533 * @param string 534 * The string 535 * @return True if the string is null or "" 536 */ 537 public static boolean isEmpty(final CharSequence string) 538 { 539 return string == null || string.length() == 0 || 540 (string.charAt(0) <= ' ' && string.toString().trim().isEmpty()); 541 } 542 543 /** 544 * Checks whether the <code>string</code> is considered empty. Empty means that the string may 545 * contain whitespace, but no visible characters. 546 * 547 * "\n\t " is considered empty, while " a" is not. 548 * 549 * Note: This method overloads {@link #isEmpty(CharSequence)} for performance reasons. 550 * 551 * @param string 552 * The string 553 * @return True if the string is null or "" 554 */ 555 public static boolean isEmpty(final String string) 556 { 557 return string == null || string.isEmpty() || 558 (string.charAt(0) <= ' ' && string.trim().isEmpty()); 559 } 560 561 /** 562 * Checks whether two strings are equals taken care of 'null' values and treating 'null' same as 563 * trim(string).equals("") 564 * 565 * @param string1 566 * @param string2 567 * @return true, if both strings are equal 568 */ 569 public static boolean isEqual(final String string1, final String string2) 570 { 571 if ((string1 == null) && (string2 == null)) 572 { 573 return true; 574 } 575 576 if (isEmpty(string1) && isEmpty(string2)) 577 { 578 return true; 579 } 580 if ((string1 == null) || (string2 == null)) 581 { 582 return false; 583 } 584 585 return string1.equals(string2); 586 } 587 588 /** 589 * Converts the text in <code>s</code> to a corresponding boolean. On, yes, y, true and 1 are 590 * converted to <code>true</code>. Off, no, n, false and 0 (zero) are converted to 591 * <code>false</code>. An empty string is converted to <code>false</code>. Conversion is 592 * case-insensitive, and does <em>not</em> take internationalization into account. 593 * 594 * 'Ja', 'Oui', 'Igen', 'Nein', 'Nee', 'Non', 'Nem' are all illegal values. 
595 * 596 * @param s 597 * the value to convert into a boolean 598 * @return Boolean the converted value of <code>s</code> 599 * @throws StringValueConversionException 600 * when the value of <code>s</code> is not recognized. 601 */ 602 public static boolean isTrue(final String s) throws StringValueConversionException 603 { 604 if (s != null) 605 { 606 if (s.equalsIgnoreCase("true")) 607 { 608 return true; 609 } 610 611 if (s.equalsIgnoreCase("false")) 612 { 613 return false; 614 } 615 616 if (s.equalsIgnoreCase("on") || s.equalsIgnoreCase("yes") || s.equalsIgnoreCase("y") || 617 s.equalsIgnoreCase("1")) 618 { 619 return true; 620 } 621 622 if (s.equalsIgnoreCase("off") || s.equalsIgnoreCase("no") || s.equalsIgnoreCase("n") || 623 s.equalsIgnoreCase("0")) 624 { 625 return false; 626 } 627 628 if (isEmpty(s)) 629 { 630 return false; 631 } 632 633 throw new StringValueConversionException("Boolean value \"" + s + "\" not recognized"); 634 } 635 636 return false; 637 } 638 639 /** 640 * Joins string fragments using the specified separator 641 * 642 * @param separator 643 * @param fragments 644 * @return combined fragments 645 */ 646 public static String join(final String separator, final List<String> fragments) 647 { 648 if (fragments == null) 649 { 650 return ""; 651 } 652 return join(separator, fragments.toArray(new String[0])); 653 } 654 655 /** 656 * Joins string fragments using the specified separator 657 * 658 * @param separator 659 * @param fragments 660 * @return combined fragments 661 */ 662 public static String join(final String separator, final String... 
fragments) 663 { 664 if ((fragments == null) || (fragments.length < 1)) 665 { 666 // no elements 667 return ""; 668 } 669 else if (fragments.length < 2) 670 { 671 // single element 672 return fragments[0]; 673 } 674 else 675 { 676 // two or more elements 677 AppendingStringBuffer buff = new AppendingStringBuffer(128); 678 if (fragments[0] != null) 679 { 680 buff.append(fragments[0]); 681 } 682 boolean separatorNotEmpty = !Strings.isEmpty(separator); 683 for (int i = 1; i < fragments.length; i++) 684 { 685 String fragment = fragments[i]; 686 String previousFragment = fragments[i - 1]; 687 if (previousFragment != null || fragment != null) 688 { 689 boolean lhsClosed = previousFragment.endsWith(separator); 690 boolean rhsClosed = fragment.startsWith(separator); 691 if (separatorNotEmpty && lhsClosed && rhsClosed) 692 { 693 buff.append(fragment.substring(1)); 694 } 695 else if (!lhsClosed && !rhsClosed) 696 { 697 if (!Strings.isEmpty(fragment)) 698 { 699 buff.append(separator); 700 } 701 buff.append(fragment); 702 } 703 else 704 { 705 buff.append(fragment); 706 } 707 } 708 } 709 return buff.toString(); 710 } 711 } 712 713 /** 714 * Gets the last path component of a path using a given separator. If the separator cannot be 715 * found, the path itself is returned. 716 * <p> 717 * For example, lastPathComponent("foo.bar", '.') would return "bar" and 718 * lastPathComponent("foo", '.') would return "foo". 719 * 720 * @param path 721 * The path to parse 722 * @param separator 723 * The path separator character 724 * @return The last component in the path or path itself if no separator characters exist. 
725 */ 726 public static String lastPathComponent(final String path, final char separator) 727 { 728 if (path == null) 729 { 730 return null; 731 } 732 733 final int index = path.lastIndexOf(separator); 734 735 if (index == -1) 736 { 737 return path; 738 } 739 740 return path.substring(index + 1); 741 } 742 743 /** 744 * Replace all occurrences of one string replaceWith another string. 745 * 746 * @param s 747 * The string to process 748 * @param searchFor 749 * The value to search for 750 * @param replaceWith 751 * The value to searchFor replaceWith 752 * @return The resulting string with searchFor replaced with replaceWith 753 */ 754 public static CharSequence replaceAll(final CharSequence s, final CharSequence searchFor, 755 CharSequence replaceWith) 756 { 757 if (s == null) 758 { 759 return null; 760 } 761 762 // If searchFor is null or the empty string, then there is nothing to 763 // replace, so returning s is the only option here. 764 if ((searchFor == null) || searchFor.length() == 0) 765 { 766 return s; 767 } 768 769 // If replaceWith is null, then the searchFor should be replaced with 770 // nothing, which can be seen as the empty string. 771 if (replaceWith == null) 772 { 773 replaceWith = ""; 774 } 775 776 String searchString = searchFor.toString(); 777 // Look for first occurrence of searchFor 778 int matchIndex = search(s, searchString, 0); 779 if (matchIndex == -1) 780 { 781 // No replace operation needs to happen 782 return s; 783 } 784 else 785 { 786 return s.toString().replace(searchString, replaceWith); 787 } 788 } 789 790 /** 791 * Replace HTML numbers like &#20540; by the appropriate character. 
792 * 793 * @param str 794 * The text to be evaluated 795 * @return The text with "numbers" replaced 796 */ 797 public static String replaceHtmlEscapeNumber(String str) 798 { 799 if (str == null) 800 { 801 return null; 802 } 803 Matcher matcher = HTML_NUMBER_REGEX.matcher(str); 804 while (matcher.find()) 805 { 806 int pos = matcher.start(); 807 int end = matcher.end(); 808 int number = Integer.parseInt(str.substring(pos + 2, end - 1)); 809 char ch = (char)number; 810 str = str.substring(0, pos) + ch + str.substring(end); 811 matcher = HTML_NUMBER_REGEX.matcher(str); 812 } 813 814 return str; 815 } 816 817 /** 818 * Simpler, faster version of String.split() for splitting on a simple character. 819 * 820 * @param s 821 * The string to split 822 * @param c 823 * The character to split on 824 * @return The array of strings 825 */ 826 public static String[] split(final String s, final char c) 827 { 828 if (s == null || s.isEmpty()) 829 { 830 return NO_STRINGS; 831 } 832 833 int pos = s.indexOf(c); 834 if (pos == -1) 835 { 836 return new String[] { s }; 837 } 838 839 int next = s.indexOf(c, pos + 1); 840 if (next == -1) 841 { 842 return new String[] { s.substring(0, pos), s.substring(pos + 1) }; 843 } 844 845 final List<String> strings = new ArrayList<>(); 846 strings.add(s.substring(0, pos)); 847 strings.add(s.substring(pos + 1, next)); 848 while (true) 849 { 850 pos = next + 1; 851 next = s.indexOf(c, pos); 852 if (next == -1) 853 { 854 strings.add(s.substring(pos)); 855 break; 856 } 857 else 858 { 859 strings.add(s.substring(pos, next)); 860 } 861 } 862 final String[] result = new String[strings.size()]; 863 strings.toArray(result); 864 return result; 865 } 866 867 /** 868 * Strips the ending from the string <code>s</code>. 
869 * 870 * @param s 871 * The string to strip 872 * @param ending 873 * The ending to strip off 874 * @return The stripped string or the original string if the ending did not exist 875 */ 876 public static String stripEnding(final String s, final String ending) 877 { 878 if (s == null) 879 { 880 return null; 881 } 882 883 // Stripping a null or empty string from the end returns the 884 // original string. 885 if (ending == null || ending.isEmpty()) 886 { 887 return s; 888 } 889 final int endingLength = ending.length(); 890 final int sLength = s.length(); 891 892 // When the length of the ending string is larger 893 // than the original string, the original string is returned. 894 if (endingLength > sLength) 895 { 896 return s; 897 } 898 final int index = s.lastIndexOf(ending); 899 final int endpos = sLength - endingLength; 900 901 if (index == endpos) 902 { 903 return s.substring(0, endpos); 904 } 905 906 return s; 907 } 908 909 /** 910 * Strip any jsessionid and possibly other redundant info that might be in our way. 911 * 912 * @param url 913 * The url to strip 914 * @return The stripped url 915 */ 916 public static String stripJSessionId(final String url) 917 { 918 if (Strings.isEmpty(url)) 919 { 920 return url; 921 } 922 923 // http://.../abc;jsessionid=...?param=... 924 int ixSemiColon = url.indexOf(SESSION_ID_PARAM); 925 if (ixSemiColon == -1) 926 { 927 return url; 928 } 929 930 int ixQuestionMark = url.indexOf('?'); 931 if (ixQuestionMark == -1) 932 { 933 // no query paramaters; cut off at ";" 934 // http://.../abc;jsession=... 935 return url.substring(0, ixSemiColon); 936 } 937 938 if (ixQuestionMark <= ixSemiColon) 939 { 940 // ? is before ; - no jsessionid in the url 941 return url; 942 } 943 944 return url.substring(0, ixSemiColon) + url.substring(ixQuestionMark); 945 } 946 947 /** 948 * Converts the string s to a Boolean. See <code>isTrue</code> for valid values of s. 949 * 950 * @param s 951 * The string to convert. 
952 * @return Boolean <code>TRUE</code> when <code>isTrue(s)</code>. 953 * @throws StringValueConversionException 954 * when s is not a valid value 955 * @see #isTrue(String) 956 */ 957 public static Boolean toBoolean(final String s) throws StringValueConversionException 958 { 959 return isTrue(s); 960 } 961 962 /** 963 * Converts the 1 character string s to a character. 964 * 965 * @param s 966 * The 1 character string to convert to a char. 967 * @return Character value to convert 968 * @throws StringValueConversionException 969 * when the string is longer or shorter than 1 character, or <code>null</code>. 970 */ 971 public static char toChar(final String s) throws StringValueConversionException 972 { 973 if (s != null) 974 { 975 if (s.length() == 1) 976 { 977 return s.charAt(0); 978 } 979 else 980 { 981 throw new StringValueConversionException("Expected single character, not \"" + s + 982 "\""); 983 } 984 } 985 986 throw new StringValueConversionException("Character value was null"); 987 } 988 989 /** 990 * Converts unicodes to encoded \uxxxx. 991 * 992 * @param unicodeString 993 * The unicode string 994 * @return The escaped unicode string, like '\u4F60\u597D'. 
995 */ 996 public static String toEscapedUnicode(final String unicodeString) 997 { 998 if (unicodeString == null || unicodeString.isEmpty()) 999 { 1000 return unicodeString; 1001 } 1002 int len = unicodeString.length(); 1003 int bufLen = len * 2; 1004 StringBuilder outBuffer = new StringBuilder(bufLen); 1005 for (int x = 0; x < len; x++) 1006 { 1007 char aChar = unicodeString.charAt(x); 1008 if (Character.getType(aChar) == Character.UNASSIGNED) 1009 { 1010 continue; 1011 } 1012 // Handle common case first, selecting largest block that 1013 // avoids the specials below 1014 if ((aChar > 61) && (aChar < 127)) 1015 { 1016 if (aChar == '\\') 1017 { 1018 outBuffer.append('\\'); 1019 outBuffer.append('\\'); 1020 continue; 1021 } 1022 outBuffer.append(aChar); 1023 continue; 1024 } 1025 switch (aChar) 1026 { 1027 case ' ' : 1028 if (x == 0) 1029 { 1030 outBuffer.append('\\'); 1031 } 1032 outBuffer.append(' '); 1033 break; 1034 case '\t' : 1035 outBuffer.append('\\'); 1036 outBuffer.append('t'); 1037 break; 1038 case '\n' : 1039 outBuffer.append('\\'); 1040 outBuffer.append('n'); 1041 break; 1042 case '\r' : 1043 outBuffer.append('\\'); 1044 outBuffer.append('r'); 1045 break; 1046 case '\f' : 1047 outBuffer.append('\\'); 1048 outBuffer.append('f'); 1049 break; 1050 case '=' : // Fall through 1051 case ':' : // Fall through 1052 case '#' : // Fall through 1053 case '!' 
: 1054 outBuffer.append('\\'); 1055 outBuffer.append(aChar); 1056 break; 1057 default : 1058 if ((aChar < 0x0020) || (aChar > 0x007e)) 1059 { 1060 outBuffer.append('\\'); 1061 outBuffer.append('u'); 1062 outBuffer.append(toHex((aChar >> 12) & 0xF)); 1063 outBuffer.append(toHex((aChar >> 8) & 0xF)); 1064 outBuffer.append(toHex((aChar >> 4) & 0xF)); 1065 outBuffer.append(toHex(aChar & 0xF)); 1066 } 1067 else 1068 { 1069 outBuffer.append(aChar); 1070 } 1071 } 1072 } 1073 return outBuffer.toString(); 1074 } 1075 1076 /** 1077 * Converts a String to multiline HTML markup by replacing newlines with line break entities 1078 * (<br/>) and multiple occurrences of newline with paragraph break entities (<p>). 1079 * 1080 * @param s 1081 * String to transform 1082 * @return String with all single occurrences of newline replaced with <br/> and all 1083 * multiple occurrences of newline replaced with <p>. 1084 */ 1085 public static CharSequence toMultilineMarkup(final CharSequence s) 1086 { 1087 if (s == null) 1088 { 1089 return null; 1090 } 1091 1092 final int len = s.length(); 1093 1094 // allocate a buffer that is 10% larger than the original string to account for markup 1095 final AppendingStringBuffer buffer = new AppendingStringBuffer((int) (len * 1.1) + 16); 1096 int newlineCount = 0; 1097 1098 buffer.append("<p>"); 1099 for (int i = 0; i < len; i++) 1100 { 1101 final char c = s.charAt(i); 1102 1103 switch (c) 1104 { 1105 case '\n' : 1106 newlineCount++; 1107 break; 1108 1109 case '\r' : 1110 break; 1111 1112 default : 1113 if (newlineCount == 1) 1114 { 1115 buffer.append("<br/>"); 1116 } 1117 else if (newlineCount > 1) 1118 { 1119 buffer.append("</p><p>"); 1120 } 1121 1122 buffer.append(c); 1123 newlineCount = 0; 1124 break; 1125 } 1126 } 1127 if (newlineCount == 1) 1128 { 1129 buffer.append("<br/>"); 1130 } 1131 else if (newlineCount > 1) 1132 { 1133 buffer.append("</p><p>"); 1134 } 1135 buffer.append("</p>"); 1136 return buffer; 1137 } 1138 1139 /** 1140 * Converts the 
given object to a string. Does special conversion for {@link Throwable 1141 * throwables} and String arrays of length 1 (in which case it just returns to string in that 1142 * array, as this is a common thing to have in the Servlet API). 1143 * 1144 * @param object 1145 * The object 1146 * @return The string 1147 */ 1148 public static String toString(final Object object) 1149 { 1150 if (object == null) 1151 { 1152 return null; 1153 } 1154 1155 if (object instanceof Throwable) 1156 { 1157 return toString((Throwable)object); 1158 } 1159 1160 if (object instanceof String) 1161 { 1162 return (String)object; 1163 } 1164 1165 if ((object instanceof String[]) && (((String[])object).length == 1)) 1166 { 1167 return ((String[])object)[0]; 1168 } 1169 1170 return object.toString(); 1171 } 1172 1173 1174 /** 1175 * Converts a Throwable to a string. 1176 * 1177 * @param throwable 1178 * The throwable 1179 * @return The string 1180 */ 1181 public static String toString(final Throwable throwable) 1182 { 1183 if (throwable != null) 1184 { 1185 List<Throwable> al = new ArrayList<>(); 1186 Throwable cause = throwable; 1187 al.add(cause); 1188 while ((cause.getCause() != null) && (cause != cause.getCause())) 1189 { 1190 cause = cause.getCause(); 1191 al.add(cause); 1192 } 1193 1194 AppendingStringBuffer sb = new AppendingStringBuffer(256); 1195 // first print the last cause 1196 int length = al.size() - 1; 1197 cause = al.get(length); 1198 if (throwable instanceof RuntimeException) 1199 { 1200 sb.append("Message: "); 1201 sb.append(throwable.getMessage()); 1202 sb.append("\n\n"); 1203 } 1204 sb.append("Root cause:\n\n"); 1205 outputThrowable(cause, sb, false); 1206 1207 if (length > 0) 1208 { 1209 sb.append("\n\nComplete stack:\n\n"); 1210 for (int i = 0; i < length; i++) 1211 { 1212 outputThrowable(al.get(i), sb, true); 1213 sb.append('\n'); 1214 } 1215 } 1216 return sb.toString(); 1217 } 1218 else 1219 { 1220 return "<Null Throwable>"; 1221 } 1222 } 1223 1224 private static void 
append(final AppendingStringBuffer buffer, final CharSequence s, 1225 final int from, final int to) 1226 { 1227 if (s instanceof AppendingStringBuffer) 1228 { 1229 AppendingStringBuffer asb = (AppendingStringBuffer)s; 1230 buffer.append(asb.getValue(), from, to - from); 1231 } 1232 else 1233 { 1234 buffer.append(s.subSequence(from, to)); 1235 } 1236 } 1237 1238 /** 1239 * Outputs the throwable and its stacktrace to the stringbuffer. If stopAtWicketSerlvet is true 1240 * then the output will stop when the org.apache.wicket servlet is reached. sun.reflect. 1241 * packages are filtered out. 1242 * 1243 * @param cause 1244 * @param sb 1245 * @param stopAtWicketServlet 1246 */ 1247 private static void outputThrowable(final Throwable cause, final AppendingStringBuffer sb, 1248 final boolean stopAtWicketServlet) 1249 { 1250 sb.append(cause); 1251 sb.append("\n"); 1252 StackTraceElement[] trace = cause.getStackTrace(); 1253 for (int i = 0; i < trace.length; i++) 1254 { 1255 String traceString = trace[i].toString(); 1256 if (!(traceString.startsWith("sun.reflect.") && (i > 1))) 1257 { 1258 sb.append(" at "); 1259 sb.append(traceString); 1260 sb.append("\n"); 1261 if (stopAtWicketServlet && 1262 (traceString.startsWith("org.apache.wicket.protocol.http.WicketServlet") || traceString.startsWith("org.apache.wicket.protocol.http.WicketFilter"))) 1263 { 1264 return; 1265 } 1266 } 1267 } 1268 } 1269 1270 private static int search(final CharSequence s, final String searchString, final int pos) 1271 { 1272 if (s instanceof String) 1273 { 1274 return ((String)s).indexOf(searchString, pos); 1275 } 1276 else if (s instanceof StringBuffer) 1277 { 1278 return ((StringBuffer)s).indexOf(searchString, pos); 1279 } 1280 else if (s instanceof StringBuilder) 1281 { 1282 return ((StringBuilder)s).indexOf(searchString, pos); 1283 } 1284 else if (s instanceof AppendingStringBuffer) 1285 { 1286 return ((AppendingStringBuffer)s).indexOf(searchString, pos); 1287 } 1288 else 1289 { 1290 return 
s.toString().indexOf(searchString, pos); 1291 } 1292 } 1293 1294 /** 1295 * Convert a nibble to a hex character 1296 * 1297 * @param nibble 1298 * the nibble to convert. 1299 * @return hex character 1300 */ 1301 private static char toHex(final int nibble) 1302 { 1303 return HEX_DIGIT[(nibble & 0xF)]; 1304 } 1305 1306 /** 1307 * Calculates the length of string in bytes, uses specified <code>charset</code> if provided. 1308 * 1309 * @param string 1310 * @param charset 1311 * (optional) character set to use when converting string to bytes 1312 * @return length of string in bytes 1313 */ 1314 public static int lengthInBytes(final String string, final Charset charset) 1315 { 1316 Args.notNull(string, "string"); 1317 if (charset != null) 1318 { 1319 try 1320 { 1321 return string.getBytes(charset.name()).length; 1322 } 1323 catch (UnsupportedEncodingException e) 1324 { 1325 throw new RuntimeException( 1326 "StringResourceStream created with unsupported charset: " + charset.name()); 1327 } 1328 } 1329 else 1330 { 1331 return string.getBytes().length; 1332 } 1333 } 1334 1335 /** 1336 * Extended {@link String#startsWith(String)} with support for case sensitivity 1337 * 1338 * @param str 1339 * @param prefix 1340 * @param caseSensitive 1341 * @return <code>true</code> if <code>str</code> starts with <code>prefix</code> 1342 */ 1343 public static boolean startsWith(final String str, final String prefix, 1344 final boolean caseSensitive) 1345 { 1346 if (caseSensitive) 1347 { 1348 return str.startsWith(prefix); 1349 } 1350 else 1351 { 1352 return str.toLowerCase(Locale.ROOT).startsWith(prefix.toLowerCase(Locale.ROOT)); 1353 } 1354 } 1355 1356 /** 1357 * returns the zero-based index of a character within a char sequence. this method mainly exists 1358 * as an faster alternative for <code>sequence.toString().indexOf(ch)</code>. 
1359 * 1360 * @param sequence 1361 * character sequence 1362 * @param ch 1363 * character to search for 1364 * @return index of character within character sequence or <code>-1</code> if not found 1365 */ 1366 public static int indexOf(final CharSequence sequence, final char ch) 1367 { 1368 if (sequence != null) 1369 { 1370 for (int i = 0; i < sequence.length(); i++) 1371 { 1372 if (sequence.charAt(i) == ch) 1373 { 1374 return i; 1375 } 1376 } 1377 } 1378 1379 return -1; 1380 } 1381 1382 /** 1383 * <p> 1384 * Find the Levenshtein distance between two Strings. 1385 * </p> 1386 * 1387 * <p> 1388 * This is the number of changes needed to change one String into another, where each change is 1389 * a single character modification (deletion, insertion or substitution). 1390 * </p> 1391 * 1392 * <p> 1393 * The previous implementation of the Levenshtein distance algorithm was from <a 1394 * href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a> 1395 * </p> 1396 * 1397 * <p> 1398 * Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError which 1399 * can occur when my Java implementation is used with very large strings.<br> 1400 * This implementation of the Levenshtein distance algorithm is from <a 1401 * href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a> 1402 * </p> 1403 * 1404 * <pre> 1405 * Strings.getLevenshteinDistance(null, *) = IllegalArgumentException 1406 * Strings.getLevenshteinDistance(*, null) = IllegalArgumentException 1407 * Strings.getLevenshteinDistance("","") = 0 1408 * Strings.getLevenshteinDistance("","a") = 1 1409 * Strings.getLevenshteinDistance("aaapppp", "") = 7 1410 * Strings.getLevenshteinDistance("frog", "fog") = 1 1411 * Strings.getLevenshteinDistance("fly", "ant") = 3 1412 * Strings.getLevenshteinDistance("elephant", "hippo") = 7 1413 * Strings.getLevenshteinDistance("hippo", "elephant") = 7 1414 * Strings.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 1415 * 
Strings.getLevenshteinDistance("hello", "hallo") = 1 1416 * </pre> 1417 * 1418 * Copied from Apache commons-lang StringUtils 3.0 1419 * 1420 * @param s 1421 * the first String, must not be null 1422 * @param t 1423 * the second String, must not be null 1424 * @return result distance 1425 * @throws IllegalArgumentException 1426 * if either String input {@code null} 1427 */ 1428 public static int getLevenshteinDistance(CharSequence s, CharSequence t) 1429 { 1430 if (s == null || t == null) 1431 { 1432 throw new IllegalArgumentException("Strings must not be null"); 1433 } 1434 1435 /* 1436 * The difference between this impl. and the previous is that, rather than creating and 1437 * retaining a matrix of size s.length()+1 by t.length()+1, we maintain two 1438 * single-dimensional arrays of length s.length()+1. The first, d, is the 'current working' 1439 * distance array that maintains the newest distance cost counts as we iterate through the 1440 * characters of String s. Each time we increment the index of String t we are comparing, d 1441 * is copied to p, the second int[]. Doing so allows us to retain the previous cost counts 1442 * as required by the algorithm (taking the minimum of the cost count to the left, up one, 1443 * and diagonally up and to the left of the current cost count being calculated). (Note that 1444 * the arrays aren't really copied anymore, just switched...this is clearly much better than 1445 * cloning an array or doing a System.arraycopy() each time through the outer loop.) 1446 * 1447 * Effectively, the difference between the two implementations is this one does not cause an 1448 * out of memory condition when calculating the LD over two very large strings. 
1449 */ 1450 1451 int n = s.length(); // length of s 1452 int m = t.length(); // length of t 1453 1454 if (n == 0) 1455 { 1456 return m; 1457 } 1458 else if (m == 0) 1459 { 1460 return n; 1461 } 1462 1463 if (n > m) 1464 { 1465 // swap the input strings to consume less memory 1466 CharSequence tmp = s; 1467 s = t; 1468 t = tmp; 1469 n = m; 1470 m = t.length(); 1471 } 1472 1473 int p[] = new int[n + 1]; // 'previous' cost array, horizontally 1474 int d[] = new int[n + 1]; // cost array, horizontally 1475 int _d[]; // placeholder to assist in swapping p and d 1476 1477 // indexes into strings s and t 1478 int i; // iterates through s 1479 int j; // iterates through t 1480 1481 char t_j; // jth character of t 1482 1483 int cost; // cost 1484 1485 for (i = 0; i <= n; i++) 1486 { 1487 p[i] = i; 1488 } 1489 1490 for (j = 1; j <= m; j++) 1491 { 1492 t_j = t.charAt(j - 1); 1493 d[0] = j; 1494 1495 for (i = 1; i <= n; i++) 1496 { 1497 cost = s.charAt(i - 1) == t_j ? 0 : 1; 1498 // minimum of cell to the left+1, to the top+1, diagonally left and up +cost 1499 d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost); 1500 } 1501 1502 // copy current distance counts to 'previous row' distance counts 1503 _d = p; 1504 p = d; 1505 d = _d; 1506 } 1507 1508 // our last action in the above loop was to switch d and p, so p now 1509 // actually has the most recent cost counts 1510 return p[n]; 1511 } 1512 1513 /** 1514 * convert byte array to hex string 1515 * 1516 * @param bytes 1517 * bytes to convert to hexadecimal representation 1518 * 1519 * @return hex string 1520 */ 1521 public static String toHexString(byte[] bytes) 1522 { 1523 Args.notNull(bytes, "bytes"); 1524 1525 final StringBuilder hex = new StringBuilder(bytes.length << 1); 1526 1527 for (final byte b : bytes) 1528 { 1529 hex.append(toHex(b >> 4)); 1530 hex.append(toHex(b)); 1531 } 1532 return hex.toString(); 1533 } 1534 1535 1536 /** 1537 * Return this value as en enum value. 
1538 * 1539 * @param value 1540 * the value to convert to an enum value 1541 * @param enumClass 1542 * the enum type 1543 * @return an enum value 1544 */ 1545 public static <T extends Enum<T>> T toEnum(final CharSequence value, final Class<T> enumClass) 1546 { 1547 Args.notNull(enumClass, "enumClass"); 1548 Args.notNull(value, "value"); 1549 1550 try 1551 { 1552 return Enum.valueOf(enumClass, value.toString()); 1553 } 1554 catch (Exception e) 1555 { 1556 throw new StringValueConversionException( 1557 String.format("Cannot convert '%s' to enum constant of type '%s'.", value, enumClass), e); 1558 } 1559 } 1560 1561 /** 1562 * Returns the original string if this one is not empty (i.e. {@link #isEmpty(CharSequence)} returns false), 1563 * otherwise the default one is returned. The default string might be itself an empty one. 1564 * 1565 * @param originalString 1566 * the original sting value 1567 * @param defaultValue 1568 * the default string to return if the original is empty 1569 * @return the original string value if not empty, the default one otherwise 1570 */ 1571 public static String defaultIfEmpty(String originalString, String defaultValue) 1572 { 1573 return isEmpty(originalString) ? defaultValue : originalString; 1574 } 1575}