001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import java.io.IOException; 020import java.io.Writer; 021import java.util.Collections; 022import java.util.HashMap; 023import java.util.Map; 024 025import org.apache.commons.lang3.StringUtils; 026import org.apache.commons.text.translate.AggregateTranslator; 027import org.apache.commons.text.translate.CharSequenceTranslator; 028import org.apache.commons.text.translate.CsvTranslators; 029import org.apache.commons.text.translate.EntityArrays; 030import org.apache.commons.text.translate.JavaUnicodeEscaper; 031import org.apache.commons.text.translate.LookupTranslator; 032import org.apache.commons.text.translate.NumericEntityEscaper; 033import org.apache.commons.text.translate.NumericEntityUnescaper; 034import org.apache.commons.text.translate.OctalUnescaper; 035import org.apache.commons.text.translate.UnicodeUnescaper; 036import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover; 037 038/** 039 * <p> 040 * Escapes and unescapes {@code String}s for Java, Java Script, HTML and XML. 041 * </p> 042 * 043 * <p> 044 * #ThreadSafe# 045 * </p> 046 * 047 * <p> 048 * This code has been adapted from Apache Commons Lang 3.5. 049 * </p> 050 * 051 * @since 1.0 052 */ 053public class StringEscapeUtils { 054 055 /* ESCAPE TRANSLATORS */ 056 057 /** 058 * Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods. 059 * 060 * <p>Example:</p> 061 * <pre> 062 * new Builder(ESCAPE_HTML4) 063 * .append("<p>") 064 * .escape("This is paragraph 1 and special chars like & get escaped.") 065 * .append("</p><p>") 066 * .escape("This is paragraph 2 & more...") 067 * .append("</p>") 068 * .toString() 069 * </pre> 070 * 071 */ 072 public static final class Builder { 073 074 /** 075 * StringBuilder to be used in the Builder class. 076 */ 077 private final StringBuilder sb; 078 079 /** 080 * CharSequenceTranslator to be used in the Builder class. 081 */ 082 private final CharSequenceTranslator translator; 083 084 /** 085 * Builder constructor. 086 * 087 * @param translator a CharSequenceTranslator. 088 */ 089 private Builder(final CharSequenceTranslator translator) { 090 this.sb = new StringBuilder(); 091 this.translator = translator; 092 } 093 094 /** 095 * Literal append, no escaping being done. 096 * 097 * @param input the String to append 098 * @return {@code this}, to enable chaining 099 */ 100 public Builder append(final String input) { 101 sb.append(input); 102 return this; 103 } 104 105 /** 106 * Escape {@code input} according to the given {@link CharSequenceTranslator}. 107 * 108 * @param input the String to escape 109 * @return {@code this}, to enable chaining 110 */ 111 public Builder escape(final String input) { 112 sb.append(translator.translate(input)); 113 return this; 114 } 115 116 /** 117 * Return the escaped string. 118 * 119 * @return The escaped string 120 */ 121 @Override 122 public String toString() { 123 return sb.toString(); 124 } 125 } 126 /** 127 * Translator object for unescaping backslash escaped entries. 128 */ 129 static class XsiUnescaper extends CharSequenceTranslator { 130 131 /** 132 * Escaped backslash constant. 133 */ 134 private static final char BACKSLASH = '\\'; 135 136 @Override 137 public int translate(final CharSequence input, final int index, final Writer writer) throws IOException { 138 139 if (index != 0) { 140 throw new IllegalStateException("XsiUnescaper should never reach the [1] index"); 141 } 142 143 final String s = input.toString(); 144 145 int segmentStart = 0; 146 int searchOffset = 0; 147 while (true) { 148 final int pos = s.indexOf(BACKSLASH, searchOffset); 149 if (pos == -1) { 150 if (segmentStart < s.length()) { 151 writer.write(s.substring(segmentStart)); 152 } 153 break; 154 } 155 if (pos > segmentStart) { 156 writer.write(s.substring(segmentStart, pos)); 157 } 158 segmentStart = pos + 1; 159 searchOffset = pos + 2; 160 } 161 162 return Character.codePointCount(input, 0, input.length()); 163 } 164 } 165 166 /** 167 * Translator object for escaping Java. 168 * 169 * While {@link #escapeJava(String)} is the expected method of use, this 170 * object allows the Java escaping functionality to be used 171 * as the foundation for a custom translator. 172 */ 173 public static final CharSequenceTranslator ESCAPE_JAVA; 174 static { 175 final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>(); 176 escapeJavaMap.put("\"", "\\\""); 177 escapeJavaMap.put("\\", "\\\\"); 178 ESCAPE_JAVA = new AggregateTranslator( 179 new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)), 180 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 181 JavaUnicodeEscaper.outsideOf(32, 0x7f) 182 ); 183 } 184 185 /** 186 * Translator object for escaping EcmaScript/JavaScript. 187 * 188 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 189 * object allows the EcmaScript escaping functionality to be used 190 * as the foundation for a custom translator. 191 */ 192 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT; 193 static { 194 final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>(); 195 escapeEcmaScriptMap.put("'", "\\'"); 196 escapeEcmaScriptMap.put("\"", "\\\""); 197 escapeEcmaScriptMap.put("\\", "\\\\"); 198 escapeEcmaScriptMap.put("/", "\\/"); 199 ESCAPE_ECMASCRIPT = new AggregateTranslator( 200 new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)), 201 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 202 JavaUnicodeEscaper.outsideOf(32, 0x7f) 203 ); 204 } 205 206 /** 207 * Translator object for escaping Json. 208 * 209 * While {@link #escapeJson(String)} is the expected method of use, this 210 * object allows the Json escaping functionality to be used 211 * as the foundation for a custom translator. 212 */ 213 public static final CharSequenceTranslator ESCAPE_JSON; 214 static { 215 final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>(); 216 escapeJsonMap.put("\"", "\\\""); 217 escapeJsonMap.put("\\", "\\\\"); 218 escapeJsonMap.put("/", "\\/"); 219 ESCAPE_JSON = new AggregateTranslator( 220 new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)), 221 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 222 JavaUnicodeEscaper.outsideOf(32, 0x7e) 223 ); 224 } 225 226 /** 227 * Translator object for escaping XML 1.0. 228 * 229 * While {@link #escapeXml10(String)} is the expected method of use, this 230 * object allows the XML escaping functionality to be used 231 * as the foundation for a custom translator. 232 */ 233 public static final CharSequenceTranslator ESCAPE_XML10; 234 static { 235 final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>(); 236 escapeXml10Map.put("\u0000", StringUtils.EMPTY); 237 escapeXml10Map.put("\u0001", StringUtils.EMPTY); 238 escapeXml10Map.put("\u0002", StringUtils.EMPTY); 239 escapeXml10Map.put("\u0003", StringUtils.EMPTY); 240 escapeXml10Map.put("\u0004", StringUtils.EMPTY); 241 escapeXml10Map.put("\u0005", StringUtils.EMPTY); 242 escapeXml10Map.put("\u0006", StringUtils.EMPTY); 243 escapeXml10Map.put("\u0007", StringUtils.EMPTY); 244 escapeXml10Map.put("\u0008", StringUtils.EMPTY); 245 escapeXml10Map.put("\u000b", StringUtils.EMPTY); 246 escapeXml10Map.put("\u000c", StringUtils.EMPTY); 247 escapeXml10Map.put("\u000e", StringUtils.EMPTY); 248 escapeXml10Map.put("\u000f", StringUtils.EMPTY); 249 escapeXml10Map.put("\u0010", StringUtils.EMPTY); 250 escapeXml10Map.put("\u0011", StringUtils.EMPTY); 251 escapeXml10Map.put("\u0012", StringUtils.EMPTY); 252 escapeXml10Map.put("\u0013", StringUtils.EMPTY); 253 escapeXml10Map.put("\u0014", StringUtils.EMPTY); 254 escapeXml10Map.put("\u0015", StringUtils.EMPTY); 255 escapeXml10Map.put("\u0016", StringUtils.EMPTY); 256 escapeXml10Map.put("\u0017", StringUtils.EMPTY); 257 escapeXml10Map.put("\u0018", StringUtils.EMPTY); 258 escapeXml10Map.put("\u0019", StringUtils.EMPTY); 259 escapeXml10Map.put("\u001a", StringUtils.EMPTY); 260 escapeXml10Map.put("\u001b", StringUtils.EMPTY); 261 escapeXml10Map.put("\u001c", StringUtils.EMPTY); 262 escapeXml10Map.put("\u001d", StringUtils.EMPTY); 263 escapeXml10Map.put("\u001e", StringUtils.EMPTY); 264 escapeXml10Map.put("\u001f", StringUtils.EMPTY); 265 escapeXml10Map.put("\ufffe", StringUtils.EMPTY); 266 escapeXml10Map.put("\uffff", StringUtils.EMPTY); 267 ESCAPE_XML10 = new AggregateTranslator( 268 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 269 new LookupTranslator(EntityArrays.APOS_ESCAPE), 270 new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)), 271 NumericEntityEscaper.between(0x7f, 0x84), 272 NumericEntityEscaper.between(0x86, 0x9f), 273 new UnicodeUnpairedSurrogateRemover() 274 ); 275 } 276 277 /** 278 * Translator object for escaping XML 1.1. 279 * 280 * While {@link #escapeXml11(String)} is the expected method of use, this 281 * object allows the XML escaping functionality to be used 282 * as the foundation for a custom translator. 283 */ 284 public static final CharSequenceTranslator ESCAPE_XML11; 285 286 static { 287 final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>(); 288 escapeXml11Map.put("\u0000", StringUtils.EMPTY); 289 escapeXml11Map.put("\u000b", ""); 290 escapeXml11Map.put("\u000c", ""); 291 escapeXml11Map.put("\ufffe", StringUtils.EMPTY); 292 escapeXml11Map.put("\uffff", StringUtils.EMPTY); 293 ESCAPE_XML11 = new AggregateTranslator( 294 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 295 new LookupTranslator(EntityArrays.APOS_ESCAPE), 296 new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)), 297 NumericEntityEscaper.between(0x1, 0x8), 298 NumericEntityEscaper.between(0xe, 0x1f), 299 NumericEntityEscaper.between(0x7f, 0x84), 300 NumericEntityEscaper.between(0x86, 0x9f), 301 new UnicodeUnpairedSurrogateRemover() 302 ); 303 } 304 305 /** 306 * Translator object for escaping HTML version 3.0. 307 * 308 * While {@link #escapeHtml3(String)} is the expected method of use, this 309 * object allows the HTML escaping functionality to be used 310 * as the foundation for a custom translator. 311 */ 312 public static final CharSequenceTranslator ESCAPE_HTML3 = 313 new AggregateTranslator( 314 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 315 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE) 316 ); 317 318 /** 319 * Translator object for escaping HTML version 4.0. 320 * 321 * While {@link #escapeHtml4(String)} is the expected method of use, this 322 * object allows the HTML escaping functionality to be used 323 * as the foundation for a custom translator. 324 */ 325 public static final CharSequenceTranslator ESCAPE_HTML4 = 326 new AggregateTranslator( 327 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 328 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE), 329 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE) 330 ); 331 /** 332 * Translator object for escaping individual Comma Separated Values. 333 * 334 * While {@link #escapeCsv(String)} is the expected method of use, this 335 * object allows the CSV escaping functionality to be used 336 * as the foundation for a custom translator. 337 */ 338 public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper(); 339 340 /* UNESCAPE TRANSLATORS */ 341 342 /** 343 * Translator object for escaping Shell command language. 344 * 345 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a> 346 */ 347 public static final CharSequenceTranslator ESCAPE_XSI; 348 static { 349 final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>(); 350 escapeXsiMap.put("|", "\\|"); 351 escapeXsiMap.put("&", "\\&"); 352 escapeXsiMap.put(";", "\\;"); 353 escapeXsiMap.put("<", "\\<"); 354 escapeXsiMap.put(">", "\\>"); 355 escapeXsiMap.put("(", "\\("); 356 escapeXsiMap.put(")", "\\)"); 357 escapeXsiMap.put("$", "\\$"); 358 escapeXsiMap.put("`", "\\`"); 359 escapeXsiMap.put("\\", "\\\\"); 360 escapeXsiMap.put("\"", "\\\""); 361 escapeXsiMap.put("'", "\\'"); 362 escapeXsiMap.put(" ", "\\ "); 363 escapeXsiMap.put("\t", "\\\t"); 364 escapeXsiMap.put("\r\n", StringUtils.EMPTY); 365 escapeXsiMap.put("\n", StringUtils.EMPTY); 366 escapeXsiMap.put("*", "\\*"); 367 escapeXsiMap.put("?", "\\?"); 368 escapeXsiMap.put("[", "\\["); 369 escapeXsiMap.put("#", "\\#"); 370 escapeXsiMap.put("~", "\\~"); 371 escapeXsiMap.put("=", "\\="); 372 escapeXsiMap.put("%", "\\%"); 373 ESCAPE_XSI = new LookupTranslator( 374 Collections.unmodifiableMap(escapeXsiMap) 375 ); 376 } 377 378 /** 379 * Translator object for unescaping escaped Java. 380 * 381 * While {@link #unescapeJava(String)} is the expected method of use, this 382 * object allows the Java unescaping functionality to be used 383 * as the foundation for a custom translator. 384 */ 385 public static final CharSequenceTranslator UNESCAPE_JAVA; 386 387 static { 388 final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>(); 389 unescapeJavaMap.put("\\\\", "\\"); 390 unescapeJavaMap.put("\\\"", "\""); 391 unescapeJavaMap.put("\\'", "'"); 392 unescapeJavaMap.put("\\", StringUtils.EMPTY); 393 UNESCAPE_JAVA = new AggregateTranslator( 394 new OctalUnescaper(), // .between('\1', '\377'), 395 new UnicodeUnescaper(), 396 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE), 397 new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap)) 398 ); 399 } 400 401 /** 402 * Translator object for unescaping escaped EcmaScript. 403 * 404 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 405 * object allows the EcmaScript unescaping functionality to be used 406 * as the foundation for a custom translator. 407 */ 408 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 409 410 /** 411 * Translator object for unescaping escaped Json. 412 * 413 * While {@link #unescapeJson(String)} is the expected method of use, this 414 * object allows the Json unescaping functionality to be used 415 * as the foundation for a custom translator. 416 */ 417 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 418 419 /** 420 * Translator object for unescaping escaped HTML 3.0. 421 * 422 * While {@link #unescapeHtml3(String)} is the expected method of use, this 423 * object allows the HTML unescaping functionality to be used 424 * as the foundation for a custom translator. 425 */ 426 public static final CharSequenceTranslator UNESCAPE_HTML3 = 427 new AggregateTranslator( 428 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 429 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE), 430 new NumericEntityUnescaper() 431 ); 432 433 /** 434 * Translator object for unescaping escaped HTML 4.0. 435 * 436 * While {@link #unescapeHtml4(String)} is the expected method of use, this 437 * object allows the HTML unescaping functionality to be used 438 * as the foundation for a custom translator. 439 */ 440 public static final CharSequenceTranslator UNESCAPE_HTML4 = 441 new AggregateTranslator( 442 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 443 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE), 444 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE), 445 new NumericEntityUnescaper() 446 ); 447 448 /** 449 * Translator object for unescaping escaped XML. 450 * 451 * While {@link #unescapeXml(String)} is the expected method of use, this 452 * object allows the XML unescaping functionality to be used 453 * as the foundation for a custom translator. 454 */ 455 public static final CharSequenceTranslator UNESCAPE_XML = 456 new AggregateTranslator( 457 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 458 new LookupTranslator(EntityArrays.APOS_UNESCAPE), 459 new NumericEntityUnescaper() 460 ); 461 462 /** 463 * Translator object for unescaping escaped Comma Separated Value entries. 464 * 465 * While {@link #unescapeCsv(String)} is the expected method of use, this 466 * object allows the CSV unescaping functionality to be used 467 * as the foundation for a custom translator. 468 */ 469 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper(); 470 471 /* Helper functions */ 472 473 /** 474 * Translator object for unescaping escaped XSI Value entries. 475 * 476 * While {@link #unescapeXSI(String)} is the expected method of use, this 477 * object allows the XSI unescaping functionality to be used 478 * as the foundation for a custom translator. 479 */ 480 public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper(); 481 482 /** 483 * Get a {@link Builder}. 484 * @param translator the text translator 485 * @return {@link Builder} 486 */ 487 public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) { 488 return new Builder(translator); 489 } 490 491 /** 492 * Returns a {@code String} value for a CSV column enclosed in double quotes, 493 * if required. 494 * 495 * <p>If the value contains a comma, newline or double quote, then the 496 * String value is returned enclosed in double quotes.</p> 497 * 498 * <p>Any double quote characters in the value are escaped with another double quote.</p> 499 * 500 * <p>If the value does not contain a comma, newline or double quote, then the 501 * String value is returned unchanged.</p> 502 * 503 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 504 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 505 * 506 * @param input the input CSV column String, may be null 507 * @return The input String, enclosed in double quotes if the value contains a comma, 508 * newline or double quote, {@code null} if null string input 509 */ 510 public static final String escapeCsv(final String input) { 511 return ESCAPE_CSV.translate(input); 512 } 513 514 /** 515 * Escapes the characters in a {@code String} using EcmaScript String rules. 516 * 517 * <p>Escapes any values it finds into their EcmaScript String form. 518 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 519 * 520 * <p>So a tab becomes the characters {@code '\\'} and 521 * {@code 't'}.</p> 522 * 523 * <p>The only difference between Java strings and EcmaScript strings 524 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 525 * 526 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p> 527 * 528 * <p>Example:</p> 529 * <pre> 530 * input string: He didn't say, "Stop!" 531 * output string: He didn\'t say, \"Stop!\" 532 * </pre> 533 * 534 * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output 535 * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used 536 * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you 537 * may consider the 538 * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>. 539 * Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>. 540 * 541 * @param input String to escape values in, may be null 542 * @return String with escaped values, {@code null} if null string input 543 */ 544 public static final String escapeEcmaScript(final String input) { 545 return ESCAPE_ECMASCRIPT.translate(input); 546 } 547 548 /** 549 * Escapes the characters in a {@code String} using HTML entities. 550 * 551 * <p>Supports only the HTML 3.0 entities.</p> 552 * 553 * @param input the {@code String} to escape, may be null 554 * @return a new escaped {@code String}, {@code null} if null string input 555 */ 556 public static final String escapeHtml3(final String input) { 557 return ESCAPE_HTML3.translate(input); 558 } 559 560 // HTML and XML 561 /** 562 * Escapes the characters in a {@code String} using HTML entities. 563 * 564 * <p> 565 * For example: 566 * </p> 567 * <p>{@code "bread" & "butter"}</p> 568 * becomes: 569 * <p> 570 * {@code "bread" &amp; "butter"}. 571 * </p> 572 * 573 * <p>Supports all known HTML 4.0 entities, including funky accents. 574 * Note that the commonly used apostrophe escape character (&apos;) 575 * is not a legal entity and so is not supported).</p> 576 * 577 * @param input the {@code String} to escape, may be null 578 * @return a new escaped {@code String}, {@code null} if null string input 579 * 580 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 581 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 582 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 583 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 584 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 585 */ 586 public static final String escapeHtml4(final String input) { 587 return ESCAPE_HTML4.translate(input); 588 } 589 590 // Java and JavaScript 591 /** 592 * Escapes the characters in a {@code String} using Java String rules. 593 * 594 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 595 * 596 * <p>So a tab becomes the characters {@code '\\'} and 597 * {@code 't'}.</p> 598 * 599 * <p>The only difference between Java strings and JavaScript strings 600 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 601 * 602 * <p>Example:</p> 603 * <pre> 604 * input string: He didn't say, "Stop!" 605 * output string: He didn't say, \"Stop!\" 606 * </pre> 607 * 608 * @param input String to escape values in, may be null 609 * @return String with escaped values, {@code null} if null string input 610 */ 611 public static final String escapeJava(final String input) { 612 return ESCAPE_JAVA.translate(input); 613 } 614 615 /** 616 * Escapes the characters in a {@code String} using Json String rules. 617 * 618 * <p>Escapes any values it finds into their Json String form. 619 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 620 * 621 * <p>So a tab becomes the characters {@code '\\'} and 622 * {@code 't'}.</p> 623 * 624 * <p>The only difference between Java strings and Json strings 625 * is that in Json, forward-slash (/) is escaped.</p> 626 * 627 * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details.</p> 628 * 629 * <p>Example:</p> 630 * <pre> 631 * input string: He didn't say, "Stop!" 632 * output string: He didn't say, \"Stop!\" 633 * </pre> 634 * 635 * @param input String to escape values in, may be null 636 * @return String with escaped values, {@code null} if null string input 637 */ 638 public static final String escapeJson(final String input) { 639 return ESCAPE_JSON.translate(input); 640 } 641 642 /** 643 * Escapes the characters in a {@code String} using XML entities. 644 * 645 * <p>For example: {@code "bread" & "butter"} => 646 * {@code "bread" & "butter"}. 647 * </p> 648 * 649 * <p>Note that XML 1.0 is a text-only format: it cannot represent control 650 * characters or unpaired Unicode surrogate code points, even after escaping. 651 * {@code escapeXml10} will remove characters that do not fit in the 652 * following ranges:</p> 653 * 654 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 655 * 656 * <p>Though not strictly necessary, {@code escapeXml10} will escape 657 * characters in the following ranges:</p> 658 * 659 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p> 660 * 661 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1 662 * document. If you want to allow more non-text characters in an XML 1.1 663 * document, use {@link #escapeXml11(String)}.</p> 664 * 665 * @param input the {@code String} to escape, may be null 666 * @return a new escaped {@code String}, {@code null} if null string input 667 * @see #unescapeXml(String) 668 */ 669 public static String escapeXml10(final String input) { 670 return ESCAPE_XML10.translate(input); 671 } 672 673 /** 674 * Escapes the characters in a {@code String} using XML entities. 675 * 676 * <p>For example: {@code "bread" & "butter"} => 677 * {@code "bread" & "butter"}. 678 * </p> 679 * 680 * <p>XML 1.1 can represent certain control characters, but it cannot represent 681 * the null byte or unpaired Unicode surrogate code points, even after escaping. 682 * {@code escapeXml11} will remove characters that do not fit in the following 683 * ranges:</p> 684 * 685 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 686 * 687 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 688 * 689 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 690 * 691 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 692 * use it for XML 1.0 documents.</p> 693 * 694 * @param input the {@code String} to escape, may be null 695 * @return a new escaped {@code String}, {@code null} if null string input 696 * @see #unescapeXml(String) 697 */ 698 public static String escapeXml11(final String input) { 699 return ESCAPE_XML11.translate(input); 700 } 701 702 /** 703 * Escapes the characters in a {@code String} using XSI rules. 704 * 705 * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument 706 * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])} 707 * instead.</p> 708 * 709 * <p>Example:</p> 710 * <pre> 711 * input string: He didn't say, "Stop!" 712 * output string: He\ didn\'t\ say,\ \"Stop!\" 713 * </pre> 714 * 715 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a> 716 * @param input String to escape values in, may be null 717 * @return String with escaped values, {@code null} if null string input 718 */ 719 public static final String escapeXSI(final String input) { 720 return ESCAPE_XSI.translate(input); 721 } 722 723 /** 724 * Returns a {@code String} value for an unescaped CSV column. 725 * 726 * <p>If the value is enclosed in double quotes, and contains a comma, newline 727 * or double quote, then quotes are removed. 728 * </p> 729 * 730 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 731 * to just one double quote.</p> 732 * 733 * <p>If the value is not enclosed in double quotes, or is and does not contain a 734 * comma, newline or double quote, then the String value is returned unchanged.</p> 735 * 736 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 737 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 738 * 739 * @param input the input CSV column String, may be null 740 * @return The input String, with enclosing double quotes removed and embedded double 741 * quotes unescaped, {@code null} if null string input 742 */ 743 public static final String unescapeCsv(final String input) { 744 return UNESCAPE_CSV.translate(input); 745 } 746 747 /** 748 * Unescapes any EcmaScript literals found in the {@code String}. 749 * 750 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 751 * into a newline character, unless the {@code '\'} is preceded by another 752 * {@code '\'}.</p> 753 * 754 * @see #unescapeJava(String) 755 * @param input the {@code String} to unescape, may be null 756 * @return A new unescaped {@code String}, {@code null} if null string input 757 */ 758 public static final String unescapeEcmaScript(final String input) { 759 return UNESCAPE_ECMASCRIPT.translate(input); 760 } 761 762 /** 763 * Unescapes a string containing entity escapes to a string 764 * containing the actual Unicode characters corresponding to the 765 * escapes. Supports only HTML 3.0 entities. 766 * 767 * @param input the {@code String} to unescape, may be null 768 * @return a new unescaped {@code String}, {@code null} if null string input 769 */ 770 public static final String unescapeHtml3(final String input) { 771 return UNESCAPE_HTML3.translate(input); 772 } 773 774 /** 775 * Unescapes a string containing entity escapes to a string 776 * containing the actual Unicode characters corresponding to the 777 * escapes. Supports HTML 4.0 entities. 778 * 779 * <p>For example, the string {@code "<Français>"} 780 * will become {@code "<Fran�ais>"}</p> 781 * 782 * <p>If an entity is unrecognized, it is left alone, and inserted 783 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 784 * become {@code ">&zzzz;x"}.</p> 785 * 786 * @param input the {@code String} to unescape, may be null 787 * @return a new unescaped {@code String}, {@code null} if null string input 788 */ 789 public static final String unescapeHtml4(final String input) { 790 return UNESCAPE_HTML4.translate(input); 791 } 792 793 /** 794 * Unescapes any Java literals found in the {@code String}. 795 * For example, it will turn a sequence of {@code '\'} and 796 * {@code 'n'} into a newline character, unless the {@code '\'} 797 * is preceded by another {@code '\'}. 798 * 799 * @param input the {@code String} to unescape, may be null 800 * @return a new unescaped {@code String}, {@code null} if null string input 801 */ 802 public static final String unescapeJava(final String input) { 803 return UNESCAPE_JAVA.translate(input); 804 } 805 806 /** 807 * Unescapes any Json literals found in the {@code String}. 808 * 809 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 810 * into a newline character, unless the {@code '\'} is preceded by another 811 * {@code '\'}.</p> 812 * 813 * @see #unescapeJava(String) 814 * @param input the {@code String} to unescape, may be null 815 * @return A new unescaped {@code String}, {@code null} if null string input 816 */ 817 public static final String unescapeJson(final String input) { 818 return UNESCAPE_JSON.translate(input); 819 } 820 821 /** 822 * Unescapes a string containing XML entity escapes to a string 823 * containing the actual Unicode characters corresponding to the 824 * escapes. 825 * 826 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 827 * Does not support DTDs or external entities.</p> 828 * 829 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 830 * Unicode characters. This may change in future releases.</p> 831 * 832 * @param input the {@code String} to unescape, may be null 833 * @return a new unescaped {@code String}, {@code null} if null string input 834 * @see #escapeXml10(String) 835 * @see #escapeXml11(String) 836 */ 837 public static final String unescapeXml(final String input) { 838 return UNESCAPE_XML.translate(input); 839 } 840 841 /** 842 * Unescapes the characters in a {@code String} using XSI rules. 843 * 844 * @see StringEscapeUtils#escapeXSI(String) 845 * @param input the {@code String} to unescape, may be null 846 * @return a new unescaped {@code String}, {@code null} if null string input 847 */ 848 public static final String unescapeXSI(final String input) { 849 return UNESCAPE_XSI.translate(input); 850 } 851 852 /** 853 * {@code StringEscapeUtils} instances should NOT be constructed in 854 * standard programming. 855 * 856 * <p>Instead, the class should be used as:</p> 857 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 858 * 859 * <p>This constructor is public to permit tools that require a JavaBean 860 * instance to operate.</p> 861 */ 862 public StringEscapeUtils() { 863 } 864 865}