001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang;
018
019 import java.io.IOException;
020 import java.io.StringWriter;
021 import java.io.Writer;
022 import java.util.Locale;
023
024 import org.apache.commons.lang.exception.NestableRuntimeException;
025 import org.apache.commons.lang.text.StrBuilder;
026
027 /**
 * <p>Escapes and unescapes <code>String</code>s for
 * Java, JavaScript, HTML, XML, SQL, and CSV.</p>
030 *
031 * <p>#ThreadSafe#</p>
032 * @author Apache Software Foundation
033 * @author Apache Jakarta Turbine
034 * @author Purple Technology
035 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
036 * @author Antony Riley
037 * @author Helge Tesgaard
038 * @author <a href="sean@boohai.com">Sean Brown</a>
039 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
040 * @author Phil Steitz
041 * @author Pete Gieser
042 * @since 2.0
043 * @version $Id: StringEscapeUtils.java 1057072 2011-01-10 01:55:57Z niallp $
044 */
045 public class StringEscapeUtils {
046
047 private static final char CSV_DELIMITER = ',';
048 private static final char CSV_QUOTE = '"';
049 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
050 private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
051
052 /**
053 * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
054 * standard programming.</p>
055 *
056 * <p>Instead, the class should be used as:
057 * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
058 *
059 * <p>This constructor is public to permit tools that require a JavaBean
060 * instance to operate.</p>
061 */
062 public StringEscapeUtils() {
063 super();
064 }
065
066 // Java and JavaScript
067 //--------------------------------------------------------------------------
068 /**
069 * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
070 *
071 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
072 *
073 * <p>So a tab becomes the characters <code>'\\'</code> and
074 * <code>'t'</code>.</p>
075 *
076 * <p>The only difference between Java strings and JavaScript strings
077 * is that in JavaScript, a single quote must be escaped.</p>
078 *
079 * <p>Example:
080 * <pre>
081 * input string: He didn't say, "Stop!"
082 * output string: He didn't say, \"Stop!\"
083 * </pre>
084 * </p>
085 *
086 * @param str String to escape values in, may be null
087 * @return String with escaped values, <code>null</code> if null string input
088 */
089 public static String escapeJava(String str) {
090 return escapeJavaStyleString(str, false, false);
091 }
092
093 /**
094 * <p>Escapes the characters in a <code>String</code> using Java String rules to
095 * a <code>Writer</code>.</p>
096 *
097 * <p>A <code>null</code> string input has no effect.</p>
098 *
099 * @see #escapeJava(java.lang.String)
100 * @param out Writer to write escaped string into
101 * @param str String to escape values in, may be null
102 * @throws IllegalArgumentException if the Writer is <code>null</code>
103 * @throws IOException if error occurs on underlying Writer
104 */
105 public static void escapeJava(Writer out, String str) throws IOException {
106 escapeJavaStyleString(out, str, false, false);
107 }
108
109 /**
110 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
111 * <p>Escapes any values it finds into their JavaScript String form.
112 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
113 *
114 * <p>So a tab becomes the characters <code>'\\'</code> and
115 * <code>'t'</code>.</p>
116 *
117 * <p>The only difference between Java strings and JavaScript strings
118 * is that in JavaScript, a single quote must be escaped.</p>
119 *
120 * <p>Example:
121 * <pre>
122 * input string: He didn't say, "Stop!"
123 * output string: He didn\'t say, \"Stop!\"
124 * </pre>
125 * </p>
126 *
127 * @param str String to escape values in, may be null
128 * @return String with escaped values, <code>null</code> if null string input
129 */
130 public static String escapeJavaScript(String str) {
131 return escapeJavaStyleString(str, true, true);
132 }
133
134 /**
135 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
136 * to a <code>Writer</code>.</p>
137 *
138 * <p>A <code>null</code> string input has no effect.</p>
139 *
140 * @see #escapeJavaScript(java.lang.String)
141 * @param out Writer to write escaped string into
142 * @param str String to escape values in, may be null
143 * @throws IllegalArgumentException if the Writer is <code>null</code>
144 * @throws IOException if error occurs on underlying Writer
145 **/
146 public static void escapeJavaScript(Writer out, String str) throws IOException {
147 escapeJavaStyleString(out, str, true, true);
148 }
149
150 /**
151 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p>
152 *
153 * @param str String to escape values in, may be null
154 * @param escapeSingleQuotes escapes single quotes if <code>true</code>
155 * @param escapeForwardSlash TODO
156 * @return the escaped string
157 */
158 private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) {
159 if (str == null) {
160 return null;
161 }
162 try {
163 StringWriter writer = new StringWriter(str.length() * 2);
164 escapeJavaStyleString(writer, str, escapeSingleQuotes, escapeForwardSlash);
165 return writer.toString();
166 } catch (IOException ioe) {
167 // this should never ever happen while writing to a StringWriter
168 throw new UnhandledException(ioe);
169 }
170 }
171
172 /**
173 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p>
174 *
175 * @param out write to receieve the escaped string
176 * @param str String to escape values in, may be null
177 * @param escapeSingleQuote escapes single quotes if <code>true</code>
178 * @param escapeForwardSlash TODO
179 * @throws IOException if an IOException occurs
180 */
181 private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote,
182 boolean escapeForwardSlash) throws IOException {
183 if (out == null) {
184 throw new IllegalArgumentException("The Writer must not be null");
185 }
186 if (str == null) {
187 return;
188 }
189 int sz;
190 sz = str.length();
191 for (int i = 0; i < sz; i++) {
192 char ch = str.charAt(i);
193
194 // handle unicode
195 if (ch > 0xfff) {
196 out.write("\\u" + hex(ch));
197 } else if (ch > 0xff) {
198 out.write("\\u0" + hex(ch));
199 } else if (ch > 0x7f) {
200 out.write("\\u00" + hex(ch));
201 } else if (ch < 32) {
202 switch (ch) {
203 case '\b' :
204 out.write('\\');
205 out.write('b');
206 break;
207 case '\n' :
208 out.write('\\');
209 out.write('n');
210 break;
211 case '\t' :
212 out.write('\\');
213 out.write('t');
214 break;
215 case '\f' :
216 out.write('\\');
217 out.write('f');
218 break;
219 case '\r' :
220 out.write('\\');
221 out.write('r');
222 break;
223 default :
224 if (ch > 0xf) {
225 out.write("\\u00" + hex(ch));
226 } else {
227 out.write("\\u000" + hex(ch));
228 }
229 break;
230 }
231 } else {
232 switch (ch) {
233 case '\'' :
234 if (escapeSingleQuote) {
235 out.write('\\');
236 }
237 out.write('\'');
238 break;
239 case '"' :
240 out.write('\\');
241 out.write('"');
242 break;
243 case '\\' :
244 out.write('\\');
245 out.write('\\');
246 break;
247 case '/' :
248 if (escapeForwardSlash) {
249 out.write('\\');
250 }
251 out.write('/');
252 break;
253 default :
254 out.write(ch);
255 break;
256 }
257 }
258 }
259 }
260
261 /**
262 * <p>Returns an upper case hexadecimal <code>String</code> for the given
263 * character.</p>
264 *
265 * @param ch The character to convert.
266 * @return An upper case hexadecimal <code>String</code>
267 */
268 private static String hex(char ch) {
269 return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
270 }
271
272 /**
273 * <p>Unescapes any Java literals found in the <code>String</code>.
274 * For example, it will turn a sequence of <code>'\'</code> and
275 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
276 * is preceded by another <code>'\'</code>.</p>
277 *
278 * @param str the <code>String</code> to unescape, may be null
279 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
280 */
281 public static String unescapeJava(String str) {
282 if (str == null) {
283 return null;
284 }
285 try {
286 StringWriter writer = new StringWriter(str.length());
287 unescapeJava(writer, str);
288 return writer.toString();
289 } catch (IOException ioe) {
290 // this should never ever happen while writing to a StringWriter
291 throw new UnhandledException(ioe);
292 }
293 }
294
295 /**
296 * <p>Unescapes any Java literals found in the <code>String</code> to a
297 * <code>Writer</code>.</p>
298 *
299 * <p>For example, it will turn a sequence of <code>'\'</code> and
300 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
301 * is preceded by another <code>'\'</code>.</p>
302 *
303 * <p>A <code>null</code> string input has no effect.</p>
304 *
305 * @param out the <code>Writer</code> used to output unescaped characters
306 * @param str the <code>String</code> to unescape, may be null
307 * @throws IllegalArgumentException if the Writer is <code>null</code>
308 * @throws IOException if error occurs on underlying Writer
309 */
310 public static void unescapeJava(Writer out, String str) throws IOException {
311 if (out == null) {
312 throw new IllegalArgumentException("The Writer must not be null");
313 }
314 if (str == null) {
315 return;
316 }
317 int sz = str.length();
318 StrBuilder unicode = new StrBuilder(4);
319 boolean hadSlash = false;
320 boolean inUnicode = false;
321 for (int i = 0; i < sz; i++) {
322 char ch = str.charAt(i);
323 if (inUnicode) {
324 // if in unicode, then we're reading unicode
325 // values in somehow
326 unicode.append(ch);
327 if (unicode.length() == 4) {
328 // unicode now contains the four hex digits
329 // which represents our unicode character
330 try {
331 int value = Integer.parseInt(unicode.toString(), 16);
332 out.write((char) value);
333 unicode.setLength(0);
334 inUnicode = false;
335 hadSlash = false;
336 } catch (NumberFormatException nfe) {
337 throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
338 }
339 }
340 continue;
341 }
342 if (hadSlash) {
343 // handle an escaped value
344 hadSlash = false;
345 switch (ch) {
346 case '\\':
347 out.write('\\');
348 break;
349 case '\'':
350 out.write('\'');
351 break;
352 case '\"':
353 out.write('"');
354 break;
355 case 'r':
356 out.write('\r');
357 break;
358 case 'f':
359 out.write('\f');
360 break;
361 case 't':
362 out.write('\t');
363 break;
364 case 'n':
365 out.write('\n');
366 break;
367 case 'b':
368 out.write('\b');
369 break;
370 case 'u':
371 {
372 // uh-oh, we're in unicode country....
373 inUnicode = true;
374 break;
375 }
376 default :
377 out.write(ch);
378 break;
379 }
380 continue;
381 } else if (ch == '\\') {
382 hadSlash = true;
383 continue;
384 }
385 out.write(ch);
386 }
387 if (hadSlash) {
388 // then we're in the weird case of a \ at the end of the
389 // string, let's output it anyway.
390 out.write('\\');
391 }
392 }
393
394 /**
395 * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
396 *
397 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
398 * into a newline character, unless the <code>'\'</code> is preceded by another
399 * <code>'\'</code>.</p>
400 *
401 * @see #unescapeJava(String)
402 * @param str the <code>String</code> to unescape, may be null
403 * @return A new unescaped <code>String</code>, <code>null</code> if null string input
404 */
405 public static String unescapeJavaScript(String str) {
406 return unescapeJava(str);
407 }
408
409 /**
410 * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
411 * <code>Writer</code>.</p>
412 *
413 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
414 * into a newline character, unless the <code>'\'</code> is preceded by another
415 * <code>'\'</code>.</p>
416 *
417 * <p>A <code>null</code> string input has no effect.</p>
418 *
419 * @see #unescapeJava(Writer,String)
420 * @param out the <code>Writer</code> used to output unescaped characters
421 * @param str the <code>String</code> to unescape, may be null
422 * @throws IllegalArgumentException if the Writer is <code>null</code>
423 * @throws IOException if error occurs on underlying Writer
424 */
425 public static void unescapeJavaScript(Writer out, String str) throws IOException {
426 unescapeJava(out, str);
427 }
428
429 // HTML and XML
430 //--------------------------------------------------------------------------
431 /**
432 * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
433 *
434 * <p>
435 * For example:
436 * </p>
437 * <p><code>"bread" & "butter"</code></p>
438 * becomes:
439 * <p>
440 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
441 * </p>
442 *
443 * <p>Supports all known HTML 4.0 entities, including funky accents.
444 * Note that the commonly used apostrophe escape character (&apos;)
445 * is not a legal entity and so is not supported). </p>
446 *
447 * @param str the <code>String</code> to escape, may be null
448 * @return a new escaped <code>String</code>, <code>null</code> if null string input
449 *
450 * @see #unescapeHtml(String)
451 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
452 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
453 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
454 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
455 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
456 */
457 public static String escapeHtml(String str) {
458 if (str == null) {
459 return null;
460 }
461 try {
462 StringWriter writer = new StringWriter ((int)(str.length() * 1.5));
463 escapeHtml(writer, str);
464 return writer.toString();
465 } catch (IOException ioe) {
466 //should be impossible
467 throw new UnhandledException(ioe);
468 }
469 }
470
471 /**
472 * <p>Escapes the characters in a <code>String</code> using HTML entities and writes
473 * them to a <code>Writer</code>.</p>
474 *
475 * <p>
476 * For example:
477 * </p>
478 * <code>"bread" & "butter"</code>
479 * <p>becomes:</p>
480 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
481 *
482 * <p>Supports all known HTML 4.0 entities, including funky accents.
483 * Note that the commonly used apostrophe escape character (&apos;)
484 * is not a legal entity and so is not supported). </p>
485 *
486 * @param writer the writer receiving the escaped string, not null
487 * @param string the <code>String</code> to escape, may be null
488 * @throws IllegalArgumentException if the writer is null
489 * @throws IOException when <code>Writer</code> passed throws the exception from
490 * calls to the {@link Writer#write(int)} methods.
491 *
492 * @see #escapeHtml(String)
493 * @see #unescapeHtml(String)
494 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
495 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
496 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
497 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
498 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
499 */
500 public static void escapeHtml(Writer writer, String string) throws IOException {
501 if (writer == null ) {
502 throw new IllegalArgumentException ("The Writer must not be null.");
503 }
504 if (string == null) {
505 return;
506 }
507 Entities.HTML40.escape(writer, string);
508 }
509
510 //-----------------------------------------------------------------------
511 /**
512 * <p>Unescapes a string containing entity escapes to a string
513 * containing the actual Unicode characters corresponding to the
514 * escapes. Supports HTML 4.0 entities.</p>
515 *
516 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
517 * will become "<Français>"</p>
518 *
519 * <p>If an entity is unrecognized, it is left alone, and inserted
520 * verbatim into the result string. e.g. "&gt;&zzzz;x" will
521 * become ">&zzzz;x".</p>
522 *
523 * @param str the <code>String</code> to unescape, may be null
524 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
525 * @see #escapeHtml(Writer, String)
526 */
527 public static String unescapeHtml(String str) {
528 if (str == null) {
529 return null;
530 }
531 try {
532 StringWriter writer = new StringWriter ((int)(str.length() * 1.5));
533 unescapeHtml(writer, str);
534 return writer.toString();
535 } catch (IOException ioe) {
536 //should be impossible
537 throw new UnhandledException(ioe);
538 }
539 }
540
541 /**
542 * <p>Unescapes a string containing entity escapes to a string
543 * containing the actual Unicode characters corresponding to the
544 * escapes. Supports HTML 4.0 entities.</p>
545 *
546 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
547 * will become "<Français>"</p>
548 *
549 * <p>If an entity is unrecognized, it is left alone, and inserted
550 * verbatim into the result string. e.g. "&gt;&zzzz;x" will
551 * become ">&zzzz;x".</p>
552 *
553 * @param writer the writer receiving the unescaped string, not null
554 * @param string the <code>String</code> to unescape, may be null
555 * @throws IllegalArgumentException if the writer is null
556 * @throws IOException if an IOException occurs
557 * @see #escapeHtml(String)
558 */
559 public static void unescapeHtml(Writer writer, String string) throws IOException {
560 if (writer == null ) {
561 throw new IllegalArgumentException ("The Writer must not be null.");
562 }
563 if (string == null) {
564 return;
565 }
566 Entities.HTML40.unescape(writer, string);
567 }
568
569 //-----------------------------------------------------------------------
570 /**
571 * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
572 *
573 * <p>For example: <tt>"bread" & "butter"</tt> =>
574 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
575 * </p>
576 *
577 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
578 * Does not support DTDs or external entities.</p>
579 *
580 * <p>Note that unicode characters greater than 0x7f are currently escaped to
581 * their numerical \\u equivalent. This may change in future releases. </p>
582 *
583 * @param writer the writer receiving the unescaped string, not null
584 * @param str the <code>String</code> to escape, may be null
585 * @throws IllegalArgumentException if the writer is null
586 * @throws IOException if there is a problem writing
587 * @see #unescapeXml(java.lang.String)
588 */
589 public static void escapeXml(Writer writer, String str) throws IOException {
590 if (writer == null ) {
591 throw new IllegalArgumentException ("The Writer must not be null.");
592 }
593 if (str == null) {
594 return;
595 }
596 Entities.XML.escape(writer, str);
597 }
598
599 /**
600 * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
601 *
602 * <p>For example: <tt>"bread" & "butter"</tt> =>
603 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
604 * </p>
605 *
606 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
607 * Does not support DTDs or external entities.</p>
608 *
609 * <p>Note that unicode characters greater than 0x7f are currently escaped to
610 * their numerical \\u equivalent. This may change in future releases. </p>
611 *
612 * @param str the <code>String</code> to escape, may be null
613 * @return a new escaped <code>String</code>, <code>null</code> if null string input
614 * @see #unescapeXml(java.lang.String)
615 */
616 public static String escapeXml(String str) {
617 if (str == null) {
618 return null;
619 }
620 return Entities.XML.escape(str);
621 }
622
623 //-----------------------------------------------------------------------
624 /**
625 * <p>Unescapes a string containing XML entity escapes to a string
626 * containing the actual Unicode characters corresponding to the
627 * escapes.</p>
628 *
629 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
630 * Does not support DTDs or external entities.</p>
631 *
632 * <p>Note that numerical \\u unicode codes are unescaped to their respective
633 * unicode characters. This may change in future releases. </p>
634 *
635 * @param writer the writer receiving the unescaped string, not null
636 * @param str the <code>String</code> to unescape, may be null
637 * @throws IllegalArgumentException if the writer is null
638 * @throws IOException if there is a problem writing
639 * @see #escapeXml(String)
640 */
641 public static void unescapeXml(Writer writer, String str) throws IOException {
642 if (writer == null ) {
643 throw new IllegalArgumentException ("The Writer must not be null.");
644 }
645 if (str == null) {
646 return;
647 }
648 Entities.XML.unescape(writer, str);
649 }
650
651 /**
652 * <p>Unescapes a string containing XML entity escapes to a string
653 * containing the actual Unicode characters corresponding to the
654 * escapes.</p>
655 *
656 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
657 * Does not support DTDs or external entities.</p>
658 *
659 * <p>Note that numerical \\u unicode codes are unescaped to their respective
660 * unicode characters. This may change in future releases. </p>
661 *
662 * @param str the <code>String</code> to unescape, may be null
663 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
664 * @see #escapeXml(String)
665 */
666 public static String unescapeXml(String str) {
667 if (str == null) {
668 return null;
669 }
670 return Entities.XML.unescape(str);
671 }
672
673 //-----------------------------------------------------------------------
674 /**
675 * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
676 * an SQL query.</p>
677 *
678 * <p>For example,
679 * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
680 * StringEscapeUtils.escapeSql("McHale's Navy") +
681 * "'");</pre>
682 * </p>
683 *
684 * <p>At present, this method only turns single-quotes into doubled single-quotes
685 * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
686 * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
687 *
688 * see http://www.jguru.com/faq/view.jsp?EID=8881
689 * @param str the string to escape, may be null
690 * @return a new String, escaped for SQL, <code>null</code> if null string input
691 */
692 public static String escapeSql(String str) {
693 if (str == null) {
694 return null;
695 }
696 return StringUtils.replace(str, "'", "''");
697 }
698
699 //-----------------------------------------------------------------------
700
701 /**
702 * <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes,
703 * if required.</p>
704 *
705 * <p>If the value contains a comma, newline or double quote, then the
706 * String value is returned enclosed in double quotes.</p>
707 * </p>
708 *
709 * <p>Any double quote characters in the value are escaped with another double quote.</p>
710 *
711 * <p>If the value does not contain a comma, newline or double quote, then the
712 * String value is returned unchanged.</p>
713 * </p>
714 *
715 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
716 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
717 *
718 * @param str the input CSV column String, may be null
719 * @return the input String, enclosed in double quotes if the value contains a comma,
720 * newline or double quote, <code>null</code> if null string input
721 * @since 2.4
722 */
723 public static String escapeCsv(String str) {
724 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
725 return str;
726 }
727 try {
728 StringWriter writer = new StringWriter();
729 escapeCsv(writer, str);
730 return writer.toString();
731 } catch (IOException ioe) {
732 // this should never ever happen while writing to a StringWriter
733 throw new UnhandledException(ioe);
734 }
735 }
736
737 /**
738 * <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes,
739 * if required.</p>
740 *
741 * <p>If the value contains a comma, newline or double quote, then the
742 * String value is written enclosed in double quotes.</p>
743 * </p>
744 *
745 * <p>Any double quote characters in the value are escaped with another double quote.</p>
746 *
747 * <p>If the value does not contain a comma, newline or double quote, then the
748 * String value is written unchanged (null values are ignored).</p>
749 * </p>
750 *
751 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
752 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
753 *
754 * @param str the input CSV column String, may be null
755 * @param out Writer to write input string to, enclosed in double quotes if it contains
756 * a comma, newline or double quote
757 * @throws IOException if error occurs on underlying Writer
758 * @since 2.4
759 */
760 public static void escapeCsv(Writer out, String str) throws IOException {
761 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
762 if (str != null) {
763 out.write(str);
764 }
765 return;
766 }
767 out.write(CSV_QUOTE);
768 for (int i = 0; i < str.length(); i++) {
769 char c = str.charAt(i);
770 if (c == CSV_QUOTE) {
771 out.write(CSV_QUOTE); // escape double quote
772 }
773 out.write(c);
774 }
775 out.write(CSV_QUOTE);
776 }
777
778 /**
779 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
780 *
781 * <p>If the value is enclosed in double quotes, and contains a comma, newline
782 * or double quote, then quotes are removed.
783 * </p>
784 *
785 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
786 * to just one double quote. </p>
787 *
788 * <p>If the value is not enclosed in double quotes, or is and does not contain a
789 * comma, newline or double quote, then the String value is returned unchanged.</p>
790 * </p>
791 *
792 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
793 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
794 *
795 * @param str the input CSV column String, may be null
796 * @return the input String, with enclosing double quotes removed and embedded double
797 * quotes unescaped, <code>null</code> if null string input
798 * @since 2.4
799 */
800 public static String unescapeCsv(String str) {
801 if (str == null) {
802 return null;
803 }
804 try {
805 StringWriter writer = new StringWriter();
806 unescapeCsv(writer, str);
807 return writer.toString();
808 } catch (IOException ioe) {
809 // this should never ever happen while writing to a StringWriter
810 throw new UnhandledException(ioe);
811 }
812 }
813
814 /**
815 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
816 *
817 * <p>If the value is enclosed in double quotes, and contains a comma, newline
818 * or double quote, then quotes are removed.
819 * </p>
820 *
821 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
822 * to just one double quote. </p>
823 *
824 * <p>If the value is not enclosed in double quotes, or is and does not contain a
825 * comma, newline or double quote, then the String value is returned unchanged.</p>
826 * </p>
827 *
828 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
829 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
830 *
831 * @param str the input CSV column String, may be null
832 * @param out Writer to write the input String to, with enclosing double quotes
833 * removed and embedded double quotes unescaped, <code>null</code> if null string input
834 * @throws IOException if error occurs on underlying Writer
835 * @since 2.4
836 */
837 public static void unescapeCsv(Writer out, String str) throws IOException {
838 if (str == null) {
839 return;
840 }
841 if (str.length() < 2) {
842 out.write(str);
843 return;
844 }
845 if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE ) {
846 out.write(str);
847 return;
848 }
849
850 // strip quotes
851 String quoteless = str.substring(1, str.length() - 1);
852
853 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
854 // deal with escaped quotes; ie) ""
855 str = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR);
856 }
857
858 out.write(str);
859 }
860
861 }