001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang;
018
019 /**
020 * <p>Operations on Strings that contain words.</p>
021 *
022 * <p>This class tries to handle <code>null</code> input gracefully.
023 * An exception will not be thrown for a <code>null</code> input.
024 * Each method documents its behaviour in more detail.</p>
025 *
026 * @author Apache Jakarta Velocity
027 * @author Apache Software Foundation
028 * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
029 * @author Gary Gregory
030 * @since 2.0
031 * @version $Id: WordUtils.java 905636 2010-02-02 14:03:32Z niallp $
032 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        super();
    }

    // Wrapping
    //-----------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Spaces found where a new line would start are skipped while more than
     * <code>wrapLength</code> characters remain. The final remainder is
     * appended unchanged, and trailing spaces are not stripped.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>Spaces found where a new line would start are skipped while more than
     * <code>wrapLength</code> characters remain. The final remainder is
     * appended unchanged, and trailing spaces are not stripped.</p>
     *
     * <p>A word longer than <code>wrapLength</code> is either split every
     * <code>wrapLength</code> characters (<code>wrapLongWords</code> true) or
     * kept whole, extending beyond the limit up to the next space
     * (<code>wrapLongWords</code> false).</p>
     *
     * <pre>
     * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = defaultLineSeparator();
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        int inputLineLength = str.length();
        int offset = 0;
        StringBuffer wrappedLine = new StringBuffer(inputLineLength + 32);

        // Invariant: more than wrapLength characters remain, so
        // (wrapLength + offset) is always a valid index into str.
        while ((inputLineLength - offset) > wrapLength) {
            if (str.charAt(offset) == ' ') {
                // skip spaces at the start of the line being built
                offset++;
                continue;
            }
            // last space that still keeps the line within wrapLength columns
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case: break at that space and drop the space itself
                wrappedLine.append(str.substring(offset, spaceToWrapAt));
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;

            } else if (wrapLongWords) {
                // really long word: cut it into wrapLength sized pieces
                wrappedLine.append(str.substring(offset, wrapLength + offset));
                wrappedLine.append(newLineStr);
                offset += wrapLength;

            } else {
                // really long word kept whole: extend to the next space, if any
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str.substring(offset, spaceToWrapAt));
                    wrappedLine.append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    wrappedLine.append(str.substring(offset));
                    offset = inputLineLength;
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str.substring(offset));

        return wrappedLine.toString();
    }

    /**
     * Resolves the line separator used when the caller supplies none.
     *
     * <p>Reads the <code>line.separator</code> system property and falls back
     * to <code>"\n"</code> if the property is unset or may not be read, so
     * that a <code>null</code> separator is never appended to the output.</p>
     *
     * @return the separator to insert between wrapped lines, never <code>null</code>
     */
    private static String defaultLineSeparator() {
        try {
            String separator = System.getProperty("line.separator");
            if (separator != null) {
                return separator;
            }
        } catch (SecurityException ex) {
            // property not readable in this security context; use the fallback
        }
        return "\n";
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(String str) {
        return capitalize(str, null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. An empty delimiter array returns the
     * String unchanged.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     * @since 2.1
     */
    public static String capitalize(String str, char[] delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuffer buffer = new StringBuffer(strLen);
        // true at the start of the String and after every delimiter
        boolean capitalizeNext = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                buffer.append(ch);
                capitalizeNext = true;
            } else if (capitalizeNext) {
                buffer.append(Character.toTitleCase(ch));
                capitalizeNext = false;
            } else {
                buffer.append(ch);
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(String str) {
        return capitalizeFully(str, null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. An empty delimiter array returns the
     * String unchanged.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * The String is lower cased with {@link String#toLowerCase()}, which uses
     * the default locale, before the first letters are converted.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i am FINE", null)  = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, char[] delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(String str) {
        return uncapitalize(str, null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. An empty delimiter array returns the
     * String unchanged.</p>
     *
     * <p>If the delimiters array is null, whitespace as defined by
     * {@link Character#isWhitespace(char)} is used.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     * @since 2.1
     */
    public static String uncapitalize(String str, char[] delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuffer buffer = new StringBuffer(strLen);
        // true at the start of the String and after every delimiter
        boolean uncapitalizeNext = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                buffer.append(ch);
                uncapitalizeNext = true;
            } else if (uncapitalizeNext) {
                buffer.append(Character.toLowerCase(ch));
                uncapitalizeNext = false;
            } else {
                buffer.append(ch);
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(String str) {
        int strLen;
        if (str == null || (strLen = str.length()) == 0) {
            return str;
        }
        StringBuffer buffer = new StringBuffer(strLen);

        // the start of the String counts as "after whitespace"
        boolean afterWhitespace = true;

        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);
            char swapped;
            if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
                swapped = Character.toLowerCase(ch);
            } else if (Character.isLowerCase(ch)) {
                if (afterWhitespace) {
                    swapped = Character.toTitleCase(ch);
                } else {
                    swapped = Character.toUpperCase(ch);
                }
            } else {
                swapped = ch;
            }
            buffer.append(swapped);
            afterWhitespace = Character.isWhitespace(ch);
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)           = null
     * WordUtils.initials("")             = ""
     * WordUtils.initials("Ben John Lee") = "BJL"
     * WordUtils.initials("Ben J.Lee")    = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(String str) {
        return initials(str, null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(String str, char[] delimiters) {
        if (str == null || str.length() == 0) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        int strLen = str.length();
        // Every initial but the first needs a delimiter before it, so at most
        // every other character can be an initial.
        char[] buf = new char[strLen / 2 + 1];
        int count = 0;
        boolean lastWasGap = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                buf[count++] = ch;
                lastWasGap = false;
            }
            // any other character lies inside a word and is ignored
        }
        return new String(buf, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch  the character to check
     * @param delimiters  the delimiters, <code>null</code> means whitespace
     *  as defined by {@link Character#isWhitespace(char)}
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(char ch, char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (int i = 0, isize = delimiters.length; i < isize; i++) {
            if (ch == delimiters[i]) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Abbreviates a string nicely.
     *
     * This method searches for the first space at or after the lower limit and
     * abbreviates the String there. It will also append any String passed as a
     * parameter to the end of the String. The upper limit can be specified to
     * forcibly abbreviate a String.
     *
     * <pre>
     * WordUtils.abbreviate(null, *, *, *)                  = null
     * WordUtils.abbreviate("", *, *, *)                    = ""
     * WordUtils.abbreviate("0123456789", 0, -1, "")        = "0123456789"
     * WordUtils.abbreviate("012 3456789", 0, 5, null)      = "012"
     * WordUtils.abbreviate("0123456789", 0, 5, "...")      = "01234..."
     * WordUtils.abbreviate("01 23 45 67 89", 9, -1, "abc") = "01 23 45 67abc"
     * </pre>
     *
     * @param str         the string to be abbreviated. If null is passed, null is returned.
     *                    If the empty String is passed, the empty string is returned.
     * @param lower       the lower limit. A negative value is treated as zero and a
     *                    value beyond the end of the string as the string length.
     * @param upper       the upper limit; specify -1 if no limit is desired.
     *                    If the upper limit is lower than the lower limit, it will be
     *                    adjusted to be the same as the lower limit.
     * @param appendToEnd String to be appended to the end of the abbreviated string.
     *                    This is appended ONLY if the string was indeed abbreviated.
     *                    The append does not count towards the lower or upper limits.
     *                    <code>null</code> appends nothing.
     * @return the abbreviated String.
     * @since 2.4
     */
    public static String abbreviate(String str, int lower, int upper, String appendToEnd) {
        // initial parameter checks
        if (str == null) {
            return null;
        }
        int length = str.length();
        if (length == 0) {
            return "";
        }

        // Bring lower into [0, length]. A negative lower already searched from
        // the start of the string; clamping it also keeps the adjusted upper
        // limit from ending up negative and breaking substring().
        if (lower < 0) {
            lower = 0;
        }
        if (lower > length) {
            lower = length;
        }
        // if the upper value is -1 (i.e. no limit) or is greater
        // than the length of the string, set to the length of the string
        if (upper == -1 || upper > length) {
            upper = length;
        }
        // if upper is less than lower, raise it to lower
        if (upper < lower) {
            upper = lower;
        }

        // Cut at the first space at or after lower, unless there is none or it
        // lies beyond upper, in which case cut at upper.
        int index = str.indexOf(' ', lower);
        int end = (index == -1 || index > upper) ? upper : index;

        // only if abbreviation has occurred do we append the appendToEnd value
        if (end == length) {
            return str;
        }
        String suffix = (appendToEnd == null ? "" : appendToEnd);
        return str.substring(0, end) + suffix;
    }

}