001/*
002 * Copyright (C) 2010 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019
020import com.google.common.annotations.GwtCompatible;
021import com.google.common.annotations.VisibleForTesting;
022import org.checkerframework.checker.nullness.qual.Nullable;
023
024/**
025 * Static utility methods pertaining to {@code String} or {@code CharSequence} instances.
026 *
027 * @author Kevin Bourrillion
028 * @since 3.0
029 */
030@GwtCompatible
031public final class Strings {
032  private Strings() {}
033
034  /**
035   * Returns the given string if it is non-null; the empty string otherwise.
036   *
037   * @param string the string to test and possibly return
038   * @return {@code string} itself if it is non-null; {@code ""} if it is null
039   */
040  public static String nullToEmpty(@Nullable String string) {
041    return Platform.nullToEmpty(string);
042  }
043
044  /**
045   * Returns the given string if it is nonempty; {@code null} otherwise.
046   *
047   * @param string the string to test and possibly return
048   * @return {@code string} itself if it is nonempty; {@code null} if it is empty or null
049   */
050  public static @Nullable String emptyToNull(@Nullable String string) {
051    return Platform.emptyToNull(string);
052  }
053
054  /**
055   * Returns {@code true} if the given string is null or is the empty string.
056   *
057   * <p>Consider normalizing your string references with {@link #nullToEmpty}. If you do, you can
058   * use {@link String#isEmpty()} instead of this method, and you won't need special null-safe forms
059   * of methods like {@link String#toUpperCase} either. Or, if you'd like to normalize "in the other
060   * direction," converting empty strings to {@code null}, you can use {@link #emptyToNull}.
061   *
062   * @param string a string reference to check
063   * @return {@code true} if the string is null or is the empty string
064   */
065  public static boolean isNullOrEmpty(@Nullable String string) {
066    return Platform.stringIsNullOrEmpty(string);
067  }
068
069  /**
070   * Returns a string, of length at least {@code minLength}, consisting of {@code string} prepended
071   * with as many copies of {@code padChar} as are necessary to reach that length. For example,
072   *
073   * <ul>
074   *   <li>{@code padStart("7", 3, '0')} returns {@code "007"}
075   *   <li>{@code padStart("2010", 3, '0')} returns {@code "2010"}
076   * </ul>
077   *
078   * <p>See {@link java.util.Formatter} for a richer set of formatting capabilities.
079   *
080   * @param string the string which should appear at the end of the result
081   * @param minLength the minimum length the resulting string must have. Can be zero or negative, in
082   *     which case the input string is always returned.
083   * @param padChar the character to insert at the beginning of the result until the minimum length
084   *     is reached
085   * @return the padded string
086   */
087  public static String padStart(String string, int minLength, char padChar) {
088    checkNotNull(string); // eager for GWT.
089    if (string.length() >= minLength) {
090      return string;
091    }
092    StringBuilder sb = new StringBuilder(minLength);
093    for (int i = string.length(); i < minLength; i++) {
094      sb.append(padChar);
095    }
096    sb.append(string);
097    return sb.toString();
098  }
099
100  /**
101   * Returns a string, of length at least {@code minLength}, consisting of {@code string} appended
102   * with as many copies of {@code padChar} as are necessary to reach that length. For example,
103   *
104   * <ul>
105   *   <li>{@code padEnd("4.", 5, '0')} returns {@code "4.000"}
106   *   <li>{@code padEnd("2010", 3, '!')} returns {@code "2010"}
107   * </ul>
108   *
109   * <p>See {@link java.util.Formatter} for a richer set of formatting capabilities.
110   *
111   * @param string the string which should appear at the beginning of the result
112   * @param minLength the minimum length the resulting string must have. Can be zero or negative, in
113   *     which case the input string is always returned.
114   * @param padChar the character to append to the end of the result until the minimum length is
115   *     reached
116   * @return the padded string
117   */
118  public static String padEnd(String string, int minLength, char padChar) {
119    checkNotNull(string); // eager for GWT.
120    if (string.length() >= minLength) {
121      return string;
122    }
123    StringBuilder sb = new StringBuilder(minLength);
124    sb.append(string);
125    for (int i = string.length(); i < minLength; i++) {
126      sb.append(padChar);
127    }
128    return sb.toString();
129  }
130
131  /**
132   * Returns a string consisting of a specific number of concatenated copies of an input string. For
133   * example, {@code repeat("hey", 3)} returns the string {@code "heyheyhey"}.
134   *
135   * @param string any non-null string
136   * @param count the number of times to repeat it; a nonnegative integer
137   * @return a string containing {@code string} repeated {@code count} times (the empty string if
138   *     {@code count} is zero)
139   * @throws IllegalArgumentException if {@code count} is negative
140   */
141  public static String repeat(String string, int count) {
142    checkNotNull(string); // eager for GWT.
143
144    if (count <= 1) {
145      checkArgument(count >= 0, "invalid count: %s", count);
146      return (count == 0) ? "" : string;
147    }
148
149    // IF YOU MODIFY THE CODE HERE, you must update StringsRepeatBenchmark
150    final int len = string.length();
151    final long longSize = (long) len * (long) count;
152    final int size = (int) longSize;
153    if (size != longSize) {
154      throw new ArrayIndexOutOfBoundsException("Required array size too large: " + longSize);
155    }
156
157    final char[] array = new char[size];
158    string.getChars(0, len, array, 0);
159    int n;
160    for (n = len; n < size - n; n <<= 1) {
161      System.arraycopy(array, 0, array, n, n);
162    }
163    System.arraycopy(array, 0, array, n, size - n);
164    return new String(array);
165  }
166
167  /**
168   * Returns the longest string {@code prefix} such that {@code a.toString().startsWith(prefix) &&
169   * b.toString().startsWith(prefix)}, taking care not to split surrogate pairs. If {@code a} and
170   * {@code b} have no common prefix, returns the empty string.
171   *
172   * @since 11.0
173   */
174  public static String commonPrefix(CharSequence a, CharSequence b) {
175    checkNotNull(a);
176    checkNotNull(b);
177
178    int maxPrefixLength = Math.min(a.length(), b.length());
179    int p = 0;
180    while (p < maxPrefixLength && a.charAt(p) == b.charAt(p)) {
181      p++;
182    }
183    if (validSurrogatePairAt(a, p - 1) || validSurrogatePairAt(b, p - 1)) {
184      p--;
185    }
186    return a.subSequence(0, p).toString();
187  }
188
189  /**
190   * Returns the longest string {@code suffix} such that {@code a.toString().endsWith(suffix) &&
191   * b.toString().endsWith(suffix)}, taking care not to split surrogate pairs. If {@code a} and
192   * {@code b} have no common suffix, returns the empty string.
193   *
194   * @since 11.0
195   */
196  public static String commonSuffix(CharSequence a, CharSequence b) {
197    checkNotNull(a);
198    checkNotNull(b);
199
200    int maxSuffixLength = Math.min(a.length(), b.length());
201    int s = 0;
202    while (s < maxSuffixLength && a.charAt(a.length() - s - 1) == b.charAt(b.length() - s - 1)) {
203      s++;
204    }
205    if (validSurrogatePairAt(a, a.length() - s - 1)
206        || validSurrogatePairAt(b, b.length() - s - 1)) {
207      s--;
208    }
209    return a.subSequence(a.length() - s, a.length()).toString();
210  }
211
212  /**
213   * Returns the given {@code template} string with each occurrence of {@code "%s"} replaced with
214   * the corresponding argument value from {@code args}; or, if the placeholder and argument counts
215   * do not match, returns a best-effort form of that string. Will not throw an exception under any
216   * circumstances (as long as all arguments' {@code toString} methods successfully return).
217   *
218   * <p><b>Note:</b> For most string-formatting needs, use {@link String#format}, {@link
219   * PrintWriter#format}, and related methods. These support the full range of {@linkplain
220   * Formatter#syntax format specifiers}, and alert you to usage errors by throwing {@link
221   * InvalidFormatException}.
222   *
223   * <p>In certain cases, such as outputting debugging information or constructing a message to be
224   * used for another unchecked exception, an exception during string formatting would serve little
225   * purpose except to supplant the real information you were trying to provide. These are the cases
226   * this method is made for; it instead generates a best-effort string with all supplied argument
227   * values present. This method is also useful in environments such as GWT where {@code
228   * String.format} is not available. As an example, method implementations of the {@link
229   * Preconditions} class use this formatter, for both of the reasons just discussed.
230   *
231   * <p><b>Warning:</b> Only the exact two-character placeholder sequence {@code "%s"} is
232   * recognized.
233   *
234   * @param template a string containing zero or more {@code "%s"} placeholder sequences. {@code
235   *     null} is treated as the four-character string {@code "null"}.
236   * @param args the arguments to be substituted into the message template. The first argument
237   *     specified is substituted for the first occurrence of {@code "%s"} in the template, and so
238   *     forth. A {@code null} argument is converted to the four-character string {@code "null"};
239   *     non-null values are converted to strings using {@link Object#toString()}.
240   * @since 25.1
241   */
242  // TODO(diamondm) consider using Arrays.toString() for array parameters
243  // TODO(diamondm) capture exceptions thrown from arguments' toString methods
244  public static String lenientFormat(
245      @Nullable String template, @Nullable Object @Nullable... args) {
246    template = String.valueOf(template); // null -> "null"
247
248    args = args == null ? new Object[] {"(Object[])null"} : args;
249
250    // start substituting the arguments into the '%s' placeholders
251    StringBuilder builder = new StringBuilder(template.length() + 16 * args.length);
252    int templateStart = 0;
253    int i = 0;
254    while (i < args.length) {
255      int placeholderStart = template.indexOf("%s", templateStart);
256      if (placeholderStart == -1) {
257        break;
258      }
259      builder.append(template, templateStart, placeholderStart);
260      builder.append(args[i++]);
261      templateStart = placeholderStart + 2;
262    }
263    builder.append(template, templateStart, template.length());
264
265    // if we run out of placeholders, append the extra args in square braces
266    if (i < args.length) {
267      builder.append(" [");
268      builder.append(args[i++]);
269      while (i < args.length) {
270        builder.append(", ");
271        builder.append(args[i++]);
272      }
273      builder.append(']');
274    }
275
276    return builder.toString();
277  }
278
279  /**
280   * True when a valid surrogate pair starts at the given {@code index} in the given {@code string}.
281   * Out-of-range indexes return false.
282   */
283  @VisibleForTesting
284  static boolean validSurrogatePairAt(CharSequence string, int index) {
285    return index >= 0
286        && index <= (string.length() - 2)
287        && Character.isHighSurrogate(string.charAt(index))
288        && Character.isLowSurrogate(string.charAt(index + 1));
289  }
290}