001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.VisibleForTesting;
024import java.util.Arrays;
025import java.util.BitSet;
026
027/**
028 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
029 * for any {@link Object}. Also offers basic text processing methods based on this function.
030 * Implementations are strongly encouraged to be side-effect-free and immutable.
031 *
032 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
033 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
034 *
035 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a
036 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand
037 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code
038 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of
039 * assigned characters, including important CJK characters and emoji.
040 *
041 * <p>Supplementary characters are <a
042 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded
043 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as
044 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s.
045 *
046 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for
047 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For
048 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner.
049 *
050 * <p>Example usages:
051 *
052 * <pre>
053 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
054 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
055 *
056 * <p>See the Guava User Guide article on <a
057 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
058 * </a>.
059 *
060 * @author Kevin Bourrillion
061 * @since 1.0
062 */
063@GwtCompatible(emulated = true)
064public abstract class CharMatcher implements Predicate<Character> {
065  /*
066   *           N777777777NO
067   *         N7777777777777N
068   *        M777777777777777N
069   *        $N877777777D77777M
070   *       N M77777777ONND777M
071   *       MN777777777NN  D777
072   *     N7ZN777777777NN ~M7778
073   *    N777777777777MMNN88777N
074   *    N777777777777MNZZZ7777O
075   *    DZN7777O77777777777777
076   *     N7OONND7777777D77777N
077   *      8$M++++?N???$77777$
078   *       M7++++N+M77777777N
079   *        N77O777777777777$                              M
080   *          DNNM$$$$777777N                              D
081   *         N$N:=N$777N7777M                             NZ
082   *        77Z::::N777777777                          ODZZZ
083   *       77N::::::N77777777M                         NNZZZ$
084   *     $777:::::::77777777MN                        ZM8ZZZZZ
085   *     777M::::::Z7777777Z77                        N++ZZZZNN
086   *    7777M:::::M7777777$777M                       $++IZZZZM
087   *   M777$:::::N777777$M7777M                       +++++ZZZDN
088   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
089   *       N::::::N:7$O:77777777                      N++++ZZZZN
090   *       M::::::::::::N77777777+                   +?+++++ZZZM
091   *       8::::::::::::D77777777M                    O+++++ZZ
092   *        ::::::::::::M777777777N                      O+?D
093   *        M:::::::::::M77777777778                     77=
094   *        D=::::::::::N7777777777N                    777
095   *       INN===::::::=77777777777N                  I777N
096   *      ?777N========N7777777777787M               N7777
097   *      77777$D======N77777777777N777N?         N777777
098   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
099   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
100   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
101   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
102   *   7 :N MNN$$$$M$      $$$777$8      8D8I
103   *     NMM.:7O           777777778
104   *                       7777777MN
105   *                       M NO .7:
106   *                       M   :   M
107   *                            8
108   */
109
110  // Constant matcher factory methods
111
112  /**
113   * Matches any character.
114   *
115   * @since 19.0 (since 1.0 as constant {@code ANY})
116   */
117  public static CharMatcher any() {
118    return Any.INSTANCE;
119  }
120
121  /**
122   * Matches no characters.
123   *
124   * @since 19.0 (since 1.0 as constant {@code NONE})
125   */
126  public static CharMatcher none() {
127    return None.INSTANCE;
128  }
129
130  /**
131   * Determines whether a character is whitespace according to the latest Unicode standard, as
132   * illustrated <a
133   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
134   * This is not the same definition used by other Java APIs. (See a <a
135   * href="https://goo.gl/Y6SLWx">comparison of several definitions of "whitespace"</a>.)
136   *
137   * <p>All Unicode White_Space characters are on the BMP and thus supported by this API.
138   *
139   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
140   * date.
141   *
142   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
143   */
144  public static CharMatcher whitespace() {
145    return Whitespace.INSTANCE;
146  }
147
148  /**
149   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
150   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
151   * discussion of that term.
152   *
153   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
154   */
155  public static CharMatcher breakingWhitespace() {
156    return BreakingWhitespace.INSTANCE;
157  }
158
159  /**
160   * Determines whether a character is ASCII, meaning that its code point is less than 128.
161   *
162   * @since 19.0 (since 1.0 as constant {@code ASCII})
163   */
164  public static CharMatcher ascii() {
165    return Ascii.INSTANCE;
166  }
167
168  /**
169   * Determines whether a character is a BMP digit according to <a
170   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
171   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
172   *
173   * @deprecated Many digits are supplementary characters; see the class documentation.
174   * @since 19.0 (since 1.0 as constant {@code DIGIT})
175   */
176  @Deprecated
177  public static CharMatcher digit() {
178    return Digit.INSTANCE;
179  }
180
181  /**
182   * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char)
183   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
184   * '9')}.
185   *
186   * @deprecated Many digits are supplementary characters; see the class documentation.
187   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
188   */
189  @Deprecated
190  public static CharMatcher javaDigit() {
191    return JavaDigit.INSTANCE;
192  }
193
194  /**
195   * Determines whether a character is a BMP letter according to {@linkplain
196   * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
197   * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
198   *
199   * @deprecated Most letters are supplementary characters; see the class documentation.
200   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
201   */
202  @Deprecated
203  public static CharMatcher javaLetter() {
204    return JavaLetter.INSTANCE;
205  }
206
207  /**
208   * Determines whether a character is a BMP letter or digit according to {@linkplain
209   * Character#isLetterOrDigit(char) Java's definition}.
210   *
211   * @deprecated Most letters and digits are supplementary characters; see the class documentation.
212   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}).
213   */
214  @Deprecated
215  public static CharMatcher javaLetterOrDigit() {
216    return JavaLetterOrDigit.INSTANCE;
217  }
218
219  /**
220   * Determines whether a BMP character is upper case according to {@linkplain
221   * Character#isUpperCase(char) Java's definition}.
222   *
223   * @deprecated Some uppercase characters are supplementary characters; see the class
224   *     documentation.
225   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
226   */
227  @Deprecated
228  public static CharMatcher javaUpperCase() {
229    return JavaUpperCase.INSTANCE;
230  }
231
232  /**
233   * Determines whether a BMP character is lower case according to {@linkplain
234   * Character#isLowerCase(char) Java's definition}.
235   *
236   * @deprecated Some lowercase characters are supplementary characters; see the class
237   *     documentation.
238   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
239   */
240  @Deprecated
241  public static CharMatcher javaLowerCase() {
242    return JavaLowerCase.INSTANCE;
243  }
244
245  /**
246   * Determines whether a character is an ISO control character as specified by {@link
247   * Character#isISOControl(char)}.
248   *
249   * <p>All ISO control codes are on the BMP and thus supported by this API.
250   *
251   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
252   */
253  public static CharMatcher javaIsoControl() {
254    return JavaIsoControl.INSTANCE;
255  }
256
257  /**
258   * Determines whether a character is invisible; that is, if its Unicode category is any of
259   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
260   * PRIVATE_USE according to ICU4J.
261   *
262   * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU).
263   *
264   * @deprecated Most invisible characters are supplementary characters; see the class
265   *     documentation.
266   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
267   */
268  @Deprecated
269  public static CharMatcher invisible() {
270    return Invisible.INSTANCE;
271  }
272
273  /**
274   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
275   * errs on the side of returning {@code false} (that is, it tends to assume a character is
276   * double-width).
277   *
278   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
279   * date.
280   *
281   * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>.
282   *
283   * @deprecated Many such characters are supplementary characters; see the class documentation.
284   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
285   */
286  @Deprecated
287  public static CharMatcher singleWidth() {
288    return SingleWidth.INSTANCE;
289  }
290
291  // Static factories
292
293  /** Returns a {@code char} matcher that matches only one specified BMP character. */
294  public static CharMatcher is(final char match) {
295    return new Is(match);
296  }
297
298  /**
299   * Returns a {@code char} matcher that matches any character except the BMP character specified.
300   *
301   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
302   */
303  public static CharMatcher isNot(final char match) {
304    return new IsNot(match);
305  }
306
307  /**
308   * Returns a {@code char} matcher that matches any BMP character present in the given character
309   * sequence. Returns a bogus matcher if the sequence contains supplementary characters.
310   */
311  public static CharMatcher anyOf(final CharSequence sequence) {
312    switch (sequence.length()) {
313      case 0:
314        return none();
315      case 1:
316        return is(sequence.charAt(0));
317      case 2:
318        return isEither(sequence.charAt(0), sequence.charAt(1));
319      default:
320        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
321        // matcher?
322        return new AnyOf(sequence);
323    }
324  }
325
326  /**
327   * Returns a {@code char} matcher that matches any BMP character not present in the given
328   * character sequence. Returns a bogus matcher if the sequence contains supplementary characters.
329   */
330  public static CharMatcher noneOf(CharSequence sequence) {
331    return anyOf(sequence).negate();
332  }
333
334  /**
335   * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints
336   * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
337   * CharMatcher.inRange('a', 'z')}.
338   *
339   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
340   */
341  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
342    return new InRange(startInclusive, endInclusive);
343  }
344
345  /**
346   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
347   * which operates on primitive {@code char} instances instead.
348   */
349  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
350    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
351  }
352
353  // Constructors
354
355  /**
356   * Constructor for use by subclasses. When subclassing, you may want to override {@code
357   * toString()} to provide a useful description.
358   */
359  protected CharMatcher() {}
360
361  // Abstract methods
362
363  /** Determines a true or false value for the given character. */
364  public abstract boolean matches(char c);
365
366  // Non-static factories
367
368  /** Returns a matcher that matches any character not matched by this matcher. */
369  // @Override under Java 8 but not under Java 7
370  public CharMatcher negate() {
371    return new Negated(this);
372  }
373
374  /**
375   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
376   */
377  public CharMatcher and(CharMatcher other) {
378    return new And(this, other);
379  }
380
381  /**
382   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
383   */
384  public CharMatcher or(CharMatcher other) {
385    return new Or(this, other);
386  }
387
388  /**
389   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
390   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
391   * worthwhile only if the precomputed matcher is queried many thousands of times.
392   *
393   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
394   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
395   * worthwhile tradeoff in a browser.
396   */
397  public CharMatcher precomputed() {
398    return Platform.precomputeCharMatcher(this);
399  }
400
401  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
402
403  /**
404   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
405   * on {@link Platform} so that we can have different behavior in GWT.
406   *
407   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
408   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
409   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
410   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
411   * matcher which is faster to query than the original.
412   */
413  @GwtIncompatible // SmallCharMatcher
414  CharMatcher precomputedInternal() {
415    final BitSet table = new BitSet();
416    setBits(table);
417    int totalCharacters = table.cardinality();
418    if (totalCharacters * 2 <= DISTINCT_CHARS) {
419      return precomputedPositive(totalCharacters, table, toString());
420    } else {
421      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
422      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
423      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
424      String suffix = ".negate()";
425      final String description = toString();
426      String negatedDescription =
427          description.endsWith(suffix)
428              ? description.substring(0, description.length() - suffix.length())
429              : description + suffix;
430      return new NegatedFastMatcher(
431          precomputedPositive(negatedCharacters, table, negatedDescription)) {
432        @Override
433        public String toString() {
434          return description;
435        }
436      };
437    }
438  }
439
440  /**
441   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
442   */
443  @GwtIncompatible // SmallCharMatcher
444  private static CharMatcher precomputedPositive(
445      int totalCharacters, BitSet table, String description) {
446    switch (totalCharacters) {
447      case 0:
448        return none();
449      case 1:
450        return is((char) table.nextSetBit(0));
451      case 2:
452        char c1 = (char) table.nextSetBit(0);
453        char c2 = (char) table.nextSetBit(c1 + 1);
454        return isEither(c1, c2);
455      default:
456        return isSmall(totalCharacters, table.length())
457            ? SmallCharMatcher.from(table, description)
458            : new BitSetMatcher(table, description);
459    }
460  }
461
462  @GwtIncompatible // SmallCharMatcher
463  private static boolean isSmall(int totalCharacters, int tableLength) {
464    return totalCharacters <= SmallCharMatcher.MAX_SIZE
465        && tableLength > (totalCharacters * 4 * Character.SIZE);
466    // err on the side of BitSetMatcher
467  }
468
469  /** Sets bits in {@code table} matched by this matcher. */
470  @GwtIncompatible // used only from other GwtIncompatible code
471  void setBits(BitSet table) {
472    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
473      if (matches((char) c)) {
474        table.set(c);
475      }
476    }
477  }
478
479  // Text processing routines
480
481  /**
482   * Returns {@code true} if a character sequence contains at least one matching BMP character.
483   * Equivalent to {@code !matchesNoneOf(sequence)}.
484   *
485   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
486   * character, until this returns {@code true} or the end is reached.
487   *
488   * @param sequence the character sequence to examine, possibly empty
489   * @return {@code true} if this matcher matches at least one character in the sequence
490   * @since 8.0
491   */
492  public boolean matchesAnyOf(CharSequence sequence) {
493    return !matchesNoneOf(sequence);
494  }
495
496  /**
497   * Returns {@code true} if a character sequence contains only matching BMP characters.
498   *
499   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
500   * character, until this returns {@code false} or the end is reached.
501   *
502   * @param sequence the character sequence to examine, possibly empty
503   * @return {@code true} if this matcher matches every character in the sequence, including when
504   *     the sequence is empty
505   */
506  public boolean matchesAllOf(CharSequence sequence) {
507    for (int i = sequence.length() - 1; i >= 0; i--) {
508      if (!matches(sequence.charAt(i))) {
509        return false;
510      }
511    }
512    return true;
513  }
514
515  /**
516   * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to
517   * {@code !matchesAnyOf(sequence)}.
518   *
519   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
520   * character, until this returns {@code true} or the end is reached.
521   *
522   * @param sequence the character sequence to examine, possibly empty
523   * @return {@code true} if this matcher matches no characters in the sequence, including when the
524   *     sequence is empty
525   */
526  public boolean matchesNoneOf(CharSequence sequence) {
527    return indexIn(sequence) == -1;
528  }
529
530  /**
531   * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if
532   * no matching character is present.
533   *
534   * <p>The default implementation iterates over the sequence in forward order calling {@link
535   * #matches} for each character.
536   *
537   * @param sequence the character sequence to examine from the beginning
538   * @return an index, or {@code -1} if no character matches
539   */
540  public int indexIn(CharSequence sequence) {
541    return indexIn(sequence, 0);
542  }
543
544  /**
545   * Returns the index of the first matching BMP character in a character sequence, starting from a
546   * given position, or {@code -1} if no character matches after that position.
547   *
548   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
549   * start}, calling {@link #matches} for each character.
550   *
551   * @param sequence the character sequence to examine
552   * @param start the first index to examine; must be nonnegative and no greater than {@code
553   *     sequence.length()}
554   * @return the index of the first matching character, guaranteed to be no less than {@code start},
555   *     or {@code -1} if no character matches
556   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
557   *     sequence.length()}
558   */
559  public int indexIn(CharSequence sequence, int start) {
560    int length = sequence.length();
561    checkPositionIndex(start, length);
562    for (int i = start; i < length; i++) {
563      if (matches(sequence.charAt(i))) {
564        return i;
565      }
566    }
567    return -1;
568  }
569
570  /**
571   * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if
572   * no matching character is present.
573   *
574   * <p>The default implementation iterates over the sequence in reverse order calling {@link
575   * #matches} for each character.
576   *
577   * @param sequence the character sequence to examine from the end
578   * @return an index, or {@code -1} if no character matches
579   */
580  public int lastIndexIn(CharSequence sequence) {
581    for (int i = sequence.length() - 1; i >= 0; i--) {
582      if (matches(sequence.charAt(i))) {
583        return i;
584      }
585    }
586    return -1;
587  }
588
589  /**
590   * Returns the number of matching {@code char}s found in a character sequence.
591   *
592   * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}().
593   */
594  public int countIn(CharSequence sequence) {
595    int count = 0;
596    for (int i = 0; i < sequence.length(); i++) {
597      if (matches(sequence.charAt(i))) {
598        count++;
599      }
600    }
601    return count;
602  }
603
604  /**
605   * Returns a string containing all non-matching characters of a character sequence, in order. For
606   * example:
607   *
608   * <pre>{@code
609   * CharMatcher.is('a').removeFrom("bazaar")
610   * }</pre>
611   *
612   * ... returns {@code "bzr"}.
613   */
614  public String removeFrom(CharSequence sequence) {
615    String string = sequence.toString();
616    int pos = indexIn(string);
617    if (pos == -1) {
618      return string;
619    }
620
621    char[] chars = string.toCharArray();
622    int spread = 1;
623
624    // This unusual loop comes from extensive benchmarking
625    OUT:
626    while (true) {
627      pos++;
628      while (true) {
629        if (pos == chars.length) {
630          break OUT;
631        }
632        if (matches(chars[pos])) {
633          break;
634        }
635        chars[pos - spread] = chars[pos];
636        pos++;
637      }
638      spread++;
639    }
640    return new String(chars, 0, pos - spread);
641  }
642
643  /**
644   * Returns a string containing all matching BMP characters of a character sequence, in order. For
645   * example:
646   *
647   * <pre>{@code
648   * CharMatcher.is('a').retainFrom("bazaar")
649   * }</pre>
650   *
651   * ... returns {@code "aaa"}.
652   */
653  public String retainFrom(CharSequence sequence) {
654    return negate().removeFrom(sequence);
655  }
656
657  /**
658   * Returns a string copy of the input character sequence, with each matching BMP character
659   * replaced by a given replacement character. For example:
660   *
661   * <pre>{@code
662   * CharMatcher.is('a').replaceFrom("radar", 'o')
663   * }</pre>
664   *
665   * ... returns {@code "rodor"}.
666   *
667   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
668   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
669   * character.
670   *
671   * @param sequence the character sequence to replace matching characters in
672   * @param replacement the character to append to the result string in place of each matching
673   *     character in {@code sequence}
674   * @return the new string
675   */
676  public String replaceFrom(CharSequence sequence, char replacement) {
677    String string = sequence.toString();
678    int pos = indexIn(string);
679    if (pos == -1) {
680      return string;
681    }
682    char[] chars = string.toCharArray();
683    chars[pos] = replacement;
684    for (int i = pos + 1; i < chars.length; i++) {
685      if (matches(chars[i])) {
686        chars[i] = replacement;
687      }
688    }
689    return new String(chars);
690  }
691
692  /**
693   * Returns a string copy of the input character sequence, with each matching BMP character
694   * replaced by a given replacement sequence. For example:
695   *
696   * <pre>{@code
697   * CharMatcher.is('a').replaceFrom("yaha", "oo")
698   * }</pre>
699   *
700   * ... returns {@code "yoohoo"}.
701   *
702   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
703   * off calling {@link #replaceFrom(CharSequence, char)} directly.
704   *
705   * @param sequence the character sequence to replace matching characters in
706   * @param replacement the characters to append to the result string in place of each matching
707   *     character in {@code sequence}
708   * @return the new string
709   */
710  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
711    int replacementLen = replacement.length();
712    if (replacementLen == 0) {
713      return removeFrom(sequence);
714    }
715    if (replacementLen == 1) {
716      return replaceFrom(sequence, replacement.charAt(0));
717    }
718
719    String string = sequence.toString();
720    int pos = indexIn(string);
721    if (pos == -1) {
722      return string;
723    }
724
725    int len = string.length();
726    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
727
728    int oldpos = 0;
729    do {
730      buf.append(string, oldpos, pos);
731      buf.append(replacement);
732      oldpos = pos + 1;
733      pos = indexIn(string, oldpos);
734    } while (pos != -1);
735
736    buf.append(string, oldpos, len);
737    return buf.toString();
738  }
739
740  /**
741   * Returns a substring of the input character sequence that omits all matching BMP characters from
742   * the beginning and from the end of the string. For example:
743   *
744   * <pre>{@code
745   * CharMatcher.anyOf("ab").trimFrom("abacatbab")
746   * }</pre>
747   *
748   * ... returns {@code "cat"}.
749   *
750   * <p>Note that:
751   *
752   * <pre>{@code
753   * CharMatcher.inRange('\0', ' ').trimFrom(str)
754   * }</pre>
755   *
756   * ... is equivalent to {@link String#trim()}.
757   */
758  public String trimFrom(CharSequence sequence) {
759    int len = sequence.length();
760    int first;
761    int last;
762
763    for (first = 0; first < len; first++) {
764      if (!matches(sequence.charAt(first))) {
765        break;
766      }
767    }
768    for (last = len - 1; last > first; last--) {
769      if (!matches(sequence.charAt(last))) {
770        break;
771      }
772    }
773
774    return sequence.subSequence(first, last + 1).toString();
775  }
776
777  /**
778   * Returns a substring of the input character sequence that omits all matching BMP characters from
779   * the beginning of the string. For example:
780   *
781   * <pre>{@code
782   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
783   * }</pre>
784   *
785   * ... returns {@code "catbab"}.
786   */
787  public String trimLeadingFrom(CharSequence sequence) {
788    int len = sequence.length();
789    for (int first = 0; first < len; first++) {
790      if (!matches(sequence.charAt(first))) {
791        return sequence.subSequence(first, len).toString();
792      }
793    }
794    return "";
795  }
796
797  /**
798   * Returns a substring of the input character sequence that omits all matching BMP characters from
799   * the end of the string. For example:
800   *
801   * <pre>{@code
802   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
803   * }</pre>
804   *
805   * ... returns {@code "abacat"}.
806   */
807  public String trimTrailingFrom(CharSequence sequence) {
808    int len = sequence.length();
809    for (int last = len - 1; last >= 0; last--) {
810      if (!matches(sequence.charAt(last))) {
811        return sequence.subSequence(0, last + 1).toString();
812      }
813    }
814    return "";
815  }
816
817  /**
818   * Returns a string copy of the input character sequence, with each group of consecutive matching
819   * BMP characters replaced by a single replacement character. For example:
820   *
821   * <pre>{@code
822   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
823   * }</pre>
824   *
825   * ... returns {@code "b-p-r"}.
826   *
827   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
828   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
829   * character.
830   *
831   * @param sequence the character sequence to replace matching groups of characters in
832   * @param replacement the character to append to the result string in place of each group of
833   *     matching characters in {@code sequence}
834   * @return the new string
835   */
836  public String collapseFrom(CharSequence sequence, char replacement) {
837    // This implementation avoids unnecessary allocation.
838    int len = sequence.length();
839    for (int i = 0; i < len; i++) {
840      char c = sequence.charAt(i);
841      if (matches(c)) {
842        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
843          // a no-op replacement
844          i++;
845        } else {
846          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
847          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
848        }
849      }
850    }
851    // no replacement needed
852    return sequence.toString();
853  }
854
855  /**
856   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
857   * groups of matching BMP characters at the start or end of the sequence are removed without
858   * replacement.
859   */
860  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
861    // This implementation avoids unnecessary allocation.
862    int len = sequence.length();
863    int first = 0;
864    int last = len - 1;
865
866    while (first < len && matches(sequence.charAt(first))) {
867      first++;
868    }
869
870    while (last > first && matches(sequence.charAt(last))) {
871      last--;
872    }
873
874    return (first == 0 && last == len - 1)
875        ? collapseFrom(sequence, replacement)
876        : finishCollapseFrom(
877            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
878  }
879
880  private String finishCollapseFrom(
881      CharSequence sequence,
882      int start,
883      int end,
884      char replacement,
885      StringBuilder builder,
886      boolean inMatchingGroup) {
887    for (int i = start; i < end; i++) {
888      char c = sequence.charAt(i);
889      if (matches(c)) {
890        if (!inMatchingGroup) {
891          builder.append(replacement);
892          inMatchingGroup = true;
893        }
894      } else {
895        builder.append(c);
896        inMatchingGroup = false;
897      }
898    }
899    return builder.toString();
900  }
901
902  /**
903   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
904   *     instead.
905   */
906  @Deprecated
907  @Override
908  public boolean apply(Character character) {
909    return matches(character);
910  }
911
912  /**
913   * Returns a string representation of this {@code CharMatcher}, such as {@code
914   * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
915   */
916  @Override
917  public String toString() {
918    return super.toString();
919  }
920
921  /**
922   * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where
923   * "12AB" is the four hexadecimal digits representing the 16-bit code unit.
924   */
925  private static String showCharacter(char c) {
926    String hex = "0123456789ABCDEF";
927    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
928    for (int i = 0; i < 4; i++) {
929      tmp[5 - i] = hex.charAt(c & 0xF);
930      c = (char) (c >> 4);
931    }
932    return String.copyValueOf(tmp);
933  }
934
935  // Fast matchers
936
937  /** A matcher for which precomputation will not yield any significant benefit. */
938  abstract static class FastMatcher extends CharMatcher {
939
940    @Override
941    public final CharMatcher precomputed() {
942      return this;
943    }
944
945    @Override
946    public CharMatcher negate() {
947      return new NegatedFastMatcher(this);
948    }
949  }
950
951  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
952  abstract static class NamedFastMatcher extends FastMatcher {
953
954    private final String description;
955
956    NamedFastMatcher(String description) {
957      this.description = checkNotNull(description);
958    }
959
960    @Override
961    public final String toString() {
962      return description;
963    }
964  }
965
966  /** Negation of a {@link FastMatcher}. */
967  static class NegatedFastMatcher extends Negated {
968
969    NegatedFastMatcher(CharMatcher original) {
970      super(original);
971    }
972
973    @Override
974    public final CharMatcher precomputed() {
975      return this;
976    }
977  }
978
979  /** Fast matcher using a {@link BitSet} table of matching characters. */
980  @GwtIncompatible // used only from other GwtIncompatible code
981  private static final class BitSetMatcher extends NamedFastMatcher {
982
983    private final BitSet table;
984
985    private BitSetMatcher(BitSet table, String description) {
986      super(description);
987      if (table.length() + Long.SIZE < table.size()) {
988        table = (BitSet) table.clone();
989        // If only we could actually call BitSet.trimToSize() ourselves...
990      }
991      this.table = table;
992    }
993
994    @Override
995    public boolean matches(char c) {
996      return table.get(c);
997    }
998
999    @Override
1000    void setBits(BitSet bitSet) {
1001      bitSet.or(table);
1002    }
1003  }
1004
1005  // Static constant implementation classes
1006
1007  /** Implementation of {@link #any()}. */
1008  private static final class Any extends NamedFastMatcher {
1009
1010    static final Any INSTANCE = new Any();
1011
1012    private Any() {
1013      super("CharMatcher.any()");
1014    }
1015
1016    @Override
1017    public boolean matches(char c) {
1018      return true;
1019    }
1020
1021    @Override
1022    public int indexIn(CharSequence sequence) {
1023      return (sequence.length() == 0) ? -1 : 0;
1024    }
1025
1026    @Override
1027    public int indexIn(CharSequence sequence, int start) {
1028      int length = sequence.length();
1029      checkPositionIndex(start, length);
1030      return (start == length) ? -1 : start;
1031    }
1032
1033    @Override
1034    public int lastIndexIn(CharSequence sequence) {
1035      return sequence.length() - 1;
1036    }
1037
1038    @Override
1039    public boolean matchesAllOf(CharSequence sequence) {
1040      checkNotNull(sequence);
1041      return true;
1042    }
1043
1044    @Override
1045    public boolean matchesNoneOf(CharSequence sequence) {
1046      return sequence.length() == 0;
1047    }
1048
1049    @Override
1050    public String removeFrom(CharSequence sequence) {
1051      checkNotNull(sequence);
1052      return "";
1053    }
1054
1055    @Override
1056    public String replaceFrom(CharSequence sequence, char replacement) {
1057      char[] array = new char[sequence.length()];
1058      Arrays.fill(array, replacement);
1059      return new String(array);
1060    }
1061
1062    @Override
1063    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1064      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1065      for (int i = 0; i < sequence.length(); i++) {
1066        result.append(replacement);
1067      }
1068      return result.toString();
1069    }
1070
1071    @Override
1072    public String collapseFrom(CharSequence sequence, char replacement) {
1073      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1074    }
1075
1076    @Override
1077    public String trimFrom(CharSequence sequence) {
1078      checkNotNull(sequence);
1079      return "";
1080    }
1081
1082    @Override
1083    public int countIn(CharSequence sequence) {
1084      return sequence.length();
1085    }
1086
1087    @Override
1088    public CharMatcher and(CharMatcher other) {
1089      return checkNotNull(other);
1090    }
1091
1092    @Override
1093    public CharMatcher or(CharMatcher other) {
1094      checkNotNull(other);
1095      return this;
1096    }
1097
1098    @Override
1099    public CharMatcher negate() {
1100      return none();
1101    }
1102  }
1103
1104  /** Implementation of {@link #none()}. */
1105  private static final class None extends NamedFastMatcher {
1106
1107    static final None INSTANCE = new None();
1108
1109    private None() {
1110      super("CharMatcher.none()");
1111    }
1112
1113    @Override
1114    public boolean matches(char c) {
1115      return false;
1116    }
1117
1118    @Override
1119    public int indexIn(CharSequence sequence) {
1120      checkNotNull(sequence);
1121      return -1;
1122    }
1123
1124    @Override
1125    public int indexIn(CharSequence sequence, int start) {
1126      int length = sequence.length();
1127      checkPositionIndex(start, length);
1128      return -1;
1129    }
1130
1131    @Override
1132    public int lastIndexIn(CharSequence sequence) {
1133      checkNotNull(sequence);
1134      return -1;
1135    }
1136
1137    @Override
1138    public boolean matchesAllOf(CharSequence sequence) {
1139      return sequence.length() == 0;
1140    }
1141
1142    @Override
1143    public boolean matchesNoneOf(CharSequence sequence) {
1144      checkNotNull(sequence);
1145      return true;
1146    }
1147
1148    @Override
1149    public String removeFrom(CharSequence sequence) {
1150      return sequence.toString();
1151    }
1152
1153    @Override
1154    public String replaceFrom(CharSequence sequence, char replacement) {
1155      return sequence.toString();
1156    }
1157
1158    @Override
1159    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1160      checkNotNull(replacement);
1161      return sequence.toString();
1162    }
1163
1164    @Override
1165    public String collapseFrom(CharSequence sequence, char replacement) {
1166      return sequence.toString();
1167    }
1168
1169    @Override
1170    public String trimFrom(CharSequence sequence) {
1171      return sequence.toString();
1172    }
1173
1174    @Override
1175    public String trimLeadingFrom(CharSequence sequence) {
1176      return sequence.toString();
1177    }
1178
1179    @Override
1180    public String trimTrailingFrom(CharSequence sequence) {
1181      return sequence.toString();
1182    }
1183
1184    @Override
1185    public int countIn(CharSequence sequence) {
1186      checkNotNull(sequence);
1187      return 0;
1188    }
1189
1190    @Override
1191    public CharMatcher and(CharMatcher other) {
1192      checkNotNull(other);
1193      return this;
1194    }
1195
1196    @Override
1197    public CharMatcher or(CharMatcher other) {
1198      return checkNotNull(other);
1199    }
1200
1201    @Override
1202    public CharMatcher negate() {
1203      return any();
1204    }
1205  }
1206
1207  /** Implementation of {@link #whitespace()}. */
1208  @VisibleForTesting
1209  static final class Whitespace extends NamedFastMatcher {
1210
1211    static final String TABLE =
1212        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1213            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1214            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1215            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1216    static final int MULTIPLIER = 1682554634;
1217    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1218
1219    static final Whitespace INSTANCE = new Whitespace();
1220
1221    Whitespace() {
1222      super("CharMatcher.whitespace()");
1223    }
1224
1225    @Override
1226    public boolean matches(char c) {
1227      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1228    }
1229
1230    @GwtIncompatible // used only from other GwtIncompatible code
1231    @Override
1232    void setBits(BitSet table) {
1233      for (int i = 0; i < TABLE.length(); i++) {
1234        table.set(TABLE.charAt(i));
1235      }
1236    }
1237  }
1238
1239  /** Implementation of {@link #breakingWhitespace()}. */
1240  private static final class BreakingWhitespace extends CharMatcher {
1241
1242    static final CharMatcher INSTANCE = new BreakingWhitespace();
1243
1244    @Override
1245    public boolean matches(char c) {
1246      switch (c) {
1247        case '\t':
1248        case '\n':
1249        case '\013':
1250        case '\f':
1251        case '\r':
1252        case ' ':
1253        case '\u0085':
1254        case '\u1680':
1255        case '\u2028':
1256        case '\u2029':
1257        case '\u205f':
1258        case '\u3000':
1259          return true;
1260        case '\u2007':
1261          return false;
1262        default:
1263          return c >= '\u2000' && c <= '\u200a';
1264      }
1265    }
1266
1267    @Override
1268    public String toString() {
1269      return "CharMatcher.breakingWhitespace()";
1270    }
1271  }
1272
1273  /** Implementation of {@link #ascii()}. */
1274  private static final class Ascii extends NamedFastMatcher {
1275
1276    static final Ascii INSTANCE = new Ascii();
1277
1278    Ascii() {
1279      super("CharMatcher.ascii()");
1280    }
1281
1282    @Override
1283    public boolean matches(char c) {
1284      return c <= '\u007f';
1285    }
1286  }
1287
1288  /** Implementation that matches characters that fall within multiple ranges. */
1289  private static class RangesMatcher extends CharMatcher {
1290
1291    private final String description;
1292    private final char[] rangeStarts;
1293    private final char[] rangeEnds;
1294
1295    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1296      this.description = description;
1297      this.rangeStarts = rangeStarts;
1298      this.rangeEnds = rangeEnds;
1299      checkArgument(rangeStarts.length == rangeEnds.length);
1300      for (int i = 0; i < rangeStarts.length; i++) {
1301        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1302        if (i + 1 < rangeStarts.length) {
1303          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1304        }
1305      }
1306    }
1307
1308    @Override
1309    public boolean matches(char c) {
1310      int index = Arrays.binarySearch(rangeStarts, c);
1311      if (index >= 0) {
1312        return true;
1313      } else {
1314        index = ~index - 1;
1315        return index >= 0 && c <= rangeEnds[index];
1316      }
1317    }
1318
1319    @Override
1320    public String toString() {
1321      return description;
1322    }
1323  }
1324
1325  /** Implementation of {@link #digit()}. */
1326  private static final class Digit extends RangesMatcher {
1327    // Plug the following UnicodeSet pattern into
1328    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1329    // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]]
1330    // and get the zeroes from there.
1331
1332    // Must be in ascending order.
1333    private static final String ZEROES =
1334        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6"
1335            + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0"
1336            + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10";
1337
1338    private static char[] zeroes() {
1339      return ZEROES.toCharArray();
1340    }
1341
1342    private static char[] nines() {
1343      char[] nines = new char[ZEROES.length()];
1344      for (int i = 0; i < ZEROES.length(); i++) {
1345        nines[i] = (char) (ZEROES.charAt(i) + 9);
1346      }
1347      return nines;
1348    }
1349
1350    static final Digit INSTANCE = new Digit();
1351
1352    private Digit() {
1353      super("CharMatcher.digit()", zeroes(), nines());
1354    }
1355  }
1356
1357  /** Implementation of {@link #javaDigit()}. */
1358  private static final class JavaDigit extends CharMatcher {
1359
1360    static final JavaDigit INSTANCE = new JavaDigit();
1361
1362    @Override
1363    public boolean matches(char c) {
1364      return Character.isDigit(c);
1365    }
1366
1367    @Override
1368    public String toString() {
1369      return "CharMatcher.javaDigit()";
1370    }
1371  }
1372
1373  /** Implementation of {@link #javaLetter()}. */
1374  private static final class JavaLetter extends CharMatcher {
1375
1376    static final JavaLetter INSTANCE = new JavaLetter();
1377
1378    @Override
1379    public boolean matches(char c) {
1380      return Character.isLetter(c);
1381    }
1382
1383    @Override
1384    public String toString() {
1385      return "CharMatcher.javaLetter()";
1386    }
1387  }
1388
1389  /** Implementation of {@link #javaLetterOrDigit()}. */
1390  private static final class JavaLetterOrDigit extends CharMatcher {
1391
1392    static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit();
1393
1394    @Override
1395    public boolean matches(char c) {
1396      return Character.isLetterOrDigit(c);
1397    }
1398
1399    @Override
1400    public String toString() {
1401      return "CharMatcher.javaLetterOrDigit()";
1402    }
1403  }
1404
1405  /** Implementation of {@link #javaUpperCase()}. */
1406  private static final class JavaUpperCase extends CharMatcher {
1407
1408    static final JavaUpperCase INSTANCE = new JavaUpperCase();
1409
1410    @Override
1411    public boolean matches(char c) {
1412      return Character.isUpperCase(c);
1413    }
1414
1415    @Override
1416    public String toString() {
1417      return "CharMatcher.javaUpperCase()";
1418    }
1419  }
1420
1421  /** Implementation of {@link #javaLowerCase()}. */
1422  private static final class JavaLowerCase extends CharMatcher {
1423
1424    static final JavaLowerCase INSTANCE = new JavaLowerCase();
1425
1426    @Override
1427    public boolean matches(char c) {
1428      return Character.isLowerCase(c);
1429    }
1430
1431    @Override
1432    public String toString() {
1433      return "CharMatcher.javaLowerCase()";
1434    }
1435  }
1436
1437  /** Implementation of {@link #javaIsoControl()}. */
1438  private static final class JavaIsoControl extends NamedFastMatcher {
1439
1440    static final JavaIsoControl INSTANCE = new JavaIsoControl();
1441
1442    private JavaIsoControl() {
1443      super("CharMatcher.javaIsoControl()");
1444    }
1445
1446    @Override
1447    public boolean matches(char c) {
1448      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1449    }
1450  }
1451
1452  /** Implementation of {@link #invisible()}. */
1453  private static final class Invisible extends RangesMatcher {
1454    // Plug the following UnicodeSet pattern into
1455    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1456    // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]]
1457    // with the "Abbreviate" option, and get the ranges from there.
1458    private static final String RANGE_STARTS =
1459        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u08e2\u1680\u180e\u2000\u2028\u205f\u2066"
1460            + "\u3000\ud800\ufeff\ufff9";
1461    private static final String RANGE_ENDS = // inclusive ends
1462        "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u08e2\u1680\u180e\u200f\u202f\u2064\u206f"
1463            + "\u3000\uf8ff\ufeff\ufffb";
1464
1465    static final Invisible INSTANCE = new Invisible();
1466
1467    private Invisible() {
1468      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1469    }
1470  }
1471
1472  /** Implementation of {@link #singleWidth()}. */
1473  private static final class SingleWidth extends RangesMatcher {
1474
1475    static final SingleWidth INSTANCE = new SingleWidth();
1476
1477    private SingleWidth() {
1478      super(
1479          "CharMatcher.singleWidth()",
1480          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1481          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1482    }
1483  }
1484
1485  // Non-static factory implementation classes
1486
1487  /** Implementation of {@link #negate()}. */
1488  private static class Negated extends CharMatcher {
1489
1490    final CharMatcher original;
1491
1492    Negated(CharMatcher original) {
1493      this.original = checkNotNull(original);
1494    }
1495
1496    @Override
1497    public boolean matches(char c) {
1498      return !original.matches(c);
1499    }
1500
1501    @Override
1502    public boolean matchesAllOf(CharSequence sequence) {
1503      return original.matchesNoneOf(sequence);
1504    }
1505
1506    @Override
1507    public boolean matchesNoneOf(CharSequence sequence) {
1508      return original.matchesAllOf(sequence);
1509    }
1510
1511    @Override
1512    public int countIn(CharSequence sequence) {
1513      return sequence.length() - original.countIn(sequence);
1514    }
1515
1516    @GwtIncompatible // used only from other GwtIncompatible code
1517    @Override
1518    void setBits(BitSet table) {
1519      BitSet tmp = new BitSet();
1520      original.setBits(tmp);
1521      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1522      table.or(tmp);
1523    }
1524
1525    @Override
1526    public CharMatcher negate() {
1527      return original;
1528    }
1529
1530    @Override
1531    public String toString() {
1532      return original + ".negate()";
1533    }
1534  }
1535
1536  /** Implementation of {@link #and(CharMatcher)}. */
1537  private static final class And extends CharMatcher {
1538
1539    final CharMatcher first;
1540    final CharMatcher second;
1541
1542    And(CharMatcher a, CharMatcher b) {
1543      first = checkNotNull(a);
1544      second = checkNotNull(b);
1545    }
1546
1547    @Override
1548    public boolean matches(char c) {
1549      return first.matches(c) && second.matches(c);
1550    }
1551
1552    @GwtIncompatible // used only from other GwtIncompatible code
1553    @Override
1554    void setBits(BitSet table) {
1555      BitSet tmp1 = new BitSet();
1556      first.setBits(tmp1);
1557      BitSet tmp2 = new BitSet();
1558      second.setBits(tmp2);
1559      tmp1.and(tmp2);
1560      table.or(tmp1);
1561    }
1562
1563    @Override
1564    public String toString() {
1565      return "CharMatcher.and(" + first + ", " + second + ")";
1566    }
1567  }
1568
1569  /** Implementation of {@link #or(CharMatcher)}. */
1570  private static final class Or extends CharMatcher {
1571
1572    final CharMatcher first;
1573    final CharMatcher second;
1574
1575    Or(CharMatcher a, CharMatcher b) {
1576      first = checkNotNull(a);
1577      second = checkNotNull(b);
1578    }
1579
1580    @GwtIncompatible // used only from other GwtIncompatible code
1581    @Override
1582    void setBits(BitSet table) {
1583      first.setBits(table);
1584      second.setBits(table);
1585    }
1586
1587    @Override
1588    public boolean matches(char c) {
1589      return first.matches(c) || second.matches(c);
1590    }
1591
1592    @Override
1593    public String toString() {
1594      return "CharMatcher.or(" + first + ", " + second + ")";
1595    }
1596  }
1597
1598  // Static factory implementations
1599
1600  /** Implementation of {@link #is(char)}. */
1601  private static final class Is extends FastMatcher {
1602
1603    private final char match;
1604
1605    Is(char match) {
1606      this.match = match;
1607    }
1608
1609    @Override
1610    public boolean matches(char c) {
1611      return c == match;
1612    }
1613
1614    @Override
1615    public String replaceFrom(CharSequence sequence, char replacement) {
1616      return sequence.toString().replace(match, replacement);
1617    }
1618
1619    @Override
1620    public CharMatcher and(CharMatcher other) {
1621      return other.matches(match) ? this : none();
1622    }
1623
1624    @Override
1625    public CharMatcher or(CharMatcher other) {
1626      return other.matches(match) ? other : super.or(other);
1627    }
1628
1629    @Override
1630    public CharMatcher negate() {
1631      return isNot(match);
1632    }
1633
1634    @GwtIncompatible // used only from other GwtIncompatible code
1635    @Override
1636    void setBits(BitSet table) {
1637      table.set(match);
1638    }
1639
1640    @Override
1641    public String toString() {
1642      return "CharMatcher.is('" + showCharacter(match) + "')";
1643    }
1644  }
1645
1646  /** Implementation of {@link #isNot(char)}. */
1647  private static final class IsNot extends FastMatcher {
1648
1649    private final char match;
1650
1651    IsNot(char match) {
1652      this.match = match;
1653    }
1654
1655    @Override
1656    public boolean matches(char c) {
1657      return c != match;
1658    }
1659
1660    @Override
1661    public CharMatcher and(CharMatcher other) {
1662      return other.matches(match) ? super.and(other) : other;
1663    }
1664
1665    @Override
1666    public CharMatcher or(CharMatcher other) {
1667      return other.matches(match) ? any() : this;
1668    }
1669
1670    @GwtIncompatible // used only from other GwtIncompatible code
1671    @Override
1672    void setBits(BitSet table) {
1673      table.set(0, match);
1674      table.set(match + 1, Character.MAX_VALUE + 1);
1675    }
1676
1677    @Override
1678    public CharMatcher negate() {
1679      return is(match);
1680    }
1681
1682    @Override
1683    public String toString() {
1684      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1685    }
1686  }
1687
1688  private static CharMatcher.IsEither isEither(char c1, char c2) {
1689    return new CharMatcher.IsEither(c1, c2);
1690  }
1691
1692  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1693  private static final class IsEither extends FastMatcher {
1694
1695    private final char match1;
1696    private final char match2;
1697
1698    IsEither(char match1, char match2) {
1699      this.match1 = match1;
1700      this.match2 = match2;
1701    }
1702
1703    @Override
1704    public boolean matches(char c) {
1705      return c == match1 || c == match2;
1706    }
1707
1708    @GwtIncompatible // used only from other GwtIncompatible code
1709    @Override
1710    void setBits(BitSet table) {
1711      table.set(match1);
1712      table.set(match2);
1713    }
1714
1715    @Override
1716    public String toString() {
1717      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1718    }
1719  }
1720
1721  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1722  private static final class AnyOf extends CharMatcher {
1723
1724    private final char[] chars;
1725
1726    public AnyOf(CharSequence chars) {
1727      this.chars = chars.toString().toCharArray();
1728      Arrays.sort(this.chars);
1729    }
1730
1731    @Override
1732    public boolean matches(char c) {
1733      return Arrays.binarySearch(chars, c) >= 0;
1734    }
1735
1736    @Override
1737    @GwtIncompatible // used only from other GwtIncompatible code
1738    void setBits(BitSet table) {
1739      for (char c : chars) {
1740        table.set(c);
1741      }
1742    }
1743
1744    @Override
1745    public String toString() {
1746      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1747      for (char c : chars) {
1748        description.append(showCharacter(c));
1749      }
1750      description.append("\")");
1751      return description.toString();
1752    }
1753  }
1754
1755  /** Implementation of {@link #inRange(char, char)}. */
1756  private static final class InRange extends FastMatcher {
1757
1758    private final char startInclusive;
1759    private final char endInclusive;
1760
1761    InRange(char startInclusive, char endInclusive) {
1762      checkArgument(endInclusive >= startInclusive);
1763      this.startInclusive = startInclusive;
1764      this.endInclusive = endInclusive;
1765    }
1766
1767    @Override
1768    public boolean matches(char c) {
1769      return startInclusive <= c && c <= endInclusive;
1770    }
1771
1772    @GwtIncompatible // used only from other GwtIncompatible code
1773    @Override
1774    void setBits(BitSet table) {
1775      table.set(startInclusive, endInclusive + 1);
1776    }
1777
1778    @Override
1779    public String toString() {
1780      return "CharMatcher.inRange('"
1781          + showCharacter(startInclusive)
1782          + "', '"
1783          + showCharacter(endInclusive)
1784          + "')";
1785    }
1786  }
1787
1788  /** Implementation of {@link #forPredicate(Predicate)}. */
1789  private static final class ForPredicate extends CharMatcher {
1790
1791    private final Predicate<? super Character> predicate;
1792
1793    ForPredicate(Predicate<? super Character> predicate) {
1794      this.predicate = checkNotNull(predicate);
1795    }
1796
1797    @Override
1798    public boolean matches(char c) {
1799      return predicate.apply(c);
1800    }
1801
1802    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1803    @Override
1804    public boolean apply(Character character) {
1805      return predicate.apply(checkNotNull(character));
1806    }
1807
1808    @Override
1809    public String toString() {
1810      return "CharMatcher.forPredicate(" + predicate + ")";
1811    }
1812  }
1813}