001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.VisibleForTesting;
024import java.util.Arrays;
025import java.util.BitSet;
026
027/**
028 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
029 * for any {@link Object}. Also offers basic text processing methods based on this function.
030 * Implementations are strongly encouraged to be side-effect-free and immutable.
031 *
032 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
033 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
034 *
035 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a
036 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand
037 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code
038 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of
039 * assigned characters, including important CJK characters and emoji.
040 *
041 * <p>Supplementary characters are <a
042 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded
043 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as
044 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s.
045 *
046 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for
047 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For
048 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner.
049 *
050 * <p>Example usages:
051 *
052 * <pre>
053 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
054 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
055 *
056 * <p>See the Guava User Guide article on <a
057 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
058 * </a>.
059 *
060 * @author Kevin Bourrillion
061 * @since 1.0
062 */
063@GwtCompatible(emulated = true)
064public abstract class CharMatcher implements Predicate<Character> {
065  /*
066   *           N777777777NO
067   *         N7777777777777N
068   *        M777777777777777N
069   *        $N877777777D77777M
070   *       N M77777777ONND777M
071   *       MN777777777NN  D777
072   *     N7ZN777777777NN ~M7778
073   *    N777777777777MMNN88777N
074   *    N777777777777MNZZZ7777O
075   *    DZN7777O77777777777777
076   *     N7OONND7777777D77777N
077   *      8$M++++?N???$77777$
078   *       M7++++N+M77777777N
079   *        N77O777777777777$                              M
080   *          DNNM$$$$777777N                              D
081   *         N$N:=N$777N7777M                             NZ
082   *        77Z::::N777777777                          ODZZZ
083   *       77N::::::N77777777M                         NNZZZ$
084   *     $777:::::::77777777MN                        ZM8ZZZZZ
085   *     777M::::::Z7777777Z77                        N++ZZZZNN
086   *    7777M:::::M7777777$777M                       $++IZZZZM
087   *   M777$:::::N777777$M7777M                       +++++ZZZDN
088   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
089   *       N::::::N:7$O:77777777                      N++++ZZZZN
090   *       M::::::::::::N77777777+                   +?+++++ZZZM
091   *       8::::::::::::D77777777M                    O+++++ZZ
092   *        ::::::::::::M777777777N                      O+?D
093   *        M:::::::::::M77777777778                     77=
094   *        D=::::::::::N7777777777N                    777
095   *       INN===::::::=77777777777N                  I777N
096   *      ?777N========N7777777777787M               N7777
097   *      77777$D======N77777777777N777N?         N777777
098   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
099   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
100   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
101   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
102   *   7 :N MNN$$$$M$      $$$777$8      8D8I
103   *     NMM.:7O           777777778
104   *                       7777777MN
105   *                       M NO .7:
106   *                       M   :   M
107   *                            8
108   */
109
110  // Constant matcher factory methods
111
112  /**
113   * Matches any character.
114   *
115   * @since 19.0 (since 1.0 as constant {@code ANY})
116   */
117  public static CharMatcher any() {
118    return Any.INSTANCE;
119  }
120
121  /**
122   * Matches no characters.
123   *
124   * @since 19.0 (since 1.0 as constant {@code NONE})
125   */
126  public static CharMatcher none() {
127    return None.INSTANCE;
128  }
129
130  /**
131   * Determines whether a character is whitespace according to the latest Unicode standard, as
132   * illustrated <a
133   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
134   * This is not the same definition used by other Java APIs. (See a <a
135   * href="https://goo.gl/Y6SLWx">comparison of several definitions of "whitespace"</a>.)
136   *
137   * <p>All Unicode White_Space characters are on the BMP and thus supported by this API.
138   *
139   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
140   * date.
141   *
142   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
143   */
144  public static CharMatcher whitespace() {
145    return Whitespace.INSTANCE;
146  }
147
148  /**
149   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
150   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
151   * discussion of that term.
152   *
153   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
154   */
155  public static CharMatcher breakingWhitespace() {
156    return BreakingWhitespace.INSTANCE;
157  }
158
159  /**
160   * Determines whether a character is ASCII, meaning that its code point is less than 128.
161   *
162   * @since 19.0 (since 1.0 as constant {@code ASCII})
163   */
164  public static CharMatcher ascii() {
165    return Ascii.INSTANCE;
166  }
167
168  /**
169   * Determines whether a character is a BMP digit according to <a
170   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
171   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
172   *
173   * @deprecated Many digits are supplementary characters; see the class documentation.
174   * @since 19.0 (since 1.0 as constant {@code DIGIT})
175   */
176  @Deprecated
177  public static CharMatcher digit() {
178    return Digit.INSTANCE;
179  }
180
181  /**
182   * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char)
183   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
184   * '9')}.
185   *
186   * @deprecated Many digits are supplementary characters; see the class documentation.
187   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
188   */
189  @Deprecated
190  public static CharMatcher javaDigit() {
191    return JavaDigit.INSTANCE;
192  }
193
194  /**
195   * Determines whether a character is a BMP letter according to {@linkplain
196   * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
197   * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
198   *
199   * @deprecated Most letters are supplementary characters; see the class documentation.
200   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
201   */
202  @Deprecated
203  public static CharMatcher javaLetter() {
204    return JavaLetter.INSTANCE;
205  }
206
207  /**
208   * Determines whether a character is a BMP letter or digit according to {@linkplain
209   * Character#isLetterOrDigit(char) Java's definition}.
210   *
211   * @deprecated Most letters and digits are supplementary characters; see the class documentation.
212   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}).
213   */
214  @Deprecated
215  public static CharMatcher javaLetterOrDigit() {
216    return JavaLetterOrDigit.INSTANCE;
217  }
218
219  /**
220   * Determines whether a BMP character is upper case according to {@linkplain
221   * Character#isUpperCase(char) Java's definition}.
222   *
223   * @deprecated Some uppercase characters are supplementary characters; see the class
224   *     documentation.
225   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
226   */
227  @Deprecated
228  public static CharMatcher javaUpperCase() {
229    return JavaUpperCase.INSTANCE;
230  }
231
232  /**
233   * Determines whether a BMP character is lower case according to {@linkplain
234   * Character#isLowerCase(char) Java's definition}.
235   *
236   * @deprecated Some lowercase characters are supplementary characters; see the class
237   *     documentation.
238   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
239   */
240  @Deprecated
241  public static CharMatcher javaLowerCase() {
242    return JavaLowerCase.INSTANCE;
243  }
244
245  /**
246   * Determines whether a character is an ISO control character as specified by {@link
247   * Character#isISOControl(char)}.
248   *
249   * <p>All ISO control codes are on the BMP and thus supported by this API.
250   *
251   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
252   */
253  public static CharMatcher javaIsoControl() {
254    return JavaIsoControl.INSTANCE;
255  }
256
257  /**
258   * Determines whether a character is invisible; that is, if its Unicode category is any of
259   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
260   * PRIVATE_USE according to ICU4J.
261   *
262   * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU).
263   *
264   * @deprecated Most invisible characters are supplementary characters; see the class
265   *     documentation.
266   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
267   */
268  @Deprecated
269  public static CharMatcher invisible() {
270    return Invisible.INSTANCE;
271  }
272
273  /**
274   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
275   * errs on the side of returning {@code false} (that is, it tends to assume a character is
276   * double-width).
277   *
278   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
279   * date.
280   *
281   * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>.
282   *
283   * @deprecated Many such characters are supplementary characters; see the class documentation.
284   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
285   */
286  @Deprecated
287  public static CharMatcher singleWidth() {
288    return SingleWidth.INSTANCE;
289  }
290
291  // Static factories
292
293  /** Returns a {@code char} matcher that matches only one specified BMP character. */
294  public static CharMatcher is(final char match) {
295    return new Is(match);
296  }
297
298  /**
299   * Returns a {@code char} matcher that matches any character except the BMP character specified.
300   *
301   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
302   */
303  public static CharMatcher isNot(final char match) {
304    return new IsNot(match);
305  }
306
307  /**
308   * Returns a {@code char} matcher that matches any BMP character present in the given character
309   * sequence. Returns a bogus matcher if the sequence contains supplementary characters.
310   */
311  public static CharMatcher anyOf(final CharSequence sequence) {
312    switch (sequence.length()) {
313      case 0:
314        return none();
315      case 1:
316        return is(sequence.charAt(0));
317      case 2:
318        return isEither(sequence.charAt(0), sequence.charAt(1));
319      default:
320        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
321        // matcher?
322        return new AnyOf(sequence);
323    }
324  }
325
326  /**
327   * Returns a {@code char} matcher that matches any BMP character not present in the given
328   * character sequence. Returns a bogus matcher if the sequence contains supplementary characters.
329   */
330  public static CharMatcher noneOf(CharSequence sequence) {
331    return anyOf(sequence).negate();
332  }
333
334  /**
335   * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints
336   * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
337   * CharMatcher.inRange('a', 'z')}.
338   *
339   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
340   */
341  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
342    return new InRange(startInclusive, endInclusive);
343  }
344
345  /**
346   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
347   * which operates on primitive {@code char} instances instead.
348   */
349  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
350    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
351  }
352
353  // Constructors
354
355  /**
356   * Constructor for use by subclasses. When subclassing, you may want to override {@code
357   * toString()} to provide a useful description.
358   */
359  protected CharMatcher() {}
360
361  // Abstract methods
362
363  /** Determines a true or false value for the given character. */
364  public abstract boolean matches(char c);
365
366  // Non-static factories
367
368  /** Returns a matcher that matches any character not matched by this matcher. */
369  // @Override under Java 8 but not under Java 7
370  public CharMatcher negate() {
371    return new Negated(this);
372  }
373
374  /**
375   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
376   */
377  public CharMatcher and(CharMatcher other) {
378    return new And(this, other);
379  }
380
381  /**
382   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
383   */
384  public CharMatcher or(CharMatcher other) {
385    return new Or(this, other);
386  }
387
388  /**
389   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
390   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
391   * worthwhile only if the precomputed matcher is queried many thousands of times.
392   *
393   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
394   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
395   * worthwhile tradeoff in a browser.
396   */
397  public CharMatcher precomputed() {
398    return Platform.precomputeCharMatcher(this);
399  }
400
401  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
402
403  /**
404   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
405   * on {@link Platform} so that we can have different behavior in GWT.
406   *
407   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
408   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
409   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
410   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
411   * matcher which is faster to query than the original.
412   */
413  @GwtIncompatible // SmallCharMatcher
414  CharMatcher precomputedInternal() {
415    final BitSet table = new BitSet();
416    setBits(table);
417    int totalCharacters = table.cardinality();
418    if (totalCharacters * 2 <= DISTINCT_CHARS) {
419      return precomputedPositive(totalCharacters, table, toString());
420    } else {
421      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
422      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
423      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
424      String suffix = ".negate()";
425      final String description = toString();
426      String negatedDescription =
427          description.endsWith(suffix)
428              ? description.substring(0, description.length() - suffix.length())
429              : description + suffix;
430      return new NegatedFastMatcher(
431          precomputedPositive(negatedCharacters, table, negatedDescription)) {
432        @Override
433        public String toString() {
434          return description;
435        }
436      };
437    }
438  }
439
440  /**
441   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
442   */
443  @GwtIncompatible // SmallCharMatcher
444  private static CharMatcher precomputedPositive(
445      int totalCharacters, BitSet table, String description) {
446    switch (totalCharacters) {
447      case 0:
448        return none();
449      case 1:
450        return is((char) table.nextSetBit(0));
451      case 2:
452        char c1 = (char) table.nextSetBit(0);
453        char c2 = (char) table.nextSetBit(c1 + 1);
454        return isEither(c1, c2);
455      default:
456        return isSmall(totalCharacters, table.length())
457            ? SmallCharMatcher.from(table, description)
458            : new BitSetMatcher(table, description);
459    }
460  }
461
462  @GwtIncompatible // SmallCharMatcher
463  private static boolean isSmall(int totalCharacters, int tableLength) {
464    return totalCharacters <= SmallCharMatcher.MAX_SIZE
465        && tableLength > (totalCharacters * 4 * Character.SIZE);
466    // err on the side of BitSetMatcher
467  }
468
469  /** Sets bits in {@code table} matched by this matcher. */
470  @GwtIncompatible // used only from other GwtIncompatible code
471  void setBits(BitSet table) {
472    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
473      if (matches((char) c)) {
474        table.set(c);
475      }
476    }
477  }
478
479  // Text processing routines
480
481  /**
482   * Returns {@code true} if a character sequence contains at least one matching BMP character.
483   * Equivalent to {@code !matchesNoneOf(sequence)}.
484   *
485   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
486   * character, until this returns {@code true} or the end is reached.
487   *
488   * @param sequence the character sequence to examine, possibly empty
489   * @return {@code true} if this matcher matches at least one character in the sequence
490   * @since 8.0
491   */
492  public boolean matchesAnyOf(CharSequence sequence) {
493    return !matchesNoneOf(sequence);
494  }
495
496  /**
497   * Returns {@code true} if a character sequence contains only matching BMP characters.
498   *
499   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
500   * character, until this returns {@code false} or the end is reached.
501   *
502   * @param sequence the character sequence to examine, possibly empty
503   * @return {@code true} if this matcher matches every character in the sequence, including when
504   *     the sequence is empty
505   */
506  public boolean matchesAllOf(CharSequence sequence) {
507    for (int i = sequence.length() - 1; i >= 0; i--) {
508      if (!matches(sequence.charAt(i))) {
509        return false;
510      }
511    }
512    return true;
513  }
514
515  /**
516   * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to
517   * {@code !matchesAnyOf(sequence)}.
518   *
519   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
520   * character, until this returns {@code true} or the end is reached.
521   *
522   * @param sequence the character sequence to examine, possibly empty
523   * @return {@code true} if this matcher matches no characters in the sequence, including when the
524   *     sequence is empty
525   */
526  public boolean matchesNoneOf(CharSequence sequence) {
527    return indexIn(sequence) == -1;
528  }
529
530  /**
531   * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if
532   * no matching character is present.
533   *
534   * <p>The default implementation iterates over the sequence in forward order calling {@link
535   * #matches} for each character.
536   *
537   * @param sequence the character sequence to examine from the beginning
538   * @return an index, or {@code -1} if no character matches
539   */
540  public int indexIn(CharSequence sequence) {
541    return indexIn(sequence, 0);
542  }
543
544  /**
545   * Returns the index of the first matching BMP character in a character sequence, starting from a
546   * given position, or {@code -1} if no character matches after that position.
547   *
548   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
549   * start}, calling {@link #matches} for each character.
550   *
551   * @param sequence the character sequence to examine
552   * @param start the first index to examine; must be nonnegative and no greater than {@code
553   *     sequence.length()}
554   * @return the index of the first matching character, guaranteed to be no less than {@code start},
555   *     or {@code -1} if no character matches
556   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
557   *     sequence.length()}
558   */
559  public int indexIn(CharSequence sequence, int start) {
560    int length = sequence.length();
561    checkPositionIndex(start, length);
562    for (int i = start; i < length; i++) {
563      if (matches(sequence.charAt(i))) {
564        return i;
565      }
566    }
567    return -1;
568  }
569
570  /**
571   * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if
572   * no matching character is present.
573   *
574   * <p>The default implementation iterates over the sequence in reverse order calling {@link
575   * #matches} for each character.
576   *
577   * @param sequence the character sequence to examine from the end
578   * @return an index, or {@code -1} if no character matches
579   */
580  public int lastIndexIn(CharSequence sequence) {
581    for (int i = sequence.length() - 1; i >= 0; i--) {
582      if (matches(sequence.charAt(i))) {
583        return i;
584      }
585    }
586    return -1;
587  }
588
589  /**
590   * Returns the number of matching {@code char}s found in a character sequence.
591   *
592   * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}().
593   */
594  public int countIn(CharSequence sequence) {
595    int count = 0;
596    for (int i = 0; i < sequence.length(); i++) {
597      if (matches(sequence.charAt(i))) {
598        count++;
599      }
600    }
601    return count;
602  }
603
604  /**
605   * Returns a string containing all non-matching characters of a character sequence, in order. For
606   * example:
607   *
608   * <pre>{@code
609   * CharMatcher.is('a').removeFrom("bazaar")
610   * }</pre>
611   *
612   * ... returns {@code "bzr"}.
613   */
614  public String removeFrom(CharSequence sequence) {
615    String string = sequence.toString();
616    int pos = indexIn(string);
617    if (pos == -1) {
618      return string;
619    }
620
621    char[] chars = string.toCharArray();
622    int spread = 1;
623
624    // This unusual loop comes from extensive benchmarking
625    OUT:
626    while (true) {
627      pos++;
628      while (true) {
629        if (pos == chars.length) {
630          break OUT;
631        }
632        if (matches(chars[pos])) {
633          break;
634        }
635        chars[pos - spread] = chars[pos];
636        pos++;
637      }
638      spread++;
639    }
640    return new String(chars, 0, pos - spread);
641  }
642
643  /**
644   * Returns a string containing all matching BMP characters of a character sequence, in order. For
645   * example:
646   *
647   * <pre>{@code
648   * CharMatcher.is('a').retainFrom("bazaar")
649   * }</pre>
650   *
651   * ... returns {@code "aaa"}.
652   */
653  public String retainFrom(CharSequence sequence) {
654    return negate().removeFrom(sequence);
655  }
656
657  /**
658   * Returns a string copy of the input character sequence, with each matching BMP character
659   * replaced by a given replacement character. For example:
660   *
661   * <pre>{@code
662   * CharMatcher.is('a').replaceFrom("radar", 'o')
663   * }</pre>
664   *
665   * ... returns {@code "rodor"}.
666   *
667   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
668   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
669   * character.
670   *
671   * @param sequence the character sequence to replace matching characters in
672   * @param replacement the character to append to the result string in place of each matching
673   *     character in {@code sequence}
674   * @return the new string
675   */
676  public String replaceFrom(CharSequence sequence, char replacement) {
677    String string = sequence.toString();
678    int pos = indexIn(string);
679    if (pos == -1) {
680      return string;
681    }
682    char[] chars = string.toCharArray();
683    chars[pos] = replacement;
684    for (int i = pos + 1; i < chars.length; i++) {
685      if (matches(chars[i])) {
686        chars[i] = replacement;
687      }
688    }
689    return new String(chars);
690  }
691
692  /**
693   * Returns a string copy of the input character sequence, with each matching BMP character
694   * replaced by a given replacement sequence. For example:
695   *
696   * <pre>{@code
697   * CharMatcher.is('a').replaceFrom("yaha", "oo")
698   * }</pre>
699   *
700   * ... returns {@code "yoohoo"}.
701   *
702   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
703   * off calling {@link #replaceFrom(CharSequence, char)} directly.
704   *
705   * @param sequence the character sequence to replace matching characters in
706   * @param replacement the characters to append to the result string in place of each matching
707   *     character in {@code sequence}
708   * @return the new string
709   */
710  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
711    int replacementLen = replacement.length();
712    if (replacementLen == 0) {
713      return removeFrom(sequence);
714    }
715    if (replacementLen == 1) {
716      return replaceFrom(sequence, replacement.charAt(0));
717    }
718
719    String string = sequence.toString();
720    int pos = indexIn(string);
721    if (pos == -1) {
722      return string;
723    }
724
725    int len = string.length();
726    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
727
728    int oldpos = 0;
729    do {
730      buf.append(string, oldpos, pos);
731      buf.append(replacement);
732      oldpos = pos + 1;
733      pos = indexIn(string, oldpos);
734    } while (pos != -1);
735
736    buf.append(string, oldpos, len);
737    return buf.toString();
738  }
739
740  /**
741   * Returns a substring of the input character sequence that omits all matching BMP characters from
742   * the beginning and from the end of the string. For example:
743   *
744   * <pre>{@code
745   * CharMatcher.anyOf("ab").trimFrom("abacatbab")
746   * }</pre>
747   *
748   * ... returns {@code "cat"}.
749   *
750   * <p>Note that:
751   *
752   * <pre>{@code
753   * CharMatcher.inRange('\0', ' ').trimFrom(str)
754   * }</pre>
755   *
756   * ... is equivalent to {@link String#trim()}.
757   */
758  public String trimFrom(CharSequence sequence) {
759    int len = sequence.length();
760    int first;
761    int last;
762
763    for (first = 0; first < len; first++) {
764      if (!matches(sequence.charAt(first))) {
765        break;
766      }
767    }
768    for (last = len - 1; last > first; last--) {
769      if (!matches(sequence.charAt(last))) {
770        break;
771      }
772    }
773
774    return sequence.subSequence(first, last + 1).toString();
775  }
776
777  /**
778   * Returns a substring of the input character sequence that omits all matching BMP characters from
779   * the beginning of the string. For example:
780   *
781   * <pre>{@code
782   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
783   * }</pre>
784   *
785   * ... returns {@code "catbab"}.
786   */
787  public String trimLeadingFrom(CharSequence sequence) {
788    int len = sequence.length();
789    for (int first = 0; first < len; first++) {
790      if (!matches(sequence.charAt(first))) {
791        return sequence.subSequence(first, len).toString();
792      }
793    }
794    return "";
795  }
796
797  /**
798   * Returns a substring of the input character sequence that omits all matching BMP characters from
799   * the end of the string. For example:
800   *
801   * <pre>{@code
802   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
803   * }</pre>
804   *
805   * ... returns {@code "abacat"}.
806   */
807  public String trimTrailingFrom(CharSequence sequence) {
808    int len = sequence.length();
809    for (int last = len - 1; last >= 0; last--) {
810      if (!matches(sequence.charAt(last))) {
811        return sequence.subSequence(0, last + 1).toString();
812      }
813    }
814    return "";
815  }
816
817  /**
818   * Returns a string copy of the input character sequence, with each group of consecutive matching
819   * BMP characters replaced by a single replacement character. For example:
820   *
821   * <pre>{@code
822   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
823   * }</pre>
824   *
825   * ... returns {@code "b-p-r"}.
826   *
827   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
828   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
829   * character.
830   *
831   * @param sequence the character sequence to replace matching groups of characters in
832   * @param replacement the character to append to the result string in place of each group of
833   *     matching characters in {@code sequence}
834   * @return the new string
835   */
836  public String collapseFrom(CharSequence sequence, char replacement) {
837    // This implementation avoids unnecessary allocation.
838    int len = sequence.length();
839    for (int i = 0; i < len; i++) {
840      char c = sequence.charAt(i);
841      if (matches(c)) {
842        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
843          // a no-op replacement
844          i++;
845        } else {
846          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
847          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
848        }
849      }
850    }
851    // no replacement needed
852    return sequence.toString();
853  }
854
855  /**
856   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
857   * groups of matching BMP characters at the start or end of the sequence are removed without
858   * replacement.
859   */
860  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
861    // This implementation avoids unnecessary allocation.
862    int len = sequence.length();
863    int first = 0;
864    int last = len - 1;
865
866    while (first < len && matches(sequence.charAt(first))) {
867      first++;
868    }
869
870    while (last > first && matches(sequence.charAt(last))) {
871      last--;
872    }
873
874    return (first == 0 && last == len - 1)
875        ? collapseFrom(sequence, replacement)
876        : finishCollapseFrom(
877            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
878  }
879
880  private String finishCollapseFrom(
881      CharSequence sequence,
882      int start,
883      int end,
884      char replacement,
885      StringBuilder builder,
886      boolean inMatchingGroup) {
887    for (int i = start; i < end; i++) {
888      char c = sequence.charAt(i);
889      if (matches(c)) {
890        if (!inMatchingGroup) {
891          builder.append(replacement);
892          inMatchingGroup = true;
893        }
894      } else {
895        builder.append(c);
896        inMatchingGroup = false;
897      }
898    }
899    return builder.toString();
900  }
901
902  /**
903   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
904   *     instead.
905   */
906  @Deprecated
907  @Override
908  public boolean apply(Character character) {
909    return matches(character);
910  }
911
912  /**
913   * Returns a string representation of this {@code CharMatcher}, such as {@code
914   * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
915   */
916  @Override
917  public String toString() {
918    return super.toString();
919  }
920
921  /**
922   * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where
923   * "12AB" is the four hexadecimal digits representing the 16-bit code unit.
924   */
925  private static String showCharacter(char c) {
926    String hex = "0123456789ABCDEF";
927    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
928    for (int i = 0; i < 4; i++) {
929      tmp[5 - i] = hex.charAt(c & 0xF);
930      c = (char) (c >> 4);
931    }
932    return String.copyValueOf(tmp);
933  }
934
935  // Fast matchers
936
937  /** A matcher for which precomputation will not yield any significant benefit. */
938  abstract static class FastMatcher extends CharMatcher {
939
940    @Override
941    public final CharMatcher precomputed() {
942      return this;
943    }
944
945    @Override
946    public CharMatcher negate() {
947      return new NegatedFastMatcher(this);
948    }
949  }
950
951  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
952  abstract static class NamedFastMatcher extends FastMatcher {
953
954    private final String description;
955
956    NamedFastMatcher(String description) {
957      this.description = checkNotNull(description);
958    }
959
960    @Override
961    public final String toString() {
962      return description;
963    }
964  }
965
966  /** Negation of a {@link FastMatcher}. */
967  static class NegatedFastMatcher extends Negated {
968
969    NegatedFastMatcher(CharMatcher original) {
970      super(original);
971    }
972
973    @Override
974    public final CharMatcher precomputed() {
975      return this;
976    }
977  }
978
979  /** Fast matcher using a {@link BitSet} table of matching characters. */
980  @GwtIncompatible // used only from other GwtIncompatible code
981  private static final class BitSetMatcher extends NamedFastMatcher {
982
983    private final BitSet table;
984
985    private BitSetMatcher(BitSet table, String description) {
986      super(description);
987      if (table.length() + Long.SIZE < table.size()) {
988        table = (BitSet) table.clone();
989        // If only we could actually call BitSet.trimToSize() ourselves...
990      }
991      this.table = table;
992    }
993
994    @Override
995    public boolean matches(char c) {
996      return table.get(c);
997    }
998
999    @Override
1000    void setBits(BitSet bitSet) {
1001      bitSet.or(table);
1002    }
1003  }
1004
1005  // Static constant implementation classes
1006
1007  /** Implementation of {@link #any()}. */
1008  private static final class Any extends NamedFastMatcher {
1009
1010    static final Any INSTANCE = new Any();
1011
1012    private Any() {
1013      super("CharMatcher.any()");
1014    }
1015
1016    @Override
1017    public boolean matches(char c) {
1018      return true;
1019    }
1020
1021    @Override
1022    public int indexIn(CharSequence sequence) {
1023      return (sequence.length() == 0) ? -1 : 0;
1024    }
1025
1026    @Override
1027    public int indexIn(CharSequence sequence, int start) {
1028      int length = sequence.length();
1029      checkPositionIndex(start, length);
1030      return (start == length) ? -1 : start;
1031    }
1032
1033    @Override
1034    public int lastIndexIn(CharSequence sequence) {
1035      return sequence.length() - 1;
1036    }
1037
1038    @Override
1039    public boolean matchesAllOf(CharSequence sequence) {
1040      checkNotNull(sequence);
1041      return true;
1042    }
1043
1044    @Override
1045    public boolean matchesNoneOf(CharSequence sequence) {
1046      return sequence.length() == 0;
1047    }
1048
1049    @Override
1050    public String removeFrom(CharSequence sequence) {
1051      checkNotNull(sequence);
1052      return "";
1053    }
1054
1055    @Override
1056    public String replaceFrom(CharSequence sequence, char replacement) {
1057      char[] array = new char[sequence.length()];
1058      Arrays.fill(array, replacement);
1059      return new String(array);
1060    }
1061
1062    @Override
1063    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1064      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1065      for (int i = 0; i < sequence.length(); i++) {
1066        result.append(replacement);
1067      }
1068      return result.toString();
1069    }
1070
1071    @Override
1072    public String collapseFrom(CharSequence sequence, char replacement) {
1073      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1074    }
1075
1076    @Override
1077    public String trimFrom(CharSequence sequence) {
1078      checkNotNull(sequence);
1079      return "";
1080    }
1081
1082    @Override
1083    public int countIn(CharSequence sequence) {
1084      return sequence.length();
1085    }
1086
1087    @Override
1088    public CharMatcher and(CharMatcher other) {
1089      return checkNotNull(other);
1090    }
1091
1092    @Override
1093    public CharMatcher or(CharMatcher other) {
1094      checkNotNull(other);
1095      return this;
1096    }
1097
1098    @Override
1099    public CharMatcher negate() {
1100      return none();
1101    }
1102  }
1103
1104  /** Implementation of {@link #none()}. */
1105  private static final class None extends NamedFastMatcher {
1106
1107    static final None INSTANCE = new None();
1108
1109    private None() {
1110      super("CharMatcher.none()");
1111    }
1112
1113    @Override
1114    public boolean matches(char c) {
1115      return false;
1116    }
1117
1118    @Override
1119    public int indexIn(CharSequence sequence) {
1120      checkNotNull(sequence);
1121      return -1;
1122    }
1123
1124    @Override
1125    public int indexIn(CharSequence sequence, int start) {
1126      int length = sequence.length();
1127      checkPositionIndex(start, length);
1128      return -1;
1129    }
1130
1131    @Override
1132    public int lastIndexIn(CharSequence sequence) {
1133      checkNotNull(sequence);
1134      return -1;
1135    }
1136
1137    @Override
1138    public boolean matchesAllOf(CharSequence sequence) {
1139      return sequence.length() == 0;
1140    }
1141
1142    @Override
1143    public boolean matchesNoneOf(CharSequence sequence) {
1144      checkNotNull(sequence);
1145      return true;
1146    }
1147
1148    @Override
1149    public String removeFrom(CharSequence sequence) {
1150      return sequence.toString();
1151    }
1152
1153    @Override
1154    public String replaceFrom(CharSequence sequence, char replacement) {
1155      return sequence.toString();
1156    }
1157
1158    @Override
1159    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1160      checkNotNull(replacement);
1161      return sequence.toString();
1162    }
1163
1164    @Override
1165    public String collapseFrom(CharSequence sequence, char replacement) {
1166      return sequence.toString();
1167    }
1168
1169    @Override
1170    public String trimFrom(CharSequence sequence) {
1171      return sequence.toString();
1172    }
1173
1174    @Override
1175    public String trimLeadingFrom(CharSequence sequence) {
1176      return sequence.toString();
1177    }
1178
1179    @Override
1180    public String trimTrailingFrom(CharSequence sequence) {
1181      return sequence.toString();
1182    }
1183
1184    @Override
1185    public int countIn(CharSequence sequence) {
1186      checkNotNull(sequence);
1187      return 0;
1188    }
1189
1190    @Override
1191    public CharMatcher and(CharMatcher other) {
1192      checkNotNull(other);
1193      return this;
1194    }
1195
1196    @Override
1197    public CharMatcher or(CharMatcher other) {
1198      return checkNotNull(other);
1199    }
1200
1201    @Override
1202    public CharMatcher negate() {
1203      return any();
1204    }
1205  }
1206
1207  /** Implementation of {@link #whitespace()}. */
1208  @VisibleForTesting
1209  static final class Whitespace extends NamedFastMatcher {
1210
1211    // TABLE is a precomputed hashset of whitespace characters. MULTIPLIER serves as a hash function
1212    // whose key property is that it maps 25 characters into the 32-slot table without collision.
1213    // Basically this is an opportunistic fast implementation as opposed to "good code". For most
1214    // other use-cases, the reduction in readability isn't worth it.
1215    static final String TABLE =
1216        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1217            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1218            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1219            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1220    static final int MULTIPLIER = 1682554634;
1221    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1222
1223    static final Whitespace INSTANCE = new Whitespace();
1224
1225    Whitespace() {
1226      super("CharMatcher.whitespace()");
1227    }
1228
1229    @Override
1230    public boolean matches(char c) {
1231      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1232    }
1233
1234    @GwtIncompatible // used only from other GwtIncompatible code
1235    @Override
1236    void setBits(BitSet table) {
1237      for (int i = 0; i < TABLE.length(); i++) {
1238        table.set(TABLE.charAt(i));
1239      }
1240    }
1241  }
1242
1243  /** Implementation of {@link #breakingWhitespace()}. */
1244  private static final class BreakingWhitespace extends CharMatcher {
1245
1246    static final CharMatcher INSTANCE = new BreakingWhitespace();
1247
1248    @Override
1249    public boolean matches(char c) {
1250      switch (c) {
1251        case '\t':
1252        case '\n':
1253        case '\013':
1254        case '\f':
1255        case '\r':
1256        case ' ':
1257        case '\u0085':
1258        case '\u1680':
1259        case '\u2028':
1260        case '\u2029':
1261        case '\u205f':
1262        case '\u3000':
1263          return true;
1264        case '\u2007':
1265          return false;
1266        default:
1267          return c >= '\u2000' && c <= '\u200a';
1268      }
1269    }
1270
1271    @Override
1272    public String toString() {
1273      return "CharMatcher.breakingWhitespace()";
1274    }
1275  }
1276
1277  /** Implementation of {@link #ascii()}. */
1278  private static final class Ascii extends NamedFastMatcher {
1279
1280    static final Ascii INSTANCE = new Ascii();
1281
1282    Ascii() {
1283      super("CharMatcher.ascii()");
1284    }
1285
1286    @Override
1287    public boolean matches(char c) {
1288      return c <= '\u007f';
1289    }
1290  }
1291
1292  /** Implementation that matches characters that fall within multiple ranges. */
1293  private static class RangesMatcher extends CharMatcher {
1294
1295    private final String description;
1296    private final char[] rangeStarts;
1297    private final char[] rangeEnds;
1298
1299    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1300      this.description = description;
1301      this.rangeStarts = rangeStarts;
1302      this.rangeEnds = rangeEnds;
1303      checkArgument(rangeStarts.length == rangeEnds.length);
1304      for (int i = 0; i < rangeStarts.length; i++) {
1305        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1306        if (i + 1 < rangeStarts.length) {
1307          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1308        }
1309      }
1310    }
1311
1312    @Override
1313    public boolean matches(char c) {
1314      int index = Arrays.binarySearch(rangeStarts, c);
1315      if (index >= 0) {
1316        return true;
1317      } else {
1318        index = ~index - 1;
1319        return index >= 0 && c <= rangeEnds[index];
1320      }
1321    }
1322
1323    @Override
1324    public String toString() {
1325      return description;
1326    }
1327  }
1328
1329  /** Implementation of {@link #digit()}. */
1330  private static final class Digit extends RangesMatcher {
1331    // Plug the following UnicodeSet pattern into
1332    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1333    // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]]
1334    // and get the zeroes from there.
1335
1336    // Must be in ascending order.
1337    private static final String ZEROES =
1338        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6"
1339            + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0"
1340            + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10";
1341
1342    private static char[] zeroes() {
1343      return ZEROES.toCharArray();
1344    }
1345
1346    private static char[] nines() {
1347      char[] nines = new char[ZEROES.length()];
1348      for (int i = 0; i < ZEROES.length(); i++) {
1349        nines[i] = (char) (ZEROES.charAt(i) + 9);
1350      }
1351      return nines;
1352    }
1353
1354    static final Digit INSTANCE = new Digit();
1355
1356    private Digit() {
1357      super("CharMatcher.digit()", zeroes(), nines());
1358    }
1359  }
1360
1361  /** Implementation of {@link #javaDigit()}. */
1362  private static final class JavaDigit extends CharMatcher {
1363
1364    static final JavaDigit INSTANCE = new JavaDigit();
1365
1366    @Override
1367    public boolean matches(char c) {
1368      return Character.isDigit(c);
1369    }
1370
1371    @Override
1372    public String toString() {
1373      return "CharMatcher.javaDigit()";
1374    }
1375  }
1376
1377  /** Implementation of {@link #javaLetter()}. */
1378  private static final class JavaLetter extends CharMatcher {
1379
1380    static final JavaLetter INSTANCE = new JavaLetter();
1381
1382    @Override
1383    public boolean matches(char c) {
1384      return Character.isLetter(c);
1385    }
1386
1387    @Override
1388    public String toString() {
1389      return "CharMatcher.javaLetter()";
1390    }
1391  }
1392
1393  /** Implementation of {@link #javaLetterOrDigit()}. */
1394  private static final class JavaLetterOrDigit extends CharMatcher {
1395
1396    static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit();
1397
1398    @Override
1399    public boolean matches(char c) {
1400      return Character.isLetterOrDigit(c);
1401    }
1402
1403    @Override
1404    public String toString() {
1405      return "CharMatcher.javaLetterOrDigit()";
1406    }
1407  }
1408
1409  /** Implementation of {@link #javaUpperCase()}. */
1410  private static final class JavaUpperCase extends CharMatcher {
1411
1412    static final JavaUpperCase INSTANCE = new JavaUpperCase();
1413
1414    @Override
1415    public boolean matches(char c) {
1416      return Character.isUpperCase(c);
1417    }
1418
1419    @Override
1420    public String toString() {
1421      return "CharMatcher.javaUpperCase()";
1422    }
1423  }
1424
1425  /** Implementation of {@link #javaLowerCase()}. */
1426  private static final class JavaLowerCase extends CharMatcher {
1427
1428    static final JavaLowerCase INSTANCE = new JavaLowerCase();
1429
1430    @Override
1431    public boolean matches(char c) {
1432      return Character.isLowerCase(c);
1433    }
1434
1435    @Override
1436    public String toString() {
1437      return "CharMatcher.javaLowerCase()";
1438    }
1439  }
1440
1441  /** Implementation of {@link #javaIsoControl()}. */
1442  private static final class JavaIsoControl extends NamedFastMatcher {
1443
1444    static final JavaIsoControl INSTANCE = new JavaIsoControl();
1445
1446    private JavaIsoControl() {
1447      super("CharMatcher.javaIsoControl()");
1448    }
1449
1450    @Override
1451    public boolean matches(char c) {
1452      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1453    }
1454  }
1455
1456  /** Implementation of {@link #invisible()}. */
1457  private static final class Invisible extends RangesMatcher {
1458    // Plug the following UnicodeSet pattern into
1459    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1460    // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]]
1461    // with the "Abbreviate" option, and get the ranges from there.
1462    private static final String RANGE_STARTS =
1463        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u08e2\u1680\u180e\u2000\u2028\u205f\u2066"
1464            + "\u3000\ud800\ufeff\ufff9";
1465    private static final String RANGE_ENDS = // inclusive ends
1466        "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u08e2\u1680\u180e\u200f\u202f\u2064\u206f"
1467            + "\u3000\uf8ff\ufeff\ufffb";
1468
1469    static final Invisible INSTANCE = new Invisible();
1470
1471    private Invisible() {
1472      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1473    }
1474  }
1475
1476  /** Implementation of {@link #singleWidth()}. */
1477  private static final class SingleWidth extends RangesMatcher {
1478
1479    static final SingleWidth INSTANCE = new SingleWidth();
1480
1481    private SingleWidth() {
1482      super(
1483          "CharMatcher.singleWidth()",
1484          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1485          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1486    }
1487  }
1488
1489  // Non-static factory implementation classes
1490
1491  /** Implementation of {@link #negate()}. */
1492  private static class Negated extends CharMatcher {
1493
1494    final CharMatcher original;
1495
1496    Negated(CharMatcher original) {
1497      this.original = checkNotNull(original);
1498    }
1499
1500    @Override
1501    public boolean matches(char c) {
1502      return !original.matches(c);
1503    }
1504
1505    @Override
1506    public boolean matchesAllOf(CharSequence sequence) {
1507      return original.matchesNoneOf(sequence);
1508    }
1509
1510    @Override
1511    public boolean matchesNoneOf(CharSequence sequence) {
1512      return original.matchesAllOf(sequence);
1513    }
1514
1515    @Override
1516    public int countIn(CharSequence sequence) {
1517      return sequence.length() - original.countIn(sequence);
1518    }
1519
1520    @GwtIncompatible // used only from other GwtIncompatible code
1521    @Override
1522    void setBits(BitSet table) {
1523      BitSet tmp = new BitSet();
1524      original.setBits(tmp);
1525      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1526      table.or(tmp);
1527    }
1528
1529    @Override
1530    public CharMatcher negate() {
1531      return original;
1532    }
1533
1534    @Override
1535    public String toString() {
1536      return original + ".negate()";
1537    }
1538  }
1539
1540  /** Implementation of {@link #and(CharMatcher)}. */
1541  private static final class And extends CharMatcher {
1542
1543    final CharMatcher first;
1544    final CharMatcher second;
1545
1546    And(CharMatcher a, CharMatcher b) {
1547      first = checkNotNull(a);
1548      second = checkNotNull(b);
1549    }
1550
1551    @Override
1552    public boolean matches(char c) {
1553      return first.matches(c) && second.matches(c);
1554    }
1555
1556    @GwtIncompatible // used only from other GwtIncompatible code
1557    @Override
1558    void setBits(BitSet table) {
1559      BitSet tmp1 = new BitSet();
1560      first.setBits(tmp1);
1561      BitSet tmp2 = new BitSet();
1562      second.setBits(tmp2);
1563      tmp1.and(tmp2);
1564      table.or(tmp1);
1565    }
1566
1567    @Override
1568    public String toString() {
1569      return "CharMatcher.and(" + first + ", " + second + ")";
1570    }
1571  }
1572
1573  /** Implementation of {@link #or(CharMatcher)}. */
1574  private static final class Or extends CharMatcher {
1575
1576    final CharMatcher first;
1577    final CharMatcher second;
1578
1579    Or(CharMatcher a, CharMatcher b) {
1580      first = checkNotNull(a);
1581      second = checkNotNull(b);
1582    }
1583
1584    @GwtIncompatible // used only from other GwtIncompatible code
1585    @Override
1586    void setBits(BitSet table) {
1587      first.setBits(table);
1588      second.setBits(table);
1589    }
1590
1591    @Override
1592    public boolean matches(char c) {
1593      return first.matches(c) || second.matches(c);
1594    }
1595
1596    @Override
1597    public String toString() {
1598      return "CharMatcher.or(" + first + ", " + second + ")";
1599    }
1600  }
1601
1602  // Static factory implementations
1603
1604  /** Implementation of {@link #is(char)}. */
1605  private static final class Is extends FastMatcher {
1606
1607    private final char match;
1608
1609    Is(char match) {
1610      this.match = match;
1611    }
1612
1613    @Override
1614    public boolean matches(char c) {
1615      return c == match;
1616    }
1617
1618    @Override
1619    public String replaceFrom(CharSequence sequence, char replacement) {
1620      return sequence.toString().replace(match, replacement);
1621    }
1622
1623    @Override
1624    public CharMatcher and(CharMatcher other) {
1625      return other.matches(match) ? this : none();
1626    }
1627
1628    @Override
1629    public CharMatcher or(CharMatcher other) {
1630      return other.matches(match) ? other : super.or(other);
1631    }
1632
1633    @Override
1634    public CharMatcher negate() {
1635      return isNot(match);
1636    }
1637
1638    @GwtIncompatible // used only from other GwtIncompatible code
1639    @Override
1640    void setBits(BitSet table) {
1641      table.set(match);
1642    }
1643
1644    @Override
1645    public String toString() {
1646      return "CharMatcher.is('" + showCharacter(match) + "')";
1647    }
1648  }
1649
1650  /** Implementation of {@link #isNot(char)}. */
1651  private static final class IsNot extends FastMatcher {
1652
1653    private final char match;
1654
1655    IsNot(char match) {
1656      this.match = match;
1657    }
1658
1659    @Override
1660    public boolean matches(char c) {
1661      return c != match;
1662    }
1663
1664    @Override
1665    public CharMatcher and(CharMatcher other) {
1666      return other.matches(match) ? super.and(other) : other;
1667    }
1668
1669    @Override
1670    public CharMatcher or(CharMatcher other) {
1671      return other.matches(match) ? any() : this;
1672    }
1673
1674    @GwtIncompatible // used only from other GwtIncompatible code
1675    @Override
1676    void setBits(BitSet table) {
1677      table.set(0, match);
1678      table.set(match + 1, Character.MAX_VALUE + 1);
1679    }
1680
1681    @Override
1682    public CharMatcher negate() {
1683      return is(match);
1684    }
1685
1686    @Override
1687    public String toString() {
1688      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1689    }
1690  }
1691
1692  private static CharMatcher.IsEither isEither(char c1, char c2) {
1693    return new CharMatcher.IsEither(c1, c2);
1694  }
1695
1696  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1697  private static final class IsEither extends FastMatcher {
1698
1699    private final char match1;
1700    private final char match2;
1701
1702    IsEither(char match1, char match2) {
1703      this.match1 = match1;
1704      this.match2 = match2;
1705    }
1706
1707    @Override
1708    public boolean matches(char c) {
1709      return c == match1 || c == match2;
1710    }
1711
1712    @GwtIncompatible // used only from other GwtIncompatible code
1713    @Override
1714    void setBits(BitSet table) {
1715      table.set(match1);
1716      table.set(match2);
1717    }
1718
1719    @Override
1720    public String toString() {
1721      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1722    }
1723  }
1724
1725  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1726  private static final class AnyOf extends CharMatcher {
1727
1728    private final char[] chars;
1729
1730    public AnyOf(CharSequence chars) {
1731      this.chars = chars.toString().toCharArray();
1732      Arrays.sort(this.chars);
1733    }
1734
1735    @Override
1736    public boolean matches(char c) {
1737      return Arrays.binarySearch(chars, c) >= 0;
1738    }
1739
1740    @Override
1741    @GwtIncompatible // used only from other GwtIncompatible code
1742    void setBits(BitSet table) {
1743      for (char c : chars) {
1744        table.set(c);
1745      }
1746    }
1747
1748    @Override
1749    public String toString() {
1750      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1751      for (char c : chars) {
1752        description.append(showCharacter(c));
1753      }
1754      description.append("\")");
1755      return description.toString();
1756    }
1757  }
1758
1759  /** Implementation of {@link #inRange(char, char)}. */
1760  private static final class InRange extends FastMatcher {
1761
1762    private final char startInclusive;
1763    private final char endInclusive;
1764
1765    InRange(char startInclusive, char endInclusive) {
1766      checkArgument(endInclusive >= startInclusive);
1767      this.startInclusive = startInclusive;
1768      this.endInclusive = endInclusive;
1769    }
1770
1771    @Override
1772    public boolean matches(char c) {
1773      return startInclusive <= c && c <= endInclusive;
1774    }
1775
1776    @GwtIncompatible // used only from other GwtIncompatible code
1777    @Override
1778    void setBits(BitSet table) {
1779      table.set(startInclusive, endInclusive + 1);
1780    }
1781
1782    @Override
1783    public String toString() {
1784      return "CharMatcher.inRange('"
1785          + showCharacter(startInclusive)
1786          + "', '"
1787          + showCharacter(endInclusive)
1788          + "')";
1789    }
1790  }
1791
1792  /** Implementation of {@link #forPredicate(Predicate)}. */
1793  private static final class ForPredicate extends CharMatcher {
1794
1795    private final Predicate<? super Character> predicate;
1796
1797    ForPredicate(Predicate<? super Character> predicate) {
1798      this.predicate = checkNotNull(predicate);
1799    }
1800
1801    @Override
1802    public boolean matches(char c) {
1803      return predicate.apply(c);
1804    }
1805
1806    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1807    @Override
1808    public boolean apply(Character character) {
1809      return predicate.apply(checkNotNull(character));
1810    }
1811
1812    @Override
1813    public String toString() {
1814      return "CharMatcher.forPredicate(" + predicate + ")";
1815    }
1816  }
1817}