001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.J2ktIncompatible;
024import com.google.common.annotations.VisibleForTesting;
025import java.util.Arrays;
026import java.util.BitSet;
027
028/**
029 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
030 * for any {@link Object}. Also offers basic text processing methods based on this function.
031 * Implementations are strongly encouraged to be side-effect-free and immutable.
032 *
033 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
034 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
035 *
036 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a
037 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand
038 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code
039 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of
040 * assigned characters, including important CJK characters and emoji.
041 *
042 * <p>Supplementary characters are <a
043 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded
044 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as
045 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s.
046 *
047 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for
048 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For
049 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner.
050 *
051 * <p>Example usages:
052 *
053 * <pre>
054 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
055 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
056 *
057 * <p>See the Guava User Guide article on <a
058 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
059 * </a>.
060 *
061 * @author Kevin Bourrillion
062 * @since 1.0
063 */
064@GwtCompatible(emulated = true)
065@ElementTypesAreNonnullByDefault
066public abstract class CharMatcher implements Predicate<Character> {
067  /*
068   *           N777777777NO
069   *         N7777777777777N
070   *        M777777777777777N
071   *        $N877777777D77777M
072   *       N M77777777ONND777M
073   *       MN777777777NN  D777
074   *     N7ZN777777777NN ~M7778
075   *    N777777777777MMNN88777N
076   *    N777777777777MNZZZ7777O
077   *    DZN7777O77777777777777
078   *     N7OONND7777777D77777N
079   *      8$M++++?N???$77777$
080   *       M7++++N+M77777777N
081   *        N77O777777777777$                              M
082   *          DNNM$$$$777777N                              D
083   *         N$N:=N$777N7777M                             NZ
084   *        77Z::::N777777777                          ODZZZ
085   *       77N::::::N77777777M                         NNZZZ$
086   *     $777:::::::77777777MN                        ZM8ZZZZZ
087   *     777M::::::Z7777777Z77                        N++ZZZZNN
088   *    7777M:::::M7777777$777M                       $++IZZZZM
089   *   M777$:::::N777777$M7777M                       +++++ZZZDN
090   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
091   *       N::::::N:7$O:77777777                      N++++ZZZZN
092   *       M::::::::::::N77777777+                   +?+++++ZZZM
093   *       8::::::::::::D77777777M                    O+++++ZZ
094   *        ::::::::::::M777777777N                      O+?D
095   *        M:::::::::::M77777777778                     77=
096   *        D=::::::::::N7777777777N                    777
097   *       INN===::::::=77777777777N                  I777N
098   *      ?777N========N7777777777787M               N7777
099   *      77777$D======N77777777777N777N?         N777777
100   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
101   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
102   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
103   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
104   *   7 :N MNN$$$$M$      $$$777$8      8D8I
105   *     NMM.:7O           777777778
106   *                       7777777MN
107   *                       M NO .7:
108   *                       M   :   M
109   *                            8
110   */
111
112  // Constant matcher factory methods
113
114  /**
115   * Matches any character.
116   *
117   * @since 19.0 (since 1.0 as constant {@code ANY})
118   */
119  public static CharMatcher any() {
120    return Any.INSTANCE;
121  }
122
123  /**
124   * Matches no characters.
125   *
126   * @since 19.0 (since 1.0 as constant {@code NONE})
127   */
128  public static CharMatcher none() {
129    return None.INSTANCE;
130  }
131
132  /**
133   * Determines whether a character is whitespace according to the latest Unicode standard, as
134   * illustrated <a
135   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
136   * This is not the same definition used by other Java APIs. (See a <a
137   * href="https://goo.gl/Y6SLWx">comparison of several definitions of "whitespace"</a>.)
138   *
139   * <p>All Unicode White_Space characters are on the BMP and thus supported by this API.
140   *
141   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
142   * date.
143   *
144   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
145   */
146  public static CharMatcher whitespace() {
147    return Whitespace.INSTANCE;
148  }
149
150  /**
151   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
152   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
153   * discussion of that term.
154   *
155   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
156   */
157  public static CharMatcher breakingWhitespace() {
158    return BreakingWhitespace.INSTANCE;
159  }
160
161  /**
162   * Determines whether a character is ASCII, meaning that its code point is less than 128.
163   *
164   * @since 19.0 (since 1.0 as constant {@code ASCII})
165   */
166  public static CharMatcher ascii() {
167    return Ascii.INSTANCE;
168  }
169
170  /**
171   * Determines whether a character is a BMP digit according to <a
172   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
173   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
174   *
175   * @deprecated Many digits are supplementary characters; see the class documentation.
176   * @since 19.0 (since 1.0 as constant {@code DIGIT})
177   */
178  @Deprecated
179  public static CharMatcher digit() {
180    return Digit.INSTANCE;
181  }
182
183  /**
184   * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char)
185   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
186   * '9')}.
187   *
188   * @deprecated Many digits are supplementary characters; see the class documentation.
189   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
190   */
191  @Deprecated
192  public static CharMatcher javaDigit() {
193    return JavaDigit.INSTANCE;
194  }
195
196  /**
197   * Determines whether a character is a BMP letter according to {@linkplain
198   * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
199   * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
200   *
201   * @deprecated Most letters are supplementary characters; see the class documentation.
202   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
203   */
204  @Deprecated
205  public static CharMatcher javaLetter() {
206    return JavaLetter.INSTANCE;
207  }
208
209  /**
210   * Determines whether a character is a BMP letter or digit according to {@linkplain
211   * Character#isLetterOrDigit(char) Java's definition}.
212   *
213   * @deprecated Most letters and digits are supplementary characters; see the class documentation.
214   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}).
215   */
216  @Deprecated
217  public static CharMatcher javaLetterOrDigit() {
218    return JavaLetterOrDigit.INSTANCE;
219  }
220
221  /**
222   * Determines whether a BMP character is upper case according to {@linkplain
223   * Character#isUpperCase(char) Java's definition}.
224   *
225   * @deprecated Some uppercase characters are supplementary characters; see the class
226   *     documentation.
227   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
228   */
229  @Deprecated
230  public static CharMatcher javaUpperCase() {
231    return JavaUpperCase.INSTANCE;
232  }
233
234  /**
235   * Determines whether a BMP character is lower case according to {@linkplain
236   * Character#isLowerCase(char) Java's definition}.
237   *
238   * @deprecated Some lowercase characters are supplementary characters; see the class
239   *     documentation.
240   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
241   */
242  @Deprecated
243  public static CharMatcher javaLowerCase() {
244    return JavaLowerCase.INSTANCE;
245  }
246
247  /**
248   * Determines whether a character is an ISO control character as specified by {@link
249   * Character#isISOControl(char)}.
250   *
251   * <p>All ISO control codes are on the BMP and thus supported by this API.
252   *
253   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
254   */
255  public static CharMatcher javaIsoControl() {
256    return JavaIsoControl.INSTANCE;
257  }
258
259  /**
260   * Determines whether a character is invisible; that is, if its Unicode category is any of
261   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
262   * PRIVATE_USE according to ICU4J.
263   *
264   * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU).
265   *
266   * @deprecated Most invisible characters are supplementary characters; see the class
267   *     documentation.
268   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
269   */
270  @Deprecated
271  public static CharMatcher invisible() {
272    return Invisible.INSTANCE;
273  }
274
275  /**
276   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
277   * errs on the side of returning {@code false} (that is, it tends to assume a character is
278   * double-width).
279   *
280   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
281   * date.
282   *
283   * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>.
284   *
285   * @deprecated Many such characters are supplementary characters; see the class documentation.
286   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
287   */
288  @Deprecated
289  public static CharMatcher singleWidth() {
290    return SingleWidth.INSTANCE;
291  }
292
293  // Static factories
294
295  /** Returns a {@code char} matcher that matches only one specified BMP character. */
296  public static CharMatcher is(final char match) {
297    return new Is(match);
298  }
299
300  /**
301   * Returns a {@code char} matcher that matches any character except the BMP character specified.
302   *
303   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
304   */
305  public static CharMatcher isNot(final char match) {
306    return new IsNot(match);
307  }
308
309  /**
310   * Returns a {@code char} matcher that matches any BMP character present in the given character
311   * sequence. Returns a bogus matcher if the sequence contains supplementary characters.
312   */
313  public static CharMatcher anyOf(final CharSequence sequence) {
314    switch (sequence.length()) {
315      case 0:
316        return none();
317      case 1:
318        return is(sequence.charAt(0));
319      case 2:
320        return isEither(sequence.charAt(0), sequence.charAt(1));
321      default:
322        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
323        // matcher?
324        return new AnyOf(sequence);
325    }
326  }
327
328  /**
329   * Returns a {@code char} matcher that matches any BMP character not present in the given
330   * character sequence. Returns a bogus matcher if the sequence contains supplementary characters.
331   */
332  public static CharMatcher noneOf(CharSequence sequence) {
333    return anyOf(sequence).negate();
334  }
335
336  /**
337   * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints
338   * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
339   * CharMatcher.inRange('a', 'z')}.
340   *
341   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
342   */
343  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
344    return new InRange(startInclusive, endInclusive);
345  }
346
347  /**
348   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
349   * which operates on primitive {@code char} instances instead.
350   */
351  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
352    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
353  }
354
355  // Constructors
356
357  /**
358   * Constructor for use by subclasses. When subclassing, you may want to override {@code
359   * toString()} to provide a useful description.
360   */
361  protected CharMatcher() {}
362
363  // Abstract methods
364
365  /** Determines a true or false value for the given character. */
366  public abstract boolean matches(char c);
367
368  // Non-static factories
369
370  /** Returns a matcher that matches any character not matched by this matcher. */
371  // @Override under Java 8 but not under Java 7
372  @Override
373  public CharMatcher negate() {
374    return new Negated(this);
375  }
376
377  /**
378   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
379   */
380  public CharMatcher and(CharMatcher other) {
381    return new And(this, other);
382  }
383
384  /**
385   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
386   */
387  public CharMatcher or(CharMatcher other) {
388    return new Or(this, other);
389  }
390
391  /**
392   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
393   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
394   * worthwhile only if the precomputed matcher is queried many thousands of times.
395   *
396   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
397   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
398   * worthwhile tradeoff in a browser.
399   */
400  public CharMatcher precomputed() {
401    return Platform.precomputeCharMatcher(this);
402  }
403
404  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
405
406  /**
407   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
408   * on {@link Platform} so that we can have different behavior in GWT.
409   *
410   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
411   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
412   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
413   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
414   * matcher which is faster to query than the original.
415   */
416  @J2ktIncompatible
417  @GwtIncompatible // SmallCharMatcher
418  CharMatcher precomputedInternal() {
419    final BitSet table = new BitSet();
420    setBits(table);
421    int totalCharacters = table.cardinality();
422    if (totalCharacters * 2 <= DISTINCT_CHARS) {
423      return precomputedPositive(totalCharacters, table, toString());
424    } else {
425      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
426      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
427      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
428      String suffix = ".negate()";
429      final String description = toString();
430      String negatedDescription =
431          description.endsWith(suffix)
432              ? description.substring(0, description.length() - suffix.length())
433              : description + suffix;
434      return new NegatedFastMatcher(
435          precomputedPositive(negatedCharacters, table, negatedDescription)) {
436        @Override
437        public String toString() {
438          return description;
439        }
440      };
441    }
442  }
443
444  /**
445   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
446   */
447  @J2ktIncompatible
448  @GwtIncompatible // SmallCharMatcher
449  private static CharMatcher precomputedPositive(
450      int totalCharacters, BitSet table, String description) {
451    switch (totalCharacters) {
452      case 0:
453        return none();
454      case 1:
455        return is((char) table.nextSetBit(0));
456      case 2:
457        char c1 = (char) table.nextSetBit(0);
458        char c2 = (char) table.nextSetBit(c1 + 1);
459        return isEither(c1, c2);
460      default:
461        return isSmall(totalCharacters, table.length())
462            ? SmallCharMatcher.from(table, description)
463            : new BitSetMatcher(table, description);
464    }
465  }
466
467  @J2ktIncompatible
468  @GwtIncompatible // SmallCharMatcher
469  private static boolean isSmall(int totalCharacters, int tableLength) {
470    return totalCharacters <= SmallCharMatcher.MAX_SIZE
471        && tableLength > (totalCharacters * 4 * Character.SIZE);
472    // err on the side of BitSetMatcher
473  }
474
475  /** Sets bits in {@code table} matched by this matcher. */
476  @J2ktIncompatible
477  @GwtIncompatible // used only from other GwtIncompatible code
478  void setBits(BitSet table) {
479    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
480      if (matches((char) c)) {
481        table.set(c);
482      }
483    }
484  }
485
486  // Text processing routines
487
488  /**
489   * Returns {@code true} if a character sequence contains at least one matching BMP character.
490   * Equivalent to {@code !matchesNoneOf(sequence)}.
491   *
492   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
493   * character, until this returns {@code true} or the end is reached.
494   *
495   * @param sequence the character sequence to examine, possibly empty
496   * @return {@code true} if this matcher matches at least one character in the sequence
497   * @since 8.0
498   */
499  public boolean matchesAnyOf(CharSequence sequence) {
500    return !matchesNoneOf(sequence);
501  }
502
503  /**
504   * Returns {@code true} if a character sequence contains only matching BMP characters.
505   *
506   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
507   * character, until this returns {@code false} or the end is reached.
508   *
509   * @param sequence the character sequence to examine, possibly empty
510   * @return {@code true} if this matcher matches every character in the sequence, including when
511   *     the sequence is empty
512   */
513  public boolean matchesAllOf(CharSequence sequence) {
514    for (int i = sequence.length() - 1; i >= 0; i--) {
515      if (!matches(sequence.charAt(i))) {
516        return false;
517      }
518    }
519    return true;
520  }
521
522  /**
523   * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to
524   * {@code !matchesAnyOf(sequence)}.
525   *
526   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
527   * character, until this returns {@code true} or the end is reached.
528   *
529   * @param sequence the character sequence to examine, possibly empty
530   * @return {@code true} if this matcher matches no characters in the sequence, including when the
531   *     sequence is empty
532   */
533  public boolean matchesNoneOf(CharSequence sequence) {
534    return indexIn(sequence) == -1;
535  }
536
537  /**
538   * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if
539   * no matching character is present.
540   *
541   * <p>The default implementation iterates over the sequence in forward order calling {@link
542   * #matches} for each character.
543   *
544   * @param sequence the character sequence to examine from the beginning
545   * @return an index, or {@code -1} if no character matches
546   */
547  public int indexIn(CharSequence sequence) {
548    return indexIn(sequence, 0);
549  }
550
551  /**
552   * Returns the index of the first matching BMP character in a character sequence, starting from a
553   * given position, or {@code -1} if no character matches after that position.
554   *
555   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
556   * start}, calling {@link #matches} for each character.
557   *
558   * @param sequence the character sequence to examine
559   * @param start the first index to examine; must be nonnegative and no greater than {@code
560   *     sequence.length()}
561   * @return the index of the first matching character, guaranteed to be no less than {@code start},
562   *     or {@code -1} if no character matches
563   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
564   *     sequence.length()}
565   */
566  public int indexIn(CharSequence sequence, int start) {
567    int length = sequence.length();
568    checkPositionIndex(start, length);
569    for (int i = start; i < length; i++) {
570      if (matches(sequence.charAt(i))) {
571        return i;
572      }
573    }
574    return -1;
575  }
576
577  /**
578   * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if
579   * no matching character is present.
580   *
581   * <p>The default implementation iterates over the sequence in reverse order calling {@link
582   * #matches} for each character.
583   *
584   * @param sequence the character sequence to examine from the end
585   * @return an index, or {@code -1} if no character matches
586   */
587  public int lastIndexIn(CharSequence sequence) {
588    for (int i = sequence.length() - 1; i >= 0; i--) {
589      if (matches(sequence.charAt(i))) {
590        return i;
591      }
592    }
593    return -1;
594  }
595
596  /**
597   * Returns the number of matching {@code char}s found in a character sequence.
598   *
599   * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}().
600   */
601  public int countIn(CharSequence sequence) {
602    int count = 0;
603    for (int i = 0; i < sequence.length(); i++) {
604      if (matches(sequence.charAt(i))) {
605        count++;
606      }
607    }
608    return count;
609  }
610
611  /**
612   * Returns a string containing all non-matching characters of a character sequence, in order. For
613   * example:
614   *
615   * <pre>{@code
616   * CharMatcher.is('a').removeFrom("bazaar")
617   * }</pre>
618   *
619   * ... returns {@code "bzr"}.
620   */
621  public String removeFrom(CharSequence sequence) {
622    String string = sequence.toString();
623    int pos = indexIn(string);
624    if (pos == -1) {
625      return string;
626    }
627
628    char[] chars = string.toCharArray();
629    int spread = 1;
630
631    // This unusual loop comes from extensive benchmarking
632    OUT:
633    while (true) {
634      pos++;
635      while (true) {
636        if (pos == chars.length) {
637          break OUT;
638        }
639        if (matches(chars[pos])) {
640          break;
641        }
642        chars[pos - spread] = chars[pos];
643        pos++;
644      }
645      spread++;
646    }
647    return new String(chars, 0, pos - spread);
648  }
649
650  /**
651   * Returns a string containing all matching BMP characters of a character sequence, in order. For
652   * example:
653   *
654   * <pre>{@code
655   * CharMatcher.is('a').retainFrom("bazaar")
656   * }</pre>
657   *
658   * ... returns {@code "aaa"}.
659   */
660  public String retainFrom(CharSequence sequence) {
661    return negate().removeFrom(sequence);
662  }
663
664  /**
665   * Returns a string copy of the input character sequence, with each matching BMP character
666   * replaced by a given replacement character. For example:
667   *
668   * <pre>{@code
669   * CharMatcher.is('a').replaceFrom("radar", 'o')
670   * }</pre>
671   *
672   * ... returns {@code "rodor"}.
673   *
674   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
675   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
676   * character.
677   *
678   * @param sequence the character sequence to replace matching characters in
679   * @param replacement the character to append to the result string in place of each matching
680   *     character in {@code sequence}
681   * @return the new string
682   */
683  public String replaceFrom(CharSequence sequence, char replacement) {
684    String string = sequence.toString();
685    int pos = indexIn(string);
686    if (pos == -1) {
687      return string;
688    }
689    char[] chars = string.toCharArray();
690    chars[pos] = replacement;
691    for (int i = pos + 1; i < chars.length; i++) {
692      if (matches(chars[i])) {
693        chars[i] = replacement;
694      }
695    }
696    return new String(chars);
697  }
698
699  /**
700   * Returns a string copy of the input character sequence, with each matching BMP character
701   * replaced by a given replacement sequence. For example:
702   *
703   * <pre>{@code
704   * CharMatcher.is('a').replaceFrom("yaha", "oo")
705   * }</pre>
706   *
707   * ... returns {@code "yoohoo"}.
708   *
709   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
710   * off calling {@link #replaceFrom(CharSequence, char)} directly.
711   *
712   * @param sequence the character sequence to replace matching characters in
713   * @param replacement the characters to append to the result string in place of each matching
714   *     character in {@code sequence}
715   * @return the new string
716   */
717  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
718    int replacementLen = replacement.length();
719    if (replacementLen == 0) {
720      return removeFrom(sequence);
721    }
722    if (replacementLen == 1) {
723      return replaceFrom(sequence, replacement.charAt(0));
724    }
725
726    String string = sequence.toString();
727    int pos = indexIn(string);
728    if (pos == -1) {
729      return string;
730    }
731
732    int len = string.length();
733    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
734
735    int oldpos = 0;
736    do {
737      buf.append(string, oldpos, pos);
738      buf.append(replacement);
739      oldpos = pos + 1;
740      pos = indexIn(string, oldpos);
741    } while (pos != -1);
742
743    buf.append(string, oldpos, len);
744    return buf.toString();
745  }
746
747  /**
748   * Returns a substring of the input character sequence that omits all matching BMP characters from
749   * the beginning and from the end of the string. For example:
750   *
751   * <pre>{@code
752   * CharMatcher.anyOf("ab").trimFrom("abacatbab")
753   * }</pre>
754   *
755   * ... returns {@code "cat"}.
756   *
757   * <p>Note that:
758   *
759   * <pre>{@code
760   * CharMatcher.inRange('\0', ' ').trimFrom(str)
761   * }</pre>
762   *
763   * ... is equivalent to {@link String#trim()}.
764   */
765  public String trimFrom(CharSequence sequence) {
766    int len = sequence.length();
767    int first;
768    int last;
769
770    for (first = 0; first < len; first++) {
771      if (!matches(sequence.charAt(first))) {
772        break;
773      }
774    }
775    for (last = len - 1; last > first; last--) {
776      if (!matches(sequence.charAt(last))) {
777        break;
778      }
779    }
780
781    return sequence.subSequence(first, last + 1).toString();
782  }
783
784  /**
785   * Returns a substring of the input character sequence that omits all matching BMP characters from
786   * the beginning of the string. For example:
787   *
788   * <pre>{@code
789   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
790   * }</pre>
791   *
792   * ... returns {@code "catbab"}.
793   */
794  public String trimLeadingFrom(CharSequence sequence) {
795    int len = sequence.length();
796    for (int first = 0; first < len; first++) {
797      if (!matches(sequence.charAt(first))) {
798        return sequence.subSequence(first, len).toString();
799      }
800    }
801    return "";
802  }
803
804  /**
805   * Returns a substring of the input character sequence that omits all matching BMP characters from
806   * the end of the string. For example:
807   *
808   * <pre>{@code
809   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
810   * }</pre>
811   *
812   * ... returns {@code "abacat"}.
813   */
814  public String trimTrailingFrom(CharSequence sequence) {
815    int len = sequence.length();
816    for (int last = len - 1; last >= 0; last--) {
817      if (!matches(sequence.charAt(last))) {
818        return sequence.subSequence(0, last + 1).toString();
819      }
820    }
821    return "";
822  }
823
824  /**
825   * Returns a string copy of the input character sequence, with each group of consecutive matching
826   * BMP characters replaced by a single replacement character. For example:
827   *
828   * <pre>{@code
829   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
830   * }</pre>
831   *
832   * ... returns {@code "b-p-r"}.
833   *
834   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
835   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
836   * character.
837   *
838   * @param sequence the character sequence to replace matching groups of characters in
839   * @param replacement the character to append to the result string in place of each group of
840   *     matching characters in {@code sequence}
841   * @return the new string
842   */
843  public String collapseFrom(CharSequence sequence, char replacement) {
844    // This implementation avoids unnecessary allocation.
845    int len = sequence.length();
846    for (int i = 0; i < len; i++) {
847      char c = sequence.charAt(i);
848      if (matches(c)) {
849        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
850          // a no-op replacement
851          i++;
852        } else {
853          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
854          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
855        }
856      }
857    }
858    // no replacement needed
859    return sequence.toString();
860  }
861
862  /**
863   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
864   * groups of matching BMP characters at the start or end of the sequence are removed without
865   * replacement.
866   */
867  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
868    // This implementation avoids unnecessary allocation.
869    int len = sequence.length();
870    int first = 0;
871    int last = len - 1;
872
873    while (first < len && matches(sequence.charAt(first))) {
874      first++;
875    }
876
877    while (last > first && matches(sequence.charAt(last))) {
878      last--;
879    }
880
881    return (first == 0 && last == len - 1)
882        ? collapseFrom(sequence, replacement)
883        : finishCollapseFrom(
884            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
885  }
886
887  private String finishCollapseFrom(
888      CharSequence sequence,
889      int start,
890      int end,
891      char replacement,
892      StringBuilder builder,
893      boolean inMatchingGroup) {
894    for (int i = start; i < end; i++) {
895      char c = sequence.charAt(i);
896      if (matches(c)) {
897        if (!inMatchingGroup) {
898          builder.append(replacement);
899          inMatchingGroup = true;
900        }
901      } else {
902        builder.append(c);
903        inMatchingGroup = false;
904      }
905    }
906    return builder.toString();
907  }
908
909  /**
910   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
911   *     instead.
912   */
913  @Deprecated
914  @Override
915  public boolean apply(Character character) {
916    return matches(character);
917  }
918
919  /**
920   * Returns a string representation of this {@code CharMatcher}, such as {@code
921   * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
922   */
923  @Override
924  public String toString() {
925    return super.toString();
926  }
927
928  /**
929   * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where
930   * "12AB" is the four hexadecimal digits representing the 16-bit code unit.
931   */
932  private static String showCharacter(char c) {
933    String hex = "0123456789ABCDEF";
934    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
935    for (int i = 0; i < 4; i++) {
936      tmp[5 - i] = hex.charAt(c & 0xF);
937      c = (char) (c >> 4);
938    }
939    return String.copyValueOf(tmp);
940  }
941
942  // Fast matchers
943
944  /** A matcher for which precomputation will not yield any significant benefit. */
945  abstract static class FastMatcher extends CharMatcher {
946
947    @Override
948    public final CharMatcher precomputed() {
949      return this;
950    }
951
952    @Override
953    public CharMatcher negate() {
954      return new NegatedFastMatcher(this);
955    }
956  }
957
958  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
959  abstract static class NamedFastMatcher extends FastMatcher {
960
961    private final String description;
962
963    NamedFastMatcher(String description) {
964      this.description = checkNotNull(description);
965    }
966
967    @Override
968    public final String toString() {
969      return description;
970    }
971  }
972
973  /** Negation of a {@link FastMatcher}. */
974  private static class NegatedFastMatcher extends Negated {
975
976    NegatedFastMatcher(CharMatcher original) {
977      super(original);
978    }
979
980    @Override
981    public final CharMatcher precomputed() {
982      return this;
983    }
984  }
985
986  /** Fast matcher using a {@link BitSet} table of matching characters. */
987  @J2ktIncompatible
988  @GwtIncompatible // used only from other GwtIncompatible code
989  private static final class BitSetMatcher extends NamedFastMatcher {
990
991    private final BitSet table;
992
993    private BitSetMatcher(BitSet table, String description) {
994      super(description);
995      if (table.length() + Long.SIZE < table.size()) {
996        table = (BitSet) table.clone();
997        // If only we could actually call BitSet.trimToSize() ourselves...
998      }
999      this.table = table;
1000    }
1001
1002    @Override
1003    public boolean matches(char c) {
1004      return table.get(c);
1005    }
1006
1007    @Override
1008    void setBits(BitSet bitSet) {
1009      bitSet.or(table);
1010    }
1011  }
1012
1013  // Static constant implementation classes
1014
1015  /** Implementation of {@link #any()}. */
1016  private static final class Any extends NamedFastMatcher {
1017
1018    static final CharMatcher INSTANCE = new Any();
1019
1020    private Any() {
1021      super("CharMatcher.any()");
1022    }
1023
1024    @Override
1025    public boolean matches(char c) {
1026      return true;
1027    }
1028
1029    @Override
1030    public int indexIn(CharSequence sequence) {
1031      return (sequence.length() == 0) ? -1 : 0;
1032    }
1033
1034    @Override
1035    public int indexIn(CharSequence sequence, int start) {
1036      int length = sequence.length();
1037      checkPositionIndex(start, length);
1038      return (start == length) ? -1 : start;
1039    }
1040
1041    @Override
1042    public int lastIndexIn(CharSequence sequence) {
1043      return sequence.length() - 1;
1044    }
1045
1046    @Override
1047    public boolean matchesAllOf(CharSequence sequence) {
1048      checkNotNull(sequence);
1049      return true;
1050    }
1051
1052    @Override
1053    public boolean matchesNoneOf(CharSequence sequence) {
1054      return sequence.length() == 0;
1055    }
1056
1057    @Override
1058    public String removeFrom(CharSequence sequence) {
1059      checkNotNull(sequence);
1060      return "";
1061    }
1062
1063    @Override
1064    public String replaceFrom(CharSequence sequence, char replacement) {
1065      char[] array = new char[sequence.length()];
1066      Arrays.fill(array, replacement);
1067      return new String(array);
1068    }
1069
1070    @Override
1071    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1072      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1073      for (int i = 0; i < sequence.length(); i++) {
1074        result.append(replacement);
1075      }
1076      return result.toString();
1077    }
1078
1079    @Override
1080    public String collapseFrom(CharSequence sequence, char replacement) {
1081      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1082    }
1083
1084    @Override
1085    public String trimFrom(CharSequence sequence) {
1086      checkNotNull(sequence);
1087      return "";
1088    }
1089
1090    @Override
1091    public int countIn(CharSequence sequence) {
1092      return sequence.length();
1093    }
1094
1095    @Override
1096    public CharMatcher and(CharMatcher other) {
1097      return checkNotNull(other);
1098    }
1099
1100    @Override
1101    public CharMatcher or(CharMatcher other) {
1102      checkNotNull(other);
1103      return this;
1104    }
1105
1106    @Override
1107    public CharMatcher negate() {
1108      return none();
1109    }
1110  }
1111
1112  /** Implementation of {@link #none()}. */
1113  private static final class None extends NamedFastMatcher {
1114
1115    static final CharMatcher INSTANCE = new None();
1116
1117    private None() {
1118      super("CharMatcher.none()");
1119    }
1120
1121    @Override
1122    public boolean matches(char c) {
1123      return false;
1124    }
1125
1126    @Override
1127    public int indexIn(CharSequence sequence) {
1128      checkNotNull(sequence);
1129      return -1;
1130    }
1131
1132    @Override
1133    public int indexIn(CharSequence sequence, int start) {
1134      int length = sequence.length();
1135      checkPositionIndex(start, length);
1136      return -1;
1137    }
1138
1139    @Override
1140    public int lastIndexIn(CharSequence sequence) {
1141      checkNotNull(sequence);
1142      return -1;
1143    }
1144
1145    @Override
1146    public boolean matchesAllOf(CharSequence sequence) {
1147      return sequence.length() == 0;
1148    }
1149
1150    @Override
1151    public boolean matchesNoneOf(CharSequence sequence) {
1152      checkNotNull(sequence);
1153      return true;
1154    }
1155
1156    @Override
1157    public String removeFrom(CharSequence sequence) {
1158      return sequence.toString();
1159    }
1160
1161    @Override
1162    public String replaceFrom(CharSequence sequence, char replacement) {
1163      return sequence.toString();
1164    }
1165
1166    @Override
1167    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1168      checkNotNull(replacement);
1169      return sequence.toString();
1170    }
1171
1172    @Override
1173    public String collapseFrom(CharSequence sequence, char replacement) {
1174      return sequence.toString();
1175    }
1176
1177    @Override
1178    public String trimFrom(CharSequence sequence) {
1179      return sequence.toString();
1180    }
1181
1182    @Override
1183    public String trimLeadingFrom(CharSequence sequence) {
1184      return sequence.toString();
1185    }
1186
1187    @Override
1188    public String trimTrailingFrom(CharSequence sequence) {
1189      return sequence.toString();
1190    }
1191
1192    @Override
1193    public int countIn(CharSequence sequence) {
1194      checkNotNull(sequence);
1195      return 0;
1196    }
1197
1198    @Override
1199    public CharMatcher and(CharMatcher other) {
1200      checkNotNull(other);
1201      return this;
1202    }
1203
1204    @Override
1205    public CharMatcher or(CharMatcher other) {
1206      return checkNotNull(other);
1207    }
1208
1209    @Override
1210    public CharMatcher negate() {
1211      return any();
1212    }
1213  }
1214
1215  /** Implementation of {@link #whitespace()}. */
1216  @VisibleForTesting
1217  static final class Whitespace extends NamedFastMatcher {
1218
1219    // TABLE is a precomputed hashset of whitespace characters. MULTIPLIER serves as a hash function
1220    // whose key property is that it maps 25 characters into the 32-slot table without collision.
1221    // Basically this is an opportunistic fast implementation as opposed to "good code". For most
1222    // other use-cases, the reduction in readability isn't worth it.
1223    static final String TABLE =
1224        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1225            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1226            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1227            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1228    static final int MULTIPLIER = 1682554634;
1229    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1230
1231    static final CharMatcher INSTANCE = new Whitespace();
1232
1233    Whitespace() {
1234      super("CharMatcher.whitespace()");
1235    }
1236
1237    @Override
1238    public boolean matches(char c) {
1239      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1240    }
1241
1242    @J2ktIncompatible
1243    @GwtIncompatible // used only from other GwtIncompatible code
1244    @Override
1245    void setBits(BitSet table) {
1246      for (int i = 0; i < TABLE.length(); i++) {
1247        table.set(TABLE.charAt(i));
1248      }
1249    }
1250  }
1251
1252  /** Implementation of {@link #breakingWhitespace()}. */
1253  private static final class BreakingWhitespace extends CharMatcher {
1254
1255    static final CharMatcher INSTANCE = new BreakingWhitespace();
1256
1257    @Override
1258    public boolean matches(char c) {
1259      switch (c) {
1260        case '\t':
1261        case '\n':
1262        case '\013':
1263        case '\f':
1264        case '\r':
1265        case ' ':
1266        case '\u0085':
1267        case '\u1680':
1268        case '\u2028':
1269        case '\u2029':
1270        case '\u205f':
1271        case '\u3000':
1272          return true;
1273        case '\u2007':
1274          return false;
1275        default:
1276          return c >= '\u2000' && c <= '\u200a';
1277      }
1278    }
1279
1280    @Override
1281    public String toString() {
1282      return "CharMatcher.breakingWhitespace()";
1283    }
1284  }
1285
1286  /** Implementation of {@link #ascii()}. */
1287  private static final class Ascii extends NamedFastMatcher {
1288
1289    static final CharMatcher INSTANCE = new Ascii();
1290
1291    Ascii() {
1292      super("CharMatcher.ascii()");
1293    }
1294
1295    @Override
1296    public boolean matches(char c) {
1297      return c <= '\u007f';
1298    }
1299  }
1300
1301  /** Implementation that matches characters that fall within multiple ranges. */
1302  private static class RangesMatcher extends CharMatcher {
1303
1304    private final String description;
1305    private final char[] rangeStarts;
1306    private final char[] rangeEnds;
1307
1308    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1309      this.description = description;
1310      this.rangeStarts = rangeStarts;
1311      this.rangeEnds = rangeEnds;
1312      checkArgument(rangeStarts.length == rangeEnds.length);
1313      for (int i = 0; i < rangeStarts.length; i++) {
1314        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1315        if (i + 1 < rangeStarts.length) {
1316          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1317        }
1318      }
1319    }
1320
1321    @Override
1322    public boolean matches(char c) {
1323      int index = Arrays.binarySearch(rangeStarts, c);
1324      if (index >= 0) {
1325        return true;
1326      } else {
1327        index = ~index - 1;
1328        return index >= 0 && c <= rangeEnds[index];
1329      }
1330    }
1331
1332    @Override
1333    public String toString() {
1334      return description;
1335    }
1336  }
1337
1338  /** Implementation of {@link #digit()}. */
1339  private static final class Digit extends RangesMatcher {
1340    // Plug the following UnicodeSet pattern into
1341    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1342    // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]]
1343    // and get the zeroes from there.
1344
1345    // Must be in ascending order.
1346    private static final String ZEROES =
1347        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6"
1348            + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0"
1349            + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10";
1350
1351    private static char[] zeroes() {
1352      return ZEROES.toCharArray();
1353    }
1354
1355    private static char[] nines() {
1356      char[] nines = new char[ZEROES.length()];
1357      for (int i = 0; i < ZEROES.length(); i++) {
1358        nines[i] = (char) (ZEROES.charAt(i) + 9);
1359      }
1360      return nines;
1361    }
1362
1363    static final CharMatcher INSTANCE = new Digit();
1364
1365    private Digit() {
1366      super("CharMatcher.digit()", zeroes(), nines());
1367    }
1368  }
1369
1370  /** Implementation of {@link #javaDigit()}. */
1371  private static final class JavaDigit extends CharMatcher {
1372
1373    static final CharMatcher INSTANCE = new JavaDigit();
1374
1375    @Override
1376    public boolean matches(char c) {
1377      return Character.isDigit(c);
1378    }
1379
1380    @Override
1381    public String toString() {
1382      return "CharMatcher.javaDigit()";
1383    }
1384  }
1385
1386  /** Implementation of {@link #javaLetter()}. */
1387  private static final class JavaLetter extends CharMatcher {
1388
1389    static final CharMatcher INSTANCE = new JavaLetter();
1390
1391    @Override
1392    public boolean matches(char c) {
1393      return Character.isLetter(c);
1394    }
1395
1396    @Override
1397    public String toString() {
1398      return "CharMatcher.javaLetter()";
1399    }
1400  }
1401
1402  /** Implementation of {@link #javaLetterOrDigit()}. */
1403  private static final class JavaLetterOrDigit extends CharMatcher {
1404
1405    static final CharMatcher INSTANCE = new JavaLetterOrDigit();
1406
1407    @Override
1408    public boolean matches(char c) {
1409      return Character.isLetterOrDigit(c);
1410    }
1411
1412    @Override
1413    public String toString() {
1414      return "CharMatcher.javaLetterOrDigit()";
1415    }
1416  }
1417
1418  /** Implementation of {@link #javaUpperCase()}. */
1419  private static final class JavaUpperCase extends CharMatcher {
1420
1421    static final CharMatcher INSTANCE = new JavaUpperCase();
1422
1423    @Override
1424    public boolean matches(char c) {
1425      return Character.isUpperCase(c);
1426    }
1427
1428    @Override
1429    public String toString() {
1430      return "CharMatcher.javaUpperCase()";
1431    }
1432  }
1433
1434  /** Implementation of {@link #javaLowerCase()}. */
1435  private static final class JavaLowerCase extends CharMatcher {
1436
1437    static final CharMatcher INSTANCE = new JavaLowerCase();
1438
1439    @Override
1440    public boolean matches(char c) {
1441      return Character.isLowerCase(c);
1442    }
1443
1444    @Override
1445    public String toString() {
1446      return "CharMatcher.javaLowerCase()";
1447    }
1448  }
1449
1450  /** Implementation of {@link #javaIsoControl()}. */
1451  private static final class JavaIsoControl extends NamedFastMatcher {
1452
1453    static final CharMatcher INSTANCE = new JavaIsoControl();
1454
1455    private JavaIsoControl() {
1456      super("CharMatcher.javaIsoControl()");
1457    }
1458
1459    @Override
1460    public boolean matches(char c) {
1461      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1462    }
1463  }
1464
1465  /** Implementation of {@link #invisible()}. */
1466  private static final class Invisible extends RangesMatcher {
1467    // Plug the following UnicodeSet pattern into
1468    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1469    // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]]
1470    // with the "Abbreviate" option, and get the ranges from there.
1471    private static final String RANGE_STARTS =
1472        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u0890\u08e2\u1680\u180e\u2000\u2028\u205f\u2066"
1473            + "\u3000\ud800\ufeff\ufff9";
1474    private static final String RANGE_ENDS = // inclusive ends
1475        "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u0891\u08e2\u1680\u180e\u200f\u202f\u2064\u206f"
1476            + "\u3000\uf8ff\ufeff\ufffb";
1477
1478    static final CharMatcher INSTANCE = new Invisible();
1479
1480    private Invisible() {
1481      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1482    }
1483  }
1484
1485  /** Implementation of {@link #singleWidth()}. */
1486  private static final class SingleWidth extends RangesMatcher {
1487
1488    static final CharMatcher INSTANCE = new SingleWidth();
1489
1490    private SingleWidth() {
1491      super(
1492          "CharMatcher.singleWidth()",
1493          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1494          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1495    }
1496  }
1497
1498  // Non-static factory implementation classes
1499
1500  /** Implementation of {@link #negate()}. */
1501  private static class Negated extends CharMatcher {
1502
1503    final CharMatcher original;
1504
1505    Negated(CharMatcher original) {
1506      this.original = checkNotNull(original);
1507    }
1508
1509    @Override
1510    public boolean matches(char c) {
1511      return !original.matches(c);
1512    }
1513
1514    @Override
1515    public boolean matchesAllOf(CharSequence sequence) {
1516      return original.matchesNoneOf(sequence);
1517    }
1518
1519    @Override
1520    public boolean matchesNoneOf(CharSequence sequence) {
1521      return original.matchesAllOf(sequence);
1522    }
1523
1524    @Override
1525    public int countIn(CharSequence sequence) {
1526      return sequence.length() - original.countIn(sequence);
1527    }
1528
1529    @J2ktIncompatible
1530    @GwtIncompatible // used only from other GwtIncompatible code
1531    @Override
1532    void setBits(BitSet table) {
1533      BitSet tmp = new BitSet();
1534      original.setBits(tmp);
1535      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1536      table.or(tmp);
1537    }
1538
1539    @Override
1540    public CharMatcher negate() {
1541      return original;
1542    }
1543
1544    @Override
1545    public String toString() {
1546      return original + ".negate()";
1547    }
1548  }
1549
1550  /** Implementation of {@link #and(CharMatcher)}. */
1551  private static final class And extends CharMatcher {
1552
1553    final CharMatcher first;
1554    final CharMatcher second;
1555
1556    And(CharMatcher a, CharMatcher b) {
1557      first = checkNotNull(a);
1558      second = checkNotNull(b);
1559    }
1560
1561    @Override
1562    public boolean matches(char c) {
1563      return first.matches(c) && second.matches(c);
1564    }
1565
1566    @J2ktIncompatible
1567    @GwtIncompatible // used only from other GwtIncompatible code
1568    @Override
1569    void setBits(BitSet table) {
1570      BitSet tmp1 = new BitSet();
1571      first.setBits(tmp1);
1572      BitSet tmp2 = new BitSet();
1573      second.setBits(tmp2);
1574      tmp1.and(tmp2);
1575      table.or(tmp1);
1576    }
1577
1578    @Override
1579    public String toString() {
1580      return "CharMatcher.and(" + first + ", " + second + ")";
1581    }
1582  }
1583
1584  /** Implementation of {@link #or(CharMatcher)}. */
1585  private static final class Or extends CharMatcher {
1586
1587    final CharMatcher first;
1588    final CharMatcher second;
1589
1590    Or(CharMatcher a, CharMatcher b) {
1591      first = checkNotNull(a);
1592      second = checkNotNull(b);
1593    }
1594
1595    @J2ktIncompatible
1596    @GwtIncompatible // used only from other GwtIncompatible code
1597    @Override
1598    void setBits(BitSet table) {
1599      first.setBits(table);
1600      second.setBits(table);
1601    }
1602
1603    @Override
1604    public boolean matches(char c) {
1605      return first.matches(c) || second.matches(c);
1606    }
1607
1608    @Override
1609    public String toString() {
1610      return "CharMatcher.or(" + first + ", " + second + ")";
1611    }
1612  }
1613
1614  // Static factory implementations
1615
1616  /** Implementation of {@link #is(char)}. */
1617  private static final class Is extends FastMatcher {
1618
1619    private final char match;
1620
1621    Is(char match) {
1622      this.match = match;
1623    }
1624
1625    @Override
1626    public boolean matches(char c) {
1627      return c == match;
1628    }
1629
1630    @Override
1631    public String replaceFrom(CharSequence sequence, char replacement) {
1632      return sequence.toString().replace(match, replacement);
1633    }
1634
1635    @Override
1636    public CharMatcher and(CharMatcher other) {
1637      return other.matches(match) ? this : none();
1638    }
1639
1640    @Override
1641    public CharMatcher or(CharMatcher other) {
1642      return other.matches(match) ? other : super.or(other);
1643    }
1644
1645    @Override
1646    public CharMatcher negate() {
1647      return isNot(match);
1648    }
1649
1650    @J2ktIncompatible
1651    @GwtIncompatible // used only from other GwtIncompatible code
1652    @Override
1653    void setBits(BitSet table) {
1654      table.set(match);
1655    }
1656
1657    @Override
1658    public String toString() {
1659      return "CharMatcher.is('" + showCharacter(match) + "')";
1660    }
1661  }
1662
1663  /** Implementation of {@link #isNot(char)}. */
1664  private static final class IsNot extends FastMatcher {
1665
1666    private final char match;
1667
1668    IsNot(char match) {
1669      this.match = match;
1670    }
1671
1672    @Override
1673    public boolean matches(char c) {
1674      return c != match;
1675    }
1676
1677    @Override
1678    public CharMatcher and(CharMatcher other) {
1679      return other.matches(match) ? super.and(other) : other;
1680    }
1681
1682    @Override
1683    public CharMatcher or(CharMatcher other) {
1684      return other.matches(match) ? any() : this;
1685    }
1686
1687    @J2ktIncompatible
1688    @GwtIncompatible // used only from other GwtIncompatible code
1689    @Override
1690    void setBits(BitSet table) {
1691      table.set(0, match);
1692      table.set(match + 1, Character.MAX_VALUE + 1);
1693    }
1694
1695    @Override
1696    public CharMatcher negate() {
1697      return is(match);
1698    }
1699
1700    @Override
1701    public String toString() {
1702      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1703    }
1704  }
1705
1706  private static CharMatcher.IsEither isEither(char c1, char c2) {
1707    return new CharMatcher.IsEither(c1, c2);
1708  }
1709
1710  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1711  private static final class IsEither extends FastMatcher {
1712
1713    private final char match1;
1714    private final char match2;
1715
1716    IsEither(char match1, char match2) {
1717      this.match1 = match1;
1718      this.match2 = match2;
1719    }
1720
1721    @Override
1722    public boolean matches(char c) {
1723      return c == match1 || c == match2;
1724    }
1725
1726    @J2ktIncompatible
1727    @GwtIncompatible // used only from other GwtIncompatible code
1728    @Override
1729    void setBits(BitSet table) {
1730      table.set(match1);
1731      table.set(match2);
1732    }
1733
1734    @Override
1735    public String toString() {
1736      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1737    }
1738  }
1739
1740  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1741  private static final class AnyOf extends CharMatcher {
1742
1743    private final char[] chars;
1744
1745    public AnyOf(CharSequence chars) {
1746      this.chars = chars.toString().toCharArray();
1747      Arrays.sort(this.chars);
1748    }
1749
1750    @Override
1751    public boolean matches(char c) {
1752      return Arrays.binarySearch(chars, c) >= 0;
1753    }
1754
1755    @Override
1756    @J2ktIncompatible
1757    @GwtIncompatible // used only from other GwtIncompatible code
1758    void setBits(BitSet table) {
1759      for (char c : chars) {
1760        table.set(c);
1761      }
1762    }
1763
1764    @Override
1765    public String toString() {
1766      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1767      for (char c : chars) {
1768        description.append(showCharacter(c));
1769      }
1770      description.append("\")");
1771      return description.toString();
1772    }
1773  }
1774
1775  /** Implementation of {@link #inRange(char, char)}. */
1776  private static final class InRange extends FastMatcher {
1777
1778    private final char startInclusive;
1779    private final char endInclusive;
1780
1781    InRange(char startInclusive, char endInclusive) {
1782      checkArgument(endInclusive >= startInclusive);
1783      this.startInclusive = startInclusive;
1784      this.endInclusive = endInclusive;
1785    }
1786
1787    @Override
1788    public boolean matches(char c) {
1789      return startInclusive <= c && c <= endInclusive;
1790    }
1791
1792    @J2ktIncompatible
1793    @GwtIncompatible // used only from other GwtIncompatible code
1794    @Override
1795    void setBits(BitSet table) {
1796      table.set(startInclusive, endInclusive + 1);
1797    }
1798
1799    @Override
1800    public String toString() {
1801      return "CharMatcher.inRange('"
1802          + showCharacter(startInclusive)
1803          + "', '"
1804          + showCharacter(endInclusive)
1805          + "')";
1806    }
1807  }
1808
1809  /** Implementation of {@link #forPredicate(Predicate)}. */
1810  private static final class ForPredicate extends CharMatcher {
1811
1812    private final Predicate<? super Character> predicate;
1813
1814    ForPredicate(Predicate<? super Character> predicate) {
1815      this.predicate = checkNotNull(predicate);
1816    }
1817
1818    @Override
1819    public boolean matches(char c) {
1820      return predicate.apply(c);
1821    }
1822
1823    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1824    @Override
1825    public boolean apply(Character character) {
1826      return predicate.apply(checkNotNull(character));
1827    }
1828
1829    @Override
1830    public String toString() {
1831      return "CharMatcher.forPredicate(" + predicate + ")";
1832    }
1833  }
1834}