001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.J2ktIncompatible;
024import com.google.common.annotations.VisibleForTesting;
025import java.util.Arrays;
026import java.util.BitSet;
027
028/**
029 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
030 * for any {@link Object}. Also offers basic text processing methods based on this function.
031 * Implementations are strongly encouraged to be side-effect-free and immutable.
032 *
033 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
034 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
035 *
036 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a
037 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand
038 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code
039 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of
040 * assigned characters, including important CJK characters and emoji.
041 *
042 * <p>Supplementary characters are <a
043 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded
044 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as
045 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s.
046 *
047 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for
048 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For
049 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner.
050 *
051 * <p>Example usages:
052 *
053 * <pre>
054 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
055 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
056 *
057 * <p>See the Guava User Guide article on <a
058 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
059 * </a>.
060 *
061 * @author Kevin Bourrillion
062 * @since 1.0
063 */
064@GwtCompatible(emulated = true)
065@ElementTypesAreNonnullByDefault
066public abstract class CharMatcher implements Predicate<Character> {
067  /*
068   *           N777777777NO
069   *         N7777777777777N
070   *        M777777777777777N
071   *        $N877777777D77777M
072   *       N M77777777ONND777M
073   *       MN777777777NN  D777
074   *     N7ZN777777777NN ~M7778
075   *    N777777777777MMNN88777N
076   *    N777777777777MNZZZ7777O
077   *    DZN7777O77777777777777
078   *     N7OONND7777777D77777N
079   *      8$M++++?N???$77777$
080   *       M7++++N+M77777777N
081   *        N77O777777777777$                              M
082   *          DNNM$$$$777777N                              D
083   *         N$N:=N$777N7777M                             NZ
084   *        77Z::::N777777777                          ODZZZ
085   *       77N::::::N77777777M                         NNZZZ$
086   *     $777:::::::77777777MN                        ZM8ZZZZZ
087   *     777M::::::Z7777777Z77                        N++ZZZZNN
088   *    7777M:::::M7777777$777M                       $++IZZZZM
089   *   M777$:::::N777777$M7777M                       +++++ZZZDN
090   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
091   *       N::::::N:7$O:77777777                      N++++ZZZZN
092   *       M::::::::::::N77777777+                   +?+++++ZZZM
093   *       8::::::::::::D77777777M                    O+++++ZZ
094   *        ::::::::::::M777777777N                      O+?D
095   *        M:::::::::::M77777777778                     77=
096   *        D=::::::::::N7777777777N                    777
097   *       INN===::::::=77777777777N                  I777N
098   *      ?777N========N7777777777787M               N7777
099   *      77777$D======N77777777777N777N?         N777777
100   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
101   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
102   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
103   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
104   *   7 :N MNN$$$$M$      $$$777$8      8D8I
105   *     NMM.:7O           777777778
106   *                       7777777MN
107   *                       M NO .7:
108   *                       M   :   M
109   *                            8
110   */
111
112  // Constant matcher factory methods
113
  /**
   * Matches any character.
   *
   * @return a matcher that matches every {@code char} value
   * @since 19.0 (since 1.0 as constant {@code ANY})
   */
  public static CharMatcher any() {
    return Any.INSTANCE;
  }
122
  /**
   * Matches no characters.
   *
   * @return a matcher that matches no {@code char} value
   * @since 19.0 (since 1.0 as constant {@code NONE})
   */
  public static CharMatcher none() {
    return None.INSTANCE;
  }
131
  /**
   * Determines whether a character is whitespace according to the latest Unicode standard, as
   * illustrated <a
   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
   * This is not the same definition used by other Java APIs. (See a <a
   * href="https://goo.gl/Y6SLWx">comparison of several definitions of "whitespace"</a>.)
   *
   * <p>All Unicode White_Space characters are on the BMP and thus supported by this API.
   *
   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
   * date.
   *
   * @return a matcher for Unicode whitespace characters
   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
   */
  public static CharMatcher whitespace() {
    return Whitespace.INSTANCE;
  }
149
  /**
   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
   * discussion of that term.
   *
   * @return a matcher for breaking whitespace characters
   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
   */
  public static CharMatcher breakingWhitespace() {
    return BreakingWhitespace.INSTANCE;
  }
160
  /**
   * Determines whether a character is ASCII, meaning that its code point is less than 128.
   *
   * @return a matcher for characters whose code point is less than 128
   * @since 19.0 (since 1.0 as constant {@code ASCII})
   */
  public static CharMatcher ascii() {
    return Ascii.INSTANCE;
  }
169
  /**
   * Determines whether a character is a BMP digit according to <a
   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
   *
   * @return a matcher for BMP digits as defined by Unicode
   * @deprecated Many digits are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code DIGIT})
   */
  @Deprecated
  public static CharMatcher digit() {
    return Digit.INSTANCE;
  }
182
  /**
   * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char)
   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
   * '9')}.
   *
   * @return a matcher for BMP digits as defined by {@link Character#isDigit(char)}
   * @deprecated Many digits are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
   */
  @Deprecated
  public static CharMatcher javaDigit() {
    return JavaDigit.INSTANCE;
  }
195
  /**
   * Determines whether a character is a BMP letter according to {@linkplain
   * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
   * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
   *
   * @return a matcher for BMP letters as defined by {@link Character#isLetter(char)}
   * @deprecated Most letters are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
   */
  @Deprecated
  public static CharMatcher javaLetter() {
    return JavaLetter.INSTANCE;
  }
208
  /**
   * Determines whether a character is a BMP letter or digit according to {@linkplain
   * Character#isLetterOrDigit(char) Java's definition}.
   *
   * @return a matcher for BMP letters and digits as defined by {@link
   *     Character#isLetterOrDigit(char)}
   * @deprecated Most letters and digits are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT})
   */
  @Deprecated
  public static CharMatcher javaLetterOrDigit() {
    return JavaLetterOrDigit.INSTANCE;
  }
220
  /**
   * Determines whether a BMP character is upper case according to {@linkplain
   * Character#isUpperCase(char) Java's definition}.
   *
   * @return a matcher for BMP upper-case characters as defined by {@link
   *     Character#isUpperCase(char)}
   * @deprecated Some uppercase characters are supplementary characters; see the class
   *     documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
   */
  @Deprecated
  public static CharMatcher javaUpperCase() {
    return JavaUpperCase.INSTANCE;
  }
233
  /**
   * Determines whether a BMP character is lower case according to {@linkplain
   * Character#isLowerCase(char) Java's definition}.
   *
   * @return a matcher for BMP lower-case characters as defined by {@link
   *     Character#isLowerCase(char)}
   * @deprecated Some lowercase characters are supplementary characters; see the class
   *     documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
   */
  @Deprecated
  public static CharMatcher javaLowerCase() {
    return JavaLowerCase.INSTANCE;
  }
246
  /**
   * Determines whether a character is an ISO control character as specified by {@link
   * Character#isISOControl(char)}.
   *
   * <p>All ISO control codes are on the BMP and thus supported by this API.
   *
   * @return a matcher for ISO control characters
   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
   */
  public static CharMatcher javaIsoControl() {
    return JavaIsoControl.INSTANCE;
  }
258
  /**
   * Determines whether a character is invisible; that is, if its Unicode category is any of
   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
   * PRIVATE_USE according to ICU4J.
   *
   * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU).
   *
   * @return a matcher for BMP characters in the invisible categories listed above
   * @deprecated Most invisible characters are supplementary characters; see the class
   *     documentation.
   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
   */
  @Deprecated
  public static CharMatcher invisible() {
    return Invisible.INSTANCE;
  }
274
  /**
   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
   * errs on the side of returning {@code false} (that is, it tends to assume a character is
   * double-width).
   *
   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
   * date.
   *
   * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>.
   *
   * @return a matcher for BMP characters considered single-width
   * @deprecated Many such characters are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
   */
  @Deprecated
  public static CharMatcher singleWidth() {
    return SingleWidth.INSTANCE;
  }
292
293  // Static factories
294
  /**
   * Returns a {@code char} matcher that matches only one specified BMP character.
   *
   * @param match the single character the returned matcher accepts
   * @return a matcher that matches {@code match} and no other character
   */
  public static CharMatcher is(final char match) {
    return new Is(match);
  }
299
  /**
   * Returns a {@code char} matcher that matches any character except the BMP character specified.
   *
   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
   *
   * @param match the single character the returned matcher rejects
   * @return a matcher that matches every character other than {@code match}
   */
  public static CharMatcher isNot(final char match) {
    return new IsNot(match);
  }
308
309  /**
310   * Returns a {@code char} matcher that matches any BMP character present in the given character
311   * sequence. Returns a bogus matcher if the sequence contains supplementary characters.
312   */
313  public static CharMatcher anyOf(final CharSequence sequence) {
314    switch (sequence.length()) {
315      case 0:
316        return none();
317      case 1:
318        return is(sequence.charAt(0));
319      case 2:
320        return isEither(sequence.charAt(0), sequence.charAt(1));
321      default:
322        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
323        // matcher?
324        return new AnyOf(sequence);
325    }
326  }
327
  /**
   * Returns a {@code char} matcher that matches any BMP character not present in the given
   * character sequence. Returns a bogus matcher if the sequence contains supplementary characters.
   *
   * @param sequence the characters the returned matcher should reject
   * @return the negation of {@link #anyOf anyOf(sequence)}
   */
  public static CharMatcher noneOf(CharSequence sequence) {
    return anyOf(sequence).negate();
  }
335
  /**
   * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints
   * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
   * CharMatcher.inRange('a', 'z')}.
   *
   * @param startInclusive the lowest character of the range
   * @param endInclusive the highest character of the range
   * @return a matcher for the characters from {@code startInclusive} through {@code endInclusive}
   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
   */
  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
    return new InRange(startInclusive, endInclusive);
  }
346
347  /**
348   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
349   * which operates on primitive {@code char} instances instead.
350   */
351  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
352    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
353  }
354
355  // Constructors
356
  /**
   * Constructor for use by subclasses. When subclassing, you may want to override {@code
   * toString()} to provide a useful description.
   *
   * <p>The constructor itself performs no initialization.
   */
  protected CharMatcher() {}
362
363  // Abstract methods
364
  /**
   * Determines a true or false value for the given character.
   *
   * @param c the character to test
   * @return {@code true} if this matcher matches {@code c}
   */
  public abstract boolean matches(char c);
367
368  // Non-static factories
369
  /**
   * Returns a matcher that matches any character not matched by this matcher.
   *
   * @return a new matcher wrapping this one with the opposite result
   */
  // @Override under Java 8 but not under Java 7
  public CharMatcher negate() {
    return new Negated(this);
  }
375
  /**
   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
   *
   * @param other the matcher to combine with this one
   * @return a new matcher combining this matcher and {@code other}
   */
  public CharMatcher and(CharMatcher other) {
    return new And(this, other);
  }
382
  /**
   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
   *
   * @param other the matcher to combine with this one
   * @return a new matcher combining this matcher and {@code other}
   */
  public CharMatcher or(CharMatcher other) {
    return new Or(this, other);
  }
389
  /**
   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
   * worthwhile only if the precomputed matcher is queried many thousands of times.
   *
   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
   * worthwhile tradeoff in a browser.
   *
   * @return a matcher equivalent to this one, possibly {@code this} itself
   */
  public CharMatcher precomputed() {
    // Routed through Platform so that the GWT build can substitute different behavior.
    return Platform.precomputeCharMatcher(this);
  }
402
  /**
   * Number of distinct {@code char} values, that is, the size of the inclusive range from {@link
   * Character#MIN_VALUE} to {@link Character#MAX_VALUE}.
   */
  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
404
405  /**
406   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
407   * on {@link Platform} so that we can have different behavior in GWT.
408   *
409   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
410   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
411   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
412   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
413   * matcher which is faster to query than the original.
414   */
415  @J2ktIncompatible
416  @GwtIncompatible // SmallCharMatcher
417  CharMatcher precomputedInternal() {
418    final BitSet table = new BitSet();
419    setBits(table);
420    int totalCharacters = table.cardinality();
421    if (totalCharacters * 2 <= DISTINCT_CHARS) {
422      return precomputedPositive(totalCharacters, table, toString());
423    } else {
424      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
425      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
426      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
427      String suffix = ".negate()";
428      final String description = toString();
429      String negatedDescription =
430          description.endsWith(suffix)
431              ? description.substring(0, description.length() - suffix.length())
432              : description + suffix;
433      return new NegatedFastMatcher(
434          precomputedPositive(negatedCharacters, table, negatedDescription)) {
435        @Override
436        public String toString() {
437          return description;
438        }
439      };
440    }
441  }
442
443  /**
444   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
445   */
446  @J2ktIncompatible
447  @GwtIncompatible // SmallCharMatcher
448  private static CharMatcher precomputedPositive(
449      int totalCharacters, BitSet table, String description) {
450    switch (totalCharacters) {
451      case 0:
452        return none();
453      case 1:
454        return is((char) table.nextSetBit(0));
455      case 2:
456        char c1 = (char) table.nextSetBit(0);
457        char c2 = (char) table.nextSetBit(c1 + 1);
458        return isEither(c1, c2);
459      default:
460        return isSmall(totalCharacters, table.length())
461            ? SmallCharMatcher.from(table, description)
462            : new BitSetMatcher(table, description);
463    }
464  }
465
466  @J2ktIncompatible
467  @GwtIncompatible // SmallCharMatcher
468  private static boolean isSmall(int totalCharacters, int tableLength) {
469    return totalCharacters <= SmallCharMatcher.MAX_SIZE
470        && tableLength > (totalCharacters * 4 * Character.SIZE);
471    // err on the side of BitSetMatcher
472  }
473
474  /** Sets bits in {@code table} matched by this matcher. */
475  @J2ktIncompatible
476  @GwtIncompatible // used only from other GwtIncompatible code
477  void setBits(BitSet table) {
478    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
479      if (matches((char) c)) {
480        table.set(c);
481      }
482    }
483  }
484
485  // Text processing routines
486
  /**
   * Returns {@code true} if a character sequence contains at least one matching BMP character.
   * Equivalent to {@code !matchesNoneOf(sequence)}.
   *
   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
   * character, until this returns {@code true} or the end is reached.
   *
   * @param sequence the character sequence to examine, possibly empty
   * @return {@code true} if this matcher matches at least one character in the sequence
   * @since 8.0
   */
  public boolean matchesAnyOf(CharSequence sequence) {
    // Defined as the complement of matchesNoneOf, so an override of that method is honored here.
    return !matchesNoneOf(sequence);
  }
501
502  /**
503   * Returns {@code true} if a character sequence contains only matching BMP characters.
504   *
505   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
506   * character, until this returns {@code false} or the end is reached.
507   *
508   * @param sequence the character sequence to examine, possibly empty
509   * @return {@code true} if this matcher matches every character in the sequence, including when
510   *     the sequence is empty
511   */
512  public boolean matchesAllOf(CharSequence sequence) {
513    for (int i = sequence.length() - 1; i >= 0; i--) {
514      if (!matches(sequence.charAt(i))) {
515        return false;
516      }
517    }
518    return true;
519  }
520
  /**
   * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to
   * {@code !matchesAnyOf(sequence)}.
   *
   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
   * character, until this returns {@code true} or the end is reached.
   *
   * @param sequence the character sequence to examine, possibly empty
   * @return {@code true} if this matcher matches no characters in the sequence, including when the
   *     sequence is empty
   */
  public boolean matchesNoneOf(CharSequence sequence) {
    // indexIn reports -1 exactly when no character of the sequence matches.
    return indexIn(sequence) == -1;
  }
535
  /**
   * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if
   * no matching character is present.
   *
   * <p>The default implementation iterates over the sequence in forward order calling {@link
   * #matches} for each character.
   *
   * @param sequence the character sequence to examine from the beginning
   * @return an index, or {@code -1} if no character matches
   */
  public int indexIn(CharSequence sequence) {
    // Same search as the two-argument overload, started at index 0.
    return indexIn(sequence, 0);
  }
549
550  /**
551   * Returns the index of the first matching BMP character in a character sequence, starting from a
552   * given position, or {@code -1} if no character matches after that position.
553   *
554   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
555   * start}, calling {@link #matches} for each character.
556   *
557   * @param sequence the character sequence to examine
558   * @param start the first index to examine; must be nonnegative and no greater than {@code
559   *     sequence.length()}
560   * @return the index of the first matching character, guaranteed to be no less than {@code start},
561   *     or {@code -1} if no character matches
562   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
563   *     sequence.length()}
564   */
565  public int indexIn(CharSequence sequence, int start) {
566    int length = sequence.length();
567    checkPositionIndex(start, length);
568    for (int i = start; i < length; i++) {
569      if (matches(sequence.charAt(i))) {
570        return i;
571      }
572    }
573    return -1;
574  }
575
576  /**
577   * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if
578   * no matching character is present.
579   *
580   * <p>The default implementation iterates over the sequence in reverse order calling {@link
581   * #matches} for each character.
582   *
583   * @param sequence the character sequence to examine from the end
584   * @return an index, or {@code -1} if no character matches
585   */
586  public int lastIndexIn(CharSequence sequence) {
587    for (int i = sequence.length() - 1; i >= 0; i--) {
588      if (matches(sequence.charAt(i))) {
589        return i;
590      }
591    }
592    return -1;
593  }
594
595  /**
596   * Returns the number of matching {@code char}s found in a character sequence.
597   *
598   * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}().
599   */
600  public int countIn(CharSequence sequence) {
601    int count = 0;
602    for (int i = 0; i < sequence.length(); i++) {
603      if (matches(sequence.charAt(i))) {
604        count++;
605      }
606    }
607    return count;
608  }
609
  /**
   * Returns a string containing all non-matching characters of a character sequence, in order. For
   * example:
   *
   * <pre>{@code
   * CharMatcher.is('a').removeFrom("bazaar")
   * }</pre>
   *
   * ... returns {@code "bzr"}.
   *
   * @param sequence the character sequence to remove matching characters from
   * @return the string form of {@code sequence} with every matching character removed
   */
  public String removeFrom(CharSequence sequence) {
    String string = sequence.toString();
    int pos = indexIn(string);
    if (pos == -1) {
      // Nothing matches, so the string form of the input is returned without copying.
      return string;
    }

    char[] chars = string.toCharArray();
    // Number of matching chars found so far, i.e. how far each kept char has to shift left.
    int spread = 1;

    // This unusual loop comes from extensive benchmarking
    OUT:
    while (true) {
      // pos sits on a matching char here; step past it.
      pos++;
      while (true) {
        if (pos == chars.length) {
          break OUT;
        }
        if (matches(chars[pos])) {
          // Another char to drop: leave the copy loop so that spread can grow.
          break;
        }
        // Compact in place: move the kept char left over the removed ones.
        chars[pos - spread] = chars[pos];
        pos++;
      }
      spread++;
    }
    // pos == chars.length here, so the kept chars fill the first (length - spread) slots.
    return new String(chars, 0, pos - spread);
  }
648
  /**
   * Returns a string containing all matching BMP characters of a character sequence, in order. For
   * example:
   *
   * <pre>{@code
   * CharMatcher.is('a').retainFrom("bazaar")
   * }</pre>
   *
   * ... returns {@code "aaa"}.
   *
   * <p>The default implementation removes every character matched by {@link #negate()}.
   *
   * @param sequence the character sequence to keep matching characters from
   * @return the matching characters of {@code sequence}, in their original order
   */
  public String retainFrom(CharSequence sequence) {
    return negate().removeFrom(sequence);
  }
662
663  /**
664   * Returns a string copy of the input character sequence, with each matching BMP character
665   * replaced by a given replacement character. For example:
666   *
667   * <pre>{@code
668   * CharMatcher.is('a').replaceFrom("radar", 'o')
669   * }</pre>
670   *
671   * ... returns {@code "rodor"}.
672   *
673   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
674   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
675   * character.
676   *
677   * @param sequence the character sequence to replace matching characters in
678   * @param replacement the character to append to the result string in place of each matching
679   *     character in {@code sequence}
680   * @return the new string
681   */
682  public String replaceFrom(CharSequence sequence, char replacement) {
683    String string = sequence.toString();
684    int pos = indexIn(string);
685    if (pos == -1) {
686      return string;
687    }
688    char[] chars = string.toCharArray();
689    chars[pos] = replacement;
690    for (int i = pos + 1; i < chars.length; i++) {
691      if (matches(chars[i])) {
692        chars[i] = replacement;
693      }
694    }
695    return new String(chars);
696  }
697
698  /**
699   * Returns a string copy of the input character sequence, with each matching BMP character
700   * replaced by a given replacement sequence. For example:
701   *
702   * <pre>{@code
703   * CharMatcher.is('a').replaceFrom("yaha", "oo")
704   * }</pre>
705   *
706   * ... returns {@code "yoohoo"}.
707   *
708   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
709   * off calling {@link #replaceFrom(CharSequence, char)} directly.
710   *
711   * @param sequence the character sequence to replace matching characters in
712   * @param replacement the characters to append to the result string in place of each matching
713   *     character in {@code sequence}
714   * @return the new string
715   */
716  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
717    int replacementLen = replacement.length();
718    if (replacementLen == 0) {
719      return removeFrom(sequence);
720    }
721    if (replacementLen == 1) {
722      return replaceFrom(sequence, replacement.charAt(0));
723    }
724
725    String string = sequence.toString();
726    int pos = indexIn(string);
727    if (pos == -1) {
728      return string;
729    }
730
731    int len = string.length();
732    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
733
734    int oldpos = 0;
735    do {
736      buf.append(string, oldpos, pos);
737      buf.append(replacement);
738      oldpos = pos + 1;
739      pos = indexIn(string, oldpos);
740    } while (pos != -1);
741
742    buf.append(string, oldpos, len);
743    return buf.toString();
744  }
745
746  /**
747   * Returns a substring of the input character sequence that omits all matching BMP characters from
748   * the beginning and from the end of the string. For example:
749   *
750   * <pre>{@code
751   * CharMatcher.anyOf("ab").trimFrom("abacatbab")
752   * }</pre>
753   *
754   * ... returns {@code "cat"}.
755   *
756   * <p>Note that:
757   *
758   * <pre>{@code
759   * CharMatcher.inRange('\0', ' ').trimFrom(str)
760   * }</pre>
761   *
762   * ... is equivalent to {@link String#trim()}.
763   */
764  public String trimFrom(CharSequence sequence) {
765    int len = sequence.length();
766    int first;
767    int last;
768
769    for (first = 0; first < len; first++) {
770      if (!matches(sequence.charAt(first))) {
771        break;
772      }
773    }
774    for (last = len - 1; last > first; last--) {
775      if (!matches(sequence.charAt(last))) {
776        break;
777      }
778    }
779
780    return sequence.subSequence(first, last + 1).toString();
781  }
782
783  /**
784   * Returns a substring of the input character sequence that omits all matching BMP characters from
785   * the beginning of the string. For example:
786   *
787   * <pre>{@code
788   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
789   * }</pre>
790   *
791   * ... returns {@code "catbab"}.
792   */
793  public String trimLeadingFrom(CharSequence sequence) {
794    int len = sequence.length();
795    for (int first = 0; first < len; first++) {
796      if (!matches(sequence.charAt(first))) {
797        return sequence.subSequence(first, len).toString();
798      }
799    }
800    return "";
801  }
802
803  /**
804   * Returns a substring of the input character sequence that omits all matching BMP characters from
805   * the end of the string. For example:
806   *
807   * <pre>{@code
808   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
809   * }</pre>
810   *
811   * ... returns {@code "abacat"}.
812   */
813  public String trimTrailingFrom(CharSequence sequence) {
814    int len = sequence.length();
815    for (int last = len - 1; last >= 0; last--) {
816      if (!matches(sequence.charAt(last))) {
817        return sequence.subSequence(0, last + 1).toString();
818      }
819    }
820    return "";
821  }
822
823  /**
824   * Returns a string copy of the input character sequence, with each group of consecutive matching
825   * BMP characters replaced by a single replacement character. For example:
826   *
827   * <pre>{@code
828   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
829   * }</pre>
830   *
831   * ... returns {@code "b-p-r"}.
832   *
833   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
834   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
835   * character.
836   *
837   * @param sequence the character sequence to replace matching groups of characters in
838   * @param replacement the character to append to the result string in place of each group of
839   *     matching characters in {@code sequence}
840   * @return the new string
841   */
842  public String collapseFrom(CharSequence sequence, char replacement) {
843    // This implementation avoids unnecessary allocation.
844    int len = sequence.length();
845    for (int i = 0; i < len; i++) {
846      char c = sequence.charAt(i);
847      if (matches(c)) {
848        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
849          // a no-op replacement
850          i++;
851        } else {
852          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
853          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
854        }
855      }
856    }
857    // no replacement needed
858    return sequence.toString();
859  }
860
861  /**
862   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
863   * groups of matching BMP characters at the start or end of the sequence are removed without
864   * replacement.
865   */
866  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
867    // This implementation avoids unnecessary allocation.
868    int len = sequence.length();
869    int first = 0;
870    int last = len - 1;
871
872    while (first < len && matches(sequence.charAt(first))) {
873      first++;
874    }
875
876    while (last > first && matches(sequence.charAt(last))) {
877      last--;
878    }
879
880    return (first == 0 && last == len - 1)
881        ? collapseFrom(sequence, replacement)
882        : finishCollapseFrom(
883            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
884  }
885
886  private String finishCollapseFrom(
887      CharSequence sequence,
888      int start,
889      int end,
890      char replacement,
891      StringBuilder builder,
892      boolean inMatchingGroup) {
893    for (int i = start; i < end; i++) {
894      char c = sequence.charAt(i);
895      if (matches(c)) {
896        if (!inMatchingGroup) {
897          builder.append(replacement);
898          inMatchingGroup = true;
899        }
900      } else {
901        builder.append(c);
902        inMatchingGroup = false;
903      }
904    }
905    return builder.toString();
906  }
907
908  /**
909   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
910   *     instead.
911   */
912  @Deprecated
913  @Override
914  public boolean apply(Character character) {
915    return matches(character);
916  }
917
  /**
   * Returns a string representation of this {@code CharMatcher}, such as {@code
   * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
   *
   * <p>This base implementation only delegates to the superclass; the descriptive forms are
   * produced by the nested implementation classes, which override this method.
   */
  @Override
  public String toString() {
    return super.toString();
  }
926
927  /**
928   * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where
929   * "12AB" is the four hexadecimal digits representing the 16-bit code unit.
930   */
931  private static String showCharacter(char c) {
932    String hex = "0123456789ABCDEF";
933    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
934    for (int i = 0; i < 4; i++) {
935      tmp[5 - i] = hex.charAt(c & 0xF);
936      c = (char) (c >> 4);
937    }
938    return String.copyValueOf(tmp);
939  }
940
941  // Fast matchers
942
943  /** A matcher for which precomputation will not yield any significant benefit. */
944  abstract static class FastMatcher extends CharMatcher {
945
946    @Override
947    public final CharMatcher precomputed() {
948      return this;
949    }
950
951    @Override
952    public CharMatcher negate() {
953      return new NegatedFastMatcher(this);
954    }
955  }
956
957  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
958  abstract static class NamedFastMatcher extends FastMatcher {
959
960    private final String description;
961
962    NamedFastMatcher(String description) {
963      this.description = checkNotNull(description);
964    }
965
966    @Override
967    public final String toString() {
968      return description;
969    }
970  }
971
972  /** Negation of a {@link FastMatcher}. */
973  static class NegatedFastMatcher extends Negated {
974
975    NegatedFastMatcher(CharMatcher original) {
976      super(original);
977    }
978
979    @Override
980    public final CharMatcher precomputed() {
981      return this;
982    }
983  }
984
985  /** Fast matcher using a {@link BitSet} table of matching characters. */
986  @J2ktIncompatible
987  @GwtIncompatible // used only from other GwtIncompatible code
988  private static final class BitSetMatcher extends NamedFastMatcher {
989
990    private final BitSet table;
991
992    private BitSetMatcher(BitSet table, String description) {
993      super(description);
994      if (table.length() + Long.SIZE < table.size()) {
995        table = (BitSet) table.clone();
996        // If only we could actually call BitSet.trimToSize() ourselves...
997      }
998      this.table = table;
999    }
1000
1001    @Override
1002    public boolean matches(char c) {
1003      return table.get(c);
1004    }
1005
1006    @Override
1007    void setBits(BitSet bitSet) {
1008      bitSet.or(table);
1009    }
1010  }
1011
1012  // Static constant implementation classes
1013
1014  /** Implementation of {@link #any()}. */
1015  private static final class Any extends NamedFastMatcher {
1016
1017    static final Any INSTANCE = new Any();
1018
1019    private Any() {
1020      super("CharMatcher.any()");
1021    }
1022
1023    @Override
1024    public boolean matches(char c) {
1025      return true;
1026    }
1027
1028    @Override
1029    public int indexIn(CharSequence sequence) {
1030      return (sequence.length() == 0) ? -1 : 0;
1031    }
1032
1033    @Override
1034    public int indexIn(CharSequence sequence, int start) {
1035      int length = sequence.length();
1036      checkPositionIndex(start, length);
1037      return (start == length) ? -1 : start;
1038    }
1039
1040    @Override
1041    public int lastIndexIn(CharSequence sequence) {
1042      return sequence.length() - 1;
1043    }
1044
1045    @Override
1046    public boolean matchesAllOf(CharSequence sequence) {
1047      checkNotNull(sequence);
1048      return true;
1049    }
1050
1051    @Override
1052    public boolean matchesNoneOf(CharSequence sequence) {
1053      return sequence.length() == 0;
1054    }
1055
1056    @Override
1057    public String removeFrom(CharSequence sequence) {
1058      checkNotNull(sequence);
1059      return "";
1060    }
1061
1062    @Override
1063    public String replaceFrom(CharSequence sequence, char replacement) {
1064      char[] array = new char[sequence.length()];
1065      Arrays.fill(array, replacement);
1066      return new String(array);
1067    }
1068
1069    @Override
1070    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1071      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1072      for (int i = 0; i < sequence.length(); i++) {
1073        result.append(replacement);
1074      }
1075      return result.toString();
1076    }
1077
1078    @Override
1079    public String collapseFrom(CharSequence sequence, char replacement) {
1080      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1081    }
1082
1083    @Override
1084    public String trimFrom(CharSequence sequence) {
1085      checkNotNull(sequence);
1086      return "";
1087    }
1088
1089    @Override
1090    public int countIn(CharSequence sequence) {
1091      return sequence.length();
1092    }
1093
1094    @Override
1095    public CharMatcher and(CharMatcher other) {
1096      return checkNotNull(other);
1097    }
1098
1099    @Override
1100    public CharMatcher or(CharMatcher other) {
1101      checkNotNull(other);
1102      return this;
1103    }
1104
1105    @Override
1106    public CharMatcher negate() {
1107      return none();
1108    }
1109  }
1110
1111  /** Implementation of {@link #none()}. */
1112  private static final class None extends NamedFastMatcher {
1113
1114    static final None INSTANCE = new None();
1115
1116    private None() {
1117      super("CharMatcher.none()");
1118    }
1119
1120    @Override
1121    public boolean matches(char c) {
1122      return false;
1123    }
1124
1125    @Override
1126    public int indexIn(CharSequence sequence) {
1127      checkNotNull(sequence);
1128      return -1;
1129    }
1130
1131    @Override
1132    public int indexIn(CharSequence sequence, int start) {
1133      int length = sequence.length();
1134      checkPositionIndex(start, length);
1135      return -1;
1136    }
1137
1138    @Override
1139    public int lastIndexIn(CharSequence sequence) {
1140      checkNotNull(sequence);
1141      return -1;
1142    }
1143
1144    @Override
1145    public boolean matchesAllOf(CharSequence sequence) {
1146      return sequence.length() == 0;
1147    }
1148
1149    @Override
1150    public boolean matchesNoneOf(CharSequence sequence) {
1151      checkNotNull(sequence);
1152      return true;
1153    }
1154
1155    @Override
1156    public String removeFrom(CharSequence sequence) {
1157      return sequence.toString();
1158    }
1159
1160    @Override
1161    public String replaceFrom(CharSequence sequence, char replacement) {
1162      return sequence.toString();
1163    }
1164
1165    @Override
1166    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1167      checkNotNull(replacement);
1168      return sequence.toString();
1169    }
1170
1171    @Override
1172    public String collapseFrom(CharSequence sequence, char replacement) {
1173      return sequence.toString();
1174    }
1175
1176    @Override
1177    public String trimFrom(CharSequence sequence) {
1178      return sequence.toString();
1179    }
1180
1181    @Override
1182    public String trimLeadingFrom(CharSequence sequence) {
1183      return sequence.toString();
1184    }
1185
1186    @Override
1187    public String trimTrailingFrom(CharSequence sequence) {
1188      return sequence.toString();
1189    }
1190
1191    @Override
1192    public int countIn(CharSequence sequence) {
1193      checkNotNull(sequence);
1194      return 0;
1195    }
1196
1197    @Override
1198    public CharMatcher and(CharMatcher other) {
1199      checkNotNull(other);
1200      return this;
1201    }
1202
1203    @Override
1204    public CharMatcher or(CharMatcher other) {
1205      return checkNotNull(other);
1206    }
1207
1208    @Override
1209    public CharMatcher negate() {
1210      return any();
1211    }
1212  }
1213
  /** Implementation of {@link #whitespace()}. */
  @VisibleForTesting
  static final class Whitespace extends NamedFastMatcher {

    // TABLE is a precomputed hashset of whitespace characters. MULTIPLIER serves as a hash function
    // whose key property is that it maps 25 characters into the 32-slot table without collision.
    // Basically this is an opportunistic fast implementation as opposed to "good code". For most
    // other use-cases, the reduction in readability isn't worth it.
    //
    // The literal holds 32 chars, of which 25 are distinct: U+3000 appears eight times.
    // NOTE(review): the repeats presumably pad the slots no character hashes to - confirm against
    // whatever generated the table before editing it.
    static final String TABLE =
        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
    // Hash multiplier; the int product in matches(char) is allowed to wrap.
    static final int MULTIPLIER = 1682554634;
    // For the 32-char table this is numberOfLeadingZeros(31) == 27, so the unsigned shift in
    // matches(char) keeps the top 5 bits of the product, i.e. an index in [0, 31].
    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);

    static final Whitespace INSTANCE = new Whitespace();

    Whitespace() {
      super("CharMatcher.whitespace()");
    }

    /** Hashes {@code c} to a table slot and reports whether that slot holds exactly {@code c}. */
    @Override
    public boolean matches(char c) {
      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
    }

    /** Sets the bit for every character stored in {@link #TABLE} (duplicates are harmless). */
    @J2ktIncompatible
    @GwtIncompatible // used only from other GwtIncompatible code
    @Override
    void setBits(BitSet table) {
      for (int i = 0; i < TABLE.length(); i++) {
        table.set(TABLE.charAt(i));
      }
    }
  }
1250
1251  /** Implementation of {@link #breakingWhitespace()}. */
1252  private static final class BreakingWhitespace extends CharMatcher {
1253
1254    static final CharMatcher INSTANCE = new BreakingWhitespace();
1255
1256    @Override
1257    public boolean matches(char c) {
1258      switch (c) {
1259        case '\t':
1260        case '\n':
1261        case '\013':
1262        case '\f':
1263        case '\r':
1264        case ' ':
1265        case '\u0085':
1266        case '\u1680':
1267        case '\u2028':
1268        case '\u2029':
1269        case '\u205f':
1270        case '\u3000':
1271          return true;
1272        case '\u2007':
1273          return false;
1274        default:
1275          return c >= '\u2000' && c <= '\u200a';
1276      }
1277    }
1278
1279    @Override
1280    public String toString() {
1281      return "CharMatcher.breakingWhitespace()";
1282    }
1283  }
1284
1285  /** Implementation of {@link #ascii()}. */
1286  private static final class Ascii extends NamedFastMatcher {
1287
1288    static final Ascii INSTANCE = new Ascii();
1289
1290    Ascii() {
1291      super("CharMatcher.ascii()");
1292    }
1293
1294    @Override
1295    public boolean matches(char c) {
1296      return c <= '\u007f';
1297    }
1298  }
1299
1300  /** Implementation that matches characters that fall within multiple ranges. */
1301  private static class RangesMatcher extends CharMatcher {
1302
1303    private final String description;
1304    private final char[] rangeStarts;
1305    private final char[] rangeEnds;
1306
1307    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1308      this.description = description;
1309      this.rangeStarts = rangeStarts;
1310      this.rangeEnds = rangeEnds;
1311      checkArgument(rangeStarts.length == rangeEnds.length);
1312      for (int i = 0; i < rangeStarts.length; i++) {
1313        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1314        if (i + 1 < rangeStarts.length) {
1315          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1316        }
1317      }
1318    }
1319
1320    @Override
1321    public boolean matches(char c) {
1322      int index = Arrays.binarySearch(rangeStarts, c);
1323      if (index >= 0) {
1324        return true;
1325      } else {
1326        index = ~index - 1;
1327        return index >= 0 && c <= rangeEnds[index];
1328      }
1329    }
1330
1331    @Override
1332    public String toString() {
1333      return description;
1334    }
1335  }
1336
1337  /** Implementation of {@link #digit()}. */
1338  private static final class Digit extends RangesMatcher {
1339    // Plug the following UnicodeSet pattern into
1340    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1341    // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]]
1342    // and get the zeroes from there.
1343
1344    // Must be in ascending order.
1345    private static final String ZEROES =
1346        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6"
1347            + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0"
1348            + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10";
1349
1350    private static char[] zeroes() {
1351      return ZEROES.toCharArray();
1352    }
1353
1354    private static char[] nines() {
1355      char[] nines = new char[ZEROES.length()];
1356      for (int i = 0; i < ZEROES.length(); i++) {
1357        nines[i] = (char) (ZEROES.charAt(i) + 9);
1358      }
1359      return nines;
1360    }
1361
1362    static final Digit INSTANCE = new Digit();
1363
1364    private Digit() {
1365      super("CharMatcher.digit()", zeroes(), nines());
1366    }
1367  }
1368
1369  /** Implementation of {@link #javaDigit()}. */
1370  private static final class JavaDigit extends CharMatcher {
1371
1372    static final JavaDigit INSTANCE = new JavaDigit();
1373
1374    @Override
1375    public boolean matches(char c) {
1376      return Character.isDigit(c);
1377    }
1378
1379    @Override
1380    public String toString() {
1381      return "CharMatcher.javaDigit()";
1382    }
1383  }
1384
1385  /** Implementation of {@link #javaLetter()}. */
1386  private static final class JavaLetter extends CharMatcher {
1387
1388    static final JavaLetter INSTANCE = new JavaLetter();
1389
1390    @Override
1391    public boolean matches(char c) {
1392      return Character.isLetter(c);
1393    }
1394
1395    @Override
1396    public String toString() {
1397      return "CharMatcher.javaLetter()";
1398    }
1399  }
1400
1401  /** Implementation of {@link #javaLetterOrDigit()}. */
1402  private static final class JavaLetterOrDigit extends CharMatcher {
1403
1404    static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit();
1405
1406    @Override
1407    public boolean matches(char c) {
1408      return Character.isLetterOrDigit(c);
1409    }
1410
1411    @Override
1412    public String toString() {
1413      return "CharMatcher.javaLetterOrDigit()";
1414    }
1415  }
1416
1417  /** Implementation of {@link #javaUpperCase()}. */
1418  private static final class JavaUpperCase extends CharMatcher {
1419
1420    static final JavaUpperCase INSTANCE = new JavaUpperCase();
1421
1422    @Override
1423    public boolean matches(char c) {
1424      return Character.isUpperCase(c);
1425    }
1426
1427    @Override
1428    public String toString() {
1429      return "CharMatcher.javaUpperCase()";
1430    }
1431  }
1432
1433  /** Implementation of {@link #javaLowerCase()}. */
1434  private static final class JavaLowerCase extends CharMatcher {
1435
1436    static final JavaLowerCase INSTANCE = new JavaLowerCase();
1437
1438    @Override
1439    public boolean matches(char c) {
1440      return Character.isLowerCase(c);
1441    }
1442
1443    @Override
1444    public String toString() {
1445      return "CharMatcher.javaLowerCase()";
1446    }
1447  }
1448
1449  /** Implementation of {@link #javaIsoControl()}. */
1450  private static final class JavaIsoControl extends NamedFastMatcher {
1451
1452    static final JavaIsoControl INSTANCE = new JavaIsoControl();
1453
1454    private JavaIsoControl() {
1455      super("CharMatcher.javaIsoControl()");
1456    }
1457
1458    @Override
1459    public boolean matches(char c) {
1460      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1461    }
1462  }
1463
  /**
   * Implementation of {@link #invisible()}: a {@link RangesMatcher} over the parallel tables
   * below, where the i-th character of {@code RANGE_STARTS} and the i-th character of {@code
   * RANGE_ENDS} bound one inclusive range.
   */
  private static final class Invisible extends RangesMatcher {
    // Plug the following UnicodeSet pattern into
    // https://unicode.org/cldr/utility/list-unicodeset.jsp
    // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]]
    // with the "Abbreviate" option, and get the ranges from there.
    //
    // The two strings must stay the same length, ascending and non-overlapping; the RangesMatcher
    // constructor rejects anything else.
    private static final String RANGE_STARTS =
        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u0890\u08e2\u1680\u180e\u2000\u2028\u205f\u2066"
            + "\u3000\ud800\ufeff\ufff9";
    private static final String RANGE_ENDS = // inclusive ends
        "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u0891\u08e2\u1680\u180e\u200f\u202f\u2064\u206f"
            + "\u3000\uf8ff\ufeff\ufffb";

    static final Invisible INSTANCE = new Invisible();

    /** Hands the superclass the two tables as freshly created char arrays. */
    private Invisible() {
      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
    }
  }
1483
1484  /** Implementation of {@link #singleWidth()}. */
1485  private static final class SingleWidth extends RangesMatcher {
1486
1487    static final SingleWidth INSTANCE = new SingleWidth();
1488
1489    private SingleWidth() {
1490      super(
1491          "CharMatcher.singleWidth()",
1492          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1493          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1494    }
1495  }
1496
1497  // Non-static factory implementation classes
1498
1499  /** Implementation of {@link #negate()}. */
1500  private static class Negated extends CharMatcher {
1501
1502    final CharMatcher original;
1503
1504    Negated(CharMatcher original) {
1505      this.original = checkNotNull(original);
1506    }
1507
1508    @Override
1509    public boolean matches(char c) {
1510      return !original.matches(c);
1511    }
1512
1513    @Override
1514    public boolean matchesAllOf(CharSequence sequence) {
1515      return original.matchesNoneOf(sequence);
1516    }
1517
1518    @Override
1519    public boolean matchesNoneOf(CharSequence sequence) {
1520      return original.matchesAllOf(sequence);
1521    }
1522
1523    @Override
1524    public int countIn(CharSequence sequence) {
1525      return sequence.length() - original.countIn(sequence);
1526    }
1527
1528    @J2ktIncompatible
1529    @GwtIncompatible // used only from other GwtIncompatible code
1530    @Override
1531    void setBits(BitSet table) {
1532      BitSet tmp = new BitSet();
1533      original.setBits(tmp);
1534      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1535      table.or(tmp);
1536    }
1537
1538    @Override
1539    public CharMatcher negate() {
1540      return original;
1541    }
1542
1543    @Override
1544    public String toString() {
1545      return original + ".negate()";
1546    }
1547  }
1548
1549  /** Implementation of {@link #and(CharMatcher)}. */
1550  private static final class And extends CharMatcher {
1551
1552    final CharMatcher first;
1553    final CharMatcher second;
1554
1555    And(CharMatcher a, CharMatcher b) {
1556      first = checkNotNull(a);
1557      second = checkNotNull(b);
1558    }
1559
1560    @Override
1561    public boolean matches(char c) {
1562      return first.matches(c) && second.matches(c);
1563    }
1564
1565    @J2ktIncompatible
1566    @GwtIncompatible // used only from other GwtIncompatible code
1567    @Override
1568    void setBits(BitSet table) {
1569      BitSet tmp1 = new BitSet();
1570      first.setBits(tmp1);
1571      BitSet tmp2 = new BitSet();
1572      second.setBits(tmp2);
1573      tmp1.and(tmp2);
1574      table.or(tmp1);
1575    }
1576
1577    @Override
1578    public String toString() {
1579      return "CharMatcher.and(" + first + ", " + second + ")";
1580    }
1581  }
1582
1583  /** Implementation of {@link #or(CharMatcher)}. */
1584  private static final class Or extends CharMatcher {
1585
1586    final CharMatcher first;
1587    final CharMatcher second;
1588
1589    Or(CharMatcher a, CharMatcher b) {
1590      first = checkNotNull(a);
1591      second = checkNotNull(b);
1592    }
1593
1594    @J2ktIncompatible
1595    @GwtIncompatible // used only from other GwtIncompatible code
1596    @Override
1597    void setBits(BitSet table) {
1598      first.setBits(table);
1599      second.setBits(table);
1600    }
1601
1602    @Override
1603    public boolean matches(char c) {
1604      return first.matches(c) || second.matches(c);
1605    }
1606
1607    @Override
1608    public String toString() {
1609      return "CharMatcher.or(" + first + ", " + second + ")";
1610    }
1611  }
1612
1613  // Static factory implementations
1614
1615  /** Implementation of {@link #is(char)}. */
1616  private static final class Is extends FastMatcher {
1617
1618    private final char match;
1619
1620    Is(char match) {
1621      this.match = match;
1622    }
1623
1624    @Override
1625    public boolean matches(char c) {
1626      return c == match;
1627    }
1628
1629    @Override
1630    public String replaceFrom(CharSequence sequence, char replacement) {
1631      return sequence.toString().replace(match, replacement);
1632    }
1633
1634    @Override
1635    public CharMatcher and(CharMatcher other) {
1636      return other.matches(match) ? this : none();
1637    }
1638
1639    @Override
1640    public CharMatcher or(CharMatcher other) {
1641      return other.matches(match) ? other : super.or(other);
1642    }
1643
1644    @Override
1645    public CharMatcher negate() {
1646      return isNot(match);
1647    }
1648
1649    @J2ktIncompatible
1650    @GwtIncompatible // used only from other GwtIncompatible code
1651    @Override
1652    void setBits(BitSet table) {
1653      table.set(match);
1654    }
1655
1656    @Override
1657    public String toString() {
1658      return "CharMatcher.is('" + showCharacter(match) + "')";
1659    }
1660  }
1661
1662  /** Implementation of {@link #isNot(char)}. */
1663  private static final class IsNot extends FastMatcher {
1664
1665    private final char match;
1666
1667    IsNot(char match) {
1668      this.match = match;
1669    }
1670
1671    @Override
1672    public boolean matches(char c) {
1673      return c != match;
1674    }
1675
1676    @Override
1677    public CharMatcher and(CharMatcher other) {
1678      return other.matches(match) ? super.and(other) : other;
1679    }
1680
1681    @Override
1682    public CharMatcher or(CharMatcher other) {
1683      return other.matches(match) ? any() : this;
1684    }
1685
1686    @J2ktIncompatible
1687    @GwtIncompatible // used only from other GwtIncompatible code
1688    @Override
1689    void setBits(BitSet table) {
1690      table.set(0, match);
1691      table.set(match + 1, Character.MAX_VALUE + 1);
1692    }
1693
1694    @Override
1695    public CharMatcher negate() {
1696      return is(match);
1697    }
1698
1699    @Override
1700    public String toString() {
1701      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1702    }
1703  }
1704
1705  private static CharMatcher.IsEither isEither(char c1, char c2) {
1706    return new CharMatcher.IsEither(c1, c2);
1707  }
1708
1709  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1710  private static final class IsEither extends FastMatcher {
1711
1712    private final char match1;
1713    private final char match2;
1714
1715    IsEither(char match1, char match2) {
1716      this.match1 = match1;
1717      this.match2 = match2;
1718    }
1719
1720    @Override
1721    public boolean matches(char c) {
1722      return c == match1 || c == match2;
1723    }
1724
1725    @J2ktIncompatible
1726    @GwtIncompatible // used only from other GwtIncompatible code
1727    @Override
1728    void setBits(BitSet table) {
1729      table.set(match1);
1730      table.set(match2);
1731    }
1732
1733    @Override
1734    public String toString() {
1735      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1736    }
1737  }
1738
1739  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1740  private static final class AnyOf extends CharMatcher {
1741
1742    private final char[] chars;
1743
1744    public AnyOf(CharSequence chars) {
1745      this.chars = chars.toString().toCharArray();
1746      Arrays.sort(this.chars);
1747    }
1748
1749    @Override
1750    public boolean matches(char c) {
1751      return Arrays.binarySearch(chars, c) >= 0;
1752    }
1753
1754    @Override
1755    @J2ktIncompatible
1756    @GwtIncompatible // used only from other GwtIncompatible code
1757    void setBits(BitSet table) {
1758      for (char c : chars) {
1759        table.set(c);
1760      }
1761    }
1762
1763    @Override
1764    public String toString() {
1765      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1766      for (char c : chars) {
1767        description.append(showCharacter(c));
1768      }
1769      description.append("\")");
1770      return description.toString();
1771    }
1772  }
1773
1774  /** Implementation of {@link #inRange(char, char)}. */
1775  private static final class InRange extends FastMatcher {
1776
1777    private final char startInclusive;
1778    private final char endInclusive;
1779
1780    InRange(char startInclusive, char endInclusive) {
1781      checkArgument(endInclusive >= startInclusive);
1782      this.startInclusive = startInclusive;
1783      this.endInclusive = endInclusive;
1784    }
1785
1786    @Override
1787    public boolean matches(char c) {
1788      return startInclusive <= c && c <= endInclusive;
1789    }
1790
1791    @J2ktIncompatible
1792    @GwtIncompatible // used only from other GwtIncompatible code
1793    @Override
1794    void setBits(BitSet table) {
1795      table.set(startInclusive, endInclusive + 1);
1796    }
1797
1798    @Override
1799    public String toString() {
1800      return "CharMatcher.inRange('"
1801          + showCharacter(startInclusive)
1802          + "', '"
1803          + showCharacter(endInclusive)
1804          + "')";
1805    }
1806  }
1807
1808  /** Implementation of {@link #forPredicate(Predicate)}. */
1809  private static final class ForPredicate extends CharMatcher {
1810
1811    private final Predicate<? super Character> predicate;
1812
1813    ForPredicate(Predicate<? super Character> predicate) {
1814      this.predicate = checkNotNull(predicate);
1815    }
1816
1817    @Override
1818    public boolean matches(char c) {
1819      return predicate.apply(c);
1820    }
1821
1822    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1823    @Override
1824    public boolean apply(Character character) {
1825      return predicate.apply(checkNotNull(character));
1826    }
1827
1828    @Override
1829    public String toString() {
1830      return "CharMatcher.forPredicate(" + predicate + ")";
1831    }
1832  }
1833}