001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.VisibleForTesting;
024import java.util.Arrays;
025import java.util.BitSet;
026
027/**
028 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
029 * for any {@link Object}. Also offers basic text processing methods based on this function.
030 * Implementations are strongly encouraged to be side-effect-free and immutable.
031 *
032 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
033 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
034 *
035 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a
036 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand
037 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code
038 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of
039 * assigned characters, including important CJK characters and emoji.
040 *
041 * <p>Supplementary characters are <a
042 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded
043 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as
044 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s.
045 *
046 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for
047 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For
048 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner.
049 *
050 * <p>Example usages:
051 *
052 * <pre>
053 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
054 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
055 *
056 * <p>See the Guava User Guide article on <a
057 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
058 * </a>.
059 *
060 * @author Kevin Bourrillion
061 * @since 1.0
062 */
063@GwtCompatible(emulated = true)
064@ElementTypesAreNonnullByDefault
065public abstract class CharMatcher implements Predicate<Character> {
066  /*
067   *           N777777777NO
068   *         N7777777777777N
069   *        M777777777777777N
070   *        $N877777777D77777M
071   *       N M77777777ONND777M
072   *       MN777777777NN  D777
073   *     N7ZN777777777NN ~M7778
074   *    N777777777777MMNN88777N
075   *    N777777777777MNZZZ7777O
076   *    DZN7777O77777777777777
077   *     N7OONND7777777D77777N
078   *      8$M++++?N???$77777$
079   *       M7++++N+M77777777N
080   *        N77O777777777777$                              M
081   *          DNNM$$$$777777N                              D
082   *         N$N:=N$777N7777M                             NZ
083   *        77Z::::N777777777                          ODZZZ
084   *       77N::::::N77777777M                         NNZZZ$
085   *     $777:::::::77777777MN                        ZM8ZZZZZ
086   *     777M::::::Z7777777Z77                        N++ZZZZNN
087   *    7777M:::::M7777777$777M                       $++IZZZZM
088   *   M777$:::::N777777$M7777M                       +++++ZZZDN
089   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
090   *       N::::::N:7$O:77777777                      N++++ZZZZN
091   *       M::::::::::::N77777777+                   +?+++++ZZZM
092   *       8::::::::::::D77777777M                    O+++++ZZ
093   *        ::::::::::::M777777777N                      O+?D
094   *        M:::::::::::M77777777778                     77=
095   *        D=::::::::::N7777777777N                    777
096   *       INN===::::::=77777777777N                  I777N
097   *      ?777N========N7777777777787M               N7777
098   *      77777$D======N77777777777N777N?         N777777
099   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
100   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
101   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
102   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
103   *   7 :N MNN$$$$M$      $$$777$8      8D8I
104   *     NMM.:7O           777777778
105   *                       7777777MN
106   *                       M NO .7:
107   *                       M   :   M
108   *                            8
109   */
110
111  // Constant matcher factory methods
112
113  /**
114   * Matches any character.
115   *
116   * @since 19.0 (since 1.0 as constant {@code ANY})
117   */
118  public static CharMatcher any() {
119    return Any.INSTANCE;
120  }
121
122  /**
123   * Matches no characters.
124   *
125   * @since 19.0 (since 1.0 as constant {@code NONE})
126   */
127  public static CharMatcher none() {
128    return None.INSTANCE;
129  }
130
131  /**
132   * Determines whether a character is whitespace according to the latest Unicode standard, as
133   * illustrated <a
134   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
135   * This is not the same definition used by other Java APIs. (See a <a
136   * href="https://goo.gl/Y6SLWx">comparison of several definitions of "whitespace"</a>.)
137   *
138   * <p>All Unicode White_Space characters are on the BMP and thus supported by this API.
139   *
140   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
141   * date.
142   *
143   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
144   */
145  public static CharMatcher whitespace() {
146    return Whitespace.INSTANCE;
147  }
148
149  /**
150   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
151   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
152   * discussion of that term.
153   *
154   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
155   */
156  public static CharMatcher breakingWhitespace() {
157    return BreakingWhitespace.INSTANCE;
158  }
159
160  /**
161   * Determines whether a character is ASCII, meaning that its code point is less than 128.
162   *
163   * @since 19.0 (since 1.0 as constant {@code ASCII})
164   */
165  public static CharMatcher ascii() {
166    return Ascii.INSTANCE;
167  }
168
169  /**
170   * Determines whether a character is a BMP digit according to <a
171   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
172   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
173   *
174   * @deprecated Many digits are supplementary characters; see the class documentation.
175   * @since 19.0 (since 1.0 as constant {@code DIGIT})
176   */
177  @Deprecated
178  public static CharMatcher digit() {
179    return Digit.INSTANCE;
180  }
181
182  /**
183   * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char)
184   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
185   * '9')}.
186   *
187   * @deprecated Many digits are supplementary characters; see the class documentation.
188   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
189   */
190  @Deprecated
191  public static CharMatcher javaDigit() {
192    return JavaDigit.INSTANCE;
193  }
194
195  /**
196   * Determines whether a character is a BMP letter according to {@linkplain
197   * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
198   * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
199   *
200   * @deprecated Most letters are supplementary characters; see the class documentation.
201   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
202   */
203  @Deprecated
204  public static CharMatcher javaLetter() {
205    return JavaLetter.INSTANCE;
206  }
207
208  /**
209   * Determines whether a character is a BMP letter or digit according to {@linkplain
210   * Character#isLetterOrDigit(char) Java's definition}.
211   *
212   * @deprecated Most letters and digits are supplementary characters; see the class documentation.
213   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}).
214   */
215  @Deprecated
216  public static CharMatcher javaLetterOrDigit() {
217    return JavaLetterOrDigit.INSTANCE;
218  }
219
220  /**
221   * Determines whether a BMP character is upper case according to {@linkplain
222   * Character#isUpperCase(char) Java's definition}.
223   *
224   * @deprecated Some uppercase characters are supplementary characters; see the class
225   *     documentation.
226   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
227   */
228  @Deprecated
229  public static CharMatcher javaUpperCase() {
230    return JavaUpperCase.INSTANCE;
231  }
232
233  /**
234   * Determines whether a BMP character is lower case according to {@linkplain
235   * Character#isLowerCase(char) Java's definition}.
236   *
237   * @deprecated Some lowercase characters are supplementary characters; see the class
238   *     documentation.
239   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
240   */
241  @Deprecated
242  public static CharMatcher javaLowerCase() {
243    return JavaLowerCase.INSTANCE;
244  }
245
246  /**
247   * Determines whether a character is an ISO control character as specified by {@link
248   * Character#isISOControl(char)}.
249   *
250   * <p>All ISO control codes are on the BMP and thus supported by this API.
251   *
252   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
253   */
254  public static CharMatcher javaIsoControl() {
255    return JavaIsoControl.INSTANCE;
256  }
257
258  /**
259   * Determines whether a character is invisible; that is, if its Unicode category is any of
260   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
261   * PRIVATE_USE according to ICU4J.
262   *
263   * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU).
264   *
265   * @deprecated Most invisible characters are supplementary characters; see the class
266   *     documentation.
267   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
268   */
269  @Deprecated
270  public static CharMatcher invisible() {
271    return Invisible.INSTANCE;
272  }
273
274  /**
275   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
276   * errs on the side of returning {@code false} (that is, it tends to assume a character is
277   * double-width).
278   *
279   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
280   * date.
281   *
282   * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>.
283   *
284   * @deprecated Many such characters are supplementary characters; see the class documentation.
285   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
286   */
287  @Deprecated
288  public static CharMatcher singleWidth() {
289    return SingleWidth.INSTANCE;
290  }
291
292  // Static factories
293
294  /** Returns a {@code char} matcher that matches only one specified BMP character. */
295  public static CharMatcher is(final char match) {
296    return new Is(match);
297  }
298
299  /**
300   * Returns a {@code char} matcher that matches any character except the BMP character specified.
301   *
302   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
303   */
304  public static CharMatcher isNot(final char match) {
305    return new IsNot(match);
306  }
307
308  /**
309   * Returns a {@code char} matcher that matches any BMP character present in the given character
310   * sequence. Returns a bogus matcher if the sequence contains supplementary characters.
311   */
312  public static CharMatcher anyOf(final CharSequence sequence) {
313    switch (sequence.length()) {
314      case 0:
315        return none();
316      case 1:
317        return is(sequence.charAt(0));
318      case 2:
319        return isEither(sequence.charAt(0), sequence.charAt(1));
320      default:
321        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
322        // matcher?
323        return new AnyOf(sequence);
324    }
325  }
326
327  /**
328   * Returns a {@code char} matcher that matches any BMP character not present in the given
329   * character sequence. Returns a bogus matcher if the sequence contains supplementary characters.
330   */
331  public static CharMatcher noneOf(CharSequence sequence) {
332    return anyOf(sequence).negate();
333  }
334
335  /**
336   * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints
337   * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
338   * CharMatcher.inRange('a', 'z')}.
339   *
340   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
341   */
342  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
343    return new InRange(startInclusive, endInclusive);
344  }
345
346  /**
347   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
348   * which operates on primitive {@code char} instances instead.
349   */
350  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
351    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
352  }
353
354  // Constructors
355
356  /**
357   * Constructor for use by subclasses. When subclassing, you may want to override {@code
358   * toString()} to provide a useful description.
359   */
360  protected CharMatcher() {}
361
362  // Abstract methods
363
364  /** Determines a true or false value for the given character. */
365  public abstract boolean matches(char c);
366
367  // Non-static factories
368
369  /** Returns a matcher that matches any character not matched by this matcher. */
370  // @Override under Java 8 but not under Java 7
371  public CharMatcher negate() {
372    return new Negated(this);
373  }
374
375  /**
376   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
377   */
378  public CharMatcher and(CharMatcher other) {
379    return new And(this, other);
380  }
381
382  /**
383   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
384   */
385  public CharMatcher or(CharMatcher other) {
386    return new Or(this, other);
387  }
388
389  /**
390   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
391   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
392   * worthwhile only if the precomputed matcher is queried many thousands of times.
393   *
394   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
395   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
396   * worthwhile tradeoff in a browser.
397   */
398  public CharMatcher precomputed() {
399    return Platform.precomputeCharMatcher(this);
400  }
401
402  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
403
404  /**
405   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
406   * on {@link Platform} so that we can have different behavior in GWT.
407   *
408   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
409   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
410   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
411   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
412   * matcher which is faster to query than the original.
413   */
414  @GwtIncompatible // SmallCharMatcher
415  CharMatcher precomputedInternal() {
416    final BitSet table = new BitSet();
417    setBits(table);
418    int totalCharacters = table.cardinality();
419    if (totalCharacters * 2 <= DISTINCT_CHARS) {
420      return precomputedPositive(totalCharacters, table, toString());
421    } else {
422      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
423      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
424      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
425      String suffix = ".negate()";
426      final String description = toString();
427      String negatedDescription =
428          description.endsWith(suffix)
429              ? description.substring(0, description.length() - suffix.length())
430              : description + suffix;
431      return new NegatedFastMatcher(
432          precomputedPositive(negatedCharacters, table, negatedDescription)) {
433        @Override
434        public String toString() {
435          return description;
436        }
437      };
438    }
439  }
440
441  /**
442   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
443   */
444  @GwtIncompatible // SmallCharMatcher
445  private static CharMatcher precomputedPositive(
446      int totalCharacters, BitSet table, String description) {
447    switch (totalCharacters) {
448      case 0:
449        return none();
450      case 1:
451        return is((char) table.nextSetBit(0));
452      case 2:
453        char c1 = (char) table.nextSetBit(0);
454        char c2 = (char) table.nextSetBit(c1 + 1);
455        return isEither(c1, c2);
456      default:
457        return isSmall(totalCharacters, table.length())
458            ? SmallCharMatcher.from(table, description)
459            : new BitSetMatcher(table, description);
460    }
461  }
462
463  @GwtIncompatible // SmallCharMatcher
464  private static boolean isSmall(int totalCharacters, int tableLength) {
465    return totalCharacters <= SmallCharMatcher.MAX_SIZE
466        && tableLength > (totalCharacters * 4 * Character.SIZE);
467    // err on the side of BitSetMatcher
468  }
469
470  /** Sets bits in {@code table} matched by this matcher. */
471  @GwtIncompatible // used only from other GwtIncompatible code
472  void setBits(BitSet table) {
473    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
474      if (matches((char) c)) {
475        table.set(c);
476      }
477    }
478  }
479
480  // Text processing routines
481
482  /**
483   * Returns {@code true} if a character sequence contains at least one matching BMP character.
484   * Equivalent to {@code !matchesNoneOf(sequence)}.
485   *
486   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
487   * character, until this returns {@code true} or the end is reached.
488   *
489   * @param sequence the character sequence to examine, possibly empty
490   * @return {@code true} if this matcher matches at least one character in the sequence
491   * @since 8.0
492   */
493  public boolean matchesAnyOf(CharSequence sequence) {
494    return !matchesNoneOf(sequence);
495  }
496
497  /**
498   * Returns {@code true} if a character sequence contains only matching BMP characters.
499   *
500   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
501   * character, until this returns {@code false} or the end is reached.
502   *
503   * @param sequence the character sequence to examine, possibly empty
504   * @return {@code true} if this matcher matches every character in the sequence, including when
505   *     the sequence is empty
506   */
507  public boolean matchesAllOf(CharSequence sequence) {
508    for (int i = sequence.length() - 1; i >= 0; i--) {
509      if (!matches(sequence.charAt(i))) {
510        return false;
511      }
512    }
513    return true;
514  }
515
516  /**
517   * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to
518   * {@code !matchesAnyOf(sequence)}.
519   *
520   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
521   * character, until this returns {@code true} or the end is reached.
522   *
523   * @param sequence the character sequence to examine, possibly empty
524   * @return {@code true} if this matcher matches no characters in the sequence, including when the
525   *     sequence is empty
526   */
527  public boolean matchesNoneOf(CharSequence sequence) {
528    return indexIn(sequence) == -1;
529  }
530
531  /**
532   * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if
533   * no matching character is present.
534   *
535   * <p>The default implementation iterates over the sequence in forward order calling {@link
536   * #matches} for each character.
537   *
538   * @param sequence the character sequence to examine from the beginning
539   * @return an index, or {@code -1} if no character matches
540   */
541  public int indexIn(CharSequence sequence) {
542    return indexIn(sequence, 0);
543  }
544
545  /**
546   * Returns the index of the first matching BMP character in a character sequence, starting from a
547   * given position, or {@code -1} if no character matches after that position.
548   *
549   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
550   * start}, calling {@link #matches} for each character.
551   *
552   * @param sequence the character sequence to examine
553   * @param start the first index to examine; must be nonnegative and no greater than {@code
554   *     sequence.length()}
555   * @return the index of the first matching character, guaranteed to be no less than {@code start},
556   *     or {@code -1} if no character matches
557   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
558   *     sequence.length()}
559   */
560  public int indexIn(CharSequence sequence, int start) {
561    int length = sequence.length();
562    checkPositionIndex(start, length);
563    for (int i = start; i < length; i++) {
564      if (matches(sequence.charAt(i))) {
565        return i;
566      }
567    }
568    return -1;
569  }
570
571  /**
572   * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if
573   * no matching character is present.
574   *
575   * <p>The default implementation iterates over the sequence in reverse order calling {@link
576   * #matches} for each character.
577   *
578   * @param sequence the character sequence to examine from the end
579   * @return an index, or {@code -1} if no character matches
580   */
581  public int lastIndexIn(CharSequence sequence) {
582    for (int i = sequence.length() - 1; i >= 0; i--) {
583      if (matches(sequence.charAt(i))) {
584        return i;
585      }
586    }
587    return -1;
588  }
589
590  /**
591   * Returns the number of matching {@code char}s found in a character sequence.
592   *
593   * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}().
594   */
595  public int countIn(CharSequence sequence) {
596    int count = 0;
597    for (int i = 0; i < sequence.length(); i++) {
598      if (matches(sequence.charAt(i))) {
599        count++;
600      }
601    }
602    return count;
603  }
604
605  /**
606   * Returns a string containing all non-matching characters of a character sequence, in order. For
607   * example:
608   *
609   * <pre>{@code
610   * CharMatcher.is('a').removeFrom("bazaar")
611   * }</pre>
612   *
613   * ... returns {@code "bzr"}.
614   */
615  public String removeFrom(CharSequence sequence) {
616    String string = sequence.toString();
617    int pos = indexIn(string);
618    if (pos == -1) {
619      return string;
620    }
621
622    char[] chars = string.toCharArray();
623    int spread = 1;
624
625    // This unusual loop comes from extensive benchmarking
626    OUT:
627    while (true) {
628      pos++;
629      while (true) {
630        if (pos == chars.length) {
631          break OUT;
632        }
633        if (matches(chars[pos])) {
634          break;
635        }
636        chars[pos - spread] = chars[pos];
637        pos++;
638      }
639      spread++;
640    }
641    return new String(chars, 0, pos - spread);
642  }
643
644  /**
645   * Returns a string containing all matching BMP characters of a character sequence, in order. For
646   * example:
647   *
648   * <pre>{@code
649   * CharMatcher.is('a').retainFrom("bazaar")
650   * }</pre>
651   *
652   * ... returns {@code "aaa"}.
653   */
654  public String retainFrom(CharSequence sequence) {
655    return negate().removeFrom(sequence);
656  }
657
658  /**
659   * Returns a string copy of the input character sequence, with each matching BMP character
660   * replaced by a given replacement character. For example:
661   *
662   * <pre>{@code
663   * CharMatcher.is('a').replaceFrom("radar", 'o')
664   * }</pre>
665   *
666   * ... returns {@code "rodor"}.
667   *
668   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
669   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
670   * character.
671   *
672   * @param sequence the character sequence to replace matching characters in
673   * @param replacement the character to append to the result string in place of each matching
674   *     character in {@code sequence}
675   * @return the new string
676   */
677  public String replaceFrom(CharSequence sequence, char replacement) {
678    String string = sequence.toString();
679    int pos = indexIn(string);
680    if (pos == -1) {
681      return string;
682    }
683    char[] chars = string.toCharArray();
684    chars[pos] = replacement;
685    for (int i = pos + 1; i < chars.length; i++) {
686      if (matches(chars[i])) {
687        chars[i] = replacement;
688      }
689    }
690    return new String(chars);
691  }
692
693  /**
694   * Returns a string copy of the input character sequence, with each matching BMP character
695   * replaced by a given replacement sequence. For example:
696   *
697   * <pre>{@code
698   * CharMatcher.is('a').replaceFrom("yaha", "oo")
699   * }</pre>
700   *
701   * ... returns {@code "yoohoo"}.
702   *
703   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
704   * off calling {@link #replaceFrom(CharSequence, char)} directly.
705   *
706   * @param sequence the character sequence to replace matching characters in
707   * @param replacement the characters to append to the result string in place of each matching
708   *     character in {@code sequence}
709   * @return the new string
710   */
711  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
712    int replacementLen = replacement.length();
713    if (replacementLen == 0) {
714      return removeFrom(sequence);
715    }
716    if (replacementLen == 1) {
717      return replaceFrom(sequence, replacement.charAt(0));
718    }
719
720    String string = sequence.toString();
721    int pos = indexIn(string);
722    if (pos == -1) {
723      return string;
724    }
725
726    int len = string.length();
727    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
728
729    int oldpos = 0;
730    do {
731      buf.append(string, oldpos, pos);
732      buf.append(replacement);
733      oldpos = pos + 1;
734      pos = indexIn(string, oldpos);
735    } while (pos != -1);
736
737    buf.append(string, oldpos, len);
738    return buf.toString();
739  }
740
741  /**
742   * Returns a substring of the input character sequence that omits all matching BMP characters from
743   * the beginning and from the end of the string. For example:
744   *
745   * <pre>{@code
746   * CharMatcher.anyOf("ab").trimFrom("abacatbab")
747   * }</pre>
748   *
749   * ... returns {@code "cat"}.
750   *
751   * <p>Note that:
752   *
753   * <pre>{@code
754   * CharMatcher.inRange('\0', ' ').trimFrom(str)
755   * }</pre>
756   *
757   * ... is equivalent to {@link String#trim()}.
758   */
759  public String trimFrom(CharSequence sequence) {
760    int len = sequence.length();
761    int first;
762    int last;
763
764    for (first = 0; first < len; first++) {
765      if (!matches(sequence.charAt(first))) {
766        break;
767      }
768    }
769    for (last = len - 1; last > first; last--) {
770      if (!matches(sequence.charAt(last))) {
771        break;
772      }
773    }
774
775    return sequence.subSequence(first, last + 1).toString();
776  }
777
778  /**
779   * Returns a substring of the input character sequence that omits all matching BMP characters from
780   * the beginning of the string. For example:
781   *
782   * <pre>{@code
783   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
784   * }</pre>
785   *
786   * ... returns {@code "catbab"}.
787   */
788  public String trimLeadingFrom(CharSequence sequence) {
789    int len = sequence.length();
790    for (int first = 0; first < len; first++) {
791      if (!matches(sequence.charAt(first))) {
792        return sequence.subSequence(first, len).toString();
793      }
794    }
795    return "";
796  }
797
798  /**
799   * Returns a substring of the input character sequence that omits all matching BMP characters from
800   * the end of the string. For example:
801   *
802   * <pre>{@code
803   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
804   * }</pre>
805   *
806   * ... returns {@code "abacat"}.
807   */
808  public String trimTrailingFrom(CharSequence sequence) {
809    int len = sequence.length();
810    for (int last = len - 1; last >= 0; last--) {
811      if (!matches(sequence.charAt(last))) {
812        return sequence.subSequence(0, last + 1).toString();
813      }
814    }
815    return "";
816  }
817
818  /**
819   * Returns a string copy of the input character sequence, with each group of consecutive matching
820   * BMP characters replaced by a single replacement character. For example:
821   *
822   * <pre>{@code
823   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
824   * }</pre>
825   *
826   * ... returns {@code "b-p-r"}.
827   *
828   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
829   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
830   * character.
831   *
832   * @param sequence the character sequence to replace matching groups of characters in
833   * @param replacement the character to append to the result string in place of each group of
834   *     matching characters in {@code sequence}
835   * @return the new string
836   */
837  public String collapseFrom(CharSequence sequence, char replacement) {
838    // This implementation avoids unnecessary allocation.
839    int len = sequence.length();
840    for (int i = 0; i < len; i++) {
841      char c = sequence.charAt(i);
842      if (matches(c)) {
843        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
844          // a no-op replacement
845          i++;
846        } else {
847          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
848          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
849        }
850      }
851    }
852    // no replacement needed
853    return sequence.toString();
854  }
855
856  /**
857   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
858   * groups of matching BMP characters at the start or end of the sequence are removed without
859   * replacement.
860   */
861  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
862    // This implementation avoids unnecessary allocation.
863    int len = sequence.length();
864    int first = 0;
865    int last = len - 1;
866
867    while (first < len && matches(sequence.charAt(first))) {
868      first++;
869    }
870
871    while (last > first && matches(sequence.charAt(last))) {
872      last--;
873    }
874
875    return (first == 0 && last == len - 1)
876        ? collapseFrom(sequence, replacement)
877        : finishCollapseFrom(
878            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
879  }
880
881  private String finishCollapseFrom(
882      CharSequence sequence,
883      int start,
884      int end,
885      char replacement,
886      StringBuilder builder,
887      boolean inMatchingGroup) {
888    for (int i = start; i < end; i++) {
889      char c = sequence.charAt(i);
890      if (matches(c)) {
891        if (!inMatchingGroup) {
892          builder.append(replacement);
893          inMatchingGroup = true;
894        }
895      } else {
896        builder.append(c);
897        inMatchingGroup = false;
898      }
899    }
900    return builder.toString();
901  }
902
903  /**
904   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
905   *     instead.
906   */
907  @Deprecated
908  @Override
909  public boolean apply(Character character) {
910    return matches(character);
911  }
912
913  /**
914   * Returns a string representation of this {@code CharMatcher}, such as {@code
915   * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
916   */
917  @Override
918  public String toString() {
919    return super.toString();
920  }
921
922  /**
923   * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where
924   * "12AB" is the four hexadecimal digits representing the 16-bit code unit.
925   */
926  private static String showCharacter(char c) {
927    String hex = "0123456789ABCDEF";
928    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
929    for (int i = 0; i < 4; i++) {
930      tmp[5 - i] = hex.charAt(c & 0xF);
931      c = (char) (c >> 4);
932    }
933    return String.copyValueOf(tmp);
934  }
935
936  // Fast matchers
937
938  /** A matcher for which precomputation will not yield any significant benefit. */
939  abstract static class FastMatcher extends CharMatcher {
940
941    @Override
942    public final CharMatcher precomputed() {
943      return this;
944    }
945
946    @Override
947    public CharMatcher negate() {
948      return new NegatedFastMatcher(this);
949    }
950  }
951
952  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
953  abstract static class NamedFastMatcher extends FastMatcher {
954
955    private final String description;
956
957    NamedFastMatcher(String description) {
958      this.description = checkNotNull(description);
959    }
960
961    @Override
962    public final String toString() {
963      return description;
964    }
965  }
966
967  /** Negation of a {@link FastMatcher}. */
968  static class NegatedFastMatcher extends Negated {
969
970    NegatedFastMatcher(CharMatcher original) {
971      super(original);
972    }
973
974    @Override
975    public final CharMatcher precomputed() {
976      return this;
977    }
978  }
979
980  /** Fast matcher using a {@link BitSet} table of matching characters. */
981  @GwtIncompatible // used only from other GwtIncompatible code
982  private static final class BitSetMatcher extends NamedFastMatcher {
983
984    private final BitSet table;
985
986    private BitSetMatcher(BitSet table, String description) {
987      super(description);
988      if (table.length() + Long.SIZE < table.size()) {
989        table = (BitSet) table.clone();
990        // If only we could actually call BitSet.trimToSize() ourselves...
991      }
992      this.table = table;
993    }
994
995    @Override
996    public boolean matches(char c) {
997      return table.get(c);
998    }
999
1000    @Override
1001    void setBits(BitSet bitSet) {
1002      bitSet.or(table);
1003    }
1004  }
1005
1006  // Static constant implementation classes
1007
1008  /** Implementation of {@link #any()}. */
1009  private static final class Any extends NamedFastMatcher {
1010
1011    static final Any INSTANCE = new Any();
1012
1013    private Any() {
1014      super("CharMatcher.any()");
1015    }
1016
1017    @Override
1018    public boolean matches(char c) {
1019      return true;
1020    }
1021
1022    @Override
1023    public int indexIn(CharSequence sequence) {
1024      return (sequence.length() == 0) ? -1 : 0;
1025    }
1026
1027    @Override
1028    public int indexIn(CharSequence sequence, int start) {
1029      int length = sequence.length();
1030      checkPositionIndex(start, length);
1031      return (start == length) ? -1 : start;
1032    }
1033
1034    @Override
1035    public int lastIndexIn(CharSequence sequence) {
1036      return sequence.length() - 1;
1037    }
1038
1039    @Override
1040    public boolean matchesAllOf(CharSequence sequence) {
1041      checkNotNull(sequence);
1042      return true;
1043    }
1044
1045    @Override
1046    public boolean matchesNoneOf(CharSequence sequence) {
1047      return sequence.length() == 0;
1048    }
1049
1050    @Override
1051    public String removeFrom(CharSequence sequence) {
1052      checkNotNull(sequence);
1053      return "";
1054    }
1055
1056    @Override
1057    public String replaceFrom(CharSequence sequence, char replacement) {
1058      char[] array = new char[sequence.length()];
1059      Arrays.fill(array, replacement);
1060      return new String(array);
1061    }
1062
1063    @Override
1064    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1065      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1066      for (int i = 0; i < sequence.length(); i++) {
1067        result.append(replacement);
1068      }
1069      return result.toString();
1070    }
1071
1072    @Override
1073    public String collapseFrom(CharSequence sequence, char replacement) {
1074      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1075    }
1076
1077    @Override
1078    public String trimFrom(CharSequence sequence) {
1079      checkNotNull(sequence);
1080      return "";
1081    }
1082
1083    @Override
1084    public int countIn(CharSequence sequence) {
1085      return sequence.length();
1086    }
1087
1088    @Override
1089    public CharMatcher and(CharMatcher other) {
1090      return checkNotNull(other);
1091    }
1092
1093    @Override
1094    public CharMatcher or(CharMatcher other) {
1095      checkNotNull(other);
1096      return this;
1097    }
1098
1099    @Override
1100    public CharMatcher negate() {
1101      return none();
1102    }
1103  }
1104
1105  /** Implementation of {@link #none()}. */
1106  private static final class None extends NamedFastMatcher {
1107
1108    static final None INSTANCE = new None();
1109
1110    private None() {
1111      super("CharMatcher.none()");
1112    }
1113
1114    @Override
1115    public boolean matches(char c) {
1116      return false;
1117    }
1118
1119    @Override
1120    public int indexIn(CharSequence sequence) {
1121      checkNotNull(sequence);
1122      return -1;
1123    }
1124
1125    @Override
1126    public int indexIn(CharSequence sequence, int start) {
1127      int length = sequence.length();
1128      checkPositionIndex(start, length);
1129      return -1;
1130    }
1131
1132    @Override
1133    public int lastIndexIn(CharSequence sequence) {
1134      checkNotNull(sequence);
1135      return -1;
1136    }
1137
1138    @Override
1139    public boolean matchesAllOf(CharSequence sequence) {
1140      return sequence.length() == 0;
1141    }
1142
1143    @Override
1144    public boolean matchesNoneOf(CharSequence sequence) {
1145      checkNotNull(sequence);
1146      return true;
1147    }
1148
1149    @Override
1150    public String removeFrom(CharSequence sequence) {
1151      return sequence.toString();
1152    }
1153
1154    @Override
1155    public String replaceFrom(CharSequence sequence, char replacement) {
1156      return sequence.toString();
1157    }
1158
1159    @Override
1160    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1161      checkNotNull(replacement);
1162      return sequence.toString();
1163    }
1164
1165    @Override
1166    public String collapseFrom(CharSequence sequence, char replacement) {
1167      return sequence.toString();
1168    }
1169
1170    @Override
1171    public String trimFrom(CharSequence sequence) {
1172      return sequence.toString();
1173    }
1174
1175    @Override
1176    public String trimLeadingFrom(CharSequence sequence) {
1177      return sequence.toString();
1178    }
1179
1180    @Override
1181    public String trimTrailingFrom(CharSequence sequence) {
1182      return sequence.toString();
1183    }
1184
1185    @Override
1186    public int countIn(CharSequence sequence) {
1187      checkNotNull(sequence);
1188      return 0;
1189    }
1190
1191    @Override
1192    public CharMatcher and(CharMatcher other) {
1193      checkNotNull(other);
1194      return this;
1195    }
1196
1197    @Override
1198    public CharMatcher or(CharMatcher other) {
1199      return checkNotNull(other);
1200    }
1201
1202    @Override
1203    public CharMatcher negate() {
1204      return any();
1205    }
1206  }
1207
1208  /** Implementation of {@link #whitespace()}. */
1209  @VisibleForTesting
1210  static final class Whitespace extends NamedFastMatcher {
1211
1212    // TABLE is a precomputed hashset of whitespace characters. MULTIPLIER serves as a hash function
1213    // whose key property is that it maps 25 characters into the 32-slot table without collision.
1214    // Basically this is an opportunistic fast implementation as opposed to "good code". For most
1215    // other use-cases, the reduction in readability isn't worth it.
1216    static final String TABLE =
1217        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1218            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1219            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1220            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1221    static final int MULTIPLIER = 1682554634;
1222    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1223
1224    static final Whitespace INSTANCE = new Whitespace();
1225
1226    Whitespace() {
1227      super("CharMatcher.whitespace()");
1228    }
1229
1230    @Override
1231    public boolean matches(char c) {
1232      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1233    }
1234
1235    @GwtIncompatible // used only from other GwtIncompatible code
1236    @Override
1237    void setBits(BitSet table) {
1238      for (int i = 0; i < TABLE.length(); i++) {
1239        table.set(TABLE.charAt(i));
1240      }
1241    }
1242  }
1243
1244  /** Implementation of {@link #breakingWhitespace()}. */
1245  private static final class BreakingWhitespace extends CharMatcher {
1246
1247    static final CharMatcher INSTANCE = new BreakingWhitespace();
1248
1249    @Override
1250    public boolean matches(char c) {
1251      switch (c) {
1252        case '\t':
1253        case '\n':
1254        case '\013':
1255        case '\f':
1256        case '\r':
1257        case ' ':
1258        case '\u0085':
1259        case '\u1680':
1260        case '\u2028':
1261        case '\u2029':
1262        case '\u205f':
1263        case '\u3000':
1264          return true;
1265        case '\u2007':
1266          return false;
1267        default:
1268          return c >= '\u2000' && c <= '\u200a';
1269      }
1270    }
1271
1272    @Override
1273    public String toString() {
1274      return "CharMatcher.breakingWhitespace()";
1275    }
1276  }
1277
1278  /** Implementation of {@link #ascii()}. */
1279  private static final class Ascii extends NamedFastMatcher {
1280
1281    static final Ascii INSTANCE = new Ascii();
1282
1283    Ascii() {
1284      super("CharMatcher.ascii()");
1285    }
1286
1287    @Override
1288    public boolean matches(char c) {
1289      return c <= '\u007f';
1290    }
1291  }
1292
1293  /** Implementation that matches characters that fall within multiple ranges. */
1294  private static class RangesMatcher extends CharMatcher {
1295
1296    private final String description;
1297    private final char[] rangeStarts;
1298    private final char[] rangeEnds;
1299
1300    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1301      this.description = description;
1302      this.rangeStarts = rangeStarts;
1303      this.rangeEnds = rangeEnds;
1304      checkArgument(rangeStarts.length == rangeEnds.length);
1305      for (int i = 0; i < rangeStarts.length; i++) {
1306        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1307        if (i + 1 < rangeStarts.length) {
1308          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1309        }
1310      }
1311    }
1312
1313    @Override
1314    public boolean matches(char c) {
1315      int index = Arrays.binarySearch(rangeStarts, c);
1316      if (index >= 0) {
1317        return true;
1318      } else {
1319        index = ~index - 1;
1320        return index >= 0 && c <= rangeEnds[index];
1321      }
1322    }
1323
1324    @Override
1325    public String toString() {
1326      return description;
1327    }
1328  }
1329
1330  /** Implementation of {@link #digit()}. */
1331  private static final class Digit extends RangesMatcher {
1332    // Plug the following UnicodeSet pattern into
1333    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1334    // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]]
1335    // and get the zeroes from there.
1336
1337    // Must be in ascending order.
1338    private static final String ZEROES =
1339        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6"
1340            + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0"
1341            + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10";
1342
1343    private static char[] zeroes() {
1344      return ZEROES.toCharArray();
1345    }
1346
1347    private static char[] nines() {
1348      char[] nines = new char[ZEROES.length()];
1349      for (int i = 0; i < ZEROES.length(); i++) {
1350        nines[i] = (char) (ZEROES.charAt(i) + 9);
1351      }
1352      return nines;
1353    }
1354
1355    static final Digit INSTANCE = new Digit();
1356
1357    private Digit() {
1358      super("CharMatcher.digit()", zeroes(), nines());
1359    }
1360  }
1361
1362  /** Implementation of {@link #javaDigit()}. */
1363  private static final class JavaDigit extends CharMatcher {
1364
1365    static final JavaDigit INSTANCE = new JavaDigit();
1366
1367    @Override
1368    public boolean matches(char c) {
1369      return Character.isDigit(c);
1370    }
1371
1372    @Override
1373    public String toString() {
1374      return "CharMatcher.javaDigit()";
1375    }
1376  }
1377
1378  /** Implementation of {@link #javaLetter()}. */
1379  private static final class JavaLetter extends CharMatcher {
1380
1381    static final JavaLetter INSTANCE = new JavaLetter();
1382
1383    @Override
1384    public boolean matches(char c) {
1385      return Character.isLetter(c);
1386    }
1387
1388    @Override
1389    public String toString() {
1390      return "CharMatcher.javaLetter()";
1391    }
1392  }
1393
1394  /** Implementation of {@link #javaLetterOrDigit()}. */
1395  private static final class JavaLetterOrDigit extends CharMatcher {
1396
1397    static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit();
1398
1399    @Override
1400    public boolean matches(char c) {
1401      return Character.isLetterOrDigit(c);
1402    }
1403
1404    @Override
1405    public String toString() {
1406      return "CharMatcher.javaLetterOrDigit()";
1407    }
1408  }
1409
1410  /** Implementation of {@link #javaUpperCase()}. */
1411  private static final class JavaUpperCase extends CharMatcher {
1412
1413    static final JavaUpperCase INSTANCE = new JavaUpperCase();
1414
1415    @Override
1416    public boolean matches(char c) {
1417      return Character.isUpperCase(c);
1418    }
1419
1420    @Override
1421    public String toString() {
1422      return "CharMatcher.javaUpperCase()";
1423    }
1424  }
1425
1426  /** Implementation of {@link #javaLowerCase()}. */
1427  private static final class JavaLowerCase extends CharMatcher {
1428
1429    static final JavaLowerCase INSTANCE = new JavaLowerCase();
1430
1431    @Override
1432    public boolean matches(char c) {
1433      return Character.isLowerCase(c);
1434    }
1435
1436    @Override
1437    public String toString() {
1438      return "CharMatcher.javaLowerCase()";
1439    }
1440  }
1441
1442  /** Implementation of {@link #javaIsoControl()}. */
1443  private static final class JavaIsoControl extends NamedFastMatcher {
1444
1445    static final JavaIsoControl INSTANCE = new JavaIsoControl();
1446
1447    private JavaIsoControl() {
1448      super("CharMatcher.javaIsoControl()");
1449    }
1450
1451    @Override
1452    public boolean matches(char c) {
1453      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1454    }
1455  }
1456
1457  /** Implementation of {@link #invisible()}. */
1458  private static final class Invisible extends RangesMatcher {
1459    // Plug the following UnicodeSet pattern into
1460    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1461    // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]]
1462    // with the "Abbreviate" option, and get the ranges from there.
1463    private static final String RANGE_STARTS =
1464        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u0890\u08e2\u1680\u180e\u2000\u2028\u205f\u2066"
1465            + "\u3000\ud800\ufeff\ufff9";
1466    private static final String RANGE_ENDS = // inclusive ends
1467        "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u0891\u08e2\u1680\u180e\u200f\u202f\u2064\u206f"
1468            + "\u3000\uf8ff\ufeff\ufffb";
1469
1470    static final Invisible INSTANCE = new Invisible();
1471
1472    private Invisible() {
1473      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1474    }
1475  }
1476
1477  /** Implementation of {@link #singleWidth()}. */
1478  private static final class SingleWidth extends RangesMatcher {
1479
1480    static final SingleWidth INSTANCE = new SingleWidth();
1481
1482    private SingleWidth() {
1483      super(
1484          "CharMatcher.singleWidth()",
1485          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1486          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1487    }
1488  }
1489
1490  // Non-static factory implementation classes
1491
1492  /** Implementation of {@link #negate()}. */
1493  private static class Negated extends CharMatcher {
1494
1495    final CharMatcher original;
1496
1497    Negated(CharMatcher original) {
1498      this.original = checkNotNull(original);
1499    }
1500
1501    @Override
1502    public boolean matches(char c) {
1503      return !original.matches(c);
1504    }
1505
1506    @Override
1507    public boolean matchesAllOf(CharSequence sequence) {
1508      return original.matchesNoneOf(sequence);
1509    }
1510
1511    @Override
1512    public boolean matchesNoneOf(CharSequence sequence) {
1513      return original.matchesAllOf(sequence);
1514    }
1515
1516    @Override
1517    public int countIn(CharSequence sequence) {
1518      return sequence.length() - original.countIn(sequence);
1519    }
1520
1521    @GwtIncompatible // used only from other GwtIncompatible code
1522    @Override
1523    void setBits(BitSet table) {
1524      BitSet tmp = new BitSet();
1525      original.setBits(tmp);
1526      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1527      table.or(tmp);
1528    }
1529
1530    @Override
1531    public CharMatcher negate() {
1532      return original;
1533    }
1534
1535    @Override
1536    public String toString() {
1537      return original + ".negate()";
1538    }
1539  }
1540
1541  /** Implementation of {@link #and(CharMatcher)}. */
1542  private static final class And extends CharMatcher {
1543
1544    final CharMatcher first;
1545    final CharMatcher second;
1546
1547    And(CharMatcher a, CharMatcher b) {
1548      first = checkNotNull(a);
1549      second = checkNotNull(b);
1550    }
1551
1552    @Override
1553    public boolean matches(char c) {
1554      return first.matches(c) && second.matches(c);
1555    }
1556
1557    @GwtIncompatible // used only from other GwtIncompatible code
1558    @Override
1559    void setBits(BitSet table) {
1560      BitSet tmp1 = new BitSet();
1561      first.setBits(tmp1);
1562      BitSet tmp2 = new BitSet();
1563      second.setBits(tmp2);
1564      tmp1.and(tmp2);
1565      table.or(tmp1);
1566    }
1567
1568    @Override
1569    public String toString() {
1570      return "CharMatcher.and(" + first + ", " + second + ")";
1571    }
1572  }
1573
1574  /** Implementation of {@link #or(CharMatcher)}. */
1575  private static final class Or extends CharMatcher {
1576
1577    final CharMatcher first;
1578    final CharMatcher second;
1579
1580    Or(CharMatcher a, CharMatcher b) {
1581      first = checkNotNull(a);
1582      second = checkNotNull(b);
1583    }
1584
1585    @GwtIncompatible // used only from other GwtIncompatible code
1586    @Override
1587    void setBits(BitSet table) {
1588      first.setBits(table);
1589      second.setBits(table);
1590    }
1591
1592    @Override
1593    public boolean matches(char c) {
1594      return first.matches(c) || second.matches(c);
1595    }
1596
1597    @Override
1598    public String toString() {
1599      return "CharMatcher.or(" + first + ", " + second + ")";
1600    }
1601  }
1602
1603  // Static factory implementations
1604
1605  /** Implementation of {@link #is(char)}. */
1606  private static final class Is extends FastMatcher {
1607
1608    private final char match;
1609
1610    Is(char match) {
1611      this.match = match;
1612    }
1613
1614    @Override
1615    public boolean matches(char c) {
1616      return c == match;
1617    }
1618
1619    @Override
1620    public String replaceFrom(CharSequence sequence, char replacement) {
1621      return sequence.toString().replace(match, replacement);
1622    }
1623
1624    @Override
1625    public CharMatcher and(CharMatcher other) {
1626      return other.matches(match) ? this : none();
1627    }
1628
1629    @Override
1630    public CharMatcher or(CharMatcher other) {
1631      return other.matches(match) ? other : super.or(other);
1632    }
1633
1634    @Override
1635    public CharMatcher negate() {
1636      return isNot(match);
1637    }
1638
1639    @GwtIncompatible // used only from other GwtIncompatible code
1640    @Override
1641    void setBits(BitSet table) {
1642      table.set(match);
1643    }
1644
1645    @Override
1646    public String toString() {
1647      return "CharMatcher.is('" + showCharacter(match) + "')";
1648    }
1649  }
1650
1651  /** Implementation of {@link #isNot(char)}. */
1652  private static final class IsNot extends FastMatcher {
1653
1654    private final char match;
1655
1656    IsNot(char match) {
1657      this.match = match;
1658    }
1659
1660    @Override
1661    public boolean matches(char c) {
1662      return c != match;
1663    }
1664
1665    @Override
1666    public CharMatcher and(CharMatcher other) {
1667      return other.matches(match) ? super.and(other) : other;
1668    }
1669
1670    @Override
1671    public CharMatcher or(CharMatcher other) {
1672      return other.matches(match) ? any() : this;
1673    }
1674
1675    @GwtIncompatible // used only from other GwtIncompatible code
1676    @Override
1677    void setBits(BitSet table) {
1678      table.set(0, match);
1679      table.set(match + 1, Character.MAX_VALUE + 1);
1680    }
1681
1682    @Override
1683    public CharMatcher negate() {
1684      return is(match);
1685    }
1686
1687    @Override
1688    public String toString() {
1689      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1690    }
1691  }
1692
1693  private static CharMatcher.IsEither isEither(char c1, char c2) {
1694    return new CharMatcher.IsEither(c1, c2);
1695  }
1696
1697  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1698  private static final class IsEither extends FastMatcher {
1699
1700    private final char match1;
1701    private final char match2;
1702
1703    IsEither(char match1, char match2) {
1704      this.match1 = match1;
1705      this.match2 = match2;
1706    }
1707
1708    @Override
1709    public boolean matches(char c) {
1710      return c == match1 || c == match2;
1711    }
1712
1713    @GwtIncompatible // used only from other GwtIncompatible code
1714    @Override
1715    void setBits(BitSet table) {
1716      table.set(match1);
1717      table.set(match2);
1718    }
1719
1720    @Override
1721    public String toString() {
1722      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1723    }
1724  }
1725
1726  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1727  private static final class AnyOf extends CharMatcher {
1728
1729    private final char[] chars;
1730
1731    public AnyOf(CharSequence chars) {
1732      this.chars = chars.toString().toCharArray();
1733      Arrays.sort(this.chars);
1734    }
1735
1736    @Override
1737    public boolean matches(char c) {
1738      return Arrays.binarySearch(chars, c) >= 0;
1739    }
1740
1741    @Override
1742    @GwtIncompatible // used only from other GwtIncompatible code
1743    void setBits(BitSet table) {
1744      for (char c : chars) {
1745        table.set(c);
1746      }
1747    }
1748
1749    @Override
1750    public String toString() {
1751      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1752      for (char c : chars) {
1753        description.append(showCharacter(c));
1754      }
1755      description.append("\")");
1756      return description.toString();
1757    }
1758  }
1759
1760  /** Implementation of {@link #inRange(char, char)}. */
1761  private static final class InRange extends FastMatcher {
1762
1763    private final char startInclusive;
1764    private final char endInclusive;
1765
1766    InRange(char startInclusive, char endInclusive) {
1767      checkArgument(endInclusive >= startInclusive);
1768      this.startInclusive = startInclusive;
1769      this.endInclusive = endInclusive;
1770    }
1771
1772    @Override
1773    public boolean matches(char c) {
1774      return startInclusive <= c && c <= endInclusive;
1775    }
1776
1777    @GwtIncompatible // used only from other GwtIncompatible code
1778    @Override
1779    void setBits(BitSet table) {
1780      table.set(startInclusive, endInclusive + 1);
1781    }
1782
1783    @Override
1784    public String toString() {
1785      return "CharMatcher.inRange('"
1786          + showCharacter(startInclusive)
1787          + "', '"
1788          + showCharacter(endInclusive)
1789          + "')";
1790    }
1791  }
1792
1793  /** Implementation of {@link #forPredicate(Predicate)}. */
1794  private static final class ForPredicate extends CharMatcher {
1795
1796    private final Predicate<? super Character> predicate;
1797
1798    ForPredicate(Predicate<? super Character> predicate) {
1799      this.predicate = checkNotNull(predicate);
1800    }
1801
1802    @Override
1803    public boolean matches(char c) {
1804      return predicate.apply(c);
1805    }
1806
1807    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1808    @Override
1809    public boolean apply(Character character) {
1810      return predicate.apply(checkNotNull(character));
1811    }
1812
1813    @Override
1814    public String toString() {
1815      return "CharMatcher.forPredicate(" + predicate + ")";
1816    }
1817  }
1818}