001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.VisibleForTesting;
024import com.google.errorprone.annotations.InlineMe;
025import com.google.errorprone.annotations.InlineMeValidationDisabled;
026import java.util.Arrays;
027import java.util.BitSet;
028
029/**
030 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
031 * for any {@link Object}. Also offers basic text processing methods based on this function.
032 * Implementations are strongly encouraged to be side-effect-free and immutable.
033 *
034 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
035 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
036 *
037 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a
038 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand
039 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code
040 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of
041 * assigned characters, including important CJK characters and emoji.
042 *
043 * <p>Supplementary characters are <a
044 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded
045 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as
046 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s.
047 *
048 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for
049 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For
050 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner.
051 *
052 * <p>Example usages:
053 *
054 * <pre>
055 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
056 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
057 *
058 * <p>See the Guava User Guide article on <a
059 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
060 * </a>.
061 *
062 * @author Kevin Bourrillion
063 * @since 1.0
064 */
065@GwtCompatible(emulated = true)
066public abstract class CharMatcher implements Predicate<Character> {
067  /*
068   *           N777777777NO
069   *         N7777777777777N
070   *        M777777777777777N
071   *        $N877777777D77777M
072   *       N M77777777ONND777M
073   *       MN777777777NN  D777
074   *     N7ZN777777777NN ~M7778
075   *    N777777777777MMNN88777N
076   *    N777777777777MNZZZ7777O
077   *    DZN7777O77777777777777
078   *     N7OONND7777777D77777N
079   *      8$M++++?N???$77777$
080   *       M7++++N+M77777777N
081   *        N77O777777777777$                              M
082   *          DNNM$$$$777777N                              D
083   *         N$N:=N$777N7777M                             NZ
084   *        77Z::::N777777777                          ODZZZ
085   *       77N::::::N77777777M                         NNZZZ$
086   *     $777:::::::77777777MN                        ZM8ZZZZZ
087   *     777M::::::Z7777777Z77                        N++ZZZZNN
088   *    7777M:::::M7777777$777M                       $++IZZZZM
089   *   M777$:::::N777777$M7777M                       +++++ZZZDN
090   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
091   *       N::::::N:7$O:77777777                      N++++ZZZZN
092   *       M::::::::::::N77777777+                   +?+++++ZZZM
093   *       8::::::::::::D77777777M                    O+++++ZZ
094   *        ::::::::::::M777777777N                      O+?D
095   *        M:::::::::::M77777777778                     77=
096   *        D=::::::::::N7777777777N                    777
097   *       INN===::::::=77777777777N                  I777N
098   *      ?777N========N7777777777787M               N7777
099   *      77777$D======N77777777777N777N?         N777777
100   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
101   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
102   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
103   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
104   *   7 :N MNN$$$$M$      $$$777$8      8D8I
105   *     NMM.:7O           777777778
106   *                       7777777MN
107   *                       M NO .7:
108   *                       M   :   M
109   *                            8
110   */
111
112  // Constant matcher factory methods
113
114  /**
115   * Matches any character.
116   *
117   * @since 19.0 (since 1.0 as constant {@code ANY})
118   */
119  public static CharMatcher any() {
120    return Any.INSTANCE;
121  }
122
123  /**
124   * Matches no characters.
125   *
126   * @since 19.0 (since 1.0 as constant {@code NONE})
127   */
128  public static CharMatcher none() {
129    return None.INSTANCE;
130  }
131
132  /**
133   * Determines whether a character is whitespace according to the latest Unicode standard, as
134   * illustrated <a
135   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
136   * This is not the same definition used by other Java APIs. (See a <a
137   * href="https://docs.google.com/spreadsheets/d/1kq4ECwPjHX9B8QUCTPclgsDCXYaj7T-FlT4tB5q3ahk/edit">comparison
138   * of several definitions of "whitespace"</a>.)
139   *
140   * <p>All Unicode White_Space characters are on the BMP and thus supported by this API.
141   *
142   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
143   * date.
144   *
145   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
146   */
147  public static CharMatcher whitespace() {
148    return Whitespace.INSTANCE;
149  }
150
151  /**
152   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
153   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
154   * discussion of that term.
155   *
156   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
157   */
158  public static CharMatcher breakingWhitespace() {
159    return BreakingWhitespace.INSTANCE;
160  }
161
162  /**
163   * Determines whether a character is ASCII, meaning that its code point is less than 128.
164   *
165   * @since 19.0 (since 1.0 as constant {@code ASCII})
166   */
167  public static CharMatcher ascii() {
168    return Ascii.INSTANCE;
169  }
170
171  /**
172   * Determines whether a character is a BMP digit according to <a
173   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
174   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
175   *
176   * @deprecated Many digits are supplementary characters; see the class documentation.
177   * @since 19.0 (since 1.0 as constant {@code DIGIT})
178   */
179  @Deprecated
180  public static CharMatcher digit() {
181    return Digit.INSTANCE;
182  }
183
184  /**
185   * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char)
186   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
187   * '9')}.
188   *
189   * @deprecated Many digits are supplementary characters; see the class documentation.
190   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
191   */
192  @Deprecated
193  public static CharMatcher javaDigit() {
194    return JavaDigit.INSTANCE;
195  }
196
197  /**
198   * Determines whether a character is a BMP letter according to {@linkplain
199   * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
200   * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
201   *
202   * @deprecated Most letters are supplementary characters; see the class documentation.
203   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
204   */
205  @Deprecated
206  public static CharMatcher javaLetter() {
207    return JavaLetter.INSTANCE;
208  }
209
210  /**
211   * Determines whether a character is a BMP letter or digit according to {@linkplain
212   * Character#isLetterOrDigit(char) Java's definition}.
213   *
214   * @deprecated Most letters and digits are supplementary characters; see the class documentation.
215   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}).
216   */
217  @Deprecated
218  public static CharMatcher javaLetterOrDigit() {
219    return JavaLetterOrDigit.INSTANCE;
220  }
221
222  /**
223   * Determines whether a BMP character is upper case according to {@linkplain
224   * Character#isUpperCase(char) Java's definition}.
225   *
226   * @deprecated Some uppercase characters are supplementary characters; see the class
227   *     documentation.
228   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
229   */
230  @Deprecated
231  public static CharMatcher javaUpperCase() {
232    return JavaUpperCase.INSTANCE;
233  }
234
235  /**
236   * Determines whether a BMP character is lower case according to {@linkplain
237   * Character#isLowerCase(char) Java's definition}.
238   *
239   * @deprecated Some lowercase characters are supplementary characters; see the class
240   *     documentation.
241   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
242   */
243  @Deprecated
244  public static CharMatcher javaLowerCase() {
245    return JavaLowerCase.INSTANCE;
246  }
247
248  /**
249   * Determines whether a character is an ISO control character as specified by {@link
250   * Character#isISOControl(char)}.
251   *
252   * <p>All ISO control codes are on the BMP and thus supported by this API.
253   *
254   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
255   */
256  public static CharMatcher javaIsoControl() {
257    return JavaIsoControl.INSTANCE;
258  }
259
260  /**
261   * Determines whether a character is invisible; that is, if its Unicode category is any of
262   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
263   * PRIVATE_USE according to ICU4J.
264   *
265   * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU).
266   *
267   * @deprecated Most invisible characters are supplementary characters; see the class
268   *     documentation.
269   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
270   */
271  @Deprecated
272  public static CharMatcher invisible() {
273    return Invisible.INSTANCE;
274  }
275
276  /**
277   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
278   * errs on the side of returning {@code false} (that is, it tends to assume a character is
279   * double-width).
280   *
281   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
282   * date.
283   *
284   * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>.
285   *
286   * @deprecated Many such characters are supplementary characters; see the class documentation.
287   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
288   */
289  @Deprecated
290  public static CharMatcher singleWidth() {
291    return SingleWidth.INSTANCE;
292  }
293
294  // Static factories
295
296  /** Returns a {@code char} matcher that matches only one specified BMP character. */
297  public static CharMatcher is(final char match) {
298    return new Is(match);
299  }
300
301  /**
302   * Returns a {@code char} matcher that matches any character except the BMP character specified.
303   *
304   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
305   */
306  public static CharMatcher isNot(final char match) {
307    return new IsNot(match);
308  }
309
310  /**
311   * Returns a {@code char} matcher that matches any BMP character present in the given character
312   * sequence. Returns a bogus matcher if the sequence contains supplementary characters.
313   */
314  public static CharMatcher anyOf(final CharSequence sequence) {
315    switch (sequence.length()) {
316      case 0:
317        return none();
318      case 1:
319        return is(sequence.charAt(0));
320      case 2:
321        return isEither(sequence.charAt(0), sequence.charAt(1));
322      default:
323        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
324        // matcher?
325        return new AnyOf(sequence);
326    }
327  }
328
329  /**
330   * Returns a {@code char} matcher that matches any BMP character not present in the given
331   * character sequence. Returns a bogus matcher if the sequence contains supplementary characters.
332   */
333  public static CharMatcher noneOf(CharSequence sequence) {
334    return anyOf(sequence).negate();
335  }
336
337  /**
338   * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints
339   * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
340   * CharMatcher.inRange('a', 'z')}.
341   *
342   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
343   */
344  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
345    return new InRange(startInclusive, endInclusive);
346  }
347
348  /**
349   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
350   * which operates on primitive {@code char} instances instead.
351   */
352  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
353    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
354  }
355
356  // Constructors
357
358  /**
359   * Constructor for use by subclasses. When subclassing, you may want to override {@code
360   * toString()} to provide a useful description.
361   */
protected CharMatcher() {
  // Intentionally empty: this constructor performs no initialization.
}
363
364  // Abstract methods
365
366  /** Determines a true or false value for the given character. */
367  public abstract boolean matches(char c); // abstract: each concrete matcher supplies the per-char decision
368
369  // Non-static factories
370
371  /** Returns a matcher that matches any character not matched by this matcher. */
372  // This is not an override in java7, where Guava's Predicate does not extend the JDK's Predicate.
373  @SuppressWarnings("MissingOverride")
374  public CharMatcher negate() {
375    return new Negated(this);
376  }
377
378  /**
379   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
380   */
381  public CharMatcher and(CharMatcher other) {
382    return new And(this, other);
383  }
384
385  /**
386   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
387   */
388  public CharMatcher or(CharMatcher other) {
389    return new Or(this, other);
390  }
391
392  /**
393   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
394   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
395   * worthwhile only if the precomputed matcher is queried many thousands of times.
396   *
397   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
398   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
399   * worthwhile tradeoff in a browser.
400   */
401  public CharMatcher precomputed() {
402    return Platform.precomputeCharMatcher(this);
403  }
404
405  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
406
407  /**
408   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
409   * on {@link Platform} so that we can have different behavior in GWT.
410   *
411   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
412   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
413   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
414   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
415   * matcher which is faster to query than the original.
416   */
417  @GwtIncompatible // SmallCharMatcher
418  CharMatcher precomputedInternal() {
419    final BitSet table = new BitSet();
420    setBits(table);
421    int totalCharacters = table.cardinality();
422    if (totalCharacters * 2 <= DISTINCT_CHARS) {
423      return precomputedPositive(totalCharacters, table, toString());
424    } else {
425      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
426      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
427      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
428      String suffix = ".negate()";
429      final String description = toString();
430      String negatedDescription =
431          description.endsWith(suffix)
432              ? description.substring(0, description.length() - suffix.length())
433              : description + suffix;
434      return new NegatedFastMatcher(
435          precomputedPositive(negatedCharacters, table, negatedDescription)) {
436        @Override
437        public String toString() {
438          return description;
439        }
440      };
441    }
442  }
443
444  /**
445   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
446   */
447  @GwtIncompatible // SmallCharMatcher
448  private static CharMatcher precomputedPositive(
449      int totalCharacters, BitSet table, String description) {
450    switch (totalCharacters) {
451      case 0:
452        return none();
453      case 1:
454        return is((char) table.nextSetBit(0));
455      case 2:
456        char c1 = (char) table.nextSetBit(0);
457        char c2 = (char) table.nextSetBit(c1 + 1);
458        return isEither(c1, c2);
459      default:
460        return isSmall(totalCharacters, table.length())
461            ? SmallCharMatcher.from(table, description)
462            : new BitSetMatcher(table, description);
463    }
464  }
465
466  @GwtIncompatible // SmallCharMatcher
467  private static boolean isSmall(int totalCharacters, int tableLength) {
468    return totalCharacters <= SmallCharMatcher.MAX_SIZE
469        && tableLength > (totalCharacters * 4 * Character.SIZE);
470    // err on the side of BitSetMatcher
471  }
472
473  /** Sets bits in {@code table} matched by this matcher. */
474  @GwtIncompatible // used only from other GwtIncompatible code
475  void setBits(BitSet table) {
476    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
477      if (matches((char) c)) {
478        table.set(c);
479      }
480    }
481  }
482
483  // Text processing routines
484
485  /**
486   * Returns {@code true} if a character sequence contains at least one matching BMP character.
487   * Equivalent to {@code !matchesNoneOf(sequence)}.
488   *
489   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
490   * character, until this returns {@code true} or the end is reached.
491   *
492   * @param sequence the character sequence to examine, possibly empty
493   * @return {@code true} if this matcher matches at least one character in the sequence
494   * @since 8.0
495   */
496  public boolean matchesAnyOf(CharSequence sequence) {
497    return !matchesNoneOf(sequence);
498  }
499
500  /**
501   * Returns {@code true} if a character sequence contains only matching BMP characters.
502   *
503   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
504   * character, until this returns {@code false} or the end is reached.
505   *
506   * @param sequence the character sequence to examine, possibly empty
507   * @return {@code true} if this matcher matches every character in the sequence, including when
508   *     the sequence is empty
509   */
510  public boolean matchesAllOf(CharSequence sequence) {
511    for (int i = sequence.length() - 1; i >= 0; i--) {
512      if (!matches(sequence.charAt(i))) {
513        return false;
514      }
515    }
516    return true;
517  }
518
519  /**
520   * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to
521   * {@code !matchesAnyOf(sequence)}.
522   *
523   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
524   * character, until this returns {@code true} or the end is reached.
525   *
526   * @param sequence the character sequence to examine, possibly empty
527   * @return {@code true} if this matcher matches no characters in the sequence, including when the
528   *     sequence is empty
529   */
530  public boolean matchesNoneOf(CharSequence sequence) {
531    return indexIn(sequence) == -1;
532  }
533
534  /**
535   * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if
536   * no matching character is present.
537   *
538   * <p>The default implementation iterates over the sequence in forward order calling {@link
539   * #matches} for each character.
540   *
541   * @param sequence the character sequence to examine from the beginning
542   * @return an index, or {@code -1} if no character matches
543   */
544  public int indexIn(CharSequence sequence) {
545    return indexIn(sequence, 0);
546  }
547
548  /**
549   * Returns the index of the first matching BMP character in a character sequence, starting from a
550   * given position, or {@code -1} if no character matches after that position.
551   *
552   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
553   * start}, calling {@link #matches} for each character.
554   *
555   * @param sequence the character sequence to examine
556   * @param start the first index to examine; must be nonnegative and no greater than {@code
557   *     sequence.length()}
558   * @return the index of the first matching character, guaranteed to be no less than {@code start},
559   *     or {@code -1} if no character matches
560   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
561   *     sequence.length()}
562   */
563  public int indexIn(CharSequence sequence, int start) {
564    int length = sequence.length();
565    checkPositionIndex(start, length);
566    for (int i = start; i < length; i++) {
567      if (matches(sequence.charAt(i))) {
568        return i;
569      }
570    }
571    return -1;
572  }
573
574  /**
575   * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if
576   * no matching character is present.
577   *
578   * <p>The default implementation iterates over the sequence in reverse order calling {@link
579   * #matches} for each character.
580   *
581   * @param sequence the character sequence to examine from the end
582   * @return an index, or {@code -1} if no character matches
583   */
584  public int lastIndexIn(CharSequence sequence) {
585    for (int i = sequence.length() - 1; i >= 0; i--) {
586      if (matches(sequence.charAt(i))) {
587        return i;
588      }
589    }
590    return -1;
591  }
592
593  /**
594   * Returns the number of matching {@code char}s found in a character sequence.
595   *
596   * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}().
597   */
598  public int countIn(CharSequence sequence) {
599    int count = 0;
600    for (int i = 0; i < sequence.length(); i++) {
601      if (matches(sequence.charAt(i))) {
602        count++;
603      }
604    }
605    return count;
606  }
607
608  /**
609   * Returns a string containing all non-matching characters of a character sequence, in order. For
610   * example:
611   *
612   * <pre>{@code
613   * CharMatcher.is('a').removeFrom("bazaar")
614   * }</pre>
615   *
616   * ... returns {@code "bzr"}.
617   */
618  public String removeFrom(CharSequence sequence) {
619    String string = sequence.toString();
620    int pos = indexIn(string); // index of the first matching char, or -1 if there is none
621    if (pos == -1) {
622      return string; // nothing to remove: the string form of the input is returned as-is
623    }
624
625    char[] chars = string.toCharArray();
626    int spread = 1; // matching chars seen so far == how far left each kept char is shifted
627
628    // This unusual loop comes from extensive benchmarking
629    OUT:
630    while (true) {
631      pos++; // step past the matching char that pos currently points at
632      while (true) {
633        if (pos == chars.length) {
634          break OUT; // input exhausted
635        }
636        if (matches(chars[pos])) {
637          break; // another char to drop: leave the inner loop so spread is bumped
638        }
639        chars[pos - spread] = chars[pos]; // slide the kept char left over the removed slots
640        pos++;
641      }
642      spread++;
643    }
644    return new String(chars, 0, pos - spread); // pos == chars.length here, so this is the kept-char count
645  }
646
647  /**
648   * Returns a string containing all matching BMP characters of a character sequence, in order. For
649   * example:
650   *
651   * <pre>{@code
652   * CharMatcher.is('a').retainFrom("bazaar")
653   * }</pre>
654   *
655   * ... returns {@code "aaa"}.
656   */
657  public String retainFrom(CharSequence sequence) {
658    return negate().removeFrom(sequence);
659  }
660
661  /**
662   * Returns a string copy of the input character sequence, with each matching BMP character
663   * replaced by a given replacement character. For example:
664   *
665   * <pre>{@code
666   * CharMatcher.is('a').replaceFrom("radar", 'o')
667   * }</pre>
668   *
669   * ... returns {@code "rodor"}.
670   *
671   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
672   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
673   * character.
674   *
675   * @param sequence the character sequence to replace matching characters in
676   * @param replacement the character to append to the result string in place of each matching
677   *     character in {@code sequence}
678   * @return the new string
679   */
680  public String replaceFrom(CharSequence sequence, char replacement) {
681    String string = sequence.toString();
682    int pos = indexIn(string);
683    if (pos == -1) {
684      return string;
685    }
686    char[] chars = string.toCharArray();
687    chars[pos] = replacement;
688    for (int i = pos + 1; i < chars.length; i++) {
689      if (matches(chars[i])) {
690        chars[i] = replacement;
691      }
692    }
693    return new String(chars);
694  }
695
696  /**
697   * Returns a string copy of the input character sequence, with each matching BMP character
698   * replaced by a given replacement sequence. For example:
699   *
700   * <pre>{@code
701   * CharMatcher.is('a').replaceFrom("yaha", "oo")
702   * }</pre>
703   *
704   * ... returns {@code "yoohoo"}.
705   *
706   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
707   * off calling {@link #replaceFrom(CharSequence, char)} directly.
708   *
709   * @param sequence the character sequence to replace matching characters in
710   * @param replacement the characters to append to the result string in place of each matching
711   *     character in {@code sequence}
712   * @return the new string
713   */
714  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
715    int replacementLen = replacement.length();
716    if (replacementLen == 0) {
717      return removeFrom(sequence);
718    }
719    if (replacementLen == 1) {
720      return replaceFrom(sequence, replacement.charAt(0));
721    }
722
723    String string = sequence.toString();
724    int pos = indexIn(string);
725    if (pos == -1) {
726      return string;
727    }
728
729    int len = string.length();
730    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
731
732    int oldpos = 0;
733    do {
734      buf.append(string, oldpos, pos);
735      buf.append(replacement);
736      oldpos = pos + 1;
737      pos = indexIn(string, oldpos);
738    } while (pos != -1);
739
740    buf.append(string, oldpos, len);
741    return buf.toString();
742  }
743
744  /**
745   * Returns a substring of the input character sequence that omits all matching BMP characters from
746   * the beginning and from the end of the string. For example:
747   *
748   * <pre>{@code
749   * CharMatcher.anyOf("ab").trimFrom("abacatbab")
750   * }</pre>
751   *
752   * ... returns {@code "cat"}.
753   *
754   * <p>Note that:
755   *
756   * <pre>{@code
757   * CharMatcher.inRange('\0', ' ').trimFrom(str)
758   * }</pre>
759   *
760   * ... is equivalent to {@link String#trim()}.
761   */
762  public String trimFrom(CharSequence sequence) {
763    int len = sequence.length();
764    int first;
765    int last;
766
767    for (first = 0; first < len; first++) {
768      if (!matches(sequence.charAt(first))) {
769        break;
770      }
771    }
772    for (last = len - 1; last > first; last--) {
773      if (!matches(sequence.charAt(last))) {
774        break;
775      }
776    }
777
778    return sequence.subSequence(first, last + 1).toString();
779  }
780
781  /**
782   * Returns a substring of the input character sequence that omits all matching BMP characters from
783   * the beginning of the string. For example:
784   *
785   * <pre>{@code
786   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
787   * }</pre>
788   *
789   * ... returns {@code "catbab"}.
790   */
791  public String trimLeadingFrom(CharSequence sequence) {
792    int len = sequence.length();
793    for (int first = 0; first < len; first++) {
794      if (!matches(sequence.charAt(first))) {
795        return sequence.subSequence(first, len).toString();
796      }
797    }
798    return "";
799  }
800
801  /**
802   * Returns a substring of the input character sequence that omits all matching BMP characters from
803   * the end of the string. For example:
804   *
805   * <pre>{@code
806   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
807   * }</pre>
808   *
809   * ... returns {@code "abacat"}.
810   */
811  public String trimTrailingFrom(CharSequence sequence) {
812    int len = sequence.length();
813    for (int last = len - 1; last >= 0; last--) {
814      if (!matches(sequence.charAt(last))) {
815        return sequence.subSequence(0, last + 1).toString();
816      }
817    }
818    return "";
819  }
820
821  /**
822   * Returns a string copy of the input character sequence, with each group of consecutive matching
823   * BMP characters replaced by a single replacement character. For example:
824   *
825   * <pre>{@code
826   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
827   * }</pre>
828   *
829   * ... returns {@code "b-p-r"}.
830   *
831   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
832   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
833   * character.
834   *
835   * @param sequence the character sequence to replace matching groups of characters in
836   * @param replacement the character to append to the result string in place of each group of
837   *     matching characters in {@code sequence}
838   * @return the new string
839   */
840  public String collapseFrom(CharSequence sequence, char replacement) {
841    // This implementation avoids unnecessary allocation.
842    int len = sequence.length();
843    for (int i = 0; i < len; i++) {
844      char c = sequence.charAt(i);
845      if (matches(c)) {
846        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
847          // a no-op replacement
848          i++;
849        } else {
850          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
851          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
852        }
853      }
854    }
855    // no replacement needed
856    return sequence.toString();
857  }
858
859  /**
860   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
861   * groups of matching BMP characters at the start or end of the sequence are removed without
862   * replacement.
863   */
864  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
865    // This implementation avoids unnecessary allocation.
866    int len = sequence.length();
867    int first = 0;
868    int last = len - 1;
869
870    while (first < len && matches(sequence.charAt(first))) {
871      first++;
872    }
873
874    while (last > first && matches(sequence.charAt(last))) {
875      last--;
876    }
877
878    return (first == 0 && last == len - 1)
879        ? collapseFrom(sequence, replacement)
880        : finishCollapseFrom(
881            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
882  }
883
884  private String finishCollapseFrom(
885      CharSequence sequence,
886      int start,
887      int end,
888      char replacement,
889      StringBuilder builder,
890      boolean inMatchingGroup) {
891    for (int i = start; i < end; i++) {
892      char c = sequence.charAt(i);
893      if (matches(c)) {
894        if (!inMatchingGroup) {
895          builder.append(replacement);
896          inMatchingGroup = true;
897        }
898      } else {
899        builder.append(c);
900        inMatchingGroup = false;
901      }
902    }
903    return builder.toString();
904  }
905
906  /**
907   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
908   *     instead.
909   */
910  @InlineMe(replacement = "this.matches(character)")
911  @Deprecated
912  @Override
913  /*
914   * We can't compatibly make this `final` now (even after devising a way for `ForPredicate`, which
915   * currently overrides it, to keep the null check that it inserts).
916   */
917  @InlineMeValidationDisabled(
918      "While apply() is not final, the inlining is still safe because all known overrides of"
919          + " apply() call matches().")
920  public boolean apply(Character character) {
921    return matches(character);
922  }
923
924  /**
925   * Returns a string representation of this {@code CharMatcher}, such as {@code
926   * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
927   */
928  @Override
929  public String toString() {
930    return super.toString();
931  }
932
933  /**
934   * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where
935   * "12AB" is the four hexadecimal digits representing the 16-bit code unit.
936   */
937  private static String showCharacter(char c) {
938    String hex = "0123456789ABCDEF";
939    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
940    for (int i = 0; i < 4; i++) {
941      tmp[5 - i] = hex.charAt(c & 0xF);
942      c = (char) (c >> 4);
943    }
944    return String.copyValueOf(tmp);
945  }
946
947  // Fast matchers
948
949  /** A matcher for which precomputation will not yield any significant benefit. */
950  abstract static class FastMatcher extends CharMatcher {
951
952    @Override
953    public final CharMatcher precomputed() {
954      return this;
955    }
956
957    @Override
958    public CharMatcher negate() {
959      return new NegatedFastMatcher(this);
960    }
961  }
962
963  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
964  abstract static class NamedFastMatcher extends FastMatcher {
965
966    private final String description;
967
968    NamedFastMatcher(String description) {
969      this.description = checkNotNull(description);
970    }
971
972    @Override
973    public final String toString() {
974      return description;
975    }
976  }
977
978  /** Negation of a {@link FastMatcher}. */
979  private static class NegatedFastMatcher extends Negated {
980
981    NegatedFastMatcher(CharMatcher original) {
982      super(original);
983    }
984
985    @Override
986    public final CharMatcher precomputed() {
987      return this;
988    }
989  }
990
991  /** Fast matcher using a {@link BitSet} table of matching characters. */
992  @GwtIncompatible // used only from other GwtIncompatible code
993  private static final class BitSetMatcher extends NamedFastMatcher {
994
995    private final BitSet table;
996
997    private BitSetMatcher(BitSet table, String description) {
998      super(description);
999      if (table.length() + Long.SIZE < table.size()) {
1000        table = (BitSet) table.clone();
1001        // If only we could actually call BitSet.trimToSize() ourselves...
1002      }
1003      this.table = table;
1004    }
1005
1006    @Override
1007    public boolean matches(char c) {
1008      return table.get(c);
1009    }
1010
1011    @Override
1012    void setBits(BitSet bitSet) {
1013      bitSet.or(table);
1014    }
1015  }
1016
1017  // Static constant implementation classes
1018
1019  /** Implementation of {@link #any()}. */
1020  private static final class Any extends NamedFastMatcher {
1021
1022    static final CharMatcher INSTANCE = new Any();
1023
1024    private Any() {
1025      super("CharMatcher.any()");
1026    }
1027
1028    @Override
1029    public boolean matches(char c) {
1030      return true;
1031    }
1032
1033    @Override
1034    public int indexIn(CharSequence sequence) {
1035      return (sequence.length() == 0) ? -1 : 0;
1036    }
1037
1038    @Override
1039    public int indexIn(CharSequence sequence, int start) {
1040      int length = sequence.length();
1041      checkPositionIndex(start, length);
1042      return (start == length) ? -1 : start;
1043    }
1044
1045    @Override
1046    public int lastIndexIn(CharSequence sequence) {
1047      return sequence.length() - 1;
1048    }
1049
1050    @Override
1051    public boolean matchesAllOf(CharSequence sequence) {
1052      checkNotNull(sequence);
1053      return true;
1054    }
1055
1056    @Override
1057    public boolean matchesNoneOf(CharSequence sequence) {
1058      return sequence.length() == 0;
1059    }
1060
1061    @Override
1062    public String removeFrom(CharSequence sequence) {
1063      checkNotNull(sequence);
1064      return "";
1065    }
1066
1067    @Override
1068    public String replaceFrom(CharSequence sequence, char replacement) {
1069      char[] array = new char[sequence.length()];
1070      Arrays.fill(array, replacement);
1071      return new String(array);
1072    }
1073
1074    @Override
1075    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1076      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1077      for (int i = 0; i < sequence.length(); i++) {
1078        result.append(replacement);
1079      }
1080      return result.toString();
1081    }
1082
1083    @Override
1084    public String collapseFrom(CharSequence sequence, char replacement) {
1085      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1086    }
1087
1088    @Override
1089    public String trimFrom(CharSequence sequence) {
1090      checkNotNull(sequence);
1091      return "";
1092    }
1093
1094    @Override
1095    public int countIn(CharSequence sequence) {
1096      return sequence.length();
1097    }
1098
1099    @Override
1100    public CharMatcher and(CharMatcher other) {
1101      return checkNotNull(other);
1102    }
1103
1104    @Override
1105    public CharMatcher or(CharMatcher other) {
1106      checkNotNull(other);
1107      return this;
1108    }
1109
1110    @Override
1111    public CharMatcher negate() {
1112      return none();
1113    }
1114  }
1115
1116  /** Implementation of {@link #none()}. */
1117  private static final class None extends NamedFastMatcher {
1118
1119    static final CharMatcher INSTANCE = new None();
1120
1121    private None() {
1122      super("CharMatcher.none()");
1123    }
1124
1125    @Override
1126    public boolean matches(char c) {
1127      return false;
1128    }
1129
1130    @Override
1131    public int indexIn(CharSequence sequence) {
1132      checkNotNull(sequence);
1133      return -1;
1134    }
1135
1136    @Override
1137    public int indexIn(CharSequence sequence, int start) {
1138      int length = sequence.length();
1139      checkPositionIndex(start, length);
1140      return -1;
1141    }
1142
1143    @Override
1144    public int lastIndexIn(CharSequence sequence) {
1145      checkNotNull(sequence);
1146      return -1;
1147    }
1148
1149    @Override
1150    public boolean matchesAllOf(CharSequence sequence) {
1151      return sequence.length() == 0;
1152    }
1153
1154    @Override
1155    public boolean matchesNoneOf(CharSequence sequence) {
1156      checkNotNull(sequence);
1157      return true;
1158    }
1159
1160    @Override
1161    public String removeFrom(CharSequence sequence) {
1162      return sequence.toString();
1163    }
1164
1165    @Override
1166    public String replaceFrom(CharSequence sequence, char replacement) {
1167      return sequence.toString();
1168    }
1169
1170    @Override
1171    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1172      checkNotNull(replacement);
1173      return sequence.toString();
1174    }
1175
1176    @Override
1177    public String collapseFrom(CharSequence sequence, char replacement) {
1178      return sequence.toString();
1179    }
1180
1181    @Override
1182    public String trimFrom(CharSequence sequence) {
1183      return sequence.toString();
1184    }
1185
1186    @Override
1187    public String trimLeadingFrom(CharSequence sequence) {
1188      return sequence.toString();
1189    }
1190
1191    @Override
1192    public String trimTrailingFrom(CharSequence sequence) {
1193      return sequence.toString();
1194    }
1195
1196    @Override
1197    public int countIn(CharSequence sequence) {
1198      checkNotNull(sequence);
1199      return 0;
1200    }
1201
1202    @Override
1203    public CharMatcher and(CharMatcher other) {
1204      checkNotNull(other);
1205      return this;
1206    }
1207
1208    @Override
1209    public CharMatcher or(CharMatcher other) {
1210      return checkNotNull(other);
1211    }
1212
1213    @Override
1214    public CharMatcher negate() {
1215      return any();
1216    }
1217  }
1218
1219  /** Implementation of {@link #whitespace()}. */
1220  @VisibleForTesting
1221  static final class Whitespace extends NamedFastMatcher {
1222
1223    // TABLE is a precomputed hashset of whitespace characters. MULTIPLIER serves as a hash function
1224    // whose key property is that it maps 25 characters into the 32-slot table without collision.
1225    // Basically this is an opportunistic fast implementation as opposed to "good code". For most
1226    // other use-cases, the reduction in readability isn't worth it.
1227    static final String TABLE =
1228        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1229            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1230            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1231            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1232    static final int MULTIPLIER = 1682554634;
1233    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1234
1235    static final CharMatcher INSTANCE = new Whitespace();
1236
1237    Whitespace() {
1238      super("CharMatcher.whitespace()");
1239    }
1240
1241    @Override
1242    public boolean matches(char c) {
1243      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1244    }
1245
1246    @GwtIncompatible // used only from other GwtIncompatible code
1247    @Override
1248    void setBits(BitSet table) {
1249      for (int i = 0; i < TABLE.length(); i++) {
1250        table.set(TABLE.charAt(i));
1251      }
1252    }
1253  }
1254
1255  /** Implementation of {@link #breakingWhitespace()}. */
1256  private static final class BreakingWhitespace extends CharMatcher {
1257
1258    static final CharMatcher INSTANCE = new BreakingWhitespace();
1259
1260    @Override
1261    public boolean matches(char c) {
1262      switch (c) {
1263        case '\t':
1264        case '\n':
1265        case '\013':
1266        case '\f':
1267        case '\r':
1268        case ' ':
1269        case '\u0085':
1270        case '\u1680':
1271        case '\u2028':
1272        case '\u2029':
1273        case '\u205f':
1274        case '\u3000':
1275          return true;
1276        case '\u2007':
1277          return false;
1278        default:
1279          return c >= '\u2000' && c <= '\u200a';
1280      }
1281    }
1282
1283    @Override
1284    public String toString() {
1285      return "CharMatcher.breakingWhitespace()";
1286    }
1287  }
1288
1289  /** Implementation of {@link #ascii()}. */
1290  private static final class Ascii extends NamedFastMatcher {
1291
1292    static final CharMatcher INSTANCE = new Ascii();
1293
1294    Ascii() {
1295      super("CharMatcher.ascii()");
1296    }
1297
1298    @Override
1299    public boolean matches(char c) {
1300      return c <= '\u007f';
1301    }
1302  }
1303
1304  /** Implementation that matches characters that fall within multiple ranges. */
1305  private static class RangesMatcher extends CharMatcher {
1306
1307    private final String description;
1308    private final char[] rangeStarts;
1309    private final char[] rangeEnds;
1310
1311    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1312      this.description = description;
1313      this.rangeStarts = rangeStarts;
1314      this.rangeEnds = rangeEnds;
1315      checkArgument(rangeStarts.length == rangeEnds.length);
1316      for (int i = 0; i < rangeStarts.length; i++) {
1317        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1318        if (i + 1 < rangeStarts.length) {
1319          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1320        }
1321      }
1322    }
1323
1324    @Override
1325    public boolean matches(char c) {
1326      int index = Arrays.binarySearch(rangeStarts, c);
1327      if (index >= 0) {
1328        return true;
1329      } else {
1330        index = ~index - 1;
1331        return index >= 0 && c <= rangeEnds[index];
1332      }
1333    }
1334
1335    @Override
1336    public String toString() {
1337      return description;
1338    }
1339  }
1340
1341  /** Implementation of {@link #digit()}. */
1342  private static final class Digit extends RangesMatcher {
1343    // Plug the following UnicodeSet pattern into
1344    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1345    // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]]
1346    // and get the zeroes from there.
1347
1348    // Must be in ascending order.
1349    private static final String ZEROES =
1350        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6"
1351            + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0"
1352            + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10";
1353
1354    private static char[] zeroes() {
1355      return ZEROES.toCharArray();
1356    }
1357
1358    private static char[] nines() {
1359      char[] nines = new char[ZEROES.length()];
1360      for (int i = 0; i < ZEROES.length(); i++) {
1361        nines[i] = (char) (ZEROES.charAt(i) + 9);
1362      }
1363      return nines;
1364    }
1365
1366    static final CharMatcher INSTANCE = new Digit();
1367
1368    private Digit() {
1369      super("CharMatcher.digit()", zeroes(), nines());
1370    }
1371  }
1372
1373  /** Implementation of {@link #javaDigit()}. */
1374  private static final class JavaDigit extends CharMatcher {
1375
1376    static final CharMatcher INSTANCE = new JavaDigit();
1377
1378    @Override
1379    public boolean matches(char c) {
1380      return Character.isDigit(c);
1381    }
1382
1383    @Override
1384    public String toString() {
1385      return "CharMatcher.javaDigit()";
1386    }
1387  }
1388
1389  /** Implementation of {@link #javaLetter()}. */
1390  private static final class JavaLetter extends CharMatcher {
1391
1392    static final CharMatcher INSTANCE = new JavaLetter();
1393
1394    @Override
1395    public boolean matches(char c) {
1396      return Character.isLetter(c);
1397    }
1398
1399    @Override
1400    public String toString() {
1401      return "CharMatcher.javaLetter()";
1402    }
1403  }
1404
1405  /** Implementation of {@link #javaLetterOrDigit()}. */
1406  private static final class JavaLetterOrDigit extends CharMatcher {
1407
1408    static final CharMatcher INSTANCE = new JavaLetterOrDigit();
1409
1410    @Override
1411    public boolean matches(char c) {
1412      return Character.isLetterOrDigit(c);
1413    }
1414
1415    @Override
1416    public String toString() {
1417      return "CharMatcher.javaLetterOrDigit()";
1418    }
1419  }
1420
1421  /** Implementation of {@link #javaUpperCase()}. */
1422  private static final class JavaUpperCase extends CharMatcher {
1423
1424    static final CharMatcher INSTANCE = new JavaUpperCase();
1425
1426    @Override
1427    public boolean matches(char c) {
1428      return Character.isUpperCase(c);
1429    }
1430
1431    @Override
1432    public String toString() {
1433      return "CharMatcher.javaUpperCase()";
1434    }
1435  }
1436
1437  /** Implementation of {@link #javaLowerCase()}. */
1438  private static final class JavaLowerCase extends CharMatcher {
1439
1440    static final CharMatcher INSTANCE = new JavaLowerCase();
1441
1442    @Override
1443    public boolean matches(char c) {
1444      return Character.isLowerCase(c);
1445    }
1446
1447    @Override
1448    public String toString() {
1449      return "CharMatcher.javaLowerCase()";
1450    }
1451  }
1452
1453  /** Implementation of {@link #javaIsoControl()}. */
1454  private static final class JavaIsoControl extends NamedFastMatcher {
1455
1456    static final CharMatcher INSTANCE = new JavaIsoControl();
1457
1458    private JavaIsoControl() {
1459      super("CharMatcher.javaIsoControl()");
1460    }
1461
1462    @Override
1463    public boolean matches(char c) {
1464      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1465    }
1466  }
1467
1468  /** Implementation of {@link #invisible()}. */
1469  private static final class Invisible extends RangesMatcher {
1470    // Plug the following UnicodeSet pattern into
1471    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1472    // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]]
1473    // with the "Abbreviate" option, and get the ranges from there.
1474    private static final String RANGE_STARTS =
1475        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u0890\u08e2\u1680\u180e\u2000\u2028\u205f\u2066"
1476            + "\u3000\ud800\ufeff\ufff9";
1477    private static final String RANGE_ENDS = // inclusive ends
1478        "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u0891\u08e2\u1680\u180e\u200f\u202f\u2064\u206f"
1479            + "\u3000\uf8ff\ufeff\ufffb";
1480
1481    static final CharMatcher INSTANCE = new Invisible();
1482
1483    private Invisible() {
1484      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1485    }
1486  }
1487
1488  /** Implementation of {@link #singleWidth()}. */
1489  private static final class SingleWidth extends RangesMatcher {
1490
1491    static final CharMatcher INSTANCE = new SingleWidth();
1492
1493    private SingleWidth() {
1494      super(
1495          "CharMatcher.singleWidth()",
1496          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1497          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1498    }
1499  }
1500
1501  // Non-static factory implementation classes
1502
1503  /** Implementation of {@link #negate()}. */
1504  private static class Negated extends CharMatcher {
1505
1506    final CharMatcher original;
1507
1508    Negated(CharMatcher original) {
1509      this.original = checkNotNull(original);
1510    }
1511
1512    @Override
1513    public boolean matches(char c) {
1514      return !original.matches(c);
1515    }
1516
1517    @Override
1518    public boolean matchesAllOf(CharSequence sequence) {
1519      return original.matchesNoneOf(sequence);
1520    }
1521
1522    @Override
1523    public boolean matchesNoneOf(CharSequence sequence) {
1524      return original.matchesAllOf(sequence);
1525    }
1526
1527    @Override
1528    public int countIn(CharSequence sequence) {
1529      return sequence.length() - original.countIn(sequence);
1530    }
1531
1532    @GwtIncompatible // used only from other GwtIncompatible code
1533    @Override
1534    void setBits(BitSet table) {
1535      BitSet tmp = new BitSet();
1536      original.setBits(tmp);
1537      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1538      table.or(tmp);
1539    }
1540
1541    @Override
1542    public CharMatcher negate() {
1543      return original;
1544    }
1545
1546    @Override
1547    public String toString() {
1548      return original + ".negate()";
1549    }
1550  }
1551
1552  /** Implementation of {@link #and(CharMatcher)}. */
1553  private static final class And extends CharMatcher {
1554
1555    final CharMatcher first;
1556    final CharMatcher second;
1557
1558    And(CharMatcher a, CharMatcher b) {
1559      first = checkNotNull(a);
1560      second = checkNotNull(b);
1561    }
1562
1563    @Override
1564    public boolean matches(char c) {
1565      return first.matches(c) && second.matches(c);
1566    }
1567
1568    @GwtIncompatible // used only from other GwtIncompatible code
1569    @Override
1570    void setBits(BitSet table) {
1571      BitSet tmp1 = new BitSet();
1572      first.setBits(tmp1);
1573      BitSet tmp2 = new BitSet();
1574      second.setBits(tmp2);
1575      tmp1.and(tmp2);
1576      table.or(tmp1);
1577    }
1578
1579    @Override
1580    public String toString() {
1581      return "CharMatcher.and(" + first + ", " + second + ")";
1582    }
1583  }
1584
1585  /** Implementation of {@link #or(CharMatcher)}. */
1586  private static final class Or extends CharMatcher {
1587
1588    final CharMatcher first;
1589    final CharMatcher second;
1590
1591    Or(CharMatcher a, CharMatcher b) {
1592      first = checkNotNull(a);
1593      second = checkNotNull(b);
1594    }
1595
1596    @GwtIncompatible // used only from other GwtIncompatible code
1597    @Override
1598    void setBits(BitSet table) {
1599      first.setBits(table);
1600      second.setBits(table);
1601    }
1602
1603    @Override
1604    public boolean matches(char c) {
1605      return first.matches(c) || second.matches(c);
1606    }
1607
1608    @Override
1609    public String toString() {
1610      return "CharMatcher.or(" + first + ", " + second + ")";
1611    }
1612  }
1613
1614  // Static factory implementations
1615
1616  /** Implementation of {@link #is(char)}. */
1617  private static final class Is extends FastMatcher {
1618
1619    private final char match;
1620
1621    Is(char match) {
1622      this.match = match;
1623    }
1624
1625    @Override
1626    public boolean matches(char c) {
1627      return c == match;
1628    }
1629
1630    @Override
1631    public String replaceFrom(CharSequence sequence, char replacement) {
1632      return sequence.toString().replace(match, replacement);
1633    }
1634
1635    @Override
1636    public CharMatcher and(CharMatcher other) {
1637      return other.matches(match) ? this : none();
1638    }
1639
1640    @Override
1641    public CharMatcher or(CharMatcher other) {
1642      return other.matches(match) ? other : super.or(other);
1643    }
1644
1645    @Override
1646    public CharMatcher negate() {
1647      return isNot(match);
1648    }
1649
1650    @GwtIncompatible // used only from other GwtIncompatible code
1651    @Override
1652    void setBits(BitSet table) {
1653      table.set(match);
1654    }
1655
1656    @Override
1657    public String toString() {
1658      return "CharMatcher.is('" + showCharacter(match) + "')";
1659    }
1660  }
1661
1662  /** Implementation of {@link #isNot(char)}. */
1663  private static final class IsNot extends FastMatcher {
1664
1665    private final char match;
1666
1667    IsNot(char match) {
1668      this.match = match;
1669    }
1670
1671    @Override
1672    public boolean matches(char c) {
1673      return c != match;
1674    }
1675
1676    @Override
1677    public CharMatcher and(CharMatcher other) {
1678      return other.matches(match) ? super.and(other) : other;
1679    }
1680
1681    @Override
1682    public CharMatcher or(CharMatcher other) {
1683      return other.matches(match) ? any() : this;
1684    }
1685
1686    @GwtIncompatible // used only from other GwtIncompatible code
1687    @Override
1688    void setBits(BitSet table) {
1689      table.set(0, match);
1690      table.set(match + 1, Character.MAX_VALUE + 1);
1691    }
1692
1693    @Override
1694    public CharMatcher negate() {
1695      return is(match);
1696    }
1697
1698    @Override
1699    public String toString() {
1700      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1701    }
1702  }
1703
1704  private static CharMatcher.IsEither isEither(char c1, char c2) {
1705    return new CharMatcher.IsEither(c1, c2);
1706  }
1707
1708  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1709  private static final class IsEither extends FastMatcher {
1710
1711    private final char match1;
1712    private final char match2;
1713
1714    IsEither(char match1, char match2) {
1715      this.match1 = match1;
1716      this.match2 = match2;
1717    }
1718
1719    @Override
1720    public boolean matches(char c) {
1721      return c == match1 || c == match2;
1722    }
1723
1724    @GwtIncompatible // used only from other GwtIncompatible code
1725    @Override
1726    void setBits(BitSet table) {
1727      table.set(match1);
1728      table.set(match2);
1729    }
1730
1731    @Override
1732    public String toString() {
1733      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1734    }
1735  }
1736
1737  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1738  private static final class AnyOf extends CharMatcher {
1739
1740    private final char[] chars;
1741
1742    public AnyOf(CharSequence chars) {
1743      this.chars = chars.toString().toCharArray();
1744      Arrays.sort(this.chars);
1745    }
1746
1747    @Override
1748    public boolean matches(char c) {
1749      return Arrays.binarySearch(chars, c) >= 0;
1750    }
1751
1752    @Override
1753    @GwtIncompatible // used only from other GwtIncompatible code
1754    void setBits(BitSet table) {
1755      for (char c : chars) {
1756        table.set(c);
1757      }
1758    }
1759
1760    @Override
1761    public String toString() {
1762      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1763      for (char c : chars) {
1764        description.append(showCharacter(c));
1765      }
1766      description.append("\")");
1767      return description.toString();
1768    }
1769  }
1770
1771  /** Implementation of {@link #inRange(char, char)}. */
1772  private static final class InRange extends FastMatcher {
1773
1774    private final char startInclusive;
1775    private final char endInclusive;
1776
1777    InRange(char startInclusive, char endInclusive) {
1778      checkArgument(endInclusive >= startInclusive);
1779      this.startInclusive = startInclusive;
1780      this.endInclusive = endInclusive;
1781    }
1782
1783    @Override
1784    public boolean matches(char c) {
1785      return startInclusive <= c && c <= endInclusive;
1786    }
1787
1788    @GwtIncompatible // used only from other GwtIncompatible code
1789    @Override
1790    void setBits(BitSet table) {
1791      table.set(startInclusive, endInclusive + 1);
1792    }
1793
1794    @Override
1795    public String toString() {
1796      return "CharMatcher.inRange('"
1797          + showCharacter(startInclusive)
1798          + "', '"
1799          + showCharacter(endInclusive)
1800          + "')";
1801    }
1802  }
1803
1804  /** Implementation of {@link #forPredicate(Predicate)}. */
1805  private static final class ForPredicate extends CharMatcher {
1806
1807    private final Predicate<? super Character> predicate;
1808
1809    ForPredicate(Predicate<? super Character> predicate) {
1810      this.predicate = checkNotNull(predicate);
1811    }
1812
1813    @Override
1814    public boolean matches(char c) {
1815      return predicate.apply(c);
1816    }
1817
1818    @Deprecated
1819    @Override
1820    public boolean apply(Character character) {
1821      return predicate.apply(checkNotNull(character));
1822    }
1823
1824    @Override
1825    public String toString() {
1826      return "CharMatcher.forPredicate(" + predicate + ")";
1827    }
1828  }
1829}