001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.VisibleForTesting;
024import java.util.Arrays;
025import java.util.BitSet;
026
027/**
028 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
029 * for any {@link Object}. Also offers basic text processing methods based on this function.
030 * Implementations are strongly encouraged to be side-effect-free and immutable.
031 *
032 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
033 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
034 *
035 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a
036 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand
037 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code
038 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of
039 * assigned characters, including important CJK characters and emoji.
040 *
041 * <p>Supplementary characters are <a
042 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded
043 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as
044 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s.
045 *
046 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for
047 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For
048 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner.
049 *
050 * <p>Example usages:
051 *
052 * <pre>
053 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
054 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
055 *
056 * <p>See the Guava User Guide article on <a
057 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
058 * </a>.
059 *
060 * @author Kevin Bourrillion
061 * @since 1.0
062 */
063@GwtCompatible(emulated = true)
064@ElementTypesAreNonnullByDefault
065public abstract class CharMatcher implements Predicate<Character> {
066  /*
067   *           N777777777NO
068   *         N7777777777777N
069   *        M777777777777777N
070   *        $N877777777D77777M
071   *       N M77777777ONND777M
072   *       MN777777777NN  D777
073   *     N7ZN777777777NN ~M7778
074   *    N777777777777MMNN88777N
075   *    N777777777777MNZZZ7777O
076   *    DZN7777O77777777777777
077   *     N7OONND7777777D77777N
078   *      8$M++++?N???$77777$
079   *       M7++++N+M77777777N
080   *        N77O777777777777$                              M
081   *          DNNM$$$$777777N                              D
082   *         N$N:=N$777N7777M                             NZ
083   *        77Z::::N777777777                          ODZZZ
084   *       77N::::::N77777777M                         NNZZZ$
085   *     $777:::::::77777777MN                        ZM8ZZZZZ
086   *     777M::::::Z7777777Z77                        N++ZZZZNN
087   *    7777M:::::M7777777$777M                       $++IZZZZM
088   *   M777$:::::N777777$M7777M                       +++++ZZZDN
089   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
090   *       N::::::N:7$O:77777777                      N++++ZZZZN
091   *       M::::::::::::N77777777+                   +?+++++ZZZM
092   *       8::::::::::::D77777777M                    O+++++ZZ
093   *        ::::::::::::M777777777N                      O+?D
094   *        M:::::::::::M77777777778                     77=
095   *        D=::::::::::N7777777777N                    777
096   *       INN===::::::=77777777777N                  I777N
097   *      ?777N========N7777777777787M               N7777
098   *      77777$D======N77777777777N777N?         N777777
099   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
100   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
101   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
102   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
103   *   7 :N MNN$$$$M$      $$$777$8      8D8I
104   *     NMM.:7O           777777778
105   *                       7777777MN
106   *                       M NO .7:
107   *                       M   :   M
108   *                            8
109   */
110
111  // Constant matcher factory methods
112
  /**
   * Matches any character.
   *
   * @return the {@code Any.INSTANCE} matcher
   * @since 19.0 (since 1.0 as constant {@code ANY})
   */
  public static CharMatcher any() {
    return Any.INSTANCE;
  }
121
  /**
   * Matches no characters.
   *
   * @return the {@code None.INSTANCE} matcher
   * @since 19.0 (since 1.0 as constant {@code NONE})
   */
  public static CharMatcher none() {
    return None.INSTANCE;
  }
130
  /**
   * Determines whether a character is whitespace according to the latest Unicode standard, as
   * illustrated <a
   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
   * This is not the same definition used by other Java APIs. (See a <a
   * href="https://docs.google.com/spreadsheets/d/1kq4ECwPjHX9B8QUCTPclgsDCXYaj7T-FlT4tB5q3ahk/edit">comparison
   * of several definitions of "whitespace"</a>.)
   *
   * <p>All Unicode White_Space characters are on the BMP and thus supported by this API.
   *
   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
   * date.
   *
   * @return the {@code Whitespace.INSTANCE} matcher
   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
   */
  public static CharMatcher whitespace() {
    return Whitespace.INSTANCE;
  }
149
  /**
   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
   * discussion of that term.
   *
   * @return the {@code BreakingWhitespace.INSTANCE} matcher
   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
   */
  public static CharMatcher breakingWhitespace() {
    return BreakingWhitespace.INSTANCE;
  }
160
  /**
   * Determines whether a character is ASCII, meaning that its code point is less than 128.
   *
   * @return the {@code Ascii.INSTANCE} matcher
   * @since 19.0 (since 1.0 as constant {@code ASCII})
   */
  public static CharMatcher ascii() {
    return Ascii.INSTANCE;
  }
169
  /**
   * Determines whether a character is a BMP digit according to <a
   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
   *
   * @return the {@code Digit.INSTANCE} matcher
   * @deprecated Many digits are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code DIGIT})
   */
  @Deprecated
  public static CharMatcher digit() {
    return Digit.INSTANCE;
  }
182
  /**
   * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char)
   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
   * '9')}.
   *
   * @return the {@code JavaDigit.INSTANCE} matcher
   * @deprecated Many digits are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
   */
  @Deprecated
  public static CharMatcher javaDigit() {
    return JavaDigit.INSTANCE;
  }
195
  /**
   * Determines whether a character is a BMP letter according to {@linkplain
   * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
   * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
   *
   * @return the {@code JavaLetter.INSTANCE} matcher
   * @deprecated Most letters are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
   */
  @Deprecated
  public static CharMatcher javaLetter() {
    return JavaLetter.INSTANCE;
  }
208
  /**
   * Determines whether a character is a BMP letter or digit according to {@linkplain
   * Character#isLetterOrDigit(char) Java's definition}.
   *
   * @return the {@code JavaLetterOrDigit.INSTANCE} matcher
   * @deprecated Most letters and digits are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT})
   */
  @Deprecated
  public static CharMatcher javaLetterOrDigit() {
    return JavaLetterOrDigit.INSTANCE;
  }
220
  /**
   * Determines whether a BMP character is upper case according to {@linkplain
   * Character#isUpperCase(char) Java's definition}.
   *
   * @return the {@code JavaUpperCase.INSTANCE} matcher
   * @deprecated Some uppercase characters are supplementary characters; see the class
   *     documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
   */
  @Deprecated
  public static CharMatcher javaUpperCase() {
    return JavaUpperCase.INSTANCE;
  }
233
  /**
   * Determines whether a BMP character is lower case according to {@linkplain
   * Character#isLowerCase(char) Java's definition}.
   *
   * @return the {@code JavaLowerCase.INSTANCE} matcher
   * @deprecated Some lowercase characters are supplementary characters; see the class
   *     documentation.
   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
   */
  @Deprecated
  public static CharMatcher javaLowerCase() {
    return JavaLowerCase.INSTANCE;
  }
246
  /**
   * Determines whether a character is an ISO control character as specified by {@link
   * Character#isISOControl(char)}.
   *
   * <p>All ISO control codes are on the BMP and thus supported by this API.
   *
   * @return the {@code JavaIsoControl.INSTANCE} matcher
   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
   */
  public static CharMatcher javaIsoControl() {
    return JavaIsoControl.INSTANCE;
  }
258
  /**
   * Determines whether a character is invisible; that is, if its Unicode category is any of
   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
   * PRIVATE_USE according to ICU4J.
   *
   * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU).
   *
   * @return the {@code Invisible.INSTANCE} matcher
   * @deprecated Most invisible characters are supplementary characters; see the class
   *     documentation.
   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
   */
  @Deprecated
  public static CharMatcher invisible() {
    return Invisible.INSTANCE;
  }
274
  /**
   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
   * errs on the side of returning {@code false} (that is, it tends to assume a character is
   * double-width).
   *
   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
   * date.
   *
   * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>.
   *
   * @return the {@code SingleWidth.INSTANCE} matcher
   * @deprecated Many such characters are supplementary characters; see the class documentation.
   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
   */
  @Deprecated
  public static CharMatcher singleWidth() {
    return SingleWidth.INSTANCE;
  }
292
293  // Static factories
294
  /**
   * Returns a {@code char} matcher that matches only one specified BMP character.
   *
   * @param match the single {@code char} value the returned matcher should match
   * @return a new {@code Is} matcher built from {@code match}
   */
  public static CharMatcher is(final char match) {
    return new Is(match);
  }
299
  /**
   * Returns a {@code char} matcher that matches any character except the BMP character specified.
   *
   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
   *
   * @param match the single {@code char} value the returned matcher should reject
   * @return a new {@code IsNot} matcher built from {@code match}
   */
  public static CharMatcher isNot(final char match) {
    return new IsNot(match);
  }
308
309  /**
310   * Returns a {@code char} matcher that matches any BMP character present in the given character
311   * sequence. Returns a bogus matcher if the sequence contains supplementary characters.
312   */
313  public static CharMatcher anyOf(final CharSequence sequence) {
314    switch (sequence.length()) {
315      case 0:
316        return none();
317      case 1:
318        return is(sequence.charAt(0));
319      case 2:
320        return isEither(sequence.charAt(0), sequence.charAt(1));
321      default:
322        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
323        // matcher?
324        return new AnyOf(sequence);
325    }
326  }
327
  /**
   * Returns a {@code char} matcher that matches any BMP character not present in the given
   * character sequence. Returns a bogus matcher if the sequence contains supplementary characters.
   *
   * <p>This is implemented as {@code anyOf(sequence).negate()}.
   *
   * @param sequence the characters the returned matcher should reject
   * @return the negation of {@link #anyOf anyOf(sequence)}
   */
  public static CharMatcher noneOf(CharSequence sequence) {
    return anyOf(sequence).negate();
  }
335
  /**
   * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints
   * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
   * CharMatcher.inRange('a', 'z')}.
   *
   * @param startInclusive the lowest {@code char} value in the range
   * @param endInclusive the highest {@code char} value in the range
   * @return a new {@code InRange} matcher built from the two endpoints
   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
   */
  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
    return new InRange(startInclusive, endInclusive);
  }
346
347  /**
348   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
349   * which operates on primitive {@code char} instances instead.
350   */
351  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
352    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
353  }
354
355  // Constructors
356
  /**
   * Constructor for use by subclasses. When subclassing, you may want to override {@code
   * toString()} to provide a useful description.
   *
   * <p>The constructor itself performs no work.
   */
  protected CharMatcher() {}
362
363  // Abstract methods
364
  /**
   * Determines a true or false value for the given character.
   *
   * <p>The default text processing methods of this class, such as {@link
   * #indexIn(CharSequence, int)} and {@link #countIn}, are implemented in terms of this method.
   *
   * @param c the {@code char} value to test
   * @return {@code true} if this matcher matches {@code c}
   */
  public abstract boolean matches(char c);
367
368  // Non-static factories
369
  /**
   * Returns a matcher that matches any character not matched by this matcher.
   *
   * @return a new {@code Negated} matcher wrapping this one
   */
  // @Override under Java 8 but not under Java 7
  @Override
  public CharMatcher negate() {
    return new Negated(this);
  }
376
  /**
   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
   *
   * @param other the matcher to combine with this one
   * @return a new {@code And} matcher built from this matcher and {@code other}, in that order
   */
  public CharMatcher and(CharMatcher other) {
    return new And(this, other);
  }
383
  /**
   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
   *
   * @param other the matcher to combine with this one
   * @return a new {@code Or} matcher built from this matcher and {@code other}, in that order
   */
  public CharMatcher or(CharMatcher other) {
    return new Or(this, other);
  }
390
  /**
   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
   * worthwhile only if the precomputed matcher is queried many thousands of times.
   *
   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
   * worthwhile tradeoff in a browser.
   *
   * <p>The call is forwarded to {@code Platform.precomputeCharMatcher(this)}, which is where the
   * platform-specific choice is made.
   *
   * @return a matcher with the same matching behavior as this one
   */
  public CharMatcher precomputed() {
    return Platform.precomputeCharMatcher(this);
  }
403
  /** Number of distinct {@code char} values (65536); used to decide when negating is cheaper. */
  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
405
406  /**
407   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
408   * on {@link Platform} so that we can have different behavior in GWT.
409   *
410   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
411   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
412   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
413   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
414   * matcher which is faster to query than the original.
415   */
416  @GwtIncompatible // SmallCharMatcher
417  CharMatcher precomputedInternal() {
418    final BitSet table = new BitSet();
419    setBits(table);
420    int totalCharacters = table.cardinality();
421    if (totalCharacters * 2 <= DISTINCT_CHARS) {
422      return precomputedPositive(totalCharacters, table, toString());
423    } else {
424      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
425      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
426      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
427      String suffix = ".negate()";
428      final String description = toString();
429      String negatedDescription =
430          description.endsWith(suffix)
431              ? description.substring(0, description.length() - suffix.length())
432              : description + suffix;
433      return new NegatedFastMatcher(
434          precomputedPositive(negatedCharacters, table, negatedDescription)) {
435        @Override
436        public String toString() {
437          return description;
438        }
439      };
440    }
441  }
442
443  /**
444   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
445   */
446  @GwtIncompatible // SmallCharMatcher
447  private static CharMatcher precomputedPositive(
448      int totalCharacters, BitSet table, String description) {
449    switch (totalCharacters) {
450      case 0:
451        return none();
452      case 1:
453        return is((char) table.nextSetBit(0));
454      case 2:
455        char c1 = (char) table.nextSetBit(0);
456        char c2 = (char) table.nextSetBit(c1 + 1);
457        return isEither(c1, c2);
458      default:
459        return isSmall(totalCharacters, table.length())
460            ? SmallCharMatcher.from(table, description)
461            : new BitSetMatcher(table, description);
462    }
463  }
464
465  @GwtIncompatible // SmallCharMatcher
466  private static boolean isSmall(int totalCharacters, int tableLength) {
467    return totalCharacters <= SmallCharMatcher.MAX_SIZE
468        && tableLength > (totalCharacters * 4 * Character.SIZE);
469    // err on the side of BitSetMatcher
470  }
471
472  /** Sets bits in {@code table} matched by this matcher. */
473  @GwtIncompatible // used only from other GwtIncompatible code
474  void setBits(BitSet table) {
475    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
476      if (matches((char) c)) {
477        table.set(c);
478      }
479    }
480  }
481
482  // Text processing routines
483
  /**
   * Returns {@code true} if a character sequence contains at least one matching BMP character.
   * Equivalent to {@code !matchesNoneOf(sequence)}.
   *
   * <p>The default implementation is exactly that negation: it calls {@link #matchesNoneOf} and
   * inverts the result.
   *
   * @param sequence the character sequence to examine, possibly empty
   * @return {@code true} if this matcher matches at least one character in the sequence
   * @since 8.0
   */
  public boolean matchesAnyOf(CharSequence sequence) {
    return !matchesNoneOf(sequence);
  }
498
499  /**
500   * Returns {@code true} if a character sequence contains only matching BMP characters.
501   *
502   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
503   * character, until this returns {@code false} or the end is reached.
504   *
505   * @param sequence the character sequence to examine, possibly empty
506   * @return {@code true} if this matcher matches every character in the sequence, including when
507   *     the sequence is empty
508   */
509  public boolean matchesAllOf(CharSequence sequence) {
510    for (int i = sequence.length() - 1; i >= 0; i--) {
511      if (!matches(sequence.charAt(i))) {
512        return false;
513      }
514    }
515    return true;
516  }
517
  /**
   * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to
   * {@code !matchesAnyOf(sequence)}.
   *
   * <p>The default implementation calls {@link #indexIn(CharSequence)} and reports whether it
   * found no match, that is, whether it returned {@code -1}.
   *
   * @param sequence the character sequence to examine, possibly empty
   * @return {@code true} if this matcher matches no characters in the sequence, including when the
   *     sequence is empty
   */
  public boolean matchesNoneOf(CharSequence sequence) {
    return indexIn(sequence) == -1;
  }
532
  /**
   * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if
   * no matching character is present.
   *
   * <p>The default implementation delegates to {@link #indexIn(CharSequence, int)} with a start
   * index of {@code 0}.
   *
   * @param sequence the character sequence to examine from the beginning
   * @return an index, or {@code -1} if no character matches
   */
  public int indexIn(CharSequence sequence) {
    return indexIn(sequence, 0);
  }
546
547  /**
548   * Returns the index of the first matching BMP character in a character sequence, starting from a
549   * given position, or {@code -1} if no character matches after that position.
550   *
551   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
552   * start}, calling {@link #matches} for each character.
553   *
554   * @param sequence the character sequence to examine
555   * @param start the first index to examine; must be nonnegative and no greater than {@code
556   *     sequence.length()}
557   * @return the index of the first matching character, guaranteed to be no less than {@code start},
558   *     or {@code -1} if no character matches
559   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
560   *     sequence.length()}
561   */
562  public int indexIn(CharSequence sequence, int start) {
563    int length = sequence.length();
564    checkPositionIndex(start, length);
565    for (int i = start; i < length; i++) {
566      if (matches(sequence.charAt(i))) {
567        return i;
568      }
569    }
570    return -1;
571  }
572
573  /**
574   * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if
575   * no matching character is present.
576   *
577   * <p>The default implementation iterates over the sequence in reverse order calling {@link
578   * #matches} for each character.
579   *
580   * @param sequence the character sequence to examine from the end
581   * @return an index, or {@code -1} if no character matches
582   */
583  public int lastIndexIn(CharSequence sequence) {
584    for (int i = sequence.length() - 1; i >= 0; i--) {
585      if (matches(sequence.charAt(i))) {
586        return i;
587      }
588    }
589    return -1;
590  }
591
592  /**
593   * Returns the number of matching {@code char}s found in a character sequence.
594   *
595   * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}().
596   */
597  public int countIn(CharSequence sequence) {
598    int count = 0;
599    for (int i = 0; i < sequence.length(); i++) {
600      if (matches(sequence.charAt(i))) {
601        count++;
602      }
603    }
604    return count;
605  }
606
  /**
   * Returns a string containing all non-matching characters of a character sequence, in order. For
   * example:
   *
   * <pre>{@code
   * CharMatcher.is('a').removeFrom("bazaar")
   * }</pre>
   *
   * ... returns {@code "bzr"}.
   *
   * <p>This implementation uses {@link #indexIn(CharSequence)} to find the first matching
   * character; if there is none, {@code sequence.toString()} is returned unchanged. Otherwise the
   * characters after that first match are compacted in place inside a {@code char[]} copy.
   *
   * @param sequence the character sequence to remove matching characters from
   * @return the non-matching characters of {@code sequence}, in their original order
   */
  public String removeFrom(CharSequence sequence) {
    String string = sequence.toString();
    int pos = indexIn(string);
    if (pos == -1) {
      return string;
    }

    char[] chars = string.toCharArray();
    // Number of matching characters dropped so far, i.e. how far each kept character moves left.
    // It starts at 1 to account for the match found at pos.
    int spread = 1;

    // This unusual loop comes from extensive benchmarking
    OUT:
    while (true) {
      // chars[pos] is a matching character here: step over it.
      pos++;
      while (true) {
        if (pos == chars.length) {
          break OUT;
        }
        if (matches(chars[pos])) {
          // Another character to drop; the outer loop widens the shift and skips it.
          break;
        }
        // Keep this character by moving it left over the dropped ones.
        chars[pos - spread] = chars[pos];
        pos++;
      }
      spread++;
    }
    // pos equals chars.length here, so the kept prefix has length chars.length - spread.
    return new String(chars, 0, pos - spread);
  }
645
  /**
   * Returns a string containing all matching BMP characters of a character sequence, in order. For
   * example:
   *
   * <pre>{@code
   * CharMatcher.is('a').retainFrom("bazaar")
   * }</pre>
   *
   * ... returns {@code "aaa"}.
   *
   * <p>The default implementation removes, via {@link #removeFrom}, every character matched by
   * {@link #negate()}.
   *
   * @param sequence the character sequence to retain matching characters from
   * @return the matching characters of {@code sequence}, in their original order
   */
  public String retainFrom(CharSequence sequence) {
    return negate().removeFrom(sequence);
  }
659
660  /**
661   * Returns a string copy of the input character sequence, with each matching BMP character
662   * replaced by a given replacement character. For example:
663   *
664   * <pre>{@code
665   * CharMatcher.is('a').replaceFrom("radar", 'o')
666   * }</pre>
667   *
668   * ... returns {@code "rodor"}.
669   *
670   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
671   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
672   * character.
673   *
674   * @param sequence the character sequence to replace matching characters in
675   * @param replacement the character to append to the result string in place of each matching
676   *     character in {@code sequence}
677   * @return the new string
678   */
679  public String replaceFrom(CharSequence sequence, char replacement) {
680    String string = sequence.toString();
681    int pos = indexIn(string);
682    if (pos == -1) {
683      return string;
684    }
685    char[] chars = string.toCharArray();
686    chars[pos] = replacement;
687    for (int i = pos + 1; i < chars.length; i++) {
688      if (matches(chars[i])) {
689        chars[i] = replacement;
690      }
691    }
692    return new String(chars);
693  }
694
695  /**
696   * Returns a string copy of the input character sequence, with each matching BMP character
697   * replaced by a given replacement sequence. For example:
698   *
699   * <pre>{@code
700   * CharMatcher.is('a').replaceFrom("yaha", "oo")
701   * }</pre>
702   *
703   * ... returns {@code "yoohoo"}.
704   *
705   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
706   * off calling {@link #replaceFrom(CharSequence, char)} directly.
707   *
708   * @param sequence the character sequence to replace matching characters in
709   * @param replacement the characters to append to the result string in place of each matching
710   *     character in {@code sequence}
711   * @return the new string
712   */
713  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
714    int replacementLen = replacement.length();
715    if (replacementLen == 0) {
716      return removeFrom(sequence);
717    }
718    if (replacementLen == 1) {
719      return replaceFrom(sequence, replacement.charAt(0));
720    }
721
722    String string = sequence.toString();
723    int pos = indexIn(string);
724    if (pos == -1) {
725      return string;
726    }
727
728    int len = string.length();
729    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
730
731    int oldpos = 0;
732    do {
733      buf.append(string, oldpos, pos);
734      buf.append(replacement);
735      oldpos = pos + 1;
736      pos = indexIn(string, oldpos);
737    } while (pos != -1);
738
739    buf.append(string, oldpos, len);
740    return buf.toString();
741  }
742
743  /**
744   * Returns a substring of the input character sequence that omits all matching BMP characters from
745   * the beginning and from the end of the string. For example:
746   *
747   * <pre>{@code
748   * CharMatcher.anyOf("ab").trimFrom("abacatbab")
749   * }</pre>
750   *
751   * ... returns {@code "cat"}.
752   *
753   * <p>Note that:
754   *
755   * <pre>{@code
756   * CharMatcher.inRange('\0', ' ').trimFrom(str)
757   * }</pre>
758   *
759   * ... is equivalent to {@link String#trim()}.
760   */
761  public String trimFrom(CharSequence sequence) {
762    int len = sequence.length();
763    int first;
764    int last;
765
766    for (first = 0; first < len; first++) {
767      if (!matches(sequence.charAt(first))) {
768        break;
769      }
770    }
771    for (last = len - 1; last > first; last--) {
772      if (!matches(sequence.charAt(last))) {
773        break;
774      }
775    }
776
777    return sequence.subSequence(first, last + 1).toString();
778  }
779
780  /**
781   * Returns a substring of the input character sequence that omits all matching BMP characters from
782   * the beginning of the string. For example:
783   *
784   * <pre>{@code
785   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
786   * }</pre>
787   *
788   * ... returns {@code "catbab"}.
789   */
790  public String trimLeadingFrom(CharSequence sequence) {
791    int len = sequence.length();
792    for (int first = 0; first < len; first++) {
793      if (!matches(sequence.charAt(first))) {
794        return sequence.subSequence(first, len).toString();
795      }
796    }
797    return "";
798  }
799
800  /**
801   * Returns a substring of the input character sequence that omits all matching BMP characters from
802   * the end of the string. For example:
803   *
804   * <pre>{@code
805   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
806   * }</pre>
807   *
808   * ... returns {@code "abacat"}.
809   */
810  public String trimTrailingFrom(CharSequence sequence) {
811    int len = sequence.length();
812    for (int last = len - 1; last >= 0; last--) {
813      if (!matches(sequence.charAt(last))) {
814        return sequence.subSequence(0, last + 1).toString();
815      }
816    }
817    return "";
818  }
819
820  /**
821   * Returns a string copy of the input character sequence, with each group of consecutive matching
822   * BMP characters replaced by a single replacement character. For example:
823   *
824   * <pre>{@code
825   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
826   * }</pre>
827   *
828   * ... returns {@code "b-p-r"}.
829   *
830   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
831   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
832   * character.
833   *
834   * @param sequence the character sequence to replace matching groups of characters in
835   * @param replacement the character to append to the result string in place of each group of
836   *     matching characters in {@code sequence}
837   * @return the new string
838   */
839  public String collapseFrom(CharSequence sequence, char replacement) {
840    // This implementation avoids unnecessary allocation.
841    int len = sequence.length();
842    for (int i = 0; i < len; i++) {
843      char c = sequence.charAt(i);
844      if (matches(c)) {
845        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
846          // a no-op replacement
847          i++;
848        } else {
849          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
850          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
851        }
852      }
853    }
854    // no replacement needed
855    return sequence.toString();
856  }
857
858  /**
859   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
860   * groups of matching BMP characters at the start or end of the sequence are removed without
861   * replacement.
862   */
863  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
864    // This implementation avoids unnecessary allocation.
865    int len = sequence.length();
866    int first = 0;
867    int last = len - 1;
868
869    while (first < len && matches(sequence.charAt(first))) {
870      first++;
871    }
872
873    while (last > first && matches(sequence.charAt(last))) {
874      last--;
875    }
876
877    return (first == 0 && last == len - 1)
878        ? collapseFrom(sequence, replacement)
879        : finishCollapseFrom(
880            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
881  }
882
883  private String finishCollapseFrom(
884      CharSequence sequence,
885      int start,
886      int end,
887      char replacement,
888      StringBuilder builder,
889      boolean inMatchingGroup) {
890    for (int i = start; i < end; i++) {
891      char c = sequence.charAt(i);
892      if (matches(c)) {
893        if (!inMatchingGroup) {
894          builder.append(replacement);
895          inMatchingGroup = true;
896        }
897      } else {
898        builder.append(c);
899        inMatchingGroup = false;
900      }
901    }
902    return builder.toString();
903  }
904
905  /**
906   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
907   *     instead.
908   */
909  @Deprecated
910  @Override
911  public boolean apply(Character character) {
912    return matches(character);
913  }
914
915  /**
916   * Returns a string representation of this {@code CharMatcher}, such as {@code
917   * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
918   */
919  @Override
920  public String toString() {
921    return super.toString();
922  }
923
924  /**
925   * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where
926   * "12AB" is the four hexadecimal digits representing the 16-bit code unit.
927   */
928  private static String showCharacter(char c) {
929    String hex = "0123456789ABCDEF";
930    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
931    for (int i = 0; i < 4; i++) {
932      tmp[5 - i] = hex.charAt(c & 0xF);
933      c = (char) (c >> 4);
934    }
935    return String.copyValueOf(tmp);
936  }
937
938  // Fast matchers
939
940  /** A matcher for which precomputation will not yield any significant benefit. */
941  abstract static class FastMatcher extends CharMatcher {
942
943    @Override
944    public final CharMatcher precomputed() {
945      return this;
946    }
947
948    @Override
949    public CharMatcher negate() {
950      return new NegatedFastMatcher(this);
951    }
952  }
953
954  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
955  abstract static class NamedFastMatcher extends FastMatcher {
956
957    private final String description;
958
959    NamedFastMatcher(String description) {
960      this.description = checkNotNull(description);
961    }
962
963    @Override
964    public final String toString() {
965      return description;
966    }
967  }
968
969  /** Negation of a {@link FastMatcher}. */
970  private static class NegatedFastMatcher extends Negated {
971
972    NegatedFastMatcher(CharMatcher original) {
973      super(original);
974    }
975
976    @Override
977    public final CharMatcher precomputed() {
978      return this;
979    }
980  }
981
982  /** Fast matcher using a {@link BitSet} table of matching characters. */
983  @GwtIncompatible // used only from other GwtIncompatible code
984  private static final class BitSetMatcher extends NamedFastMatcher {
985
986    private final BitSet table;
987
988    private BitSetMatcher(BitSet table, String description) {
989      super(description);
990      if (table.length() + Long.SIZE < table.size()) {
991        table = (BitSet) table.clone();
992        // If only we could actually call BitSet.trimToSize() ourselves...
993      }
994      this.table = table;
995    }
996
997    @Override
998    public boolean matches(char c) {
999      return table.get(c);
1000    }
1001
1002    @Override
1003    void setBits(BitSet bitSet) {
1004      bitSet.or(table);
1005    }
1006  }
1007
1008  // Static constant implementation classes
1009
1010  /** Implementation of {@link #any()}. */
1011  private static final class Any extends NamedFastMatcher {
1012
1013    static final CharMatcher INSTANCE = new Any();
1014
1015    private Any() {
1016      super("CharMatcher.any()");
1017    }
1018
1019    @Override
1020    public boolean matches(char c) {
1021      return true;
1022    }
1023
1024    @Override
1025    public int indexIn(CharSequence sequence) {
1026      return (sequence.length() == 0) ? -1 : 0;
1027    }
1028
1029    @Override
1030    public int indexIn(CharSequence sequence, int start) {
1031      int length = sequence.length();
1032      checkPositionIndex(start, length);
1033      return (start == length) ? -1 : start;
1034    }
1035
1036    @Override
1037    public int lastIndexIn(CharSequence sequence) {
1038      return sequence.length() - 1;
1039    }
1040
1041    @Override
1042    public boolean matchesAllOf(CharSequence sequence) {
1043      checkNotNull(sequence);
1044      return true;
1045    }
1046
1047    @Override
1048    public boolean matchesNoneOf(CharSequence sequence) {
1049      return sequence.length() == 0;
1050    }
1051
1052    @Override
1053    public String removeFrom(CharSequence sequence) {
1054      checkNotNull(sequence);
1055      return "";
1056    }
1057
1058    @Override
1059    public String replaceFrom(CharSequence sequence, char replacement) {
1060      char[] array = new char[sequence.length()];
1061      Arrays.fill(array, replacement);
1062      return new String(array);
1063    }
1064
1065    @Override
1066    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1067      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1068      for (int i = 0; i < sequence.length(); i++) {
1069        result.append(replacement);
1070      }
1071      return result.toString();
1072    }
1073
1074    @Override
1075    public String collapseFrom(CharSequence sequence, char replacement) {
1076      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1077    }
1078
1079    @Override
1080    public String trimFrom(CharSequence sequence) {
1081      checkNotNull(sequence);
1082      return "";
1083    }
1084
1085    @Override
1086    public int countIn(CharSequence sequence) {
1087      return sequence.length();
1088    }
1089
1090    @Override
1091    public CharMatcher and(CharMatcher other) {
1092      return checkNotNull(other);
1093    }
1094
1095    @Override
1096    public CharMatcher or(CharMatcher other) {
1097      checkNotNull(other);
1098      return this;
1099    }
1100
1101    @Override
1102    public CharMatcher negate() {
1103      return none();
1104    }
1105  }
1106
1107  /** Implementation of {@link #none()}. */
1108  private static final class None extends NamedFastMatcher {
1109
1110    static final CharMatcher INSTANCE = new None();
1111
1112    private None() {
1113      super("CharMatcher.none()");
1114    }
1115
1116    @Override
1117    public boolean matches(char c) {
1118      return false;
1119    }
1120
1121    @Override
1122    public int indexIn(CharSequence sequence) {
1123      checkNotNull(sequence);
1124      return -1;
1125    }
1126
1127    @Override
1128    public int indexIn(CharSequence sequence, int start) {
1129      int length = sequence.length();
1130      checkPositionIndex(start, length);
1131      return -1;
1132    }
1133
1134    @Override
1135    public int lastIndexIn(CharSequence sequence) {
1136      checkNotNull(sequence);
1137      return -1;
1138    }
1139
1140    @Override
1141    public boolean matchesAllOf(CharSequence sequence) {
1142      return sequence.length() == 0;
1143    }
1144
1145    @Override
1146    public boolean matchesNoneOf(CharSequence sequence) {
1147      checkNotNull(sequence);
1148      return true;
1149    }
1150
1151    @Override
1152    public String removeFrom(CharSequence sequence) {
1153      return sequence.toString();
1154    }
1155
1156    @Override
1157    public String replaceFrom(CharSequence sequence, char replacement) {
1158      return sequence.toString();
1159    }
1160
1161    @Override
1162    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1163      checkNotNull(replacement);
1164      return sequence.toString();
1165    }
1166
1167    @Override
1168    public String collapseFrom(CharSequence sequence, char replacement) {
1169      return sequence.toString();
1170    }
1171
1172    @Override
1173    public String trimFrom(CharSequence sequence) {
1174      return sequence.toString();
1175    }
1176
1177    @Override
1178    public String trimLeadingFrom(CharSequence sequence) {
1179      return sequence.toString();
1180    }
1181
1182    @Override
1183    public String trimTrailingFrom(CharSequence sequence) {
1184      return sequence.toString();
1185    }
1186
1187    @Override
1188    public int countIn(CharSequence sequence) {
1189      checkNotNull(sequence);
1190      return 0;
1191    }
1192
1193    @Override
1194    public CharMatcher and(CharMatcher other) {
1195      checkNotNull(other);
1196      return this;
1197    }
1198
1199    @Override
1200    public CharMatcher or(CharMatcher other) {
1201      return checkNotNull(other);
1202    }
1203
1204    @Override
1205    public CharMatcher negate() {
1206      return any();
1207    }
1208  }
1209
1210  /** Implementation of {@link #whitespace()}. */
1211  @VisibleForTesting
1212  static final class Whitespace extends NamedFastMatcher {
1213
1214    // TABLE is a precomputed hashset of whitespace characters. MULTIPLIER serves as a hash function
1215    // whose key property is that it maps 25 characters into the 32-slot table without collision.
1216    // Basically this is an opportunistic fast implementation as opposed to "good code". For most
1217    // other use-cases, the reduction in readability isn't worth it.
1218    static final String TABLE =
1219        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1220            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1221            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1222            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1223    static final int MULTIPLIER = 1682554634;
1224    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1225
1226    static final CharMatcher INSTANCE = new Whitespace();
1227
1228    Whitespace() {
1229      super("CharMatcher.whitespace()");
1230    }
1231
1232    @Override
1233    public boolean matches(char c) {
1234      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1235    }
1236
1237    @GwtIncompatible // used only from other GwtIncompatible code
1238    @Override
1239    void setBits(BitSet table) {
1240      for (int i = 0; i < TABLE.length(); i++) {
1241        table.set(TABLE.charAt(i));
1242      }
1243    }
1244  }
1245
1246  /** Implementation of {@link #breakingWhitespace()}. */
1247  private static final class BreakingWhitespace extends CharMatcher {
1248
1249    static final CharMatcher INSTANCE = new BreakingWhitespace();
1250
1251    @Override
1252    public boolean matches(char c) {
1253      switch (c) {
1254        case '\t':
1255        case '\n':
1256        case '\013':
1257        case '\f':
1258        case '\r':
1259        case ' ':
1260        case '\u0085':
1261        case '\u1680':
1262        case '\u2028':
1263        case '\u2029':
1264        case '\u205f':
1265        case '\u3000':
1266          return true;
1267        case '\u2007':
1268          return false;
1269        default:
1270          return c >= '\u2000' && c <= '\u200a';
1271      }
1272    }
1273
1274    @Override
1275    public String toString() {
1276      return "CharMatcher.breakingWhitespace()";
1277    }
1278  }
1279
1280  /** Implementation of {@link #ascii()}. */
1281  private static final class Ascii extends NamedFastMatcher {
1282
1283    static final CharMatcher INSTANCE = new Ascii();
1284
1285    Ascii() {
1286      super("CharMatcher.ascii()");
1287    }
1288
1289    @Override
1290    public boolean matches(char c) {
1291      return c <= '\u007f';
1292    }
1293  }
1294
1295  /** Implementation that matches characters that fall within multiple ranges. */
1296  private static class RangesMatcher extends CharMatcher {
1297
1298    private final String description;
1299    private final char[] rangeStarts;
1300    private final char[] rangeEnds;
1301
1302    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1303      this.description = description;
1304      this.rangeStarts = rangeStarts;
1305      this.rangeEnds = rangeEnds;
1306      checkArgument(rangeStarts.length == rangeEnds.length);
1307      for (int i = 0; i < rangeStarts.length; i++) {
1308        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1309        if (i + 1 < rangeStarts.length) {
1310          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1311        }
1312      }
1313    }
1314
1315    @Override
1316    public boolean matches(char c) {
1317      int index = Arrays.binarySearch(rangeStarts, c);
1318      if (index >= 0) {
1319        return true;
1320      } else {
1321        index = ~index - 1;
1322        return index >= 0 && c <= rangeEnds[index];
1323      }
1324    }
1325
1326    @Override
1327    public String toString() {
1328      return description;
1329    }
1330  }
1331
1332  /** Implementation of {@link #digit()}. */
1333  private static final class Digit extends RangesMatcher {
1334    // Plug the following UnicodeSet pattern into
1335    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1336    // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]]
1337    // and get the zeroes from there.
1338
1339    // Must be in ascending order.
1340    private static final String ZEROES =
1341        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6"
1342            + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0"
1343            + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10";
1344
1345    private static char[] zeroes() {
1346      return ZEROES.toCharArray();
1347    }
1348
1349    private static char[] nines() {
1350      char[] nines = new char[ZEROES.length()];
1351      for (int i = 0; i < ZEROES.length(); i++) {
1352        nines[i] = (char) (ZEROES.charAt(i) + 9);
1353      }
1354      return nines;
1355    }
1356
1357    static final CharMatcher INSTANCE = new Digit();
1358
1359    private Digit() {
1360      super("CharMatcher.digit()", zeroes(), nines());
1361    }
1362  }
1363
1364  /** Implementation of {@link #javaDigit()}. */
1365  private static final class JavaDigit extends CharMatcher {
1366
1367    static final CharMatcher INSTANCE = new JavaDigit();
1368
1369    @Override
1370    public boolean matches(char c) {
1371      return Character.isDigit(c);
1372    }
1373
1374    @Override
1375    public String toString() {
1376      return "CharMatcher.javaDigit()";
1377    }
1378  }
1379
1380  /** Implementation of {@link #javaLetter()}. */
1381  private static final class JavaLetter extends CharMatcher {
1382
1383    static final CharMatcher INSTANCE = new JavaLetter();
1384
1385    @Override
1386    public boolean matches(char c) {
1387      return Character.isLetter(c);
1388    }
1389
1390    @Override
1391    public String toString() {
1392      return "CharMatcher.javaLetter()";
1393    }
1394  }
1395
1396  /** Implementation of {@link #javaLetterOrDigit()}. */
1397  private static final class JavaLetterOrDigit extends CharMatcher {
1398
1399    static final CharMatcher INSTANCE = new JavaLetterOrDigit();
1400
1401    @Override
1402    public boolean matches(char c) {
1403      return Character.isLetterOrDigit(c);
1404    }
1405
1406    @Override
1407    public String toString() {
1408      return "CharMatcher.javaLetterOrDigit()";
1409    }
1410  }
1411
1412  /** Implementation of {@link #javaUpperCase()}. */
1413  private static final class JavaUpperCase extends CharMatcher {
1414
1415    static final CharMatcher INSTANCE = new JavaUpperCase();
1416
1417    @Override
1418    public boolean matches(char c) {
1419      return Character.isUpperCase(c);
1420    }
1421
1422    @Override
1423    public String toString() {
1424      return "CharMatcher.javaUpperCase()";
1425    }
1426  }
1427
1428  /** Implementation of {@link #javaLowerCase()}. */
1429  private static final class JavaLowerCase extends CharMatcher {
1430
1431    static final CharMatcher INSTANCE = new JavaLowerCase();
1432
1433    @Override
1434    public boolean matches(char c) {
1435      return Character.isLowerCase(c);
1436    }
1437
1438    @Override
1439    public String toString() {
1440      return "CharMatcher.javaLowerCase()";
1441    }
1442  }
1443
1444  /** Implementation of {@link #javaIsoControl()}. */
1445  private static final class JavaIsoControl extends NamedFastMatcher {
1446
1447    static final CharMatcher INSTANCE = new JavaIsoControl();
1448
1449    private JavaIsoControl() {
1450      super("CharMatcher.javaIsoControl()");
1451    }
1452
1453    @Override
1454    public boolean matches(char c) {
1455      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1456    }
1457  }
1458
1459  /** Implementation of {@link #invisible()}. */
1460  private static final class Invisible extends RangesMatcher {
1461    // Plug the following UnicodeSet pattern into
1462    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1463    // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]]
1464    // with the "Abbreviate" option, and get the ranges from there.
1465    private static final String RANGE_STARTS =
1466        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u0890\u08e2\u1680\u180e\u2000\u2028\u205f\u2066"
1467            + "\u3000\ud800\ufeff\ufff9";
1468    private static final String RANGE_ENDS = // inclusive ends
1469        "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u0891\u08e2\u1680\u180e\u200f\u202f\u2064\u206f"
1470            + "\u3000\uf8ff\ufeff\ufffb";
1471
1472    static final CharMatcher INSTANCE = new Invisible();
1473
1474    private Invisible() {
1475      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1476    }
1477  }
1478
1479  /** Implementation of {@link #singleWidth()}. */
1480  private static final class SingleWidth extends RangesMatcher {
1481
1482    static final CharMatcher INSTANCE = new SingleWidth();
1483
1484    private SingleWidth() {
1485      super(
1486          "CharMatcher.singleWidth()",
1487          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1488          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1489    }
1490  }
1491
1492  // Non-static factory implementation classes
1493
1494  /** Implementation of {@link #negate()}. */
1495  private static class Negated extends CharMatcher {
1496
1497    final CharMatcher original;
1498
1499    Negated(CharMatcher original) {
1500      this.original = checkNotNull(original);
1501    }
1502
1503    @Override
1504    public boolean matches(char c) {
1505      return !original.matches(c);
1506    }
1507
1508    @Override
1509    public boolean matchesAllOf(CharSequence sequence) {
1510      return original.matchesNoneOf(sequence);
1511    }
1512
1513    @Override
1514    public boolean matchesNoneOf(CharSequence sequence) {
1515      return original.matchesAllOf(sequence);
1516    }
1517
1518    @Override
1519    public int countIn(CharSequence sequence) {
1520      return sequence.length() - original.countIn(sequence);
1521    }
1522
1523    @GwtIncompatible // used only from other GwtIncompatible code
1524    @Override
1525    void setBits(BitSet table) {
1526      BitSet tmp = new BitSet();
1527      original.setBits(tmp);
1528      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1529      table.or(tmp);
1530    }
1531
1532    @Override
1533    public CharMatcher negate() {
1534      return original;
1535    }
1536
1537    @Override
1538    public String toString() {
1539      return original + ".negate()";
1540    }
1541  }
1542
1543  /** Implementation of {@link #and(CharMatcher)}. */
1544  private static final class And extends CharMatcher {
1545
1546    final CharMatcher first;
1547    final CharMatcher second;
1548
1549    And(CharMatcher a, CharMatcher b) {
1550      first = checkNotNull(a);
1551      second = checkNotNull(b);
1552    }
1553
1554    @Override
1555    public boolean matches(char c) {
1556      return first.matches(c) && second.matches(c);
1557    }
1558
1559    @GwtIncompatible // used only from other GwtIncompatible code
1560    @Override
1561    void setBits(BitSet table) {
1562      BitSet tmp1 = new BitSet();
1563      first.setBits(tmp1);
1564      BitSet tmp2 = new BitSet();
1565      second.setBits(tmp2);
1566      tmp1.and(tmp2);
1567      table.or(tmp1);
1568    }
1569
1570    @Override
1571    public String toString() {
1572      return "CharMatcher.and(" + first + ", " + second + ")";
1573    }
1574  }
1575
1576  /** Implementation of {@link #or(CharMatcher)}. */
1577  private static final class Or extends CharMatcher {
1578
1579    final CharMatcher first;
1580    final CharMatcher second;
1581
1582    Or(CharMatcher a, CharMatcher b) {
1583      first = checkNotNull(a);
1584      second = checkNotNull(b);
1585    }
1586
1587    @GwtIncompatible // used only from other GwtIncompatible code
1588    @Override
1589    void setBits(BitSet table) {
1590      first.setBits(table);
1591      second.setBits(table);
1592    }
1593
1594    @Override
1595    public boolean matches(char c) {
1596      return first.matches(c) || second.matches(c);
1597    }
1598
1599    @Override
1600    public String toString() {
1601      return "CharMatcher.or(" + first + ", " + second + ")";
1602    }
1603  }
1604
1605  // Static factory implementations
1606
1607  /** Implementation of {@link #is(char)}. */
1608  private static final class Is extends FastMatcher {
1609
1610    private final char match;
1611
1612    Is(char match) {
1613      this.match = match;
1614    }
1615
1616    @Override
1617    public boolean matches(char c) {
1618      return c == match;
1619    }
1620
1621    @Override
1622    public String replaceFrom(CharSequence sequence, char replacement) {
1623      return sequence.toString().replace(match, replacement);
1624    }
1625
1626    @Override
1627    public CharMatcher and(CharMatcher other) {
1628      return other.matches(match) ? this : none();
1629    }
1630
1631    @Override
1632    public CharMatcher or(CharMatcher other) {
1633      return other.matches(match) ? other : super.or(other);
1634    }
1635
1636    @Override
1637    public CharMatcher negate() {
1638      return isNot(match);
1639    }
1640
1641    @GwtIncompatible // used only from other GwtIncompatible code
1642    @Override
1643    void setBits(BitSet table) {
1644      table.set(match);
1645    }
1646
1647    @Override
1648    public String toString() {
1649      return "CharMatcher.is('" + showCharacter(match) + "')";
1650    }
1651  }
1652
1653  /** Implementation of {@link #isNot(char)}. */
1654  private static final class IsNot extends FastMatcher {
1655
1656    private final char match;
1657
1658    IsNot(char match) {
1659      this.match = match;
1660    }
1661
1662    @Override
1663    public boolean matches(char c) {
1664      return c != match;
1665    }
1666
1667    @Override
1668    public CharMatcher and(CharMatcher other) {
1669      return other.matches(match) ? super.and(other) : other;
1670    }
1671
1672    @Override
1673    public CharMatcher or(CharMatcher other) {
1674      return other.matches(match) ? any() : this;
1675    }
1676
1677    @GwtIncompatible // used only from other GwtIncompatible code
1678    @Override
1679    void setBits(BitSet table) {
1680      table.set(0, match);
1681      table.set(match + 1, Character.MAX_VALUE + 1);
1682    }
1683
1684    @Override
1685    public CharMatcher negate() {
1686      return is(match);
1687    }
1688
1689    @Override
1690    public String toString() {
1691      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1692    }
1693  }
1694
1695  private static CharMatcher.IsEither isEither(char c1, char c2) {
1696    return new CharMatcher.IsEither(c1, c2);
1697  }
1698
1699  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1700  private static final class IsEither extends FastMatcher {
1701
1702    private final char match1;
1703    private final char match2;
1704
1705    IsEither(char match1, char match2) {
1706      this.match1 = match1;
1707      this.match2 = match2;
1708    }
1709
1710    @Override
1711    public boolean matches(char c) {
1712      return c == match1 || c == match2;
1713    }
1714
1715    @GwtIncompatible // used only from other GwtIncompatible code
1716    @Override
1717    void setBits(BitSet table) {
1718      table.set(match1);
1719      table.set(match2);
1720    }
1721
1722    @Override
1723    public String toString() {
1724      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1725    }
1726  }
1727
1728  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1729  private static final class AnyOf extends CharMatcher {
1730
1731    private final char[] chars;
1732
1733    public AnyOf(CharSequence chars) {
1734      this.chars = chars.toString().toCharArray();
1735      Arrays.sort(this.chars);
1736    }
1737
1738    @Override
1739    public boolean matches(char c) {
1740      return Arrays.binarySearch(chars, c) >= 0;
1741    }
1742
1743    @Override
1744    @GwtIncompatible // used only from other GwtIncompatible code
1745    void setBits(BitSet table) {
1746      for (char c : chars) {
1747        table.set(c);
1748      }
1749    }
1750
1751    @Override
1752    public String toString() {
1753      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1754      for (char c : chars) {
1755        description.append(showCharacter(c));
1756      }
1757      description.append("\")");
1758      return description.toString();
1759    }
1760  }
1761
1762  /** Implementation of {@link #inRange(char, char)}. */
1763  private static final class InRange extends FastMatcher {
1764
1765    private final char startInclusive;
1766    private final char endInclusive;
1767
1768    InRange(char startInclusive, char endInclusive) {
1769      checkArgument(endInclusive >= startInclusive);
1770      this.startInclusive = startInclusive;
1771      this.endInclusive = endInclusive;
1772    }
1773
1774    @Override
1775    public boolean matches(char c) {
1776      return startInclusive <= c && c <= endInclusive;
1777    }
1778
1779    @GwtIncompatible // used only from other GwtIncompatible code
1780    @Override
1781    void setBits(BitSet table) {
1782      table.set(startInclusive, endInclusive + 1);
1783    }
1784
1785    @Override
1786    public String toString() {
1787      return "CharMatcher.inRange('"
1788          + showCharacter(startInclusive)
1789          + "', '"
1790          + showCharacter(endInclusive)
1791          + "')";
1792    }
1793  }
1794
1795  /** Implementation of {@link #forPredicate(Predicate)}. */
1796  private static final class ForPredicate extends CharMatcher {
1797
1798    private final Predicate<? super Character> predicate;
1799
1800    ForPredicate(Predicate<? super Character> predicate) {
1801      this.predicate = checkNotNull(predicate);
1802    }
1803
1804    @Override
1805    public boolean matches(char c) {
1806      return predicate.apply(c);
1807    }
1808
1809    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1810    @Override
1811    public boolean apply(Character character) {
1812      return predicate.apply(checkNotNull(character));
1813    }
1814
1815    @Override
1816    public String toString() {
1817      return "CharMatcher.forPredicate(" + predicate + ")";
1818    }
1819  }
1820}