001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.base;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndex;
020
021import com.google.common.annotations.GwtCompatible;
022import com.google.common.annotations.GwtIncompatible;
023import com.google.common.annotations.VisibleForTesting;
024import java.util.Arrays;
025import java.util.BitSet;
026
027/**
028 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
029 * for any {@link Object}. Also offers basic text processing methods based on this function.
030 * Implementations are strongly encouraged to be side-effect-free and immutable.
031 *
032 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
033 * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}".
034 *
035 * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a
036 * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand
037 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code
038 * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of
039 * assigned characters, including important CJK characters and emoji.
040 *
041 * <p>Supplementary characters are <a
042 * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded
043 * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as
044 * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s.
045 *
046 * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for
047 * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For
048 * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner.
049 *
050 * <p>Example usages:
051 *
052 * <pre>
053 *   String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput);
054 *   if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
055 *
056 * <p>See the Guava User Guide article on <a
057 * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher}
058 * </a>.
059 *
060 * @author Kevin Bourrillion
061 * @since 1.0
062 */
063@GwtCompatible(emulated = true)
064@ElementTypesAreNonnullByDefault
065public abstract class CharMatcher implements Predicate<Character> {
066  /*
067   *           N777777777NO
068   *         N7777777777777N
069   *        M777777777777777N
070   *        $N877777777D77777M
071   *       N M77777777ONND777M
072   *       MN777777777NN  D777
073   *     N7ZN777777777NN ~M7778
074   *    N777777777777MMNN88777N
075   *    N777777777777MNZZZ7777O
076   *    DZN7777O77777777777777
077   *     N7OONND7777777D77777N
078   *      8$M++++?N???$77777$
079   *       M7++++N+M77777777N
080   *        N77O777777777777$                              M
081   *          DNNM$$$$777777N                              D
082   *         N$N:=N$777N7777M                             NZ
083   *        77Z::::N777777777                          ODZZZ
084   *       77N::::::N77777777M                         NNZZZ$
085   *     $777:::::::77777777MN                        ZM8ZZZZZ
086   *     777M::::::Z7777777Z77                        N++ZZZZNN
087   *    7777M:::::M7777777$777M                       $++IZZZZM
088   *   M777$:::::N777777$M7777M                       +++++ZZZDN
089   *     NN$::::::7777$$M777777N                      N+++ZZZZNZ
090   *       N::::::N:7$O:77777777                      N++++ZZZZN
091   *       M::::::::::::N77777777+                   +?+++++ZZZM
092   *       8::::::::::::D77777777M                    O+++++ZZ
093   *        ::::::::::::M777777777N                      O+?D
094   *        M:::::::::::M77777777778                     77=
095   *        D=::::::::::N7777777777N                    777
096   *       INN===::::::=77777777777N                  I777N
097   *      ?777N========N7777777777787M               N7777
098   *      77777$D======N77777777777N777N?         N777777
099   *     I77777$$$N7===M$$77777777$77777777$MMZ77777777N
100   *      $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON
101   *       M$$$$$$$$M    M$$$$$$$$N=N$$$$7777777$$$ND
102   *      O77Z$$$$$$$     M$$$$$$$$MNI==$DNNNNM=~N
103   *   7 :N MNN$$$$M$      $$$777$8      8D8I
104   *     NMM.:7O           777777778
105   *                       7777777MN
106   *                       M NO .7:
107   *                       M   :   M
108   *                            8
109   */
110
111  // Constant matcher factory methods
112
113  /**
114   * Matches any character.
115   *
116   * @since 19.0 (since 1.0 as constant {@code ANY})
117   */
118  public static CharMatcher any() {
119    return Any.INSTANCE;
120  }
121
122  /**
123   * Matches no characters.
124   *
125   * @since 19.0 (since 1.0 as constant {@code NONE})
126   */
127  public static CharMatcher none() {
128    return None.INSTANCE;
129  }
130
131  /**
132   * Determines whether a character is whitespace according to the latest Unicode standard, as
133   * illustrated <a
134   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
135   * This is not the same definition used by other Java APIs. (See a <a
136   * href="https://docs.google.com/spreadsheets/d/1kq4ECwPjHX9B8QUCTPclgsDCXYaj7T-FlT4tB5q3ahk/edit">comparison
137   * of several definitions of "whitespace"</a>.)
138   *
139   * <p>All Unicode White_Space characters are on the BMP and thus supported by this API.
140   *
141   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to
142   * date.
143   *
144   * @since 19.0 (since 1.0 as constant {@code WHITESPACE})
145   */
146  public static CharMatcher whitespace() {
147    return Whitespace.INSTANCE;
148  }
149
150  /**
151   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
152   * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a
153   * discussion of that term.
154   *
155   * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE})
156   */
157  public static CharMatcher breakingWhitespace() {
158    return BreakingWhitespace.INSTANCE;
159  }
160
161  /**
162   * Determines whether a character is ASCII, meaning that its code point is less than 128.
163   *
164   * @since 19.0 (since 1.0 as constant {@code ASCII})
165   */
166  public static CharMatcher ascii() {
167    return Ascii.INSTANCE;
168  }
169
170  /**
171   * Determines whether a character is a BMP digit according to <a
172   * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If
173   * you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
174   *
175   * @deprecated Many digits are supplementary characters; see the class documentation.
176   * @since 19.0 (since 1.0 as constant {@code DIGIT})
177   */
178  @Deprecated
179  public static CharMatcher digit() {
180    return Digit.INSTANCE;
181  }
182
183  /**
184   * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char)
185   * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0',
186   * '9')}.
187   *
188   * @deprecated Many digits are supplementary characters; see the class documentation.
189   * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT})
190   */
191  @Deprecated
192  public static CharMatcher javaDigit() {
193    return JavaDigit.INSTANCE;
194  }
195
196  /**
197   * Determines whether a character is a BMP letter according to {@linkplain
198   * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin
199   * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}.
200   *
201   * @deprecated Most letters are supplementary characters; see the class documentation.
202   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER})
203   */
204  @Deprecated
205  public static CharMatcher javaLetter() {
206    return JavaLetter.INSTANCE;
207  }
208
209  /**
210   * Determines whether a character is a BMP letter or digit according to {@linkplain
211   * Character#isLetterOrDigit(char) Java's definition}.
212   *
213   * @deprecated Most letters and digits are supplementary characters; see the class documentation.
214   * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}).
215   */
216  @Deprecated
217  public static CharMatcher javaLetterOrDigit() {
218    return JavaLetterOrDigit.INSTANCE;
219  }
220
221  /**
222   * Determines whether a BMP character is upper case according to {@linkplain
223   * Character#isUpperCase(char) Java's definition}.
224   *
225   * @deprecated Some uppercase characters are supplementary characters; see the class
226   *     documentation.
227   * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE})
228   */
229  @Deprecated
230  public static CharMatcher javaUpperCase() {
231    return JavaUpperCase.INSTANCE;
232  }
233
234  /**
235   * Determines whether a BMP character is lower case according to {@linkplain
236   * Character#isLowerCase(char) Java's definition}.
237   *
238   * @deprecated Some lowercase characters are supplementary characters; see the class
239   *     documentation.
240   * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE})
241   */
242  @Deprecated
243  public static CharMatcher javaLowerCase() {
244    return JavaLowerCase.INSTANCE;
245  }
246
247  /**
248   * Determines whether a character is an ISO control character as specified by {@link
249   * Character#isISOControl(char)}.
250   *
251   * <p>All ISO control codes are on the BMP and thus supported by this API.
252   *
253   * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL})
254   */
255  public static CharMatcher javaIsoControl() {
256    return JavaIsoControl.INSTANCE;
257  }
258
259  /**
260   * Determines whether a character is invisible; that is, if its Unicode category is any of
261   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
262   * PRIVATE_USE according to ICU4J.
263   *
264   * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU).
265   *
266   * @deprecated Most invisible characters are supplementary characters; see the class
267   *     documentation.
268   * @since 19.0 (since 1.0 as constant {@code INVISIBLE})
269   */
270  @Deprecated
271  public static CharMatcher invisible() {
272    return Invisible.INSTANCE;
273  }
274
275  /**
276   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
277   * errs on the side of returning {@code false} (that is, it tends to assume a character is
278   * double-width).
279   *
280   * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to
281   * date.
282   *
283   * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>.
284   *
285   * @deprecated Many such characters are supplementary characters; see the class documentation.
286   * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH})
287   */
288  @Deprecated
289  public static CharMatcher singleWidth() {
290    return SingleWidth.INSTANCE;
291  }
292
293  // Static factories
294
295  /** Returns a {@code char} matcher that matches only one specified BMP character. */
296  public static CharMatcher is(final char match) {
297    return new Is(match);
298  }
299
300  /**
301   * Returns a {@code char} matcher that matches any character except the BMP character specified.
302   *
303   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
304   */
305  public static CharMatcher isNot(final char match) {
306    return new IsNot(match);
307  }
308
309  /**
310   * Returns a {@code char} matcher that matches any BMP character present in the given character
311   * sequence. Returns a bogus matcher if the sequence contains supplementary characters.
312   */
313  public static CharMatcher anyOf(final CharSequence sequence) {
314    switch (sequence.length()) {
315      case 0:
316        return none();
317      case 1:
318        return is(sequence.charAt(0));
319      case 2:
320        return isEither(sequence.charAt(0), sequence.charAt(1));
321      default:
322        // TODO(lowasser): is it potentially worth just going ahead and building a precomputed
323        // matcher?
324        return new AnyOf(sequence);
325    }
326  }
327
328  /**
329   * Returns a {@code char} matcher that matches any BMP character not present in the given
330   * character sequence. Returns a bogus matcher if the sequence contains supplementary characters.
331   */
332  public static CharMatcher noneOf(CharSequence sequence) {
333    return anyOf(sequence).negate();
334  }
335
336  /**
337   * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints
338   * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
339   * CharMatcher.inRange('a', 'z')}.
340   *
341   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
342   */
343  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
344    return new InRange(startInclusive, endInclusive);
345  }
346
347  /**
348   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
349   * which operates on primitive {@code char} instances instead.
350   */
351  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
352    return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate);
353  }
354
355  // Constructors
356
357  /**
358   * Constructor for use by subclasses. When subclassing, you may want to override {@code
359   * toString()} to provide a useful description.
360   */
361  protected CharMatcher() {}
362
363  // Abstract methods
364
365  /** Determines a true or false value for the given character. */
366  public abstract boolean matches(char c);
367
368  // Non-static factories
369
370  /** Returns a matcher that matches any character not matched by this matcher. */
371  // @Override under Java 8 but not under Java 7
372  public CharMatcher negate() {
373    return new Negated(this);
374  }
375
376  /**
377   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
378   */
379  public CharMatcher and(CharMatcher other) {
380    return new And(this, other);
381  }
382
383  /**
384   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
385   */
386  public CharMatcher or(CharMatcher other) {
387    return new Or(this, other);
388  }
389
390  /**
391   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
392   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
393   * worthwhile only if the precomputed matcher is queried many thousands of times.
394   *
395   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
396   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
397   * worthwhile tradeoff in a browser.
398   */
399  public CharMatcher precomputed() {
400    return Platform.precomputeCharMatcher(this);
401  }
402
403  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
404
405  /**
406   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
407   * on {@link Platform} so that we can have different behavior in GWT.
408   *
409   * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the
410   * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables
411   * for matchers that only match a few characters, and so on. In the worst-case scenario, it
412   * constructs an eight-kilobyte bit array and queries that. In many situations this produces a
413   * matcher which is faster to query than the original.
414   */
415  @GwtIncompatible // SmallCharMatcher
416  CharMatcher precomputedInternal() {
417    final BitSet table = new BitSet();
418    setBits(table);
419    int totalCharacters = table.cardinality();
420    if (totalCharacters * 2 <= DISTINCT_CHARS) {
421      return precomputedPositive(totalCharacters, table, toString());
422    } else {
423      // TODO(lowasser): is it worth it to worry about the last character of large matchers?
424      table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
425      int negatedCharacters = DISTINCT_CHARS - totalCharacters;
426      String suffix = ".negate()";
427      final String description = toString();
428      String negatedDescription =
429          description.endsWith(suffix)
430              ? description.substring(0, description.length() - suffix.length())
431              : description + suffix;
432      return new NegatedFastMatcher(
433          precomputedPositive(negatedCharacters, table, negatedDescription)) {
434        @Override
435        public String toString() {
436          return description;
437        }
438      };
439    }
440  }
441
442  /**
443   * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper.
444   */
445  @GwtIncompatible // SmallCharMatcher
446  private static CharMatcher precomputedPositive(
447      int totalCharacters, BitSet table, String description) {
448    switch (totalCharacters) {
449      case 0:
450        return none();
451      case 1:
452        return is((char) table.nextSetBit(0));
453      case 2:
454        char c1 = (char) table.nextSetBit(0);
455        char c2 = (char) table.nextSetBit(c1 + 1);
456        return isEither(c1, c2);
457      default:
458        return isSmall(totalCharacters, table.length())
459            ? SmallCharMatcher.from(table, description)
460            : new BitSetMatcher(table, description);
461    }
462  }
463
464  @GwtIncompatible // SmallCharMatcher
465  private static boolean isSmall(int totalCharacters, int tableLength) {
466    return totalCharacters <= SmallCharMatcher.MAX_SIZE
467        && tableLength > (totalCharacters * 4 * Character.SIZE);
468    // err on the side of BitSetMatcher
469  }
470
471  /** Sets bits in {@code table} matched by this matcher. */
472  @GwtIncompatible // used only from other GwtIncompatible code
473  void setBits(BitSet table) {
474    for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) {
475      if (matches((char) c)) {
476        table.set(c);
477      }
478    }
479  }
480
481  // Text processing routines
482
483  /**
484   * Returns {@code true} if a character sequence contains at least one matching BMP character.
485   * Equivalent to {@code !matchesNoneOf(sequence)}.
486   *
487   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
488   * character, until this returns {@code true} or the end is reached.
489   *
490   * @param sequence the character sequence to examine, possibly empty
491   * @return {@code true} if this matcher matches at least one character in the sequence
492   * @since 8.0
493   */
494  public boolean matchesAnyOf(CharSequence sequence) {
495    return !matchesNoneOf(sequence);
496  }
497
498  /**
499   * Returns {@code true} if a character sequence contains only matching BMP characters.
500   *
501   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
502   * character, until this returns {@code false} or the end is reached.
503   *
504   * @param sequence the character sequence to examine, possibly empty
505   * @return {@code true} if this matcher matches every character in the sequence, including when
506   *     the sequence is empty
507   */
508  public boolean matchesAllOf(CharSequence sequence) {
509    for (int i = sequence.length() - 1; i >= 0; i--) {
510      if (!matches(sequence.charAt(i))) {
511        return false;
512      }
513    }
514    return true;
515  }
516
517  /**
518   * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to
519   * {@code !matchesAnyOf(sequence)}.
520   *
521   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
522   * character, until this returns {@code true} or the end is reached.
523   *
524   * @param sequence the character sequence to examine, possibly empty
525   * @return {@code true} if this matcher matches no characters in the sequence, including when the
526   *     sequence is empty
527   */
528  public boolean matchesNoneOf(CharSequence sequence) {
529    return indexIn(sequence) == -1;
530  }
531
532  /**
533   * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if
534   * no matching character is present.
535   *
536   * <p>The default implementation iterates over the sequence in forward order calling {@link
537   * #matches} for each character.
538   *
539   * @param sequence the character sequence to examine from the beginning
540   * @return an index, or {@code -1} if no character matches
541   */
542  public int indexIn(CharSequence sequence) {
543    return indexIn(sequence, 0);
544  }
545
546  /**
547   * Returns the index of the first matching BMP character in a character sequence, starting from a
548   * given position, or {@code -1} if no character matches after that position.
549   *
550   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
551   * start}, calling {@link #matches} for each character.
552   *
553   * @param sequence the character sequence to examine
554   * @param start the first index to examine; must be nonnegative and no greater than {@code
555   *     sequence.length()}
556   * @return the index of the first matching character, guaranteed to be no less than {@code start},
557   *     or {@code -1} if no character matches
558   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
559   *     sequence.length()}
560   */
561  public int indexIn(CharSequence sequence, int start) {
562    int length = sequence.length();
563    checkPositionIndex(start, length);
564    for (int i = start; i < length; i++) {
565      if (matches(sequence.charAt(i))) {
566        return i;
567      }
568    }
569    return -1;
570  }
571
572  /**
573   * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if
574   * no matching character is present.
575   *
576   * <p>The default implementation iterates over the sequence in reverse order calling {@link
577   * #matches} for each character.
578   *
579   * @param sequence the character sequence to examine from the end
580   * @return an index, or {@code -1} if no character matches
581   */
582  public int lastIndexIn(CharSequence sequence) {
583    for (int i = sequence.length() - 1; i >= 0; i--) {
584      if (matches(sequence.charAt(i))) {
585        return i;
586      }
587    }
588    return -1;
589  }
590
591  /**
592   * Returns the number of matching {@code char}s found in a character sequence.
593   *
594   * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}().
595   */
596  public int countIn(CharSequence sequence) {
597    int count = 0;
598    for (int i = 0; i < sequence.length(); i++) {
599      if (matches(sequence.charAt(i))) {
600        count++;
601      }
602    }
603    return count;
604  }
605
606  /**
607   * Returns a string containing all non-matching characters of a character sequence, in order. For
608   * example:
609   *
610   * <pre>{@code
611   * CharMatcher.is('a').removeFrom("bazaar")
612   * }</pre>
613   *
614   * ... returns {@code "bzr"}.
615   */
616  public String removeFrom(CharSequence sequence) {
617    String string = sequence.toString();
618    int pos = indexIn(string);
619    if (pos == -1) {
620      return string;
621    }
622
623    char[] chars = string.toCharArray();
624    int spread = 1;
625
626    // This unusual loop comes from extensive benchmarking
627    OUT:
628    while (true) {
629      pos++;
630      while (true) {
631        if (pos == chars.length) {
632          break OUT;
633        }
634        if (matches(chars[pos])) {
635          break;
636        }
637        chars[pos - spread] = chars[pos];
638        pos++;
639      }
640      spread++;
641    }
642    return new String(chars, 0, pos - spread);
643  }
644
645  /**
646   * Returns a string containing all matching BMP characters of a character sequence, in order. For
647   * example:
648   *
649   * <pre>{@code
650   * CharMatcher.is('a').retainFrom("bazaar")
651   * }</pre>
652   *
653   * ... returns {@code "aaa"}.
654   */
655  public String retainFrom(CharSequence sequence) {
656    return negate().removeFrom(sequence);
657  }
658
659  /**
660   * Returns a string copy of the input character sequence, with each matching BMP character
661   * replaced by a given replacement character. For example:
662   *
663   * <pre>{@code
664   * CharMatcher.is('a').replaceFrom("radar", 'o')
665   * }</pre>
666   *
667   * ... returns {@code "rodor"}.
668   *
669   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
670   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
671   * character.
672   *
673   * @param sequence the character sequence to replace matching characters in
674   * @param replacement the character to append to the result string in place of each matching
675   *     character in {@code sequence}
676   * @return the new string
677   */
678  public String replaceFrom(CharSequence sequence, char replacement) {
679    String string = sequence.toString();
680    int pos = indexIn(string);
681    if (pos == -1) {
682      return string;
683    }
684    char[] chars = string.toCharArray();
685    chars[pos] = replacement;
686    for (int i = pos + 1; i < chars.length; i++) {
687      if (matches(chars[i])) {
688        chars[i] = replacement;
689      }
690    }
691    return new String(chars);
692  }
693
694  /**
695   * Returns a string copy of the input character sequence, with each matching BMP character
696   * replaced by a given replacement sequence. For example:
697   *
698   * <pre>{@code
699   * CharMatcher.is('a').replaceFrom("yaha", "oo")
700   * }</pre>
701   *
702   * ... returns {@code "yoohoo"}.
703   *
704   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
705   * off calling {@link #replaceFrom(CharSequence, char)} directly.
706   *
707   * @param sequence the character sequence to replace matching characters in
708   * @param replacement the characters to append to the result string in place of each matching
709   *     character in {@code sequence}
710   * @return the new string
711   */
712  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
713    int replacementLen = replacement.length();
714    if (replacementLen == 0) {
715      return removeFrom(sequence);
716    }
717    if (replacementLen == 1) {
718      return replaceFrom(sequence, replacement.charAt(0));
719    }
720
721    String string = sequence.toString();
722    int pos = indexIn(string);
723    if (pos == -1) {
724      return string;
725    }
726
727    int len = string.length();
728    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
729
730    int oldpos = 0;
731    do {
732      buf.append(string, oldpos, pos);
733      buf.append(replacement);
734      oldpos = pos + 1;
735      pos = indexIn(string, oldpos);
736    } while (pos != -1);
737
738    buf.append(string, oldpos, len);
739    return buf.toString();
740  }
741
742  /**
743   * Returns a substring of the input character sequence that omits all matching BMP characters from
744   * the beginning and from the end of the string. For example:
745   *
746   * <pre>{@code
747   * CharMatcher.anyOf("ab").trimFrom("abacatbab")
748   * }</pre>
749   *
750   * ... returns {@code "cat"}.
751   *
752   * <p>Note that:
753   *
754   * <pre>{@code
755   * CharMatcher.inRange('\0', ' ').trimFrom(str)
756   * }</pre>
757   *
758   * ... is equivalent to {@link String#trim()}.
759   */
760  public String trimFrom(CharSequence sequence) {
761    int len = sequence.length();
762    int first;
763    int last;
764
765    for (first = 0; first < len; first++) {
766      if (!matches(sequence.charAt(first))) {
767        break;
768      }
769    }
770    for (last = len - 1; last > first; last--) {
771      if (!matches(sequence.charAt(last))) {
772        break;
773      }
774    }
775
776    return sequence.subSequence(first, last + 1).toString();
777  }
778
779  /**
780   * Returns a substring of the input character sequence that omits all matching BMP characters from
781   * the beginning of the string. For example:
782   *
783   * <pre>{@code
784   * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")
785   * }</pre>
786   *
787   * ... returns {@code "catbab"}.
788   */
789  public String trimLeadingFrom(CharSequence sequence) {
790    int len = sequence.length();
791    for (int first = 0; first < len; first++) {
792      if (!matches(sequence.charAt(first))) {
793        return sequence.subSequence(first, len).toString();
794      }
795    }
796    return "";
797  }
798
799  /**
800   * Returns a substring of the input character sequence that omits all matching BMP characters from
801   * the end of the string. For example:
802   *
803   * <pre>{@code
804   * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")
805   * }</pre>
806   *
807   * ... returns {@code "abacat"}.
808   */
809  public String trimTrailingFrom(CharSequence sequence) {
810    int len = sequence.length();
811    for (int last = len - 1; last >= 0; last--) {
812      if (!matches(sequence.charAt(last))) {
813        return sequence.subSequence(0, last + 1).toString();
814      }
815    }
816    return "";
817  }
818
819  /**
820   * Returns a string copy of the input character sequence, with each group of consecutive matching
821   * BMP characters replaced by a single replacement character. For example:
822   *
823   * <pre>{@code
824   * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')
825   * }</pre>
826   *
827   * ... returns {@code "b-p-r"}.
828   *
829   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
830   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
831   * character.
832   *
833   * @param sequence the character sequence to replace matching groups of characters in
834   * @param replacement the character to append to the result string in place of each group of
835   *     matching characters in {@code sequence}
836   * @return the new string
837   */
838  public String collapseFrom(CharSequence sequence, char replacement) {
839    // This implementation avoids unnecessary allocation.
840    int len = sequence.length();
841    for (int i = 0; i < len; i++) {
842      char c = sequence.charAt(i);
843      if (matches(c)) {
844        if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
845          // a no-op replacement
846          i++;
847        } else {
848          StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement);
849          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
850        }
851      }
852    }
853    // no replacement needed
854    return sequence.toString();
855  }
856
857  /**
858   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
859   * groups of matching BMP characters at the start or end of the sequence are removed without
860   * replacement.
861   */
862  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
863    // This implementation avoids unnecessary allocation.
864    int len = sequence.length();
865    int first = 0;
866    int last = len - 1;
867
868    while (first < len && matches(sequence.charAt(first))) {
869      first++;
870    }
871
872    while (last > first && matches(sequence.charAt(last))) {
873      last--;
874    }
875
876    return (first == 0 && last == len - 1)
877        ? collapseFrom(sequence, replacement)
878        : finishCollapseFrom(
879            sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false);
880  }
881
882  private String finishCollapseFrom(
883      CharSequence sequence,
884      int start,
885      int end,
886      char replacement,
887      StringBuilder builder,
888      boolean inMatchingGroup) {
889    for (int i = start; i < end; i++) {
890      char c = sequence.charAt(i);
891      if (matches(c)) {
892        if (!inMatchingGroup) {
893          builder.append(replacement);
894          inMatchingGroup = true;
895        }
896      } else {
897        builder.append(c);
898        inMatchingGroup = false;
899      }
900    }
901    return builder.toString();
902  }
903
904  /**
905   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
906   *     instead.
907   */
908  @Deprecated
909  @Override
910  public boolean apply(Character character) {
911    return matches(character);
912  }
913
914  /**
915   * Returns a string representation of this {@code CharMatcher}, such as {@code
916   * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
917   */
918  @Override
919  public String toString() {
920    return super.toString();
921  }
922
923  /**
924   * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where
925   * "12AB" is the four hexadecimal digits representing the 16-bit code unit.
926   */
927  private static String showCharacter(char c) {
928    String hex = "0123456789ABCDEF";
929    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
930    for (int i = 0; i < 4; i++) {
931      tmp[5 - i] = hex.charAt(c & 0xF);
932      c = (char) (c >> 4);
933    }
934    return String.copyValueOf(tmp);
935  }
936
937  // Fast matchers
938
939  /** A matcher for which precomputation will not yield any significant benefit. */
940  abstract static class FastMatcher extends CharMatcher {
941
942    @Override
943    public final CharMatcher precomputed() {
944      return this;
945    }
946
947    @Override
948    public CharMatcher negate() {
949      return new NegatedFastMatcher(this);
950    }
951  }
952
953  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */
954  abstract static class NamedFastMatcher extends FastMatcher {
955
956    private final String description;
957
958    NamedFastMatcher(String description) {
959      this.description = checkNotNull(description);
960    }
961
962    @Override
963    public final String toString() {
964      return description;
965    }
966  }
967
968  /** Negation of a {@link FastMatcher}. */
969  private static class NegatedFastMatcher extends Negated {
970
971    NegatedFastMatcher(CharMatcher original) {
972      super(original);
973    }
974
975    @Override
976    public final CharMatcher precomputed() {
977      return this;
978    }
979  }
980
981  /** Fast matcher using a {@link BitSet} table of matching characters. */
982  @GwtIncompatible // used only from other GwtIncompatible code
983  private static final class BitSetMatcher extends NamedFastMatcher {
984
985    private final BitSet table;
986
987    private BitSetMatcher(BitSet table, String description) {
988      super(description);
989      if (table.length() + Long.SIZE < table.size()) {
990        table = (BitSet) table.clone();
991        // If only we could actually call BitSet.trimToSize() ourselves...
992      }
993      this.table = table;
994    }
995
996    @Override
997    public boolean matches(char c) {
998      return table.get(c);
999    }
1000
1001    @Override
1002    void setBits(BitSet bitSet) {
1003      bitSet.or(table);
1004    }
1005  }
1006
1007  // Static constant implementation classes
1008
1009  /** Implementation of {@link #any()}. */
1010  private static final class Any extends NamedFastMatcher {
1011
1012    static final CharMatcher INSTANCE = new Any();
1013
1014    private Any() {
1015      super("CharMatcher.any()");
1016    }
1017
1018    @Override
1019    public boolean matches(char c) {
1020      return true;
1021    }
1022
1023    @Override
1024    public int indexIn(CharSequence sequence) {
1025      return (sequence.length() == 0) ? -1 : 0;
1026    }
1027
1028    @Override
1029    public int indexIn(CharSequence sequence, int start) {
1030      int length = sequence.length();
1031      checkPositionIndex(start, length);
1032      return (start == length) ? -1 : start;
1033    }
1034
1035    @Override
1036    public int lastIndexIn(CharSequence sequence) {
1037      return sequence.length() - 1;
1038    }
1039
1040    @Override
1041    public boolean matchesAllOf(CharSequence sequence) {
1042      checkNotNull(sequence);
1043      return true;
1044    }
1045
1046    @Override
1047    public boolean matchesNoneOf(CharSequence sequence) {
1048      return sequence.length() == 0;
1049    }
1050
1051    @Override
1052    public String removeFrom(CharSequence sequence) {
1053      checkNotNull(sequence);
1054      return "";
1055    }
1056
1057    @Override
1058    public String replaceFrom(CharSequence sequence, char replacement) {
1059      char[] array = new char[sequence.length()];
1060      Arrays.fill(array, replacement);
1061      return new String(array);
1062    }
1063
1064    @Override
1065    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1066      StringBuilder result = new StringBuilder(sequence.length() * replacement.length());
1067      for (int i = 0; i < sequence.length(); i++) {
1068        result.append(replacement);
1069      }
1070      return result.toString();
1071    }
1072
1073    @Override
1074    public String collapseFrom(CharSequence sequence, char replacement) {
1075      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
1076    }
1077
1078    @Override
1079    public String trimFrom(CharSequence sequence) {
1080      checkNotNull(sequence);
1081      return "";
1082    }
1083
1084    @Override
1085    public int countIn(CharSequence sequence) {
1086      return sequence.length();
1087    }
1088
1089    @Override
1090    public CharMatcher and(CharMatcher other) {
1091      return checkNotNull(other);
1092    }
1093
1094    @Override
1095    public CharMatcher or(CharMatcher other) {
1096      checkNotNull(other);
1097      return this;
1098    }
1099
1100    @Override
1101    public CharMatcher negate() {
1102      return none();
1103    }
1104  }
1105
1106  /** Implementation of {@link #none()}. */
1107  private static final class None extends NamedFastMatcher {
1108
1109    static final CharMatcher INSTANCE = new None();
1110
1111    private None() {
1112      super("CharMatcher.none()");
1113    }
1114
1115    @Override
1116    public boolean matches(char c) {
1117      return false;
1118    }
1119
1120    @Override
1121    public int indexIn(CharSequence sequence) {
1122      checkNotNull(sequence);
1123      return -1;
1124    }
1125
1126    @Override
1127    public int indexIn(CharSequence sequence, int start) {
1128      int length = sequence.length();
1129      checkPositionIndex(start, length);
1130      return -1;
1131    }
1132
1133    @Override
1134    public int lastIndexIn(CharSequence sequence) {
1135      checkNotNull(sequence);
1136      return -1;
1137    }
1138
1139    @Override
1140    public boolean matchesAllOf(CharSequence sequence) {
1141      return sequence.length() == 0;
1142    }
1143
1144    @Override
1145    public boolean matchesNoneOf(CharSequence sequence) {
1146      checkNotNull(sequence);
1147      return true;
1148    }
1149
1150    @Override
1151    public String removeFrom(CharSequence sequence) {
1152      return sequence.toString();
1153    }
1154
1155    @Override
1156    public String replaceFrom(CharSequence sequence, char replacement) {
1157      return sequence.toString();
1158    }
1159
1160    @Override
1161    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
1162      checkNotNull(replacement);
1163      return sequence.toString();
1164    }
1165
1166    @Override
1167    public String collapseFrom(CharSequence sequence, char replacement) {
1168      return sequence.toString();
1169    }
1170
1171    @Override
1172    public String trimFrom(CharSequence sequence) {
1173      return sequence.toString();
1174    }
1175
1176    @Override
1177    public String trimLeadingFrom(CharSequence sequence) {
1178      return sequence.toString();
1179    }
1180
1181    @Override
1182    public String trimTrailingFrom(CharSequence sequence) {
1183      return sequence.toString();
1184    }
1185
1186    @Override
1187    public int countIn(CharSequence sequence) {
1188      checkNotNull(sequence);
1189      return 0;
1190    }
1191
1192    @Override
1193    public CharMatcher and(CharMatcher other) {
1194      checkNotNull(other);
1195      return this;
1196    }
1197
1198    @Override
1199    public CharMatcher or(CharMatcher other) {
1200      return checkNotNull(other);
1201    }
1202
1203    @Override
1204    public CharMatcher negate() {
1205      return any();
1206    }
1207  }
1208
1209  /** Implementation of {@link #whitespace()}. */
1210  @VisibleForTesting
1211  static final class Whitespace extends NamedFastMatcher {
1212
1213    // TABLE is a precomputed hashset of whitespace characters. MULTIPLIER serves as a hash function
1214    // whose key property is that it maps 25 characters into the 32-slot table without collision.
1215    // Basically this is an opportunistic fast implementation as opposed to "good code". For most
1216    // other use-cases, the reduction in readability isn't worth it.
1217    static final String TABLE =
1218        "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1219            + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1220            + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1221            + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1222    static final int MULTIPLIER = 1682554634;
1223    static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1);
1224
1225    static final CharMatcher INSTANCE = new Whitespace();
1226
1227    Whitespace() {
1228      super("CharMatcher.whitespace()");
1229    }
1230
1231    @Override
1232    public boolean matches(char c) {
1233      return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c;
1234    }
1235
1236    @GwtIncompatible // used only from other GwtIncompatible code
1237    @Override
1238    void setBits(BitSet table) {
1239      for (int i = 0; i < TABLE.length(); i++) {
1240        table.set(TABLE.charAt(i));
1241      }
1242    }
1243  }
1244
1245  /** Implementation of {@link #breakingWhitespace()}. */
1246  private static final class BreakingWhitespace extends CharMatcher {
1247
1248    static final CharMatcher INSTANCE = new BreakingWhitespace();
1249
1250    @Override
1251    public boolean matches(char c) {
1252      switch (c) {
1253        case '\t':
1254        case '\n':
1255        case '\013':
1256        case '\f':
1257        case '\r':
1258        case ' ':
1259        case '\u0085':
1260        case '\u1680':
1261        case '\u2028':
1262        case '\u2029':
1263        case '\u205f':
1264        case '\u3000':
1265          return true;
1266        case '\u2007':
1267          return false;
1268        default:
1269          return c >= '\u2000' && c <= '\u200a';
1270      }
1271    }
1272
1273    @Override
1274    public String toString() {
1275      return "CharMatcher.breakingWhitespace()";
1276    }
1277  }
1278
1279  /** Implementation of {@link #ascii()}. */
1280  private static final class Ascii extends NamedFastMatcher {
1281
1282    static final CharMatcher INSTANCE = new Ascii();
1283
1284    Ascii() {
1285      super("CharMatcher.ascii()");
1286    }
1287
1288    @Override
1289    public boolean matches(char c) {
1290      return c <= '\u007f';
1291    }
1292  }
1293
1294  /** Implementation that matches characters that fall within multiple ranges. */
1295  private static class RangesMatcher extends CharMatcher {
1296
1297    private final String description;
1298    private final char[] rangeStarts;
1299    private final char[] rangeEnds;
1300
1301    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
1302      this.description = description;
1303      this.rangeStarts = rangeStarts;
1304      this.rangeEnds = rangeEnds;
1305      checkArgument(rangeStarts.length == rangeEnds.length);
1306      for (int i = 0; i < rangeStarts.length; i++) {
1307        checkArgument(rangeStarts[i] <= rangeEnds[i]);
1308        if (i + 1 < rangeStarts.length) {
1309          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
1310        }
1311      }
1312    }
1313
1314    @Override
1315    public boolean matches(char c) {
1316      int index = Arrays.binarySearch(rangeStarts, c);
1317      if (index >= 0) {
1318        return true;
1319      } else {
1320        index = ~index - 1;
1321        return index >= 0 && c <= rangeEnds[index];
1322      }
1323    }
1324
1325    @Override
1326    public String toString() {
1327      return description;
1328    }
1329  }
1330
1331  /** Implementation of {@link #digit()}. */
1332  private static final class Digit extends RangesMatcher {
1333    // Plug the following UnicodeSet pattern into
1334    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1335    // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]]
1336    // and get the zeroes from there.
1337
1338    // Must be in ascending order.
1339    private static final String ZEROES =
1340        "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6"
1341            + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0"
1342            + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10";
1343
1344    private static char[] zeroes() {
1345      return ZEROES.toCharArray();
1346    }
1347
1348    private static char[] nines() {
1349      char[] nines = new char[ZEROES.length()];
1350      for (int i = 0; i < ZEROES.length(); i++) {
1351        nines[i] = (char) (ZEROES.charAt(i) + 9);
1352      }
1353      return nines;
1354    }
1355
1356    static final CharMatcher INSTANCE = new Digit();
1357
1358    private Digit() {
1359      super("CharMatcher.digit()", zeroes(), nines());
1360    }
1361  }
1362
1363  /** Implementation of {@link #javaDigit()}. */
1364  private static final class JavaDigit extends CharMatcher {
1365
1366    static final CharMatcher INSTANCE = new JavaDigit();
1367
1368    @Override
1369    public boolean matches(char c) {
1370      return Character.isDigit(c);
1371    }
1372
1373    @Override
1374    public String toString() {
1375      return "CharMatcher.javaDigit()";
1376    }
1377  }
1378
1379  /** Implementation of {@link #javaLetter()}. */
1380  private static final class JavaLetter extends CharMatcher {
1381
1382    static final CharMatcher INSTANCE = new JavaLetter();
1383
1384    @Override
1385    public boolean matches(char c) {
1386      return Character.isLetter(c);
1387    }
1388
1389    @Override
1390    public String toString() {
1391      return "CharMatcher.javaLetter()";
1392    }
1393  }
1394
1395  /** Implementation of {@link #javaLetterOrDigit()}. */
1396  private static final class JavaLetterOrDigit extends CharMatcher {
1397
1398    static final CharMatcher INSTANCE = new JavaLetterOrDigit();
1399
1400    @Override
1401    public boolean matches(char c) {
1402      return Character.isLetterOrDigit(c);
1403    }
1404
1405    @Override
1406    public String toString() {
1407      return "CharMatcher.javaLetterOrDigit()";
1408    }
1409  }
1410
1411  /** Implementation of {@link #javaUpperCase()}. */
1412  private static final class JavaUpperCase extends CharMatcher {
1413
1414    static final CharMatcher INSTANCE = new JavaUpperCase();
1415
1416    @Override
1417    public boolean matches(char c) {
1418      return Character.isUpperCase(c);
1419    }
1420
1421    @Override
1422    public String toString() {
1423      return "CharMatcher.javaUpperCase()";
1424    }
1425  }
1426
1427  /** Implementation of {@link #javaLowerCase()}. */
1428  private static final class JavaLowerCase extends CharMatcher {
1429
1430    static final CharMatcher INSTANCE = new JavaLowerCase();
1431
1432    @Override
1433    public boolean matches(char c) {
1434      return Character.isLowerCase(c);
1435    }
1436
1437    @Override
1438    public String toString() {
1439      return "CharMatcher.javaLowerCase()";
1440    }
1441  }
1442
1443  /** Implementation of {@link #javaIsoControl()}. */
1444  private static final class JavaIsoControl extends NamedFastMatcher {
1445
1446    static final CharMatcher INSTANCE = new JavaIsoControl();
1447
1448    private JavaIsoControl() {
1449      super("CharMatcher.javaIsoControl()");
1450    }
1451
1452    @Override
1453    public boolean matches(char c) {
1454      return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f');
1455    }
1456  }
1457
1458  /** Implementation of {@link #invisible()}. */
1459  private static final class Invisible extends RangesMatcher {
1460    // Plug the following UnicodeSet pattern into
1461    // https://unicode.org/cldr/utility/list-unicodeset.jsp
1462    // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]]
1463    // with the "Abbreviate" option, and get the ranges from there.
1464    private static final String RANGE_STARTS =
1465        "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u0890\u08e2\u1680\u180e\u2000\u2028\u205f\u2066"
1466            + "\u3000\ud800\ufeff\ufff9";
1467    private static final String RANGE_ENDS = // inclusive ends
1468        "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u0891\u08e2\u1680\u180e\u200f\u202f\u2064\u206f"
1469            + "\u3000\uf8ff\ufeff\ufffb";
1470
1471    static final CharMatcher INSTANCE = new Invisible();
1472
1473    private Invisible() {
1474      super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray());
1475    }
1476  }
1477
1478  /** Implementation of {@link #singleWidth()}. */
1479  private static final class SingleWidth extends RangesMatcher {
1480
1481    static final CharMatcher INSTANCE = new SingleWidth();
1482
1483    private SingleWidth() {
1484      super(
1485          "CharMatcher.singleWidth()",
1486          "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
1487          "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
1488    }
1489  }
1490
1491  // Non-static factory implementation classes
1492
1493  /** Implementation of {@link #negate()}. */
1494  private static class Negated extends CharMatcher {
1495
1496    final CharMatcher original;
1497
1498    Negated(CharMatcher original) {
1499      this.original = checkNotNull(original);
1500    }
1501
1502    @Override
1503    public boolean matches(char c) {
1504      return !original.matches(c);
1505    }
1506
1507    @Override
1508    public boolean matchesAllOf(CharSequence sequence) {
1509      return original.matchesNoneOf(sequence);
1510    }
1511
1512    @Override
1513    public boolean matchesNoneOf(CharSequence sequence) {
1514      return original.matchesAllOf(sequence);
1515    }
1516
1517    @Override
1518    public int countIn(CharSequence sequence) {
1519      return sequence.length() - original.countIn(sequence);
1520    }
1521
1522    @GwtIncompatible // used only from other GwtIncompatible code
1523    @Override
1524    void setBits(BitSet table) {
1525      BitSet tmp = new BitSet();
1526      original.setBits(tmp);
1527      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
1528      table.or(tmp);
1529    }
1530
1531    @Override
1532    public CharMatcher negate() {
1533      return original;
1534    }
1535
1536    @Override
1537    public String toString() {
1538      return original + ".negate()";
1539    }
1540  }
1541
1542  /** Implementation of {@link #and(CharMatcher)}. */
1543  private static final class And extends CharMatcher {
1544
1545    final CharMatcher first;
1546    final CharMatcher second;
1547
1548    And(CharMatcher a, CharMatcher b) {
1549      first = checkNotNull(a);
1550      second = checkNotNull(b);
1551    }
1552
1553    @Override
1554    public boolean matches(char c) {
1555      return first.matches(c) && second.matches(c);
1556    }
1557
1558    @GwtIncompatible // used only from other GwtIncompatible code
1559    @Override
1560    void setBits(BitSet table) {
1561      BitSet tmp1 = new BitSet();
1562      first.setBits(tmp1);
1563      BitSet tmp2 = new BitSet();
1564      second.setBits(tmp2);
1565      tmp1.and(tmp2);
1566      table.or(tmp1);
1567    }
1568
1569    @Override
1570    public String toString() {
1571      return "CharMatcher.and(" + first + ", " + second + ")";
1572    }
1573  }
1574
1575  /** Implementation of {@link #or(CharMatcher)}. */
1576  private static final class Or extends CharMatcher {
1577
1578    final CharMatcher first;
1579    final CharMatcher second;
1580
1581    Or(CharMatcher a, CharMatcher b) {
1582      first = checkNotNull(a);
1583      second = checkNotNull(b);
1584    }
1585
1586    @GwtIncompatible // used only from other GwtIncompatible code
1587    @Override
1588    void setBits(BitSet table) {
1589      first.setBits(table);
1590      second.setBits(table);
1591    }
1592
1593    @Override
1594    public boolean matches(char c) {
1595      return first.matches(c) || second.matches(c);
1596    }
1597
1598    @Override
1599    public String toString() {
1600      return "CharMatcher.or(" + first + ", " + second + ")";
1601    }
1602  }
1603
1604  // Static factory implementations
1605
1606  /** Implementation of {@link #is(char)}. */
1607  private static final class Is extends FastMatcher {
1608
1609    private final char match;
1610
1611    Is(char match) {
1612      this.match = match;
1613    }
1614
1615    @Override
1616    public boolean matches(char c) {
1617      return c == match;
1618    }
1619
1620    @Override
1621    public String replaceFrom(CharSequence sequence, char replacement) {
1622      return sequence.toString().replace(match, replacement);
1623    }
1624
1625    @Override
1626    public CharMatcher and(CharMatcher other) {
1627      return other.matches(match) ? this : none();
1628    }
1629
1630    @Override
1631    public CharMatcher or(CharMatcher other) {
1632      return other.matches(match) ? other : super.or(other);
1633    }
1634
1635    @Override
1636    public CharMatcher negate() {
1637      return isNot(match);
1638    }
1639
1640    @GwtIncompatible // used only from other GwtIncompatible code
1641    @Override
1642    void setBits(BitSet table) {
1643      table.set(match);
1644    }
1645
1646    @Override
1647    public String toString() {
1648      return "CharMatcher.is('" + showCharacter(match) + "')";
1649    }
1650  }
1651
1652  /** Implementation of {@link #isNot(char)}. */
1653  private static final class IsNot extends FastMatcher {
1654
1655    private final char match;
1656
1657    IsNot(char match) {
1658      this.match = match;
1659    }
1660
1661    @Override
1662    public boolean matches(char c) {
1663      return c != match;
1664    }
1665
1666    @Override
1667    public CharMatcher and(CharMatcher other) {
1668      return other.matches(match) ? super.and(other) : other;
1669    }
1670
1671    @Override
1672    public CharMatcher or(CharMatcher other) {
1673      return other.matches(match) ? any() : this;
1674    }
1675
1676    @GwtIncompatible // used only from other GwtIncompatible code
1677    @Override
1678    void setBits(BitSet table) {
1679      table.set(0, match);
1680      table.set(match + 1, Character.MAX_VALUE + 1);
1681    }
1682
1683    @Override
1684    public CharMatcher negate() {
1685      return is(match);
1686    }
1687
1688    @Override
1689    public String toString() {
1690      return "CharMatcher.isNot('" + showCharacter(match) + "')";
1691    }
1692  }
1693
1694  private static CharMatcher.IsEither isEither(char c1, char c2) {
1695    return new CharMatcher.IsEither(c1, c2);
1696  }
1697
1698  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */
1699  private static final class IsEither extends FastMatcher {
1700
1701    private final char match1;
1702    private final char match2;
1703
1704    IsEither(char match1, char match2) {
1705      this.match1 = match1;
1706      this.match2 = match2;
1707    }
1708
1709    @Override
1710    public boolean matches(char c) {
1711      return c == match1 || c == match2;
1712    }
1713
1714    @GwtIncompatible // used only from other GwtIncompatible code
1715    @Override
1716    void setBits(BitSet table) {
1717      table.set(match1);
1718      table.set(match2);
1719    }
1720
1721    @Override
1722    public String toString() {
1723      return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")";
1724    }
1725  }
1726
1727  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */
1728  private static final class AnyOf extends CharMatcher {
1729
1730    private final char[] chars;
1731
1732    public AnyOf(CharSequence chars) {
1733      this.chars = chars.toString().toCharArray();
1734      Arrays.sort(this.chars);
1735    }
1736
1737    @Override
1738    public boolean matches(char c) {
1739      return Arrays.binarySearch(chars, c) >= 0;
1740    }
1741
1742    @Override
1743    @GwtIncompatible // used only from other GwtIncompatible code
1744    void setBits(BitSet table) {
1745      for (char c : chars) {
1746        table.set(c);
1747      }
1748    }
1749
1750    @Override
1751    public String toString() {
1752      StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
1753      for (char c : chars) {
1754        description.append(showCharacter(c));
1755      }
1756      description.append("\")");
1757      return description.toString();
1758    }
1759  }
1760
1761  /** Implementation of {@link #inRange(char, char)}. */
1762  private static final class InRange extends FastMatcher {
1763
1764    private final char startInclusive;
1765    private final char endInclusive;
1766
1767    InRange(char startInclusive, char endInclusive) {
1768      checkArgument(endInclusive >= startInclusive);
1769      this.startInclusive = startInclusive;
1770      this.endInclusive = endInclusive;
1771    }
1772
1773    @Override
1774    public boolean matches(char c) {
1775      return startInclusive <= c && c <= endInclusive;
1776    }
1777
1778    @GwtIncompatible // used only from other GwtIncompatible code
1779    @Override
1780    void setBits(BitSet table) {
1781      table.set(startInclusive, endInclusive + 1);
1782    }
1783
1784    @Override
1785    public String toString() {
1786      return "CharMatcher.inRange('"
1787          + showCharacter(startInclusive)
1788          + "', '"
1789          + showCharacter(endInclusive)
1790          + "')";
1791    }
1792  }
1793
1794  /** Implementation of {@link #forPredicate(Predicate)}. */
1795  private static final class ForPredicate extends CharMatcher {
1796
1797    private final Predicate<? super Character> predicate;
1798
1799    ForPredicate(Predicate<? super Character> predicate) {
1800      this.predicate = checkNotNull(predicate);
1801    }
1802
1803    @Override
1804    public boolean matches(char c) {
1805      return predicate.apply(c);
1806    }
1807
1808    @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily
1809    @Override
1810    public boolean apply(Character character) {
1811      return predicate.apply(checkNotNull(character));
1812    }
1813
1814    @Override
1815    public String toString() {
1816      return "CharMatcher.forPredicate(" + predicate + ")";
1817    }
1818  }
1819}