001    /*
002     * Copyright (C) 2008 The Guava Authors
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     * http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package com.google.common.base;
018    
019    import static com.google.common.base.Preconditions.checkArgument;
020    import static com.google.common.base.Preconditions.checkNotNull;
021    
022    import com.google.common.annotations.Beta;
023    import com.google.common.annotations.GwtCompatible;
024    
025    import java.util.ArrayList;
026    import java.util.Arrays;
027    import java.util.List;
028    
029    /**
030     * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
031     * for any {@link Object}. Also offers basic text processing methods based on this function.
032     * Implementations are strongly encouraged to be side-effect-free and immutable.
033     *
034     * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
035     * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
036     *
037     * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
038     * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
039     * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
040     * treats these just as two separate characters.
041     *
042     * @author Kevin Bourrillion
043     * @since 1
044     */
045    @Beta // Possibly change from chars to code points; decide constants vs. methods
046    @GwtCompatible
047    public abstract class CharMatcher implements Predicate<Character> {
048      // Constants
049    
050      // Excludes 2000-2000a, which is handled as a range
051      private static final String BREAKING_WHITESPACE_CHARS =
052          "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
053    
054      // Excludes 2007, which is handled as a gap in a pair of ranges
055      private static final String NON_BREAKING_WHITESPACE_CHARS =
056          "\u00a0\u180e\u202f";
057    
058      /**
059       * Determines whether a character is whitespace according to the latest Unicode standard, as
060       * illustrated
061       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
062       * This is not the same definition used by other Java APIs. (See a
063       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
064       * definitions of "whitespace"</a>.)
065       *
066       * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
067       * to date.
068       */
069      public static final CharMatcher WHITESPACE =
070          anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
071              .or(inRange('\u2000', '\u200a'))
072              .precomputed();
073    
074      /**
075       * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
076       * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
077       * discussion of that term.
078       *
079       * @since 2
080       */
081      public static final CharMatcher BREAKING_WHITESPACE =
082          anyOf(BREAKING_WHITESPACE_CHARS)
083              .or(inRange('\u2000', '\u2006'))
084              .or(inRange('\u2008', '\u200a'))
085              .precomputed();
086    
087      /**
088       * Determines whether a character is ASCII, meaning that its code point is less than 128.
089       */
090      public static final CharMatcher ASCII = inRange('\0', '\u007f');
091    
092      /**
093       * Determines whether a character is a digit according to
094       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
095       */
096      public static final CharMatcher DIGIT;
097    
098      static {
099        CharMatcher digit = inRange('0', '9');
100        String zeroes =
101            "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
102                + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
103                + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
104        for (char base : zeroes.toCharArray()) {
105          digit = digit.or(inRange(base, (char) (base + 9)));
106        }
107        DIGIT = digit.precomputed();
108      }
109    
110      /**
111       * Determines whether a character is whitespace according to {@link Character#isWhitespace(char)
112       * Java's definition}; it is usually preferable to use {@link #WHITESPACE}. (See a
113       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
114       * definitions of "whitespace"</a>.)
115       */
116      public static final CharMatcher JAVA_WHITESPACE =
117          inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
118              .or(inRange('\u001c', '\u0020'))
119              .or(is('\u1680'))
120              .or(is('\u180e'))
121              .or(inRange('\u2000', '\u2006'))
122              .or(inRange('\u2008', '\u200b'))
123              .or(inRange('\u2028', '\u2029'))
124              .or(is('\u205f'))
125              .or(is('\u3000'))
126              .precomputed();
127    
128      /**
129       * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
130       * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
131       */
132      public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
133        @Override public boolean matches(char c) {
134          return Character.isDigit(c);
135        }
136      };
137    
138      /**
139       * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
140       * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
141       * inRange('a', 'z').or(inRange('A', 'Z'))}.
142       */
143      public static final CharMatcher JAVA_LETTER = new CharMatcher() {
144        @Override public boolean matches(char c) {
145          return Character.isLetter(c);
146        }
147      };
148    
149      /**
150       * Determines whether a character is a letter or digit according to {@link
151       * Character#isLetterOrDigit(char) Java's definition}.
152       */
153      public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
154        @Override public boolean matches(char c) {
155          return Character.isLetterOrDigit(c);
156        }
157      };
158    
159      /**
160       * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
161       * Java's definition}.
162       */
163      public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
164        @Override public boolean matches(char c) {
165          return Character.isUpperCase(c);
166        }
167      };
168    
169      /**
170       * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
171       * Java's definition}.
172       */
173      public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
174        @Override public boolean matches(char c) {
175          return Character.isLowerCase(c);
176        }
177      };
178    
179      /**
180       * Determines whether a character is an ISO control character as specified by {@link
181       * Character#isISOControl(char)}.
182       */
183      public static final CharMatcher JAVA_ISO_CONTROL =
184          inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
185    
186      /**
187       * Determines whether a character is invisible; that is, if its Unicode category is any of
188       * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
189       * PRIVATE_USE according to ICU4J.
190       */
191      public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
192          .or(inRange('\u007f', '\u00a0'))
193          .or(is('\u00ad'))
194          .or(inRange('\u0600', '\u0603'))
195          .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
196          .or(inRange('\u2000', '\u200f'))
197          .or(inRange('\u2028', '\u202f'))
198          .or(inRange('\u205f', '\u2064'))
199          .or(inRange('\u206a', '\u206f'))
200          .or(is('\u3000'))
201          .or(inRange('\ud800', '\uf8ff'))
202          .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
203          .precomputed();
204    
205      /**
206       * Determines whether a character is single-width (not double-width). When in doubt, this matcher
207       * errs on the side of returning {@code false} (that is, it tends to assume a character is
208       * double-width).
209       *
210       * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
211       * date.
212       */
213      public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
214          .or(is('\u05be'))
215          .or(inRange('\u05d0', '\u05ea'))
216          .or(is('\u05f3'))
217          .or(is('\u05f4'))
218          .or(inRange('\u0600', '\u06ff'))
219          .or(inRange('\u0750', '\u077f'))
220          .or(inRange('\u0e00', '\u0e7f'))
221          .or(inRange('\u1e00', '\u20af'))
222          .or(inRange('\u2100', '\u213a'))
223          .or(inRange('\ufb50', '\ufdff'))
224          .or(inRange('\ufe70', '\ufeff'))
225          .or(inRange('\uff61', '\uffdc'))
226          .precomputed();
227    
228      /** Matches any character. */
229      public static final CharMatcher ANY =
230          new CharMatcher() {
231            @Override public boolean matches(char c) {
232              return true;
233            }
234    
235            @Override public int indexIn(CharSequence sequence) {
236              return (sequence.length() == 0) ? -1 : 0;
237            }
238    
239            @Override public int indexIn(CharSequence sequence, int start) {
240              int length = sequence.length();
241              Preconditions.checkPositionIndex(start, length);
242              return (start == length) ? -1 : start;
243            }
244    
245            @Override public int lastIndexIn(CharSequence sequence) {
246              return sequence.length() - 1;
247            }
248    
249            @Override public boolean matchesAllOf(CharSequence sequence) {
250              checkNotNull(sequence);
251              return true;
252            }
253    
254            @Override public boolean matchesNoneOf(CharSequence sequence) {
255              return sequence.length() == 0;
256            }
257    
258            @Override public String removeFrom(CharSequence sequence) {
259              checkNotNull(sequence);
260              return "";
261            }
262    
263            @Override public String replaceFrom(CharSequence sequence, char replacement) {
264              char[] array = new char[sequence.length()];
265              Arrays.fill(array, replacement);
266              return new String(array);
267            }
268    
269            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
270              StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
271              for (int i = 0; i < sequence.length(); i++) {
272                retval.append(replacement);
273              }
274              return retval.toString();
275            }
276    
277            @Override public String collapseFrom(CharSequence sequence, char replacement) {
278              return (sequence.length() == 0) ? "" : String.valueOf(replacement);
279            }
280    
281            @Override public String trimFrom(CharSequence sequence) {
282              checkNotNull(sequence);
283              return "";
284            }
285    
286            @Override public int countIn(CharSequence sequence) {
287              return sequence.length();
288            }
289    
290            @Override public CharMatcher and(CharMatcher other) {
291              return checkNotNull(other);
292            }
293    
294            @Override public CharMatcher or(CharMatcher other) {
295              checkNotNull(other);
296              return this;
297            }
298    
299            @Override public CharMatcher negate() {
300              return NONE;
301            }
302    
303            @Override public CharMatcher precomputed() {
304              return this;
305            }
306          };
307    
308      /** Matches no characters. */
309      public static final CharMatcher NONE =
310          new CharMatcher() {
311            @Override public boolean matches(char c) {
312              return false;
313            }
314    
315            @Override public int indexIn(CharSequence sequence) {
316              checkNotNull(sequence);
317              return -1;
318            }
319    
320            @Override public int indexIn(CharSequence sequence, int start) {
321              int length = sequence.length();
322              Preconditions.checkPositionIndex(start, length);
323              return -1;
324            }
325    
326            @Override public int lastIndexIn(CharSequence sequence) {
327              checkNotNull(sequence);
328              return -1;
329            }
330    
331            @Override public boolean matchesAllOf(CharSequence sequence) {
332              return sequence.length() == 0;
333            }
334    
335            @Override public boolean matchesNoneOf(CharSequence sequence) {
336              checkNotNull(sequence);
337              return true;
338            }
339    
340            @Override public String removeFrom(CharSequence sequence) {
341              return sequence.toString();
342            }
343    
344            @Override public String replaceFrom(CharSequence sequence, char replacement) {
345              return sequence.toString();
346            }
347    
348            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
349              checkNotNull(replacement);
350              return sequence.toString();
351            }
352    
353            @Override public String collapseFrom(CharSequence sequence, char replacement) {
354              return sequence.toString();
355            }
356    
357            @Override public String trimFrom(CharSequence sequence) {
358              return sequence.toString();
359            }
360    
361            @Override public int countIn(CharSequence sequence) {
362              checkNotNull(sequence);
363              return 0;
364            }
365    
366            @Override public CharMatcher and(CharMatcher other) {
367              checkNotNull(other);
368              return this;
369            }
370    
371            @Override public CharMatcher or(CharMatcher other) {
372              return checkNotNull(other);
373            }
374    
375            @Override public CharMatcher negate() {
376              return ANY;
377            }
378    
379            @Override void setBits(LookupTable table) {}
380    
381            @Override public CharMatcher precomputed() {
382              return this;
383            }
384          };
385    
386      // Static factories
387    
388      /**
389       * Returns a {@code char} matcher that matches only one specified character.
390       */
391      public static CharMatcher is(final char match) {
392        return new CharMatcher() {
393          @Override public boolean matches(char c) {
394            return c == match;
395          }
396    
397          @Override public String replaceFrom(CharSequence sequence, char replacement) {
398            return sequence.toString().replace(match, replacement);
399          }
400    
401          @Override public CharMatcher and(CharMatcher other) {
402            return other.matches(match) ? this : NONE;
403          }
404    
405          @Override public CharMatcher or(CharMatcher other) {
406            return other.matches(match) ? other : super.or(other);
407          }
408    
409          @Override public CharMatcher negate() {
410            return isNot(match);
411          }
412    
413          @Override void setBits(LookupTable table) {
414            table.set(match);
415          }
416    
417          @Override public CharMatcher precomputed() {
418            return this;
419          }
420        };
421      }
422    
423      /**
424       * Returns a {@code char} matcher that matches any character except the one specified.
425       *
426       * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
427       */
428      public static CharMatcher isNot(final char match) {
429        return new CharMatcher() {
430          @Override public boolean matches(char c) {
431            return c != match;
432          }
433    
434          @Override public CharMatcher and(CharMatcher other) {
435            return other.matches(match) ? super.and(other) : other;
436          }
437    
438          @Override public CharMatcher or(CharMatcher other) {
439            return other.matches(match) ? ANY : this;
440          }
441    
442          @Override public CharMatcher negate() {
443            return is(match);
444          }
445        };
446      }
447    
448      /**
449       * Returns a {@code char} matcher that matches any character present in the given character
450       * sequence.
451       */
452      public static CharMatcher anyOf(final CharSequence sequence) {
453        switch (sequence.length()) {
454          case 0:
455            return NONE;
456          case 1:
457            return is(sequence.charAt(0));
458          case 2:
459            final char match1 = sequence.charAt(0);
460            final char match2 = sequence.charAt(1);
461            return new CharMatcher() {
462              @Override public boolean matches(char c) {
463                return c == match1 || c == match2;
464              }
465    
466              @Override void setBits(LookupTable table) {
467                table.set(match1);
468                table.set(match2);
469              }
470    
471              @Override public CharMatcher precomputed() {
472                return this;
473              }
474            };
475        }
476    
477        final char[] chars = sequence.toString().toCharArray();
478        Arrays.sort(chars); // not worth collapsing duplicates
479    
480        return new CharMatcher() {
481          @Override public boolean matches(char c) {
482            return Arrays.binarySearch(chars, c) >= 0;
483          }
484    
485          @Override void setBits(LookupTable table) {
486            for (char c : chars) {
487              table.set(c);
488            }
489          }
490        };
491      }
492    
493      /**
494       * Returns a {@code char} matcher that matches any character not present in the given character
495       * sequence.
496       */
497      public static CharMatcher noneOf(CharSequence sequence) {
498        return anyOf(sequence).negate();
499      }
500    
501      /**
502       * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
503       * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
504       * CharMatcher.inRange('a', 'z')}.
505       *
506       * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
507       */
508      public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
509        checkArgument(endInclusive >= startInclusive);
510        return new CharMatcher() {
511          @Override public boolean matches(char c) {
512            return startInclusive <= c && c <= endInclusive;
513          }
514    
515          @Override void setBits(LookupTable table) {
516            char c = startInclusive;
517            while (true) {
518              table.set(c);
519              if (c++ == endInclusive) {
520                break;
521              }
522            }
523          }
524    
525          @Override public CharMatcher precomputed() {
526            return this;
527          }
528        };
529      }
530    
531      /**
532       * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
533       * which operates on primitive {@code char} instances instead.
534       */
535      public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
536        checkNotNull(predicate);
537        if (predicate instanceof CharMatcher) {
538          return (CharMatcher) predicate;
539        }
540        return new CharMatcher() {
541          @Override public boolean matches(char c) {
542            return predicate.apply(c);
543          }
544    
545          @Override public boolean apply(Character character) {
546            return predicate.apply(checkNotNull(character));
547          }
548        };
549      }
550    
551      // Abstract methods
552    
553      /** Determines a true or false value for the given character. */
554      public abstract boolean matches(char c);
555    
556      // Non-static factories
557    
558      /**
559       * Returns a matcher that matches any character not matched by this matcher.
560       */
561      public CharMatcher negate() {
562        final CharMatcher original = this;
563        return new CharMatcher() {
564          @Override public boolean matches(char c) {
565            return !original.matches(c);
566          }
567    
568          @Override public boolean matchesAllOf(CharSequence sequence) {
569            return original.matchesNoneOf(sequence);
570          }
571    
572          @Override public boolean matchesNoneOf(CharSequence sequence) {
573            return original.matchesAllOf(sequence);
574          }
575    
576          @Override public int countIn(CharSequence sequence) {
577            return sequence.length() - original.countIn(sequence);
578          }
579    
580          @Override public CharMatcher negate() {
581            return original;
582          }
583        };
584      }
585    
586      /**
587       * Returns a matcher that matches any character matched by both this matcher and {@code other}.
588       */
589      public CharMatcher and(CharMatcher other) {
590        return new And(Arrays.asList(this, checkNotNull(other)));
591      }
592    
593      private static class And extends CharMatcher {
594        List<CharMatcher> components;
595    
596        And(List<CharMatcher> components) {
597          this.components = components; // Skip defensive copy (private)
598        }
599    
600        @Override public boolean matches(char c) {
601          for (CharMatcher matcher : components) {
602            if (!matcher.matches(c)) {
603              return false;
604            }
605          }
606          return true;
607        }
608    
609        @Override public CharMatcher and(CharMatcher other) {
610          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
611          newComponents.add(checkNotNull(other));
612          return new And(newComponents);
613        }
614      }
615    
616      /**
617       * Returns a matcher that matches any character matched by either this matcher or {@code other}.
618       */
619      public CharMatcher or(CharMatcher other) {
620        return new Or(Arrays.asList(this, checkNotNull(other)));
621      }
622    
623      private static class Or extends CharMatcher {
624        List<CharMatcher> components;
625    
626        Or(List<CharMatcher> components) {
627          this.components = components; // Skip defensive copy (private)
628        }
629    
630        @Override public boolean matches(char c) {
631          for (CharMatcher matcher : components) {
632            if (matcher.matches(c)) {
633              return true;
634            }
635          }
636          return false;
637        }
638    
639        @Override public CharMatcher or(CharMatcher other) {
640          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
641          newComponents.add(checkNotNull(other));
642          return new Or(newComponents);
643        }
644    
645        @Override void setBits(LookupTable table) {
646          for (CharMatcher matcher : components) {
647            matcher.setBits(table);
648          }
649        }
650      }
651    
652      /**
653       * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
654       * query than the original; your mileage may vary. Precomputation takes time and is likely to be
655       * worthwhile only if the precomputed matcher is queried many thousands of times.
656       *
657       * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
658       * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
659       * worthwhile tradeoff in a browser.
660       */
661      public CharMatcher precomputed() {
662        return Platform.precomputeCharMatcher(this);
663      }
664    
665      /**
666       * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
667       * on {@link Platform} so that we can have different behavior in GWT.
668       *
669       * <p>The default precomputation is to cache the configuration of the original matcher in an
670       * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
671       * than the original.
672       *
673       * <p>The default implementation creates a new bit array and passes it to {@link
674       * #setBits(LookupTable)}.
675       */
676      CharMatcher precomputedInternal() {
677        final LookupTable table = new LookupTable();
678        setBits(table);
679    
680        return new CharMatcher() {
681          @Override public boolean matches(char c) {
682            return table.get(c);
683          }
684    
685          // TODO(kevinb): make methods like negate() smart?
686    
687          @Override public CharMatcher precomputed() {
688            return this;
689          }
690        };
691      }
692    
693      /**
694       * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
695       * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
696       *
697       * <p>The default implementation loops over every possible character value, invoking {@link
698       * #matches} for each one.
699       */
700      void setBits(LookupTable table) {
701        char c = Character.MIN_VALUE;
702        while (true) {
703          if (matches(c)) {
704            table.set(c);
705          }
706          if (c++ == Character.MAX_VALUE) {
707            break;
708          }
709        }
710      }
711    
712      /**
713       * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
714       *
715       * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
716       * simpler java.util.BitSet.
717       */
718      private static final class LookupTable {
719        int[] data = new int[2048];
720    
721        void set(char index) {
722          data[index >> 5] |= (1 << index);
723        }
724    
725        boolean get(char index) {
726          return (data[index >> 5] & (1 << index)) != 0;
727        }
728      }
729    
730      // Text processing routines
731    
732      /**
733       * Returns {@code true} if a character sequence contains at least one matching character.
734       * Equivalent to {@code !matchesNoneOf(sequence)}.
735       *
736       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
737       * character, until this returns {@code true} or the end is reached.
738       *
739       * @param sequence the character sequence to examine, possibly empty
740       * @return {@code true} if this matcher matches at least one character in the sequence
741       * @since 8
742       */
743      public boolean matchesAnyOf(CharSequence sequence) {
744        return !matchesNoneOf(sequence);
745      }
746    
747      /**
748       * Returns {@code true} if a character sequence contains only matching characters.
749       *
750       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
751       * character, until this returns {@code false} or the end is reached.
752       *
753       * @param sequence the character sequence to examine, possibly empty
754       * @return {@code true} if this matcher matches every character in the sequence, including when
755       *         the sequence is empty
756       */
757      public boolean matchesAllOf(CharSequence sequence) {
758        for (int i = sequence.length() - 1; i >= 0; i--) {
759          if (!matches(sequence.charAt(i))) {
760            return false;
761          }
762        }
763        return true;
764      }
765    
766      /**
767       * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
768       * {@code !matchesAnyOf(sequence)}.
769       *
770       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
771       * character, until this returns {@code false} or the end is reached.
772       *
773       * @param sequence the character sequence to examine, possibly empty
774       * @return {@code true} if this matcher matches every character in the sequence, including when
775       *         the sequence is empty
776       */
777      public boolean matchesNoneOf(CharSequence sequence) {
778        return indexIn(sequence) == -1;
779      }
780    
781      // TODO(kevinb): add matchesAnyOf()
782    
783      /**
784       * Returns the index of the first matching character in a character sequence, or {@code -1} if no
785       * matching character is present.
786       *
787       * <p>The default implementation iterates over the sequence in forward order calling {@link
788       * #matches} for each character.
789       *
790       * @param sequence the character sequence to examine from the beginning
791       * @return an index, or {@code -1} if no character matches
792       */
793      public int indexIn(CharSequence sequence) {
794        int length = sequence.length();
795        for (int i = 0; i < length; i++) {
796          if (matches(sequence.charAt(i))) {
797            return i;
798          }
799        }
800        return -1;
801      }
802    
803      /**
804       * Returns the index of the first matching character in a character sequence, starting from a
805       * given position, or {@code -1} if no character matches after that position.
806       *
807       * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
808       * start}, calling {@link #matches} for each character.
809       *
810       * @param sequence the character sequence to examine
811       * @param start the first index to examine; must be nonnegative and no greater than {@code
812       *        sequence.length()}
813       * @return the index of the first matching character, guaranteed to be no less than {@code start},
814       *         or {@code -1} if no character matches
815       * @throws IndexOutOfBoundsException if start is negative or greater than {@code
816       *         sequence.length()}
817       */
818      public int indexIn(CharSequence sequence, int start) {
819        int length = sequence.length();
820        Preconditions.checkPositionIndex(start, length);
821        for (int i = start; i < length; i++) {
822          if (matches(sequence.charAt(i))) {
823            return i;
824          }
825        }
826        return -1;
827      }
828    
829      /**
830       * Returns the index of the last matching character in a character sequence, or {@code -1} if no
831       * matching character is present.
832       *
833       * <p>The default implementation iterates over the sequence in reverse order calling {@link
834       * #matches} for each character.
835       *
836       * @param sequence the character sequence to examine from the end
837       * @return an index, or {@code -1} if no character matches
838       */
839      public int lastIndexIn(CharSequence sequence) {
840        for (int i = sequence.length() - 1; i >= 0; i--) {
841          if (matches(sequence.charAt(i))) {
842            return i;
843          }
844        }
845        return -1;
846      }
847    
848      /**
849       * Returns the number of matching characters found in a character sequence.
850       */
851      public int countIn(CharSequence sequence) {
852        int count = 0;
853        for (int i = 0; i < sequence.length(); i++) {
854          if (matches(sequence.charAt(i))) {
855            count++;
856          }
857        }
858        return count;
859      }
860    
861      /**
862       * Returns a string containing all non-matching characters of a character sequence, in order. For
863       * example: <pre>   {@code
864       *
865       *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
866       *
867       * ... returns {@code "bzr"}.
868       */
869      public String removeFrom(CharSequence sequence) {
870        String string = sequence.toString();
871        int pos = indexIn(string);
872        if (pos == -1) {
873          return string;
874        }
875    
876        char[] chars = string.toCharArray();
877        int spread = 1;
878    
879        // This unusual loop comes from extensive benchmarking
880        OUT: while (true) {
881          pos++;
882          while (true) {
883            if (pos == chars.length) {
884              break OUT;
885            }
886            if (matches(chars[pos])) {
887              break;
888            }
889            chars[pos - spread] = chars[pos];
890            pos++;
891          }
892          spread++;
893        }
894        return new String(chars, 0, pos - spread);
895      }
896    
897      /**
898       * Returns a string containing all matching characters of a character sequence, in order. For
899       * example: <pre>   {@code
900       *
901       *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
902       *
903       * ... returns {@code "aaa"}.
904       */
905      public String retainFrom(CharSequence sequence) {
906        return negate().removeFrom(sequence);
907      }
908    
909      /**
910       * Returns a string copy of the input character sequence, with each character that matches this
911       * matcher replaced by a given replacement character. For example: <pre>   {@code
912       *
913       *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
914       *
915       * ... returns {@code "rodor"}.
916       *
917       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
918       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
919       * character.
920       *
921       * @param sequence the character sequence to replace matching characters in
922       * @param replacement the character to append to the result string in place of each matching
923       *        character in {@code sequence}
924       * @return the new string
925       */
926      public String replaceFrom(CharSequence sequence, char replacement) {
927        String string = sequence.toString();
928        int pos = indexIn(string);
929        if (pos == -1) {
930          return string;
931        }
932        char[] chars = string.toCharArray();
933        chars[pos] = replacement;
934        for (int i = pos + 1; i < chars.length; i++) {
935          if (matches(chars[i])) {
936            chars[i] = replacement;
937          }
938        }
939        return new String(chars);
940      }
941    
942      /**
943       * Returns a string copy of the input character sequence, with each character that matches this
944       * matcher replaced by a given replacement sequence. For example: <pre>   {@code
945       *
946       *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
947       *
948       * ... returns {@code "yoohoo"}.
949       *
950       * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
951       * off calling {@link #replaceFrom(CharSequence, char)} directly.
952       *
953       * @param sequence the character sequence to replace matching characters in
954       * @param replacement the characters to append to the result string in place of each matching
955       *        character in {@code sequence}
956       * @return the new string
957       */
958      public String replaceFrom(CharSequence sequence, CharSequence replacement) {
959        int replacementLen = replacement.length();
960        if (replacementLen == 0) {
961          return removeFrom(sequence);
962        }
963        if (replacementLen == 1) {
964          return replaceFrom(sequence, replacement.charAt(0));
965        }
966    
967        String string = sequence.toString();
968        int pos = indexIn(string);
969        if (pos == -1) {
970          return string;
971        }
972    
973        int len = string.length();
974        StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
975    
976        int oldpos = 0;
977        do {
978          buf.append(string, oldpos, pos);
979          buf.append(replacement);
980          oldpos = pos + 1;
981          pos = indexIn(string, oldpos);
982        } while (pos != -1);
983    
984        buf.append(string, oldpos, len);
985        return buf.toString();
986      }
987    
988      /**
989       * Returns a substring of the input character sequence that omits all characters this matcher
990       * matches from the beginning and from the end of the string. For example: <pre>   {@code
991       *
992       *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
993       *
994       * ... returns {@code "cat"}.
995       *
996       * <p>Note that: <pre>   {@code
997       *
998       *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
999       *
1000       * ... is equivalent to {@link String#trim()}.
1001       */
1002      public String trimFrom(CharSequence sequence) {
1003        int len = sequence.length();
1004        int first;
1005        int last;
1006    
1007        for (first = 0; first < len; first++) {
1008          if (!matches(sequence.charAt(first))) {
1009            break;
1010          }
1011        }
1012        for (last = len - 1; last > first; last--) {
1013          if (!matches(sequence.charAt(last))) {
1014            break;
1015          }
1016        }
1017    
1018        return sequence.subSequence(first, last + 1).toString();
1019      }
1020    
1021      /**
1022       * Returns a substring of the input character sequence that omits all characters this matcher
1023       * matches from the beginning of the string. For example: <pre> {@code
1024       *
1025       *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1026       *
1027       * ... returns {@code "catbab"}.
1028       */
1029      public String trimLeadingFrom(CharSequence sequence) {
1030        int len = sequence.length();
1031        int first;
1032    
1033        for (first = 0; first < len; first++) {
1034          if (!matches(sequence.charAt(first))) {
1035            break;
1036          }
1037        }
1038    
1039        return sequence.subSequence(first, len).toString();
1040      }
1041    
1042      /**
1043       * Returns a substring of the input character sequence that omits all characters this matcher
1044       * matches from the end of the string. For example: <pre> {@code
1045       *
1046       *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1047       *
1048       * ... returns {@code "abacat"}.
1049       */
1050      public String trimTrailingFrom(CharSequence sequence) {
1051        int len = sequence.length();
1052        int last;
1053    
1054        for (last = len - 1; last >= 0; last--) {
1055          if (!matches(sequence.charAt(last))) {
1056            break;
1057          }
1058        }
1059    
1060        return sequence.subSequence(0, last + 1).toString();
1061      }
1062    
1063      /**
1064       * Returns a string copy of the input character sequence, with each group of consecutive
1065       * characters that match this matcher replaced by a single replacement character. For example:
1066       * <pre>   {@code
1067       *
1068       *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1069       *
1070       * ... returns {@code "b-p-r"}.
1071       *
1072       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1073       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1074       * character.
1075       *
1076       * @param sequence the character sequence to replace matching groups of characters in
1077       * @param replacement the character to append to the result string in place of each group of
1078       *        matching characters in {@code sequence}
1079       * @return the new string
1080       */
1081      public String collapseFrom(CharSequence sequence, char replacement) {
1082        int first = indexIn(sequence);
1083        if (first == -1) {
1084          return sequence.toString();
1085        }
1086    
1087        // TODO(kevinb): see if this implementation can be made faster
1088        StringBuilder builder = new StringBuilder(sequence.length())
1089            .append(sequence.subSequence(0, first))
1090            .append(replacement);
1091        boolean in = true;
1092        for (int i = first + 1; i < sequence.length(); i++) {
1093          char c = sequence.charAt(i);
1094          if (apply(c)) {
1095            if (!in) {
1096              builder.append(replacement);
1097              in = true;
1098            }
1099          } else {
1100            builder.append(c);
1101            in = false;
1102          }
1103        }
1104        return builder.toString();
1105      }
1106    
1107      /**
1108       * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1109       * groups of matching characters at the start or end of the sequence are removed without
1110       * replacement.
1111       */
1112      public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1113        int first = negate().indexIn(sequence);
1114        if (first == -1) {
1115          return ""; // everything matches. nothing's left.
1116        }
1117        StringBuilder builder = new StringBuilder(sequence.length());
1118        boolean inMatchingGroup = false;
1119        for (int i = first; i < sequence.length(); i++) {
1120          char c = sequence.charAt(i);
1121          if (apply(c)) {
1122            inMatchingGroup = true;
1123          } else {
1124            if (inMatchingGroup) {
1125              builder.append(replacement);
1126              inMatchingGroup = false;
1127            }
1128            builder.append(c);
1129          }
1130        }
1131        return builder.toString();
1132      }
1133    
1134      // Predicate interface
1135    
1136      /**
1137       * Returns {@code true} if this matcher matches the given character.
1138       *
1139       * @throws NullPointerException if {@code character} is null
1140       */
1141      @Override public boolean apply(Character character) {
1142        return matches(character);
1143      }
1144    }