001    /*
002     * Copyright (C) 2008 The Guava Authors
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     * http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package com.google.common.base;
018    
019    import static com.google.common.base.Preconditions.checkArgument;
020    import static com.google.common.base.Preconditions.checkNotNull;
021    
022    import com.google.common.annotations.Beta;
023    import com.google.common.annotations.GwtCompatible;
024    
025    import java.util.ArrayList;
026    import java.util.Arrays;
027    import java.util.List;
028    
029    import javax.annotation.CheckReturnValue;
030    
031    /**
032     * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
033     * for any {@link Object}. Also offers basic text processing methods based on this function.
034     * Implementations are strongly encouraged to be side-effect-free and immutable.
035     *
036     * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
037     * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
038     *
039     * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
040     * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
041     * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
042     * treats these just as two separate characters.
043     *
044     * <p>Example usages: <pre>
045     *   String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput);
046     *   if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
047     *
048     * @author Kevin Bourrillion
049     * @since 1.0
050     */
051    @Beta // Possibly change from chars to code points; decide constants vs. methods
052    @GwtCompatible
053    public abstract class CharMatcher implements Predicate<Character> {
054      // Constants
055    
056      // Excludes 2000-2000a, which is handled as a range
057      private static final String BREAKING_WHITESPACE_CHARS =
058          "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
059    
060      // Excludes 2007, which is handled as a gap in a pair of ranges
061      private static final String NON_BREAKING_WHITESPACE_CHARS =
062          "\u00a0\u180e\u202f";
063    
064      /**
065       * Determines whether a character is whitespace according to the latest Unicode standard, as
066       * illustrated
067       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
068       * This is not the same definition used by other Java APIs. (See a
069       * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
070       * definitions of "whitespace"</a>.)
071       *
072       * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
073       * to date.
074       */
075      public static final CharMatcher WHITESPACE =
076          anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
077              .or(inRange('\u2000', '\u200a'))
078              .precomputed();
079    
080      /**
081       * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
082       * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
083       * discussion of that term.
084       *
085       * @since 2.0
086       */
087      public static final CharMatcher BREAKING_WHITESPACE =
088          anyOf(BREAKING_WHITESPACE_CHARS)
089              .or(inRange('\u2000', '\u2006'))
090              .or(inRange('\u2008', '\u200a'))
091              .precomputed();
092    
093      /**
094       * Determines whether a character is ASCII, meaning that its code point is less than 128.
095       */
096      public static final CharMatcher ASCII = inRange('\0', '\u007f');
097    
098      /**
099       * Determines whether a character is a digit according to
100       * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
101       */
102      public static final CharMatcher DIGIT;
103    
104      static {
105        CharMatcher digit = inRange('0', '9');
106        String zeroes =
107            "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
108                + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
109                + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
110        for (char base : zeroes.toCharArray()) {
111          digit = digit.or(inRange(base, (char) (base + 9)));
112        }
113        DIGIT = digit.precomputed();
114      }
115    
116      /**
117       * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
118       * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
119       */
120      public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
121        @Override public boolean matches(char c) {
122          return Character.isDigit(c);
123        }
124      };
125    
126      /**
127       * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
128       * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
129       * inRange('a', 'z').or(inRange('A', 'Z'))}.
130       */
131      public static final CharMatcher JAVA_LETTER = new CharMatcher() {
132        @Override public boolean matches(char c) {
133          return Character.isLetter(c);
134        }
135      };
136    
137      /**
138       * Determines whether a character is a letter or digit according to {@link
139       * Character#isLetterOrDigit(char) Java's definition}.
140       */
141      public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
142        @Override public boolean matches(char c) {
143          return Character.isLetterOrDigit(c);
144        }
145      };
146    
147      /**
148       * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
149       * Java's definition}.
150       */
151      public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
152        @Override public boolean matches(char c) {
153          return Character.isUpperCase(c);
154        }
155      };
156    
157      /**
158       * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
159       * Java's definition}.
160       */
161      public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
162        @Override public boolean matches(char c) {
163          return Character.isLowerCase(c);
164        }
165      };
166    
167      /**
168       * Determines whether a character is an ISO control character as specified by {@link
169       * Character#isISOControl(char)}.
170       */
171      public static final CharMatcher JAVA_ISO_CONTROL =
172          inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
173    
174      /**
175       * Determines whether a character is invisible; that is, if its Unicode category is any of
176       * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
177       * PRIVATE_USE according to ICU4J.
178       */
179      public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
180          .or(inRange('\u007f', '\u00a0'))
181          .or(is('\u00ad'))
182          .or(inRange('\u0600', '\u0603'))
183          .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
184          .or(inRange('\u2000', '\u200f'))
185          .or(inRange('\u2028', '\u202f'))
186          .or(inRange('\u205f', '\u2064'))
187          .or(inRange('\u206a', '\u206f'))
188          .or(is('\u3000'))
189          .or(inRange('\ud800', '\uf8ff'))
190          .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
191          .precomputed();
192    
193      /**
194       * Determines whether a character is single-width (not double-width). When in doubt, this matcher
195       * errs on the side of returning {@code false} (that is, it tends to assume a character is
196       * double-width).
197       *
198       * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
199       * date.
200       */
201      public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
202          .or(is('\u05be'))
203          .or(inRange('\u05d0', '\u05ea'))
204          .or(is('\u05f3'))
205          .or(is('\u05f4'))
206          .or(inRange('\u0600', '\u06ff'))
207          .or(inRange('\u0750', '\u077f'))
208          .or(inRange('\u0e00', '\u0e7f'))
209          .or(inRange('\u1e00', '\u20af'))
210          .or(inRange('\u2100', '\u213a'))
211          .or(inRange('\ufb50', '\ufdff'))
212          .or(inRange('\ufe70', '\ufeff'))
213          .or(inRange('\uff61', '\uffdc'))
214          .precomputed();
215    
216      /** Matches any character. */
217      public static final CharMatcher ANY =
218          new CharMatcher() {
219            @Override public boolean matches(char c) {
220              return true;
221            }
222    
223            @Override public int indexIn(CharSequence sequence) {
224              return (sequence.length() == 0) ? -1 : 0;
225            }
226    
227            @Override public int indexIn(CharSequence sequence, int start) {
228              int length = sequence.length();
229              Preconditions.checkPositionIndex(start, length);
230              return (start == length) ? -1 : start;
231            }
232    
233            @Override public int lastIndexIn(CharSequence sequence) {
234              return sequence.length() - 1;
235            }
236    
237            @Override public boolean matchesAllOf(CharSequence sequence) {
238              checkNotNull(sequence);
239              return true;
240            }
241    
242            @Override public boolean matchesNoneOf(CharSequence sequence) {
243              return sequence.length() == 0;
244            }
245    
246            @Override public String removeFrom(CharSequence sequence) {
247              checkNotNull(sequence);
248              return "";
249            }
250    
251            @Override public String replaceFrom(CharSequence sequence, char replacement) {
252              char[] array = new char[sequence.length()];
253              Arrays.fill(array, replacement);
254              return new String(array);
255            }
256    
257            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
258              StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
259              for (int i = 0; i < sequence.length(); i++) {
260                retval.append(replacement);
261              }
262              return retval.toString();
263            }
264    
265            @Override public String collapseFrom(CharSequence sequence, char replacement) {
266              return (sequence.length() == 0) ? "" : String.valueOf(replacement);
267            }
268    
269            @Override public String trimFrom(CharSequence sequence) {
270              checkNotNull(sequence);
271              return "";
272            }
273    
274            @Override public int countIn(CharSequence sequence) {
275              return sequence.length();
276            }
277    
278            @Override public CharMatcher and(CharMatcher other) {
279              return checkNotNull(other);
280            }
281    
282            @Override public CharMatcher or(CharMatcher other) {
283              checkNotNull(other);
284              return this;
285            }
286    
287            @Override public CharMatcher negate() {
288              return NONE;
289            }
290    
291            @Override public CharMatcher precomputed() {
292              return this;
293            }
294          };
295    
296      /** Matches no characters. */
297      public static final CharMatcher NONE =
298          new CharMatcher() {
299            @Override public boolean matches(char c) {
300              return false;
301            }
302    
303            @Override public int indexIn(CharSequence sequence) {
304              checkNotNull(sequence);
305              return -1;
306            }
307    
308            @Override public int indexIn(CharSequence sequence, int start) {
309              int length = sequence.length();
310              Preconditions.checkPositionIndex(start, length);
311              return -1;
312            }
313    
314            @Override public int lastIndexIn(CharSequence sequence) {
315              checkNotNull(sequence);
316              return -1;
317            }
318    
319            @Override public boolean matchesAllOf(CharSequence sequence) {
320              return sequence.length() == 0;
321            }
322    
323            @Override public boolean matchesNoneOf(CharSequence sequence) {
324              checkNotNull(sequence);
325              return true;
326            }
327    
328            @Override public String removeFrom(CharSequence sequence) {
329              return sequence.toString();
330            }
331    
332            @Override public String replaceFrom(CharSequence sequence, char replacement) {
333              return sequence.toString();
334            }
335    
336            @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
337              checkNotNull(replacement);
338              return sequence.toString();
339            }
340    
341            @Override public String collapseFrom(CharSequence sequence, char replacement) {
342              return sequence.toString();
343            }
344    
345            @Override public String trimFrom(CharSequence sequence) {
346              return sequence.toString();
347            }
348    
349            @Override public int countIn(CharSequence sequence) {
350              checkNotNull(sequence);
351              return 0;
352            }
353    
354            @Override public CharMatcher and(CharMatcher other) {
355              checkNotNull(other);
356              return this;
357            }
358    
359            @Override public CharMatcher or(CharMatcher other) {
360              return checkNotNull(other);
361            }
362    
363            @Override public CharMatcher negate() {
364              return ANY;
365            }
366    
367            @Override void setBits(LookupTable table) {}
368    
369            @Override public CharMatcher precomputed() {
370              return this;
371            }
372          };
373    
374      // Static factories
375    
376      /**
377       * Returns a {@code char} matcher that matches only one specified character.
378       */
379      public static CharMatcher is(final char match) {
380        return new CharMatcher() {
381          @Override public boolean matches(char c) {
382            return c == match;
383          }
384    
385          @Override public String replaceFrom(CharSequence sequence, char replacement) {
386            return sequence.toString().replace(match, replacement);
387          }
388    
389          @Override public CharMatcher and(CharMatcher other) {
390            return other.matches(match) ? this : NONE;
391          }
392    
393          @Override public CharMatcher or(CharMatcher other) {
394            return other.matches(match) ? other : super.or(other);
395          }
396    
397          @Override public CharMatcher negate() {
398            return isNot(match);
399          }
400    
401          @Override void setBits(LookupTable table) {
402            table.set(match);
403          }
404    
405          @Override public CharMatcher precomputed() {
406            return this;
407          }
408        };
409      }
410    
411      /**
412       * Returns a {@code char} matcher that matches any character except the one specified.
413       *
414       * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
415       */
416      public static CharMatcher isNot(final char match) {
417        return new CharMatcher() {
418          @Override public boolean matches(char c) {
419            return c != match;
420          }
421    
422          @Override public CharMatcher and(CharMatcher other) {
423            return other.matches(match) ? super.and(other) : other;
424          }
425    
426          @Override public CharMatcher or(CharMatcher other) {
427            return other.matches(match) ? ANY : this;
428          }
429    
430          @Override public CharMatcher negate() {
431            return is(match);
432          }
433        };
434      }
435    
436      /**
437       * Returns a {@code char} matcher that matches any character present in the given character
438       * sequence.
439       */
440      public static CharMatcher anyOf(final CharSequence sequence) {
441        switch (sequence.length()) {
442          case 0:
443            return NONE;
444          case 1:
445            return is(sequence.charAt(0));
446          case 2:
447            final char match1 = sequence.charAt(0);
448            final char match2 = sequence.charAt(1);
449            return new CharMatcher() {
450              @Override public boolean matches(char c) {
451                return c == match1 || c == match2;
452              }
453    
454              @Override void setBits(LookupTable table) {
455                table.set(match1);
456                table.set(match2);
457              }
458    
459              @Override public CharMatcher precomputed() {
460                return this;
461              }
462            };
463        }
464    
465        final char[] chars = sequence.toString().toCharArray();
466        Arrays.sort(chars); // not worth collapsing duplicates
467    
468        return new CharMatcher() {
469          @Override public boolean matches(char c) {
470            return Arrays.binarySearch(chars, c) >= 0;
471          }
472    
473          @Override void setBits(LookupTable table) {
474            for (char c : chars) {
475              table.set(c);
476            }
477          }
478        };
479      }
480    
481      /**
482       * Returns a {@code char} matcher that matches any character not present in the given character
483       * sequence.
484       */
485      public static CharMatcher noneOf(CharSequence sequence) {
486        return anyOf(sequence).negate();
487      }
488    
489      /**
490       * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
491       * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
492       * CharMatcher.inRange('a', 'z')}.
493       *
494       * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
495       */
496      public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
497        checkArgument(endInclusive >= startInclusive);
498        return new CharMatcher() {
499          @Override public boolean matches(char c) {
500            return startInclusive <= c && c <= endInclusive;
501          }
502    
503          @Override void setBits(LookupTable table) {
504            char c = startInclusive;
505            while (true) {
506              table.set(c);
507              if (c++ == endInclusive) {
508                break;
509              }
510            }
511          }
512    
513          @Override public CharMatcher precomputed() {
514            return this;
515          }
516        };
517      }
518    
519      /**
520       * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
521       * which operates on primitive {@code char} instances instead.
522       */
523      public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
524        checkNotNull(predicate);
525        if (predicate instanceof CharMatcher) {
526          return (CharMatcher) predicate;
527        }
528        return new CharMatcher() {
529          @Override public boolean matches(char c) {
530            return predicate.apply(c);
531          }
532    
533          @Override public boolean apply(Character character) {
534            return predicate.apply(checkNotNull(character));
535          }
536        };
537      }
538    
539      // Constructors
540    
541      /**
542       * Constructor for use by subclasses.
543       */
544      protected CharMatcher() {}
545    
546      // Abstract methods
547    
548      /** Determines a true or false value for the given character. */
549      public abstract boolean matches(char c);
550    
551      // Non-static factories
552    
553      /**
554       * Returns a matcher that matches any character not matched by this matcher.
555       */
556      public CharMatcher negate() {
557        final CharMatcher original = this;
558        return new CharMatcher() {
559          @Override public boolean matches(char c) {
560            return !original.matches(c);
561          }
562    
563          @Override public boolean matchesAllOf(CharSequence sequence) {
564            return original.matchesNoneOf(sequence);
565          }
566    
567          @Override public boolean matchesNoneOf(CharSequence sequence) {
568            return original.matchesAllOf(sequence);
569          }
570    
571          @Override public int countIn(CharSequence sequence) {
572            return sequence.length() - original.countIn(sequence);
573          }
574    
575          @Override public CharMatcher negate() {
576            return original;
577          }
578        };
579      }
580    
581      /**
582       * Returns a matcher that matches any character matched by both this matcher and {@code other}.
583       */
584      public CharMatcher and(CharMatcher other) {
585        return new And(Arrays.asList(this, checkNotNull(other)));
586      }
587    
588      private static class And extends CharMatcher {
589        List<CharMatcher> components;
590    
591        And(List<CharMatcher> components) {
592          this.components = components; // Skip defensive copy (private)
593        }
594    
595        @Override public boolean matches(char c) {
596          for (CharMatcher matcher : components) {
597            if (!matcher.matches(c)) {
598              return false;
599            }
600          }
601          return true;
602        }
603    
604        @Override public CharMatcher and(CharMatcher other) {
605          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
606          newComponents.add(checkNotNull(other));
607          return new And(newComponents);
608        }
609      }
610    
611      /**
612       * Returns a matcher that matches any character matched by either this matcher or {@code other}.
613       */
614      public CharMatcher or(CharMatcher other) {
615        return new Or(Arrays.asList(this, checkNotNull(other)));
616      }
617    
618      private static class Or extends CharMatcher {
619        List<CharMatcher> components;
620    
621        Or(List<CharMatcher> components) {
622          this.components = components; // Skip defensive copy (private)
623        }
624    
625        @Override public boolean matches(char c) {
626          for (CharMatcher matcher : components) {
627            if (matcher.matches(c)) {
628              return true;
629            }
630          }
631          return false;
632        }
633    
634        @Override public CharMatcher or(CharMatcher other) {
635          List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
636          newComponents.add(checkNotNull(other));
637          return new Or(newComponents);
638        }
639    
640        @Override void setBits(LookupTable table) {
641          for (CharMatcher matcher : components) {
642            matcher.setBits(table);
643          }
644        }
645      }
646    
647      /**
648       * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
649       * query than the original; your mileage may vary. Precomputation takes time and is likely to be
650       * worthwhile only if the precomputed matcher is queried many thousands of times.
651       *
652       * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
653       * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
654       * worthwhile tradeoff in a browser.
655       */
656      public CharMatcher precomputed() {
657        return Platform.precomputeCharMatcher(this);
658      }
659    
660      /**
661       * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
662       * on {@link Platform} so that we can have different behavior in GWT.
663       *
664       * <p>The default precomputation is to cache the configuration of the original matcher in an
665       * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
666       * than the original.
667       *
668       * <p>The default implementation creates a new bit array and passes it to {@link
669       * #setBits(LookupTable)}.
670       */
671      CharMatcher precomputedInternal() {
672        final LookupTable table = new LookupTable();
673        setBits(table);
674    
675        return new CharMatcher() {
676          @Override public boolean matches(char c) {
677            return table.get(c);
678          }
679    
680          // TODO(kevinb): make methods like negate() smart?
681    
682          @Override public CharMatcher precomputed() {
683            return this;
684          }
685        };
686      }
687    
688      /**
689       * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
690       * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
691       *
692       * <p>The default implementation loops over every possible character value, invoking {@link
693       * #matches} for each one.
694       */
695      void setBits(LookupTable table) {
696        char c = Character.MIN_VALUE;
697        while (true) {
698          if (matches(c)) {
699            table.set(c);
700          }
701          if (c++ == Character.MAX_VALUE) {
702            break;
703          }
704        }
705      }
706    
707      /**
708       * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
709       *
710       * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
711       * simpler java.util.BitSet.
712       */
713      private static final class LookupTable {
714        int[] data = new int[2048];
715    
716        void set(char index) {
717          data[index >> 5] |= (1 << index);
718        }
719    
720        boolean get(char index) {
721          return (data[index >> 5] & (1 << index)) != 0;
722        }
723      }
724    
725      // Text processing routines
726    
727      /**
728       * Returns {@code true} if a character sequence contains at least one matching character.
729       * Equivalent to {@code !matchesNoneOf(sequence)}.
730       *
731       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
732       * character, until this returns {@code true} or the end is reached.
733       *
734       * @param sequence the character sequence to examine, possibly empty
735       * @return {@code true} if this matcher matches at least one character in the sequence
736       * @since 8.0
737       */
738      public boolean matchesAnyOf(CharSequence sequence) {
739        return !matchesNoneOf(sequence);
740      }
741    
742      /**
743       * Returns {@code true} if a character sequence contains only matching characters.
744       *
745       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
746       * character, until this returns {@code false} or the end is reached.
747       *
748       * @param sequence the character sequence to examine, possibly empty
749       * @return {@code true} if this matcher matches every character in the sequence, including when
750       *         the sequence is empty
751       */
752      public boolean matchesAllOf(CharSequence sequence) {
753        for (int i = sequence.length() - 1; i >= 0; i--) {
754          if (!matches(sequence.charAt(i))) {
755            return false;
756          }
757        }
758        return true;
759      }
760    
761      /**
762       * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
763       * {@code !matchesAnyOf(sequence)}.
764       *
765       * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
766       * character, until this returns {@code false} or the end is reached.
767       *
768       * @param sequence the character sequence to examine, possibly empty
769       * @return {@code true} if this matcher matches every character in the sequence, including when
770       *         the sequence is empty
771       */
772      public boolean matchesNoneOf(CharSequence sequence) {
773        return indexIn(sequence) == -1;
774      }
775    
776      // TODO(kevinb): add matchesAnyOf()
777    
778      /**
779       * Returns the index of the first matching character in a character sequence, or {@code -1} if no
780       * matching character is present.
781       *
782       * <p>The default implementation iterates over the sequence in forward order calling {@link
783       * #matches} for each character.
784       *
785       * @param sequence the character sequence to examine from the beginning
786       * @return an index, or {@code -1} if no character matches
787       */
788      public int indexIn(CharSequence sequence) {
789        int length = sequence.length();
790        for (int i = 0; i < length; i++) {
791          if (matches(sequence.charAt(i))) {
792            return i;
793          }
794        }
795        return -1;
796      }
797    
798      /**
799       * Returns the index of the first matching character in a character sequence, starting from a
800       * given position, or {@code -1} if no character matches after that position.
801       *
802       * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
803       * start}, calling {@link #matches} for each character.
804       *
805       * @param sequence the character sequence to examine
806       * @param start the first index to examine; must be nonnegative and no greater than {@code
807       *        sequence.length()}
808       * @return the index of the first matching character, guaranteed to be no less than {@code start},
809       *         or {@code -1} if no character matches
810       * @throws IndexOutOfBoundsException if start is negative or greater than {@code
811       *         sequence.length()}
812       */
813      public int indexIn(CharSequence sequence, int start) {
814        int length = sequence.length();
815        Preconditions.checkPositionIndex(start, length);
816        for (int i = start; i < length; i++) {
817          if (matches(sequence.charAt(i))) {
818            return i;
819          }
820        }
821        return -1;
822      }
823    
824      /**
825       * Returns the index of the last matching character in a character sequence, or {@code -1} if no
826       * matching character is present.
827       *
828       * <p>The default implementation iterates over the sequence in reverse order calling {@link
829       * #matches} for each character.
830       *
831       * @param sequence the character sequence to examine from the end
832       * @return an index, or {@code -1} if no character matches
833       */
834      public int lastIndexIn(CharSequence sequence) {
835        for (int i = sequence.length() - 1; i >= 0; i--) {
836          if (matches(sequence.charAt(i))) {
837            return i;
838          }
839        }
840        return -1;
841      }
842    
843      /**
844       * Returns the number of matching characters found in a character sequence.
845       */
846      public int countIn(CharSequence sequence) {
847        int count = 0;
848        for (int i = 0; i < sequence.length(); i++) {
849          if (matches(sequence.charAt(i))) {
850            count++;
851          }
852        }
853        return count;
854      }
855    
856      /**
857       * Returns a string containing all non-matching characters of a character sequence, in order. For
858       * example: <pre>   {@code
859       *
860       *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
861       *
862       * ... returns {@code "bzr"}.
863       */
864      @CheckReturnValue
865      public String removeFrom(CharSequence sequence) {
866        String string = sequence.toString();
867        int pos = indexIn(string);
868        if (pos == -1) {
869          return string;
870        }
871    
872        char[] chars = string.toCharArray();
873        int spread = 1;
874    
875        // This unusual loop comes from extensive benchmarking
876        OUT: while (true) {
877          pos++;
878          while (true) {
879            if (pos == chars.length) {
880              break OUT;
881            }
882            if (matches(chars[pos])) {
883              break;
884            }
885            chars[pos - spread] = chars[pos];
886            pos++;
887          }
888          spread++;
889        }
890        return new String(chars, 0, pos - spread);
891      }
892    
893      /**
894       * Returns a string containing all matching characters of a character sequence, in order. For
895       * example: <pre>   {@code
896       *
897       *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
898       *
899       * ... returns {@code "aaa"}.
900       */
901      @CheckReturnValue
902      public String retainFrom(CharSequence sequence) {
903        return negate().removeFrom(sequence);
904      }
905    
906      /**
907       * Returns a string copy of the input character sequence, with each character that matches this
908       * matcher replaced by a given replacement character. For example: <pre>   {@code
909       *
910       *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
911       *
912       * ... returns {@code "rodor"}.
913       *
914       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
915       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
916       * character.
917       *
918       * @param sequence the character sequence to replace matching characters in
919       * @param replacement the character to append to the result string in place of each matching
920       *        character in {@code sequence}
921       * @return the new string
922       */
923      @CheckReturnValue
924      public String replaceFrom(CharSequence sequence, char replacement) {
925        String string = sequence.toString();
926        int pos = indexIn(string);
927        if (pos == -1) {
928          return string;
929        }
930        char[] chars = string.toCharArray();
931        chars[pos] = replacement;
932        for (int i = pos + 1; i < chars.length; i++) {
933          if (matches(chars[i])) {
934            chars[i] = replacement;
935          }
936        }
937        return new String(chars);
938      }
939    
940      /**
941       * Returns a string copy of the input character sequence, with each character that matches this
942       * matcher replaced by a given replacement sequence. For example: <pre>   {@code
943       *
944       *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
945       *
946       * ... returns {@code "yoohoo"}.
947       *
948       * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
949       * off calling {@link #replaceFrom(CharSequence, char)} directly.
950       *
951       * @param sequence the character sequence to replace matching characters in
952       * @param replacement the characters to append to the result string in place of each matching
953       *        character in {@code sequence}
954       * @return the new string
955       */
956      @CheckReturnValue
957      public String replaceFrom(CharSequence sequence, CharSequence replacement) {
958        int replacementLen = replacement.length();
959        if (replacementLen == 0) {
960          return removeFrom(sequence);
961        }
962        if (replacementLen == 1) {
963          return replaceFrom(sequence, replacement.charAt(0));
964        }
965    
966        String string = sequence.toString();
967        int pos = indexIn(string);
968        if (pos == -1) {
969          return string;
970        }
971    
972        int len = string.length();
973        StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
974    
975        int oldpos = 0;
976        do {
977          buf.append(string, oldpos, pos);
978          buf.append(replacement);
979          oldpos = pos + 1;
980          pos = indexIn(string, oldpos);
981        } while (pos != -1);
982    
983        buf.append(string, oldpos, len);
984        return buf.toString();
985      }
986    
987      /**
988       * Returns a substring of the input character sequence that omits all characters this matcher
989       * matches from the beginning and from the end of the string. For example: <pre>   {@code
990       *
991       *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
992       *
993       * ... returns {@code "cat"}.
994       *
995       * <p>Note that: <pre>   {@code
996       *
997       *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
998       *
999       * ... is equivalent to {@link String#trim()}.
1000       */
1001      @CheckReturnValue
1002      public String trimFrom(CharSequence sequence) {
1003        int len = sequence.length();
1004        int first;
1005        int last;
1006    
1007        for (first = 0; first < len; first++) {
1008          if (!matches(sequence.charAt(first))) {
1009            break;
1010          }
1011        }
1012        for (last = len - 1; last > first; last--) {
1013          if (!matches(sequence.charAt(last))) {
1014            break;
1015          }
1016        }
1017    
1018        return sequence.subSequence(first, last + 1).toString();
1019      }
1020    
1021      /**
1022       * Returns a substring of the input character sequence that omits all characters this matcher
1023       * matches from the beginning of the string. For example: <pre> {@code
1024       *
1025       *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1026       *
1027       * ... returns {@code "catbab"}.
1028       */
1029      @CheckReturnValue
1030      public String trimLeadingFrom(CharSequence sequence) {
1031        int len = sequence.length();
1032        int first;
1033    
1034        for (first = 0; first < len; first++) {
1035          if (!matches(sequence.charAt(first))) {
1036            break;
1037          }
1038        }
1039    
1040        return sequence.subSequence(first, len).toString();
1041      }
1042    
1043      /**
1044       * Returns a substring of the input character sequence that omits all characters this matcher
1045       * matches from the end of the string. For example: <pre> {@code
1046       *
1047       *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1048       *
1049       * ... returns {@code "abacat"}.
1050       */
1051      @CheckReturnValue
1052      public String trimTrailingFrom(CharSequence sequence) {
1053        int len = sequence.length();
1054        int last;
1055    
1056        for (last = len - 1; last >= 0; last--) {
1057          if (!matches(sequence.charAt(last))) {
1058            break;
1059          }
1060        }
1061    
1062        return sequence.subSequence(0, last + 1).toString();
1063      }
1064    
1065      /**
1066       * Returns a string copy of the input character sequence, with each group of consecutive
1067       * characters that match this matcher replaced by a single replacement character. For example:
1068       * <pre>   {@code
1069       *
1070       *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1071       *
1072       * ... returns {@code "b-p-r"}.
1073       *
1074       * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1075       * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1076       * character.
1077       *
1078       * @param sequence the character sequence to replace matching groups of characters in
1079       * @param replacement the character to append to the result string in place of each group of
1080       *        matching characters in {@code sequence}
1081       * @return the new string
1082       */
1083      @CheckReturnValue
1084      public String collapseFrom(CharSequence sequence, char replacement) {
1085        int first = indexIn(sequence);
1086        if (first == -1) {
1087          return sequence.toString();
1088        }
1089    
1090        // TODO(kevinb): see if this implementation can be made faster
1091        StringBuilder builder = new StringBuilder(sequence.length())
1092            .append(sequence.subSequence(0, first))
1093            .append(replacement);
1094        boolean in = true;
1095        for (int i = first + 1; i < sequence.length(); i++) {
1096          char c = sequence.charAt(i);
1097          if (apply(c)) {
1098            if (!in) {
1099              builder.append(replacement);
1100              in = true;
1101            }
1102          } else {
1103            builder.append(c);
1104            in = false;
1105          }
1106        }
1107        return builder.toString();
1108      }
1109    
1110      /**
1111       * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1112       * groups of matching characters at the start or end of the sequence are removed without
1113       * replacement.
1114       */
1115      @CheckReturnValue
1116      public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1117        int first = negate().indexIn(sequence);
1118        if (first == -1) {
1119          return ""; // everything matches. nothing's left.
1120        }
1121        StringBuilder builder = new StringBuilder(sequence.length());
1122        boolean inMatchingGroup = false;
1123        for (int i = first; i < sequence.length(); i++) {
1124          char c = sequence.charAt(i);
1125          if (apply(c)) {
1126            inMatchingGroup = true;
1127          } else {
1128            if (inMatchingGroup) {
1129              builder.append(replacement);
1130              inMatchingGroup = false;
1131            }
1132            builder.append(c);
1133          }
1134        }
1135        return builder.toString();
1136      }
1137    
1138      // Predicate interface
1139    
1140      /**
1141       * Returns {@code true} if this matcher matches the given character.
1142       *
1143       * @throws NullPointerException if {@code character} is null
1144       */
1145      @Override public boolean apply(Character character) {
1146        return matches(character);
1147      }
1148    }