001    /*
002     * Copyright (C) 2010 Google Inc.
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005     * in compliance with the License. You may obtain a copy of the License at
006     *
007     * http://www.apache.org/licenses/LICENSE-2.0
008     *
009     * Unless required by applicable law or agreed to in writing, software distributed under the License
010     * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011     * or implied. See the License for the specific language governing permissions and limitations under
012     * the License.
013     */
014    
015    package com.google.common.base;
016    
017    import com.google.common.annotations.Beta;
018    import com.google.common.annotations.GwtCompatible;
019    
020    /**
021     * Static methods pertaining to ASCII characters (those in the range of values {@code 0x00} through
022     * {@code 0x7F}), and to strings containing such characters.
023     *
024     * @author Craig Berry
025     * @author Gregory Kick
026     * @since 7
027     */
028    @Beta
029    @GwtCompatible
public final class Ascii {

  /** Not instantiable: this class only holds constants and static helpers. */
  private Ascii() {}

  /* The ASCII control characters, per RFC 20. */
  /**
   * Null ('\0'): The all-zeros character which may serve to accomplish
   * time fill and media fill.  Normally used as a C string terminator.
   * <p>Although RFC 20 names this as "Null", note that it is distinct
   * from the C/C++ "NULL" pointer.
   *
   * @since 8
   */
  public static final byte NUL = 0;

  /**
   * Start of Heading: A communication control character used at
   * the beginning of a sequence of characters which constitute a
   * machine-sensible address or routing information.  Such a sequence is
   * referred to as the "heading."  An STX character has the effect of
   * terminating a heading.
   *
   * @since 8
   */
  public static final byte SOH = 1;

  /**
   * Start of Text: A communication control character which
   * precedes a sequence of characters that is to be treated as an entity
   * and entirely transmitted through to the ultimate destination.  Such a
   * sequence is referred to as "text."  STX may be used to terminate a
   * sequence of characters started by SOH.
   *
   * @since 8
   */
  public static final byte STX = 2;

  /**
   * End of Text: A communication control character used to
   * terminate a sequence of characters started with STX and transmitted
   * as an entity.
   *
   * @since 8
   */
  public static final byte ETX = 3;

  /**
   * End of Transmission: A communication control character used
   * to indicate the conclusion of a transmission, which may have
   * contained one or more texts and any associated headings.
   *
   * @since 8
   */
  public static final byte EOT = 4;

  /**
   * Enquiry: A communication control character used in data
   * communication systems as a request for a response from a remote
   * station.  It may be used as a "Who Are You" (WRU) to obtain
   * identification, or may be used to obtain station status, or both.
   *
   * @since 8
   */
  public static final byte ENQ = 5;

  /**
   * Acknowledge: A communication control character transmitted
   * by a receiver as an affirmative response to a sender.
   *
   * @since 8
   */
  public static final byte ACK = 6;

  /**
   * Bell ('\a'): A character for use when there is a need to call for
   * human attention.  It may control alarm or attention devices.
   *
   * @since 8
   */
  public static final byte BEL = 7;

  /**
   * Backspace ('\b'): A format effector which controls the movement of
   * the printing position one printing space backward on the same
   * printing line.  (Applicable also to display devices.)
   *
   * @since 8
   */
  public static final byte BS = 8;

  /**
   * Horizontal Tabulation ('\t'): A format effector which controls the
   * movement of the printing position to the next in a series of
   * predetermined positions along the printing line.  (Applicable also to
   * display devices and the skip function on punched cards.)
   *
   * @since 8
   */
  public static final byte HT = 9;

  /**
   * Line Feed ('\n'): A format effector which controls the movement of
   * the printing position to the next printing line.  (Applicable also to
   * display devices.) Where appropriate, this character may have the
   * meaning "New Line" (NL), a format effector which controls the
   * movement of the printing point to the first printing position on the
   * next printing line.  Use of this convention requires agreement
   * between sender and recipient of data.
   *
   * @since 8
   */
  public static final byte LF = 10;

  /**
   * Alternate name for {@link #LF}.  ({@code LF} is preferred.)
   *
   * @since 8
   */
  public static final byte NL = 10;

  /**
   * Vertical Tabulation ('\v'): A format effector which controls the
   * movement of the printing position to the next in a series of
   * predetermined printing lines.  (Applicable also to display devices.)
   *
   * @since 8
   */
  public static final byte VT = 11;

  /**
   * Form Feed ('\f'): A format effector which controls the movement of
   * the printing position to the first pre-determined printing line on
   * the next form or page.  (Applicable also to display devices.)
   *
   * @since 8
   */
  public static final byte FF = 12;

  /**
   * Carriage Return ('\r'): A format effector which controls the
   * movement of the printing position to the first printing position on
   * the same printing line.  (Applicable also to display devices.)
   *
   * @since 8
   */
  public static final byte CR = 13;

  /**
   * Shift Out: A control character indicating that the code
   * combinations which follow shall be interpreted as outside of the
   * character set of the standard code table until a Shift In character
   * is reached.
   *
   * @since 8
   */
  public static final byte SO = 14;

  /**
   * Shift In: A control character indicating that the code
   * combinations which follow shall be interpreted according to the
   * standard code table.
   *
   * @since 8
   */
  public static final byte SI = 15;

  /**
   * Data Link Escape: A communication control character which
   * will change the meaning of a limited number of contiguously following
   * characters.  It is used exclusively to provide supplementary controls
   * in data communication networks.
   *
   * @since 8
   */
  public static final byte DLE = 16;

  /**
   * Device Control 1: One of the four device controls, which are characters
   * for the control of ancillary devices associated with data processing or
   * telecommunication systems, more especially switching devices "on" or
   * "off."  (If a single "stop" control is required to interrupt or turn
   * off ancillary devices, DC4 is the preferred assignment.)
   * Has the same value as {@link #XON}.
   *
   * @since 8
   */
  public static final byte DC1 = 17; // aka XON

  /**
   * Transmission On: Although originally defined as DC1, this ASCII
   * control character is now better known as the XON code used for software
   * flow control in serial communications.  The main use is restarting
   * the transmission after the communication has been stopped by the
   * {@link #XOFF} control code.
   *
   * @since 8
   */
  public static final byte XON = 17; // aka DC1

  /**
   * Device Control 2: See {@link #DC1} for the description shared by the
   * device control characters.
   *
   * @see #DC1
   * @since 8
   */
  public static final byte DC2 = 18;

  /**
   * Device Control 3: See {@link #DC1} for the description shared by the
   * device control characters.  Has the same value as {@link #XOFF}.
   *
   * @see #DC1
   * @since 8
   */
  public static final byte DC3 = 19; // aka XOFF

  /**
   * Transmission Off: The counterpart of {@link #XON}; see that constant
   * for an explanation of software flow control.  Has the same value as
   * {@link #DC3}.
   *
   * @see #XON
   * @since 8
   */
  public static final byte XOFF = 19; // aka DC3

  /**
   * Device Control 4: See {@link #DC1} for the description shared by the
   * device control characters.
   *
   * @see #DC1
   * @since 8
   */
  public static final byte DC4 = 20;

  /**
   * Negative Acknowledge: A communication control character
   * transmitted by a receiver as a negative response to the sender.
   *
   * @since 8
   */
  public static final byte NAK = 21;

  /**
   * Synchronous Idle: A communication control character used by
   * a synchronous transmission system in the absence of any other
   * character to provide a signal from which synchronism may be achieved
   * or retained.
   *
   * @since 8
   */
  public static final byte SYN = 22;

  /**
   * End of Transmission Block: A communication control character
   * used to indicate the end of a block of data for communication
   * purposes.  ETB is used for blocking data where the block structure is
   * not necessarily related to the processing format.
   *
   * @since 8
   */
  public static final byte ETB = 23;

  /**
   * Cancel: A control character used to indicate that the data
   * with which it is sent is in error or is to be disregarded.
   *
   * @since 8
   */
  public static final byte CAN = 24;

  /**
   * End of Medium: A control character associated with the sent
   * data which may be used to identify the physical end of the medium, or
   * the end of the used, or wanted, portion of information recorded on a
   * medium.  (The position of this character does not necessarily
   * correspond to the physical end of the medium.)
   *
   * @since 8
   */
  public static final byte EM = 25;

  /**
   * Substitute: A character that may be substituted for a
   * character which is determined to be invalid or in error.
   *
   * @since 8
   */
  public static final byte SUB = 26;

  /**
   * Escape: A control character intended to provide code
   * extension (supplementary characters) in general information
   * interchange.  The Escape character itself is a prefix affecting the
   * interpretation of a limited number of contiguously following
   * characters.
   *
   * @since 8
   */
  public static final byte ESC = 27;

  /**
   * File Separator: One of the four information separators
   * (File/Group/Record/Unit).  These information separators may be
   * used within data in optional fashion, except that their hierarchical
   * relationship shall be: FS is the most inclusive, then GS, then RS,
   * and US is least inclusive.  (The content and length of a File, Group,
   * Record, or Unit are not specified.)
   *
   * @since 8
   */
  public static final byte FS = 28;

  /**
   * Group Separator: See {@link #FS} for the description shared by the
   * information separators.
   *
   * @see #FS
   * @since 8
   */
  public static final byte GS = 29;

  /**
   * Record Separator: See {@link #FS} for the description shared by the
   * information separators.
   *
   * @see #FS
   * @since 8
   */
  public static final byte RS = 30;

  /**
   * Unit Separator: See {@link #FS} for the description shared by the
   * information separators.
   *
   * @see #FS
   * @since 8
   */
  public static final byte US = 31;

  /**
   * Space: A normally non-printing graphic character used to
   * separate words.  It is also a format effector which controls the
   * movement of the printing position, one printing position forward.
   * (Applicable also to display devices.)
   *
   * @since 8
   */
  public static final byte SP = 32;

  /**
   * Alternate name for {@link #SP}.
   *
   * @since 8
   */
  public static final byte SPACE = 32;

  /**
   * Delete: This character is used primarily to "erase" or
   * "obliterate" erroneous or unwanted characters in perforated tape.
   *
   * @since 8
   */
  public static final byte DEL = 127;

  /**
   * The single bit in which an ASCII letter differs from its other-case counterpart
   * ({@code 'A'} is {@code 0x41}, {@code 'a'} is {@code 0x61}). Flipping it on a character already
   * known to be an ASCII letter switches that letter's case.
   */
  private static final char CASE_MASK = 0x20;

  /**
   * Returns a string equal to the input in which all {@linkplain #isUpperCase(char) uppercase
   * ASCII characters} have been converted to lowercase. All other characters are carried over
   * without modification.
   *
   * <p>When the input contains no uppercase ASCII characters, the input string itself is returned
   * and nothing is allocated.
   *
   * @param string the string to convert
   * @return the lowercased string, possibly {@code string} itself
   * @throws NullPointerException if {@code string} is null
   */
  public static String toLowerCase(String string) {
    int length = string.length();
    for (int i = 0; i < length; i++) {
      if (isUpperCase(string.charAt(i))) {
        // First character that needs changing: copy once, then fix up the rest in place.
        char[] chars = string.toCharArray();
        for (int j = i; j < length; j++) {
          chars[j] = toLowerCase(chars[j]);
        }
        return String.valueOf(chars);
      }
    }
    return string;
  }

  /**
   * If the argument is an {@linkplain #isUpperCase(char) uppercase ASCII character} returns the
   * lowercase equivalent. Otherwise returns the argument.
   *
   * @param c the character to convert
   * @return the lowercase form of {@code c}, or {@code c} itself if it is not an uppercase ASCII
   *     letter
   */
  public static char toLowerCase(char c) {
    return isUpperCase(c) ? (char) (c ^ CASE_MASK) : c;
  }

  /**
   * Returns a string equal to the input in which all {@linkplain #isLowerCase(char) lowercase
   * ASCII characters} have been converted to uppercase. All other characters are carried over
   * without modification.
   *
   * <p>When the input contains no lowercase ASCII characters, the input string itself is returned
   * and nothing is allocated.
   *
   * @param string the string to convert
   * @return the uppercased string, possibly {@code string} itself
   * @throws NullPointerException if {@code string} is null
   */
  public static String toUpperCase(String string) {
    int length = string.length();
    for (int i = 0; i < length; i++) {
      if (isLowerCase(string.charAt(i))) {
        // First character that needs changing: copy once, then fix up the rest in place.
        char[] chars = string.toCharArray();
        for (int j = i; j < length; j++) {
          chars[j] = toUpperCase(chars[j]);
        }
        return String.valueOf(chars);
      }
    }
    return string;
  }

  /**
   * If the argument is a {@linkplain #isLowerCase(char) lowercase ASCII character} returns the
   * uppercase equivalent. Otherwise returns the argument.
   *
   * @param c the character to convert
   * @return the uppercase form of {@code c}, or {@code c} itself if it is not a lowercase ASCII
   *     letter
   */
  public static char toUpperCase(char c) {
    // Every character in 'a'..'z' has the CASE_MASK bit set, so flipping it clears it.
    return isLowerCase(c) ? (char) (c ^ CASE_MASK) : c;
  }

  /**
   * Indicates whether {@code c} is one of the twenty-six lowercase ASCII alphabetic characters
   * between {@code 'a'} and {@code 'z'} inclusive. All others (including non-ASCII characters)
   * return {@code false}.
   *
   * @param c the character to test
   * @return {@code true} if {@code c} is in the range {@code 'a'} to {@code 'z'}
   */
  public static boolean isLowerCase(char c) {
    return (c >= 'a') && (c <= 'z');
  }

  /**
   * Indicates whether {@code c} is one of the twenty-six uppercase ASCII alphabetic characters
   * between {@code 'A'} and {@code 'Z'} inclusive. All others (including non-ASCII characters)
   * return {@code false}.
   *
   * @param c the character to test
   * @return {@code true} if {@code c} is in the range {@code 'A'} to {@code 'Z'}
   */
  public static boolean isUpperCase(char c) {
    return (c >= 'A') && (c <= 'Z');
  }
}