001 /*
002 * Copyright (C) 2010 Google Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015 package com.google.common.base;
016
017 import com.google.common.annotations.Beta;
018 import com.google.common.annotations.GwtCompatible;
019
020 /**
021 * Static methods pertaining to ASCII characters (those in the range of values {@code 0x00} through
022 * {@code 0x7F}), and to strings containing such characters.
023 *
024 * @author Craig Berry
025 * @author Gregory Kick
026 * @since 7
027 */
028 @Beta
029 @GwtCompatible
public final class Ascii {

  /** Not instantiable: this class only holds constants and static helpers. */
  private Ascii() {}

  /* The ASCII control characters, per RFC 20. */

  /**
   * Null ('\0'): The all-zeros character which may serve to accomplish time fill and media fill.
   * Normally used as a C string terminator.
   *
   * <p>Although RFC 20 names this as "Null", note that it is distinct from the C/C++ "NULL"
   * pointer.
   *
   * @since 8
   */
  public static final byte NUL = 0;

  /**
   * Start of Heading: A communication control character used at the beginning of a sequence of
   * characters which constitute a machine-sensible address or routing information. Such a sequence
   * is referred to as the "heading." An STX character has the effect of terminating a heading.
   *
   * @since 8
   */
  public static final byte SOH = 1;

  /**
   * Start of Text: A communication control character which precedes a sequence of characters that
   * is to be treated as an entity and entirely transmitted through to the ultimate destination.
   * Such a sequence is referred to as "text." STX may be used to terminate a sequence of
   * characters started by SOH.
   *
   * @since 8
   */
  public static final byte STX = 2;

  /**
   * End of Text: A communication control character used to terminate a sequence of characters
   * started with STX and transmitted as an entity.
   *
   * @since 8
   */
  public static final byte ETX = 3;

  /**
   * End of Transmission: A communication control character used to indicate the conclusion of a
   * transmission, which may have contained one or more texts and any associated headings.
   *
   * @since 8
   */
  public static final byte EOT = 4;

  /**
   * Enquiry: A communication control character used in data communication systems as a request
   * for a response from a remote station. It may be used as a "Who Are You" (WRU) to obtain
   * identification, or may be used to obtain station status, or both.
   *
   * @since 8
   */
  public static final byte ENQ = 5;

  /**
   * Acknowledge: A communication control character transmitted by a receiver as an affirmative
   * response to a sender.
   *
   * @since 8
   */
  public static final byte ACK = 6;

  /**
   * Bell ('\a'): A character for use when there is a need to call for human attention. It may
   * control alarm or attention devices.
   *
   * @since 8
   */
  public static final byte BEL = 7;

  /**
   * Backspace ('\b'): A format effector which controls the movement of the printing position one
   * printing space backward on the same printing line. (Applicable also to display devices.)
   *
   * @since 8
   */
  public static final byte BS = 8;

  /**
   * Horizontal Tabulation ('\t'): A format effector which controls the movement of the printing
   * position to the next in a series of predetermined positions along the printing line.
   * (Applicable also to display devices and the skip function on punched cards.)
   *
   * @since 8
   */
  public static final byte HT = 9;

  /**
   * Line Feed ('\n'): A format effector which controls the movement of the printing position to
   * the next printing line. (Applicable also to display devices.) Where appropriate, this
   * character may have the meaning "New Line" (NL), a format effector which controls the movement
   * of the printing point to the first printing position on the next printing line. Use of this
   * convention requires agreement between sender and recipient of data.
   *
   * @since 8
   */
  public static final byte LF = 10;

  /**
   * Alternate name for {@link #LF}. ({@code LF} is preferred.)
   *
   * @since 8
   */
  public static final byte NL = LF;

  /**
   * Vertical Tabulation ('\v'): A format effector which controls the movement of the printing
   * position to the next in a series of predetermined printing lines. (Applicable also to display
   * devices.)
   *
   * @since 8
   */
  public static final byte VT = 11;

  /**
   * Form Feed ('\f'): A format effector which controls the movement of the printing position to
   * the first pre-determined printing line on the next form or page. (Applicable also to display
   * devices.)
   *
   * @since 8
   */
  public static final byte FF = 12;

  /**
   * Carriage Return ('\r'): A format effector which controls the movement of the printing
   * position to the first printing position on the same printing line. (Applicable also to
   * display devices.)
   *
   * @since 8
   */
  public static final byte CR = 13;

  /**
   * Shift Out: A control character indicating that the code combinations which follow shall be
   * interpreted as outside of the character set of the standard code table until a Shift In
   * character is reached.
   *
   * @since 8
   */
  public static final byte SO = 14;

  /**
   * Shift In: A control character indicating that the code combinations which follow shall be
   * interpreted according to the standard code table.
   *
   * @since 8
   */
  public static final byte SI = 15;

  /**
   * Data Link Escape: A communication control character which will change the meaning of a
   * limited number of contiguously following characters. It is used exclusively to provide
   * supplementary controls in data communication networks.
   *
   * @since 8
   */
  public static final byte DLE = 16;

  /**
   * Device Controls: Characters for the control of ancillary devices associated with data
   * processing or telecommunication systems, more especially switching devices "on" or "off." (If
   * a single "stop" control is required to interrupt or turn off ancillary devices, DC4 is the
   * preferred assignment.)
   *
   * @since 8
   */
  public static final byte DC1 = 17; // aka XON

  /**
   * Transmission on/off: Although originally defined as DC1, this ASCII control character is now
   * better known as the XON code used for software flow control in serial communications. The
   * main use is restarting the transmission after the communication has been stopped by the XOFF
   * control code.
   *
   * @since 8
   */
  public static final byte XON = DC1;

  /**
   * One of the device control characters; the group is described at {@link #DC1}.
   *
   * @see #DC1
   * @since 8
   */
  public static final byte DC2 = 18;

  /**
   * One of the device control characters; the group is described at {@link #DC1}.
   *
   * @see #DC1
   * @since 8
   */
  public static final byte DC3 = 19; // aka XOFF

  /**
   * Transmission off. See {@link #XON} for explanation.
   *
   * @since 8
   */
  public static final byte XOFF = DC3;

  /**
   * One of the device control characters; the group is described at {@link #DC1}.
   *
   * @see #DC1
   * @since 8
   */
  public static final byte DC4 = 20;

  /**
   * Negative Acknowledge: A communication control character transmitted by a receiver as a
   * negative response to the sender.
   *
   * @since 8
   */
  public static final byte NAK = 21;

  /**
   * Synchronous Idle: A communication control character used by a synchronous transmission system
   * in the absence of any other character to provide a signal from which synchronism may be
   * achieved or retained.
   *
   * @since 8
   */
  public static final byte SYN = 22;

  /**
   * End of Transmission Block: A communication control character used to indicate the end of a
   * block of data for communication purposes. ETB is used for blocking data where the block
   * structure is not necessarily related to the processing format.
   *
   * @since 8
   */
  public static final byte ETB = 23;

  /**
   * Cancel: A control character used to indicate that the data with which it is sent is in error
   * or is to be disregarded.
   *
   * @since 8
   */
  public static final byte CAN = 24;

  /**
   * End of Medium: A control character associated with the sent data which may be used to
   * identify the physical end of the medium, or the end of the used, or wanted, portion of
   * information recorded on a medium. (The position of this character does not necessarily
   * correspond to the physical end of the medium.)
   *
   * @since 8
   */
  public static final byte EM = 25;

  /**
   * Substitute: A character that may be substituted for a character which is determined to be
   * invalid or in error.
   *
   * @since 8
   */
  public static final byte SUB = 26;

  /**
   * Escape: A control character intended to provide code extension (supplementary characters) in
   * general information interchange. The Escape character itself is a prefix affecting the
   * interpretation of a limited number of contiguously following characters.
   *
   * @since 8
   */
  public static final byte ESC = 27;

  /**
   * File/Group/Record/Unit Separator: These information separators may be used within data in
   * optional fashion, except that their hierarchical relationship shall be: FS is the most
   * inclusive, then GS, then RS, and US is least inclusive. (The content and length of a File,
   * Group, Record, or Unit are not specified.)
   *
   * @since 8
   */
  public static final byte FS = 28;

  /**
   * Group Separator; the separator hierarchy is described at {@link #FS}.
   *
   * @see #FS
   * @since 8
   */
  public static final byte GS = 29;

  /**
   * Record Separator; the separator hierarchy is described at {@link #FS}.
   *
   * @see #FS
   * @since 8
   */
  public static final byte RS = 30;

  /**
   * Unit Separator; the separator hierarchy is described at {@link #FS}.
   *
   * @see #FS
   * @since 8
   */
  public static final byte US = 31;

  /**
   * Space: A normally non-printing graphic character used to separate words. It is also a format
   * effector which controls the movement of the printing position, one printing position forward.
   * (Applicable also to display devices.)
   *
   * @since 8
   */
  public static final byte SP = 32;

  /**
   * Alternate name for {@link #SP}.
   *
   * @since 8
   */
  public static final byte SPACE = SP;

  /**
   * Delete: This character is used primarily to "erase" or "obliterate" erroneous or unwanted
   * characters in perforated tape.
   *
   * @since 8
   */
  public static final byte DEL = 127;

  /** Distance between an ASCII lowercase letter and its uppercase counterpart. */
  private static final int CASE_OFFSET = 'a' - 'A';

  /**
   * Lowercases every {@linkplain #isUpperCase(char) uppercase ASCII character} of the input and
   * leaves every other character exactly as it was.
   *
   * @param string the text to convert
   * @return a string of the same length as {@code string} with ASCII uppercase letters lowercased
   * @throws NullPointerException if {@code string} is null
   */
  public static String toLowerCase(String string) {
    char[] chars = string.toCharArray();
    for (int index = 0; index < chars.length; index++) {
      chars[index] = toLowerCase(chars[index]);
    }
    return new String(chars);
  }

  /**
   * Maps an {@linkplain #isUpperCase(char) uppercase ASCII character} to its lowercase
   * equivalent; any other character is returned unchanged.
   *
   * @param c the character to convert
   * @return the lowercase form of {@code c}, or {@code c} itself when it is not in 'A'..'Z'
   */
  public static char toLowerCase(char c) {
    if (!isUpperCase(c)) {
      return c;
    }
    return (char) (c + CASE_OFFSET);
  }

  /**
   * Uppercases every {@linkplain #isLowerCase(char) lowercase ASCII character} of the input and
   * leaves every other character exactly as it was.
   *
   * @param string the text to convert
   * @return a string of the same length as {@code string} with ASCII lowercase letters uppercased
   * @throws NullPointerException if {@code string} is null
   */
  public static String toUpperCase(String string) {
    char[] chars = string.toCharArray();
    for (int index = 0; index < chars.length; index++) {
      chars[index] = toUpperCase(chars[index]);
    }
    return new String(chars);
  }

  /**
   * Maps a {@linkplain #isLowerCase(char) lowercase ASCII character} to its uppercase
   * equivalent; any other character is returned unchanged.
   *
   * @param c the character to convert
   * @return the uppercase form of {@code c}, or {@code c} itself when it is not in 'a'..'z'
   */
  public static char toUpperCase(char c) {
    if (!isLowerCase(c)) {
      return c;
    }
    return (char) (c - CASE_OFFSET);
  }

  /**
   * Tells whether {@code c} is one of the twenty-six lowercase ASCII letters, {@code 'a'} through
   * {@code 'z'} inclusive. Every other character, non-ASCII ones included, yields {@code false}.
   *
   * @param c the character to test
   * @return {@code true} only when {@code c} lies in the range 'a'..'z'
   */
  public static boolean isLowerCase(char c) {
    return 'a' <= c && c <= 'z';
  }

  /**
   * Tells whether {@code c} is one of the twenty-six uppercase ASCII letters, {@code 'A'} through
   * {@code 'Z'} inclusive. Every other character, non-ASCII ones included, yields {@code false}.
   *
   * @param c the character to test
   * @return {@code true} only when {@code c} lies in the range 'A'..'Z'
   */
  public static boolean isUpperCase(char c) {
    return 'A' <= c && c <= 'Z';
  }
}