001 /* 002 * Copyright (C) 2010 Google Inc. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015 package com.google.common.base; 016 017 import com.google.common.annotations.Beta; 018 import com.google.common.annotations.GwtCompatible; 019 020 /** 021 * Static methods pertaining to ASCII characters (those in the range of values {@code 0x00} through 022 * {@code 0x7F}), and to strings containing such characters. 023 * 024 * @author Craig Berry 025 * @author Gregory Kick 026 * @since 7 027 */ 028 @Beta 029 @GwtCompatible 030 public final class Ascii { 031 032 private Ascii() {} 033 034 /* The ASCII control characters, per RFC 20. */ 035 /** 036 * Null ('\0'): The all-zeros character which may serve to accomplish 037 * time fill and media fill. Normally used as a C string terminator. 038 * <p>Although RFC 20 names this as "Null", note that it is distinct 039 * from the C/C++ "NULL" pointer. 040 * 041 * @since 8 042 */ 043 public static final byte NUL = 0; 044 045 /** 046 * Start of Heading: A communication control character used at 047 * the beginning of a sequence of characters which constitute a 048 * machine-sensible address or routing information. Such a sequence is 049 * referred to as the "heading." An STX character has the effect of 050 * terminating a heading. 051 * 052 * @since 8 053 */ 054 public static final byte SOH = 1; 055 056 /** 057 * Start of Text: A communication control character which 058 * precedes a sequence of characters that is to be treated as an entity 059 * and entirely transmitted through to the ultimate destination. Such a 060 * sequence is referred to as "text." STX may be used to terminate a 061 * sequence of characters started by SOH. 062 * 063 * @since 8 064 */ 065 public static final byte STX = 2; 066 067 /** 068 * End of Text: A communication control character used to 069 * terminate a sequence of characters started with STX and transmitted 070 * as an entity. 071 * 072 * @since 8 073 */ 074 public static final byte ETX = 3; 075 076 /** 077 * End of Transmission: A communication control character used 078 * to indicate the conclusion of a transmission, which may have 079 * contained one or more texts and any associated headings. 080 * 081 * @since 8 082 */ 083 public static final byte EOT = 4; 084 085 /** 086 * Enquiry: A communication control character used in data 087 * communication systems as a request for a response from a remote 088 * station. It may be used as a "Who Are You" (WRU) to obtain 089 * identification, or may be used to obtain station status, or both. 090 * 091 * @since 8 092 */ 093 public static final byte ENQ = 5; 094 095 /** 096 * Acknowledge: A communication control character transmitted 097 * by a receiver as an affirmative response to a sender. 098 * 099 * @since 8 100 */ 101 public static final byte ACK = 6; 102 103 /** 104 * Bell ('\a'): A character for use when there is a need to call for 105 * human attention. It may control alarm or attention devices. 106 * 107 * @since 8 108 */ 109 public static final byte BEL = 7; 110 111 /** 112 * Backspace ('\b'): A format effector which controls the movement of 113 * the printing position one printing space backward on the same 114 * printing line. (Applicable also to display devices.) 115 * 116 * @since 8 117 */ 118 public static final byte BS = 8; 119 120 /** 121 * Horizontal Tabulation ('\t'): A format effector which controls the 122 * movement of the printing position to the next in a series of 123 * predetermined positions along the printing line. (Applicable also to 124 * display devices and the skip function on punched cards.) 125 * 126 * @since 8 127 */ 128 public static final byte HT = 9; 129 130 /** 131 * Line Feed ('\n'): A format effector which controls the movement of 132 * the printing position to the next printing line. (Applicable also to 133 * display devices.) Where appropriate, this character may have the 134 * meaning "New Line" (NL), a format effector which controls the 135 * movement of the printing point to the first printing position on the 136 * next printing line. Use of this convention requires agreement 137 * between sender and recipient of data. 138 * 139 * @since 8 140 */ 141 public static final byte LF = 10; 142 143 /** 144 * Alternate name for {@link #LF}. ({@code LF} is preferred.) 145 * 146 * @since 8 147 */ 148 public static final byte NL = 10; 149 150 /** 151 * Vertical Tabulation ('\v'): A format effector which controls the 152 * movement of the printing position to the next in a series of 153 * predetermined printing lines. (Applicable also to display devices.) 154 * 155 * @since 8 156 */ 157 public static final byte VT = 11; 158 159 /** 160 * Form Feed ('\f'): A format effector which controls the movement of 161 * the printing position to the first pre-determined printing line on 162 * the next form or page. (Applicable also to display devices.) 163 * 164 * @since 8 165 */ 166 public static final byte FF = 12; 167 168 /** 169 * Carriage Return ('\r'): A format effector which controls the 170 * movement of the printing position to the first printing position on 171 * the same printing line. (Applicable also to display devices.) 172 * 173 * @since 8 174 */ 175 public static final byte CR = 13; 176 177 /** 178 * Shift Out: A control character indicating that the code 179 * combinations which follow shall be interpreted as outside of the 180 * character set of the standard code table until a Shift In character 181 * is reached. 182 * 183 * @since 8 184 */ 185 public static final byte SO = 14; 186 187 /** 188 * Shift In: A control character indicating that the code 189 * combinations which follow shall be interpreted according to the 190 * standard code table. 191 * 192 * @since 8 193 */ 194 public static final byte SI = 15; 195 196 /** 197 * Data Link Escape: A communication control character which 198 * will change the meaning of a limited number of contiguously following 199 * characters. It is used exclusively to provide supplementary controls 200 * in data communication networks. 201 * 202 * @since 8 203 */ 204 public static final byte DLE = 16; 205 206 /** 207 * Device Controls: Characters for the control 208 * of ancillary devices associated with data processing or 209 * telecommunication systems, more especially switching devices "on" or 210 * "off." (If a single "stop" control is required to interrupt or turn 211 * off ancillary devices, DC4 is the preferred assignment.) 212 * 213 * @since 8 214 */ 215 public static final byte DC1 = 17; // aka XON 216 217 /** 218 * Transmission on/off: Although originally defined as DC1, this ASCII 219 * control character is now better known as the XON code used for software 220 * flow control in serial communications. The main use is restarting 221 * the transmission after the communication has been stopped by the XOFF 222 * control code. 223 * 224 * @since 8 225 */ 226 public static final byte XON = 17; // aka DC1 227 228 /** 229 * @see #DC1 230 * 231 * @since 8 232 */ 233 public static final byte DC2 = 18; 234 235 /** 236 * @see #DC1 237 * 238 * @since 8 239 */ 240 public static final byte DC3 = 19; // aka XOFF 241 242 /** 243 * Transmission off. @see #XON 244 * 245 * @since 8 246 */ 247 public static final byte XOFF = 19; // aka DC3 248 249 /** 250 * @see #DC1 251 * 252 * @since 8 253 */ 254 public static final byte DC4 = 20; 255 256 /** 257 * Negative Acknowledge: A communication control character 258 * transmitted by a receiver as a negative response to the sender. 259 * 260 * @since 8 261 */ 262 public static final byte NAK = 21; 263 264 /** 265 * Synchronous Idle: A communication control character used by 266 * a synchronous transmission system in the absence of any other 267 * character to provide a signal from which synchronism may be achieved 268 * or retained. 269 * 270 * @since 8 271 */ 272 public static final byte SYN = 22; 273 274 /** 275 * End of Transmission Block: A communication control character 276 * used to indicate the end of a block of data for communication 277 * purposes. ETB is used for blocking data where the block structure is 278 * not necessarily related to the processing format. 279 * 280 * @since 8 281 */ 282 public static final byte ETB = 23; 283 284 /** 285 * Cancel: A control character used to indicate that the data 286 * with which it is sent is in error or is to be disregarded. 287 * 288 * @since 8 289 */ 290 public static final byte CAN = 24; 291 292 /** 293 * End of Medium: A control character associated with the sent 294 * data which may be used to identify the physical end of the medium, or 295 * the end of the used, or wanted, portion of information recorded on a 296 * medium. (The position of this character does not necessarily 297 * correspond to the physical end of the medium.) 298 * 299 * @since 8 300 */ 301 public static final byte EM = 25; 302 303 /** 304 * Substitute: A character that may be substituted for a 305 * character which is determined to be invalid or in error. 306 * 307 * @since 8 308 */ 309 public static final byte SUB = 26; 310 311 /** 312 * Escape: A control character intended to provide code 313 * extension (supplementary characters) in general information 314 * interchange. The Escape character itself is a prefix affecting the 315 * interpretation of a limited number of contiguously following 316 * characters. 317 * 318 * @since 8 319 */ 320 public static final byte ESC = 27; 321 322 /** 323 * File/Group/Record/Unit Separator: These information separators may be 324 * used within data in optional fashion, except that their hierarchical 325 * relationship shall be: FS is the most inclusive, then GS, then RS, 326 * and US is least inclusive. (The content and length of a File, Group, 327 * Record, or Unit are not specified.) 328 * 329 * @since 8 330 */ 331 public static final byte FS = 28; 332 333 /** 334 * @see #FS 335 * 336 * @since 8 337 */ 338 public static final byte GS = 29; 339 340 /** 341 * @see #FS 342 * 343 * @since 8 344 */ 345 public static final byte RS = 30; 346 347 /** 348 * @see #FS 349 * 350 * @since 8 351 */ 352 public static final byte US = 31; 353 354 /** 355 * Space: A normally non-printing graphic character used to 356 * separate words. It is also a format effector which controls the 357 * movement of the printing position, one printing position forward. 358 * (Applicable also to display devices.) 359 * 360 * @since 8 361 */ 362 public static final byte SP = 32; 363 364 /** 365 * Alternate name for {@link #SP}. 366 * 367 * @since 8 368 */ 369 public static final byte SPACE = 32; 370 371 /** 372 * Delete: This character is used primarily to "erase" or 373 * "obliterate" erroneous or unwanted characters in perforated tape. 374 * 375 * @since 8 376 */ 377 public static final byte DEL = 127; 378 379 /** 380 * Returns a copy of the input string in which all {@linkplain #isUpperCase(char) uppercase ASCII 381 * characters} have been converted to lowercase. All other characters are copied without 382 * modification. 383 */ 384 public static String toLowerCase(String string) { 385 int length = string.length(); 386 StringBuilder builder = new StringBuilder(length); 387 for (int i = 0; i < length; i++) { 388 builder.append(toLowerCase(string.charAt(i))); 389 } 390 return builder.toString(); 391 } 392 393 /** 394 * If the argument is an {@linkplain #isUpperCase(char) uppercase ASCII character} returns the 395 * lowercase equivalent. Otherwise returns the argument. 396 */ 397 public static char toLowerCase(char c) { 398 return isUpperCase(c) ? (char) (c ^ 0x20) : c; 399 } 400 401 /** 402 * Returns a copy of the input string in which all {@linkplain #isLowerCase(char) lowercase ASCII 403 * characters} have been converted to uppercase. All other characters are copied without 404 * modification. 405 */ 406 public static String toUpperCase(String string) { 407 int length = string.length(); 408 StringBuilder builder = new StringBuilder(length); 409 for (int i = 0; i < length; i++) { 410 builder.append(toUpperCase(string.charAt(i))); 411 } 412 return builder.toString(); 413 } 414 415 /** 416 * If the argument is a {@linkplain #isLowerCase(char) lowercase ASCII character} returns the 417 * uppercase equivalent. Otherwise returns the argument. 418 */ 419 public static char toUpperCase(char c) { 420 return isLowerCase(c) ? (char) (c & 0x5f) : c; 421 } 422 423 /** 424 * Indicates whether {@code c} is one of the twenty-six lowercase ASCII alphabetic characters 425 * between {@code 'a'} and {@code 'z'} inclusive. All others (including non-ASCII characters) 426 * return {@code false}. 427 */ 428 public static boolean isLowerCase(char c) { 429 return (c >= 'a') && (c <= 'z'); 430 } 431 432 /** 433 * Indicates whether {@code c} is one of the twenty-six uppercase ASCII alphabetic characters 434 * between {@code 'A'} and {@code 'Z'} inclusive. All others (including non-ASCII characters) 435 * return {@code false}. 436 */ 437 public static boolean isUpperCase(char c) { 438 return (c >= 'A') && (c <= 'Z'); 439 } 440 }