001/* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.primitives; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020 021import com.google.common.annotations.Beta; 022import com.google.common.annotations.GwtIncompatible; 023import com.google.common.annotations.VisibleForTesting; 024import com.google.errorprone.annotations.CanIgnoreReturnValue; 025import java.nio.ByteOrder; 026import java.util.Arrays; 027import java.util.Comparator; 028import sun.misc.Unsafe; 029 030/** 031 * Static utility methods pertaining to {@code byte} primitives that interpret values as 032 * <i>unsigned</i> (that is, any negative value {@code b} is treated as the positive value {@code 033 * 256 + b}). The corresponding methods that treat the values as signed are found in {@link 034 * SignedBytes}, and the methods for which signedness is not an issue are in {@link Bytes}. 035 * 036 * <p>See the Guava User Guide article on <a 037 * href="https://github.com/google/guava/wiki/PrimitivesExplained">primitive utilities</a>. 038 * 039 * @author Kevin Bourrillion 040 * @author Martin Buchholz 041 * @author Hiroshi Yamauchi 042 * @author Louis Wasserman 043 * @since 1.0 044 */ 045@GwtIncompatible 046public final class UnsignedBytes { 047 private UnsignedBytes() {} 048 049 /** 050 * The largest power of two that can be represented as an unsigned {@code byte}. 051 * 052 * @since 10.0 053 */ 054 public static final byte MAX_POWER_OF_TWO = (byte) 0x80; 055 056 /** 057 * The largest value that fits into an unsigned byte. 058 * 059 * @since 13.0 060 */ 061 public static final byte MAX_VALUE = (byte) 0xFF; 062 063 private static final int UNSIGNED_MASK = 0xFF; 064 065 /** 066 * Returns the value of the given byte as an integer, when treated as unsigned. That is, returns 067 * {@code value + 256} if {@code value} is negative; {@code value} itself otherwise. 068 * 069 * <p><b>Java 8 users:</b> use {@link Byte#toUnsignedInt(byte)} instead. 070 * 071 * @since 6.0 072 */ 073 public static int toInt(byte value) { 074 return value & UNSIGNED_MASK; 075 } 076 077 /** 078 * Returns the {@code byte} value that, when treated as unsigned, is equal to {@code value}, if 079 * possible. 080 * 081 * @param value a value between 0 and 255 inclusive 082 * @return the {@code byte} value that, when treated as unsigned, equals {@code value} 083 * @throws IllegalArgumentException if {@code value} is negative or greater than 255 084 */ 085 @CanIgnoreReturnValue 086 public static byte checkedCast(long value) { 087 checkArgument(value >> Byte.SIZE == 0, "out of range: %s", value); 088 return (byte) value; 089 } 090 091 /** 092 * Returns the {@code byte} value that, when treated as unsigned, is nearest in value to {@code 093 * value}. 094 * 095 * @param value any {@code long} value 096 * @return {@code (byte) 255} if {@code value >= 255}, {@code (byte) 0} if {@code value <= 0}, and 097 * {@code value} cast to {@code byte} otherwise 098 */ 099 public static byte saturatedCast(long value) { 100 if (value > toInt(MAX_VALUE)) { 101 return MAX_VALUE; // -1 102 } 103 if (value < 0) { 104 return (byte) 0; 105 } 106 return (byte) value; 107 } 108 109 /** 110 * Compares the two specified {@code byte} values, treating them as unsigned values between 0 and 111 * 255 inclusive. For example, {@code (byte) -127} is considered greater than {@code (byte) 127} 112 * because it is seen as having the value of positive {@code 129}. 113 * 114 * @param a the first {@code byte} to compare 115 * @param b the second {@code byte} to compare 116 * @return a negative value if {@code a} is less than {@code b}; a positive value if {@code a} is 117 * greater than {@code b}; or zero if they are equal 118 */ 119 public static int compare(byte a, byte b) { 120 return toInt(a) - toInt(b); 121 } 122 123 /** 124 * Returns the least value present in {@code array}, treating values as unsigned. 125 * 126 * @param array a <i>nonempty</i> array of {@code byte} values 127 * @return the value present in {@code array} that is less than or equal to every other value in 128 * the array according to {@link #compare} 129 * @throws IllegalArgumentException if {@code array} is empty 130 */ 131 public static byte min(byte... array) { 132 checkArgument(array.length > 0); 133 int min = toInt(array[0]); 134 for (int i = 1; i < array.length; i++) { 135 int next = toInt(array[i]); 136 if (next < min) { 137 min = next; 138 } 139 } 140 return (byte) min; 141 } 142 143 /** 144 * Returns the greatest value present in {@code array}, treating values as unsigned. 145 * 146 * @param array a <i>nonempty</i> array of {@code byte} values 147 * @return the value present in {@code array} that is greater than or equal to every other value 148 * in the array according to {@link #compare} 149 * @throws IllegalArgumentException if {@code array} is empty 150 */ 151 public static byte max(byte... array) { 152 checkArgument(array.length > 0); 153 int max = toInt(array[0]); 154 for (int i = 1; i < array.length; i++) { 155 int next = toInt(array[i]); 156 if (next > max) { 157 max = next; 158 } 159 } 160 return (byte) max; 161 } 162 163 /** 164 * Returns a string representation of x, where x is treated as unsigned. 165 * 166 * @since 13.0 167 */ 168 @Beta 169 public static String toString(byte x) { 170 return toString(x, 10); 171 } 172 173 /** 174 * Returns a string representation of {@code x} for the given radix, where {@code x} is treated as 175 * unsigned. 176 * 177 * @param x the value to convert to a string. 178 * @param radix the radix to use while working with {@code x} 179 * @throws IllegalArgumentException if {@code radix} is not between {@link Character#MIN_RADIX} 180 * and {@link Character#MAX_RADIX}. 181 * @since 13.0 182 */ 183 @Beta 184 public static String toString(byte x, int radix) { 185 checkArgument( 186 radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX, 187 "radix (%s) must be between Character.MIN_RADIX and Character.MAX_RADIX", 188 radix); 189 // Benchmarks indicate this is probably not worth optimizing. 190 return Integer.toString(toInt(x), radix); 191 } 192 193 /** 194 * Returns the unsigned {@code byte} value represented by the given decimal string. 195 * 196 * @throws NumberFormatException if the string does not contain a valid unsigned {@code byte} 197 * value 198 * @throws NullPointerException if {@code string} is null (in contrast to {@link 199 * Byte#parseByte(String)}) 200 * @since 13.0 201 */ 202 @Beta 203 @CanIgnoreReturnValue 204 public static byte parseUnsignedByte(String string) { 205 return parseUnsignedByte(string, 10); 206 } 207 208 /** 209 * Returns the unsigned {@code byte} value represented by a string with the given radix. 210 * 211 * @param string the string containing the unsigned {@code byte} representation to be parsed. 212 * @param radix the radix to use while parsing {@code string} 213 * @throws NumberFormatException if the string does not contain a valid unsigned {@code byte} with 214 * the given radix, or if {@code radix} is not between {@link Character#MIN_RADIX} and {@link 215 * Character#MAX_RADIX}. 216 * @throws NullPointerException if {@code string} is null (in contrast to {@link 217 * Byte#parseByte(String)}) 218 * @since 13.0 219 */ 220 @Beta 221 @CanIgnoreReturnValue 222 public static byte parseUnsignedByte(String string, int radix) { 223 int parse = Integer.parseInt(checkNotNull(string), radix); 224 // We need to throw a NumberFormatException, so we have to duplicate checkedCast. =( 225 if (parse >> Byte.SIZE == 0) { 226 return (byte) parse; 227 } else { 228 throw new NumberFormatException("out of range: " + parse); 229 } 230 } 231 232 /** 233 * Returns a string containing the supplied {@code byte} values separated by {@code separator}. 234 * For example, {@code join(":", (byte) 1, (byte) 2, (byte) 255)} returns the string {@code 235 * "1:2:255"}. 236 * 237 * @param separator the text that should appear between consecutive values in the resulting string 238 * (but not at the start or end) 239 * @param array an array of {@code byte} values, possibly empty 240 */ 241 public static String join(String separator, byte... array) { 242 checkNotNull(separator); 243 if (array.length == 0) { 244 return ""; 245 } 246 247 // For pre-sizing a builder, just get the right order of magnitude 248 StringBuilder builder = new StringBuilder(array.length * (3 + separator.length())); 249 builder.append(toInt(array[0])); 250 for (int i = 1; i < array.length; i++) { 251 builder.append(separator).append(toString(array[i])); 252 } 253 return builder.toString(); 254 } 255 256 /** 257 * Returns a comparator that compares two {@code byte} arrays <a 258 * href="http://en.wikipedia.org/wiki/Lexicographical_order">lexicographically</a>. That is, it 259 * compares, using {@link #compare(byte, byte)}), the first pair of values that follow any common 260 * prefix, or when one array is a prefix of the other, treats the shorter array as the lesser. For 261 * example, {@code [] < [0x01] < [0x01, 0x7F] < [0x01, 0x80] < [0x02]}. Values are treated as 262 * unsigned. 263 * 264 * <p>The returned comparator is inconsistent with {@link Object#equals(Object)} (since arrays 265 * support only identity equality), but it is consistent with {@link 266 * java.util.Arrays#equals(byte[], byte[])}. 267 * 268 * @since 2.0 269 */ 270 public static Comparator<byte[]> lexicographicalComparator() { 271 return LexicographicalComparatorHolder.BEST_COMPARATOR; 272 } 273 274 @VisibleForTesting 275 static Comparator<byte[]> lexicographicalComparatorJavaImpl() { 276 return LexicographicalComparatorHolder.PureJavaComparator.INSTANCE; 277 } 278 279 /** 280 * Provides a lexicographical comparator implementation; either a Java implementation or a faster 281 * implementation based on {@link Unsafe}. 282 * 283 * <p>Uses reflection to gracefully fall back to the Java implementation if {@code Unsafe} isn't 284 * available. 285 */ 286 @VisibleForTesting 287 static class LexicographicalComparatorHolder { 288 static final String UNSAFE_COMPARATOR_NAME = 289 LexicographicalComparatorHolder.class.getName() + "$UnsafeComparator"; 290 291 static final Comparator<byte[]> BEST_COMPARATOR = getBestComparator(); 292 293 @VisibleForTesting 294 enum UnsafeComparator implements Comparator<byte[]> { 295 INSTANCE; 296 297 static final boolean BIG_ENDIAN = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); 298 299 /* 300 * The following static final fields exist for performance reasons. 301 * 302 * In UnsignedBytesBenchmark, accessing the following objects via static final fields is the 303 * fastest (more than twice as fast as the Java implementation, vs ~1.5x with non-final static 304 * fields, on x86_32) under the Hotspot server compiler. The reason is obviously that the 305 * non-final fields need to be reloaded inside the loop. 306 * 307 * And, no, defining (final or not) local variables out of the loop still isn't as good 308 * because the null check on the theUnsafe object remains inside the loop and 309 * BYTE_ARRAY_BASE_OFFSET doesn't get constant-folded. 310 * 311 * The compiler can treat static final fields as compile-time constants and can constant-fold 312 * them while (final or not) local variables are run time values. 313 */ 314 315 static final Unsafe theUnsafe = getUnsafe(); 316 317 /** The offset to the first element in a byte array. */ 318 static final int BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class); 319 320 static { 321 // fall back to the safer pure java implementation unless we're in 322 // a 64-bit JVM with an 8-byte aligned field offset. 323 if (!("64".equals(System.getProperty("sun.arch.data.model")) 324 && (BYTE_ARRAY_BASE_OFFSET % 8) == 0 325 // sanity check - this should never fail 326 && theUnsafe.arrayIndexScale(byte[].class) == 1)) { 327 throw new Error(); // force fallback to PureJavaComparator 328 } 329 } 330 331 /** 332 * Returns a sun.misc.Unsafe. Suitable for use in a 3rd party package. Replace with a simple 333 * call to Unsafe.getUnsafe when integrating into a jdk. 334 * 335 * @return a sun.misc.Unsafe 336 */ 337 private static sun.misc.Unsafe getUnsafe() { 338 try { 339 return sun.misc.Unsafe.getUnsafe(); 340 } catch (SecurityException e) { 341 // that's okay; try reflection instead 342 } 343 try { 344 return java.security.AccessController.doPrivileged( 345 new java.security.PrivilegedExceptionAction<sun.misc.Unsafe>() { 346 @Override 347 public sun.misc.Unsafe run() throws Exception { 348 Class<sun.misc.Unsafe> k = sun.misc.Unsafe.class; 349 for (java.lang.reflect.Field f : k.getDeclaredFields()) { 350 f.setAccessible(true); 351 Object x = f.get(null); 352 if (k.isInstance(x)) { 353 return k.cast(x); 354 } 355 } 356 throw new NoSuchFieldError("the Unsafe"); 357 } 358 }); 359 } catch (java.security.PrivilegedActionException e) { 360 throw new RuntimeException("Could not initialize intrinsics", e.getCause()); 361 } 362 } 363 364 @Override 365 public int compare(byte[] left, byte[] right) { 366 final int stride = 8; 367 int minLength = Math.min(left.length, right.length); 368 int strideLimit = minLength & ~(stride - 1); 369 int i; 370 371 /* 372 * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower 373 * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit. 374 */ 375 for (i = 0; i < strideLimit; i += stride) { 376 long lw = theUnsafe.getLong(left, BYTE_ARRAY_BASE_OFFSET + (long) i); 377 long rw = theUnsafe.getLong(right, BYTE_ARRAY_BASE_OFFSET + (long) i); 378 if (lw != rw) { 379 if (BIG_ENDIAN) { 380 return UnsignedLongs.compare(lw, rw); 381 } 382 383 /* 384 * We want to compare only the first index where left[index] != right[index]. This 385 * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are 386 * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant 387 * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get 388 * that least significant nonzero byte. 389 */ 390 int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7; 391 return ((int) ((lw >>> n) & UNSIGNED_MASK)) - ((int) ((rw >>> n) & UNSIGNED_MASK)); 392 } 393 } 394 395 // The epilogue to cover the last (minLength % stride) elements. 396 for (; i < minLength; i++) { 397 int result = UnsignedBytes.compare(left[i], right[i]); 398 if (result != 0) { 399 return result; 400 } 401 } 402 return left.length - right.length; 403 } 404 405 @Override 406 public String toString() { 407 return "UnsignedBytes.lexicographicalComparator() (sun.misc.Unsafe version)"; 408 } 409 } 410 411 enum PureJavaComparator implements Comparator<byte[]> { 412 INSTANCE; 413 414 @Override 415 public int compare(byte[] left, byte[] right) { 416 int minLength = Math.min(left.length, right.length); 417 for (int i = 0; i < minLength; i++) { 418 int result = UnsignedBytes.compare(left[i], right[i]); 419 if (result != 0) { 420 return result; 421 } 422 } 423 return left.length - right.length; 424 } 425 426 @Override 427 public String toString() { 428 return "UnsignedBytes.lexicographicalComparator() (pure Java version)"; 429 } 430 } 431 432 /** 433 * Returns the Unsafe-using Comparator, or falls back to the pure-Java implementation if unable 434 * to do so. 435 */ 436 static Comparator<byte[]> getBestComparator() { 437 try { 438 Class<?> theClass = Class.forName(UNSAFE_COMPARATOR_NAME); 439 440 // yes, UnsafeComparator does implement Comparator<byte[]> 441 @SuppressWarnings("unchecked") 442 Comparator<byte[]> comparator = (Comparator<byte[]>) theClass.getEnumConstants()[0]; 443 return comparator; 444 } catch (Throwable t) { // ensure we really catch *everything* 445 return lexicographicalComparatorJavaImpl(); 446 } 447 } 448 } 449 450 private static byte flip(byte b) { 451 return (byte) (b ^ 0x80); 452 } 453 454 /** 455 * Sorts the array, treating its elements as unsigned bytes. 456 * 457 * @since 23.1 458 */ 459 public static void sort(byte[] array) { 460 checkNotNull(array); 461 sort(array, 0, array.length); 462 } 463 464 /** 465 * Sorts the array between {@code fromIndex} inclusive and {@code toIndex} exclusive, treating its 466 * elements as unsigned bytes. 467 * 468 * @since 23.1 469 */ 470 public static void sort(byte[] array, int fromIndex, int toIndex) { 471 checkNotNull(array); 472 checkPositionIndexes(fromIndex, toIndex, array.length); 473 for (int i = fromIndex; i < toIndex; i++) { 474 array[i] = flip(array[i]); 475 } 476 Arrays.sort(array, fromIndex, toIndex); 477 for (int i = fromIndex; i < toIndex; i++) { 478 array[i] = flip(array[i]); 479 } 480 } 481 482 /** 483 * Sorts the elements of {@code array} in descending order, interpreting them as unsigned 8-bit 484 * integers. 485 * 486 * @since 23.1 487 */ 488 public static void sortDescending(byte[] array) { 489 checkNotNull(array); 490 sortDescending(array, 0, array.length); 491 } 492 493 /** 494 * Sorts the elements of {@code array} between {@code fromIndex} inclusive and {@code toIndex} 495 * exclusive in descending order, interpreting them as unsigned 8-bit integers. 496 * 497 * @since 23.1 498 */ 499 public static void sortDescending(byte[] array, int fromIndex, int toIndex) { 500 checkNotNull(array); 501 checkPositionIndexes(fromIndex, toIndex, array.length); 502 for (int i = fromIndex; i < toIndex; i++) { 503 array[i] ^= Byte.MAX_VALUE; 504 } 505 Arrays.sort(array, fromIndex, toIndex); 506 for (int i = fromIndex; i < toIndex; i++) { 507 array[i] ^= Byte.MAX_VALUE; 508 } 509 } 510}