001/* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.primitives; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019 020import com.google.common.annotations.Beta; 021import com.google.common.annotations.GwtIncompatible; 022import com.google.common.annotations.VisibleForTesting; 023import com.google.errorprone.annotations.CanIgnoreReturnValue; 024import java.nio.ByteOrder; 025import java.util.Comparator; 026import sun.misc.Unsafe; 027 028/** 029 * Static utility methods pertaining to {@code byte} primitives that interpret values as 030 * <i>unsigned</i> (that is, any negative value {@code b} is treated as the positive value 031 * {@code 256 + b}). The corresponding methods that treat the values as signed are found in 032 * {@link SignedBytes}, and the methods for which signedness is not an issue are in {@link Bytes}. 033 * 034 * <p>See the Guava User Guide article on 035 * <a href="https://github.com/google/guava/wiki/PrimitivesExplained">primitive utilities</a>. 036 * 037 * @author Kevin Bourrillion 038 * @author Martin Buchholz 039 * @author Hiroshi Yamauchi 040 * @author Louis Wasserman 041 * @since 1.0 042 */ 043@GwtIncompatible 044public final class UnsignedBytes { 045 private UnsignedBytes() {} 046 047 /** 048 * The largest power of two that can be represented as an unsigned {@code 049 * byte}. 050 * 051 * @since 10.0 052 */ 053 public static final byte MAX_POWER_OF_TWO = (byte) 0x80; 054 055 /** 056 * The largest value that fits into an unsigned byte. 057 * 058 * @since 13.0 059 */ 060 public static final byte MAX_VALUE = (byte) 0xFF; 061 062 private static final int UNSIGNED_MASK = 0xFF; 063 064 /** 065 * Returns the value of the given byte as an integer, when treated as unsigned. That is, returns 066 * {@code value + 256} if {@code value} is negative; {@code value} itself otherwise. 067 * 068 * @since 6.0 069 */ 070 public static int toInt(byte value) { 071 return value & UNSIGNED_MASK; 072 } 073 074 /** 075 * Returns the {@code byte} value that, when treated as unsigned, is equal to {@code value}, if 076 * possible. 077 * 078 * @param value a value between 0 and 255 inclusive 079 * @return the {@code byte} value that, when treated as unsigned, equals {@code value} 080 * @throws IllegalArgumentException if {@code value} is negative or greater than 255 081 */ 082 @CanIgnoreReturnValue 083 public static byte checkedCast(long value) { 084 if ((value >> Byte.SIZE) != 0) { 085 // don't use checkArgument here, to avoid boxing 086 throw new IllegalArgumentException("Out of range: " + value); 087 } 088 return (byte) value; 089 } 090 091 /** 092 * Returns the {@code byte} value that, when treated as unsigned, is nearest in value to 093 * {@code value}. 094 * 095 * @param value any {@code long} value 096 * @return {@code (byte) 255} if {@code value >= 255}, {@code (byte) 0} if {@code value <= 0}, and 097 * {@code value} cast to {@code byte} otherwise 098 */ 099 public static byte saturatedCast(long value) { 100 if (value > toInt(MAX_VALUE)) { 101 return MAX_VALUE; // -1 102 } 103 if (value < 0) { 104 return (byte) 0; 105 } 106 return (byte) value; 107 } 108 109 /** 110 * Compares the two specified {@code byte} values, treating them as unsigned values between 0 and 111 * 255 inclusive. For example, {@code (byte) -127} is considered greater than {@code (byte) 127} 112 * because it is seen as having the value of positive {@code 129}. 113 * 114 * @param a the first {@code byte} to compare 115 * @param b the second {@code byte} to compare 116 * @return a negative value if {@code a} is less than {@code b}; a positive value if {@code a} is 117 * greater than {@code b}; or zero if they are equal 118 */ 119 public static int compare(byte a, byte b) { 120 return toInt(a) - toInt(b); 121 } 122 123 /** 124 * Returns the least value present in {@code array}. 125 * 126 * @param array a <i>nonempty</i> array of {@code byte} values 127 * @return the value present in {@code array} that is less than or equal to every other value in 128 * the array 129 * @throws IllegalArgumentException if {@code array} is empty 130 */ 131 public static byte min(byte... array) { 132 checkArgument(array.length > 0); 133 int min = toInt(array[0]); 134 for (int i = 1; i < array.length; i++) { 135 int next = toInt(array[i]); 136 if (next < min) { 137 min = next; 138 } 139 } 140 return (byte) min; 141 } 142 143 /** 144 * Returns the greatest value present in {@code array}. 145 * 146 * @param array a <i>nonempty</i> array of {@code byte} values 147 * @return the value present in {@code array} that is greater than or equal to every other value 148 * in the array 149 * @throws IllegalArgumentException if {@code array} is empty 150 */ 151 public static byte max(byte... array) { 152 checkArgument(array.length > 0); 153 int max = toInt(array[0]); 154 for (int i = 1; i < array.length; i++) { 155 int next = toInt(array[i]); 156 if (next > max) { 157 max = next; 158 } 159 } 160 return (byte) max; 161 } 162 163 /** 164 * Returns a string representation of x, where x is treated as unsigned. 165 * 166 * @since 13.0 167 */ 168 @Beta 169 public static String toString(byte x) { 170 return toString(x, 10); 171 } 172 173 /** 174 * Returns a string representation of {@code x} for the given radix, where {@code x} is treated as 175 * unsigned. 176 * 177 * @param x the value to convert to a string. 178 * @param radix the radix to use while working with {@code x} 179 * @throws IllegalArgumentException if {@code radix} is not between {@link Character#MIN_RADIX} 180 * and {@link Character#MAX_RADIX}. 181 * @since 13.0 182 */ 183 @Beta 184 public static String toString(byte x, int radix) { 185 checkArgument( 186 radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX, 187 "radix (%s) must be between Character.MIN_RADIX and Character.MAX_RADIX", 188 radix); 189 // Benchmarks indicate this is probably not worth optimizing. 190 return Integer.toString(toInt(x), radix); 191 } 192 193 /** 194 * Returns the unsigned {@code byte} value represented by the given decimal string. 195 * 196 * @throws NumberFormatException if the string does not contain a valid unsigned {@code byte} 197 * value 198 * @throws NullPointerException if {@code string} is null (in contrast to 199 * {@link Byte#parseByte(String)}) 200 * @since 13.0 201 */ 202 @Beta 203 @CanIgnoreReturnValue 204 public static byte parseUnsignedByte(String string) { 205 return parseUnsignedByte(string, 10); 206 } 207 208 /** 209 * Returns the unsigned {@code byte} value represented by a string with the given radix. 210 * 211 * @param string the string containing the unsigned {@code byte} representation to be parsed. 212 * @param radix the radix to use while parsing {@code string} 213 * @throws NumberFormatException if the string does not contain a valid unsigned {@code byte} with 214 * the given radix, or if {@code radix} is not between {@link Character#MIN_RADIX} and 215 * {@link Character#MAX_RADIX}. 216 * @throws NullPointerException if {@code string} is null (in contrast to 217 * {@link Byte#parseByte(String)}) 218 * @since 13.0 219 */ 220 @Beta 221 @CanIgnoreReturnValue 222 public static byte parseUnsignedByte(String string, int radix) { 223 int parse = Integer.parseInt(checkNotNull(string), radix); 224 // We need to throw a NumberFormatException, so we have to duplicate checkedCast. =( 225 if (parse >> Byte.SIZE == 0) { 226 return (byte) parse; 227 } else { 228 throw new NumberFormatException("out of range: " + parse); 229 } 230 } 231 232 /** 233 * Returns a string containing the supplied {@code byte} values separated by {@code separator}. 234 * For example, {@code join(":", (byte) 1, (byte) 2, 235 * (byte) 255)} returns the string {@code "1:2:255"}. 236 * 237 * @param separator the text that should appear between consecutive values in the resulting string 238 * (but not at the start or end) 239 * @param array an array of {@code byte} values, possibly empty 240 */ 241 public static String join(String separator, byte... array) { 242 checkNotNull(separator); 243 if (array.length == 0) { 244 return ""; 245 } 246 247 // For pre-sizing a builder, just get the right order of magnitude 248 StringBuilder builder = new StringBuilder(array.length * (3 + separator.length())); 249 builder.append(toInt(array[0])); 250 for (int i = 1; i < array.length; i++) { 251 builder.append(separator).append(toString(array[i])); 252 } 253 return builder.toString(); 254 } 255 256 /** 257 * Returns a comparator that compares two {@code byte} arrays <a 258 * href="http://en.wikipedia.org/wiki/Lexicographical_order">lexicographically</a>. That is, it 259 * compares, using {@link #compare(byte, byte)}), the first pair of values that follow any common 260 * prefix, or when one array is a prefix of the other, treats the shorter array as the lesser. For 261 * example, {@code [] < [0x01] < [0x01, 0x7F] < [0x01, 0x80] < [0x02]}. Values are treated as 262 * unsigned. 263 * 264 * <p>The returned comparator is inconsistent with {@link Object#equals(Object)} (since arrays 265 * support only identity equality), but it is consistent with 266 * {@link java.util.Arrays#equals(byte[], byte[])}. 267 * 268 * @since 2.0 269 */ 270 public static Comparator<byte[]> lexicographicalComparator() { 271 return LexicographicalComparatorHolder.BEST_COMPARATOR; 272 } 273 274 @VisibleForTesting 275 static Comparator<byte[]> lexicographicalComparatorJavaImpl() { 276 return LexicographicalComparatorHolder.PureJavaComparator.INSTANCE; 277 } 278 279 /** 280 * Provides a lexicographical comparator implementation; either a Java implementation or a faster 281 * implementation based on {@link Unsafe}. 282 * 283 * <p>Uses reflection to gracefully fall back to the Java implementation if {@code Unsafe} isn't 284 * available. 285 */ 286 @VisibleForTesting 287 static class LexicographicalComparatorHolder { 288 static final String UNSAFE_COMPARATOR_NAME = 289 LexicographicalComparatorHolder.class.getName() + "$UnsafeComparator"; 290 291 static final Comparator<byte[]> BEST_COMPARATOR = getBestComparator(); 292 293 @VisibleForTesting 294 enum UnsafeComparator implements Comparator<byte[]> { 295 INSTANCE; 296 297 static final boolean BIG_ENDIAN = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); 298 299 /* 300 * The following static final fields exist for performance reasons. 301 * 302 * In UnsignedBytesBenchmark, accessing the following objects via static final fields is the 303 * fastest (more than twice as fast as the Java implementation, vs ~1.5x with non-final static 304 * fields, on x86_32) under the Hotspot server compiler. The reason is obviously that the 305 * non-final fields need to be reloaded inside the loop. 306 * 307 * And, no, defining (final or not) local variables out of the loop still isn't as good 308 * because the null check on the theUnsafe object remains inside the loop and 309 * BYTE_ARRAY_BASE_OFFSET doesn't get constant-folded. 310 * 311 * The compiler can treat static final fields as compile-time constants and can constant-fold 312 * them while (final or not) local variables are run time values. 313 */ 314 315 static final Unsafe theUnsafe; 316 317 /** The offset to the first element in a byte array. */ 318 static final int BYTE_ARRAY_BASE_OFFSET; 319 320 static { 321 theUnsafe = getUnsafe(); 322 323 BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class); 324 325 // sanity check - this should never fail 326 if (theUnsafe.arrayIndexScale(byte[].class) != 1) { 327 throw new AssertionError(); 328 } 329 } 330 331 /** 332 * Returns a sun.misc.Unsafe. Suitable for use in a 3rd party package. Replace with a simple 333 * call to Unsafe.getUnsafe when integrating into a jdk. 334 * 335 * @return a sun.misc.Unsafe 336 */ 337 private static sun.misc.Unsafe getUnsafe() { 338 try { 339 return sun.misc.Unsafe.getUnsafe(); 340 } catch (SecurityException e) { 341 // that's okay; try reflection instead 342 } 343 try { 344 return java.security.AccessController.doPrivileged( 345 new java.security.PrivilegedExceptionAction<sun.misc.Unsafe>() { 346 @Override 347 public sun.misc.Unsafe run() throws Exception { 348 Class<sun.misc.Unsafe> k = sun.misc.Unsafe.class; 349 for (java.lang.reflect.Field f : k.getDeclaredFields()) { 350 f.setAccessible(true); 351 Object x = f.get(null); 352 if (k.isInstance(x)) { 353 return k.cast(x); 354 } 355 } 356 throw new NoSuchFieldError("the Unsafe"); 357 } 358 }); 359 } catch (java.security.PrivilegedActionException e) { 360 throw new RuntimeException("Could not initialize intrinsics", e.getCause()); 361 } 362 } 363 364 @Override 365 public int compare(byte[] left, byte[] right) { 366 int minLength = Math.min(left.length, right.length); 367 int minWords = minLength / Longs.BYTES; 368 369 /* 370 * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a time is no slower 371 * than comparing 4 bytes at a time even on 32-bit. On the other hand, it is substantially 372 * faster on 64-bit. 373 */ 374 for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) { 375 long lw = theUnsafe.getLong(left, BYTE_ARRAY_BASE_OFFSET + (long) i); 376 long rw = theUnsafe.getLong(right, BYTE_ARRAY_BASE_OFFSET + (long) i); 377 if (lw != rw) { 378 if (BIG_ENDIAN) { 379 return UnsignedLongs.compare(lw, rw); 380 } 381 382 /* 383 * We want to compare only the first index where left[index] != right[index]. This 384 * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are 385 * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant 386 * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get 387 * that least significant nonzero byte. 388 */ 389 int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7; 390 return ((int) ((lw >>> n) & UNSIGNED_MASK)) - ((int) ((rw >>> n) & UNSIGNED_MASK)); 391 } 392 } 393 394 // The epilogue to cover the last (minLength % 8) elements. 395 for (int i = minWords * Longs.BYTES; i < minLength; i++) { 396 int result = UnsignedBytes.compare(left[i], right[i]); 397 if (result != 0) { 398 return result; 399 } 400 } 401 return left.length - right.length; 402 } 403 404 @Override 405 public String toString() { 406 return "UnsignedBytes.lexicographicalComparator() (sun.misc.Unsafe version)"; 407 } 408 } 409 410 enum PureJavaComparator implements Comparator<byte[]> { 411 INSTANCE; 412 413 @Override 414 public int compare(byte[] left, byte[] right) { 415 int minLength = Math.min(left.length, right.length); 416 for (int i = 0; i < minLength; i++) { 417 int result = UnsignedBytes.compare(left[i], right[i]); 418 if (result != 0) { 419 return result; 420 } 421 } 422 return left.length - right.length; 423 } 424 425 @Override 426 public String toString() { 427 return "UnsignedBytes.lexicographicalComparator() (pure Java version)"; 428 } 429 } 430 431 /** 432 * Returns the Unsafe-using Comparator, or falls back to the pure-Java implementation if unable 433 * to do so. 434 */ 435 static Comparator<byte[]> getBestComparator() { 436 try { 437 Class<?> theClass = Class.forName(UNSAFE_COMPARATOR_NAME); 438 439 // yes, UnsafeComparator does implement Comparator<byte[]> 440 @SuppressWarnings("unchecked") 441 Comparator<byte[]> comparator = (Comparator<byte[]>) theClass.getEnumConstants()[0]; 442 return comparator; 443 } catch (Throwable t) { // ensure we really catch *everything* 444 return lexicographicalComparatorJavaImpl(); 445 } 446 } 447 } 448}