XXHash64 and 32 implementations
This commit is contained in:
parent
8216c2298a
commit
3ec189804c
@ -4,7 +4,7 @@ kotlin.code.style=official
|
||||
specifyKotlinAsDependency=false
|
||||
|
||||
projectGroup=ru.dbotthepony.kommons
|
||||
projectVersion=2.6.0
|
||||
projectVersion=2.7.0
|
||||
|
||||
guavaDepVersion=33.0.0
|
||||
gsonDepVersion=2.8.9
|
||||
|
168
src/main/java/ru/dbotthepony/kommons/util/XXHash32.java
Normal file
168
src/main/java/ru/dbotthepony/kommons/util/XXHash32.java
Normal file
@ -0,0 +1,168 @@
|
||||
package ru.dbotthepony.kommons.util;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * Pure Java implementation of the 32-bit xxHash (XXH32) hashing function, exposed through the
 * {@link MessageDigest} API so input can be supplied incrementally.
 * <p>
 * {@link XXHash32#digest()} returns the 32-bit hash as four bytes in big-endian order and
 * resets the instance to its seeded initial state.
 * <p>
 * Instances hold mutable state without any synchronization and must not be shared between
 * threads without external locking.
 */
public final class XXHash32 extends MessageDigest {
    public static final int XXH_PRIME32_1 = 0x9E3779B1; /*!< 0b10011110001101110111100110110001 */
    public static final int XXH_PRIME32_2 = 0x85EBCA77; /*!< 0b10000101111010111100101001110111 */
    public static final int XXH_PRIME32_3 = 0xC2B2AE3D; /*!< 0b11000010101100101010111000111101 */
    public static final int XXH_PRIME32_4 = 0x27D4EB2F; /*!< 0b00100111110101001110101100101111 */
    public static final int XXH_PRIME32_5 = 0x165667B1; /*!< 0b00010110010101100110011110110001 */

    /** Number of input bytes consumed per accumulation step (four 32-bit lanes). */
    private static final int STRIPE_SIZE = 16;

    /** Length of the produced digest in bytes. */
    private static final int DIGEST_LENGTH = 4;

    private final int seed;

    /** Holds input bytes that do not yet form a complete stripe. */
    private final byte[] mem = new byte[STRIPE_SIZE];
    /** Number of valid bytes currently stored in {@link #mem}. */
    private int memIndex;

    /** Total number of bytes fed since the last reset. */
    private long length;

    private int v0;
    private int v1;
    private int v2;
    private int v3;

    /**
     * Creates a hasher with the given seed.
     *
     * @param seed value mixed into the initial accumulator state
     */
    public XXHash32(int seed) {
        super("XXHash32");
        this.seed = seed;
        engineReset();
    }

    /** Creates a hasher with seed {@code 0}. */
    public XXHash32() {
        this(0);
    }

    /** Mixes one 32-bit lane of input into an accumulator. */
    private static int round(int acc, int input) {
        acc += input * XXH_PRIME32_2;
        acc = Integer.rotateLeft(acc, 13);
        acc *= XXH_PRIME32_1;
        return acc;
    }

    /** Reads a little-endian 32-bit value from the internal buffer. */
    private int load32(int index) {
        return load32(mem, index);
    }

    /** Reads a little-endian 32-bit value from {@code source} starting at {@code index}. */
    private static int load32(byte[] source, int index) {
        return (source[index] & 0xFF)
            | (source[index + 1] & 0xFF) << 8
            | (source[index + 2] & 0xFF) << 16
            | (source[index + 3] & 0xFF) << 24;
    }

    /** Consumes the full internal buffer as one stripe and marks the buffer empty. */
    private void doRound() {
        memIndex = 0;
        v0 = round(v0, load32(0));
        v1 = round(v1, load32(4));
        v2 = round(v2, load32(8));
        v3 = round(v3, load32(12));
    }

    @Override
    protected int engineGetDigestLength() {
        return DIGEST_LENGTH;
    }

    @Override
    protected void engineUpdate(byte input) {
        length++;
        mem[memIndex++] = input;

        if (memIndex >= STRIPE_SIZE) {
            doRound();
        }
    }

    /**
     * Feeds {@code len} bytes of {@code input}, starting at {@code offset}, into the hash.
     *
     * @throws IndexOutOfBoundsException if the requested range does not lie within {@code input}
     */
    @Override
    protected void engineUpdate(byte[] input, int offset, int len) {
        // Argument order is (fromIndex, size, length): [offset, offset + len) must fit in input.
        Objects.checkFromIndexSize(offset, len, input.length);

        if (len == 0) {
            return;
        }

        length += len;

        // Top up a partially filled buffer first so stripes stay aligned with the stream.
        if (memIndex != 0) {
            int size = Math.min(STRIPE_SIZE - memIndex, len);
            System.arraycopy(input, offset, mem, memIndex, size);
            memIndex += size;
            offset += size;
            len -= size;

            if (memIndex >= STRIPE_SIZE) {
                doRound();
            }
        }

        // Whole stripes are consumed straight from the caller's array, without copying.
        while (len >= STRIPE_SIZE) {
            v0 = round(v0, load32(input, offset));
            v1 = round(v1, load32(input, offset + 4));
            v2 = round(v2, load32(input, offset + 8));
            v3 = round(v3, load32(input, offset + 12));
            offset += STRIPE_SIZE;
            len -= STRIPE_SIZE;
        }

        // Fewer than STRIPE_SIZE bytes remain and the buffer is empty at this point
        // (bytes can only be left over if the buffer was empty or has just been flushed),
        // so the tail can never complete a stripe here.
        if (len != 0) {
            System.arraycopy(input, offset, mem, memIndex, len);
            memIndex += len;
        }
    }

    /**
     * Finalizes the hash, resets this instance and returns the result.
     *
     * @return the 32-bit hash encoded as four big-endian bytes
     */
    @Override
    protected byte[] engineDigest() {
        int h32;

        if (length >= STRIPE_SIZE) {
            // At least one full stripe was processed: fold the four lanes together.
            h32 = Integer.rotateLeft(v0, 1)
                + Integer.rotateLeft(v1, 7)
                + Integer.rotateLeft(v2, 12)
                + Integer.rotateLeft(v3, 18);
        } else {
            // Short input: the lanes were never touched, start from the seed instead.
            h32 = seed + XXH_PRIME32_5;
        }

        h32 += (int) length;

        int len = memIndex;
        int index = 0;

        // Remaining buffered bytes: 4 bytes at a time first, then byte by byte.
        while (len >= 4) {
            h32 += load32(index) * XXH_PRIME32_3;
            h32 = Integer.rotateLeft(h32, 17) * XXH_PRIME32_4;
            index += 4;
            len -= 4;
        }

        while (len-- > 0) {
            h32 += (mem[index++] & 0xFF) * XXH_PRIME32_5;
            h32 = Integer.rotateLeft(h32, 11) * XXH_PRIME32_1;
        }

        // Final avalanche.
        h32 ^= h32 >>> 15;
        h32 *= XXH_PRIME32_2;
        h32 ^= h32 >>> 13;
        h32 *= XXH_PRIME32_3;
        h32 ^= h32 >>> 16;

        engineReset();

        byte[] result = new byte[DIGEST_LENGTH];
        result[0] = (byte) (h32 >>> 24);
        result[1] = (byte) (h32 >>> 16);
        result[2] = (byte) (h32 >>> 8);
        result[3] = (byte) h32;
        return result;
    }

    /** Restores the seeded initial state, discarding any input fed so far. */
    @Override
    protected void engineReset() {
        this.length = 0L;
        this.memIndex = 0;
        this.v0 = this.seed + XXH_PRIME32_1 + XXH_PRIME32_2;
        this.v1 = this.seed + XXH_PRIME32_2;
        this.v2 = this.seed;
        this.v3 = this.seed - XXH_PRIME32_1;
    }
}
|
194
src/main/java/ru/dbotthepony/kommons/util/XXHash64.java
Normal file
194
src/main/java/ru/dbotthepony/kommons/util/XXHash64.java
Normal file
@ -0,0 +1,194 @@
|
||||
package ru.dbotthepony.kommons.util;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * Pure Java implementation of the 64-bit xxHash (XXH64) hashing function, exposed through the
 * {@link MessageDigest} API so input can be supplied incrementally.
 * <p>
 * {@link XXHash64#digest()} returns the 64-bit hash as eight bytes in big-endian order and
 * resets the instance to its seeded initial state.
 * <p>
 * Instances hold mutable state without any synchronization and must not be shared between
 * threads without external locking.
 */
public final class XXHash64 extends MessageDigest {
    public static final long XXH_PRIME64_1 = 0x9E3779B185EBCA87L; /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
    public static final long XXH_PRIME64_2 = 0xC2B2AE3D27D4EB4FL; /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
    public static final long XXH_PRIME64_3 = 0x165667B19E3779F9L; /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
    public static final long XXH_PRIME64_4 = 0x85EBCA77C2B2AE63L; /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
    public static final long XXH_PRIME64_5 = 0x27D4EB2F165667C5L; /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */

    /** Number of input bytes consumed per accumulation step (four 64-bit lanes). */
    private static final int STRIPE_SIZE = 32;

    /** Length of the produced digest in bytes. */
    private static final int DIGEST_LENGTH = 8;

    private final long seed;

    /** Holds input bytes that do not yet form a complete stripe. */
    private final byte[] mem = new byte[STRIPE_SIZE];
    /** Number of valid bytes currently stored in {@link #mem}. */
    private int memIndex;

    /** Total number of bytes fed since the last reset. */
    private long length;

    private long v0;
    private long v1;
    private long v2;
    private long v3;

    /**
     * Creates a hasher with the given seed.
     *
     * @param seed value mixed into the initial accumulator state
     */
    public XXHash64(long seed) {
        super("XXHash64");
        this.seed = seed;
        engineReset();
    }

    /** Creates a hasher with seed {@code 0}. */
    public XXHash64() {
        this(0);
    }

    /** Reads a little-endian 64-bit value from the internal buffer. */
    private long load64(int index) {
        return load64(mem, index);
    }

    /** Reads a little-endian 64-bit value from {@code source} starting at {@code index}. */
    private static long load64(byte[] source, int index) {
        return (source[index] & 0xFFL)
            | (source[index + 1] & 0xFFL) << 8
            | (source[index + 2] & 0xFFL) << 16
            | (source[index + 3] & 0xFFL) << 24
            | (source[index + 4] & 0xFFL) << 32
            | (source[index + 5] & 0xFFL) << 40
            | (source[index + 6] & 0xFFL) << 48
            | (source[index + 7] & 0xFFL) << 56;
    }

    /** Reads a little-endian unsigned 32-bit value from the internal buffer, widened to long. */
    private long load32(int index) {
        return (mem[index] & 0xFFL)
            | (mem[index + 1] & 0xFFL) << 8
            | (mem[index + 2] & 0xFFL) << 16
            | (mem[index + 3] & 0xFFL) << 24;
    }

    /** Mixes one 64-bit lane of input into an accumulator. */
    private static long round(long acc, long input) {
        acc += input * XXH_PRIME64_2;
        acc = Long.rotateLeft(acc, 31);
        return acc * XXH_PRIME64_1;
    }

    /** Folds one lane accumulator into the converged hash value during finalization. */
    private static long mergeRound(long acc, long val) {
        val = round(0L, val);
        acc ^= val;
        return acc * XXH_PRIME64_1 + XXH_PRIME64_4;
    }

    /** Consumes the full internal buffer as one stripe and marks the buffer empty. */
    private void doRound() {
        memIndex = 0;
        v0 = round(v0, load64(0));
        v1 = round(v1, load64(8));
        v2 = round(v2, load64(16));
        v3 = round(v3, load64(24));
    }

    @Override
    protected int engineGetDigestLength() {
        return DIGEST_LENGTH;
    }

    @Override
    protected void engineUpdate(byte input) {
        length++;
        mem[memIndex++] = input;

        if (memIndex >= STRIPE_SIZE) {
            doRound();
        }
    }

    /**
     * Feeds {@code len} bytes of {@code input}, starting at {@code offset}, into the hash.
     *
     * @throws IndexOutOfBoundsException if the requested range does not lie within {@code input}
     */
    @Override
    protected void engineUpdate(byte[] input, int offset, int len) {
        // Argument order is (fromIndex, size, length): [offset, offset + len) must fit in input.
        Objects.checkFromIndexSize(offset, len, input.length);

        if (len == 0) {
            return;
        }

        length += len;

        // Top up a partially filled buffer first so stripes stay aligned with the stream.
        if (memIndex != 0) {
            int size = Math.min(STRIPE_SIZE - memIndex, len);
            System.arraycopy(input, offset, mem, memIndex, size);
            memIndex += size;
            offset += size;
            len -= size;

            if (memIndex >= STRIPE_SIZE) {
                doRound();
            }
        }

        // Whole stripes are consumed straight from the caller's array, without copying.
        while (len >= STRIPE_SIZE) {
            v0 = round(v0, load64(input, offset));
            v1 = round(v1, load64(input, offset + 8));
            v2 = round(v2, load64(input, offset + 16));
            v3 = round(v3, load64(input, offset + 24));
            offset += STRIPE_SIZE;
            len -= STRIPE_SIZE;
        }

        // Fewer than STRIPE_SIZE bytes remain and the buffer is empty at this point
        // (bytes can only be left over if the buffer was empty or has just been flushed),
        // so the tail can never complete a stripe here.
        if (len != 0) {
            System.arraycopy(input, offset, mem, memIndex, len);
            memIndex += len;
        }
    }

    /**
     * Finalizes the hash, resets this instance and returns the result.
     *
     * @return the 64-bit hash encoded as eight big-endian bytes
     */
    @Override
    protected byte[] engineDigest() {
        long h64;

        if (length >= STRIPE_SIZE) {
            // At least one full stripe was processed: fold the four lanes together.
            h64 = Long.rotateLeft(v0, 1)
                + Long.rotateLeft(v1, 7)
                + Long.rotateLeft(v2, 12)
                + Long.rotateLeft(v3, 18);
            h64 = mergeRound(h64, v0);
            h64 = mergeRound(h64, v1);
            h64 = mergeRound(h64, v2);
            h64 = mergeRound(h64, v3);
        } else {
            // Short input: the lanes were never touched, start from the seed instead.
            h64 = seed + XXH_PRIME64_5;
        }

        h64 += length;

        // memIndex always equals (length mod STRIPE_SIZE): the bytes not yet consumed as a stripe.
        int len = memIndex;
        int index = 0;

        // Remaining buffered bytes: 8 at a time, then at most one group of 4, then single bytes.
        while (len >= 8) {
            h64 ^= round(0L, load64(index));
            h64 = Long.rotateLeft(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4;
            index += 8;
            len -= 8;
        }

        if (len >= 4) {
            h64 ^= load32(index) * XXH_PRIME64_1;
            h64 = Long.rotateLeft(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
            index += 4;
            len -= 4;
        }

        while (len-- > 0) {
            h64 ^= (mem[index++] & 0xFFL) * XXH_PRIME64_5;
            h64 = Long.rotateLeft(h64, 11) * XXH_PRIME64_1;
        }

        // Final avalanche.
        h64 ^= h64 >>> 33;
        h64 *= XXH_PRIME64_2;
        h64 ^= h64 >>> 29;
        h64 *= XXH_PRIME64_3;
        h64 ^= h64 >>> 32;

        engineReset();

        byte[] result = new byte[DIGEST_LENGTH];
        result[0] = (byte) (h64 >>> 56);
        result[1] = (byte) (h64 >>> 48);
        result[2] = (byte) (h64 >>> 40);
        result[3] = (byte) (h64 >>> 32);
        result[4] = (byte) (h64 >>> 24);
        result[5] = (byte) (h64 >>> 16);
        result[6] = (byte) (h64 >>> 8);
        result[7] = (byte) h64;
        return result;
    }

    /** Restores the seeded initial state, discarding any input fed so far. */
    @Override
    protected void engineReset() {
        this.length = 0L;
        this.memIndex = 0;
        this.v0 = this.seed + XXH_PRIME64_1 + XXH_PRIME64_2;
        this.v1 = this.seed + XXH_PRIME64_2;
        this.v2 = this.seed;
        this.v3 = this.seed - XXH_PRIME64_1;
    }
}
|
49
src/main/kotlin/ru/dbotthepony/kommons/util/HashUtils.kt
Normal file
49
src/main/kotlin/ru/dbotthepony/kommons/util/HashUtils.kt
Normal file
@ -0,0 +1,49 @@
|
||||
package ru.dbotthepony.kommons.util
|
||||
|
||||
/**
 * Hashes the bytes of [data] (as produced by [String.toByteArray] with its default charset)
 * using [XXHash64] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash64]
 */
fun xxhash64(data: String, seed: Long = 0L): ByteArray =
    XXHash64(seed).also { it.update(data.toByteArray()) }.digest()
|
||||
|
||||
/**
 * Hashes the bytes of [data] (as produced by [String.toByteArray] with its default charset)
 * using [XXHash32] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash32]
 */
fun xxhash32(data: String, seed: Int = 0): ByteArray =
    XXHash32(seed).also { it.update(data.toByteArray()) }.digest()
|
||||
|
||||
/**
 * Hashes the whole of [data] using [XXHash64] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash64]
 */
fun xxhash64(data: ByteArray, seed: Long = 0L): ByteArray =
    XXHash64(seed).also { it.update(data) }.digest()
|
||||
|
||||
/**
 * Hashes the whole of [data] using [XXHash32] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash32]
 */
fun xxhash32(data: ByteArray, seed: Int = 0): ByteArray =
    XXHash32(seed).also { it.update(data) }.digest()
|
||||
|
||||
/**
 * Hashes [length] bytes of [data], starting at index [offset], using [XXHash64] seeded
 * with [seed].
 *
 * @return the digest bytes produced by [XXHash64]
 */
fun xxhash64(data: ByteArray, offset: Int, length: Int, seed: Long = 0L): ByteArray =
    XXHash64(seed).also { it.update(data, offset, length) }.digest()
|
||||
|
||||
/**
 * Hashes [length] bytes of [data], starting at index [offset], using [XXHash32] seeded
 * with [seed].
 *
 * @return the digest bytes produced by [XXHash32]
 */
fun xxhash32(data: ByteArray, offset: Int, length: Int, seed: Int = 0): ByteArray =
    XXHash32(seed).also { it.update(data, offset, length) }.digest()
|
||||
|
||||
/**
 * Renders [hash] as a lowercase hexadecimal string, two characters per byte,
 * in the same order as the bytes appear in the array.
 */
fun digest2string(hash: ByteArray): String = buildString(hash.size * 2) {
    for (octet in hash) {
        // Mask to 0..255 so negative bytes are rendered without a sign,
        // then left-pad single-digit values to keep two characters per byte.
        append((octet.toInt() and 0xFF).toString(16).padStart(2, '0'))
    }
}
|
@ -14,16 +14,16 @@ object BTreeDB6Tests {
|
||||
fun test() {
|
||||
val file = File("dbtest.bdb")
|
||||
if (file.exists()) file.delete()
|
||||
val create = BTreeDB6.create(file, 4096, sync = false)
|
||||
val create = BTreeDB6.create(file, 128, sync = false)
|
||||
|
||||
for (i in 0 .. 8000) {
|
||||
for (i in 0 .. 200) {
|
||||
val s = "This is key $i"
|
||||
val k = ByteKey("This is key $i")
|
||||
create.write(k, s.toByteArray())
|
||||
assertEquals(s, String(create.read(k).get()))
|
||||
}
|
||||
|
||||
for (i in 0 .. 8000) {
|
||||
for (i in 0 .. 200) {
|
||||
val s = "This is key $i"
|
||||
val k = ByteKey("This is key $i")
|
||||
assertEquals(s, String(create.read(k).get()))
|
||||
|
23
src/test/kotlin/ru/dbotthepony/kommons/test/HashingTests.kt
Normal file
23
src/test/kotlin/ru/dbotthepony/kommons/test/HashingTests.kt
Normal file
@ -0,0 +1,23 @@
|
||||
package ru.dbotthepony.kommons.test
|
||||
|
||||
import org.junit.jupiter.api.DisplayName
|
||||
import org.junit.jupiter.api.Test
|
||||
import ru.dbotthepony.kommons.util.XXHash32
|
||||
import ru.dbotthepony.kommons.util.XXHash64
|
||||
import ru.dbotthepony.kommons.util.digest2string
|
||||
import kotlin.test.assertEquals
|
||||
|
||||
/**
 * Checks the xxHash implementations against the expected hex digests of a fixed sample string,
 * hashed with the default seed.
 */
object HashingTests {
    private const val SAMPLE = "Nobody inspects the spammish repetition"

    // Kept separate from the 64-bit check so a failure is reported under the right name
    // and one failing variant does not hide the result of the other.
    @Test
    @DisplayName("xxHash32 implementation")
    fun xxHash32() {
        val hasher = XXHash32()
        hasher.update(SAMPLE.toByteArray())
        assertEquals("e2293b2f", digest2string(hasher.digest()))
    }

    @Test
    @DisplayName("xxHash64 implementation")
    fun xxHash64() {
        val hasher = XXHash64()
        hasher.update(SAMPLE.toByteArray())
        assertEquals("fbcea83c8a378bf1", digest2string(hasher.digest()))
    }
}
|
Loading…
Reference in New Issue
Block a user