XXHash64 and 32 implementations

This commit is contained in:
DBotThePony 2024-02-20 00:28:44 +07:00
parent 8216c2298a
commit 3ec189804c
Signed by: DBot
GPG Key ID: DCC23B5715498507
6 changed files with 438 additions and 4 deletions

View File

@ -4,7 +4,7 @@ kotlin.code.style=official
specifyKotlinAsDependency=false
projectGroup=ru.dbotthepony.kommons
projectVersion=2.6.0
projectVersion=2.7.0
guavaDepVersion=33.0.0
gsonDepVersion=2.8.9

View File

@ -0,0 +1,168 @@
package ru.dbotthepony.kommons.util;
import java.security.MessageDigest;
import java.util.Objects;
/**
 * Pure Java implementation of the XXHash32 hashing function, exposed as a {@link MessageDigest}.
 * <p>
 * Input is consumed in 16-byte stripes; words inside a stripe are read little-endian.
 * {@link XXHash32#digest()} returns the 32-bit hash as 4 bytes in big-endian order and
 * resets the instance so it can be reused with the same seed.
 */
public final class XXHash32 extends MessageDigest {
    public static final int XXH_PRIME32_1 = 0x9E3779B1; /*!< 0b10011110001101110111100110110001 */
    public static final int XXH_PRIME32_2 = 0x85EBCA77; /*!< 0b10000101111010111100101001110111 */
    public static final int XXH_PRIME32_3 = 0xC2B2AE3D; /*!< 0b11000010101100101010111000111101 */
    public static final int XXH_PRIME32_4 = 0x27D4EB2F; /*!< 0b00100111110101001110101100101111 */
    public static final int XXH_PRIME32_5 = 0x165667B1; /*!< 0b00010110010101100110011110110001 */

    /** Number of bytes consumed by one pass over the four accumulators. */
    private static final int STRIPE_SIZE = 16;
    /** Number of bytes returned by {@link #engineDigest()}. */
    private static final int DIGEST_SIZE = 4;

    private final int seed;
    /** Holds input bytes that do not yet form a complete stripe. */
    private final byte[] mem = new byte[STRIPE_SIZE];
    /** Number of valid bytes currently stored in {@link #mem}. */
    private int memIndex;
    /** Total number of bytes fed since the last reset. */
    private long length;
    private int v0;
    private int v1;
    private int v2;
    private int v3;

    /**
     * Creates a hasher with the given seed.
     *
     * @param seed value mixed into the initial accumulator state
     */
    public XXHash32(int seed) {
        super("XXHash32");
        this.seed = seed;
        engineReset();
    }

    /** Creates a hasher with seed 0. */
    public XXHash32() {
        this(0);
    }

    /** Mixes one 32-bit input word into an accumulator and returns the new accumulator value. */
    private static int round(int acc, int input) {
        acc += input * XXH_PRIME32_2;
        acc = Integer.rotateLeft(acc, 13);
        acc *= XXH_PRIME32_1;
        return acc;
    }

    /** Reads a little-endian 32-bit word from the internal buffer. */
    private int load32(int index) {
        return load32(mem, index);
    }

    /** Reads a little-endian 32-bit word from {@code source} starting at {@code index}. */
    private static int load32(byte[] source, int index) {
        return (((int) source[index]) & 0xFF) |
            (((int) source[index + 1]) & 0xFF) << 8 |
            (((int) source[index + 2]) & 0xFF) << 16 |
            (((int) source[index + 3]) & 0xFF) << 24;
    }

    /** Consumes the (full) internal buffer as one stripe and marks the buffer empty. */
    private void doRound() {
        memIndex = 0;
        v0 = round(v0, load32(0));
        v1 = round(v1, load32(4));
        v2 = round(v2, load32(8));
        v3 = round(v3, load32(12));
    }

    /**
     * @return the digest size in bytes (always 4)
     */
    @Override
    protected int engineGetDigestLength() {
        return DIGEST_SIZE;
    }

    @Override
    protected void engineUpdate(byte input) {
        length++;
        mem[memIndex++] = input;

        if (memIndex >= STRIPE_SIZE)
            doRound();
    }

    /**
     * Feeds {@code len} bytes of {@code input}, starting at {@code offset}, into the hash.
     *
     * @throws IndexOutOfBoundsException if {@code [offset, offset + len)} is not within {@code input}
     */
    @Override
    protected void engineUpdate(byte[] input, int offset, int len) {
        // Argument order is (fromIndex, size, length): the requested sub-range
        // must fit inside the array, not the other way around.
        Objects.checkFromIndexSize(offset, len, input.length);
        if (len == 0) return;
        length += len;

        // Top up a partially filled buffer first.
        if (memIndex != 0) {
            int size = Math.min(STRIPE_SIZE - memIndex, len);
            System.arraycopy(input, offset, mem, memIndex, size);
            memIndex += size;
            offset += size;
            len -= size;

            if (memIndex >= STRIPE_SIZE)
                doRound();
        }

        // Process whole stripes straight from the caller's array, skipping the buffer.
        while (len >= STRIPE_SIZE) {
            v0 = round(v0, load32(input, offset));
            v1 = round(v1, load32(input, offset + 4));
            v2 = round(v2, load32(input, offset + 8));
            v3 = round(v3, load32(input, offset + 12));
            offset += STRIPE_SIZE;
            len -= STRIPE_SIZE;
        }

        // Stash the remainder. Whenever len is non-zero here the buffer is empty and
        // len < STRIPE_SIZE, so the buffer cannot become full at this point.
        if (len != 0) {
            System.arraycopy(input, offset, mem, memIndex, len);
            memIndex += len;
        }
    }

    /**
     * Finishes the hash, resets the instance, and returns the result.
     *
     * @return the 32-bit hash as 4 bytes, most significant byte first
     */
    @Override
    protected byte[] engineDigest() {
        int h32;

        if (length >= STRIPE_SIZE) {
            h32 = Integer.rotateLeft(v0, 1) +
                Integer.rotateLeft(v1, 7) +
                Integer.rotateLeft(v2, 12) +
                Integer.rotateLeft(v3, 18);
        } else {
            // No full stripe was ever processed, so the accumulators are unused.
            h32 = seed + XXH_PRIME32_5;
        }

        h32 += (int) length;

        // Fold in the buffered tail: whole 32-bit words first, then single bytes.
        int len = memIndex;
        int index = 0;

        while (len >= 4) {
            h32 += load32(index) * XXH_PRIME32_3;
            h32 = Integer.rotateLeft(h32, 17) * XXH_PRIME32_4;
            index += 4;
            len -= 4;
        }

        while (len-- > 0) {
            h32 += (mem[index++] & 0xFF) * XXH_PRIME32_5;
            h32 = Integer.rotateLeft(h32, 11) * XXH_PRIME32_1;
        }

        // Final avalanche.
        h32 ^= h32 >>> 15;
        h32 *= XXH_PRIME32_2;
        h32 ^= h32 >>> 13;
        h32 *= XXH_PRIME32_3;
        h32 ^= h32 >>> 16;

        engineReset();

        byte[] result = new byte[DIGEST_SIZE];
        result[0] = (byte) (h32 >>> 24);
        result[1] = (byte) (h32 >>> 16);
        result[2] = (byte) (h32 >>> 8);
        result[3] = (byte) (h32);
        return result;
    }

    /** Restores the seed-derived initial state, discarding any buffered input. */
    @Override
    protected void engineReset() {
        this.length = 0L;
        this.memIndex = 0;
        this.v0 = this.seed + XXH_PRIME32_1 + XXH_PRIME32_2;
        this.v1 = this.seed + XXH_PRIME32_2;
        this.v2 = this.seed;
        this.v3 = this.seed - XXH_PRIME32_1;
    }
}

View File

@ -0,0 +1,194 @@
package ru.dbotthepony.kommons.util;
import java.security.MessageDigest;
import java.util.Objects;
/**
 * Pure Java implementation of the XXHash64 hashing function, exposed as a {@link MessageDigest}.
 * <p>
 * Input is consumed in 32-byte stripes; words inside a stripe are read little-endian.
 * {@link XXHash64#digest()} returns the 64-bit hash as 8 bytes in big-endian order and
 * resets the instance so it can be reused with the same seed.
 */
public final class XXHash64 extends MessageDigest {
    public static final long XXH_PRIME64_1 = 0x9E3779B185EBCA87L; /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
    public static final long XXH_PRIME64_2 = 0xC2B2AE3D27D4EB4FL; /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
    public static final long XXH_PRIME64_3 = 0x165667B19E3779F9L; /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
    public static final long XXH_PRIME64_4 = 0x85EBCA77C2B2AE63L; /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
    public static final long XXH_PRIME64_5 = 0x27D4EB2F165667C5L; /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */

    /** Number of bytes consumed by one pass over the four accumulators. */
    private static final int STRIPE_SIZE = 32;
    /** Number of bytes returned by {@link #engineDigest()}. */
    private static final int DIGEST_SIZE = 8;

    private final long seed;
    /** Holds input bytes that do not yet form a complete stripe. */
    private final byte[] mem = new byte[STRIPE_SIZE];
    /** Number of valid bytes currently stored in {@link #mem}. */
    private int memIndex;
    /** Total number of bytes fed since the last reset. */
    private long length;
    private long v0;
    private long v1;
    private long v2;
    private long v3;

    /**
     * Creates a hasher with the given seed.
     *
     * @param seed value mixed into the initial accumulator state
     */
    public XXHash64(long seed) {
        super("XXHash64");
        this.seed = seed;
        engineReset();
    }

    /** Creates a hasher with seed 0. */
    public XXHash64() {
        this(0);
    }

    /** Reads a little-endian 64-bit word from the internal buffer. */
    private long load64(int index) {
        return load64(mem, index);
    }

    /** Reads a little-endian 64-bit word from {@code source} starting at {@code index}. */
    private static long load64(byte[] source, int index) {
        return (((long) source[index]) & 0xFFL) |
            (((long) source[index + 1]) & 0xFFL) << 8 |
            (((long) source[index + 2]) & 0xFFL) << 16 |
            (((long) source[index + 3]) & 0xFFL) << 24 |
            (((long) source[index + 4]) & 0xFFL) << 32 |
            (((long) source[index + 5]) & 0xFFL) << 40 |
            (((long) source[index + 6]) & 0xFFL) << 48 |
            (((long) source[index + 7]) & 0xFFL) << 56;
    }

    /** Reads a little-endian unsigned 32-bit word from the internal buffer, widened to long. */
    private long load32(int index) {
        return (((long) mem[index]) & 0xFFL) |
            (((long) mem[index + 1]) & 0xFFL) << 8 |
            (((long) mem[index + 2]) & 0xFFL) << 16 |
            (((long) mem[index + 3]) & 0xFFL) << 24;
    }

    /** Mixes one 64-bit input word into an accumulator and returns the new accumulator value. */
    private static long round(long acc, long input) {
        acc += input * XXH_PRIME64_2;
        acc = Long.rotateLeft(acc, 31);
        return acc * XXH_PRIME64_1;
    }

    /** Folds one accumulator value into the converged hash during finalization. */
    private static long mergeRound(long acc, long val) {
        val = round(0L, val);
        acc ^= val;
        return acc * XXH_PRIME64_1 + XXH_PRIME64_4;
    }

    /** Consumes the (full) internal buffer as one stripe and marks the buffer empty. */
    private void doRound() {
        memIndex = 0;
        v0 = round(v0, load64(0));
        v1 = round(v1, load64(8));
        v2 = round(v2, load64(16));
        v3 = round(v3, load64(24));
    }

    /**
     * @return the digest size in bytes (always 8)
     */
    @Override
    protected int engineGetDigestLength() {
        return DIGEST_SIZE;
    }

    @Override
    protected void engineUpdate(byte input) {
        length++;
        mem[memIndex++] = input;

        if (memIndex >= STRIPE_SIZE)
            doRound();
    }

    /**
     * Feeds {@code len} bytes of {@code input}, starting at {@code offset}, into the hash.
     *
     * @throws IndexOutOfBoundsException if {@code [offset, offset + len)} is not within {@code input}
     */
    @Override
    protected void engineUpdate(byte[] input, int offset, int len) {
        // Argument order is (fromIndex, size, length): the requested sub-range
        // must fit inside the array, not the other way around.
        Objects.checkFromIndexSize(offset, len, input.length);
        if (len == 0) return;
        length += len;

        // Top up a partially filled buffer first.
        if (memIndex != 0) {
            int size = Math.min(STRIPE_SIZE - memIndex, len);
            System.arraycopy(input, offset, mem, memIndex, size);
            memIndex += size;
            offset += size;
            len -= size;

            if (memIndex >= STRIPE_SIZE)
                doRound();
        }

        // Process whole stripes straight from the caller's array, skipping the buffer.
        while (len >= STRIPE_SIZE) {
            v0 = round(v0, load64(input, offset));
            v1 = round(v1, load64(input, offset + 8));
            v2 = round(v2, load64(input, offset + 16));
            v3 = round(v3, load64(input, offset + 24));
            offset += STRIPE_SIZE;
            len -= STRIPE_SIZE;
        }

        // Stash the remainder. Whenever len is non-zero here the buffer is empty and
        // len < STRIPE_SIZE, so the buffer cannot become full at this point.
        if (len != 0) {
            System.arraycopy(input, offset, mem, memIndex, len);
            memIndex += len;
        }
    }

    /**
     * Finishes the hash, resets the instance, and returns the result.
     *
     * @return the 64-bit hash as 8 bytes, most significant byte first
     */
    @Override
    protected byte[] engineDigest() {
        long h64;

        if (length >= STRIPE_SIZE) {
            h64 = Long.rotateLeft(v0, 1) + Long.rotateLeft(v1, 7) + Long.rotateLeft(v2, 12) + Long.rotateLeft(v3, 18);
            h64 = mergeRound(h64, v0);
            h64 = mergeRound(h64, v1);
            h64 = mergeRound(h64, v2);
            h64 = mergeRound(h64, v3);
        } else {
            // No full stripe was ever processed, so the accumulators are unused.
            h64 = seed + XXH_PRIME64_5;
        }

        h64 += length;

        // Fold in the buffered tail: 64-bit words, then at most one 32-bit word, then single bytes.
        int len = memIndex;
        int index = 0;

        while (len >= 8) {
            h64 ^= round(0L, load64(index));
            h64 = Long.rotateLeft(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4;
            index += 8;
            len -= 8;
        }

        if (len >= 4) {
            h64 ^= load32(index) * XXH_PRIME64_1;
            h64 = Long.rotateLeft(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
            index += 4;
            len -= 4;
        }

        while (len-- > 0) {
            h64 ^= (mem[index++] & 0xFFL) * XXH_PRIME64_5;
            h64 = Long.rotateLeft(h64, 11) * XXH_PRIME64_1;
        }

        // Final avalanche.
        h64 ^= h64 >>> 33;
        h64 *= XXH_PRIME64_2;
        h64 ^= h64 >>> 29;
        h64 *= XXH_PRIME64_3;
        h64 ^= h64 >>> 32;

        engineReset();

        byte[] result = new byte[DIGEST_SIZE];
        result[0] = (byte) (h64 >>> 56);
        result[1] = (byte) (h64 >>> 48);
        result[2] = (byte) (h64 >>> 40);
        result[3] = (byte) (h64 >>> 32);
        result[4] = (byte) (h64 >>> 24);
        result[5] = (byte) (h64 >>> 16);
        result[6] = (byte) (h64 >>> 8);
        result[7] = (byte) h64;
        return result;
    }

    /** Restores the seed-derived initial state, discarding any buffered input. */
    @Override
    protected void engineReset() {
        this.length = 0L;
        this.memIndex = 0;
        this.v0 = this.seed + XXH_PRIME64_1 + XXH_PRIME64_2;
        this.v1 = this.seed + XXH_PRIME64_2;
        this.v2 = this.seed;
        this.v3 = this.seed - XXH_PRIME64_1;
    }
}

View File

@ -0,0 +1,49 @@
package ru.dbotthepony.kommons.util
/**
 * Hashes the bytes of [data] (as produced by `toByteArray()` with its default charset)
 * using [XXHash64] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash64]
 */
fun xxhash64(data: String, seed: Long = 0L): ByteArray =
    XXHash64(seed).run {
        update(data.toByteArray())
        digest()
    }
/**
 * Hashes the bytes of [data] (as produced by `toByteArray()` with its default charset)
 * using [XXHash32] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash32]
 */
fun xxhash32(data: String, seed: Int = 0): ByteArray =
    XXHash32(seed).run {
        update(data.toByteArray())
        digest()
    }
/**
 * Hashes the whole of [data] using [XXHash64] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash64]
 */
fun xxhash64(data: ByteArray, seed: Long = 0L): ByteArray {
    val hasher = XXHash64(seed).also { it.update(data) }
    return hasher.digest()
}
/**
 * Hashes the whole of [data] using [XXHash32] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash32]
 */
fun xxhash32(data: ByteArray, seed: Int = 0): ByteArray {
    val hasher = XXHash32(seed).also { it.update(data) }
    return hasher.digest()
}
/**
 * Hashes [length] bytes of [data], starting at [offset], using [XXHash64] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash64]
 */
fun xxhash64(data: ByteArray, offset: Int, length: Int, seed: Long = 0L): ByteArray =
    with(XXHash64(seed)) {
        update(data, offset, length)
        digest()
    }
/**
 * Hashes [length] bytes of [data], starting at [offset], using [XXHash32] seeded with [seed].
 *
 * @return the digest bytes produced by [XXHash32]
 */
fun xxhash32(data: ByteArray, offset: Int, length: Int, seed: Int = 0): ByteArray =
    with(XXHash32(seed)) {
        update(data, offset, length)
        digest()
    }
/**
 * Renders [hash] as a lowercase hexadecimal string, two characters per byte,
 * in the same order as the bytes appear in the array.
 */
fun digest2string(hash: ByteArray): String = buildString(hash.size * 2) {
    hash.forEach { byte ->
        // Mask to 0..255 so negative bytes do not render with a minus sign.
        val hex = (byte.toInt() and 0xFF).toString(16)
        append(hex.padStart(2, '0'))
    }
}

View File

@ -14,16 +14,16 @@ object BTreeDB6Tests {
fun test() {
val file = File("dbtest.bdb")
if (file.exists()) file.delete()
val create = BTreeDB6.create(file, 4096, sync = false)
val create = BTreeDB6.create(file, 128, sync = false)
for (i in 0 .. 8000) {
for (i in 0 .. 200) {
val s = "This is key $i"
val k = ByteKey("This is key $i")
create.write(k, s.toByteArray())
assertEquals(s, String(create.read(k).get()))
}
for (i in 0 .. 8000) {
for (i in 0 .. 200) {
val s = "This is key $i"
val k = ByteKey("This is key $i")
assertEquals(s, String(create.read(k).get()))

View File

@ -0,0 +1,23 @@
package ru.dbotthepony.kommons.test
import org.junit.jupiter.api.DisplayName
import org.junit.jupiter.api.Test
import ru.dbotthepony.kommons.util.XXHash32
import ru.dbotthepony.kommons.util.XXHash64
import ru.dbotthepony.kommons.util.digest2string
import kotlin.test.assertEquals
/**
 * Known-answer tests for the XXHash32 and XXHash64 digests.
 *
 * Each algorithm has its own test so that a failure is reported under the right name.
 */
object HashingTests {
    /** Input shared by both known-answer checks. */
    private const val SAMPLE = "Nobody inspects the spammish repetition"

    @Test
    @DisplayName("xxHash32 implementation")
    fun xxHash32() {
        val hasher = XXHash32()
        hasher.update(SAMPLE.toByteArray())
        assertEquals("e2293b2f", digest2string(hasher.digest()))
    }

    @Test
    @DisplayName("xxHash64 implementation")
    fun xxHash64() {
        val hasher = XXHash64()
        hasher.update(SAMPLE.toByteArray())
        assertEquals("fbcea83c8a378bf1", digest2string(hasher.digest()))
    }
}