diff --git a/gradle.properties b/gradle.properties
index e3ef8c9..54097e2 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -4,7 +4,7 @@ kotlin.code.style=official
 specifyKotlinAsDependency=false
 
 projectGroup=ru.dbotthepony.kommons
-projectVersion=2.6.0
+projectVersion=2.7.0
 
 guavaDepVersion=33.0.0
 gsonDepVersion=2.8.9
diff --git a/src/main/java/ru/dbotthepony/kommons/util/XXHash32.java b/src/main/java/ru/dbotthepony/kommons/util/XXHash32.java
new file mode 100644
index 0000000..233ae36
--- /dev/null
+++ b/src/main/java/ru/dbotthepony/kommons/util/XXHash32.java
@@ -0,0 +1,204 @@
+package ru.dbotthepony.kommons.util;
+
+import java.security.MessageDigest;
+import java.util.Objects;
+
+/**
+ * Pure Java implementation of XXHash32 hashing function.
+ * <p>
+ * {@link XXHash32#digest()} returns Int encoded as big-endian number.
+ * <p>
+ * Input is consumed in 16-byte stripes; bytes that do not yet fill a stripe are
+ * buffered between {@code update} calls. Instances hold mutable, unsynchronized state.
+ */
+public final class XXHash32 extends MessageDigest {
+	private final int seed;
+
+	/** Buffer for input bytes that do not yet form a complete 16-byte stripe. */
+	private final byte[] mem = new byte[16];
+	/** Number of valid bytes currently held in {@link #mem}. */
+	private int memIndex;
+
+	/** Total number of bytes fed since the last reset. */
+	private long length;
+
+	private int v0;
+	private int v1;
+	private int v2;
+	private int v3;
+
+	public static final int XXH_PRIME32_1 = 0x9E3779B1; /*!< 0b10011110001101110111100110110001 */
+	public static final int XXH_PRIME32_2 = 0x85EBCA77; /*!< 0b10000101111010111100101001110111 */
+	public static final int XXH_PRIME32_3 = 0xC2B2AE3D; /*!< 0b11000010101100101010111000111101 */
+	public static final int XXH_PRIME32_4 = 0x27D4EB2F; /*!< 0b00100111110101001110101100101111 */
+	public static final int XXH_PRIME32_5 = 0x165667B1; /*!< 0b00010110010101100110011110110001 */
+
+	/**
+	 * Creates a hasher with the given seed.
+	 *
+	 * @param seed value mixed into the initial accumulator state
+	 */
+	public XXHash32(int seed) {
+		super("XXHash32");
+		this.seed = seed;
+		engineReset();
+	}
+
+	/** Creates a hasher with seed {@code 0}. */
+	public XXHash32() {
+		this(0);
+	}
+
+	/** Mixes one 32-bit lane of input into an accumulator. */
+	private int round(int acc, int input) {
+		acc += input * XXH_PRIME32_2;
+		acc = Integer.rotateLeft(acc, 13);
+		acc *= XXH_PRIME32_1;
+		return acc;
+	}
+
+	private int load32(int index) {
+		return load32(mem, index);
+	}
+
+	/** Reads a little-endian 32-bit integer from {@code source} starting at {@code index}. */
+	private int load32(byte[] source, int index) {
+		return (((int) source[index]) & 0xFF) |
+			(((int) source[index + 1]) & 0xFF) << 8 |
+			(((int) source[index + 2]) & 0xFF) << 16 |
+			(((int) source[index + 3]) & 0xFF) << 24;
+	}
+
+	/** Consumes the full 16-byte buffer as one stripe and marks the buffer empty. */
+	private void doRound() {
+		memIndex = 0;
+		v0 = round(v0, load32(0));
+		v1 = round(v1, load32(4));
+		v2 = round(v2, load32(8));
+		v3 = round(v3, load32(12));
+	}
+
+	/** @return digest size in bytes, always 4 */
+	@Override
+	protected int engineGetDigestLength() {
+		return 4;
+	}
+
+	@Override
+	protected void engineUpdate(byte input) {
+		length++;
+		mem[memIndex++] = input;
+
+		if (memIndex >= 16)
+			doRound();
+	}
+
+	/**
+	 * Feeds {@code len} bytes of {@code input}, starting at {@code offset}, into the hash.
+	 *
+	 * @throws IndexOutOfBoundsException if the requested range lies outside of {@code input}
+	 */
+	@Override
+	protected void engineUpdate(byte[] input, int offset, int len) {
+		// arguments are (fromIndex, size, length): the range must fit inside the array
+		Objects.checkFromIndexSize(offset, len, input.length);
+		if (len == 0) return;
+
+		length += len;
+
+		// top up a partially filled buffer first
+		if (memIndex != 0) {
+			int size = Math.min(16 - memIndex, len);
+			System.arraycopy(input, offset, mem, memIndex, size);
+			memIndex += size;
+			offset += size;
+			len -= size;
+
+			if (memIndex >= 16)
+				doRound();
+		}
+
+		// whole stripes are consumed straight from the input array
+		while (len >= 16) {
+			v0 = round(v0, load32(input, offset));
+			v1 = round(v1, load32(input, offset + 4));
+			v2 = round(v2, load32(input, offset + 8));
+			v3 = round(v3, load32(input, offset + 12));
+			offset += 16;
+			len -= 16;
+		}
+
+		// keep the remainder for the next update or the final digest
+		if (len != 0) {
+			System.arraycopy(input, offset, mem, memIndex, len);
+			memIndex += len;
+
+			if (memIndex >= 16)
+				doRound();
+		}
+	}
+
+	/**
+	 * Finishes the hash, resets this instance and returns the result.
+	 *
+	 * @return 4 bytes holding the hash value, most significant byte first
+	 */
+	@Override
+	protected byte[] engineDigest() {
+		int h32;
+
+		if (length >= 16) {
+			h32 = Integer.rotateLeft(v0, 1) +
+				Integer.rotateLeft(v1, 7) +
+				Integer.rotateLeft(v2, 12) +
+				Integer.rotateLeft(v3, 18);
+		} else {
+			h32 = seed + XXH_PRIME32_5;
+		}
+
+		h32 += (int) length;
+
+		int len = memIndex;
+		int index = 0;
+
+		while (len >= 4) {
+			h32 += load32(index) * XXH_PRIME32_3;
+			h32 = Integer.rotateLeft(h32, 17) * XXH_PRIME32_4;
+			index += 4;
+			len -= 4;
+		}
+
+		while (len-- > 0) {
+			h32 += (mem[index++] & 0xFF) * XXH_PRIME32_5;
+			h32 = Integer.rotateLeft(h32, 11) * XXH_PRIME32_1;
+		}
+
+		h32 ^= h32 >>> 15;
+		h32 *= XXH_PRIME32_2;
+		h32 ^= h32 >>> 13;
+		h32 *= XXH_PRIME32_3;
+		h32 ^= h32 >>> 16;
+
+		engineReset();
+
+		byte[] result = new byte[4];
+
+		result[0] = (byte) (h32 >>> 24);
+		result[1] = (byte) (h32 >>> 16);
+		result[2] = (byte) (h32 >>> 8);
+		result[3] = (byte) (h32);
+
+		return result;
+	}
+
+	/** Discards all buffered input and restores the seeded initial state. */
+	@Override
+	protected void engineReset() {
+		this.length = 0L;
+		this.memIndex = 0;
+		this.v0 = this.seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+		this.v1 = this.seed + XXH_PRIME32_2;
+		this.v2 = this.seed;
+		this.v3 = this.seed - XXH_PRIME32_1;
+	}
+}
diff --git a/src/main/java/ru/dbotthepony/kommons/util/XXHash64.java b/src/main/java/ru/dbotthepony/kommons/util/XXHash64.java
new file mode 100644
index 0000000..5e41616
--- /dev/null
+++ b/src/main/java/ru/dbotthepony/kommons/util/XXHash64.java
@@ -0,0 +1,232 @@
+package ru.dbotthepony.kommons.util;
+
+import java.security.MessageDigest;
+import java.util.Objects;
+
+/**
+ * Pure Java implementation of XXHash64 hashing function.
+ * <p>
+ * {@link XXHash64#digest()} returns Long encoded as big-endian number.
+ * <p>
+ * Input is consumed in 32-byte stripes; bytes that do not yet fill a stripe are
+ * buffered between {@code update} calls. Instances hold mutable, unsynchronized state.
+ */
+public final class XXHash64 extends MessageDigest {
+	private final long seed;
+	/** Buffer for input bytes that do not yet form a complete 32-byte stripe. */
+	private final byte[] mem = new byte[32];
+	/** Number of valid bytes currently held in {@link #mem}. */
+	private int memIndex;
+
+	/** Total number of bytes fed since the last reset. */
+	private long length;
+
+	private long v0;
+	private long v1;
+	private long v2;
+	private long v3;
+
+	public static final long XXH_PRIME64_1 = 0x9E3779B185EBCA87L; /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
+	public static final long XXH_PRIME64_2 = 0xC2B2AE3D27D4EB4FL; /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
+	public static final long XXH_PRIME64_3 = 0x165667B19E3779F9L; /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
+	public static final long XXH_PRIME64_4 = 0x85EBCA77C2B2AE63L; /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
+	public static final long XXH_PRIME64_5 = 0x27D4EB2F165667C5L; /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+	/**
+	 * Creates a hasher with the given seed.
+	 *
+	 * @param seed value mixed into the initial accumulator state
+	 */
+	public XXHash64(long seed) {
+		super("XXHash64");
+		this.seed = seed;
+		engineReset();
+	}
+
+	/** Creates a hasher with seed {@code 0}. */
+	public XXHash64() {
+		this(0);
+	}
+
+	private long load64(int index) {
+		return load64(mem, index);
+	}
+
+	/** Reads a little-endian 64-bit integer from {@code source} starting at {@code index}. */
+	private long load64(byte[] source, int index) {
+		return (((long) source[index]) & 0xFFL) |
+			(((long) source[index + 1]) & 0xFFL) << 8 |
+			(((long) source[index + 2]) & 0xFFL) << 16 |
+			(((long) source[index + 3]) & 0xFFL) << 24 |
+			(((long) source[index + 4]) & 0xFFL) << 32 |
+			(((long) source[index + 5]) & 0xFFL) << 40 |
+			(((long) source[index + 6]) & 0xFFL) << 48 |
+			(((long) source[index + 7]) & 0xFFL) << 56;
+	}
+
+	/** Reads an unsigned little-endian 32-bit integer from the buffer at {@code index}. */
+	private long load32(int index) {
+		return (((long) mem[index]) & 0xFFL) |
+			(((long) mem[index + 1]) & 0xFFL) << 8 |
+			(((long) mem[index + 2]) & 0xFFL) << 16 |
+			(((long) mem[index + 3]) & 0xFFL) << 24;
+	}
+
+	/** Mixes one 64-bit lane of input into an accumulator. */
+	private long round(long acc, long input) {
+		acc += input * XXH_PRIME64_2;
+		acc = Long.rotateLeft(acc, 31);
+		return acc * XXH_PRIME64_1;
+	}
+
+	/** Folds a finished lane accumulator into the combined hash value. */
+	private long mergeRound(long acc, long val) {
+		val = round(0L, val);
+		acc ^= val;
+		return acc * XXH_PRIME64_1 + XXH_PRIME64_4;
+	}
+
+	/** Consumes the full 32-byte buffer as one stripe and marks the buffer empty. */
+	private void doRound() {
+		memIndex = 0;
+		v0 = round(v0, load64(0));
+		v1 = round(v1, load64(8));
+		v2 = round(v2, load64(16));
+		v3 = round(v3, load64(24));
+	}
+
+	/** @return digest size in bytes, always 8 */
+	@Override
+	protected int engineGetDigestLength() {
+		return 8;
+	}
+
+	@Override
+	protected void engineUpdate(byte input) {
+		length++;
+		mem[memIndex++] = input;
+
+		if (memIndex >= 32)
+			doRound();
+	}
+
+	/**
+	 * Feeds {@code len} bytes of {@code input}, starting at {@code offset}, into the hash.
+	 *
+	 * @throws IndexOutOfBoundsException if the requested range lies outside of {@code input}
+	 */
+	@Override
+	protected void engineUpdate(byte[] input, int offset, int len) {
+		// arguments are (fromIndex, size, length): the range must fit inside the array
+		Objects.checkFromIndexSize(offset, len, input.length);
+		if (len == 0) return;
+
+		length += len;
+
+		// top up a partially filled buffer first
+		if (memIndex != 0) {
+			int size = Math.min(32 - memIndex, len);
+			System.arraycopy(input, offset, mem, memIndex, size);
+			memIndex += size;
+			offset += size;
+			len -= size;
+
+			if (memIndex >= 32)
+				doRound();
+		}
+
+		// whole stripes are consumed straight from the input array
+		while (len >= 32) {
+			v0 = round(v0, load64(input, offset));
+			v1 = round(v1, load64(input, offset + 8));
+			v2 = round(v2, load64(input, offset + 16));
+			v3 = round(v3, load64(input, offset + 24));
+			offset += 32;
+			len -= 32;
+		}
+
+		// keep the remainder for the next update or the final digest
+		if (len != 0) {
+			System.arraycopy(input, offset, mem, memIndex, len);
+			memIndex += len;
+
+			if (memIndex >= 32)
+				doRound();
+		}
+	}
+
+	/**
+	 * Finishes the hash, resets this instance and returns the result.
+	 *
+	 * @return 8 bytes holding the hash value, most significant byte first
+	 */
+	@Override
+	protected byte[] engineDigest() {
+		long h64;
+
+		if (length >= 32L) {
+			h64 = Long.rotateLeft(v0, 1) + Long.rotateLeft(v1, 7) + Long.rotateLeft(v2, 12) + Long.rotateLeft(v3, 18);
+			h64 = mergeRound(h64, v0);
+			h64 = mergeRound(h64, v1);
+			h64 = mergeRound(h64, v2);
+			h64 = mergeRound(h64, v3);
+		} else {
+			h64 = seed + XXH_PRIME64_5;
+		}
+
+		h64 += length;
+		int len = (int) (length & 31L);
+		int index = 0;
+
+		while (len >= 8) {
+			h64 ^= round(0L, load64(index));
+			h64 = Long.rotateLeft(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+			index += 8;
+			len -= 8;
+		}
+
+		if (len >= 4) {
+			h64 ^= load32(index) * XXH_PRIME64_1;
+			h64 = Long.rotateLeft(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+			index += 4;
+			len -= 4;
+		}
+
+		while (len-- > 0) {
+			h64 ^= (mem[index++] & 0xFFL) * XXH_PRIME64_5;
+			h64 = Long.rotateLeft(h64, 11) * XXH_PRIME64_1;
+		}
+
+		h64 ^= h64 >>> 33;
+		h64 *= XXH_PRIME64_2;
+		h64 ^= h64 >>> 29;
+		h64 *= XXH_PRIME64_3;
+		h64 ^= h64 >>> 32;
+
+		engineReset();
+
+		byte[] result = new byte[8];
+
+		result[7] = (byte) h64;
+		result[6] = (byte) (h64 >>> 8);
+		result[5] = (byte) (h64 >>> 16);
+		result[4] = (byte) (h64 >>> 24);
+		result[3] = (byte) (h64 >>> 32);
+		result[2] = (byte) (h64 >>> 40);
+		result[1] = (byte) (h64 >>> 48);
+		result[0] = (byte) (h64 >>> 56);
+
+		return result;
+	}
+
+	/** Discards all buffered input and restores the seeded initial state. */
+	@Override
+	protected void engineReset() {
+		this.length = 0L;
+		this.memIndex = 0;
+		this.v0 = this.seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+		this.v1 = this.seed + XXH_PRIME64_2;
+		this.v2 = this.seed;
+		this.v3 = this.seed - XXH_PRIME64_1;
+	}
+}
diff --git a/src/main/kotlin/ru/dbotthepony/kommons/util/HashUtils.kt b/src/main/kotlin/ru/dbotthepony/kommons/util/HashUtils.kt
new file mode 100644
index 0000000..942397e
--- /dev/null
+++ b/src/main/kotlin/ru/dbotthepony/kommons/util/HashUtils.kt
@@ -0,0 +1,56 @@
+package ru.dbotthepony.kommons.util
+
+/** Computes the XXHash64 digest (8 bytes, big-endian) of the UTF-8 encoding of [data]. */
+fun xxhash64(data: String, seed: Long = 0L): ByteArray {
+	val digest = XXHash64(seed)
+	digest.update(data.toByteArray())
+	return digest.digest()
+}
+
+/** Computes the XXHash32 digest (4 bytes, big-endian) of the UTF-8 encoding of [data]. */
+fun xxhash32(data: String, seed: Int = 0): ByteArray {
+	val digest = XXHash32(seed)
+	digest.update(data.toByteArray())
+	return digest.digest()
+}
+
+/** Computes the XXHash64 digest (8 bytes, big-endian) of the whole [data] array. */
+fun xxhash64(data: ByteArray, seed: Long = 0L): ByteArray {
+	val digest = XXHash64(seed)
+	digest.update(data)
+	return digest.digest()
+}
+
+/** Computes the XXHash32 digest (4 bytes, big-endian) of the whole [data] array. */
+fun xxhash32(data: ByteArray, seed: Int = 0): ByteArray {
+	val digest = XXHash32(seed)
+	digest.update(data)
+	return digest.digest()
+}
+
+/** Computes the XXHash64 digest (8 bytes, big-endian) of [length] bytes of [data] starting at [offset]. */
+fun xxhash64(data: ByteArray, offset: Int, length: Int, seed: Long = 0L): ByteArray {
+	val digest = XXHash64(seed)
+	digest.update(data, offset, length)
+	return digest.digest()
+}
+
+/** Computes the XXHash32 digest (4 bytes, big-endian) of [length] bytes of [data] starting at [offset]. */
+fun xxhash32(data: ByteArray, offset: Int, length: Int, seed: Int = 0): ByteArray {
+	val digest = XXHash32(seed)
+	digest.update(data, offset, length)
+	return digest.digest()
+}
+
+/** Renders [hash] as lowercase hexadecimal, two characters per byte. */
+fun digest2string(hash: ByteArray): String {
+	val builder = StringBuilder(hash.size * 2)
+
+	for (b in hash) {
+		val s = (b.toInt() and 0xFF).toString(16)
+		if (s.length == 1) builder.append("0")
+		builder.append(s)
+	}
+
+	return builder.toString()
+}
diff --git a/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt b/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt
index d92c729..6cb84fe 100644
--- a/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt
+++ b/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt
@@ -14,16 +14,16 @@ object BTreeDB6Tests {
 	fun test() {
 		val file = File("dbtest.bdb")
 		if (file.exists()) file.delete()
-		val create = BTreeDB6.create(file, 4096, sync = false)
+		val create = BTreeDB6.create(file, 128, sync = false)
 
-		for (i in 0 .. 8000) {
+		for (i in 0 .. 200) {
 			val s = "This is key $i"
 			val k = ByteKey("This is key $i")
 			create.write(k, s.toByteArray())
 			assertEquals(s, String(create.read(k).get()))
 		}
 
-		for (i in 0 .. 8000) {
+		for (i in 0 .. 200) {
 			val s = "This is key $i"
 			val k = ByteKey("This is key $i")
 			assertEquals(s, String(create.read(k).get()))
diff --git a/src/test/kotlin/ru/dbotthepony/kommons/test/HashingTests.kt b/src/test/kotlin/ru/dbotthepony/kommons/test/HashingTests.kt
new file mode 100644
index 0000000..249a407
--- /dev/null
+++ b/src/test/kotlin/ru/dbotthepony/kommons/test/HashingTests.kt
@@ -0,0 +1,36 @@
+package ru.dbotthepony.kommons.test
+
+import org.junit.jupiter.api.DisplayName
+import org.junit.jupiter.api.Test
+import ru.dbotthepony.kommons.util.XXHash32
+import ru.dbotthepony.kommons.util.XXHash64
+import ru.dbotthepony.kommons.util.digest2string
+import ru.dbotthepony.kommons.util.xxhash32
+import ru.dbotthepony.kommons.util.xxhash64
+import kotlin.test.assertEquals
+
+object HashingTests {
+	@Test
+	@DisplayName("xxHash64 implementation")
+	fun xxHash64() {
+		val str = "Nobody inspects the spammish repetition"
+		val hasher = XXHash32()
+		hasher.update(str.toByteArray())
+		assertEquals("e2293b2f", digest2string(hasher.digest()))
+
+		val hasher2 = XXHash64()
+		hasher2.update(str.toByteArray())
+		assertEquals("fbcea83c8a378bf1", digest2string(hasher2.digest()))
+	}
+
+	@Test
+	@DisplayName("xxHash over a sub-range of an array")
+	fun xxHashSubRange() {
+		val bytes = "Nobody inspects the spammish repetition".toByteArray()
+		val padded = ByteArray(bytes.size + 8)
+		bytes.copyInto(padded, 3)
+
+		assertEquals("e2293b2f", digest2string(xxhash32(padded, 3, bytes.size)))
+		assertEquals("fbcea83c8a378bf1", digest2string(xxhash64(padded, 3, bytes.size)))
+	}
+}