diff --git a/gradle.properties b/gradle.properties
index aca9826..3763027 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -4,7 +4,7 @@ kotlin.code.style=official
 specifyKotlinAsDependency=false
 
 projectGroup=ru.dbotthepony.kommons
-projectVersion=3.6.0
+projectVersion=3.7.0
 
 guavaDepVersion=33.0.0
 gsonDepVersion=2.8.9
diff --git a/src/main/java/ru/dbotthepony/kommons/random/ShishuaEngine.java b/src/main/java/ru/dbotthepony/kommons/random/ShishuaEngine.java
new file mode 100644
index 0000000..cd79efb
--- /dev/null
+++ b/src/main/java/ru/dbotthepony/kommons/random/ShishuaEngine.java
@@ -0,0 +1,143 @@
+package ru.dbotthepony.kommons.random;
+
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Scalar implementation of the Shishua (https://github.com/espadrine/shishua) bit generator.
+ *
+ * Blocks of {@link #OUTPUT_SIZE} longs are produced by {@link #step(ShishuaOutput, int)}.
+ * Instances keep mutable state and do no synchronization on their own.
+ */
+public final class ShishuaEngine {
+    /**
+     * Amount of longs generated by a single step
+     */
+    public static final int OUTPUT_SIZE = 16;
+
+    private static final int STEPS = 1;
+    private static final int ROUNDS = 13;
+
+    // Values the state holds before any seed is mixed in
+    private static final long[] INITIAL_STATE = new long[] {
+        0x9E3779B97F4A7C15L, 0xF39CC0605CEDC834L, 0x1082276BF3A27251L, 0xF86C6A11D0C18E95L,
+        0x2767F0B153D27B7FL, 0x0347045B5BF1827FL, 0x01886F0928403002L, 0xC1D64BA40F335E36L,
+        0xF06AD7AE9717877EL, 0x85839D6EFFBD7DC6L, 0x64D325D1C5371682L, 0xCADD0CCCFDFFBBE1L,
+        0x626E33B8D04B4331L, 0xBBF73C790D94F79DL, 0x471C4AB3ED3D82A5L, 0xFEC507705E4AE6E5L,
+    };
+
+    // Indices of the state words which get funnel-shifted together inside step()
+    private static final int[] shuf_offsets = new int[] {
+        2,3,0,1, 5,6,7,4, // left
+        3,0,1,2, 6,7,4,5  // right
+    };
+
+    // Note: While it is an array, a "lane" refers to 4 consecutive uint64_t.
+    private final long[] state = INITIAL_STATE.clone(); // 4 lanes
+    private final long[] output = new long[OUTPUT_SIZE]; // 4 lanes, 2 parts
+    private final long[] counter = new long[4]; // 1 lane
+    private final long[] temp = new long[8]; // scratch space of step(), fully rewritten before each use
+
+    /**
+     * Creates new Shishua engine without any proper initialization
+     */
+    public ShishuaEngine() {}
+
+    /**
+     * Resets this engine and seeds it with provided values.
+     *
+     * @param seed exactly 4 seed values
+     * @throws IndexOutOfBoundsException if {@code seed} does not contain exactly 4 values
+     */
+    public void initialize(long[] seed) {
+        if (seed.length != 4) {
+            throw new IndexOutOfBoundsException("Seed provided must have 4 long values, " + seed.length + " given.");
+        }
+
+        // Start over from pristine state, so seeding an already used engine
+        // gives the same sequence as seeding a fresh one.
+        System.arraycopy(INITIAL_STATE, 0, state, 0, state.length);
+        java.util.Arrays.fill(output, 0L);
+        java.util.Arrays.fill(counter, 0L);
+
+        // Diffuse first two seed elements in s0, then the last two. Same for s1.
+        // We must keep half of the state unchanged so users cannot set a bad state.
+        for (int i = 0; i < 4; i++) {
+            state[i * 2    ] ^= seed[i];           // { s0,0,s1,0,s2,0,s3,0 }
+            state[i * 2 + 8] ^= seed[(i + 2) & 3]; // { s2,0,s3,0,s0,0,s1,0 }
+        }
+
+        for (int i = 0; i < ROUNDS; i++) {
+            step(null, STEPS);
+
+            // Feed generated block back into the state with its lanes in reverse order.
+            // This must read from output: shuffling state onto itself clobbers values before they are read.
+            for (int j = 0; j < 4; j++) {
+                state[j     ] = output[j + 12];
+                state[j +  4] = output[j +  8];
+                state[j +  8] = output[j +  4];
+                state[j + 12] = output[j     ];
+            }
+        }
+    }
+
+    /**
+     * Generates {@code steps} blocks of {@link #OUTPUT_SIZE} longs each.
+     *
+     * Before a new block is generated, the current one is handed to {@code acceptor}.
+     * The array given to {@code acceptor} is internal buffer of this engine, and is overwritten
+     * by next step, so acceptor must copy what it needs.
+     *
+     * @param acceptor receiver of generated blocks, {@code null} to discard them
+     * @param steps amount of blocks to generate, nothing happens if it is not positive
+     */
+    public void step(@Nullable ShishuaOutput acceptor, int steps) {
+        for (int i = 0; i < steps; i++) {
+            // Hand out current output block if there is someone to take it
+            if (acceptor != null) {
+                acceptor.accept(this.output);
+            }
+
+            for (int j = 0; j < 2; j++) {
+                int o = j << 2;
+                int s = j << 3;
+
+                for (int k = 0; k < 4; k++) {
+                    state[s + k + 4] += counter[k];
+                }
+
+                for (int k = 0; k < 8; k++) {
+                    temp[k] = (state[s + shuf_offsets[k]] >>> 32) | (state[s + shuf_offsets[k + 8]] << 32);
+                }
+
+                for (int k = 0; k < 4; k++) {
+                    // Each word is mixed with its own shifted copy, hence indexing by k
+                    long u_lo = state[s + k    ] >>> 1;
+                    long u_hi = state[s + k + 4] >>> 3;
+
+                    state[s + k    ] = u_lo + temp[k    ];
+                    state[s + k + 4] = u_hi + temp[k + 4];
+
+                    output[o + k] = u_lo ^ temp[k + 4];
+                }
+            }
+
+            // Merge together.
+            for (int j = 0; j < 4; j++) {
+                output[j +  8] = state[j    ] ^ state[j + 12];
+                output[j + 12] = state[j + 8] ^ state[j +  4];
+
+                counter[j] += 7 - (j << 1); // 7, 5, 3, 1
+            }
+        }
+    }
+
+    /**
+     * Fills provided array with generated values, one block per every {@link #OUTPUT_SIZE} longs.
+     *
+     * @param output array to fill, its length must be a multiple of {@link #OUTPUT_SIZE}
+     */
+    public void step(@NotNull long[] output) {
+        step(new ShishuaArrayOutput(output, 0), output.length / OUTPUT_SIZE);
+    }
+}
diff --git a/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaArrayOutput.kt b/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaArrayOutput.kt
new file mode 100644
index 0000000..f30ffab
--- /dev/null
+++ b/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaArrayOutput.kt
@@ -0,0 +1,20 @@
+package ru.dbotthepony.kommons.random
+
+/**
+ * [ShishuaOutput] which copies every received block into [output], starting at [offset]
+ * and advancing [offset] by the size of each block
+ */
+class ShishuaArrayOutput @JvmOverloads constructor(private val output: LongArray, var offset: Int = 0) : ShishuaOutput {
+    init {
+        require(output.size and (ShishuaEngine.OUTPUT_SIZE - 1) == 0) {
+            "Provided array is not 128 byte aligned, ${output.size * 8} given."
+        }
+
+        require(offset >= 0) { "Negative array offset: $offset" }
+    }
+
+    override fun accept(values: LongArray) {
+        System.arraycopy(values, 0, output, offset, values.size)
+        offset += values.size
+    }
+}
diff --git a/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaOutput.kt b/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaOutput.kt
new file mode 100644
index 0000000..885086e
--- /dev/null
+++ b/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaOutput.kt
@@ -0,0 +1,11 @@
+package ru.dbotthepony.kommons.random
+
+/**
+ * Receiver of blocks generated by [ShishuaEngine]
+ */
+fun interface ShishuaOutput {
+    /**
+     * [values] is the engine's internal buffer, copy its contents instead of keeping the reference
+     */
+    fun accept(values: LongArray)
+}
diff --git a/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaRandom.kt b/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaRandom.kt
new file mode 100644
index 0000000..1f358f7
--- /dev/null
+++ b/src/main/kotlin/ru/dbotthepony/kommons/random/ShishuaRandom.kt
@@ -0,0 +1,38 @@
+package ru.dbotthepony.kommons.random
+
+import java.util.random.RandomGenerator
+
+/**
+ * [Shishua](https://github.com/espadrine/shishua) implementation, consists of this random generator and [ShishuaEngine], which generates the bits
+ *
+ * Because JVM isn't smart enough, [ShishuaEngine] doesn't get vectorized, and hence it is slow, almost 5 times
+ * slower than [GJRAND64Random]
+ */
+open class ShishuaRandom() : RandomGenerator {
+    protected val engine = ShishuaEngine()
+    private val values = LongArray(ShishuaEngine.OUTPUT_SIZE)
+    private var index = ShishuaEngine.OUTPUT_SIZE
+    private val acceptor = ShishuaOutput { System.arraycopy(it, 0, values, 0, it.size) }
+
+    constructor(seed: LongArray) : this() {
+        engine.initialize(seed)
+    }
+
+    constructor(seed0: Long, seed1: Long, seed2: Long, seed3: Long) : this() {
+        engine.initialize(longArrayOf(seed0, seed1, seed2, seed3))
+    }
+
+    constructor(seed: Long) : this() {
+        val rng = PCG32Random(seed)
+        engine.initialize(longArrayOf(rng.nextLong(), rng.nextLong(), rng.nextLong(), rng.nextLong()))
+    }
+
+    final override fun nextLong(): Long {
+        if (index == ShishuaEngine.OUTPUT_SIZE) {
+            index = 0
+            engine.step(acceptor, 1)
+        }
+
+        return values[index++]
+    }
+}