HashTableInterner fixes

This commit is contained in:
DBotThePony 2023-09-20 17:25:40 +07:00
parent 352dea020a
commit 62dfc63839
Signed by: DBot
GPG Key ID: DCC23B5715498507
5 changed files with 242 additions and 68 deletions

View File

@ -3,6 +3,7 @@ import org.gradle.internal.jvm.Jvm
// Gradle plugins applied to this build: Kotlin/JVM compilation, the JMH benchmark plugin,
// plain Java source support and the `application` plugin.
plugins {
kotlin("jvm") version "1.9.10"
// JMH integration; configured by the `jmh { }` block of this build script
id("me.champeau.jmh") version "0.7.1"
java
application
}
@ -87,6 +88,16 @@ dependencies {
implementation("com.github.ben-manes.caffeine:caffeine:3.1.5")
}
// Settings for benchmark runs performed through the me.champeau.jmh plugin.
jmh {
// number of measurement iterations
iterations.set(5)
// time spent on each measurement iteration
timeOnIteration.set("1s")
// time spent on each warmup iteration
warmup.set("1s")
// how many times a single benchmark is forked
fork.set(1)
// include pattern: only benchmarks whose name matches "ht" are executed
includes.add("ht")
// do not synchronize iterations between worker threads
synchronizeIterations.set(false)
// number of worker threads running each benchmark
threads.set(4)
}
// Configure the `test` task to discover and execute tests through the JUnit Platform.
tasks.getByName<Test>("test") {
useJUnitPlatform()
}

View File

@ -0,0 +1,118 @@
package ru.dbotthepony.kstarbound.jmh;
import com.github.benmanes.caffeine.cache.Interner;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole;
import ru.dbotthepony.kstarbound.util.HashTableInterner;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * JMH benchmark comparing several ways of interning strings: {@link String#intern()},
 * a {@link ConcurrentHashMap}, a {@link HashMap}, the project's {@link HashTableInterner}
 * and Caffeine's weak {@link Interner}.
 *
 * <p>The state is declared with {@link Scope#Benchmark}, so one set of interners is shared
 * by every JMH worker thread taking part in a run. Every interner used here must therefore
 * tolerate concurrent calls.
 */
@State(Scope.Benchmark)
public class Interning {
    /** Number of strings ("String0", "String1", ...) interned by one benchmark invocation. */
    @Param({"1", "100", "10000", "1000000"})
    private int size;

    private StringInterner str;
    private CHMInterner chm;
    private HMInterner hm;
    private HTInterner ht;
    private CaffeineInterner ct;

    /** Creates a fresh instance of every interner under test. */
    @Setup
    public void setup() {
        str = new StringInterner();
        chm = new CHMInterner();
        hm = new HMInterner();
        ht = new HTInterner();
        ct = new CaffeineInterner();
    }

    /** Baseline: delegates to the JVM string pool through {@link String#intern()}. */
    public static class StringInterner {
        public String intern(String s) {
            return s.intern();
        }
    }

    /** Interner under test: {@link HashTableInterner} constructed with 5 segment bits. */
    public static class HTInterner {
        private final HashTableInterner<String> interner = new HashTableInterner<>(5);

        public String intern(String s) {
            return interner.intern(s);
        }
    }

    /** Caffeine's weak interner, as returned by {@link Interner#newWeakInterner()}. */
    public static class CaffeineInterner {
        private final Interner<String> interner = Interner.newWeakInterner();

        public String intern(String s) {
            return interner.intern(s);
        }
    }

    @Benchmark
    public void intern(Blackhole bh) {
        for (int c = 0; c < size; c++) {
            bh.consume(str.intern("String" + c));
        }
    }

    /** Strong-reference interner backed by a {@link ConcurrentHashMap}. */
    public static class CHMInterner {
        private final Map<String, String> map;

        public CHMInterner() {
            map = new ConcurrentHashMap<>();
        }

        /** Returns the previously stored equal string, or {@code s} itself when it is new. */
        public String intern(String s) {
            String exist = map.putIfAbsent(s, s);
            return (exist == null) ? s : exist;
        }
    }

    @Benchmark
    public void chm(Blackhole bh) {
        for (int c = 0; c < size; c++) {
            bh.consume(chm.intern("String" + c));
        }
    }

    /**
     * Strong-reference interner backed by a plain {@link HashMap}.
     *
     * <p>{@code HashMap} must not be structurally modified by several threads without
     * external synchronization, and this object lives in benchmark-scoped state that is
     * shared between JMH worker threads. {@link #intern(String)} is therefore
     * {@code synchronized}; without the lock a multi-threaded run could corrupt the map.
     */
    public static class HMInterner {
        private final Map<String, String> map;

        public HMInterner() {
            map = new HashMap<>();
        }

        /** Returns the previously stored equal string, or {@code s} itself when it is new. */
        public synchronized String intern(String s) {
            String exist = map.putIfAbsent(s, s);
            return (exist == null) ? s : exist;
        }
    }

    @Benchmark
    public void hm(Blackhole bh) {
        for (int c = 0; c < size; c++) {
            bh.consume(hm.intern("String" + c));
        }
    }

    @Benchmark
    public void ht(Blackhole bh) {
        for (int c = 0; c < size; c++) {
            bh.consume(ht.intern("String" + c));
        }
    }

    @Benchmark
    public void ct(Blackhole bh) {
        for (int c = 0; c < size; c++) {
            bh.consume(ct.intern("String" + c));
        }
    }
}

View File

@ -85,8 +85,6 @@ import ru.dbotthepony.kstarbound.util.set
import ru.dbotthepony.kstarbound.util.traverseJsonPath
import java.io.*
import java.text.DateFormat
import java.time.Duration
import java.time.temporal.ChronoUnit
import java.util.function.BiConsumer
import java.util.function.BinaryOperator
import java.util.function.Function

View File

@ -7,8 +7,10 @@ import ru.dbotthepony.kstarbound.stream
import java.lang.ref.ReferenceQueue
import java.lang.ref.WeakReference
import java.util.concurrent.locks.LockSupport
import java.util.stream.StreamSupport
// Hand-rolled interner with performance similar to ConcurrentHashMap
// (provided there is no heavy contention; under heavy contention it performs somewhere above the Caffeine interner),
// while yielding significantly better memory utilization than both.
class HashTableInterner<T : Any>(private val segmentBits: Int) : Interner<T> {
companion object {
private val interners = ArrayList<WeakReference<HashTableInterner<*>>>()
@ -31,9 +33,7 @@ class HashTableInterner<T : Any>(private val segmentBits: Int) : Interner<T> {
i.remove()
} else {
for (segment in get.segments) {
synchronized(segment) {
any += segment.cleanup()
}
any += segment.cleanup()
}
}
}
@ -124,7 +124,8 @@ class HashTableInterner<T : Any>(private val segmentBits: Int) : Interner<T> {
actualSegmentBits = result
}
private val segments: Array<Segment> = Array(1.shl(segmentBits)) { Segment() }
private val locks: Array<Any> = Array(1.shl(segmentBits)) { Any() }
private val segments: Array<Segment> = Array(1.shl(segmentBits)) { Segment(32, locks[it]) }
init {
synchronized(interners) {
@ -136,54 +137,17 @@ class HashTableInterner<T : Any>(private val segmentBits: Int) : Interner<T> {
// while this increases memory usage (linked list), it greatly
// simplifies the logic and makes scanning a bit faster because we don't jump to neighbour nodes
// (assuming there is no such key past our neighbour)
private inner class Segment : Interner<T> {
private var mask = 31
private var mem = arrayOfNulls<Ref<T>>(32)
private var stored = 0
private inner class Segment(val size: Int, private val lock: Any) {
private val queue = ReferenceQueue<T>()
fun cleanup(): Int {
var any = 0
while (true) {
val p = queue.poll() as Ref<T>? ?: return any
remove(p)
any++
}
}
val mask = size - 1
val mem = arrayOfNulls<Ref<T>>(size)
var stored = 0
private fun hash(e: Any): Int {
return HashCommon.mix(e.hashCode().rotateRight(segmentBits)) and mask
}
private fun grow() {
mask = (mask shl 1) or 1
val old = mem.stream()
.filter { it != null }
.flatMap { ObjectArrayList(it!!.iterator()).stream() }
.filter { !it.refersTo(null) }
.collect(ObjectArrayList.toList())
for (elem in old) {
elem.nextEntry = null
}
mem = arrayOfNulls(mem.size shl 1)
val mem = mem
for (elem in old) {
val ehash = hash(elem)
val existing = mem[ehash]
if (existing == null) {
mem[ehash] = elem
} else {
existing.insert(elem)
}
}
}
fun remove(ref: Ref<T>): Boolean {
val hash = hash(ref)
val mem = mem
@ -202,8 +166,49 @@ class HashTableInterner<T : Any>(private val segmentBits: Int) : Interner<T> {
}
}
override fun intern(sample: T): T {
if (stored >= mem.size * 0.75f) grow()
fun insert(sample: T) {
stored++
val ref = Ref(sample, queue)
val mem = this.mem
val hash = hash(ref)
val existing = mem[hash]
if (existing == null)
mem[hash] = ref
else
existing.insert(ref)
}
fun cleanup(): Int {
var p: Ref<T>? = queue.poll() as Ref<T>? ?: return 0
var any = 0
synchronized(lock) {
while (p != null) {
check(remove(p!!)) { "Unable to remove null entry $p at hash ${hash(p!!)}" }
p = queue.poll() as Ref<T>?
any++
}
}
return any
}
fun grow(): Segment {
val old = mem.stream()
.filter { it != null }
.flatMap { ObjectArrayList(it!!.iterator()).stream() }
//.filter { !it.refersTo(null) }
.map { val v = it.get(); it.clear(); v }
.filter { it != null }
.collect(ObjectArrayList.toList())
val new = Segment(size * 2, lock)
for (elem in old) new.insert(elem as T)
return new
}
fun search(sample: T): T? {
val mem = mem
val hash = hash(sample)
var search = mem[hash]
@ -213,33 +218,35 @@ class HashTableInterner<T : Any>(private val segmentBits: Int) : Interner<T> {
if (get == sample)
return get
else if (get == null) {
check(remove(search)) { "Unable to remove null entry $search at hash $hash" }
else
search = search.nextEntry
} else {
search = search.nextEntry
}
}
val ref = Ref(sample, queue)
val existing = mem[hash]
if (existing == null)
mem[hash] = ref
else
existing.insert(ref)
stored++
return sample
return null
}
}
override fun intern(sample: T): T {
val hash = HashCommon.mix(sample.hashCode())
val segment = segments[hash and actualSegmentBits]
val segmentIndex = hash and actualSegmentBits
var segment = segments[segmentIndex]
synchronized(segment) {
return segment.intern(sample)
val find = segment.search(sample)
if (find != null) return find
synchronized(locks[segmentIndex]) {
segment = segments[segmentIndex]
val find = segment.search(sample)
if (find != null) return find
if (segment.stored >= segment.mem.size * 0.75f) {
segment = segment.grow()
segments[segmentIndex] = segment
}
segment.insert(sample)
return sample
}
}
}

View File

@ -0,0 +1,40 @@
package ru.dbotthepony.kstarbound.test
import org.junit.jupiter.api.DisplayName
import org.junit.jupiter.api.Test
import ru.dbotthepony.kstarbound.util.HashTableInterner
import java.util.concurrent.atomic.AtomicInteger
import java.util.random.RandomGenerator
/**
 * Concurrency stress test for [HashTableInterner].
 */
object InternerTest {
    /**
     * Interns random strings from 8 threads at once through a single interner.
     *
     * Each round builds two separate strings with the same contents and requires both
     * `intern` calls to return the very same instance; every violation counts as a miss.
     * The test fails when any miss was recorded or when any worker thread threw.
     */
    @Test
    @DisplayName("Interner stress test in high concurrency")
    fun test() {
        val interner = HashTableInterner<String>(5)
        val misses = AtomicInteger()

        // An exception escaping a worker would only terminate that thread and leave the
        // test green, so workers record it here instead. It is read only after join(),
        // which makes the workers' writes visible to this thread.
        var failure: Throwable? = null

        val threads = List(8) {
            Thread {
                try {
                    val rand = RandomGenerator.of("Xoroshiro128PlusPlus")

                    repeat(100_000) {
                        val v = rand.nextInt()
                        // built separately on purpose: equal contents, separate objects
                        val s1 = "String$v"
                        val s2 = "String$v"

                        if (interner.intern(s1) !== interner.intern(s2)) {
                            misses.incrementAndGet()
                        }
                    }
                } catch (err: Throwable) {
                    failure = err
                }
            }
        }

        threads.forEach { it.start() }
        threads.forEach { it.join() }

        failure?.let { throw IllegalStateException("Interner stress test worker thread crashed", it) }

        if (misses.get() != 0) {
            throw IllegalStateException("Interner stress test failed ($misses misses)")
        }
    }
}