From d0718bdf1407ff700d96e4ba2baa29a9cd3ebeb8 Mon Sep 17 00:00:00 2001 From: DBotThePony Date: Wed, 21 Feb 2024 20:21:59 +0700 Subject: [PATCH] Some btreedb fixes, RLE encoded free bitmap --- gradle.properties | 2 +- .../ru/dbotthepony/kommons/io/BTreeDB6.kt | 86 ++++++++++++++++--- .../dbotthepony/kommons/test/BTreeDB6Tests.kt | 22 ++++- 3 files changed, 95 insertions(+), 15 deletions(-) diff --git a/gradle.properties b/gradle.properties index b82e9e0..157a5f8 100644 --- a/gradle.properties +++ b/gradle.properties @@ -4,7 +4,7 @@ kotlin.code.style=official specifyKotlinAsDependency=false projectGroup=ru.dbotthepony.kommons -projectVersion=2.7.4 +projectVersion=2.7.8 guavaDepVersion=33.0.0 gsonDepVersion=2.8.9 diff --git a/src/main/kotlin/ru/dbotthepony/kommons/io/BTreeDB6.kt b/src/main/kotlin/ru/dbotthepony/kommons/io/BTreeDB6.kt index 3a1a8a7..9fec194 100644 --- a/src/main/kotlin/ru/dbotthepony/kommons/io/BTreeDB6.kt +++ b/src/main/kotlin/ru/dbotthepony/kommons/io/BTreeDB6.kt @@ -1,8 +1,10 @@ package ru.dbotthepony.kommons.io +import it.unimi.dsi.fastutil.bytes.ByteArrayList import it.unimi.dsi.fastutil.ints.IntArrayList import it.unimi.dsi.fastutil.ints.IntArraySet import it.unimi.dsi.fastutil.ints.IntOpenHashSet +import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream import it.unimi.dsi.fastutil.objects.Object2IntAVLTreeMap import it.unimi.dsi.fastutil.objects.ObjectArrayList import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet @@ -15,6 +17,7 @@ import java.io.File import java.io.InputStream import java.io.OutputStream import java.io.RandomAccessFile +import java.nio.ByteBuffer import java.util.* import java.util.concurrent.CompletableFuture import java.util.concurrent.Executor @@ -118,10 +121,20 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc init { if (freeBlockList != INVALID_BLOCK_INDEX) { val reader = BlockInputStream(readBlock(freeBlockList)) - val size = reader.readVarInt() - if (size > 0) { - occupiedBlocksBitmap = BitSet.valueOf(reader.readNBytes(size)) + val bytes = ByteArrayList() + + var lastByte = reader.read() + var lastByteRepeats = reader.readVarInt() + + while (lastByteRepeats != 0) { + for (i in 0 until lastByteRepeats) + bytes.add(lastByte.toByte()) + + lastByte = reader.read() + lastByteRepeats = reader.readVarInt() } + + occupiedBlocksBitmap = BitSet.valueOf(ByteBuffer.wrap(bytes.elements(), 0, bytes.size)) } } @@ -154,6 +167,20 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc private val headerBuf = ByteArray(16) + /** + * Checks free bitmap for validity + */ + fun checkFreeBitmap() { + for (i in 0 until occupiedBlocksBitmap.size()) { + if (occupiedBlocksBitmap[i]) { + check(readBlock(i).type != BlockType.FREE) { "Expected block $i to be not free" } + } else { + val block = readBlock(i) + check(block.type == BlockType.FREE) { "Expected block $i to be free, but got ${block.type}" } + } + } + } + private fun commit() { if (sync) reader.channel.force(true) val blocksToFree = ObjectOpenHashSet() @@ -176,16 +203,50 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc occupiedBlocksBitmap[it.id] = false } - val size = occupiedBlocksBitmap.size() / 8 + 1 + var rleEncoded: FastByteArrayOutputStream + var size = 0 + var newSize = 1 val writer = BlockOutputStream(BlockType.BITMAP) - writer.ensureCapacity(size + 4) + + do { + writer.ensureCapacity(newSize - size) + size = newSize + val bytes = occupiedBlocksBitmap.toByteArray() + rleEncoded = FastByteArrayOutputStream(newSize + 256) + + if (bytes.isNotEmpty()) { + // RLE encode free bitmap, to significantly reduce space required + var lastByte: Byte = bytes[0] + var lastByteRepeats = 1 + + for (i in 1 until bytes.size) { + val byte = bytes[i] + + if (byte != lastByte) { + rleEncoded.write(lastByte.toInt() and 0xFF) + rleEncoded.writeVarInt(lastByteRepeats) + + lastByteRepeats = 1 + lastByte = byte + } else { + lastByteRepeats++ + } + } + + rleEncoded.write(lastByte.toInt() and 0xFF) + rleEncoded.writeVarInt(lastByteRepeats) + } + + rleEncoded.write(0) + rleEncoded.write(0) + + newSize = rleEncoded.length + } while (newSize / effectiveBlockSize > size / effectiveBlockSize) prohibitAllocation = true try { - val bytes = occupiedBlocksBitmap.toByteArray() - writer.writeInt(bytes.size) - writer.write(bytes) + writer.write(rleEncoded.array, 0, rleEncoded.length) freeBlockList = writer.head check(freeBlockList != INVALID_BLOCK_INDEX) @@ -396,8 +457,8 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc } } - /*trees.first().check(LongConsumer { - if (blocksToFree.any { b -> b.id == it }) { + /*trees.first().check(IntConsumer { + if (pendingFree.contains(it)) { throw IllegalStateException("Tree is referencing dead blocks") } })*/ @@ -864,10 +925,10 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc if (bytes == 0) return this require(bytes > 0) { "Negative amount of bytes: $bytes" } - val alloc = if (currentBlock == null) { + val alloc = if (currentBlock == null && preallocatedBlocks.isEmpty()) { allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1) } else { - allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1, currentBlock!!.id) + allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1, currentBlock?.id ?: preallocatedBlocks.last().id) } preallocatedBlocks.addAll(alloc) @@ -880,6 +941,7 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc blocks.forEach { it.write() } blocks.clear() if (currentBlock != null) blocks.add(currentBlock!!) + preallocatedBlocks.forEach { it.write() } // write preallocated blocks so they can be free'd later } override fun close() { diff --git a/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt b/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt index 6cb84fe..446cef9 100644 --- a/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt +++ b/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt @@ -16,19 +16,37 @@ object BTreeDB6Tests { if (file.exists()) file.delete() val create = BTreeDB6.create(file, 128, sync = false) - for (i in 0 .. 200) { + for (i in 0 .. 80000) { val s = "This is key $i" val k = ByteKey("This is key $i") create.write(k, s.toByteArray()) assertEquals(s, String(create.read(k).get())) } - for (i in 0 .. 200) { + for (i in 0 .. 80000) { val s = "This is key $i" val k = ByteKey("This is key $i") assertEquals(s, String(create.read(k).get())) } + for (i in 0 .. 80000) { + val s = "This is key $i" + val k = ByteKey("This is key $i") + create.write(k, s.toByteArray()) + assertEquals(s, String(create.read(k).get())) + } + create.close() + + val create2 = BTreeDB6(file) + + for (i in 0 .. 80000) { + val s = "This is key $i" + val k = ByteKey("This is key $i") + assertEquals(s, String(create2.read(k).get())) + } + + create2.checkFreeBitmap() + create2.close() } }