Some btreedb fixes, RLE encoded free bitmap

This commit is contained in:
DBotThePony 2024-02-21 20:21:59 +07:00
parent a19437667a
commit d0718bdf14
Signed by: DBot
GPG Key ID: DCC23B5715498507
3 changed files with 95 additions and 15 deletions

View File

@ -4,7 +4,7 @@ kotlin.code.style=official
specifyKotlinAsDependency=false specifyKotlinAsDependency=false
projectGroup=ru.dbotthepony.kommons projectGroup=ru.dbotthepony.kommons
projectVersion=2.7.4 projectVersion=2.7.8
guavaDepVersion=33.0.0 guavaDepVersion=33.0.0
gsonDepVersion=2.8.9 gsonDepVersion=2.8.9

View File

@ -1,8 +1,10 @@
package ru.dbotthepony.kommons.io package ru.dbotthepony.kommons.io
import it.unimi.dsi.fastutil.bytes.ByteArrayList
import it.unimi.dsi.fastutil.ints.IntArrayList import it.unimi.dsi.fastutil.ints.IntArrayList
import it.unimi.dsi.fastutil.ints.IntArraySet import it.unimi.dsi.fastutil.ints.IntArraySet
import it.unimi.dsi.fastutil.ints.IntOpenHashSet import it.unimi.dsi.fastutil.ints.IntOpenHashSet
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream
import it.unimi.dsi.fastutil.objects.Object2IntAVLTreeMap import it.unimi.dsi.fastutil.objects.Object2IntAVLTreeMap
import it.unimi.dsi.fastutil.objects.ObjectArrayList import it.unimi.dsi.fastutil.objects.ObjectArrayList
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet
@ -15,6 +17,7 @@ import java.io.File
import java.io.InputStream import java.io.InputStream
import java.io.OutputStream import java.io.OutputStream
import java.io.RandomAccessFile import java.io.RandomAccessFile
import java.nio.ByteBuffer
import java.util.* import java.util.*
import java.util.concurrent.CompletableFuture import java.util.concurrent.CompletableFuture
import java.util.concurrent.Executor import java.util.concurrent.Executor
@ -118,10 +121,20 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc
init { init {
if (freeBlockList != INVALID_BLOCK_INDEX) { if (freeBlockList != INVALID_BLOCK_INDEX) {
val reader = BlockInputStream(readBlock(freeBlockList)) val reader = BlockInputStream(readBlock(freeBlockList))
val size = reader.readVarInt() val bytes = ByteArrayList()
if (size > 0) {
occupiedBlocksBitmap = BitSet.valueOf(reader.readNBytes(size)) var lastByte = reader.read()
var lastByteRepeats = reader.readVarInt()
while (lastByteRepeats != 0) {
for (i in 0 until lastByteRepeats)
bytes.add(lastByte.toByte())
lastByte = reader.read()
lastByteRepeats = reader.readVarInt()
} }
occupiedBlocksBitmap = BitSet.valueOf(ByteBuffer.wrap(bytes.elements(), 0, bytes.size))
} }
} }
@ -154,6 +167,20 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc
private val headerBuf = ByteArray(16) private val headerBuf = ByteArray(16)
/**
 * Cross-checks the occupancy bitmap against the block types actually stored.
 *
 * Every index below `occupiedBlocksBitmap.size()` is read via `readBlock`: an index whose bit is set
 * must not resolve to a [BlockType.FREE] block, and an index whose bit is clear must resolve to one.
 *
 * NOTE(review): `BitSet.size()` reports allocated capacity in bits, not the logical length, so clear
 * bits past the highest set bit are visited as well; confirm `readBlock` is valid for those indices.
 *
 * @throws IllegalStateException for the first block whose type contradicts the bitmap
 */
fun checkFreeBitmap() {
    repeat(occupiedBlocksBitmap.size()) { index ->
        // Sample the bit before touching the block, in the same order as the checks below rely on.
        val markedOccupied = occupiedBlocksBitmap[index]
        val block = readBlock(index)

        if (markedOccupied) {
            check(block.type != BlockType.FREE) { "Expected block $index to be not free" }
        } else {
            check(block.type == BlockType.FREE) { "Expected block $index to be free, but got ${block.type}" }
        }
    }
}
private fun commit() { private fun commit() {
if (sync) reader.channel.force(true) if (sync) reader.channel.force(true)
val blocksToFree = ObjectOpenHashSet<Block>() val blocksToFree = ObjectOpenHashSet<Block>()
@ -176,16 +203,50 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc
occupiedBlocksBitmap[it.id] = false occupiedBlocksBitmap[it.id] = false
} }
val size = occupiedBlocksBitmap.size() / 8 + 1 var rleEncoded: FastByteArrayOutputStream
var size = 0
var newSize = 1
val writer = BlockOutputStream(BlockType.BITMAP) val writer = BlockOutputStream(BlockType.BITMAP)
writer.ensureCapacity(size + 4)
do {
writer.ensureCapacity(newSize - size)
size = newSize
val bytes = occupiedBlocksBitmap.toByteArray()
rleEncoded = FastByteArrayOutputStream(newSize + 256)
if (bytes.isNotEmpty()) {
// RLE encode free bitmap, to significantly reduce space required
var lastByte: Byte = bytes[0]
var lastByteRepeats = 1
for (i in 1 until bytes.size) {
val byte = bytes[i]
if (byte != lastByte) {
rleEncoded.write(lastByte.toInt() and 0xFF)
rleEncoded.writeVarInt(lastByteRepeats)
lastByteRepeats = 1
lastByte = byte
} else {
lastByteRepeats++
}
}
rleEncoded.write(lastByte.toInt() and 0xFF)
rleEncoded.writeVarInt(lastByteRepeats)
}
rleEncoded.write(0)
rleEncoded.write(0)
newSize = rleEncoded.length
} while (newSize / effectiveBlockSize > size / effectiveBlockSize)
prohibitAllocation = true prohibitAllocation = true
try { try {
val bytes = occupiedBlocksBitmap.toByteArray() writer.write(rleEncoded.array, 0, rleEncoded.length)
writer.writeInt(bytes.size)
writer.write(bytes)
freeBlockList = writer.head freeBlockList = writer.head
check(freeBlockList != INVALID_BLOCK_INDEX) check(freeBlockList != INVALID_BLOCK_INDEX)
@ -396,8 +457,8 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc
} }
} }
/*trees.first().check(LongConsumer { /*trees.first().check(IntConsumer {
if (blocksToFree.any { b -> b.id == it }) { if (pendingFree.contains(it)) {
throw IllegalStateException("Tree is referencing dead blocks") throw IllegalStateException("Tree is referencing dead blocks")
} }
})*/ })*/
@ -864,10 +925,10 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc
if (bytes == 0) return this if (bytes == 0) return this
require(bytes > 0) { "Negative amount of bytes: $bytes" } require(bytes > 0) { "Negative amount of bytes: $bytes" }
val alloc = if (currentBlock == null) { val alloc = if (currentBlock == null && preallocatedBlocks.isEmpty()) {
allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1) allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1)
} else { } else {
allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1, currentBlock!!.id) allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1, currentBlock?.id ?: preallocatedBlocks.last().id)
} }
preallocatedBlocks.addAll(alloc) preallocatedBlocks.addAll(alloc)
@ -880,6 +941,7 @@ class BTreeDB6 private constructor(val file: File, private var reader: RandomAcc
blocks.forEach { it.write() } blocks.forEach { it.write() }
blocks.clear() blocks.clear()
if (currentBlock != null) blocks.add(currentBlock!!) if (currentBlock != null) blocks.add(currentBlock!!)
preallocatedBlocks.forEach { it.write() } // write preallocated blocks so they can be free'd later
} }
override fun close() { override fun close() {

View File

@ -16,19 +16,37 @@ object BTreeDB6Tests {
if (file.exists()) file.delete() if (file.exists()) file.delete()
val create = BTreeDB6.create(file, 128, sync = false) val create = BTreeDB6.create(file, 128, sync = false)
for (i in 0 .. 200) { for (i in 0 .. 80000) {
val s = "This is key $i" val s = "This is key $i"
val k = ByteKey("This is key $i") val k = ByteKey("This is key $i")
create.write(k, s.toByteArray()) create.write(k, s.toByteArray())
assertEquals(s, String(create.read(k).get())) assertEquals(s, String(create.read(k).get()))
} }
for (i in 0 .. 200) { for (i in 0 .. 80000) {
val s = "This is key $i" val s = "This is key $i"
val k = ByteKey("This is key $i") val k = ByteKey("This is key $i")
assertEquals(s, String(create.read(k).get())) assertEquals(s, String(create.read(k).get()))
} }
for (i in 0 .. 80000) {
val s = "This is key $i"
val k = ByteKey("This is key $i")
create.write(k, s.toByteArray())
assertEquals(s, String(create.read(k).get()))
}
create.close() create.close()
val create2 = BTreeDB6(file)
for (i in 0 .. 80000) {
val s = "This is key $i"
val k = ByteKey("This is key $i")
assertEquals(s, String(create2.read(k).get()))
}
create2.checkFreeBitmap()
create2.close()
} }
} }