From 1f6eb023eda443e6fcb733a87bad839bd86f0a2b Mon Sep 17 00:00:00 2001 From: DBotThePony Date: Fri, 31 May 2024 10:39:06 +0700 Subject: [PATCH] While BTreeDB6 was a fun little project, it is inferior to SQLite in all ways --- gradle.properties | 2 +- .../ru/dbotthepony/kommons/io/BTreeDB6.kt | 1103 ----------------- .../ru/dbotthepony/kommons/io/ByteKey.kt | 102 -- .../dbotthepony/kommons/io/DelegateSyncher.kt | 2 +- .../kommons/io/InputStreamUtils.kt | 8 - .../kommons/io/OutputStreamUtils.kt | 8 - .../dbotthepony/kommons/test/BTreeDB6Tests.kt | 52 - 7 files changed, 2 insertions(+), 1275 deletions(-) delete mode 100644 src/main/kotlin/ru/dbotthepony/kommons/io/BTreeDB6.kt delete mode 100644 src/main/kotlin/ru/dbotthepony/kommons/io/ByteKey.kt delete mode 100644 src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt diff --git a/gradle.properties b/gradle.properties index 22c9bfc..964b92a 100644 --- a/gradle.properties +++ b/gradle.properties @@ -4,7 +4,7 @@ kotlin.code.style=official specifyKotlinAsDependency=false projectGroup=ru.dbotthepony.kommons -projectVersion=2.18.1 +projectVersion=3.0.0 guavaDepVersion=33.0.0 gsonDepVersion=2.8.9 diff --git a/src/main/kotlin/ru/dbotthepony/kommons/io/BTreeDB6.kt b/src/main/kotlin/ru/dbotthepony/kommons/io/BTreeDB6.kt deleted file mode 100644 index 9156ed3..0000000 --- a/src/main/kotlin/ru/dbotthepony/kommons/io/BTreeDB6.kt +++ /dev/null @@ -1,1103 +0,0 @@ -package ru.dbotthepony.kommons.io - -import it.unimi.dsi.fastutil.bytes.ByteArrayList -import it.unimi.dsi.fastutil.ints.IntArrayList -import it.unimi.dsi.fastutil.ints.IntArraySet -import it.unimi.dsi.fastutil.ints.IntOpenHashSet -import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream -import it.unimi.dsi.fastutil.objects.Object2IntAVLTreeMap -import it.unimi.dsi.fastutil.objects.ObjectArrayList -import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet -import ru.dbotthepony.kommons.util.KOptional -import java.io.Closeable -import java.io.DataInputStream -import java.io.DataOutputStream -import java.io.File -import java.io.InputStream -import java.io.OutputStream -import java.io.RandomAccessFile -import java.nio.ByteBuffer -import java.util.* -import java.util.function.IntConsumer -import java.util.stream.IntStream -import java.util.stream.Stream -import kotlin.collections.ArrayList - -private fun readHeader(reader: RandomAccessFile, required: Char) { - val read = reader.read() - require(read.toChar() == required) { "Bad header, expected ${required.code}, got $read" } -} - -/** - * General purpose binary object storage, employing copy-on-write updates. - * - * This database is designed to read/write small blobs, since any read/write operation - * must be able to fully store the blob in question in memory. - * - * Smallest unit of work is defined as block. Default block size is 4096 bytes (which includes block header and pointer to next block). - * See [create] for more details. - * - * While attempt at minimizing tree read/writes is made, there is no tree rebalancing, - * so it is possible to force tree to degenerate into linked list. But with relatively large - * block size, rebalancing isn't required since even with hundreds of thousands keys tree - * height remains relatively small. - * - * This storage does not employ any form of defragmentation, but writes try to allocate - * (reasonably sized) continuous blocks, to avoid severe internal fragmentation. - * - * [sync] determines whenever writes should be performed atomically. If [sync] is false, in event of - * host system crash or power loss, there is a risk of file becoming unreadable, unless underlying filesystem - * and/or device ensures writes to file are performed in the same order as they were issued by code. - * If performance is priority, specifying sync as false will yield vastly better performance for writes. - * - * While it might be tempting to use [java.util.concurrent.ForkJoinPool.commonPool] here, keep in mind that - * common ForkJoinPool is meant ot be used by calculation-intense operations. [BTreeDB6] is, on the other hand, - * is I/O intense, and can stall executor threads. A good Executor here would be one that can spawn a lot of threads, - * for example, [java.util.concurrent.Executors.newCachedThreadPool]. - * - * Format specs: - * * Block size can be anything between 64 and 2^31 - 1 bytes. Creation through [create] restricts block size to 128 and up, since that's - * already absurdly small block size! - * * Blocks are addressed by 32-bit integer. Due to Java int being signed, this allows to address at most 2^31 - 1 blocks. - * With block size of 4096 bytes, this puts max filesize just a little below 8 TiB. Merely opening such large file will require - * 255 MiB of RAM (to hold free bitmap). - * * Free space is stored as bitmap. As such, smaller blocks require more space for storing free bitmap, and hence will take more - * time during writes - * * All write operations are done in new blocks, outdated blocks erased with zeroes only when header updated to point - * at new tree - * * Keys can freely vary in size inside single DB, keys can be up to 2^31 - 1 bytes in size - */ -// TODO: Checksums (TREE/LEAF/DATA) -// TODO: Second tree (TREE/LEAF redundancy) -// TODO: Defragmenting -// TODO: Add keys to DATA blocks, so entire tree can be reconstructed from scratch in event of both trees become unreadable -// TODO: Changeset counter (to determine write-in-progress blocks/trees, to ignore them when reconstructing tree) -// TODO: Tree rotations (rebalancing), tree height tracking, weighted subtree splitting -// TODO: Removal of keys -// TODO: Faster free bitmap scan -class BTreeDB6 private constructor(val file: File, private var reader: RandomAccessFile, private val sync: Boolean) : Closeable { - constructor(file: File, sync: Boolean = true) : this(file, RandomAccessFile(file, "rw"), sync) - - init { - reader.seek(0L) - } - - init { - readHeader(reader, 'B') - readHeader(reader, 'T') - readHeader(reader, 'r') - readHeader(reader, 'e') - readHeader(reader, 'e') - readHeader(reader, 'D') - readHeader(reader, 'B') - readHeader(reader, '6') - } - - override fun close() { - reader.close() - } - - val blockSize = reader.readInt() - val effectiveBlockSize = blockSize - 9 - - init { - require(blockSize >= 64) { "Degenerate block size: $blockSize" } - } - - private var occupiedBlocksBitmap = BitSet() - - private val headerOffset = reader.filePointer - private var freeBlockList = reader.readInt() - private var rootBlockIndex = reader.readInt() - private var prohibitAllocation = false - - private val pendingFree = IntOpenHashSet() - - init { - if (freeBlockList != INVALID_BLOCK_INDEX) { - val head = readBlock(freeBlockList) - check(head.type == BlockType.BITMAP) { "free block list points to ${head.type} block" } - val reader = BlockInputStream(head) - val bytes = ByteArrayList() - - var lastByte = reader.read() - var lastByteRepeats = reader.readVarInt() - - while (lastByteRepeats != 0) { - for (i in 0 until lastByteRepeats) - bytes.add(lastByte.toByte()) - - lastByte = reader.read() - lastByteRepeats = reader.readVarInt() - } - - occupiedBlocksBitmap = BitSet.valueOf(ByteBuffer.wrap(bytes.elements(), 0, bytes.size)) - } else if (rootBlockIndex > 0) { - panic() - } - } - - private enum class BlockType { - // block that is unused - FREE, - // block that is part of search tree, contains relative keys and block pointers to either other trees or leafs - TREE, - // block contains exact keys and block pointers - LEAF, - // contains data length field and data itself - DATA, - // block contains free space bitmap - BITMAP; - - companion object { - val e = values() - } - } - - /** - * Called when file is considered unreadable. - * - * This method creates new file, and tries to recover current file's contents - * into new file. - */ - private fun panic() { - throw IllegalStateException("panic() has been called, but currently we can't recover corrupt file!") - } - - private val headerBuf = ByteArray(16) - - /** - * Checks free bitmap for consistency with file's contents - */ - fun checkFreeBitmap() { - val length = reader.length() - - for (i in 0 until occupiedBlocksBitmap.size()) { - if (occupiedBlocksBitmap[i]) { - check(readBlock(i).type != BlockType.FREE) { "Expected block $i to be not free" } - } else if ((i + 1) * blockSize < length) { - val block = readBlock(i) - check(block.type == BlockType.FREE) { "Expected block $i to be free, but got ${block.type}" } - } - } - } - - private fun commit() { - if (sync) reader.channel.force(true) - val blocksToFree = ObjectOpenHashSet() - - for (i in pendingFree) { - val block = Block(i) - block.read() - block.freeChain(blocksToFree) - } - - pendingFree.clear() - - if (freeBlockList != INVALID_BLOCK_INDEX) { - val block = Block(freeBlockList) - block.read() - check(block.type == BlockType.BITMAP) { "free block list points to ${block.type} block" } - block.freeChain(blocksToFree) - } - - blocksToFree.forEach { - occupiedBlocksBitmap[it.id] = false - } - - var rleEncoded: FastByteArrayOutputStream - var size = 0 - var newSize = 1 - val writer = BlockOutputStream(BlockType.BITMAP) - - do { - writer.ensureCapacity(newSize - size) - size = newSize - val bytes = occupiedBlocksBitmap.toByteArray() - rleEncoded = FastByteArrayOutputStream(newSize + 256) - - if (bytes.isNotEmpty()) { - // RLE encode free bitmap, to significantly reduce space required - var lastByte: Byte = bytes[0] - var lastByteRepeats = 1 - - for (i in 1 until bytes.size) { - val byte = bytes[i] - - if (byte != lastByte) { - rleEncoded.write(lastByte.toInt() and 0xFF) - rleEncoded.writeVarInt(lastByteRepeats) - - lastByteRepeats = 1 - lastByte = byte - } else { - lastByteRepeats++ - } - } - - rleEncoded.write(lastByte.toInt() and 0xFF) - rleEncoded.writeVarInt(lastByteRepeats) - } - - rleEncoded.write(0) - rleEncoded.write(0) - - newSize = rleEncoded.length - } while (newSize / effectiveBlockSize > size / effectiveBlockSize) - - prohibitAllocation = true - - try { - writer.write(rleEncoded.array, 0, rleEncoded.length) - - freeBlockList = writer.head - check(freeBlockList != INVALID_BLOCK_INDEX) - - writer.close() - } finally { - prohibitAllocation = false - } - - if (sync) reader.channel.force(true) - - reader.seek(headerOffset) - - headerBuf[0] = ((freeBlockList ushr 24) and 0xFF).toByte() - headerBuf[1] = ((freeBlockList ushr 16) and 0xFF).toByte() - headerBuf[2] = ((freeBlockList ushr 8) and 0xFF).toByte() - headerBuf[3] = ((freeBlockList ushr 0) and 0xFF).toByte() - - headerBuf[4 + 0] = ((rootBlockIndex ushr 24) and 0xFF).toByte() - headerBuf[4 + 1] = ((rootBlockIndex ushr 16) and 0xFF).toByte() - headerBuf[4 + 2] = ((rootBlockIndex ushr 8) and 0xFF).toByte() - headerBuf[4 + 3] = ((rootBlockIndex ushr 0) and 0xFF).toByte() - - reader.write(headerBuf) - if (sync) reader.channel.force(true) - - blocksToFree.forEach { - if (!occupiedBlocksBitmap[it.id]) - it.free() - } - } - - private fun searchForBlock(key: ByteKey): Block? { - if (rootBlockIndex == INVALID_BLOCK_INDEX) { - return null - } - - var block = readBlock(rootBlockIndex) - - while (block.type == BlockType.TREE) { - val tree = TreeData(block) - block = tree.childrenBlock(key) - } - - if (block.type == BlockType.LEAF) { - val leaf = LeafData(block) - return leaf.get(key) - } else { - throw IllegalStateException("Hit ${block.type} block while scanning index") - } - } - - operator fun contains(key: ByteKey): Boolean { - return searchForBlock(key) != null - } - - fun read(key: ByteKey): KOptional { - val data = searchForBlock(key) - - if (data != null) { - val stream = DataInputStream(BlockInputStream(data)) - val output = ByteArray(stream.readInt()) - stream.readFully(output) - return KOptional(output) - } - - return KOptional.empty() - } - - private fun readKeysInto(block: Block, result: MutableList) { - when (block.type) { - BlockType.TREE -> { - val tree = TreeData(block) - - for (sub in tree.children()) { - readKeysInto(readBlock(sub), result) - } - } - - BlockType.LEAF -> { - val leaf = LeafData(block) - - for ((key) in leaf.keys()) { - result.add(key) - } - } - - else -> throw IllegalStateException("Hit $block while scanning tree index") - } - } - - fun findAllKeys(): List { - if (rootBlockIndex == INVALID_BLOCK_INDEX) { - return emptyList() - } - - val result = ArrayList() - readKeysInto(readBlock(rootBlockIndex), result) - return result - } - - fun write(key: ByteKey, value: ByteArray, offset: Int, length: Int) { - if (rootBlockIndex == INVALID_BLOCK_INDEX) { - // create LEAF node for root - val block = allocBlock(BlockType.LEAF) - block.write() - rootBlockIndex = block.id - } - - val block = readBlock(rootBlockIndex) - - if (block.type == BlockType.LEAF) { - val leaf = LeafData(block) - val newData = leaf.compute(key, length + 4) - - val stream = DataOutputStream(BlockOutputStream(newData)) - stream.writeInt(length) - stream.write(value, offset, length) - stream.close() - - rootBlockIndex = leaf.write() - - if (leaf.shouldSplit) { - // replace root leaf with root tree - val split = leaf.split() - val tree = TreeData() - - tree.add(split) - tree.write() - rootBlockIndex = tree.write() - } - - commit() - } else if (block.type == BlockType.TREE) { - val trees = ArrayList() - var currentBlock = block - var tree = TreeData(currentBlock) - trees.add(tree) - - while (true) { - currentBlock = tree.childrenBlock(key) - - if (currentBlock.type == BlockType.TREE) { - tree = TreeData(currentBlock) - trees.add(tree) - } else if (currentBlock.type == BlockType.LEAF) { - break - } else { - throw IllegalStateException("Hit ${currentBlock.type} block while scanning tree") - } - } - - val leaf = LeafData(currentBlock) - val stream = DataOutputStream(BlockOutputStream(leaf.compute(key, length + 4))) - stream.writeInt(length) - stream.write(value, offset, length) - stream.close() - - if (leaf.shouldSplit) { - // splitting is required, which may split upstream trees - var split = leaf.split() - val itr = trees.asReversed().iterator() - - for (uptree in itr) { - uptree.add(split) - - if (uptree.shouldSplit) { - // split subtree - split = uptree.split() - - if (!itr.hasNext()) { - // splitted main tree - val newMainTree = TreeData() - newMainTree.add(split) - trees.add(0, newMainTree) - newMainTree.write() - - break - } - } else { - // splitting has finished, only update parent trees pointers - - var oldBlock = uptree.headBlock - check(oldBlock != INVALID_BLOCK_INDEX) - var newBlock = uptree.write() - - for (uptree2 in itr) { - uptree2.update(oldBlock, newBlock) - - oldBlock = uptree2.headBlock - check(oldBlock != INVALID_BLOCK_INDEX) - newBlock = uptree2.write() - } - - break - } - } - } else { - // no splitting required, simply construct new tree by updating pointers - var oldBlock = leaf.headBlock - check(oldBlock != INVALID_BLOCK_INDEX) - var newBlock = leaf.write() - - for (uptree in trees.asReversed()) { - uptree.update(oldBlock, newBlock) - oldBlock = uptree.headBlock - check(oldBlock != INVALID_BLOCK_INDEX) - newBlock = uptree.write() - } - } - - /*trees.first().check(IntConsumer { - if (pendingFree.contains(it)) { - throw IllegalStateException("Tree is referencing dead blocks") - } - })*/ - - rootBlockIndex = trees.first().headBlock - commit() - } else { - throw IllegalStateException("Hit ${block.type} block in root tree") - } - } - - fun write(key: ByteKey, value: ByteArray) { - write(key, value, 0, value.size) - } - - private fun allocBlock(type: BlockType): Block { - return allocBlocks(type, 1)[0] - } - - private fun allocBlocksByBytes(type: BlockType, size: Int, after: Int = INVALID_BLOCK_INDEX): List { - return allocBlocks(type, (size - 1) / effectiveBlockSize + 1, after) - } - - private fun allocBlocks(type: BlockType, size: Int, after: Int = INVALID_BLOCK_INDEX): List { - require(size > 0) { "Invalid amount of blocks to allocate: $size" } - check(!prohibitAllocation) { "Currently writing free block bitmap, can not allocate any blocks!" } - - // if we need to allocate lots of blocks, don't necessarily go for continuous allocation - // otherwise we might end up with ever-growing file when allocating bigger and bigger blocks. - // example: allocating space for free bitmap - if (size > 128) { - // also ignore 'after' request, since this shouldn't matter at this point - val result = ArrayList(size) - var toAllocate = size - - while (toAllocate > 0) { - val toAlloc = toAllocate.coerceAtMost(128) - result.addAll(allocBlocks(type, toAlloc)) - toAllocate -= toAlloc - } - - return result - } - - if (after != INVALID_BLOCK_INDEX && !occupiedBlocksBitmap[after + 1]) { - // if we can allocate enough blocks after specified index, allocate there (reduce fragmentation) - val findOccupied = occupiedBlocksBitmap.nextSetBit(after + 1) - val find = after + 1 - - if (findOccupied == -1 || findOccupied - find >= size) { - val blocks = ArrayList(size) - occupiedBlocksBitmap.set(find, find + size) - - for (i2 in 0 until size) { - val block = Block(i2 + find) - block.nextBlock = INVALID_BLOCK_INDEX - block.type = type - if (i2 > 0) blocks[i2 - 1].nextBlock = block.id - blocks.add(block) - } - - return blocks - } - } - - var i = 0 - - while (true) { - val findFree = occupiedBlocksBitmap.nextClearBit(i) - val findOccupied = occupiedBlocksBitmap.nextSetBit(findFree) - - if (findOccupied == -1 || findOccupied - findFree >= size) { - val blocks = ArrayList(size) - - occupiedBlocksBitmap.set(findFree, findFree + size) - - for (i2 in 0 until size) { - val block = Block(i2 + findFree) - block.nextBlock = INVALID_BLOCK_INDEX - block.type = type - blocks.add(block) - } - - return blocks - } else { - i = findOccupied + 1 - } - } - } - - private fun readBlock(id: Int): Block { - val block = Block(id) - block.read() - return block - } - - // from smallest to biggest - private inner class TreeData() { - constructor(head: Block) : this() { - check(head.type == BlockType.TREE) { "TreeData was constructed with ${head.type} block" } - val reader = DataInputStream(BlockInputStream(head)) - val entryCount = reader.readVarInt() - - for (i in 0 until entryCount) { - val key = reader.readByteKey() - val block = reader.readInt() - children.put(key, block) - } - - rightMostBlock = reader.readInt() - headBlock = head.id - } - - var headBlock: Int = INVALID_BLOCK_INDEX - private set - - private var rightMostBlock: Int = INVALID_BLOCK_INDEX - private val children = Object2IntAVLTreeMap() - - init { - children.defaultReturnValue(INVALID_BLOCK_INDEX) - } - - val size: Int - get() = children.size - - val shouldSplit: Boolean - get() = size >= 8 && (children.keys.stream().mapToInt { it.size + 8 }.sum() + 8) >= (blockSize - 9) - - fun children(): IntStream { - return IntStream.concat(children.values.intStream(), IntStream.of(rightMostBlock)) - } - - fun write(): Int { - if (headBlock != INVALID_BLOCK_INDEX) pendingFree.add(headBlock) - - val bytes = BlockOutputStream(BlockType.TREE) - val stream = DataOutputStream(bytes) - stream.writeVarInt(children.size) - - for ((key, block) in children) { - key.write(stream) - stream.writeInt(block) - } - - stream.writeInt(rightMostBlock) - stream.close() - - headBlock = bytes.head - check(headBlock != INVALID_BLOCK_INDEX) - return headBlock - } - - private fun intersects(other: TreeData): Boolean { - return other.rightMostBlock == rightMostBlock || rightMostBlock in other.children.values || children.values.any { it in other.children.values } - } - - fun split(): SplitResult { - val keys = ObjectArrayList(children.keys) - val middle = keys.size / 2 - val median = keys[middle] - - val tree0 = TreeData() - val tree1 = TreeData() - - tree0.rightMostBlock = children.getInt(median) - tree1.rightMostBlock = rightMostBlock - - check(tree0.rightMostBlock != INVALID_BLOCK_INDEX) - - for (i in 0 until middle) { - tree0.children[keys[i]] = children.getInt(keys[i]) - } - - for (i in middle + 1 until keys.size) { - tree1.children[keys[i]] = children.getInt(keys[i]) - } - - check(!tree0.intersects(tree1)) { "tree split consistency check failed" } - check(!tree1.intersects(tree0)) { "tree split consistency check failed" } - - tree0.write() - tree1.write() - - if (this.headBlock != INVALID_BLOCK_INDEX) - pendingFree.add(this.headBlock) - - return SplitResult(tree0.headBlock, tree1.headBlock, median, this.headBlock) - } - - fun childrenBlock(key: ByteKey): Block { - for ((rkey, block) in children) { - if (key < rkey) { - return readBlock(block) - } - } - - return readBlock(rightMostBlock) - } - - fun add(data: SplitResult) { - if (children.isEmpty()) { - // new tree - rightMostBlock = data.right - children[data.median] = data.left - } else if (rightMostBlock == data.original) { - children[data.median] = data.left - rightMostBlock = data.right - } else { - val splitPoint = children.entries.find { it.value == data.original } - checkNotNull(splitPoint) { "Tree is inconsistent: unable to locate children split point (median: ${data.median}; old children block: ${data.original})" } - val key = splitPoint.key - children[key] = data.right - children[data.median] = data.left - } - } - - fun check(blockIDVisitor: IntConsumer, stack: IntArraySet = IntArraySet()) { - check(headBlock != INVALID_BLOCK_INDEX) { "Tree contains 'unallocated' blocks" } - - if (!stack.add(headBlock)) { - throw IllegalStateException("Cyclic block reference in search tree (${stack.joinToString(" -> ", transform = { readBlock(it).toString() })} -> $headBlock)") - } - - try { - blockIDVisitor.accept(headBlock) - } catch (err: Throwable) { - throw IllegalStateException("Tree consistency check failed on path ${stack.joinToString(" -> ", transform = { readBlock(it).toString() })}", err) - } - - val ids = IntArrayList(children.values) - ids.add(rightMostBlock) - - for (block in ids) { - val read = readBlock(block) - - if (read.type == BlockType.LEAF) { - LeafData(read).check(blockIDVisitor, stack) - } else if (read.type == BlockType.TREE) { - TreeData(read).check(blockIDVisitor, stack) - } else if (read.type == BlockType.FREE) { - throw IllegalStateException("Hit free block while checking tree consistency") - } else if (read.type == BlockType.DATA) { - throw IllegalStateException("Hit data block while checking tree consistency") - } - } - - stack.remove(headBlock) - } - - fun update(oldBlock: Int, newBlock: Int) { - if (rightMostBlock == oldBlock) { - rightMostBlock = newBlock - - check(!children.values.any { it == oldBlock }) { "Tree contains duplicated entries" } - } else { - var hit = false - - for ((key, block) in children.entries) { - if (block == oldBlock) { - children[key] = newBlock - hit = true - break - } - } - - if (!hit) { - throw IllegalStateException("Can't find block $oldBlock in tree") - } - } - } - } - - /** - * left - less than median - * right - greater or equal to median - */ - private data class SplitResult(val left: Int, val right: Int, val median: ByteKey, val original: Int) - - private inner class LeafData() { - constructor(head: Block) : this() { - check(head.type == BlockType.LEAF) { "LeafData was constructed with ${head.type} block" } - val stream = DataInputStream(BlockInputStream(head)) - val amount = stream.readVarInt() - - for (i in 0 until amount) { - val key = stream.readByteKey() - val block = stream.readInt() - keys[key] = block - } - - this.headBlock = head.id - } - - var headBlock: Int = INVALID_BLOCK_INDEX - private set - - private val keys = Object2IntAVLTreeMap() - - init { - keys.defaultReturnValue(INVALID_BLOCK_INDEX) - } - - fun keys(): Stream> { - return keys.entries.stream().map { it.key to it.value } - } - - val size: Int - get() = keys.size - - val shouldSplit: Boolean - get() = size >= 8 && keys.keys.stream().mapToInt { it.size + 8 }.sum() >= (blockSize - 9) - - fun split(): SplitResult { - val keys = ObjectArrayList(keys.keys) - val median = keys[keys.size / 2] - - val leaf0 = LeafData() - val leaf1 = LeafData() - - for (i in 0 until keys.size / 2) { - val key = keys[i] - leaf0.keys.put(key, this.keys.getInt(key)) - } - - for (i in keys.size / 2 until keys.size) { - val key = keys[i] - leaf1.keys.put(key, this.keys.getInt(key)) - } - - leaf0.write() - leaf1.write() - - if (this.headBlock != INVALID_BLOCK_INDEX) - pendingFree.add(this.headBlock) - - return SplitResult(leaf0.headBlock, leaf1.headBlock, median, this.headBlock) - } - - fun check(blockIDVisitor: IntConsumer, stack: IntArraySet = IntArraySet()) { - check(headBlock != INVALID_BLOCK_INDEX) { "Tree contains 'unallocated' blocks" } - - if (!stack.add(headBlock)) { - throw IllegalStateException("Cyclic block reference in search tree (${stack.joinToString(" -> ", transform = { readBlock(it).toString() })} -> $headBlock)") - } - - try { - blockIDVisitor.accept(headBlock) - } catch (err: Throwable) { - throw IllegalStateException("Tree consistency check failed on path ${stack.joinToString(" -> ", transform = { readBlock(it).toString() })}", err) - } - - for (block in keys.values) { - val read = readBlock(block) - - try { - blockIDVisitor.accept(block) - } catch (err: Throwable) { - throw IllegalStateException("Tree consistency check failed on path ${stack.joinToString(" -> ", transform = { readBlock(it).toString() })} -> $read", err) - } - - if (read.type != BlockType.DATA) { - throw IllegalStateException("Hit ${read.type} block while checking leaf consistency") - } - } - - stack.remove(headBlock) - } - - fun remove(key: ByteKey): Block? { - val block = keys.removeInt(key) - - if (block != INVALID_BLOCK_INDEX) { - return readBlock(block) - } - - return null - } - - fun compute(key: ByteKey, size: Int): List { - val alloc = allocBlocksByBytes(BlockType.DATA, size) - val old = keys.put(key, alloc.first().id) - - if (old != INVALID_BLOCK_INDEX) - pendingFree.add(old) - - return alloc - } - - fun get(key: ByteKey): Block? { - val get = keys.getInt(key) - if (get == INVALID_BLOCK_INDEX) return null - return readBlock(get) - } - - fun write(): Int { - if (headBlock != INVALID_BLOCK_INDEX) pendingFree.add(headBlock) - - val bytes = BlockOutputStream(BlockType.LEAF) - val stream = DataOutputStream(bytes) - stream.writeVarInt(keys.size) - - for ((key, block) in keys) { - key.write(stream) - stream.writeInt(block) - } - - stream.close() - - headBlock = bytes.head - check(headBlock != INVALID_BLOCK_INDEX) - return headBlock - } - } - - private inner class BlockInputStream(head: Block) : InputStream() { - private var currentBlock = head - private var currentOffset = 9 - private var isFinished = false - - override fun read(): Int { - if (isFinished) return -1 - - if (currentOffset < blockSize) { - return currentBlock.bytes[currentOffset++].toInt() and 0xFF - } else { - if (currentBlock.nextBlock == INVALID_BLOCK_INDEX) { - isFinished = true - return -1 - } - - currentBlock = readBlock(currentBlock.nextBlock) - currentOffset = 9 - return read() - } - } - } - - private inner class BlockOutputStream(val type: BlockType) : OutputStream() { - constructor(block: Block) : this(block.type) { - currentBlock = block - head = block.id - blocks.add(block) - } - - constructor(blocks: Collection) : this(blocks.first()) { - val itr = blocks.iterator() - itr.next() - - for (block in itr) { - preallocatedBlocks.add(block) - } - } - - private val blocks = ArrayList() - private val preallocatedBlocks = ArrayList() - private var currentBlock: Block? = null - private var currentOffset = 9 - - var head = INVALID_BLOCK_INDEX - private set - - fun ensureCapacity(bytes: Int): BlockOutputStream { - if (bytes == 0) return this - require(bytes > 0) { "Negative amount of bytes: $bytes" } - - val alloc = if (currentBlock == null && preallocatedBlocks.isEmpty()) { - allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1) - } else { - allocBlocks(type, (bytes - 1) / effectiveBlockSize + 1, currentBlock?.id ?: preallocatedBlocks.last().id) - } - - preallocatedBlocks.addAll(alloc) - currentBlock?.nextBlock = alloc.first().id - - return this - } - - override fun flush() { - blocks.forEach { it.write() } - blocks.clear() - if (currentBlock != null) blocks.add(currentBlock!!) - preallocatedBlocks.forEach { it.write() } // write preallocated blocks so they can be free'd later - } - - override fun close() { - flush() - } - - private fun nextBlock() { - val alloc = if (preallocatedBlocks.isNotEmpty()) { - preallocatedBlocks.removeAt(0) - } else { - allocBlocks(type, 1, currentBlock?.id ?: INVALID_BLOCK_INDEX)[0] - } - - if (head == INVALID_BLOCK_INDEX) head = alloc.id - currentBlock?.nextBlock = alloc.id - blocks.add(alloc) - currentOffset = 9 - currentBlock = alloc - } - - override fun write(b: ByteArray, off: Int, len: Int) { - Objects.checkFromIndexSize(off, len, b.size) - var written = 0 - - while (written < len) { - if (currentBlock == null || currentOffset >= blockSize) { - nextBlock() - } - - val writable = (len - written).coerceAtMost(blockSize - currentOffset) - System.arraycopy(b, off + written, currentBlock!!.bytes, currentOffset, writable) - written += writable - currentOffset += writable - } - } - - override fun write(b: Int) { - if (currentBlock != null && currentOffset < blockSize) { - currentBlock!!.bytes[currentOffset++] = b.toByte() - } else { - nextBlock() - currentBlock!!.bytes[currentOffset++] = b.toByte() - } - } - } - - /** - * represents a block in file - */ - private inner class Block(val id: Int) { - init { - require(id >= 0) { "Invalid block ID: $id" } - } - - val bytes = ByteArray(blockSize) - - override fun toString(): String { - return "BTreeDB6.Block[$id, $type]" - } - - val offset: Long get() = (id + 1L) * blockSize - - var type: BlockType - get() = BlockType.e[bytes[0].toInt()] - set(value) { bytes[0] = value.ordinal.toByte() } - - var nextBlock: Int - get() { - val backing = bytes - - val v0 = (backing[1 + 0].toInt() and 0xFF) shl 24 - val v1 = (backing[1 + 1].toInt() and 0xFF) shl 16 - val v2 = (backing[1 + 2].toInt() and 0xFF) shl 8 - val v3 = (backing[1 + 3].toInt() and 0xFF) - - return v0 or v1 or v2 or v3 - } - set(value) { - val backing = bytes - backing[1 + 0] = ((value ushr 24) and 0xFF).toByte() - backing[1 + 1] = ((value ushr 16) and 0xFF).toByte() - backing[1 + 2] = ((value ushr 8) and 0xFF).toByte() - backing[1 + 3] = ((value) and 0xFF).toByte() - } - - fun read() { - reader.seek(offset) - reader.readFully(bytes) - } - - fun write() { - reader.seek(offset) - reader.write(bytes) - } - - fun free() { - bytes.fill(0) - type = BlockType.FREE - nextBlock = INVALID_BLOCK_INDEX - write() - } - - fun freeChain(into: MutableCollection) { - check(into.add(this)) - var next = nextBlock - - while (next != INVALID_BLOCK_INDEX) { - val block = Block(next) - block.read() - next = block.nextBlock - check(into.add(block)) - } - } - - override fun equals(other: Any?): Boolean { - return other === this || other is Block && id == other.id - } - - override fun hashCode(): Int { - return id.hashCode() - } - } - - companion object { - const val INVALID_BLOCK_INDEX = -1 - - /** - * Creates a new [BTreeDB6] file, with block size specified as [blockSize]. - * - * Ideally, block size should match smallest unit of work done by file system, which is typically 4 KiB, or 4096 bytes. - * While arbitrary block size is supported (128 bytes and up, until [Int.MAX_VALUE]), be aware that each I/O operation BTreeDB6 - * performs will always read/write full block. So, block size of BTreeDB6 is smallest unit of work for that database. - */ - @JvmStatic - @JvmOverloads - fun create(file: File, blockSize: Int = 4096, sync: Boolean = true): BTreeDB6 { - require(blockSize >= 128) { "Degenerate block size: $blockSize" } - - if (file.exists()) - throw FileAlreadyExistsException(file) - - val writer = RandomAccessFile(file, "rw") - - writer.write("BTreeDB6".toByteArray()) // specify version as 6 to prohibit original game from opening - // and corrupting the file - - // block size - writer.writeInt(blockSize) - - writer.writeInt(INVALID_BLOCK_INDEX) // free bitmap block index - writer.writeInt(INVALID_BLOCK_INDEX) // tree root block index - - return BTreeDB6(file, writer, sync) - } - } -} diff --git a/src/main/kotlin/ru/dbotthepony/kommons/io/ByteKey.kt b/src/main/kotlin/ru/dbotthepony/kommons/io/ByteKey.kt deleted file mode 100644 index 09c768b..0000000 --- a/src/main/kotlin/ru/dbotthepony/kommons/io/ByteKey.kt +++ /dev/null @@ -1,102 +0,0 @@ -package ru.dbotthepony.kommons.io - -import java.io.InputStream -import java.io.OutputStream -import java.util.UUID - -private fun toBytes(key: UUID): ByteArray { - val value = ByteArray(16) - - value[0] = ((key.mostSignificantBits ushr 56) and 0xFFL).toByte() - value[1] = ((key.mostSignificantBits ushr 48) and 0xFFL).toByte() - value[2] = ((key.mostSignificantBits ushr 40) and 0xFFL).toByte() - value[3] = ((key.mostSignificantBits ushr 32) and 0xFFL).toByte() - value[4] = ((key.mostSignificantBits ushr 24) and 0xFFL).toByte() - value[5] = ((key.mostSignificantBits ushr 16) and 0xFFL).toByte() - value[6] = ((key.mostSignificantBits ushr 8) and 0xFFL).toByte() - value[7] = ((key.mostSignificantBits ushr 0) and 0xFFL).toByte() - - value[8 + 0] = ((key.leastSignificantBits ushr 56) and 0xFFL).toByte() - value[8 + 1] = ((key.leastSignificantBits ushr 48) and 0xFFL).toByte() - value[8 + 2] = ((key.leastSignificantBits ushr 40) and 0xFFL).toByte() - value[8 + 3] = ((key.leastSignificantBits ushr 32) and 0xFFL).toByte() - value[8 + 4] = ((key.leastSignificantBits ushr 24) and 0xFFL).toByte() - value[8 + 5] = ((key.leastSignificantBits ushr 16) and 0xFFL).toByte() - value[8 + 6] = ((key.leastSignificantBits ushr 8) and 0xFFL).toByte() - value[8 + 7] = ((key.leastSignificantBits ushr 0) and 0xFFL).toByte() - - return value -} - -class ByteKey private constructor(private val bytes: ByteArray, mark: Nothing?) : Comparable { - constructor(vararg bytes: Byte) : this(bytes, null) - constructor(key: String) : this(key.toByteArray(), null) - constructor(key: UUID) : this(toBytes(key), null) - - override fun equals(other: Any?): Boolean { - return this === other || other is ByteKey && other.bytes.contentEquals(bytes) - } - - val size: Int - get() = bytes.size - - operator fun get(index: Int): Byte { - return bytes[index] - } - - fun write(stream: OutputStream) { - stream.writeVarInt(bytes.size) - stream.write(bytes) - } - - fun writeRaw(stream: OutputStream) { - stream.write(bytes) - } - - fun toByteArray(): ByteArray { - return bytes.clone() - } - - override fun hashCode(): Int { - return bytes.contentHashCode() - } - - override fun toString(): String { - return "ByteKey[${bytes.map { it.toInt() and 0xFF }.joinToString(", ")}]" - } - - override fun compareTo(other: ByteKey): Int { - val cmp = size.compareTo(other.size) - if (cmp != 0) return cmp - - for (i in bytes.indices) { - if (bytes[i].toInt() and 0xFF > other.bytes[i].toInt() and 0xFF) { - return 1 - } else if (bytes[i].toInt() and 0xFF < other.bytes[i].toInt() and 0xFF) { - return -1 - } - } - - return 0 - } - - companion object { - /** - * Constructs [ByteKey] without any copying of provided array - */ - @JvmStatic - fun wrap(bytes: ByteArray): ByteKey { - return ByteKey(bytes, null) - } - - @JvmStatic - fun read(stream: InputStream): ByteKey { - return stream.readByteKey() - } - - @JvmStatic - fun readRaw(stream: InputStream, size: Int): ByteKey { - return stream.readByteKeyRaw(size) - } - } -} diff --git a/src/main/kotlin/ru/dbotthepony/kommons/io/DelegateSyncher.kt b/src/main/kotlin/ru/dbotthepony/kommons/io/DelegateSyncher.kt index 95ec089..44f769c 100644 --- a/src/main/kotlin/ru/dbotthepony/kommons/io/DelegateSyncher.kt +++ b/src/main/kotlin/ru/dbotthepony/kommons/io/DelegateSyncher.kt @@ -43,7 +43,7 @@ import kotlin.concurrent.withLock * to avoid structure corruption a lock is employed. * * Attached delegates can be safely mutated by multiple threads concurrently - * (attached [ListenableDelegate] can call [Listenable.addListener] callback concurrently). + * (attached [ListenableDelegate] can call [Listenable.addListener] callback from different threads). */ @Suppress("UNCHECKED_CAST", "UNUSED") class DelegateSyncher : Observer { diff --git a/src/main/kotlin/ru/dbotthepony/kommons/io/InputStreamUtils.kt b/src/main/kotlin/ru/dbotthepony/kommons/io/InputStreamUtils.kt index 8558564..ca8b0d7 100644 --- a/src/main/kotlin/ru/dbotthepony/kommons/io/InputStreamUtils.kt +++ b/src/main/kotlin/ru/dbotthepony/kommons/io/InputStreamUtils.kt @@ -327,11 +327,3 @@ fun InputStream.readByteArray(maxRead: Int = Int.MAX_VALUE): ByteArray { throw IllegalArgumentException("Trying to read $size bytes when $maxRead is the max allowed") return readOrError(ByteArray(size)) } - -fun InputStream.readByteKey(): ByteKey { - return ByteKey(*ByteArray(readVarInt()).also { read(it) }) -} - -fun InputStream.readByteKeyRaw(size: Int): ByteKey { - return ByteKey(*ByteArray(size).also { read(it) }) -} diff --git a/src/main/kotlin/ru/dbotthepony/kommons/io/OutputStreamUtils.kt b/src/main/kotlin/ru/dbotthepony/kommons/io/OutputStreamUtils.kt index d20b780..d13d5ff 100644 --- a/src/main/kotlin/ru/dbotthepony/kommons/io/OutputStreamUtils.kt +++ b/src/main/kotlin/ru/dbotthepony/kommons/io/OutputStreamUtils.kt @@ -327,11 +327,3 @@ fun OutputStream.writeByteArray(value: ByteArray, offset: Int, length: Int) { writeVarInt(length) write(value, offset, length) } - -fun OutputStream.writeByteKey(key: ByteKey) { - key.write(this) -} - -fun OutputStream.writeRawByteKey(key: ByteKey) { - key.writeRaw(this) -} diff --git a/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt b/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt deleted file mode 100644 index 77f31fd..0000000 --- a/src/test/kotlin/ru/dbotthepony/kommons/test/BTreeDB6Tests.kt +++ /dev/null @@ -1,52 +0,0 @@ -package ru.dbotthepony.kommons.test - -import org.junit.jupiter.api.DisplayName -import org.junit.jupiter.api.Test -import ru.dbotthepony.kommons.io.BTreeDB6 -import ru.dbotthepony.kommons.io.ByteKey -import java.io.File -import java.util.concurrent.Executor -import kotlin.test.assertEquals - -object BTreeDB6Tests { - @Test - @DisplayName("BTreeDB6 Tests") - fun test() { - val file = File("dbtest.bdb") - if (file.exists()) file.delete() - val create = BTreeDB6.create(file, 4096, sync = false) - - for (i in 0 .. 800) { - val s = "This is key $i" - val k = ByteKey("This is key $i") - create.write(k, s.toByteArray()) - assertEquals(s, String(create.read(k).get())) - } - - for (i in 0 .. 800) { - val s = "This is key $i" - val k = ByteKey("This is key $i") - assertEquals(s, String(create.read(k).get())) - } - - for (i in 0 .. 800) { - val s = "This is key $i" - val k = ByteKey("This is key $i") - create.write(k, s.toByteArray()) - assertEquals(s, String(create.read(k).get())) - } - - create.close() - - val create2 = BTreeDB6(file) - - for (i in 0 .. 800) { - val s = "This is key $i" - val k = ByteKey("This is key $i") - assertEquals(s, String(create2.read(k).get())) - } - - create2.checkFreeBitmap() - create2.close() - } -}