KStarbound/src/main/kotlin/ru/dbotthepony/kstarbound/io/BTreeDB5.kt

390 lines
10 KiB
Kotlin

package ru.dbotthepony.kstarbound.io
import it.unimi.dsi.fastutil.io.FastByteArrayInputStream
import it.unimi.dsi.fastutil.longs.LongArrayList
import ru.dbotthepony.kommons.io.ByteKey
import ru.dbotthepony.kommons.io.readByteKeyRaw
import ru.dbotthepony.kommons.io.readVarInt
import ru.dbotthepony.kommons.util.KOptional
import java.io.Closeable
import java.io.DataInputStream
import java.io.File
import java.io.InputStream
import java.io.RandomAccessFile
import java.util.*
/**
 * Consumes a single byte from [reader] and verifies that it is the expected magic character.
 *
 * @param reader file positioned at the header byte to verify; advanced by one byte
 * @param required character the next byte must be equal to
 * @throws IllegalArgumentException if the file has ended or the byte differs from [required]
 */
private fun readHeader(reader: RandomAccessFile, required: Char) {
    val read = reader.read()
    // RandomAccessFile.read() reports end of file as -1; handle it explicitly instead of
    // letting it be truncated to the character U+FFFF by toChar().
    require(read != -1) { "Bad BTreeDB5 header, expected ${required.code}, got end of file" }
    require(read.toChar() == required) { "Bad BTreeDB5 header, expected ${required.code}, got $read" }
}
/**
 * Kind of a block inside the database file, identified by the two-character tag
 * stored in the first two bytes of the block.
 *
 * @property identity the two-character tag as a string
 */
private enum class TreeBlockType(val identity: String) {
    INDEX("II"),
    LEAF("LL"),
    FREE("FF");

    /** First tag character as a raw byte. */
    val i0 = identity[0].code.toByte()

    /** Second tag character as a raw byte. */
    val i1 = identity[1].code.toByte()

    companion object {
        /**
         * Looks a block type up by its two-character tag.
         *
         * @throws NoSuchElementException if no block type uses [index] as its tag
         */
        operator fun get(index: String): TreeBlockType =
            values().firstOrNull { candidate -> candidate.identity == index }
                ?: throw NoSuchElementException("Unknown block type $index")

        /**
         * Looks a block type up by the two raw tag bytes.
         *
         * @throws NoSuchElementException if no block type matches the byte pair
         */
        operator fun get(i0: Byte, i1: Byte): TreeBlockType =
            values().firstOrNull { candidate -> candidate.i0 == i0 && candidate.i1 == i1 }
                ?: throw NoSuchElementException("Unknown block type ${String(byteArrayOf(i0, i1))}")
    }
}
/**
 * Read-only accessor for a `BTreeDB5` database file.
 *
 * The file starts with a 512 byte header followed by fixed-size blocks. Every block
 * begins with a two-byte type tag (see [TreeBlockType]); index blocks route lookups to
 * child blocks, leaf blocks hold `key -> value` entries and may continue in another
 * block through a trailing pointer.
 *
 * The file is opened in the constructor and stays open until [close] is called. If the
 * header cannot be validated, the file is closed again before the exception propagates.
 *
 * Every operation repositions the single underlying [RandomAccessFile], so concurrent use
 * of one instance presumably requires external synchronization.
 *
 * @property file database file to read
 * @throws IllegalArgumentException if the file does not start with the `BTreeDB5` magic
 * @throws IllegalStateException if the block size is unusable or the file length is not
 * the header plus a whole number of blocks
 */
class BTreeDB5(val file: File) : Closeable {
    private val reader = RandomAccessFile(file, "r")

    /** Size in bytes of each block following the header. */
    val blockSize: Int

    /** Raw 16 bytes of the database name field, exactly as stored in the header. */
    val dbNameRaw: ByteArray

    /** Length in bytes of every key stored in the tree. */
    val keySize: Int

    /** Header flag selecting which of the two root descriptors in the header is used. */
    val useNodeTwo: Boolean

    /** Unsigned index of the free-node block taken from the selected root descriptor. */
    val freeNodeIndex: Long

    /** Unsigned index of the root block of the tree. */
    val rootNodeIndex: Long

    /** Absolute file offset at which block 0 starts. */
    val blocksOffsetStart: Long

    init {
        val header = try {
            parseHeader()
        } catch (err: Throwable) {
            // Do not leak the file handle when the file turns out not to be a valid database.
            try {
                reader.close()
            } catch (closeErr: Throwable) {
                err.addSuppressed(closeErr)
            }

            throw err
        }

        blockSize = header.blockSize
        dbNameRaw = header.dbNameRaw
        keySize = header.keySize
        useNodeTwo = header.useNodeTwo
        freeNodeIndex = header.freeNodeIndex
        rootNodeIndex = header.rootNodeIndex
        blocksOffsetStart = header.blocksOffsetStart
    }

    /** Plain holder for the values parsed out of the file header. */
    private class Header(
        val blockSize: Int,
        val dbNameRaw: ByteArray,
        val keySize: Int,
        val useNodeTwo: Boolean,
        val freeNodeIndex: Long,
        val rootNodeIndex: Long,
        val blocksOffsetStart: Long
    )

    /**
     * Reads and validates the file header, leaving [reader] positioned at the first block.
     *
     * Only touches [reader]; safe to call before the public properties are assigned.
     */
    private fun parseHeader(): Header {
        for (expected in "BTreeDB5") {
            readHeader(reader, expected)
        }

        val blockSize = reader.readInt()
        // A block must at least hold the 2-byte type tag and the 4-byte leaf continuation
        // pointer; this also guards the modulo below against division by zero.
        check(blockSize > 6) { "Invalid block size $blockSize" }

        // readFully, because a plain read() is allowed to return fewer bytes than requested
        val dbNameRaw = ByteArray(16).also { reader.readFully(it) }
        val keySize = reader.readInt()
        val useNodeTwo = reader.readBoolean()

        // we will work with only one tree: skip the first root descriptor if the second is in use
        if (useNodeTwo) {
            reader.skipBytes(17)
        }

        val freeNodeIndex = reader.readInt().toLong() and 0xFFFFFFFFL

        // "device size", basically reflects file size.
        // This was done by Starbound devs because there is a btreedb test which implements
        // a "writable device" in memory, and it doesn't report its own size / must be grown manually.
        reader.skipBytes(8)

        val rootNodeIndex = reader.readInt().toLong() and 0xFFFFFFFFL

        // "root node is leaf". This is ignored even by the original engine; likely a leftover
        // from older versions of the format, where blocks didn't have headers.
        reader.skipBytes(1)

        // skip the second root descriptor if the first one was the one we read
        if (!useNodeTwo) {
            reader.skipBytes(17)
        }

        // remainder of the 512 byte header
        reader.skipBytes(445)
        val blocksOffsetStart = reader.filePointer

        check((reader.length() - 512L) % blockSize == 0L) { "Junk data somewhere in file (${(reader.length() - 512L) % blockSize} lingering bytes)" }

        return Header(blockSize, dbNameRaw, keySize, useNodeTwo, freeNodeIndex, rootNodeIndex, blocksOffsetStart)
    }

    /**
     * Translates a block index into an absolute file offset.
     *
     * @throws IllegalArgumentException if [blockID] is negative or lies past the end of file
     */
    private fun blockOffset(blockID: Long): Long {
        require(blockID >= 0) { "Negative block ID $blockID" }
        val offset = blockID * blockSize + blocksOffsetStart
        require(offset < reader.length()) { "Block with ID $blockID does not exist (seeking $offset; max ${reader.length()})" }
        return offset
    }

    /**
     * Appends every key stored at or below block [index] to [list], recursing through index blocks.
     *
     * @throws IllegalStateException if a free block is reachable from the tree
     */
    private fun doFindAllKeys(index: Long, list: MutableList<ByteKey>) {
        seekBlock(index)
        val stream = BlockInputStream()
        val data = stream.data

        when (stream.type) {
            TreeBlockType.LEAF -> {
                val keyAmount = data.readInt()

                for (i in 0 until keyAmount) {
                    list.add(data.readByteKeyRaw(keySize))
                    // value payload is not needed here: read its length and jump over it
                    data.skipBytes(data.readVarInt())
                }
            }

            TreeBlockType.INDEX -> {
                // one byte this reader does not use
                data.skipBytes(1)
                val keyAmount = data.readInt()

                // All child pointers are collected before recursing, because recursion
                // repositions the shared file reader.
                val blockList = LongArrayList(keyAmount)
                blockList.add(data.readInt().toLong() and 0xFFFFFFFFL)

                for (i in 0 until keyAmount) {
                    // key
                    data.skipBytes(keySize)
                    // pointer to block
                    blockList.add(data.readInt().toLong() and 0xFFFFFFFFL)
                }

                // read keys of all child blocks
                for (block in blockList.longIterator()) {
                    doFindAllKeys(block, list)
                }
            }

            TreeBlockType.FREE -> throw IllegalStateException("Hit free block $index while collecting keys")
        }
    }

    /** Closes the underlying file. */
    override fun close() {
        reader.close()
    }

    /**
     * Walks the whole tree and returns every key found in leaf blocks, in traversal order.
     */
    fun findAllKeys(): List<ByteKey> {
        val list = ArrayList<ByteKey>()
        doFindAllKeys(rootNodeIndex, list)
        return list
    }

    /**
     * Descends from the root through index blocks and returns the stream of the leaf
     * that would hold [key], positioned right after the block type tag.
     *
     * @throws IllegalStateException if a free block is encountered on the way down
     */
    private fun openLeafFor(key: ByteKey): BlockInputStream {
        seekBlock(rootNodeIndex)
        var blockStream = BlockInputStream()

        while (blockStream.type != TreeBlockType.LEAF) {
            if (blockStream.type == TreeBlockType.FREE) {
                throw IllegalStateException("Hit free block while scanning index for $key")
            }

            // one byte this reader does not use
            blockStream.skip(1)
            val data = blockStream.data
            val keyCount = data.readInt()
            var next: Int? = null

            // Layout: pointer, key, pointer, key, ..., pointer
            for (keyIndex in 0 until keyCount) {
                // pointer to the left block
                val pointer = data.readInt()
                // left key: everything smaller than it lives in the left block
                val seekKey = data.readByteKeyRaw(keySize)

                // wanted key is smaller than this separator, so it is somewhere in the left block
                if (key < seekKey) {
                    next = pointer
                    break
                }
            }

            // not smaller than any separator: follow the trailing, rightmost pointer
            seekBlock(next ?: data.readInt())
            blockStream = BlockInputStream()
        }

        return blockStream
    }

    /**
     * Finds the leaf entry for [key].
     *
     * @return the leaf data stream positioned at the first byte of the value, paired with
     * the value length in bytes; `null` if the leaf does not contain [key]
     */
    private fun seekValue(key: ByteKey): Pair<DataInputStream, Int>? {
        // we arrived at a leaf, now scan its entries linearly
        val data = openLeafFor(key).data
        val keyCount = data.readInt()

        for (keyIndex in 0 until keyCount) {
            // read the key
            val seekKey = data.readByteKeyRaw(keySize)
            // read the value length
            val dataLength = data.readVarInt()

            // this is our entry
            if (seekKey == key) {
                return data to dataLength
            }

            data.skipBytes(dataLength)
        }

        return null
    }

    /**
     * Tells whether [key] is present in the database.
     *
     * Stored keys are always read as [keySize] bytes, so a key of any other length is
     * reported as absent without touching the file.
     *
     * @throws IllegalStateException if a free block is encountered while descending the tree
     */
    fun contains(key: ByteKey): Boolean {
        if (key.size != keySize) return false
        return seekValue(key) != null
    }

    /**
     * Reads the value stored under [key].
     *
     * @return the value bytes (possibly an empty array), or an empty [KOptional] if the key is absent
     * @throws IllegalArgumentException if [key] is not exactly [keySize] bytes long
     * @throws IllegalStateException if a free block is encountered while descending the tree
     */
    fun read(key: ByteKey): KOptional<ByteArray> {
        require(key.size == keySize) { "Key provided is ${key.size} in size, while $keySize is required" }

        val (data, dataLength) = seekValue(key) ?: return KOptional.empty()
        val binary = ByteArray(dataLength)
        // no-op for zero-length values
        data.readFully(binary)
        return KOptional(binary)
    }

    /** Positions the file reader at the start of block [id]. */
    private fun seekBlock(id: Long) {
        reader.seek(blockOffset(id))
    }

    /** Same as the [Long] overload, treating [id] as an unsigned 32-bit block index. */
    private fun seekBlock(id: Int) {
        seekBlock(id.toLong() and 0xFFFFFFFFL)
    }

    /**
     * Stream over the payload of the block the file reader is currently positioned at.
     *
     * For leaf blocks the stream transparently continues into the next block named by
     * the trailing four-byte pointer, until that pointer equals [INVALID_BLOCK_INDEX].
     * Index blocks never continue. Free blocks expose no payload at all, so callers can
     * inspect [type] and reject them.
     */
    private inner class BlockInputStream : InputStream() {
        // Reads one whole block from the current file position.
        private val block = ByteArray(blockSize).also { reader.readFully(it) }

        /** Type of the block this stream was opened on. */
        val type: TreeBlockType = TreeBlockType[block[0], block[1]]

        // Payload window inside [block]; starts after the two-byte type tag.
        private val backingStream: FastByteArrayInputStream = when (type) {
            // NOTE(review): offset 2 with length size - 4 leaves the final two bytes of an
            // index block unreadable; confirm whether size - 2 was intended.
            TreeBlockType.INDEX -> FastByteArrayInputStream(block, 2, block.size - 4)
            // leaf payload excludes the tag and the trailing four-byte continuation pointer
            TreeBlockType.LEAF -> FastByteArrayInputStream(block, 2, block.size - 6)
            TreeBlockType.FREE -> FastByteArrayInputStream(block, 2, 0)
        }

        /** Convenience typed view over this stream. */
        val data = DataInputStream(this)

        private var isFinished = false

        /**
         * Loads the continuation block of a leaf into the buffer.
         *
         * @return `true` if a continuation block was loaded, `false` if the stream has ended
         */
        private fun seekNextBlock(): Boolean {
            if (isFinished || type != TreeBlockType.LEAF) {
                isFinished = true
                return false
            }

            // last four bytes of a leaf: big-endian unsigned index of the continuation block
            val b0 = (block[blockSize - 4].toLong() and 0xFFL) shl 24
            val b1 = (block[blockSize - 3].toLong() and 0xFFL) shl 16
            val b2 = (block[blockSize - 2].toLong() and 0xFFL) shl 8
            val b3 = (block[blockSize - 1].toLong() and 0xFFL)
            val nextBlockIndex = b0 or b1 or b2 or b3

            if (nextBlockIndex == INVALID_BLOCK_INDEX) {
                isFinished = true
                return false
            }

            seekBlock(nextBlockIndex)
            // the buffer is refilled in place, so the backing stream only needs a rewind
            reader.readFully(block)
            val read = TreeBlockType[block[0], block[1]]
            check(read == type) { "Block continuation type mismatch ($type != $read)" }
            backingStream.position(0)
            return true
        }

        override fun read(): Int {
            if (isFinished) return -1
            var read = backingStream.read()

            if (read == -1) {
                seekNextBlock()
                read = backingStream.read()
            }

            return read
        }

        override fun read(b: ByteArray, off: Int, len: Int): Int {
            Objects.checkFromIndexSize(off, len, b.size)
            if (len == 0) return 0
            if (isFinished) return -1
            var readBytes = 0

            while (readBytes < len && !isFinished) {
                val read = backingStream.read(b, off + readBytes, len - readBytes)

                if (read == -1) {
                    seekNextBlock()
                    continue
                }

                readBytes += read
            }

            // InputStream contract: signal end of stream with -1 when nothing could be read
            return if (readBytes == 0) -1 else readBytes
        }

        override fun skip(n: Long): Long {
            if (isFinished || n <= 0L) return 0
            var remaining = n

            while (remaining > 0L && !isFinished) {
                val skipped = backingStream.skip(remaining)

                // current block exhausted before the request was satisfied
                if (skipped < remaining)
                    seekNextBlock()

                remaining -= skipped
            }

            return n - remaining
        }
    }

    companion object {
        /** Continuation pointer value marking the end of a leaf chain. */
        const val INVALID_BLOCK_INDEX = 0xFFFFFFFFL

        /** [INVALID_BLOCK_INDEX] as a signed 32-bit value. */
        const val INVALID_BLOCK_INDEX_INT = INVALID_BLOCK_INDEX.toInt()
    }
}