Summary: Remove the restriction that limited UDI to ingest-only, Puts-only use cases. This enables UDI plugins (including the trie index from https://github.com/facebook/rocksdb/issues/14310) to work with all operation types: Put, Delete, Merge, SingleDelete, PutEntity, etc. Part of https://github.com/facebook/rocksdb/issues/12396 Pull Request resolved: https://github.com/facebook/rocksdb/pull/14399 Reviewed By: pdillinger Differential Revision: D96392636 Pulled By: xingbowang fbshipit-source-id: 0f1e6c38531fa72539a0e2c6a3dffff333392b4c
2843 lines
110 KiB
C++
2843 lines
110 KiB
C++
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
// DB-level tests for the trie-based User Defined Index (UDI). Validates that
|
|
// a DB opened with the trie UDI factory correctly handles all operation types
|
|
// (Put, Delete, Merge, SingleDelete, PutEntity, TimedPut, DeleteRange) through
|
|
// flush and compaction, and that the resulting SST files are readable with
|
|
// correct data.
|
|
//
|
|
// These tests complement the SST-level tests in trie_index_test.cc (which use
|
|
// SstFileWriter and are limited to Put/Delete/Merge) by exercising the full
|
|
// DB path including CompactionIterator, memtable flush, and the UDI builder
|
|
// wrapper's ValueType mapping and kTypeValuePreferredSeqno handling.
|
|
|
|
#include <memory>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "port/port.h"
|
|
#include "rocksdb/db.h"
|
|
#include "rocksdb/options.h"
|
|
#include "rocksdb/slice.h"
|
|
#include "rocksdb/sst_file_writer.h"
|
|
#include "rocksdb/status.h"
|
|
#include "rocksdb/table.h"
|
|
#include "rocksdb/utilities/transaction.h"
|
|
#include "rocksdb/utilities/transaction_db.h"
|
|
#include "rocksdb/wide_columns.h"
|
|
#include "rocksdb/write_batch.h"
|
|
#include "test_util/testharness.h"
|
|
#include "test_util/testutil.h"
|
|
#include "util/compression.h"
|
|
#include "util/random.h"
|
|
#include "utilities/merge_operators.h"
|
|
#include "utilities/trie_index/trie_index_factory.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
namespace trie_index {
|
|
|
|
// Encodes an integer as an 8-byte big-endian key body, matching the pattern
|
|
// used by db_stress's test_batches_snapshots mode.
|
|
static std::string MakeKeyBody(int k) {
|
|
std::string key_body(8, '\0');
|
|
uint64_t val = static_cast<uint64_t>(k);
|
|
for (int i = 7; i >= 0; --i) {
|
|
key_body[i] = static_cast<char>(val & 0xff);
|
|
val >>= 8;
|
|
}
|
|
return key_body;
|
|
}
|
|
|
|
class TrieIndexDBTest : public testing::Test {
|
|
protected:
|
|
void SetUp() override {
|
|
trie_factory_ = std::make_shared<TrieIndexFactory>();
|
|
dbname_ = test::PerThreadDBPath("trie_index_db_test");
|
|
ASSERT_OK(DestroyDB(dbname_, Options()));
|
|
}
|
|
|
|
void TearDown() override {
|
|
if (db_) {
|
|
EXPECT_OK(db_->Close());
|
|
db_.reset();
|
|
}
|
|
EXPECT_OK(DestroyDB(dbname_, last_options_));
|
|
}
|
|
|
|
// Opens a DB with the trie UDI factory configured. Caller should set
|
|
// options_ fields before calling this. An optional block_size overrides
|
|
// the default to force more data blocks in the SST.
|
|
Status OpenDB(int block_size = 0) {
|
|
options_.create_if_missing = true;
|
|
BlockBasedTableOptions table_options;
|
|
table_options.user_defined_index_factory = trie_factory_;
|
|
if (block_size > 0) {
|
|
table_options.block_size = block_size;
|
|
}
|
|
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
last_options_ = options_;
|
|
return DB::Open(options_, dbname_, &db_);
|
|
}
|
|
|
|
// Returns a ReadOptions that routes reads through the standard binary
|
|
// search index (the default when table_index_factory is null).
|
|
ReadOptions StandardIndexReadOptions() const { return ReadOptions(); }
|
|
|
|
// Returns a ReadOptions that routes reads through the trie UDI index.
|
|
ReadOptions TrieIndexReadOptions() const {
|
|
ReadOptions ro;
|
|
ro.table_index_factory = trie_factory_.get();
|
|
return ro;
|
|
}
|
|
|
|
// Collects all visible keys via forward scan using the given ReadOptions.
|
|
std::vector<std::string> ScanAllKeys(const ReadOptions& ro) {
|
|
std::vector<std::string> keys;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekToFirst();
|
|
for (; iter->Valid(); iter->Next()) {
|
|
keys.push_back(iter->key().ToString());
|
|
}
|
|
EXPECT_OK(iter->status());
|
|
return keys;
|
|
}
|
|
|
|
// Collects all visible keys via forward scan using the standard index.
|
|
std::vector<std::string> ScanAllKeys() {
|
|
return ScanAllKeys(StandardIndexReadOptions());
|
|
}
|
|
|
|
// Collects all visible (key, value) pairs via forward scan.
|
|
std::vector<std::pair<std::string, std::string>> ScanAllKeyValues(
|
|
const ReadOptions& ro) {
|
|
std::vector<std::pair<std::string, std::string>> kvs;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekToFirst();
|
|
for (; iter->Valid(); iter->Next()) {
|
|
kvs.emplace_back(iter->key().ToString(), iter->value().ToString());
|
|
}
|
|
EXPECT_OK(iter->status());
|
|
return kvs;
|
|
}
|
|
|
|
// Verifies that forward scan via SeekToFirst+Next produces the same key
|
|
// set through both the standard index and the trie index.
|
|
void VerifyForwardScanBothIndexes(
|
|
const std::vector<std::string>& expected_keys) {
|
|
{
|
|
SCOPED_TRACE("standard index");
|
|
ASSERT_EQ(ScanAllKeys(StandardIndexReadOptions()), expected_keys);
|
|
}
|
|
{
|
|
SCOPED_TRACE("trie index");
|
|
ASSERT_EQ(ScanAllKeys(TrieIndexReadOptions()), expected_keys);
|
|
}
|
|
}
|
|
|
|
// Verifies that forward scan via SeekToFirst+Next produces the same
|
|
// (key, value) pairs through both indexes.
|
|
void VerifyForwardScanBothIndexes(
|
|
const std::vector<std::pair<std::string, std::string>>& expected_kvs) {
|
|
{
|
|
SCOPED_TRACE("standard index");
|
|
ASSERT_EQ(ScanAllKeyValues(StandardIndexReadOptions()), expected_kvs);
|
|
}
|
|
{
|
|
SCOPED_TRACE("trie index");
|
|
ASSERT_EQ(ScanAllKeyValues(TrieIndexReadOptions()), expected_kvs);
|
|
}
|
|
}
|
|
|
|
// Verifies that a point Get returns the expected value through both indexes.
|
|
void VerifyGetBothIndexes(const std::string& key,
|
|
const std::string& expected_value) {
|
|
for (const auto& ro :
|
|
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::string value;
|
|
ASSERT_OK(db_->Get(ro, key, &value));
|
|
ASSERT_EQ(value, expected_value);
|
|
}
|
|
}
|
|
|
|
// Verifies that a point Get returns NotFound through both indexes.
|
|
void VerifyGetNotFoundBothIndexes(const std::string& key) {
|
|
for (const auto& ro :
|
|
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::string value;
|
|
ASSERT_TRUE(db_->Get(ro, key, &value).IsNotFound());
|
|
}
|
|
}
|
|
|
|
// Verifies Get with a snapshot through both indexes.
|
|
void VerifyGetBothIndexes(const Snapshot* snap, const std::string& key,
|
|
const std::string& expected_value) {
|
|
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(base_ro.table_index_factory ? "trie index"
|
|
: "standard index");
|
|
base_ro.snapshot = snap;
|
|
std::string value;
|
|
ASSERT_OK(db_->Get(base_ro, key, &value));
|
|
ASSERT_EQ(value, expected_value);
|
|
}
|
|
}
|
|
|
|
// Verifies Get returns NotFound with a snapshot through both indexes.
|
|
void VerifyGetNotFoundBothIndexes(const Snapshot* snap,
|
|
const std::string& key) {
|
|
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(base_ro.table_index_factory ? "trie index"
|
|
: "standard index");
|
|
base_ro.snapshot = snap;
|
|
std::string value;
|
|
ASSERT_TRUE(db_->Get(base_ro, key, &value).IsNotFound());
|
|
}
|
|
}
|
|
|
|
// Verifies that a forward scan with a snapshot produces the expected
|
|
// (key, value) pairs through both indexes.
|
|
void VerifyForwardScanBothIndexes(
|
|
const Snapshot* snap,
|
|
const std::vector<std::pair<std::string, std::string>>& expected_kvs) {
|
|
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(base_ro.table_index_factory ? "trie index"
|
|
: "standard index");
|
|
base_ro.snapshot = snap;
|
|
ASSERT_EQ(ScanAllKeyValues(base_ro), expected_kvs);
|
|
}
|
|
}
|
|
|
|
// Verifies that Seek to a specific key through both indexes returns the
|
|
// same result.
|
|
void VerifySeekBothIndexes(const std::string& seek_key,
|
|
const std::string& expected_key,
|
|
const std::string& expected_value) {
|
|
for (const auto& ro :
|
|
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->Seek(seek_key);
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), expected_key);
|
|
ASSERT_EQ(iter->value().ToString(), expected_value);
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// Verifies that Seek with a snapshot through both indexes returns the
|
|
// same result.
|
|
void VerifySeekBothIndexes(const Snapshot* snap, const std::string& seek_key,
|
|
const std::string& expected_key,
|
|
const std::string& expected_value) {
|
|
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(base_ro.table_index_factory ? "trie index"
|
|
: "standard index");
|
|
base_ro.snapshot = snap;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(base_ro));
|
|
iter->Seek(seek_key);
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), expected_key);
|
|
ASSERT_EQ(iter->value().ToString(), expected_value);
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// Opens without UDI factory (standard index only). Used to test graceful
|
|
// degradation when reopening a DB that has UDI SSTs.
|
|
Status OpenDBWithoutUDI(int block_size = 0) {
|
|
options_.create_if_missing = true;
|
|
BlockBasedTableOptions table_options;
|
|
// Deliberately no user_defined_index_factory.
|
|
if (block_size > 0) {
|
|
table_options.block_size = block_size;
|
|
}
|
|
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
last_options_ = options_;
|
|
return DB::Open(options_, dbname_, &db_);
|
|
}
|
|
|
|
// Verify prefix-scan lockstep across `num_prefixes` iterators.
|
|
//
|
|
// Creates one iterator per prefix digit (0..num_prefixes-1), seeks each to
|
|
// its prefix, then walks all in lockstep asserting key bodies match. When
|
|
// `use_upper_bounds` is true, even-numbered iterators get an upper bound
|
|
// set to the next prefix. When `verify_values` is true, value bodies are
|
|
// also cross-checked.
|
|
//
|
|
// Returns the number of keys walked (per-prefix).
|
|
uint64_t VerifyPrefixScanLockstep(const ReadOptions& base_ro,
|
|
int num_prefixes, bool use_upper_bounds,
|
|
bool verify_values,
|
|
const std::string& trace_context = "") {
|
|
std::vector<std::unique_ptr<Iterator>> iters(num_prefixes);
|
|
std::vector<std::string> prefixes(num_prefixes);
|
|
std::vector<Slice> prefix_slices(num_prefixes);
|
|
std::vector<ReadOptions> ro_copies(num_prefixes);
|
|
std::vector<std::string> upper_bounds(num_prefixes);
|
|
std::vector<Slice> ub_slices(num_prefixes);
|
|
|
|
for (int d = 0; d < num_prefixes; ++d) {
|
|
prefixes[d] = std::to_string(d);
|
|
prefix_slices[d] = Slice(prefixes[d]);
|
|
ro_copies[d] = base_ro;
|
|
if (use_upper_bounds && d % 2 == 0) {
|
|
upper_bounds[d] = prefixes[d];
|
|
upper_bounds[d].back()++;
|
|
ub_slices[d] = upper_bounds[d];
|
|
ro_copies[d].iterate_upper_bound = &ub_slices[d];
|
|
}
|
|
iters[d].reset(db_->NewIterator(ro_copies[d]));
|
|
iters[d]->Seek(prefix_slices[d]);
|
|
}
|
|
|
|
uint64_t count = 0;
|
|
while (iters[0]->Valid() && iters[0]->key().starts_with(prefix_slices[0])) {
|
|
count++;
|
|
std::vector<std::string> keys(num_prefixes);
|
|
std::vector<std::string> values(num_prefixes);
|
|
for (int d = 0; d < num_prefixes; ++d) {
|
|
EXPECT_TRUE(iters[d]->Valid())
|
|
<< trace_context << " iter " << d << " invalid at step " << count;
|
|
EXPECT_TRUE(iters[d]->key().starts_with(prefix_slices[d]))
|
|
<< trace_context << " iter " << d << " out of prefix at step "
|
|
<< count;
|
|
if (!iters[d]->Valid()) {
|
|
return count;
|
|
}
|
|
keys[d] = iters[d]->key().ToString();
|
|
values[d] = iters[d]->value().ToString();
|
|
}
|
|
|
|
std::string key0_body = keys[0].substr(1);
|
|
for (int d = 1; d < num_prefixes; ++d) {
|
|
EXPECT_EQ(key0_body, keys[d].substr(1))
|
|
<< trace_context << " key body mismatch at step " << count
|
|
<< " iter " << d;
|
|
}
|
|
|
|
if (verify_values) {
|
|
std::string val0 = values[0];
|
|
if (!val0.empty()) {
|
|
val0.pop_back();
|
|
}
|
|
for (int d = 1; d < num_prefixes; ++d) {
|
|
std::string vald = values[d];
|
|
if (!vald.empty()) {
|
|
vald.pop_back();
|
|
}
|
|
EXPECT_EQ(val0, vald) << trace_context << " value mismatch at step "
|
|
<< count << " iter " << d;
|
|
}
|
|
}
|
|
|
|
for (int d = 0; d < num_prefixes; ++d) {
|
|
iters[d]->Next();
|
|
}
|
|
}
|
|
|
|
EXPECT_OK(iters[0]->status());
|
|
for (int d = 1; d < num_prefixes; ++d) {
|
|
EXPECT_TRUE(!iters[d]->Valid() ||
|
|
!iters[d]->key().starts_with(prefix_slices[d]))
|
|
<< trace_context << " iter " << d
|
|
<< " still has keys after iter 0 finished";
|
|
EXPECT_OK(iters[d]->status());
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
std::shared_ptr<TrieIndexFactory> trie_factory_;
|
|
std::string dbname_;
|
|
Options options_;
|
|
Options last_options_;
|
|
std::unique_ptr<DB> db_;
|
|
};
|
|
|
|
// ============================================================================
|
|
// Flush tests
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, FlushWithAllOperationTypes) {
|
|
// Write every supported operation type via the DB API, flush, and verify
|
|
// reads return correct results through both the standard binary search index
|
|
// and the trie UDI. This exercises the full path from memtable through
|
|
// CompactionIterator, BlockBasedTableBuilder, and the UDI builder wrapper's
|
|
// MapToUDIValueType for each internal ValueType.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// kTypeValue
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01_put", "val_put"));
|
|
// kTypeMerge
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_02_merge", "val_merge"));
|
|
// kTypeDeletion (bare tombstone — no prior value for this key)
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "key_03_del"));
|
|
// kTypeSingleDeletion (preceded by a Put; both cancel out with no snapshot)
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_04_sdel", "val_sdel"));
|
|
ASSERT_OK(db_->SingleDelete(WriteOptions(), "key_04_sdel"));
|
|
// kTypeWideColumnEntity (with a default column so Get() returns a value)
|
|
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
|
|
"key_05_entity",
|
|
WideColumns{{"", "default_val"}, {"col1", "val1"}}));
|
|
// Another kTypeValue to anchor the end of the key range
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_06_put", "val_put2"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Forward scan via both indexes. Expected visible keys after flush:
|
|
// key_01_put — Put (visible)
|
|
// key_02_merge — Merge single operand (visible)
|
|
// key_03_del — bare Delete tombstone (hidden by DBIter)
|
|
// key_04_sdel — Put + SingleDelete cancel out (hidden)
|
|
// key_05_entity — PutEntity (visible)
|
|
// key_06_put — Put (visible)
|
|
{
|
|
std::vector<std::string> expected = {"key_01_put", "key_02_merge",
|
|
"key_05_entity", "key_06_put"};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
|
|
// Point lookups via both indexes.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_put", "val_put"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02_merge", "val_merge"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_03_del"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04_sdel"));
|
|
// PutEntity: Get() returns the value of the default column ("").
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_05_entity", "default_val"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_06_put", "val_put2"));
|
|
}
|
|
|
|
TEST_F(TrieIndexDBTest, TimedPutFlush) {
|
|
// TimedPut produces kTypeValuePreferredSeqno entries during flush when
|
|
// preclude_last_level_data_seconds > 0. The UDI wrapper strips the packed
|
|
// preferred seqno suffix via ParsePackedValueForValue() before forwarding
|
|
// to the plugin builder. This test verifies that path end-to-end through
|
|
// both the standard binary search index and the trie UDI.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.compaction_style = kCompactionStyleUniversal;
|
|
// Required for kTypeValuePreferredSeqno to survive the flush path: the
|
|
// seqno_to_time_mapping must be available so a preferred seqno can be
|
|
// computed. With write_unix_time=0, GetProximalSeqnoBeforeTime(0) returns 0,
|
|
// which is < any real seqno, so the entry stays as kTypeValuePreferredSeqno.
|
|
options_.preclude_last_level_data_seconds = 10000;
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Regular Put alongside the TimedPut to verify they coexist.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01_put", "val_put"));
|
|
|
|
// TimedPut via WriteBatch (there is no DB::TimedPut method).
|
|
{
|
|
WriteBatch wb;
|
|
ASSERT_OK(wb.TimedPut(db_->DefaultColumnFamily(), "key_02_timed",
|
|
"val_timed", /*write_unix_time=*/0));
|
|
ASSERT_OK(db_->Write(WriteOptions(), &wb));
|
|
}
|
|
|
|
// Merge to verify mixed types work with TimedPut in the same flush.
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_03_merge", "val_merge"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Point lookups via both indexes — the packed seqno must be transparent.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_put", "val_put"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02_timed", "val_timed"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03_merge", "val_merge"));
|
|
|
|
// Forward scan via both indexes — all three keys visible in order.
|
|
{
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{"key_01_put", "val_put"},
|
|
{"key_02_timed", "val_timed"},
|
|
{"key_03_merge", "val_merge"}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Compaction tests
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, CompactionWithMixedOpsAndSnapshots) {
|
|
// Multiple flushes followed by compaction with a snapshot held. The snapshot
|
|
// forces compaction to preserve multiple versions of the same user key,
|
|
// exercising the UDI builder's handling of duplicate user keys with different
|
|
// sequence numbers and value types. Verified through both indexes.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Flush 1: initial values.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_aa", "v1"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_bb", "v1"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_cc", "m1"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Snapshot pins flush 1 versions so compaction preserves them.
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
|
|
// Flush 2: updates that create new versions.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_aa", "v2"));
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "key_bb"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_cc", "m2"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Compact all levels. Both versions of each key are preserved because the
|
|
// snapshot prevents garbage collection of the older versions.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
// Current view (no snapshot): key_aa=v2, key_bb deleted, key_cc="m1,m2".
|
|
{
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{"key_aa", "v2"}, {"key_cc", "m1,m2"}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_aa", "v2"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_bb"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_cc", "m1,m2"));
|
|
|
|
// Snapshot view: key_aa=v1, key_bb=v1, key_cc="m1".
|
|
{
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{"key_aa", "v1"}, {"key_bb", "v1"}, {"key_cc", "m1"}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(snap, expected));
|
|
}
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_aa", "v1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_bb", "v1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_cc", "m1"));
|
|
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
|
|
TEST_F(TrieIndexDBTest, CompactionWithAllOperationTypes) {
|
|
// Exercises all operation types (Put, Delete, Merge, SingleDelete, PutEntity)
|
|
// across two flushes with a snapshot, then compacts. Verified through both
|
|
// indexes. This ensures the UDI builder handles the full range of value types
|
|
// in compaction output, and that both the current and snapshot views are
|
|
// correct.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Flush 1: initial values with diverse types.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01_put", "v1"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_02_merge", "m1"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_03_sd_target", "sd_val"));
|
|
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
|
|
"key_04_entity", WideColumns{{"", "e1"}}));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_05_del_target", "del_val"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Snapshot pins flush 1 versions.
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
|
|
// Flush 2: updates each key with a different operation type.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01_put", "v2"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_02_merge", "m2"));
|
|
ASSERT_OK(db_->SingleDelete(WriteOptions(), "key_03_sd_target"));
|
|
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
|
|
"key_04_entity", WideColumns{{"", "e2"}}));
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "key_05_del_target"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
// Current view via both indexes: key_01=v2, key_02="m1,m2", key_03 SD'd,
|
|
// key_04=e2, key_05 deleted.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_put", "v2"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02_merge", "m1,m2"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_03_sd_target"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_04_entity", "e2"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_05_del_target"));
|
|
|
|
// Current view scan via both indexes: only key_01, key_02, key_04 visible.
|
|
{
|
|
std::vector<std::string> expected = {"key_01_put", "key_02_merge",
|
|
"key_04_entity"};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
|
|
// Snapshot view via both indexes: all original flush 1 values visible.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_01_put", "v1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_02_merge", "m1"));
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes(snap, "key_03_sd_target", "sd_val"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_04_entity", "e1"));
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes(snap, "key_05_del_target", "del_val"));
|
|
|
|
{
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{"key_01_put", "v1"},
|
|
{"key_02_merge", "m1"},
|
|
{"key_03_sd_target", "sd_val"},
|
|
{"key_04_entity", "e1"},
|
|
{"key_05_del_target", "del_val"}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(snap, expected));
|
|
}
|
|
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
|
|
TEST_F(TrieIndexDBTest, TimedPutCompaction) {
|
|
// Verifies that kTypeValuePreferredSeqno entries survive compaction and the
|
|
// UDI builder correctly strips the packed seqno during compaction output.
|
|
// Verified through both indexes.
|
|
options_.compaction_style = kCompactionStyleUniversal;
|
|
options_.preclude_last_level_data_seconds = 10000;
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Flush 1: TimedPut + regular Put.
|
|
{
|
|
WriteBatch wb;
|
|
ASSERT_OK(wb.TimedPut(db_->DefaultColumnFamily(), "key_01_timed",
|
|
"timed_v1", /*write_unix_time=*/0));
|
|
ASSERT_OK(db_->Write(WriteOptions(), &wb));
|
|
}
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_02_put", "put_v1"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Snapshot pins flush 1 versions.
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
|
|
// Flush 2: overwrite both keys with regular Puts.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01_timed", "put_v2"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_02_put", "put_v2"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Compact: the snapshot forces both versions of key_01_timed to be kept.
|
|
// The older version is kTypeValuePreferredSeqno with packed seqno.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
// Current view via both indexes: both keys have the newer value.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_timed", "put_v2"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02_put", "put_v2"));
|
|
{
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{"key_01_timed", "put_v2"}, {"key_02_put", "put_v2"}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
|
|
// Snapshot view via both indexes: key_01 has the original TimedPut value
|
|
// (packed seqno must be transparent), key_02 has its original value.
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes(snap, "key_01_timed", "timed_v1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "key_02_put", "put_v1"));
|
|
{
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{"key_01_timed", "timed_v1"}, {"key_02_put", "put_v1"}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(snap, expected));
|
|
}
|
|
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
|
|
TEST_F(TrieIndexDBTest, CrossFlushSingleDelete) {
|
|
// Verifies that a SingleDelete in a later SST correctly cancels a Put from
|
|
// an earlier SST after compaction with the trie UDI active. Verified through
|
|
// both indexes.
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Flush 1: Puts.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_aa", "val_aa"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_bb", "val_bb"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_cc", "val_cc"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Flush 2: SingleDelete key_bb (targets the Put from flush 1).
|
|
ASSERT_OK(db_->SingleDelete(WriteOptions(), "key_bb"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Before compaction: key_bb is already hidden by the merging iterator.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_bb"));
|
|
|
|
// After compaction: SingleDelete + Put fully cancel out, key_bb is gone.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_bb"));
|
|
|
|
// Remaining keys unaffected via both indexes.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_aa", "val_aa"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_cc", "val_cc"));
|
|
|
|
{
|
|
std::vector<std::string> expected = {"key_aa", "key_cc"};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Iteration tests
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, ReverseIteration) {
|
|
// Verifies that reverse iteration (SeekToLast, Prev, SeekForPrev) works
|
|
// correctly with mixed operation types. Forward scan and point lookups are
|
|
// verified through both indexes. Reverse operations use the standard index
|
|
// (the trie UDI iterator does not yet support SeekToLast/Prev/SeekForPrev).
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01", "v1"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_02", "m1"));
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "key_03"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_04", "v4"));
|
|
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(), "key_05",
|
|
WideColumns{{"", "e5"}}));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_06", "v6"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Visible keys: key_01, key_02, key_04, key_05, key_06 (key_03 deleted).
|
|
|
|
// Forward scan via both indexes.
|
|
{
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{"key_01", "v1"},
|
|
{"key_02", "m1"},
|
|
{"key_04", "v4"},
|
|
{"key_05", "e5"},
|
|
{"key_06", "v6"}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
|
|
// Point lookups via both indexes.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01", "v1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02", "m1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_03"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_04", "v4"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_05", "e5"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_06", "v6"));
|
|
|
|
// Seek via both indexes.
|
|
ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes("key_04", "key_04", "v4"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes("key_05", "key_05", "e5"));
|
|
|
|
// Reverse operations below use the standard index only.
|
|
|
|
// SeekToLast + full reverse scan.
|
|
{
|
|
ReadOptions ro;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekToLast();
|
|
std::vector<std::string> reverse_keys;
|
|
for (; iter->Valid(); iter->Prev()) {
|
|
reverse_keys.push_back(iter->key().ToString());
|
|
}
|
|
ASSERT_OK(iter->status());
|
|
std::vector<std::string> expected = {"key_06", "key_05", "key_04", "key_02",
|
|
"key_01"};
|
|
ASSERT_EQ(reverse_keys, expected);
|
|
}
|
|
|
|
// SeekForPrev to an exact visible key.
|
|
{
|
|
ReadOptions ro;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekForPrev("key_04");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key_04");
|
|
ASSERT_EQ(iter->value().ToString(), "v4");
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
|
|
// SeekForPrev to a deleted key — should land on the largest visible key
|
|
// that is <= "key_03", which is key_02.
|
|
{
|
|
ReadOptions ro;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekForPrev("key_03");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key_02");
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
|
|
// SeekForPrev to a key between existing keys.
|
|
{
|
|
ReadOptions ro;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekForPrev("key_04_5");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key_04");
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
|
|
// SeekForPrev before all keys — should be invalid.
|
|
{
|
|
ReadOptions ro;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekForPrev("key_00");
|
|
ASSERT_FALSE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
|
|
// Prev from a Seek position in the middle of the range.
|
|
{
|
|
ReadOptions ro;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->Seek("key_05");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key_05");
|
|
|
|
iter->Prev();
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key_04");
|
|
|
|
iter->Prev();
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key_02");
|
|
|
|
iter->Prev();
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key_01");
|
|
|
|
iter->Prev();
|
|
ASSERT_FALSE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// DeleteRange test
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, DeleteRangeWithTrieUDI) {
|
|
// Verifies that DeleteRange (kTypeRangeDeletion) works correctly alongside
|
|
// the trie UDI. Range deletions go to a separate range_del_block (not
|
|
// through OnKeyAdded), but we verify that reads correctly filter out
|
|
// range-deleted keys when the trie UDI is active. Forward scan and point
|
|
// lookups verified through both indexes; reverse scan uses standard index.
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
for (int i = 1; i <= 10; i++) {
|
|
char key_buf[16];
|
|
char val_buf[16];
|
|
snprintf(key_buf, sizeof(key_buf), "key_%02d", i);
|
|
snprintf(val_buf, sizeof(val_buf), "val_%02d", i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key_buf, val_buf));
|
|
}
|
|
|
|
// DeleteRange [key_04, key_08) — deletes key_04 through key_07.
|
|
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
|
|
"key_04", "key_08"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Forward scan via both indexes: key_01..key_03 and key_08..key_10 visible.
|
|
{
|
|
std::vector<std::string> expected = {"key_01", "key_02", "key_03",
|
|
"key_08", "key_09", "key_10"};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
|
|
// Point lookups via both indexes for deleted keys.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_07"));
|
|
|
|
// Point lookups via both indexes for surviving keys at boundaries.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03", "val_03"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_08", "val_08"));
|
|
|
|
// Reverse scan (standard index only) should also respect the range deletion.
|
|
{
|
|
ReadOptions ro;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekToLast();
|
|
std::vector<std::string> reverse_keys;
|
|
for (; iter->Valid(); iter->Prev()) {
|
|
reverse_keys.push_back(iter->key().ToString());
|
|
}
|
|
ASSERT_OK(iter->status());
|
|
std::vector<std::string> expected = {"key_10", "key_09", "key_08",
|
|
"key_03", "key_02", "key_01"};
|
|
ASSERT_EQ(reverse_keys, expected);
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// DB reopen test
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, ReopenWithMixedOperationTypes) {
|
|
// Writes all operation types, flushes, closes the DB, reopens, and verifies
|
|
// all data reads correctly from cold SST files through both indexes. This
|
|
// exercises the read path on a freshly opened DB where no memtable data
|
|
// exists.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01", "val_put"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_02", "val_merge"));
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "key_03"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_04", "sd_target"));
|
|
ASSERT_OK(db_->SingleDelete(WriteOptions(), "key_04"));
|
|
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(), "key_05",
|
|
WideColumns{{"", "entity_val"}}));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_06", "val_put2"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Close the DB. All data is now only in SST files.
|
|
ASSERT_OK(db_->Close());
|
|
db_.reset();
|
|
|
|
// Reopen with the same trie UDI configuration.
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Point lookups on cold data via both indexes.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01", "val_put"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02", "val_merge"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_03"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_05", "entity_val"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_06", "val_put2"));
|
|
|
|
// Forward scan via both indexes.
|
|
{
|
|
std::vector<std::string> expected = {"key_01", "key_02", "key_05",
|
|
"key_06"};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
|
|
// Reverse scan on cold data (standard index only).
|
|
{
|
|
ReadOptions ro;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekToLast();
|
|
std::vector<std::string> reverse_keys;
|
|
for (; iter->Valid(); iter->Prev()) {
|
|
reverse_keys.push_back(iter->key().ToString());
|
|
}
|
|
ASSERT_OK(iter->status());
|
|
std::vector<std::string> expected = {"key_06", "key_05", "key_02",
|
|
"key_01"};
|
|
ASSERT_EQ(reverse_keys, expected);
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Ingest external file test
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, IngestExternalFileWithTrieUDI) {
|
|
// Creates an SST with SstFileWriter using the trie UDI, then ingests it
|
|
// into a live DB that also has trie UDI configured. Verifies that both the
|
|
// existing DB data and the ingested data are correctly readable through both
|
|
// indexes.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Write some data directly to the DB and flush.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01", "db_val1"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_05", "db_val5"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Create an SST with SstFileWriter using trie UDI, containing mixed ops.
|
|
std::string sst_path = dbname_ + "/ingest.sst";
|
|
{
|
|
Options sst_options;
|
|
sst_options.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
BlockBasedTableOptions table_options;
|
|
table_options.user_defined_index_factory = trie_factory_;
|
|
sst_options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
|
|
SstFileWriter writer(EnvOptions(), sst_options);
|
|
ASSERT_OK(writer.Open(sst_path));
|
|
ASSERT_OK(writer.Put("key_02", "ingest_val2"));
|
|
ASSERT_OK(writer.Merge("key_03", "ingest_merge3"));
|
|
ASSERT_OK(writer.Delete("key_04"));
|
|
ASSERT_OK(writer.Put("key_06", "ingest_val6"));
|
|
ASSERT_OK(writer.Finish());
|
|
}
|
|
|
|
// Ingest into the live DB.
|
|
IngestExternalFileOptions ingest_opts;
|
|
ASSERT_OK(db_->IngestExternalFile({sst_path}, ingest_opts));
|
|
|
|
// Point lookups via both indexes — combined DB + ingested data.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01", "db_val1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02", "ingest_val2"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03", "ingest_merge3"));
|
|
// key_04: ingested Delete tombstone, no prior value — NotFound.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_05", "db_val5"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_06", "ingest_val6"));
|
|
|
|
// Forward scan via both indexes.
|
|
{
|
|
std::vector<std::string> expected = {"key_01", "key_02", "key_03", "key_05",
|
|
"key_06"};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// WriteBatch test
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, WriteBatchWithMixedOperations) {
|
|
// Verifies that a single WriteBatch containing multiple operation types
|
|
// (Put, Delete, Merge, SingleDelete, PutEntity) works correctly with the
|
|
// trie UDI. Verified through both indexes. Real-world workloads typically
|
|
// batch multiple operations.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Pre-populate a key that the batch's Delete will target.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_02_del", "pre_val"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Build a WriteBatch with all operation types.
|
|
WriteBatch wb;
|
|
ASSERT_OK(wb.Put(db_->DefaultColumnFamily(), "key_01_put", "batch_put"));
|
|
ASSERT_OK(wb.Delete(db_->DefaultColumnFamily(), "key_02_del"));
|
|
ASSERT_OK(wb.Merge(db_->DefaultColumnFamily(), "key_03_merge", "batch_m"));
|
|
// Put + SingleDelete within the same batch — they cancel out.
|
|
ASSERT_OK(wb.Put(db_->DefaultColumnFamily(), "key_04_sd", "sd_target"));
|
|
ASSERT_OK(wb.SingleDelete(db_->DefaultColumnFamily(), "key_04_sd"));
|
|
ASSERT_OK(wb.PutEntity(db_->DefaultColumnFamily(), "key_05_entity",
|
|
WideColumns{{"", "batch_entity"}}));
|
|
|
|
ASSERT_OK(db_->Write(WriteOptions(), &wb));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Point lookups via both indexes. Expected visible keys: key_01 (Put),
|
|
// key_03 (Merge), key_05 (PutEntity). key_02 deleted, key_04
|
|
// Put+SingleDelete cancel.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01_put", "batch_put"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_02_del"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03_merge", "batch_m"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("key_04_sd"));
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes("key_05_entity", "batch_entity"));
|
|
|
|
// Forward scan via both indexes.
|
|
{
|
|
std::vector<std::string> expected = {"key_01_put", "key_03_merge",
|
|
"key_05_entity"};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Large-scale test
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, LargeMixedOperationsAcrossBlocks) {
|
|
// Large-scale test with many keys of different operation types and a small
|
|
// block size. This stresses block boundary handling in the trie UDI across
|
|
// Put, Delete, Merge, SingleDelete, and PutEntity entries. Verified through
|
|
// both indexes.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
// Small block size forces many data blocks, exercising the trie index's
|
|
// AddIndexEntry at frequent block boundaries.
|
|
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
|
|
const int kNumKeys = 500;
|
|
// Track keys expected to be visible after flush (non-deleted, non-SD'd).
|
|
std::vector<std::string> expected_visible;
|
|
|
|
for (int i = 0; i < kNumKeys; i++) {
|
|
char key_buf[32];
|
|
snprintf(key_buf, sizeof(key_buf), "key_%06d", i);
|
|
std::string key(key_buf);
|
|
|
|
// Distribute operation types:
|
|
// i%10 in [0,3] -> Put (40%)
|
|
// i%10 in [4,5] -> Delete (20%)
|
|
// i%10 in [6,7] -> Merge (20%)
|
|
// i%10 == 8 -> SingleDelete (10%, preceded by Put -- both cancel)
|
|
// i%10 == 9 -> PutEntity (10%)
|
|
int type = i % 10;
|
|
if (type <= 3) {
|
|
char val_buf[32];
|
|
snprintf(val_buf, sizeof(val_buf), "val_%06d", i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, val_buf));
|
|
expected_visible.push_back(key);
|
|
} else if (type <= 5) {
|
|
ASSERT_OK(db_->Delete(WriteOptions(), key));
|
|
// Bare tombstone — not visible.
|
|
} else if (type <= 7) {
|
|
char val_buf[32];
|
|
snprintf(val_buf, sizeof(val_buf), "merge_%06d", i);
|
|
ASSERT_OK(db_->Merge(WriteOptions(), key, val_buf));
|
|
expected_visible.push_back(key);
|
|
} else if (type == 8) {
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, "to_be_deleted"));
|
|
ASSERT_OK(db_->SingleDelete(WriteOptions(), key));
|
|
// Put + SingleDelete cancel — not visible.
|
|
} else {
|
|
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(), key,
|
|
WideColumns{{"", "entity_val"}}));
|
|
expected_visible.push_back(key);
|
|
}
|
|
}
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Forward scan via both indexes — verify exactly the expected visible keys.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected_visible));
|
|
|
|
// Spot-check: Seek to every 10th visible key via both indexes.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
for (size_t i = 0; i < expected_visible.size(); i += 10) {
|
|
iter->Seek(expected_visible[i]);
|
|
ASSERT_TRUE(iter->Valid()) << "Seek failed for " << expected_visible[i];
|
|
ASSERT_EQ(iter->key().ToString(), expected_visible[i]);
|
|
}
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Seqno side-table tests (same user key spanning data block boundaries)
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, SameUserKeyAcrossBlockBoundaries) {
|
|
// Forces the same user key to appear in multiple data blocks by writing many
|
|
// versions with snapshots held to prevent garbage collection, using a tiny
|
|
// block_size. This exercises the trie's seqno side-table: the trie stores
|
|
// only one separator per user key, and the side-table records the seqno +
|
|
// overflow block count so that Seek() can find the correct data block for
|
|
// each version.
|
|
//
|
|
// Without the seqno side-table fix (PR #14412), reads through the trie index
|
|
// would return incorrect data when multiple versions of the same key span
|
|
// different data blocks.
|
|
options_.disable_auto_compactions = true;
|
|
// Tiny block_size (64 bytes) forces each version of the key into its own
|
|
// data block, creating same-user-key block boundaries that the trie must
|
|
// handle via the seqno side-table.
|
|
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
|
|
// Write multiple versions of the same key, holding snapshots so all versions
|
|
// survive the flush to a single SST file.
|
|
const std::string key = "same_key";
|
|
constexpr int kNumVersions = 10;
|
|
std::vector<const Snapshot*> snaps;
|
|
for (int i = 0; i < kNumVersions; i++) {
|
|
std::string val = "ver_" + std::to_string(i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, val));
|
|
snaps.push_back(db_->GetSnapshot());
|
|
}
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Current view: latest version visible.
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes(key, "ver_" + std::to_string(kNumVersions - 1)));
|
|
|
|
// Each snapshot should see the version written at or before its creation.
|
|
for (int i = 0; i < kNumVersions; i++) {
|
|
std::string expected_val = "ver_" + std::to_string(i);
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snaps[i], key, expected_val));
|
|
}
|
|
|
|
// Forward scan with each snapshot should return exactly one key with the
|
|
// correct version.
|
|
for (int i = 0; i < kNumVersions; i++) {
|
|
std::string expected_val = "ver_" + std::to_string(i);
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{key, expected_val}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(snaps[i], expected));
|
|
}
|
|
|
|
// Seek to the key through the trie index with each snapshot — the trie's
|
|
// post-seek correction must advance through overflow blocks to find the
|
|
// correct version for each seqno.
|
|
for (int i = 0; i < kNumVersions; i++) {
|
|
std::string expected_val = "ver_" + std::to_string(i);
|
|
SCOPED_TRACE("snap=" + std::to_string(i));
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifySeekBothIndexes(snaps[i], key, key, expected_val));
|
|
}
|
|
|
|
for (auto* snap : snaps) {
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
}
|
|
|
|
TEST_F(TrieIndexDBTest, SameUserKeyPutThenDeleteAcrossBlocks) {
|
|
// Same user key with a Put followed by a Delete, where both entries land in
|
|
// different data blocks. A snapshot pins the Put version. After compaction,
|
|
// the current view shows NotFound while the snapshot view shows the Put.
|
|
// This tests the seqno side-table with mixed value types for the same key.
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
|
|
// Write a Put, take snapshot, then Delete.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "del_key", "put_value"));
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "del_key"));
|
|
|
|
// Add surrounding keys to create more data blocks and exercise trie
|
|
// separators around the duplicated key.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "aaa_before", "before_val"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "zzz_after", "after_val"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Current view: del_key is deleted.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("del_key"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("aaa_before", "before_val"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("zzz_after", "after_val"));
|
|
|
|
// Snapshot view: del_key is visible with the Put value.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "del_key", "put_value"));
|
|
|
|
// Seek to del_key with snapshot through both indexes.
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifySeekBothIndexes(snap, "del_key", "del_key", "put_value"));
|
|
|
|
// Compact to merge the Put + Delete. Snapshot prevents GC of the Put.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
// After compaction, same behavior.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("del_key"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snap, "del_key", "put_value"));
|
|
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
|
|
TEST_F(TrieIndexDBTest, SameUserKeyManyVersionsSeekCorrectness) {
|
|
// Writes many versions of three different keys (with snapshots), using a
|
|
// tiny block_size to force same-user-key block boundaries. Verifies that
|
|
// Seek + Get through the trie index returns the correct version for each
|
|
// snapshot, testing the seqno side-table's overflow handling with multiple
|
|
// keys interleaved in the SST.
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
|
|
const std::vector<std::string> keys = {"key_aaa", "key_mmm", "key_zzz"};
|
|
constexpr int kVersionsPerKey = 8;
|
|
// snaps[v] is taken after writing version v of all keys.
|
|
std::vector<const Snapshot*> snaps;
|
|
|
|
for (int v = 0; v < kVersionsPerKey; v++) {
|
|
for (const auto& k : keys) {
|
|
std::string val = k + "_v" + std::to_string(v);
|
|
ASSERT_OK(db_->Put(WriteOptions(), k, val));
|
|
}
|
|
snaps.push_back(db_->GetSnapshot());
|
|
}
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Verify each snapshot sees the correct version of each key via Get and Seek.
|
|
for (int v = 0; v < kVersionsPerKey; v++) {
|
|
for (const auto& k : keys) {
|
|
std::string expected_val = k + "_v" + std::to_string(v);
|
|
SCOPED_TRACE("key=" + k + " v=" + std::to_string(v));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snaps[v], k, expected_val));
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifySeekBothIndexes(snaps[v], k, k, expected_val));
|
|
}
|
|
}
|
|
|
|
// Compact and re-verify. Compaction must preserve all versions because
|
|
// snapshots are held.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
for (int v = 0; v < kVersionsPerKey; v++) {
|
|
for (const auto& k : keys) {
|
|
std::string expected_val = k + "_v" + std::to_string(v);
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(snaps[v], k, expected_val));
|
|
}
|
|
}
|
|
|
|
for (auto* snap : snaps) {
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// MultiGet test
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, MultiGetWithTrieUDI) {
|
|
// Verifies that the batched MultiGet API works correctly with the trie UDI.
|
|
// MultiGet is a separate code path from single Get and uses batched block
|
|
// lookups, so it needs dedicated testing.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
|
|
// Write a mix of operation types.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01", "val_01"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key_02", "merge_02"));
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "key_03"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_04", "val_04"));
|
|
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(), "key_05",
|
|
WideColumns{{"", "entity_05"}}));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_06", "val_06"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// MultiGet through both indexes.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
|
|
std::vector<Slice> mg_keys = {"key_01", "key_02", "key_03",
|
|
"key_04", "key_05", "key_06",
|
|
"key_nonexistent"};
|
|
std::vector<std::string> mg_values(mg_keys.size());
|
|
std::vector<Status> mg_statuses = db_->MultiGet(ro, mg_keys, &mg_values);
|
|
|
|
ASSERT_EQ(mg_statuses.size(), mg_keys.size());
|
|
ASSERT_OK(mg_statuses[0]);
|
|
ASSERT_EQ(mg_values[0], "val_01");
|
|
ASSERT_OK(mg_statuses[1]);
|
|
ASSERT_EQ(mg_values[1], "merge_02");
|
|
ASSERT_TRUE(mg_statuses[2].IsNotFound());
|
|
ASSERT_OK(mg_statuses[3]);
|
|
ASSERT_EQ(mg_values[3], "val_04");
|
|
ASSERT_OK(mg_statuses[4]);
|
|
ASSERT_EQ(mg_values[4], "entity_05");
|
|
ASSERT_OK(mg_statuses[5]);
|
|
ASSERT_EQ(mg_values[5], "val_06");
|
|
ASSERT_TRUE(mg_statuses[6].IsNotFound());
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// WAL replay / crash recovery test
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, WALReplayRecovery) {
|
|
// Writes data without flushing, then closes and reopens the DB. The data
|
|
// must be recovered from the WAL and then flushed. This tests that the trie
|
|
// UDI builder handles entries replayed from the WAL correctly.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
// WAL is enabled by default (WriteOptions::disableWAL = false).
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Write data — do NOT flush. Data lives only in the WAL + memtable.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "wal_key_01", "wal_val_01"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "wal_key_02", "wal_merge"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "wal_key_03", "wal_val_03"));
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "wal_key_03"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "wal_key_04", "wal_val_04"));
|
|
|
|
// Close and reopen — triggers WAL replay.
|
|
ASSERT_OK(db_->Close());
|
|
db_.reset();
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// After WAL replay, data should be in a memtable. Flush to create SST with
|
|
// the trie UDI.
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Verify data through both indexes.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("wal_key_01", "wal_val_01"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("wal_key_02", "wal_merge"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("wal_key_03"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("wal_key_04", "wal_val_04"));
|
|
|
|
// Forward scan.
|
|
{
|
|
std::vector<std::pair<std::string, std::string>> expected = {
|
|
{"wal_key_01", "wal_val_01"},
|
|
{"wal_key_02", "wal_merge"},
|
|
{"wal_key_04", "wal_val_04"}};
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(expected));
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Multiple column families test
|
|
// ============================================================================
|
|
|
|
TEST_F(TrieIndexDBTest, MultipleColumnFamilies) {
|
|
// Opens a DB with multiple column families, each using the trie UDI. Writes
|
|
// different data to each CF, flushes, and verifies reads through both indexes
|
|
// for each CF. This tests that the UDI builder/reader are correctly isolated
|
|
// per-CF.
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
options_.disable_auto_compactions = true;
|
|
options_.create_if_missing = true;
|
|
|
|
BlockBasedTableOptions table_options;
|
|
table_options.user_defined_index_factory = trie_factory_;
|
|
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
last_options_ = options_;
|
|
|
|
// Open with default CF first.
|
|
ASSERT_OK(DB::Open(options_, dbname_, &db_));
|
|
|
|
// Create two additional CFs with the same trie UDI options.
|
|
ColumnFamilyHandle* cf1 = nullptr;
|
|
ColumnFamilyHandle* cf2 = nullptr;
|
|
ASSERT_OK(db_->CreateColumnFamily(options_, "cf_one", &cf1));
|
|
ASSERT_OK(db_->CreateColumnFamily(options_, "cf_two", &cf2));
|
|
|
|
// Write different data to each CF.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "default_key", "default_val"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), cf1, "cf1_key_a", "cf1_val_a"));
|
|
ASSERT_OK(db_->Merge(WriteOptions(), cf1, "cf1_key_b", "cf1_merge"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), cf2, "cf2_key_x", "cf2_val_x"));
|
|
ASSERT_OK(db_->Delete(WriteOptions(), cf2, "cf2_key_y"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), cf2, "cf2_key_z", "cf2_val_z"));
|
|
|
|
// Flush all CFs.
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->Flush(FlushOptions(), cf1));
|
|
ASSERT_OK(db_->Flush(FlushOptions(), cf2));
|
|
|
|
// Verify default CF.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::string value;
|
|
ASSERT_OK(db_->Get(ro, "default_key", &value));
|
|
ASSERT_EQ(value, "default_val");
|
|
}
|
|
|
|
// Verify cf_one through both indexes.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::string value;
|
|
ASSERT_OK(db_->Get(ro, cf1, "cf1_key_a", &value));
|
|
ASSERT_EQ(value, "cf1_val_a");
|
|
ASSERT_OK(db_->Get(ro, cf1, "cf1_key_b", &value));
|
|
ASSERT_EQ(value, "cf1_merge");
|
|
}
|
|
|
|
// Verify cf_two through both indexes.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::string value;
|
|
ASSERT_OK(db_->Get(ro, cf2, "cf2_key_x", &value));
|
|
ASSERT_EQ(value, "cf2_val_x");
|
|
ASSERT_TRUE(db_->Get(ro, cf2, "cf2_key_y", &value).IsNotFound());
|
|
ASSERT_OK(db_->Get(ro, cf2, "cf2_key_z", &value));
|
|
ASSERT_EQ(value, "cf2_val_z");
|
|
}
|
|
|
|
// Forward scan on each CF via both indexes.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
|
|
// cf_one scan.
|
|
std::unique_ptr<Iterator> it1(db_->NewIterator(ro, cf1));
|
|
it1->SeekToFirst();
|
|
ASSERT_TRUE(it1->Valid());
|
|
ASSERT_EQ(it1->key().ToString(), "cf1_key_a");
|
|
it1->Next();
|
|
ASSERT_TRUE(it1->Valid());
|
|
ASSERT_EQ(it1->key().ToString(), "cf1_key_b");
|
|
it1->Next();
|
|
ASSERT_FALSE(it1->Valid());
|
|
ASSERT_OK(it1->status());
|
|
|
|
// cf_two scan.
|
|
std::unique_ptr<Iterator> it2(db_->NewIterator(ro, cf2));
|
|
it2->SeekToFirst();
|
|
ASSERT_TRUE(it2->Valid());
|
|
ASSERT_EQ(it2->key().ToString(), "cf2_key_x");
|
|
it2->Next();
|
|
ASSERT_TRUE(it2->Valid());
|
|
ASSERT_EQ(it2->key().ToString(), "cf2_key_z");
|
|
it2->Next();
|
|
ASSERT_FALSE(it2->Valid());
|
|
ASSERT_OK(it2->status());
|
|
}
|
|
|
|
// Clean up CF handles before closing.
|
|
ASSERT_OK(db_->DestroyColumnFamilyHandle(cf1));
|
|
ASSERT_OK(db_->DestroyColumnFamilyHandle(cf2));
|
|
|
|
// Close the DB. Need to clear db_ first since TearDown will also close.
|
|
ASSERT_OK(db_->Close());
|
|
db_.reset();
|
|
|
|
// Reopen with all CFs to verify persistence.
|
|
{
|
|
std::vector<ColumnFamilyDescriptor> cf_descs = {
|
|
ColumnFamilyDescriptor(kDefaultColumnFamilyName, options_),
|
|
ColumnFamilyDescriptor("cf_one", options_),
|
|
ColumnFamilyDescriptor("cf_two", options_)};
|
|
std::vector<ColumnFamilyHandle*> cf_handles;
|
|
std::unique_ptr<DB> reopen_db;
|
|
ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cf_handles, &reopen_db));
|
|
db_ = std::move(reopen_db);
|
|
|
|
// Verify data survives reopen.
|
|
auto ro = TrieIndexReadOptions();
|
|
std::string value;
|
|
ASSERT_OK(db_->Get(ro, cf_handles[0], "default_key", &value));
|
|
ASSERT_EQ(value, "default_val");
|
|
ASSERT_OK(db_->Get(ro, cf_handles[1], "cf1_key_a", &value));
|
|
ASSERT_EQ(value, "cf1_val_a");
|
|
ASSERT_OK(db_->Get(ro, cf_handles[2], "cf2_key_z", &value));
|
|
ASSERT_EQ(value, "cf2_val_z");
|
|
|
|
for (auto* h : cf_handles) {
|
|
ASSERT_OK(db_->DestroyColumnFamilyHandle(h));
|
|
}
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// BatchedPrefixScan: reproduces the test_batches_snapshots pattern.
|
|
//
|
|
// Writes batches of 10 keys {digit+key_body : value_body+digit} for digit in
|
|
// 0..9, exactly as the crash-test stress tool does. Then scans each prefix
|
|
// concurrently (same snapshot) and checks that:
|
|
// (a) all 10 iterators yield the same key bodies in lockstep, and
|
|
// (b) the values stripped of the trailing digit are identical across
|
|
// prefixes.
|
|
//
|
|
// We run with both the standard index and the trie index and compare.
|
|
// ---------------------------------------------------------------------------
|
|
TEST_F(TrieIndexDBTest, BatchedPrefixScan) {
|
|
// Small block size to force many data blocks (and thus many trie entries).
|
|
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
|
|
const int kNumBatches = 200;
|
|
const int kNumPrefixes = 10;
|
|
Random rnd(42);
|
|
|
|
// Phase 1: Write batches.
|
|
for (int b = 0; b < kNumBatches; ++b) {
|
|
WriteBatch batch;
|
|
std::string key_body = MakeKeyBody(b);
|
|
std::string value_body = rnd.RandomString(20);
|
|
|
|
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
std::string k = std::to_string(d) + key_body;
|
|
std::string v = value_body + std::to_string(d);
|
|
ASSERT_OK(batch.Put(k, v));
|
|
}
|
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
}
|
|
|
|
// Flush so data is in SSTs with trie index.
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Phase 2: Prefix scan with both indexes.
|
|
for (int idx_type = 0; idx_type < 2; ++idx_type) {
|
|
ReadOptions base_ro =
|
|
idx_type == 0 ? StandardIndexReadOptions() : TrieIndexReadOptions();
|
|
SCOPED_TRACE(idx_type == 0 ? "standard index" : "trie index");
|
|
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
base_ro.snapshot = snap;
|
|
|
|
uint64_t count = VerifyPrefixScanLockstep(base_ro, kNumPrefixes,
|
|
/*use_upper_bounds=*/true,
|
|
/*verify_values=*/true);
|
|
ASSERT_EQ(count, static_cast<uint64_t>(kNumBatches))
|
|
<< "expected " << kNumBatches << " entries per prefix";
|
|
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
}
|
|
|
|
// Same as above but with multiple flushes, compaction, and a DB reopen
|
|
// in between to simulate the crash-recovery path.
|
|
TEST_F(TrieIndexDBTest, BatchedPrefixScanAfterReopen) {
|
|
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
|
|
const int kNumBatches = 100;
|
|
const int kNumPrefixes = 10;
|
|
Random rnd(123);
|
|
|
|
for (int b = 0; b < kNumBatches; ++b) {
|
|
WriteBatch batch;
|
|
std::string key_body = MakeKeyBody(b);
|
|
std::string value_body = rnd.RandomString(20);
|
|
|
|
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
std::string k = std::to_string(d) + key_body;
|
|
std::string v = value_body + std::to_string(d);
|
|
ASSERT_OK(batch.Put(k, v));
|
|
}
|
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
|
|
// Flush every 20 batches to create multiple SSTs.
|
|
if ((b + 1) % 20 == 0) {
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
}
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Compact to merge SSTs.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
// Close and reopen (simulating recovery).
|
|
ASSERT_OK(db_->Close());
|
|
db_.reset();
|
|
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
|
|
// Prefix scan with trie index after reopen.
|
|
ReadOptions base_ro = TrieIndexReadOptions();
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
base_ro.snapshot = snap;
|
|
|
|
uint64_t count =
|
|
VerifyPrefixScanLockstep(base_ro, kNumPrefixes, /*use_upper_bounds=*/true,
|
|
/*verify_values=*/false);
|
|
ASSERT_EQ(count, static_cast<uint64_t>(kNumBatches));
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
|
|
// Test with overwrites: multiple writes to the same key body, ensuring
|
|
// the latest value is consistent across all prefixes.
|
|
TEST_F(TrieIndexDBTest, BatchedPrefixScanWithOverwrites) {
|
|
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
|
|
const int kNumKeys = 50;
|
|
const int kNumOverwrites = 5;
|
|
const int kNumPrefixes = 10;
|
|
Random rnd(999);
|
|
|
|
// Write each key body multiple times.
|
|
for (int round = 0; round < kNumOverwrites; ++round) {
|
|
for (int k = 0; k < kNumKeys; ++k) {
|
|
WriteBatch batch;
|
|
std::string key_body = MakeKeyBody(k);
|
|
std::string value_body = rnd.RandomString(20);
|
|
|
|
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
std::string key = std::to_string(d) + key_body;
|
|
std::string v = value_body + std::to_string(d);
|
|
ASSERT_OK(batch.Put(key, v));
|
|
}
|
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
}
|
|
|
|
// Flush after each round.
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
}
|
|
|
|
// Now verify with both indexes.
|
|
for (int idx_type = 0; idx_type < 2; ++idx_type) {
|
|
ReadOptions base_ro =
|
|
idx_type == 0 ? StandardIndexReadOptions() : TrieIndexReadOptions();
|
|
SCOPED_TRACE(idx_type == 0 ? "standard index" : "trie index");
|
|
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
base_ro.snapshot = snap;
|
|
|
|
uint64_t count = VerifyPrefixScanLockstep(base_ro, kNumPrefixes,
|
|
/*use_upper_bounds=*/false,
|
|
/*verify_values=*/true);
|
|
ASSERT_EQ(count, static_cast<uint64_t>(kNumKeys));
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
}
|
|
|
|
// Stress-like test: write + delete + rewrite many keys, flush between rounds,
|
|
// then verify prefix scan consistency. Simulates the crash test pattern that
|
|
// triggered failures.
|
|
TEST_F(TrieIndexDBTest, BatchedPrefixScanStressLike) {
|
|
ASSERT_OK(OpenDB(/*block_size=*/4096));
|
|
|
|
const int kMaxKey = 10000;
|
|
const int kNumPrefixes = 10;
|
|
const int kNumRounds = 20;
|
|
Random rnd(7777);
|
|
|
|
for (int round = 0; round < kNumRounds; ++round) {
|
|
// Write a batch of random keys
|
|
int num_writes = 100 + rnd.Uniform(200);
|
|
for (int w = 0; w < num_writes; ++w) {
|
|
int k = rnd.Uniform(kMaxKey);
|
|
WriteBatch batch;
|
|
std::string key_body = MakeKeyBody(k);
|
|
std::string value_body = rnd.RandomString(rnd.Uniform(60) + 4);
|
|
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
std::string key = std::to_string(d) + key_body;
|
|
std::string v = value_body + std::to_string(d);
|
|
ASSERT_OK(batch.Put(key, v));
|
|
}
|
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
}
|
|
|
|
// Delete some random keys
|
|
int num_deletes = 50 + rnd.Uniform(100);
|
|
for (int w = 0; w < num_deletes; ++w) {
|
|
int k = rnd.Uniform(kMaxKey);
|
|
WriteBatch batch;
|
|
std::string key_body = MakeKeyBody(k);
|
|
for (int d = kNumPrefixes - 1; d >= 0; --d) {
|
|
std::string key = std::to_string(d) + key_body;
|
|
ASSERT_OK(batch.Delete(key));
|
|
}
|
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
}
|
|
|
|
// Flush every few rounds
|
|
if (round % 3 == 0) {
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
}
|
|
|
|
// Verify prefix scan consistency with trie index.
|
|
{
|
|
ReadOptions base_ro = TrieIndexReadOptions();
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
base_ro.snapshot = snap;
|
|
|
|
VerifyPrefixScanLockstep(base_ro, kNumPrefixes,
|
|
/*use_upper_bounds=*/false,
|
|
/*verify_values=*/true,
|
|
"round=" + std::to_string(round));
|
|
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Regression test for the FindShortSuccessor last-block bug.
|
|
//
|
|
// Before the fix, TrieIndexBuilder::AddIndexEntry called
|
|
// FindShortSuccessor() on the last block's separator key, producing a
|
|
// shorter key that covered a wider range than the actual data. For example,
|
|
// if the last key's user key was "9\xff\xff", FindShortSuccessor would
|
|
// produce ":" (0x3A), making the trie claim it covers keys up to ":". A
|
|
// seek for "9\xff\xff\x01" (between the real last key and ":") would find a
|
|
// block via the trie but not via the standard index, causing prefix scan
|
|
// iterators to desynchronize.
|
|
//
|
|
// The standard ShortenedIndexBuilder (with default kShortenSeparators mode)
|
|
// does NOT call FindShortSuccessor on the last block — it uses the last key
|
|
// as-is. The fix makes the trie builder match this behavior.
|
|
// ---------------------------------------------------------------------------
|
|
TEST_F(TrieIndexDBTest, LastBlockSeparatorNotShortened) {
|
|
// Use a small block size so each key lands in its own block.
|
|
ASSERT_OK(OpenDB(/*block_size=*/32));
|
|
|
|
// Write keys where the last key has trailing 0xFF bytes, which
|
|
// FindShortSuccessor would shorten by incrementing the byte before the
|
|
// 0xFF suffix ("9\xff\xff" -> ":").
|
|
ASSERT_OK(db_->Put(WriteOptions(), "1aaa", "v1"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "5bbb", "v2"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), std::string("9\xff\xff", 3), "v3"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// The key "9\xff\xff\x01" is lexicographically after "9\xff\xff" but
|
|
// before ":" (0x3A). With the old bug, the trie would return a valid
|
|
// block for this key. With the fix, both indexes correctly say "not
|
|
// found".
|
|
std::string seek_target = std::string("9\xff\xff\x01", 4);
|
|
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
|
|
iter->Seek(seek_target);
|
|
ASSERT_TRUE(!iter->Valid()) << "Expected no key at or after seek_target, "
|
|
<< "but got: " << iter->key().ToString(true);
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
|
|
// Also verify the actual last key is still findable.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
|
|
iter->Seek(std::string("9\xff\xff", 3));
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), std::string("9\xff\xff", 3));
|
|
ASSERT_EQ(iter->value().ToString(), "v3");
|
|
|
|
// After this key, there should be nothing more.
|
|
iter->Next();
|
|
ASSERT_TRUE(!iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// Variant: tests that when deletes remove the last key, seeking past the last
|
|
// remaining key correctly returns "not found" with both indexes.
|
|
TEST_F(TrieIndexDBTest, LastBlockSeparatorWithDeletes) {
|
|
ASSERT_OK(OpenDB(/*block_size=*/32));
|
|
|
|
// Write and flush initial data.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "1aaa", "v1"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "5bbb", "v2"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), std::string("9\xff\xff", 3), "v3"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Delete the last key and flush (creates a tombstone in a new SST).
|
|
ASSERT_OK(db_->Delete(WriteOptions(), std::string("9\xff\xff", 3)));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Now seeking for the deleted key should yield "5bbb" or nothing,
|
|
// depending on the seek target. Both indexes must agree.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
|
|
// Seek to the deleted key — should skip it and land on nothing (it was
|
|
// the last key).
|
|
iter->Seek(std::string("9\xff\xff", 3));
|
|
ASSERT_TRUE(!iter->Valid())
|
|
<< "Deleted key should not be visible, but got: "
|
|
<< iter->key().ToString(true);
|
|
ASSERT_OK(iter->status());
|
|
|
|
// Seek to a key between "5bbb" and the deleted key — should find "5bbb"
|
|
// or nothing depending on order. Actually, "6" > "5bbb" and "6" <
|
|
// "9\xff\xff", so seeking "6" should find nothing since there's no key
|
|
// >= "6" that's still alive.
|
|
iter->Seek("6");
|
|
ASSERT_TRUE(!iter->Valid()) << "No live key >= '6' should exist, but got: "
|
|
<< iter->key().ToString(true);
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
|
|
// Compact to merge the tombstone, then verify again.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
|
|
iter->SeekToFirst();
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "1aaa");
|
|
iter->Next();
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "5bbb");
|
|
iter->Next();
|
|
ASSERT_TRUE(!iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// Single-entry SST: the trie has exactly one leaf. Validates that Seek,
|
|
// SeekToFirst, Next, and Get all work with a one-block, one-key SST.
|
|
TEST_F(TrieIndexDBTest, SingleEntrySST) {
|
|
ASSERT_OK(OpenDB());
|
|
ASSERT_OK(db_->Put(WriteOptions(), "only_key", "only_val"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Point lookup.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("only_key", "only_val"));
|
|
|
|
// Forward scan: exactly one result.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
std::vector<std::pair<std::string, std::string>>{
|
|
{"only_key", "only_val"}}));
|
|
|
|
// Seek to the exact key.
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifySeekBothIndexes("only_key", "only_key", "only_val"));
|
|
|
|
// Seek before the key — should land on it.
|
|
ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes("a", "only_key", "only_val"));
|
|
|
|
// Seek past the key — should be invalid.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->Seek("z");
|
|
ASSERT_FALSE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// Deletion-only SST: flush a Put, then flush a Delete for that key so the
|
|
// second SST contains only a tombstone. After compaction, the key is gone.
|
|
TEST_F(TrieIndexDBTest, DeletionOnlySST) {
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Flush 1: a real Put.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "del_target", "val"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Flush 2: only a Delete — this creates an SST whose only entry is a
|
|
// tombstone (the trie still builds an index for the block containing it).
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "del_target"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// The tombstone hides the Put.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("del_target"));
|
|
|
|
// Forward scan: nothing visible.
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyForwardScanBothIndexes(std::vector<std::string>{}));
|
|
|
|
// Compact to merge: key is fully removed.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("del_target"));
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyForwardScanBothIndexes(std::vector<std::string>{}));
|
|
}
|
|
|
|
// All-same-key SST: multiple versions of the same user key (via snapshots)
|
|
// land in the same SST, possibly spanning multiple blocks. Validates that
|
|
// the trie's same-key-run handling (seqno-based separators) works at the
|
|
// DB level through both indexes.
|
|
TEST_F(TrieIndexDBTest, AllSameKeySST) {
|
|
options_.disable_auto_compactions = true;
|
|
// Small block size to force multiple blocks for the same user key.
|
|
ASSERT_OK(OpenDB(/*block_size=*/32));
|
|
|
|
// Write several versions of the same key with snapshots to prevent GC.
|
|
std::vector<const Snapshot*> snaps;
|
|
for (int i = 0; i < 10; i++) {
|
|
ASSERT_OK(db_->Put(WriteOptions(), "same_key", "val_" + std::to_string(i)));
|
|
snaps.push_back(db_->GetSnapshot());
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Latest value is visible.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("same_key", "val_9"));
|
|
|
|
// Forward scan: only the latest version is visible (without snapshot).
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
std::vector<std::pair<std::string, std::string>>{{"same_key", "val_9"}}));
|
|
|
|
// Each snapshot should see the correct version.
|
|
for (int i = 0; i < 10; i++) {
|
|
SCOPED_TRACE("snapshot " + std::to_string(i));
|
|
std::string expected = "val_" + std::to_string(i);
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes(snaps[i], "same_key", expected));
|
|
|
|
// Forward scan with snapshot.
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyForwardScanBothIndexes(snaps[i], {{"same_key", expected}}));
|
|
}
|
|
|
|
// Seek with earliest snapshot — should find the earliest version.
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifySeekBothIndexes(snaps[0], "same_key", "same_key", "val_0"));
|
|
|
|
for (auto* snap : snaps) {
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
}
|
|
|
|
// Operations on a completely empty DB: nothing should crash, and after
|
|
// creating + deleting all data, the DB should correctly return nothing.
|
|
TEST_F(TrieIndexDBTest, EmptyDBOperations) {
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Get / Seek / SeekToFirst on empty memtable (no SSTs yet).
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("anything"));
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekToFirst();
|
|
ASSERT_FALSE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
iter->Seek("anything");
|
|
ASSERT_FALSE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
|
|
// Create an SST, delete its only key, compact → DB has no live data but
|
|
// the trie code path was exercised during flush.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "temp", "val"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->Delete(WriteOptions(), "temp"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("temp"));
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekToFirst();
|
|
ASSERT_FALSE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// Focused seek-pattern tests: before all data, between blocks, exact match,
|
|
// after all data, and empty-key seek.
|
|
TEST_F(TrieIndexDBTest, SeekEdgeCases) {
|
|
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
|
|
// Write keys with deliberate gaps.
|
|
for (const auto& k : {"bbb", "ddd", "fff", "hhh"}) {
|
|
ASSERT_OK(db_->Put(WriteOptions(), k, std::string("v_") + k));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
|
|
// Before first key.
|
|
iter->Seek("aaa");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "bbb");
|
|
|
|
// Exact first key.
|
|
iter->Seek("bbb");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "bbb");
|
|
|
|
// Between keys.
|
|
iter->Seek("ccc");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "ddd");
|
|
|
|
// Between keys (eee → fff).
|
|
iter->Seek("eee");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "fff");
|
|
|
|
// Exact last key.
|
|
iter->Seek("hhh");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "hhh");
|
|
|
|
// After last key.
|
|
iter->Seek("zzz");
|
|
ASSERT_FALSE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
|
|
// Empty key (smallest possible key for BytewiseComparator).
|
|
iter->Seek("");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "bbb");
|
|
}
|
|
}
|
|
|
|
// PutEntity + GetEntity through the trie index read path.
|
|
TEST_F(TrieIndexDBTest, GetEntityWithTrieUDI) {
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// PutEntity with wide columns.
|
|
WideColumns columns{
|
|
{kDefaultWideColumnName, "default_val"},
|
|
{"col_a", "val_a"},
|
|
{"col_b", "val_b"},
|
|
};
|
|
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
|
|
"entity_key", columns));
|
|
// Also a regular Put to verify GetEntity reads it as a single default column.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "regular_key", "regular_val"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
|
|
// GetEntity on a PutEntity key.
|
|
PinnableWideColumns result;
|
|
ASSERT_OK(
|
|
db_->GetEntity(ro, db_->DefaultColumnFamily(), "entity_key", &result));
|
|
ASSERT_EQ(result.columns().size(), 3u);
|
|
ASSERT_EQ(result.columns()[0].name(), kDefaultWideColumnName);
|
|
ASSERT_EQ(result.columns()[0].value(), "default_val");
|
|
ASSERT_EQ(result.columns()[1].name(), "col_a");
|
|
ASSERT_EQ(result.columns()[1].value(), "val_a");
|
|
ASSERT_EQ(result.columns()[2].name(), "col_b");
|
|
ASSERT_EQ(result.columns()[2].value(), "val_b");
|
|
|
|
// GetEntity on a regular Put key returns single default column.
|
|
PinnableWideColumns result2;
|
|
ASSERT_OK(db_->GetEntity(ro, db_->DefaultColumnFamily(), "regular_key",
|
|
&result2));
|
|
ASSERT_EQ(result2.columns().size(), 1u);
|
|
ASSERT_EQ(result2.columns()[0].name(), kDefaultWideColumnName);
|
|
ASSERT_EQ(result2.columns()[0].value(), "regular_val");
|
|
|
|
// GetEntity on nonexistent key.
|
|
PinnableWideColumns result3;
|
|
ASSERT_TRUE(
|
|
db_->GetEntity(ro, db_->DefaultColumnFamily(), "no_such_key", &result3)
|
|
.IsNotFound());
|
|
}
|
|
}
|
|
|
|
// Multiple overlapping L0 SSTs: the level iterator must coordinate trie
|
|
// iterators across multiple SST files with overlapping key ranges.
|
|
TEST_F(TrieIndexDBTest, OverlappingL0SSTs) {
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
|
|
// SST1: keys 00..49.
|
|
for (int i = 0; i < 50; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%03d", i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, "sst1_" + std::to_string(i)));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// SST2: keys 25..74 (overlapping with SST1).
|
|
for (int i = 25; i < 75; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%03d", i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, "sst2_" + std::to_string(i)));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// SST3: keys 50..99 (overlapping with SST2).
|
|
for (int i = 50; i < 100; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%03d", i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, "sst3_" + std::to_string(i)));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Verify: latest writer wins for overlapping keys.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
auto kvs = ScanAllKeyValues(ro);
|
|
ASSERT_EQ(kvs.size(), 100u);
|
|
for (int i = 0; i < 100; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%03d", i);
|
|
ASSERT_EQ(kvs[i].first, key);
|
|
if (i < 25) {
|
|
ASSERT_EQ(kvs[i].second, "sst1_" + std::to_string(i));
|
|
} else if (i < 50) {
|
|
ASSERT_EQ(kvs[i].second, "sst2_" + std::to_string(i));
|
|
} else {
|
|
ASSERT_EQ(kvs[i].second, "sst3_" + std::to_string(i));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Compact all L0 → L1, re-verify.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
ASSERT_EQ(ScanAllKeyValues(ro).size(), 100u);
|
|
}
|
|
}
|
|
|
|
// CompactRange with a sub-range: only part of the key space is compacted.
|
|
TEST_F(TrieIndexDBTest, CompactRangeSubset) {
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
|
|
for (int i = 0; i < 26; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%c", 'a' + i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, "val_" + std::to_string(i)));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Compact only the middle range [key_f, key_p).
|
|
std::string begin = "key_f";
|
|
std::string end = "key_p";
|
|
Slice begin_s(begin);
|
|
Slice end_s(end);
|
|
CompactRangeOptions cro;
|
|
ASSERT_OK(db_->CompactRange(cro, &begin_s, &end_s));
|
|
|
|
// All 26 keys should still be readable.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
ASSERT_EQ(ScanAllKeys(ro).size(), 26u);
|
|
}
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_a", "val_0"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_z", "val_25"));
|
|
}
|
|
|
|
// Write keys, delete all of them, compact. The DB should be empty.
|
|
TEST_F(TrieIndexDBTest, AllKeysDeletedCompaction) {
|
|
ASSERT_OK(OpenDB());
|
|
|
|
for (int i = 0; i < 20; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%02d", i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, "val"));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Delete all keys.
|
|
for (int i = 0; i < 20; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%02d", i);
|
|
ASSERT_OK(db_->Delete(WriteOptions(), key));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Before compaction: tombstones hide all keys.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
ASSERT_EQ(ScanAllKeys(ro).size(), 0u);
|
|
}
|
|
|
|
// After compaction: all tombstones and data are gone.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
ASSERT_EQ(ScanAllKeys(ro).size(), 0u);
|
|
}
|
|
}
|
|
|
|
// Keys with special byte values: 0x00, 0xFF, embedded nulls, very short keys.
|
|
// These exercise trie byte-traversal edge cases.
|
|
TEST_F(TrieIndexDBTest, BinaryKeyEdgeCases) {
|
|
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
|
|
// All keys in sorted order (BytewiseComparator).
|
|
std::vector<std::pair<std::string, std::string>> kvs = {
|
|
{std::string("\x00", 1), "val_null"},
|
|
{std::string("\x00\x00\x00", 3), "val_triple_null"},
|
|
{std::string("\x01", 1), "val_0x01"},
|
|
{"a", "val_a"},
|
|
{std::string("a\x00"
|
|
"b",
|
|
3),
|
|
"val_a_null_b"},
|
|
{"mid", "val_mid"},
|
|
{std::string("\xfe", 1), "val_0xfe"},
|
|
{std::string("\xff", 1), "val_0xff"},
|
|
{std::string("\xff\xff\xff", 3), "val_triple_ff"},
|
|
};
|
|
|
|
for (const auto& kv : kvs) {
|
|
ASSERT_OK(db_->Put(WriteOptions(), kv.first, kv.second));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Forward scan: all keys in order through both indexes.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
auto actual = ScanAllKeyValues(ro);
|
|
ASSERT_EQ(actual.size(), kvs.size());
|
|
for (size_t i = 0; i < kvs.size(); i++) {
|
|
SCOPED_TRACE("key index " + std::to_string(i));
|
|
ASSERT_EQ(actual[i].first, kvs[i].first);
|
|
ASSERT_EQ(actual[i].second, kvs[i].second);
|
|
}
|
|
}
|
|
|
|
// Point lookups for boundary keys.
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes(std::string("\x00", 1), "val_null"));
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes(std::string("\xff\xff\xff", 3), "val_triple_ff"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes(std::string("a\x00"
|
|
"b",
|
|
3),
|
|
"val_a_null_b"));
|
|
|
|
// Seek to embedded-null key.
|
|
ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes(
|
|
std::string("\x00", 1), std::string("\x00", 1), "val_null"));
|
|
}
|
|
|
|
// Puts with empty string values.
|
|
TEST_F(TrieIndexDBTest, EmptyValuePuts) {
|
|
ASSERT_OK(OpenDB());
|
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key1", ""));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key2", "non_empty"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key3", ""));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key1", ""));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key2", "non_empty"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key3", ""));
|
|
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
std::vector<std::pair<std::string, std::string>>{
|
|
{"key1", ""}, {"key2", "non_empty"}, {"key3", ""}}));
|
|
}
|
|
|
|
// Zlib compression: data blocks are compressed, UDI block is not.
|
|
// Verifies that reads through the trie index work with compressed data.
|
|
TEST_F(TrieIndexDBTest, CompressionZlib) {
|
|
if (!Zlib_Supported()) {
|
|
ROCKSDB_GTEST_SKIP("Zlib not linked");
|
|
return;
|
|
}
|
|
options_.compression = kZlibCompression;
|
|
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
|
|
for (int i = 0; i < 100; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%04d", i);
|
|
// Compressible value (repeated pattern).
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, std::string(200, 'A' + (i % 26))));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Forward scan.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
ASSERT_EQ(ScanAllKeys(ro).size(), 100u);
|
|
}
|
|
|
|
// Spot-check a few keys.
|
|
for (int i : {0, 49, 99}) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%04d", i);
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyGetBothIndexes(key, std::string(200, 'A' + (i % 26))));
|
|
}
|
|
}
|
|
|
|
// Iterator stability: an iterator pinned to a snapshot should not see data
|
|
// written after the iterator was created, even after flush.
|
|
TEST_F(TrieIndexDBTest, IteratorStabilityDuringFlush) {
|
|
ASSERT_OK(OpenDB());
|
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key1", "v1"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key2", "v2"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Open iterator (implicitly pins a snapshot).
|
|
auto ro = TrieIndexReadOptions();
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
iter->SeekToFirst();
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key1");
|
|
|
|
// Write + flush new data while iterator is open.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key3", "v3"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Existing iterator should NOT see key3.
|
|
iter->Next();
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "key2");
|
|
iter->Next();
|
|
ASSERT_FALSE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
|
|
// New iterator should see all three keys.
|
|
std::unique_ptr<Iterator> iter2(db_->NewIterator(ro));
|
|
iter2->SeekToFirst();
|
|
ASSERT_TRUE(iter2->Valid());
|
|
ASSERT_EQ(iter2->key().ToString(), "key1");
|
|
iter2->Next();
|
|
ASSERT_TRUE(iter2->Valid());
|
|
ASSERT_EQ(iter2->key().ToString(), "key2");
|
|
iter2->Next();
|
|
ASSERT_TRUE(iter2->Valid());
|
|
ASSERT_EQ(iter2->key().ToString(), "key3");
|
|
ASSERT_OK(iter2->status());
|
|
}
|
|
|
|
// iterate_upper_bound without prefix scan: the iterator should stop at the
|
|
// upper bound.
|
|
TEST_F(TrieIndexDBTest, IteratorUpperBound) {
|
|
ASSERT_OK(OpenDB(/*block_size=*/64));
|
|
|
|
for (const auto& k : {"aa", "bb", "cc", "dd", "ee", "ff"}) {
|
|
ASSERT_OK(db_->Put(WriteOptions(), k, std::string("v_") + k));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
for (const auto& base_ro :
|
|
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard");
|
|
|
|
// Upper bound = "dd" → should see aa, bb, cc only.
|
|
std::string ub_str = "dd";
|
|
Slice ub(ub_str);
|
|
ReadOptions ro = base_ro;
|
|
ro.iterate_upper_bound = &ub;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
|
|
std::vector<std::string> keys;
|
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
keys.push_back(iter->key().ToString());
|
|
}
|
|
ASSERT_OK(iter->status());
|
|
ASSERT_EQ(keys, (std::vector<std::string>{"aa", "bb", "cc"}));
|
|
|
|
// Upper bound = "aa" → should see nothing.
|
|
std::string ub2_str = "aa";
|
|
Slice ub2(ub2_str);
|
|
ReadOptions ro2 = base_ro;
|
|
ro2.iterate_upper_bound = &ub2;
|
|
std::unique_ptr<Iterator> iter2(db_->NewIterator(ro2));
|
|
iter2->SeekToFirst();
|
|
ASSERT_FALSE(iter2->Valid());
|
|
ASSERT_OK(iter2->status());
|
|
|
|
// Upper bound after all data → should see everything.
|
|
std::string ub3_str = "zz";
|
|
Slice ub3(ub3_str);
|
|
ReadOptions ro3 = base_ro;
|
|
ro3.iterate_upper_bound = &ub3;
|
|
std::unique_ptr<Iterator> iter3(db_->NewIterator(ro3));
|
|
std::vector<std::string> all_keys;
|
|
for (iter3->SeekToFirst(); iter3->Valid(); iter3->Next()) {
|
|
all_keys.push_back(iter3->key().ToString());
|
|
}
|
|
ASSERT_OK(iter3->status());
|
|
ASSERT_EQ(all_keys.size(), 6u);
|
|
}
|
|
}
|
|
|
|
// Combined snapshot + upper_bound: iterator sees the snapshot's view of data,
|
|
// bounded by iterate_upper_bound.
|
|
TEST_F(TrieIndexDBTest, IteratorSnapshotAndUpperBound) {
|
|
ASSERT_OK(OpenDB());
|
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_a", "old_a"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_b", "old_b"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_c", "old_c"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_d", "old_d"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
|
|
// Overwrite some keys after the snapshot.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_a", "new_a"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_c", "new_c"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_e", "new_e"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
for (const auto& base_ro :
|
|
{StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard");
|
|
|
|
std::string ub_str = "key_d";
|
|
Slice ub(ub_str);
|
|
ReadOptions ro = base_ro;
|
|
ro.snapshot = snap;
|
|
ro.iterate_upper_bound = &ub;
|
|
|
|
auto kvs = ScanAllKeyValues(ro);
|
|
// Snapshot view: old values. Upper bound excludes key_d and key_e.
|
|
ASSERT_EQ(kvs.size(), 3u);
|
|
ASSERT_EQ(kvs[0],
|
|
std::make_pair(std::string("key_a"), std::string("old_a")));
|
|
ASSERT_EQ(kvs[1],
|
|
std::make_pair(std::string("key_b"), std::string("old_b")));
|
|
ASSERT_EQ(kvs[2],
|
|
std::make_pair(std::string("key_c"), std::string("old_c")));
|
|
}
|
|
db_->ReleaseSnapshot(snap);
|
|
}
|
|
|
|
// VerifyChecksum goes through SeekToFirst+Next on the index iterator.
|
|
TEST_F(TrieIndexDBTest, VerifyChecksumWithTrieUDI) {
|
|
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
|
|
for (int i = 0; i < 50; i++) {
|
|
char key[16];
|
|
snprintf(key, sizeof(key), "key_%03d", i);
|
|
ASSERT_OK(db_->Put(WriteOptions(), key, "value_" + std::to_string(i)));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// VerifyChecksum with default ReadOptions (standard index).
|
|
ASSERT_OK(db_->VerifyChecksum());
|
|
|
|
// VerifyChecksum with trie ReadOptions.
|
|
ASSERT_OK(db_->VerifyChecksum(TrieIndexReadOptions()));
|
|
}
|
|
|
|
// Many small SSTs from frequent flushes: exercises trie iteration across
|
|
// many L0 files without compaction.
|
|
TEST_F(TrieIndexDBTest, ManySmallSSTs) {
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// 50 flushes, 2 keys each → 50 SSTs.
|
|
for (int f = 0; f < 50; f++) {
|
|
char k1[16];
|
|
char k2[16];
|
|
snprintf(k1, sizeof(k1), "key_%04d", f * 2);
|
|
snprintf(k2, sizeof(k2), "key_%04d", f * 2 + 1);
|
|
ASSERT_OK(db_->Put(WriteOptions(), k1, "v" + std::to_string(f * 2)));
|
|
ASSERT_OK(db_->Put(WriteOptions(), k2, "v" + std::to_string(f * 2 + 1)));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
}
|
|
|
|
// Verify all 100 keys are readable.
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
auto keys = ScanAllKeys(ro);
|
|
ASSERT_EQ(keys.size(), 100u);
|
|
}
|
|
|
|
// Spot-check first and last.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_0000", "v0"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_0099", "v99"));
|
|
|
|
// Compact everything into one SST, re-verify.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard");
|
|
ASSERT_EQ(ScanAllKeys(ro).size(), 100u);
|
|
}
|
|
}
|
|
|
|
// Merge values accumulate across multiple compaction rounds.
|
|
TEST_F(TrieIndexDBTest, MergeAcrossMultipleCompactions) {
|
|
options_.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// Round 1: Put base value.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key", "base"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key", "base"));
|
|
|
|
// Round 2: Merge "m1".
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key", "m1"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key", "base,m1"));
|
|
|
|
// Round 3: Merge "m2".
|
|
ASSERT_OK(db_->Merge(WriteOptions(), "key", "m2"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key", "base,m1,m2"));
|
|
|
|
// Forward scan also returns the accumulated value.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
std::vector<std::pair<std::string, std::string>>{{"key", "base,m1,m2"}}));
|
|
}
|
|
|
|
// Graceful degradation: reopen a DB that was written with UDI, but without
|
|
// the UDI factory configured. Reads should fall back to the standard index.
|
|
TEST_F(TrieIndexDBTest, ReopenWithoutTrieUDI) {
|
|
ASSERT_OK(OpenDB());
|
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_a", "val_a"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_b", "val_b"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->Close());
|
|
db_.reset();
|
|
|
|
// Reopen WITHOUT UDI. The SST has a UDI meta block, but it's ignored.
|
|
ASSERT_OK(OpenDBWithoutUDI());
|
|
|
|
// Reads via standard index should work (UDI meta block is just ignored).
|
|
std::string val;
|
|
ASSERT_OK(db_->Get(ReadOptions(), "key_a", &val));
|
|
ASSERT_EQ(val, "val_a");
|
|
ASSERT_OK(db_->Get(ReadOptions(), "key_b", &val));
|
|
ASSERT_EQ(val, "val_b");
|
|
|
|
// Forward scan.
|
|
auto keys = ScanAllKeys(ReadOptions());
|
|
ASSERT_EQ(keys.size(), 2u);
|
|
ASSERT_EQ(keys[0], "key_a");
|
|
ASSERT_EQ(keys[1], "key_b");
|
|
}
|
|
|
|
// Mixed SSTs: some written with UDI, some without. Both should be readable
|
|
// through both index paths.
|
|
TEST_F(TrieIndexDBTest, MixedSSTsWithAndWithoutUDI) {
|
|
options_.disable_auto_compactions = true;
|
|
|
|
// Phase 1: Write with UDI → SST1 has UDI + standard index.
|
|
ASSERT_OK(OpenDB());
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_01", "udi_val1"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_02", "udi_val2"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->Close());
|
|
db_.reset();
|
|
|
|
// Phase 2: Reopen WITHOUT UDI, write more → SST2 has only standard index.
|
|
ASSERT_OK(OpenDBWithoutUDI());
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_03", "noudi_val3"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "key_04", "noudi_val4"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->Close());
|
|
db_.reset();
|
|
|
|
// Phase 3: Reopen WITH UDI again. SST1 uses trie, SST2 falls back to
|
|
// standard index (UDI block missing → logged warning, graceful fallback).
|
|
options_.disable_auto_compactions = true;
|
|
ASSERT_OK(OpenDB());
|
|
|
|
// All 4 keys should be readable through both index paths.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_01", "udi_val1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_02", "udi_val2"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_03", "noudi_val3"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_04", "noudi_val4"));
|
|
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyForwardScanBothIndexes({"key_01", "key_02", "key_03", "key_04"}));
|
|
|
|
// Compact: merges UDI + non-UDI SSTs → new SST has UDI.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
ASSERT_NO_FATAL_FAILURE(
|
|
VerifyForwardScanBothIndexes({"key_01", "key_02", "key_03", "key_04"}));
|
|
}
|
|
|
|
// TransactionDB commit: Put + Delete inside a transaction, then commit.
|
|
TEST_F(TrieIndexDBTest, TransactionCommit) {
|
|
options_.create_if_missing = true;
|
|
BlockBasedTableOptions table_options;
|
|
table_options.user_defined_index_factory = trie_factory_;
|
|
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
last_options_ = options_;
|
|
|
|
TransactionDB* txn_db = nullptr;
|
|
ASSERT_OK(
|
|
TransactionDB::Open(options_, TransactionDBOptions(), dbname_, &txn_db));
|
|
db_.reset(txn_db);
|
|
|
|
// Pre-populate a key.
|
|
ASSERT_OK(txn_db->Put(WriteOptions(), "pre_key", "pre_val"));
|
|
ASSERT_OK(txn_db->Flush(FlushOptions()));
|
|
|
|
// Begin transaction: Put + Delete + Commit.
|
|
std::unique_ptr<Transaction> txn(
|
|
txn_db->BeginTransaction(WriteOptions(), TransactionOptions()));
|
|
ASSERT_OK(txn->Put("txn_key1", "txn_val1"));
|
|
ASSERT_OK(txn->Delete("pre_key"));
|
|
ASSERT_OK(txn->Commit());
|
|
|
|
ASSERT_OK(txn_db->Flush(FlushOptions()));
|
|
|
|
// Verify through both indexes.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("txn_key1", "txn_val1"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("pre_key"));
|
|
}
|
|
|
|
// TransactionDB rollback: writes should be discarded. Rollback writes DELETE
|
|
// entries to WAL, which was previously restricted for UDI.
|
|
TEST_F(TrieIndexDBTest, TransactionRollback) {
|
|
options_.create_if_missing = true;
|
|
BlockBasedTableOptions table_options;
|
|
table_options.user_defined_index_factory = trie_factory_;
|
|
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
last_options_ = options_;
|
|
|
|
TransactionDB* txn_db = nullptr;
|
|
ASSERT_OK(
|
|
TransactionDB::Open(options_, TransactionDBOptions(), dbname_, &txn_db));
|
|
db_.reset(txn_db);
|
|
|
|
// Pre-populate data and flush.
|
|
ASSERT_OK(txn_db->Put(WriteOptions(), "keep_key", "keep_val"));
|
|
ASSERT_OK(txn_db->Flush(FlushOptions()));
|
|
|
|
// Begin transaction, write, then ROLLBACK.
|
|
std::unique_ptr<Transaction> txn(
|
|
txn_db->BeginTransaction(WriteOptions(), TransactionOptions()));
|
|
ASSERT_OK(txn->Put("rollback_key", "rollback_val"));
|
|
ASSERT_OK(txn->Delete("keep_key"));
|
|
ASSERT_OK(txn->Rollback());
|
|
|
|
ASSERT_OK(txn_db->Flush(FlushOptions()));
|
|
|
|
// Original data should be unchanged. Rolled-back writes should not appear.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("keep_key", "keep_val"));
|
|
ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("rollback_key"));
|
|
|
|
// Forward scan: only the original key.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(
|
|
std::vector<std::pair<std::string, std::string>>{
|
|
{"keep_key", "keep_val"}}));
|
|
}
|
|
|
|
// total_order_seek with prefix_extractor: a common stress-test configuration.
|
|
// With total_order_seek=true, SeekToFirst and full forward scan should work
|
|
// correctly even when a prefix extractor is configured.
|
|
TEST_F(TrieIndexDBTest, TotalOrderSeekWithPrefixExtractor) {
|
|
options_.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
|
ASSERT_OK(OpenDB(/*block_size=*/128));
|
|
|
|
// Keys with different prefixes.
|
|
ASSERT_OK(db_->Put(WriteOptions(), "aaa_1", "v1"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "aaa_2", "v2"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "bbb_1", "v3"));
|
|
ASSERT_OK(db_->Put(WriteOptions(), "ccc_1", "v4"));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// With total_order_seek=true, scan all keys across prefixes.
|
|
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard");
|
|
base_ro.total_order_seek = true;
|
|
auto keys = ScanAllKeys(base_ro);
|
|
ASSERT_EQ(keys.size(), 4u);
|
|
ASSERT_EQ(keys[0], "aaa_1");
|
|
ASSERT_EQ(keys[1], "aaa_2");
|
|
ASSERT_EQ(keys[2], "bbb_1");
|
|
ASSERT_EQ(keys[3], "ccc_1");
|
|
|
|
// Seek across prefix boundary.
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(base_ro));
|
|
iter->Seek("aab");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "bbb_1");
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
|
|
// auto_prefix_mode: let RocksDB decide per-seek.
|
|
for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) {
|
|
SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard");
|
|
base_ro.auto_prefix_mode = true;
|
|
std::unique_ptr<Iterator> iter(db_->NewIterator(base_ro));
|
|
iter->Seek("bbb_1");
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "bbb_1");
|
|
ASSERT_OK(iter->status());
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Multi-level SST + DeleteRange randomized test
|
|
//
|
|
// Historically bug-prone area: range tombstones interact with data across
|
|
// LSM levels (L0, L1, L2+), and the trie index must correctly handle
|
|
// seek/scan when blocks are partially or entirely covered by range deletions
|
|
// at different levels.
|
|
//
|
|
// Strategy:
|
|
// 1. Populate bottommost level with baseline data (flush + compact)
|
|
// 2. Write overlapping data and DeleteRanges to L0 (multiple rounds)
|
|
// 3. Partial compactions to create data at intermediate levels
|
|
// 4. Verify reads match between standard and trie index after each mutation
|
|
// 5. Snapshot before large DeleteRange, verify snapshot preserves state
|
|
// 6. Re-insert into deleted ranges, compact, and re-verify
|
|
// ============================================================================
|
|
TEST_F(TrieIndexDBTest, MultiLevelDeleteRangeRandomized) {
|
|
uint32_t seed = static_cast<uint32_t>(
|
|
std::chrono::system_clock::now().time_since_epoch().count());
|
|
SCOPED_TRACE("seed=" + std::to_string(seed));
|
|
Random rnd(seed);
|
|
|
|
options_.disable_auto_compactions = true;
|
|
// Small block size forces many data blocks (and thus many trie entries).
|
|
ASSERT_OK(OpenDB(/*block_size=*/256));
|
|
|
|
const int kMaxKey = 500;
|
|
|
|
auto format_key = [](int k) {
|
|
char buf[16];
|
|
snprintf(buf, sizeof(buf), "key_%05d", k);
|
|
return std::string(buf);
|
|
};
|
|
|
|
// Core correctness check: forward scan via both indexes must match.
|
|
auto verify_scan_consistency = [&]() {
|
|
auto standard_kvs = ScanAllKeyValues(StandardIndexReadOptions());
|
|
auto trie_kvs = ScanAllKeyValues(TrieIndexReadOptions());
|
|
ASSERT_EQ(standard_kvs, trie_kvs)
|
|
<< "Scan mismatch: standard=" << standard_kvs.size()
|
|
<< " trie=" << trie_kvs.size();
|
|
};
|
|
|
|
// Phase 1: Populate bottommost level with baseline data.
|
|
for (int i = 0; i < 200; i++) {
|
|
int k = rnd.Uniform(kMaxKey);
|
|
ASSERT_OK(db_->Put(WriteOptions(), format_key(k),
|
|
"base_" + rnd.RandomString(20)));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());
|
|
|
|
// Phase 2: Write overlapping data + DeleteRanges across multiple rounds.
|
|
// Each round creates L0 SSTs with a mix of Puts and DeleteRanges,
|
|
// with occasional partial compactions to push data to intermediate levels.
|
|
for (int round = 0; round < 5; round++) {
|
|
SCOPED_TRACE("round=" + std::to_string(round));
|
|
|
|
// Write some new/updated keys.
|
|
int num_writes = 30 + rnd.Uniform(70);
|
|
for (int i = 0; i < num_writes; i++) {
|
|
int k = rnd.Uniform(kMaxKey);
|
|
ASSERT_OK(
|
|
db_->Put(WriteOptions(), format_key(k),
|
|
"r" + std::to_string(round) + "_" + rnd.RandomString(15)));
|
|
}
|
|
|
|
// Issue 1-3 random DeleteRanges per round.
|
|
int num_ranges = 1 + rnd.Uniform(3);
|
|
for (int r = 0; r < num_ranges; r++) {
|
|
int range_start = rnd.Uniform(kMaxKey - 10);
|
|
int range_end = range_start + 5 + rnd.Uniform(50);
|
|
if (range_end > kMaxKey) {
|
|
range_end = kMaxKey;
|
|
}
|
|
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
|
|
format_key(range_start),
|
|
format_key(range_end)));
|
|
}
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());
|
|
|
|
// On odd rounds, do a partial compaction to push some data down,
|
|
// creating a multi-level structure where range tombstones at L0
|
|
// must shadow data at L1/L2.
|
|
if (round % 2 == 1) {
|
|
int compact_start = rnd.Uniform(kMaxKey / 2);
|
|
int compact_end = compact_start + kMaxKey / 4;
|
|
std::string start_key = format_key(compact_start);
|
|
std::string end_key = format_key(compact_end);
|
|
Slice s(start_key);
|
|
Slice e(end_key);
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &s, &e));
|
|
ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());
|
|
}
|
|
}
|
|
|
|
// Phase 3: Snapshot, then delete a large range. The snapshot must
|
|
// preserve the pre-deletion state while current reads see the deletion.
|
|
const Snapshot* snap = db_->GetSnapshot();
|
|
auto snap_kvs = ScanAllKeyValues(StandardIndexReadOptions());
|
|
|
|
int big_start = rnd.Uniform(kMaxKey / 4);
|
|
int big_end = big_start + kMaxKey / 3;
|
|
if (big_end > kMaxKey) {
|
|
big_end = kMaxKey;
|
|
}
|
|
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
|
|
format_key(big_start), format_key(big_end)));
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
|
|
// Current state should reflect the deletion.
|
|
ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());
|
|
|
|
// Snapshot state should be unchanged.
|
|
ASSERT_NO_FATAL_FAILURE(VerifyForwardScanBothIndexes(snap, snap_kvs));
|
|
|
|
db_->ReleaseSnapshot(snap);
|
|
|
|
// Phase 4: Re-insert keys into the deleted range, creating a pattern
|
|
// where range tombstones and live data coexist at different levels.
|
|
for (int i = big_start; i < big_end && i < kMaxKey; i += 3) {
|
|
ASSERT_OK(db_->Put(WriteOptions(), format_key(i),
|
|
"reinserted_" + rnd.RandomString(10)));
|
|
}
|
|
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());
|
|
|
|
// Phase 5: Full compaction — all range tombstones should be resolved.
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
ASSERT_NO_FATAL_FAILURE(verify_scan_consistency());
|
|
|
|
// Phase 6: Point lookups for a sample of keys — both indexes must agree.
|
|
for (int i = 0; i < kMaxKey; i += 7) {
|
|
std::string key = format_key(i);
|
|
std::string std_val;
|
|
std::string trie_val;
|
|
Status s1 = db_->Get(StandardIndexReadOptions(), key, &std_val);
|
|
Status s2 = db_->Get(TrieIndexReadOptions(), key, &trie_val);
|
|
ASSERT_EQ(s1.code(), s2.code()) << "Status mismatch for " << key;
|
|
if (s1.ok()) {
|
|
ASSERT_EQ(std_val, trie_val) << "Value mismatch for " << key;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace trie_index
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
int main(int argc, char** argv) {
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|