Summary: Remove the restriction that limited UDI to ingest-only, Puts-only use cases. This enables UDI plugins (including the trie index from https://github.com/facebook/rocksdb/issues/14310) to work with all operation types: Put, Delete, Merge, SingleDelete, PutEntity, etc. Part of https://github.com/facebook/rocksdb/issues/12396 Pull Request resolved: https://github.com/facebook/rocksdb/pull/14399 Reviewed By: pdillinger Differential Revision: D96392636 Pulled By: xingbowang fbshipit-source-id: 0f1e6c38531fa72539a0e2c6a3dffff333392b4c
567 lines
23 KiB
C++
567 lines
23 KiB
C++
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#include "utilities/trie_index/trie_index_factory.h"
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstring>
|
|
|
|
#include "db/dbformat.h"
|
|
#include "rocksdb/comparator.h"
|
|
#include "util/coding.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
namespace trie_index {
|
|
|
|
// ============================================================================
|
|
// TrieIndexBuilder
|
|
// ============================================================================
|
|
|
|
TrieIndexBuilder::TrieIndexBuilder(const Comparator* comparator)
|
|
: comparator_(comparator),
|
|
finished_(false),
|
|
must_use_separator_with_seq_(false) {}
|
|
|
|
Slice TrieIndexBuilder::AddIndexEntry(const Slice& last_key_in_current_block,
|
|
const Slice* first_key_in_next_block,
|
|
const BlockHandle& block_handle,
|
|
std::string* separator_scratch,
|
|
const IndexEntryContext& context) {
|
|
SequenceNumber last_key_seq = context.last_key_seq;
|
|
|
|
// Compute a short separator between the two user keys using the
|
|
// comparator. FindShortestSeparator takes `*start` as both input and output:
|
|
// input: *start == last_key_in_current_block
|
|
// output: *start modified to shortest string in [start, limit)
|
|
// If first_key_in_next_block is nullptr, this is the last block — use a
|
|
// short successor of the last key.
|
|
Slice separator;
|
|
// True when last_key and first_key_in_next_block are the same user key
|
|
// (same-user-key block boundary). Computed once and reused below for
|
|
// both the sticky flag and the per-entry seqno decision.
|
|
bool same_user_key = false;
|
|
if (first_key_in_next_block != nullptr) {
|
|
same_user_key = comparator_->Compare(last_key_in_current_block,
|
|
*first_key_in_next_block) == 0;
|
|
|
|
*separator_scratch = last_key_in_current_block.ToString();
|
|
comparator_->FindShortestSeparator(separator_scratch,
|
|
*first_key_in_next_block);
|
|
separator = Slice(*separator_scratch);
|
|
|
|
// Detect same-user-key block boundary: if the two user keys are identical,
|
|
// FindShortestSeparator returns the same key for both sides, making it
|
|
// impossible to distinguish the two blocks. Set the sticky flag so that
|
|
// at Finish() time, ALL separators will include encoded seqnos.
|
|
// This mirrors ShortenedIndexBuilder::must_use_separator_with_seq_.
|
|
if (!must_use_separator_with_seq_ && same_user_key) {
|
|
must_use_separator_with_seq_ = true;
|
|
}
|
|
|
|
// Edge case: FindShortestSeparator may fail to shorten the key even when
|
|
// the user keys are different. Example: FindShortestSeparator("abc","abd")
|
|
// returns "abc" unchanged because incrementing 'c' would yield "abd" which
|
|
// is not < limit. When the resulting separator matches the previous entry's
|
|
// separator, the blocks will be grouped into the same run in Finish().
|
|
// We must mark this as a same-user-key boundary so it gets a real seqno
|
|
// rather than kMaxSequenceNumber (which would trigger the overflow block
|
|
// assertion in Finish()).
|
|
if (!same_user_key && !buffered_entries_.empty() &&
|
|
buffered_entries_.back().separator_key == *separator_scratch) {
|
|
same_user_key = true;
|
|
if (!must_use_separator_with_seq_) {
|
|
must_use_separator_with_seq_ = true;
|
|
}
|
|
}
|
|
} else {
|
|
// Last block: use the last key itself as the separator, NOT a shortened
|
|
// successor. This matches the standard ShortenedIndexBuilder behavior
|
|
// (see index_builder.h GetSeparatorWithSeq lines 278-286): it only calls
|
|
// FindShortInternalKeySuccessor when shortening_mode is
|
|
// kShortenSeparatorsAndSuccessor, which is not the default. With the
|
|
// default kShortenSeparators, the last block's separator is simply
|
|
// last_key_in_current_block.
|
|
//
|
|
// Why this matters: FindShortSuccessor can widen the key range. For
|
|
// example, if the actual last key is "9\xff\xff", FindShortSuccessor
|
|
// produces ":" (0x3A). The trie would then claim to cover keys up to
|
|
// ":", but the data block only contains keys up to "9\xff\xff". A seek
|
|
// targeting a key in that gap (e.g., "9\xff\xff\x01") would find a
|
|
// block via the trie that contains no matching data, causing iterator
|
|
// desynchronization — the trie index returns a valid block while the
|
|
// standard index correctly reports no match.
|
|
separator = last_key_in_current_block;
|
|
|
|
// Edge case: if this last block's separator matches the previous entry's
|
|
// separator, they share the same user key (same-user-key run boundary).
|
|
if (!buffered_entries_.empty() &&
|
|
comparator_->Compare(buffered_entries_.back().separator_key,
|
|
separator) == 0) {
|
|
same_user_key = true;
|
|
if (!must_use_separator_with_seq_) {
|
|
must_use_separator_with_seq_ = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Buffer the entry for deferred trie construction in Finish().
|
|
// We buffer rather than adding to the trie immediately because the
|
|
// all-or-nothing seqno encoding decision is made at Finish() time.
|
|
TrieBlockHandle handle;
|
|
handle.offset = block_handle.offset;
|
|
handle.size = block_handle.size;
|
|
|
|
BufferedEntry entry;
|
|
entry.separator_key = separator.ToString();
|
|
// For same-user-key boundaries, use the actual seqno of the last key.
|
|
// For different-user-key boundaries, use kMaxSequenceNumber (sentinel
|
|
// meaning "this is not a same-key boundary, never advance past it").
|
|
if (same_user_key) {
|
|
entry.seqno = last_key_seq;
|
|
} else {
|
|
entry.seqno = kMaxSequenceNumber;
|
|
}
|
|
entry.handle = handle;
|
|
buffered_entries_.push_back(std::move(entry));
|
|
|
|
return separator;
|
|
}
|
|
|
|
void TrieIndexBuilder::OnKeyAdded(const Slice& /*key*/, ValueType /*type*/,
|
|
const Slice& /*value*/) {
|
|
// No-op: the trie is built from separator keys in AddIndexEntry(), not
|
|
// from individual key-value pairs.
|
|
}
|
|
|
|
Status TrieIndexBuilder::Finish(Slice* index_contents) {
|
|
if (finished_) {
|
|
return Status::InvalidArgument("TrieIndexBuilder::Finish called twice");
|
|
}
|
|
finished_ = true;
|
|
|
|
// Use seqno side-table when any same-user-key block boundary was detected.
|
|
// The must_use_separator_with_seq_ flag is set in AddIndexEntry() whenever
|
|
// the comparator finds two identical user keys at a block boundary. This
|
|
// always implies duplicate separators exist (since
|
|
// FindShortestSeparator("foo", "foo") = "foo"), so no separate scan is
|
|
// needed.
|
|
bool use_seqno = must_use_separator_with_seq_;
|
|
trie_builder_.SetHasSeqnoEncoding(use_seqno);
|
|
|
|
if (use_seqno) {
|
|
// Feed de-duplicated separators to the trie with seqno side-table metadata.
|
|
// Consecutive identical separators form a "run" — only the first occurrence
|
|
// goes into the trie (as the primary block). The remaining blocks in the
|
|
// run are stored as overflow blocks in the side-table.
|
|
//
|
|
// For non-boundary separators (different user keys), seqno is set to 0
|
|
// (sentinel = "never advance past this leaf"). kMaxSequenceNumber from
|
|
// AddIndexEntry is mapped to 0 here.
|
|
size_t i = 0;
|
|
while (i < buffered_entries_.size()) {
|
|
const auto& entry = buffered_entries_[i];
|
|
|
|
// Count how many consecutive entries share this separator key.
|
|
size_t run_start = i;
|
|
size_t run_end = i + 1;
|
|
while (run_end < buffered_entries_.size() &&
|
|
buffered_entries_[run_end].separator_key == entry.separator_key) {
|
|
run_end++;
|
|
}
|
|
uint32_t block_count = static_cast<uint32_t>(run_end - run_start);
|
|
|
|
// Map kMaxSequenceNumber (non-same-key boundary) to 0 (sentinel).
|
|
uint64_t seqno = (entry.seqno == kMaxSequenceNumber) ? 0 : entry.seqno;
|
|
|
|
// Add the primary (first) block for this separator.
|
|
trie_builder_.AddKeyWithSeqno(Slice(entry.separator_key), entry.handle,
|
|
seqno, block_count);
|
|
|
|
// Add overflow blocks (2nd, 3rd, ... in the run).
|
|
// Overflow blocks only exist within same-key runs, so their seqnos
|
|
// come from last_key_seq in AddIndexEntry (never kMaxSequenceNumber).
|
|
// The seqno may be 0 when bottommost compaction zeroes all sequence
|
|
// numbers — this is valid; see AddOverflowBlock comment.
|
|
for (size_t j = run_start + 1; j < run_end; j++) {
|
|
assert(buffered_entries_[j].seqno != kMaxSequenceNumber);
|
|
trie_builder_.AddOverflowBlock(buffered_entries_[j].handle,
|
|
buffered_entries_[j].seqno);
|
|
}
|
|
|
|
i = run_end;
|
|
}
|
|
} else {
|
|
// Common case: no same-user-key boundaries, add separators directly.
|
|
// Zero overhead — no seqno data stored.
|
|
for (const auto& entry : buffered_entries_) {
|
|
trie_builder_.AddKey(Slice(entry.separator_key), entry.handle);
|
|
}
|
|
}
|
|
|
|
// Release buffered entries — no longer needed after feeding to the trie.
|
|
buffered_entries_.clear();
|
|
buffered_entries_.shrink_to_fit();
|
|
|
|
// Always finish the trie builder, even with 0 keys — this produces a valid
|
|
// serialized trie that can be parsed by NewReader. Without this, an empty
|
|
// Slice would be returned, causing InitFromData to fail with "data too short
|
|
// for header".
|
|
trie_builder_.Finish();
|
|
*index_contents = trie_builder_.GetSerializedData();
|
|
return Status::OK();
|
|
}
|
|
|
|
// ============================================================================
|
|
// TrieIndexIterator
|
|
// ============================================================================
|
|
|
|
TrieIndexIterator::TrieIndexIterator(const LoudsTrie* trie,
|
|
const Comparator* comparator,
|
|
bool has_seqno_encoding)
|
|
: comparator_(comparator),
|
|
iter_(trie),
|
|
trie_(trie),
|
|
current_scan_idx_(0),
|
|
prepared_(false),
|
|
has_seqno_encoding_(has_seqno_encoding),
|
|
overflow_run_index_(0),
|
|
overflow_run_size_(1),
|
|
overflow_base_idx_(0) {}
|
|
|
|
void TrieIndexIterator::Prepare(const ScanOptions scan_opts[],
|
|
size_t num_opts) {
|
|
scan_opts_.clear();
|
|
scan_opts_.reserve(num_opts);
|
|
for (size_t i = 0; i < num_opts; i++) {
|
|
scan_opts_.push_back(scan_opts[i]);
|
|
}
|
|
current_scan_idx_ = 0;
|
|
prepared_ = true;
|
|
}
|
|
|
|
Status TrieIndexIterator::SeekToFirstAndGetResult(IterateResult* result) {
|
|
// Reset overflow state — SeekToFirst always lands on the primary block
|
|
// of the first trie leaf.
|
|
overflow_run_index_ = 0;
|
|
overflow_run_size_ = 1;
|
|
overflow_base_idx_ = 0;
|
|
|
|
if (!iter_.SeekToFirst()) {
|
|
result->bound_check_result = IterBoundCheck::kUnknown;
|
|
result->key = Slice();
|
|
return Status::OK();
|
|
}
|
|
|
|
result->key = iter_.Key();
|
|
current_key_scratch_ = result->key.ToString();
|
|
result->key = Slice(current_key_scratch_);
|
|
|
|
// Set up overflow state for the first leaf if seqno encoding is active.
|
|
if (has_seqno_encoding_) {
|
|
uint64_t leaf_idx = iter_.LeafIndex();
|
|
uint32_t block_count = trie_->GetLeafBlockCount(leaf_idx);
|
|
overflow_run_size_ = block_count;
|
|
overflow_base_idx_ = trie_->GetOverflowBase(leaf_idx);
|
|
}
|
|
|
|
// The very first entry is always in bounds (no target to compare against
|
|
// the limit, and the first block cannot precede any scan range).
|
|
result->bound_check_result = IterBoundCheck::kInbound;
|
|
return Status::OK();
|
|
}
|
|
|
|
Status TrieIndexIterator::SeekAndGetResult(const Slice& target,
|
|
IterateResult* result,
|
|
const SeekContext& context) {
|
|
SequenceNumber target_seq = context.target_seq;
|
|
|
|
// Advance current_scan_idx_ past any scans whose limit <= target.
|
|
// This handles the multi-scan case where the caller seeks into a later
|
|
// scan range after the previous scan returned kOutOfBound.
|
|
if (prepared_) {
|
|
while (current_scan_idx_ < scan_opts_.size()) {
|
|
const auto& opts = scan_opts_[current_scan_idx_];
|
|
if (opts.range.limit.has_value() &&
|
|
comparator_->Compare(target, opts.range.limit.value()) >= 0) {
|
|
current_scan_idx_++;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Reset overflow state.
|
|
overflow_run_index_ = 0;
|
|
overflow_run_size_ = 1;
|
|
overflow_base_idx_ = 0;
|
|
|
|
// Always seek with user key only — the trie stores user-key separators.
|
|
// When seqno encoding is active, post-seek correction handles the seqno.
|
|
if (!iter_.Seek(target)) {
|
|
// No leaf has a key >= target: the target is past all blocks in this SST.
|
|
// Return kUnknown (not kOutOfBound) because exhausting this SST's trie
|
|
// says nothing about the upper bound — the next SST on the level may
|
|
// still contain in-bound keys. kOutOfBound would cause LevelIterator to
|
|
// stop scanning the level prematurely.
|
|
result->bound_check_result = IterBoundCheck::kUnknown;
|
|
result->key = Slice();
|
|
return Status::OK();
|
|
}
|
|
|
|
// Set the result key (always a user key, no suffix stripping needed).
|
|
result->key = iter_.Key();
|
|
current_key_scratch_ = result->key.ToString();
|
|
result->key = Slice(current_key_scratch_);
|
|
|
|
// ---- Post-seek correction for seqno side-table ----
|
|
//
|
|
// When has_seqno_encoding_ is true, the leaf we landed on may have a
|
|
// seqno side-table entry. We use it to determine if this is the right
|
|
// block for the given (target, target_seq).
|
|
//
|
|
// The trie stores separators that are upper bounds on block contents:
|
|
// separator_key >= all keys in the block
|
|
// separator_seqno = seqno of the last key written to the block
|
|
//
|
|
// For same-user-key boundaries, the separator IS the user key. The seqno
|
|
// determines which block within a run of same-key blocks is correct:
|
|
// - If target_seq >= leaf_seqno: this is the right block (target's
|
|
// internal key <= separator's internal key, because higher seqno means
|
|
// "smaller" internal key for the same user key)
|
|
// - If target_seq < leaf_seqno: target's internal key > separator,
|
|
// so we need to advance to the next block in the run
|
|
//
|
|
// For non-boundary leaves (leaf_seqno == 0), the `leaf_seqno != 0` guard
|
|
// short-circuits before the comparison, so we never advance. This is the
|
|
// zero-overhead common path.
|
|
if (has_seqno_encoding_ && iter_.Valid()) {
|
|
uint64_t leaf_idx = iter_.LeafIndex();
|
|
uint64_t leaf_seqno = trie_->GetLeafSeqno(leaf_idx);
|
|
|
|
if (leaf_seqno != 0 && target_seq < leaf_seqno) {
|
|
// Target's internal key is AFTER the separator (lower seqno = later
|
|
// in internal key order for same user key). Advance through overflow
|
|
// blocks.
|
|
uint32_t block_count = trie_->GetLeafBlockCount(leaf_idx);
|
|
uint32_t base = trie_->GetOverflowBase(leaf_idx);
|
|
|
|
bool found = false;
|
|
for (uint32_t oi = 0; oi < block_count - 1; oi++) {
|
|
uint64_t ov_seqno = trie_->GetOverflowSeqno(base + oi);
|
|
if (ov_seqno == 0 || target_seq >= ov_seqno) {
|
|
// This overflow block is the right one.
|
|
overflow_run_index_ = oi + 1; // 1-based (0 = primary)
|
|
overflow_run_size_ = block_count;
|
|
overflow_base_idx_ = base;
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!found) {
|
|
// target_seq is below all seqnos in this run. Advance to the next
|
|
// trie leaf (the block after the run).
|
|
if (!iter_.Next()) {
|
|
// Exhausted all blocks: target is past the end of this SST.
|
|
// Return kUnknown — see comment in Seek path above.
|
|
result->bound_check_result = IterBoundCheck::kUnknown;
|
|
result->key = Slice();
|
|
return Status::OK();
|
|
}
|
|
// Update key and overflow state for the new leaf.
|
|
result->key = iter_.Key();
|
|
current_key_scratch_ = result->key.ToString();
|
|
result->key = Slice(current_key_scratch_);
|
|
overflow_run_index_ = 0;
|
|
overflow_run_size_ = 1;
|
|
overflow_base_idx_ = 0;
|
|
// Check if the new leaf also has overflow (unlikely but possible
|
|
// with adjacent same-key runs for different user keys).
|
|
// iter_.Valid() is guaranteed here — Next() returned true above.
|
|
if (has_seqno_encoding_) {
|
|
uint64_t new_leaf = iter_.LeafIndex();
|
|
overflow_run_size_ = trie_->GetLeafBlockCount(new_leaf);
|
|
overflow_base_idx_ = trie_->GetOverflowBase(new_leaf);
|
|
}
|
|
}
|
|
} else {
|
|
// Right block (common path). Set overflow state in case this leaf
|
|
// has a run (for subsequent Next() calls).
|
|
uint32_t block_count = trie_->GetLeafBlockCount(leaf_idx);
|
|
overflow_run_index_ = 0;
|
|
overflow_run_size_ = block_count;
|
|
overflow_base_idx_ = trie_->GetOverflowBase(leaf_idx);
|
|
}
|
|
}
|
|
|
|
result->bound_check_result = CheckBounds(target);
|
|
return Status::OK();
|
|
}
|
|
|
|
Status TrieIndexIterator::NextAndGetResult(IterateResult* result) {
|
|
// Save the current separator (user key) as "previous" before advancing.
|
|
prev_key_scratch_ = current_key_scratch_;
|
|
|
|
// If we're in an overflow run and haven't exhausted it, advance within
|
|
// the run (no trie traversal needed — just increment the overflow index).
|
|
if (overflow_run_index_ + 1 < overflow_run_size_) {
|
|
overflow_run_index_++;
|
|
// The key doesn't change (same separator for all blocks in the run).
|
|
result->key = Slice(current_key_scratch_);
|
|
result->bound_check_result = CheckBounds(Slice(prev_key_scratch_));
|
|
return Status::OK();
|
|
}
|
|
|
|
// Advance to the next trie leaf.
|
|
overflow_run_index_ = 0;
|
|
overflow_run_size_ = 1;
|
|
overflow_base_idx_ = 0;
|
|
|
|
if (!iter_.Next()) {
|
|
// No more blocks: past the end of this SST.
|
|
// Return kUnknown — see comment in Seek path above.
|
|
result->bound_check_result = IterBoundCheck::kUnknown;
|
|
result->key = Slice();
|
|
return Status::OK();
|
|
}
|
|
|
|
result->key = iter_.Key();
|
|
current_key_scratch_ = result->key.ToString();
|
|
result->key = Slice(current_key_scratch_);
|
|
|
|
// Set overflow state for the new leaf.
|
|
if (has_seqno_encoding_ && iter_.Valid()) {
|
|
uint64_t leaf_idx = iter_.LeafIndex();
|
|
overflow_run_size_ = trie_->GetLeafBlockCount(leaf_idx);
|
|
overflow_base_idx_ = trie_->GetOverflowBase(leaf_idx);
|
|
}
|
|
|
|
result->bound_check_result = CheckBounds(Slice(prev_key_scratch_));
|
|
return Status::OK();
|
|
}
|
|
|
|
UserDefinedIndexBuilder::BlockHandle TrieIndexIterator::value() {
|
|
if (overflow_run_index_ == 0) {
|
|
// Primary block — use the trie leaf's handle.
|
|
auto handle = iter_.Value();
|
|
return UserDefinedIndexBuilder::BlockHandle{handle.offset, handle.size};
|
|
}
|
|
// Overflow block — use the side-table handle.
|
|
// overflow_run_index_ is 1-based, overflow array is 0-based.
|
|
uint32_t overflow_idx = overflow_base_idx_ + overflow_run_index_ - 1;
|
|
auto handle = trie_->GetOverflowHandle(overflow_idx);
|
|
return UserDefinedIndexBuilder::BlockHandle{handle.offset, handle.size};
|
|
}
|
|
|
|
IterBoundCheck TrieIndexIterator::CheckBounds(
|
|
const Slice& reference_key) const {
|
|
if (!prepared_ || scan_opts_.empty()) {
|
|
// No bounds to check — always in-bound.
|
|
return IterBoundCheck::kInbound;
|
|
}
|
|
|
|
if (current_scan_idx_ >= scan_opts_.size()) {
|
|
return IterBoundCheck::kOutOfBound;
|
|
}
|
|
|
|
const auto& opts = scan_opts_[current_scan_idx_];
|
|
|
|
// Check upper bound (limit) against the reference key, NOT the current
|
|
// separator. The trie stores separator keys (upper bounds on block
|
|
// contents), so comparing the separator against the limit would
|
|
// prematurely reject blocks that contain keys < limit.
|
|
//
|
|
// For Seek: reference_key = seek target. If target < limit, the found
|
|
// block may contain keys within bounds.
|
|
// For Next: reference_key = previous separator. If prev_sep < limit,
|
|
// the current block may contain keys within bounds.
|
|
//
|
|
// This is conservative: it may return kInbound for a block that is fully
|
|
// out of bounds. The data-level iterator handles per-key filtering.
|
|
if (opts.range.limit.has_value()) {
|
|
const Slice& limit = opts.range.limit.value();
|
|
if (comparator_->Compare(reference_key, limit) >= 0) {
|
|
return IterBoundCheck::kOutOfBound;
|
|
}
|
|
}
|
|
|
|
return IterBoundCheck::kInbound;
|
|
}
|
|
|
|
// ============================================================================
|
|
// TrieIndexReader
|
|
// ============================================================================
|
|
|
|
TrieIndexReader::TrieIndexReader(const Comparator* comparator)
|
|
: comparator_(comparator), data_size_(0) {}
|
|
|
|
Status TrieIndexReader::InitFromSlice(const Slice& data) {
|
|
data_size_ = data.size();
|
|
return trie_.InitFromData(data);
|
|
}
|
|
|
|
std::unique_ptr<UserDefinedIndexIterator> TrieIndexReader::NewIterator(
|
|
const ReadOptions& /*read_options*/) {
|
|
return std::make_unique<TrieIndexIterator>(&trie_, comparator_,
|
|
trie_.HasSeqnoEncoding());
|
|
}
|
|
|
|
size_t TrieIndexReader::ApproximateMemoryUsage() const {
|
|
// The trie uses zero-copy pointers into the serialized data for bitvectors
|
|
// and handle arrays, so the base cost is the serialized data size. On top
|
|
// of that, InitFromData() heap-allocates child position lookup tables
|
|
// (s_child_start_pos_ and s_child_end_pos_) for Select-free sparse
|
|
// traversal — 8 bytes per sparse internal node.
|
|
return data_size_ + trie_.ApproximateAuxMemoryUsage();
|
|
}
|
|
|
|
// ============================================================================
|
|
// TrieIndexFactory
|
|
// ============================================================================
|
|
|
|
Status TrieIndexFactory::NewBuilder(
|
|
const UserDefinedIndexOption& option,
|
|
std::unique_ptr<UserDefinedIndexBuilder>& builder) const {
|
|
// The trie traverses keys byte-by-byte in lexicographic order, so it
|
|
// requires a bytewise comparator. Non-bytewise comparators (e.g.,
|
|
// ReverseBytewiseComparator or custom comparators) would produce separator
|
|
// keys in a different order than the trie's byte-level traversal, causing
|
|
// incorrect Seek results.
|
|
if (option.comparator != nullptr &&
|
|
option.comparator != BytewiseComparator()) {
|
|
return Status::NotSupported(
|
|
"TrieIndexFactory requires BytewiseComparator; got: ",
|
|
option.comparator->Name());
|
|
}
|
|
// Default to BytewiseComparator when null. The trie requires a bytewise
|
|
// comparator for separator key ordering; null would cause a dereference
|
|
// crash in AddIndexEntry when comparing keys.
|
|
const Comparator* cmp =
|
|
option.comparator ? option.comparator : BytewiseComparator();
|
|
builder = std::make_unique<TrieIndexBuilder>(cmp);
|
|
return Status::OK();
|
|
}
|
|
|
|
Status TrieIndexFactory::NewReader(
|
|
const UserDefinedIndexOption& option, Slice& index_block,
|
|
std::unique_ptr<UserDefinedIndexReader>& reader) const {
|
|
const Comparator* cmp =
|
|
option.comparator ? option.comparator : BytewiseComparator();
|
|
if (cmp != BytewiseComparator()) {
|
|
return Status::NotSupported(
|
|
"TrieIndexFactory requires BytewiseComparator; got: ", cmp->Name());
|
|
}
|
|
auto trie_reader = std::make_unique<TrieIndexReader>(cmp);
|
|
Status s = trie_reader->InitFromSlice(index_block);
|
|
if (!s.ok()) {
|
|
return s;
|
|
}
|
|
reader = std::move(trie_reader);
|
|
return Status::OK();
|
|
}
|
|
|
|
} // namespace trie_index
|
|
} // namespace ROCKSDB_NAMESPACE
|