Summary: as a preparation to support merge in [WBWIMemtable](d48af21386/memtable/wbwi_memtable.h (L31)
), this PR updates how we [order updates to the same key](d48af21386/utilities/write_batch_with_index/write_batch_with_index_internal.cc (L694-L697)
) in WriteBatchWithIndex. Specifically, the order is now reversed such that more recent update is ordered first. This will make iterating from WriteBatchWithIndex much easier since the key ordering in WBWI now matches internal key order where keys with larger sequence number are ordered first. The ordering is now explicitly documented above the declaration for `WriteBatchWithIndex` class. Places that use `WBWIIteratorImpl` and assume key ordering are updated. The rest is test and comments update. This will affect users who use WBWIIterator directly, the output of GetFromBatch, GetFromBatchAndDB or NewIteratorWithBase are not affected. Users are only affected if they may issue multiple updates to the same key. If WriteBatchWithIndex is created with `overwrite_key=true`, one the the updates needs to be Merge. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13387 Test Plan: we have some good coverage of WBWI, I updated some existing tests and added a test for `WBWIIteratorImpl`. Reviewed By: pdillinger Differential Revision: D69421268 Pulled By: cbi42 fbshipit-source-id: d97eec4ee74aeac3937c9758041c7713f07f9676
151 lines
4.5 KiB
C++
151 lines
4.5 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
#pragma once
|
|
#include <algorithm>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "rocksdb/db.h"
|
|
#include "rocksdb/slice.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
const std::vector<Slice> empty_operand_list;
|
|
|
|
// The merge context for merging a user key.
|
|
// When doing a Get(), DB will create such a class and pass it when
|
|
// issuing Get() operation to memtables and version_set. The operands
|
|
// will be fetched from the context when issuing partial of full merge.
|
|
class MergeContext {
|
|
public:
|
|
GetMergeOperandsOptions* get_merge_operands_options = nullptr;
|
|
|
|
// Clear all the operands
|
|
void Clear() {
|
|
if (operand_list_) {
|
|
operand_list_->clear();
|
|
copied_operands_->clear();
|
|
}
|
|
}
|
|
|
|
// Push a merge operand
|
|
void PushOperand(const Slice& operand_slice, bool operand_pinned = false) {
|
|
Initialize();
|
|
SetDirectionBackward();
|
|
|
|
if (operand_pinned) {
|
|
operand_list_->push_back(operand_slice);
|
|
} else {
|
|
// We need to have our own copy of the operand since it's not pinned
|
|
copied_operands_->emplace_back(
|
|
new std::string(operand_slice.data(), operand_slice.size()));
|
|
operand_list_->push_back(*copied_operands_->back());
|
|
}
|
|
}
|
|
|
|
// Push back a merge operand
|
|
void PushOperandBack(const Slice& operand_slice,
|
|
bool operand_pinned = false) {
|
|
Initialize();
|
|
SetDirectionForward();
|
|
|
|
if (operand_pinned) {
|
|
operand_list_->push_back(operand_slice);
|
|
} else {
|
|
// We need to have our own copy of the operand since it's not pinned
|
|
copied_operands_->emplace_back(
|
|
new std::string(operand_slice.data(), operand_slice.size()));
|
|
operand_list_->push_back(*copied_operands_->back());
|
|
}
|
|
}
|
|
|
|
// return total number of operands in the list
|
|
size_t GetNumOperands() const {
|
|
if (!operand_list_) {
|
|
return 0;
|
|
}
|
|
return operand_list_->size();
|
|
}
|
|
|
|
// Get the operand at the index.
|
|
Slice GetOperand(int index) const {
|
|
assert(operand_list_);
|
|
|
|
SetDirectionForward();
|
|
return (*operand_list_)[index];
|
|
}
|
|
|
|
// Same as GetOperandsDirectionForward
|
|
//
|
|
// Note that the returned reference is only good until another call
|
|
// to this MergeContext. If the returned value is needed for longer,
|
|
// a copy must be made.
|
|
const std::vector<Slice>& GetOperands() const {
|
|
return GetOperandsDirectionForward();
|
|
}
|
|
|
|
// Return all the operands in the order as they were merged (passed to
|
|
// FullMerge or FullMergeV2)
|
|
//
|
|
// Note that the returned reference is only good until another call
|
|
// to this MergeContext. If the returned value is needed for longer,
|
|
// a copy must be made.
|
|
const std::vector<Slice>& GetOperandsDirectionForward() const {
|
|
if (!operand_list_) {
|
|
return empty_operand_list;
|
|
}
|
|
|
|
SetDirectionForward();
|
|
return *operand_list_;
|
|
}
|
|
|
|
// Return all the operands in the reversed order relative to how they were
|
|
// merged (passed to FullMerge or FullMergeV2)
|
|
//
|
|
// Note that the returned reference is only good until another call
|
|
// to this MergeContext. If the returned value is needed for longer,
|
|
// a copy must be made.
|
|
const std::vector<Slice>& GetOperandsDirectionBackward() const {
|
|
if (!operand_list_) {
|
|
return empty_operand_list;
|
|
}
|
|
|
|
SetDirectionBackward();
|
|
return *operand_list_;
|
|
}
|
|
|
|
private:
|
|
void Initialize() {
|
|
if (!operand_list_) {
|
|
operand_list_.reset(new std::vector<Slice>());
|
|
copied_operands_.reset(new std::vector<std::unique_ptr<std::string>>());
|
|
}
|
|
}
|
|
|
|
void SetDirectionForward() const {
|
|
if (operands_reversed_ == true) {
|
|
std::reverse(operand_list_->begin(), operand_list_->end());
|
|
operands_reversed_ = false;
|
|
}
|
|
}
|
|
|
|
void SetDirectionBackward() const {
|
|
if (operands_reversed_ == false) {
|
|
std::reverse(operand_list_->begin(), operand_list_->end());
|
|
operands_reversed_ = true;
|
|
}
|
|
}
|
|
|
|
// List of operands, the order of operands depends on operands_reversed_.
|
|
mutable std::unique_ptr<std::vector<Slice>> operand_list_;
|
|
// Copy of operands that are not pinned.
|
|
std::unique_ptr<std::vector<std::unique_ptr<std::string>>> copied_operands_;
|
|
// Reversed means the newest update is ordered first.
|
|
mutable bool operands_reversed_ = true;
|
|
};
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|