rocksdb/db/multi_scan.cc
anand76 8b84390517 Add upper bound support for forward scans in MultiScan (#13723)
Summary:
Respect the scan upper bound/limit, if specified, in `MultiScan`. This applies to block based table and other native RocksDB SSTs. In order to properly support it, the `MultiScan` object caches the `ReadOptions` passed by the user and sets the `iterate_upper_bound` as appropriate. We optimize for the case of either all scans specifying the upper bound, or none of them. In case of mixed scans, we reallocate the DB iterator anytime `ReadOptions` has to be updated.

Tests:
New unit tests in `db_iterator_test`

Pull Request resolved: https://github.com/facebook/rocksdb/pull/13723

Reviewed By: cbi42

Differential Revision: D77385049

Pulled By: anand1976

fbshipit-source-id: 9c02d125770cbedbe6e8c10767ba537e7f7540e1
2025-06-26 12:19:16 -07:00

70 lines
2.5 KiB
C++

// Copyright (c) Meta Platforms, Inc. and affiliates.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "rocksdb/db.h"
namespace ROCKSDB_NAMESPACE {
using MultiScanIterator = MultiScan::MultiScanIterator;
// Constructs a MultiScan over the ranges in `scan_opts`.
//
// A private copy of `read_options` is kept so that `iterate_upper_bound` can
// be pointed at this object's `upper_bound_` whenever a scan specifies a
// limit. A DB iterator is allocated from `db`/`cfh` with those options, and
// `Prepare` is called on it only when every scan agrees on whether a limit is
// present (the fast path).
//
// An empty `scan_opts` is tolerated: no upper bound is installed and `Prepare`
// is skipped, since there is no first scan to derive the bound from.
MultiScan::MultiScan(const ReadOptions& read_options,
                     const std::vector<ScanOptions>& scan_opts, DB* db,
                     ColumnFamilyHandle* cfh)
    : read_options_(read_options), scan_opts_(scan_opts), db_(db), cfh_(cfh) {
  // Set up read_options_ with iterate_upper_bound based on the first scan.
  // Subsequent scans will update it and allocate a new DB iterator as
  // necessary. Guard against an empty list so the first element is never
  // read out of bounds.
  const bool first_has_limit =
      !scan_opts.empty() && scan_opts.front().range.limit.has_value();
  if (first_has_limit) {
    upper_bound_ = *scan_opts.front().range.limit;
    read_options_.iterate_upper_bound = &upper_bound_;
  } else {
    read_options_.iterate_upper_bound = nullptr;
  }

  // Check that all the ScanOptions either specify an upper bound or none of
  // them do. If it's mixed we take the slow path, which avoids calling
  // Prepare: we have to reallocate the iterator with updated read_options_
  // every time we switch between upper bound and no upper bound, which
  // complicates Prepare.
  bool slow_path = false;
  for (const auto& opts : scan_opts) {  // by reference: inspection only
    if (opts.range.limit.has_value() != first_has_limit) {
      slow_path = true;
      break;
    }
  }

  db_iter_.reset(db->NewIterator(read_options_, cfh));
  if (!slow_path && !scan_opts.empty()) {
    db_iter_->Prepare(scan_opts);
  }
}
// Advances to the next scan range and positions the DB iterator at its start.
//
// Throws std::logic_error when called with idx_ already at or beyond the
// number of scans. After the increment, if no scan remains, the iterator is
// returned without touching the DB iterator. Otherwise the upper bound is
// refreshed for the new range, the DB iterator is reallocated when the
// presence of an upper bound changes, and a Seek to the range start is
// issued. Throws MultiScanException if the DB iterator reports a non-OK
// status after the Seek.
MultiScanIterator& MultiScanIterator::operator++() {
  if (idx_ >= scan_opts_.size()) {
    throw std::logic_error("Index out of range");
  }

  ++idx_;
  if (idx_ >= scan_opts_.size()) {
    // Stepped past the final scan; nothing left to position.
    return *this;
  }

  const auto& next_range = scan_opts_[idx_].range;
  const bool wants_bound = next_range.limit.has_value();
  const bool has_bound = read_options_.iterate_upper_bound != nullptr;

  // Whenever the new range carries a limit, the stored bound value must be
  // refreshed, regardless of whether the iterator has to be rebuilt.
  if (wants_bound) {
    *upper_bound_ = *next_range.limit;
  }

  if (wants_bound != has_bound) {
    // Switching between bounded and unbounded scans: read_options_ changes,
    // so a fresh DB iterator is created and handed to scan_ via Reset.
    if (wants_bound) {
      read_options_.iterate_upper_bound = upper_bound_;
    } else {
      read_options_.iterate_upper_bound = nullptr;
    }
    db_iter_.reset(db_->NewIterator(read_options_, cfh_));
    scan_.Reset(db_iter_.get());
  }

  db_iter_->Seek(*next_range.start);
  status_ = db_iter_->status();
  if (!status_.ok()) {
    throw MultiScanException(status_);
  }
  return *this;
}
} // namespace ROCKSDB_NAMESPACE