rocksdb/table/block_based/block_prefetcher.cc
ngina 3687dc4ad3 Add prefetch feature enum to FSSupportedOps (#13917)
Summary:
**Problem:** RocksDB was making unnecessary prefetch system calls on file systems that don't support prefetch operations, potentially leading to wasted CPU cycles.

**Fix:** Add kFSPrefetch to FSSupportedOps enum to allow file systems to indicate prefetch support capability. File systems can now opt out of prefetch calls by not setting this field.

**Backwards compatibility:** File systems that don't override SupportedOps() continue to receive prefetch calls exactly as before. Only file systems that explicitly opt out by not setting kFSPrefetch will avoid the calls.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/13917

Test Plan:
- Added a new test in block_based_table_reader.
- Run existing tests: ```make prefetch_test && ./prefetch_test```

Reviewed By: anand1976

Differential Revision: D81607145

Pulled By: nmk70

fbshipit-source-id: 3bbefa05919034e8776ea4e4540cdc695cdc6d3f
2025-10-17 19:54:49 -07:00

168 lines
6.3 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/block_based/block_prefetcher.h"
#include "rocksdb/file_system.h"
#include "table/block_based/block_based_table_reader.h"
namespace ROCKSDB_NAMESPACE {
// Decides whether readahead should be set up for the block described by
// `handle`, and through which mechanism. Two mechanisms appear below:
//   * a file-system level prefetch issued via rep->file->Prefetch(), whose
//     covered range is remembered in readahead_limit_; and
//   * an internal FilePrefetchBuffer held in prefetch_buffer_, created on
//     demand via rep->CreateFilePrefetchBufferIfNotExists().
//
// Parameters:
//   rep                    - table state; supplies the file reader, the table
//                            options and the fs_prefetch_support flag.
//   handle                 - block whose offset and size (including trailer)
//                            drive the decision.
//   readahead_size         - explicit readahead size; when 0 (and not a
//                            compaction read) implicit auto readahead is used.
//   is_for_compaction      - selects the compaction path, which is sized by
//                            compaction_readahead_size_.
//   no_sequential_checking - on the implicit path, create the internal buffer
//                            right away instead of tracking the read pattern.
//   read_options           - kBlockCacheTier disables prefetching entirely;
//                            otherwise forwarded to PrepareIOOptions().
//   readaheadsize_cb       - forwarded to the internal buffer on the
//                            non-compaction paths.
//   is_async_io_prefetch   - requests two buffers instead of one.
void BlockPrefetcher::PrefetchIfNeeded(
    const BlockBasedTable::Rep* rep, const BlockHandle& handle,
    const size_t readahead_size, bool is_for_compaction,
    const bool no_sequential_checking, const ReadOptions& read_options,
    const std::function<void(bool, uint64_t&, uint64_t&)>& readaheadsize_cb,
    bool is_async_io_prefetch) {
  // Prefetching is pointless when IO is disallowed. (No buffers have been
  // allocated yet, regardless of the settings tracked so far.)
  if (read_options.read_tier == ReadTier::kBlockCacheTier) {
    return;
  }

  const size_t block_offset = handle.offset();
  const size_t block_len = BlockBasedTable::BlockSizeWithTrailer(handle);

  // The readahead sizes are filled in by whichever path ends up creating an
  // internal buffer; only the buffer count is common to all of them.
  ReadaheadParams readahead_params;
  readahead_params.num_buffers = is_async_io_prefetch ? 2 : 1;

  // Shared tail of every non-compaction path that ends up on the internal
  // prefetch buffer.
  const auto use_internal_scan_buffer = [&]() {
    rep->CreateFilePrefetchBufferIfNotExists(
        readahead_params, &prefetch_buffer_, readaheadsize_cb,
        /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch);
  };

  // ---- Compaction reads ----------------------------------------------------
  if (is_for_compaction) {
    const bool fs_prefetch_candidate =
        !rep->file->use_direct_io() && compaction_readahead_size_ > 0;
    if (fs_prefetch_candidate) {
      // When the FS supports prefetching, readahead_limit_ is non-zero; if
      // the current block lies inside the already prefetched range we are
      // done.
      if (block_offset + block_len <= readahead_limit_) {
        return;
      }

      IOOptions opts;
      IODebugContext dbg;
      Status status = rep->file->PrepareIOOptions(read_options, opts, &dbg);
      if (!status.ok()) {
        return;
      }

      if (rep->fs_prefetch_support) {
        status = rep->file->Prefetch(opts, block_offset,
                                     block_len + compaction_readahead_size_);
        if (status.ok()) {
          readahead_limit_ =
              block_offset + block_len + compaction_readahead_size_;
        }
        // Success and hard failures both end here. Only NotSupported
        // (returned even though the feature bit was set) continues on to the
        // internal prefetch buffer.
        if (!status.IsNotSupported()) {
          return;
        }
      }
    }

    // FS prefetch is unavailable or was not attempted: use the internal
    // prefetch buffer instead.
    //
    // num_file_reads is used by FilePrefetchBuffer only when
    // implicit_auto_readahead is set, so it is left untouched here.
    readahead_params.initial_readahead_size = compaction_readahead_size_;
    readahead_params.max_readahead_size = compaction_readahead_size_;
    rep->CreateFilePrefetchBufferIfNotExists(
        readahead_params, &prefetch_buffer_,
        /*readaheadsize_cb=*/nullptr,
        /*usage=*/FilePrefetchBufferUsage::kCompactionPrefetch);
    return;
  }

  // ---- Explicit readahead requested by the user ----------------------------
  if (readahead_size > 0) {
    readahead_params.initial_readahead_size = readahead_size;
    readahead_params.max_readahead_size = readahead_size;
    use_internal_scan_buffer();
    return;
  }

  // ---- Implicit auto readahead ---------------------------------------------
  // A max_auto_readahead_size of 0 (set by the user) or a zero initial size
  // means no data is prefetched.
  const size_t max_auto_readahead_size =
      rep->table_options.max_auto_readahead_size;
  if (max_auto_readahead_size == 0 || initial_auto_readahead_size_ == 0) {
    return;
  }
  if (initial_auto_readahead_size_ > max_auto_readahead_size) {
    initial_auto_readahead_size_ = max_auto_readahead_size;
  }

  readahead_params.initial_readahead_size = initial_auto_readahead_size_;
  readahead_params.max_readahead_size = max_auto_readahead_size;
  readahead_params.implicit_auto_readahead = true;
  readahead_params.num_file_reads_for_auto_readahead =
      rep->table_options.num_file_reads_for_auto_readahead;

  // With no_sequential_checking the num_file_reads_ bookkeeping is skipped
  // and the FilePrefetchBuffer is always created.
  if (no_sequential_checking) {
    use_internal_scan_buffer();
    return;
  }

  // Both checks are evaluated before this read is recorded. The sequential
  // check is only consulted when the block is not already covered by an
  // earlier FS prefetch.
  // NOTE(review): IsBlockSequential presumably compares against the
  // previously recorded read, hence it must run before UpdateReadPattern —
  // confirm against the class definition.
  const bool covered_by_fs_prefetch =
      block_offset + block_len <= readahead_limit_;
  const bool keeps_sequence =
      covered_by_fs_prefetch || IsBlockSequential(block_offset);
  UpdateReadPattern(block_offset, block_len);
  if (covered_by_fs_prefetch) {
    return;
  }
  if (!keeps_sequence) {
    // Non-sequential access: start over from the configured initial size.
    ResetValues(rep->table_options.initial_auto_readahead_size);
    return;
  }

  // Implicit auto readahead only kicks in once the number of sequential reads
  // exceeds `table_options.num_file_reads_for_auto_readahead` (default: 2).
  ++num_file_reads_;
  if (num_file_reads_ <= rep->table_options.num_file_reads_for_auto_readahead) {
    return;
  }
  readahead_params.num_file_reads = num_file_reads_;

  // Under direct IO no FS prefetch is attempted; go straight to the internal
  // buffer.
  if (rep->file->use_direct_io()) {
    use_internal_scan_buffer();
    return;
  }

  readahead_size_ = std::min(readahead_size_, max_auto_readahead_size);

  IOOptions opts;
  Status status = rep->file->PrepareIOOptions(read_options, opts);
  if (!status.ok()) {
    return;
  }

  if (rep->fs_prefetch_support) {
    status = rep->file->Prefetch(opts, handle.offset(),
                                 block_len + readahead_size_);
    if (status.ok()) {
      readahead_limit_ = block_offset + block_len + readahead_size_;
      // Keep doubling the readahead size until it reaches
      // max_auto_readahead_size.
      readahead_size_ = std::min(max_auto_readahead_size, readahead_size_ * 2);
      return;
    }
  }

  // Reached when the FS does not advertise prefetch support, or when the
  // Prefetch call above did not succeed (NotSupported or any other non-OK
  // status): fall back to the internal prefetch buffer.
  use_internal_scan_buffer();
}
} // namespace ROCKSDB_NAMESPACE