Summary: This pull request implements the prediction aspect of auto-tuning compression in RocksDB, as part of Milestone 2. The goal is to optimize compression decisions to meet a given CPU and IO budget, based on the predicted CPU time and resulting compression ratio for compression decisions on a data block. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13711 Test Plan: Ran benchmark tests to evaluate the performance impact of the new algorithm. Verified that the optimization does not compromise overall system performance. ```bash SUFFIX=`tty | sed 's|/|_|g'`; for ARGS in "-compression_parallel_threads=1 -compression_type=zstd -compression_manager=none" "-compression_parallel_threads=4 -compression_type=zstd -compression_manager=none" "-compression_parallel_threads=1 -compression_type=zstd -compression_manager=costpredictor" "-compression_parallel_threads=4 -compression_type=zstd -compression_manager=costpredictor" ; do echo $ARGS; (for I in `seq 1 20`; do ./db_bench -db=/dev/shm/dbbench$SUFFIX --benchmarks=fillseq -num=10000000 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=1000 -fifo_compaction_allow_compaction=0 -disable_wal -write_buffer_size=12000000 $ARGS 2>&1 | grep micros/op; done) | awk '{n++; sum += $5;} END { print int(sum / n); }'; done ``` parallel threads | 1 | 4 -- | -- | -- master branch | 1076660.5 ops | 1668411.3 ops new code compression manager="none" | 1057155.35 ops (-1.81%) | 1648664.2 ops (-1.18%) new code compression manager="costpredictor" | 1080794.8 ops (0.38%)| 1652720.35 ops (-0.94%) Used the mean absolute percentage error (MAPE) to show the accuracy of the predictor. 
```bash ./db_bench --db=/dev/shm/dbbench$SUFFIX --benchmarks=fillseq --compaction_style=2 --num=10000000 --fifo_compaction_max_table_files_size_mb=1000 --fifo_compaction_allow_compaction=0 --disable_wal --write_buffer_size=12000000 --statistics --stats_level=5 --value_size=2000 --compression_manager=costpredictor --compression_type=zstd --progress_reports=false 2>&1 | tee /tmp/predict.log ``` compression_name | compression_level | MAPE (cpu cost) | MAPE (io cost) | average measured_time (micro sec) | average predicted_time (micro sec) | average measured_io (bytes) | average predicted_io (bytes) -- | -- | -- | -- | -- | -- | -- | -- Snappy | 0 | 16.979548 | 3.138885 | 3.639488 | 2.98755 | 2257.655152 | 2178.070375 LZ4 | 1 | 15.508632 | 3.103681 | 4.733639 | 4.010361 | 2257.803299 | 2179.82233 LZ4 | 4 | 15.471204 | 3.102158 | 4.731955 | 4.006011 | 2258.529203 | 2179.778441 LZ4 | 9 | 15.429305 | 3.09599 | 4.729104 | 4.007059 | 2257.822368 | 2179.927506 LZ4HC | 1 | 7.254545 | 3.112858 | 79.64412 | 76.603272 | 2258.636774 | 2177.464922 LZ4HC | 4 | 7.249132 | 3.085802 | 79.591264 | 76.576416 | 2255.098757 | 2176.126082 LZ4HC | 9 | 7.248921 | 3.09695 | 79.719061 | 76.614155 | 2253.772057 | 2175.882686 ZSTD | 1 | 8.728305 | 3.223971 | 18.93434 | 17.882706 | 1957.773706 | 1890.895071 ZSTD | 15 | 4.853552 | 3.238199 | 329.396574 | 318.277613 | 1918.021616 | 1853.833546 ZSTD | 22 | 4.275209 | 3.243137 | 625.471394 | 596.254939 | 1919.035477 | 1853.44902 ```bash ./db_bench --db=/dev/shm/dbbench$SUFFIX --benchmarks=fillseq --compaction_style=2 --num=10000000 --fifo_compaction_max_table_files_size_mb=1000 --fifo_compaction_allow_compaction=0 --disable_wal --write_buffer_size=12000000 --statistics --stats_level=5 --value_size=2000 --compression_manager=costpredictor --compression_type=zstd --progress_reports=false --write_buffer_size=140737488355328 --block_size=16382 ``` Increasing the block size (i.e., roughly doubling the measured time) reduces the CPU-cost MAPE by about half or more, as the following table shows. 
compression_name | compression_level | MAPE (cpu cost) | MAPE (io cost) | average measured_time (micro sec) | average predicted_time (micro sec) | average measured_io (bytes) | average predicted_io (bytes) -- | -- | -- | -- | -- | -- | -- | -- Snappy | 0 | 7.933944 | 0.061173 | 7.187587 | 6.815071 | 4466.536629 | 4465.925648 LZ4 | 1 | 5.614279 | 0.050215 | 8.526641 | 8.14445 | 4473.768752 | 4473.159792 LZ4 | 4 | 5.617925 | 0.050317 | 8.525155 | 8.144209 | 4473.772343 | 4473.159782 LZ4 | 9 | 5.65519 | 0.050249 | 8.530569 | 8.14836 | 4473.762187 | 4473.150695 LZ4HC | 1 | 4.259648 | 0.028564 | 98.273778 | 97.820515 | 4471.691596 | 4471.05918 LZ4HC | 4 | 4.269529 | 0.027665 | 98.240579 | 97.788721 | 4465.537078 | 4464.901328 LZ4HC | 9 | 4.274553 | 0.027555 | 98.319357 | 97.8637 | 4465.539437 | 4464.903889 ZSTD | 1 | 4.909716 | 0.155441 | 29.503133 | 29.047057 | 3713.562704 | 3712.978633 ZSTD | 15 | 1.310407 | 0.162864 | 643.803097 | 635.960631 | 3797.544307 | 3705.772419 ZSTD | 22 | 1.011497 | 0.155876 | 1221.189822 | 1220.693678 | 3705.556448 | 3704.972332 Reviewed By: hx235 Differential Revision: D77065528 Pulled By: shubhajeet fbshipit-source-id: f7f4ae018f786bfeae3eacf0135055c63e142610
146 lines
4.7 KiB
C++
146 lines
4.7 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
#pragma once
|
|
#include "monitoring/statistics_impl.h"
|
|
#include "rocksdb/system_clock.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
// Auto-scoped.
|
|
// When statistics is not nullptr, records the measured time into any enabled
|
|
// histograms supplied to the constructor. A histogram argument may be omitted
|
|
// by setting it to Histograms::HISTOGRAM_ENUM_MAX. It is also saved into
|
|
// *elapsed if the pointer is not nullptr and overwrite is true, it will be
|
|
// added to *elapsed if overwrite is false.
|
|
class StopWatch {
|
|
public:
|
|
StopWatch(SystemClock* clock, Statistics* statistics,
|
|
const uint32_t hist_type_1,
|
|
const uint32_t hist_type_2 = Histograms::HISTOGRAM_ENUM_MAX,
|
|
uint64_t* elapsed = nullptr, bool overwrite = true,
|
|
bool delay_enabled = false)
|
|
: clock_(clock),
|
|
statistics_(statistics),
|
|
hist_type_1_(statistics && statistics->HistEnabledForType(hist_type_1)
|
|
? hist_type_1
|
|
: Histograms::HISTOGRAM_ENUM_MAX),
|
|
hist_type_2_(statistics && statistics->HistEnabledForType(hist_type_2)
|
|
? hist_type_2
|
|
: Histograms::HISTOGRAM_ENUM_MAX),
|
|
elapsed_(elapsed),
|
|
overwrite_(overwrite),
|
|
stats_enabled_(statistics &&
|
|
statistics->get_stats_level() >
|
|
StatsLevel::kExceptTimers &&
|
|
(hist_type_1_ != Histograms::HISTOGRAM_ENUM_MAX ||
|
|
hist_type_2_ != Histograms::HISTOGRAM_ENUM_MAX)),
|
|
delay_enabled_(delay_enabled),
|
|
total_delay_(0),
|
|
delay_start_time_(0),
|
|
start_time_((stats_enabled_ || elapsed != nullptr) ? clock->NowMicros()
|
|
: 0) {}
|
|
|
|
~StopWatch() {
|
|
if (elapsed_) {
|
|
if (overwrite_) {
|
|
*elapsed_ = clock_->NowMicros() - start_time_;
|
|
} else {
|
|
*elapsed_ += clock_->NowMicros() - start_time_;
|
|
}
|
|
}
|
|
if (elapsed_ && delay_enabled_) {
|
|
*elapsed_ -= total_delay_;
|
|
}
|
|
if (stats_enabled_) {
|
|
const auto time = (elapsed_ != nullptr)
|
|
? *elapsed_
|
|
: (clock_->NowMicros() - start_time_);
|
|
if (hist_type_1_ != Histograms::HISTOGRAM_ENUM_MAX) {
|
|
statistics_->reportTimeToHistogram(hist_type_1_, time);
|
|
}
|
|
if (hist_type_2_ != Histograms::HISTOGRAM_ENUM_MAX) {
|
|
statistics_->reportTimeToHistogram(hist_type_2_, time);
|
|
}
|
|
}
|
|
}
|
|
|
|
void DelayStart() {
|
|
// if delay_start_time_ is not 0, it means we are already tracking delay,
|
|
// so delay_start_time_ should not be overwritten
|
|
if (elapsed_ && delay_enabled_ && delay_start_time_ == 0) {
|
|
delay_start_time_ = clock_->NowMicros();
|
|
}
|
|
}
|
|
|
|
void DelayStop() {
|
|
if (elapsed_ && delay_enabled_ && delay_start_time_ != 0) {
|
|
total_delay_ += clock_->NowMicros() - delay_start_time_;
|
|
}
|
|
// reset to 0 means currently no delay is being tracked, so two consecutive
|
|
// calls to DelayStop will not increase total_delay_
|
|
delay_start_time_ = 0;
|
|
}
|
|
|
|
uint64_t GetDelay() const { return delay_enabled_ ? total_delay_ : 0; }
|
|
|
|
uint64_t start_time() const { return start_time_; }
|
|
|
|
private:
|
|
SystemClock* clock_;
|
|
Statistics* statistics_;
|
|
const uint32_t hist_type_1_;
|
|
const uint32_t hist_type_2_;
|
|
uint64_t* elapsed_;
|
|
bool overwrite_;
|
|
bool stats_enabled_;
|
|
bool delay_enabled_;
|
|
uint64_t total_delay_;
|
|
uint64_t delay_start_time_;
|
|
const uint64_t start_time_;
|
|
};
|
|
|
|
// a nano second precision stopwatch
|
|
template <bool use_cpu_time = false>
|
|
class StopWatchNano {
|
|
public:
|
|
explicit StopWatchNano(SystemClock* clock, bool auto_start = false)
|
|
: clock_(clock), start_(0) {
|
|
if (auto_start) {
|
|
Start();
|
|
}
|
|
}
|
|
void Start() {
|
|
if constexpr (use_cpu_time) {
|
|
start_ = clock_->CPUNanos();
|
|
} else {
|
|
start_ = clock_->NowNanos();
|
|
}
|
|
}
|
|
uint64_t ElapsedNanos(bool reset = false) {
|
|
uint64_t now = 0;
|
|
if constexpr (use_cpu_time) {
|
|
now = clock_->CPUNanos();
|
|
} else {
|
|
now = clock_->NowNanos();
|
|
}
|
|
auto elapsed = now - start_;
|
|
if (reset) {
|
|
start_ = now;
|
|
}
|
|
return elapsed;
|
|
}
|
|
uint64_t ElapsedNanosSafe(bool reset = false) {
|
|
return (clock_ != nullptr) ? ElapsedNanos(reset) : 0U;
|
|
}
|
|
bool IsStarted() { return start_ != 0; }
|
|
uint64_t ElapsedMicros(bool reset = false) {
|
|
return ElapsedNanos(reset) / 1000;
|
|
}
|
|
|
|
private:
|
|
SystemClock* clock_;
|
|
uint64_t start_;
|
|
};
|
|
} // namespace ROCKSDB_NAMESPACE
|