rocksdb/util/simple_mixed_compressor.cc
Peter Dillinger 6a79e02ebd Support pre-defined compression dictionaries (#14253)
Summary:
... in addition to those derived from samples. This could be useful when trade-offs favor an offline trained dictionary that's good for the whole work load, which can involve heavy-weight training, vs. on-the-fly training on samples for each file, which has limitations.

This involves some breaking changes to some deeper parts of the new compression API. I'm not concerned about performance because this doesn't touch the per-block parts of the API, just the per-file parts.

Bonus: change to
CompressionManagerWrapper::FindCompatibleCompressionManager to implement what is likely the preferred behavior.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/14253

Test Plan: unit test included

Reviewed By: hx235

Differential Revision: D91082208

Pulled By: pdillinger

fbshipit-source-id: 1442db65e15c9435437204c19787c96f7a40a207
2026-01-23 10:01:50 -08:00

119 lines
4.3 KiB
C++

// Copyright (c) Meta Platforms, Inc. and affiliates.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Creates mixed compressor wrapper which uses multiple compression algorithm
// within same SST file.
#include "simple_mixed_compressor.h"
#include <options/options_helper.h>
#include "random.h"
#include "rocksdb/advanced_compression.h"
namespace ROCKSDB_NAMESPACE {
// MultiCompressorWrapper implementation
// Builds the pool of underlying compressors: one compressor obtained from the
// built-in V2 compression manager for every supported compression type other
// than kNoCompression.
//
// `opts` is saved in opts_ and is also the options object handed to each
// compressor created here.
MultiCompressorWrapper::MultiCompressorWrapper(const CompressionOptions& opts)
    : opts_(opts) {
  // TODO: make the compression manager a field
  auto builtInManager = GetBuiltinV2CompressionManager();
  for (auto type : GetSupportedCompressions()) {
    if (type == kNoCompression) {
      continue;
    }
    auto compressor = builtInManager->GetCompressor(opts, type);
    // Every entry of compressors_ is later dereferenced without a check
    // (compressors_[i]->..., compressors_.back()->...), so an empty pointer
    // must never be stored.
    // NOTE(review): GetCompressor() is understood to return an empty pointer
    // when it declines to build a compressor for the given options/type --
    // confirm against the CompressionManager documentation.
    if (compressor) {
      compressors_.push_back(std::move(compressor));
    }
  }
}
// Answers with the dictionary guidance reported by the last compressor in
// compressors_ for the given block type.
Compressor::DictConfig MultiCompressorWrapper::GetDictGuidance(
    CacheEntryRole block_type) const {
  const auto& delegate = compressors_.back();
  return delegate->GetDictGuidance(block_type);
}
// Returns the serialized dictionary of the last compressor in compressors_.
Slice MultiCompressorWrapper::GetSerializedDict() const {
  const auto& delegate = compressors_.back();
  return delegate->GetSerializedDict();
}
// Reports the preferred compression type of the last compressor in
// compressors_.
CompressionType MultiCompressorWrapper::GetPreferredCompressionType() const {
  const auto& delegate = compressors_.back();
  return delegate->GetPreferredCompressionType();
}
// Obtains a working area from the last compressor in compressors_ and hands
// it to the caller.
Compressor::ManagedWorkingArea MultiCompressorWrapper::ObtainWorkingArea() {
  auto& delegate = compressors_.back();
  return delegate->ObtainWorkingArea();
}
// Forwards the specialization request (block type plus dictionary
// configuration) to the last compressor in compressors_ and returns whatever
// that compressor produces.
std::unique_ptr<Compressor> MultiCompressorWrapper::MaybeCloneSpecialized(
    CacheEntryRole block_type, DictConfigArgs&& dict_config) const {
  // TODO: full dictionary compression support. Currently this just falls
  // back on a non-multi compressor when asked to use a dictionary.
  const auto& delegate = compressors_.back();
  return delegate->MaybeCloneSpecialized(block_type, std::move(dict_config));
}
// RandomMixedCompressor implementation
// Identifies this compressor by its fixed name.
const char* RandomMixedCompressor::Name() const {
  static constexpr char kName[] = "RandomMixedCompressor";
  return kName;
}
// Creates a new RandomMixedCompressor constructed from this object's opts_.
std::unique_ptr<Compressor> RandomMixedCompressor::Clone() const {
  std::unique_ptr<Compressor> copy =
      std::make_unique<RandomMixedCompressor>(opts_);
  return copy;
}
// Compresses one block with a compressor chosen at random (via the
// thread-local Random instance) from compressors_. All arguments are
// forwarded to the chosen compressor and its Status is returned unchanged.
//
// If compressors_ is empty there is nothing to choose from, so compression is
// declined: OK is returned with *out_compression_type = kNoCompression.
Status RandomMixedCompressor::CompressBlock(
    Slice uncompressed_data, char* compressed_output,
    size_t* compressed_output_size, CompressionType* out_compression_type,
    ManagedWorkingArea* wa) {
  if (compressors_.empty()) {
    // Without this guard the code below would ask Uniform() for a value in an
    // empty range and then index into an empty vector.
    // NOTE(review): OK + kNoCompression is understood to mean "compression
    // declined, use the uncompressed data", with an output size of 0
    // signalling that compression was not attempted -- confirm against the
    // Compressor::CompressBlock contract.
    *compressed_output_size = 0;
    *out_compression_type = kNoCompression;
    return Status::OK();
  }
  const auto selected =
      Random::GetTLSInstance()->Uniform(static_cast<int>(compressors_.size()));
  auto& compressor = compressors_[selected];
  return compressor->CompressBlock(uncompressed_data, compressed_output,
                                   compressed_output_size,
                                   out_compression_type, wa);
}
// Identifies this compression manager by its fixed name.
const char* RandomMixedCompressionManager::Name() const {
  static constexpr char kName[] = "RandomMixedCompressionManager";
  return kName;
}
// Produces a fresh RandomMixedCompressor built from `opts`. The filter
// building context and the preferred compression type are not consulted.
std::unique_ptr<Compressor> RandomMixedCompressionManager::GetCompressorForSST(
    const FilterBuildingContext& /*context*/, const CompressionOptions& opts,
    CompressionType /*preferred*/) {
  std::unique_ptr<Compressor> result =
      std::make_unique<RandomMixedCompressor>(opts);
  return result;
}
// RoundRobinCompressor implementation
// Identifies this compressor by its fixed name.
const char* RoundRobinCompressor::Name() const {
  static constexpr char kName[] = "RoundRobinCompressor";
  return kName;
}
// Creates a new RoundRobinCompressor constructed from this object's opts_.
std::unique_ptr<Compressor> RoundRobinCompressor::Clone() const {
  std::unique_ptr<Compressor> copy =
      std::make_unique<RoundRobinCompressor>(opts_);
  return copy;
}
// Compresses one block with the next compressor in rotation. The rotation
// position comes from block_counter, which is incremented (relaxed) on each
// call and reduced modulo the number of compressors. All arguments are
// forwarded to the selected compressor and its Status is returned unchanged.
//
// If compressors_ is empty there is nothing to rotate through, so compression
// is declined: OK is returned with *out_compression_type = kNoCompression.
Status RoundRobinCompressor::CompressBlock(
    Slice uncompressed_data, char* compressed_output,
    size_t* compressed_output_size, CompressionType* out_compression_type,
    ManagedWorkingArea* wa) {
  if (compressors_.empty()) {
    // Without this guard the modulo below would divide by zero.
    // NOTE(review): OK + kNoCompression is understood to mean "compression
    // declined, use the uncompressed data", with an output size of 0
    // signalling that compression was not attempted -- confirm against the
    // Compressor::CompressBlock contract.
    *compressed_output_size = 0;
    *out_compression_type = kNoCompression;
    return Status::OK();
  }
  const auto counter = block_counter.FetchAddRelaxed(1);
  const auto sel_idx = counter % compressors_.size();
  auto& compressor = compressors_[sel_idx];
  return compressor->CompressBlock(uncompressed_data, compressed_output,
                                   compressed_output_size,
                                   out_compression_type, wa);
}
// Out-of-class definition of RoundRobinCompressor's static block counter,
// initialized to zero. Being a static member, the counter is shared by all
// RoundRobinCompressor instances rather than kept per instance;
// RoundRobinCompressor::CompressBlock() increments it once per call to pick
// the next compressor.
RelaxedAtomic<uint64_t> RoundRobinCompressor::block_counter{0};
// RoundRobinManager implementation
// Identifies this compression manager by its fixed name.
const char* RoundRobinManager::Name() const {
  static constexpr char kName[] = "RoundRobinManager";
  return kName;
}
// Produces a fresh RoundRobinCompressor built from `opts`. The filter
// building context and the preferred compression type are not consulted.
std::unique_ptr<Compressor> RoundRobinManager::GetCompressorForSST(
    const FilterBuildingContext& /*context*/, const CompressionOptions& opts,
    CompressionType /*preferred*/) {
  std::unique_ptr<Compressor> result =
      std::make_unique<RoundRobinCompressor>(opts);
  return result;
}
} // namespace ROCKSDB_NAMESPACE