Summary: Introduce new table option with separated key-value storage in data blocks. This PR implements a new SST block format where keys and values are stored in separate sections within data blocks, rather than interleaved. Keys are stored first, followed by all values in a contiguous section. The motivation is better cpu cache hit rate during seeks and potentially better compression. The additional storage cost is a varint per restart point, and 4 bytes additional block footer. For a data block with a restart interval of 16, it is approximately 1 bit of overhead per entry. But compression actually performs better, resulting in ~3% storage savings from benchmark. For now I've opted to not separate kvs in non-data blocks since restart interval for those blocks is typically 1, and values are typically small and probably better inlined. ### New block layout ``` +------------------+ | Keys Section | <- Key entries with delta encoding +------------------+ | Values Section | <- (new) Values stored contiguously +------------------+ | Restart Array | <- Fixed32 offsets to restart points +------------------+ | Values Offset | <- (new) 4 bytes: offset to values section | Footer | <- 4 bytes: packed index_type + num_restarts +------------------+ ``` ### Entry Format **At restart points** ``` +--------------+------------------+----------------+-----------------+-----------+ | shared (v32) | non_shared (v32) | value_sz (v32) | value_off (v32) | key_delta | +--------------+------------------+----------------+-----------------+-----------+ ``` **At non-restart points** ``` +--------------+------------------+----------------+-----------+ | shared (v32) | non_shared (v32) | value_sz (v32) | key_delta | +--------------+------------------+----------------+-----------+ ``` - `value_offset` is only stored at restart points to save space - For non-restart entries, value offset is computed as: `prev_value_offset + prev_value_size` ### Forward Compatibility - We make use of reserved 
block footer bits to mark if a block has separated kv format. Should an older version read this, it will assume a very large block restart interval and result in a corruption error. ### Key Changes - **BlockBuilder**: Accumulates values in a separate buffer; value offsets are stored only at restart points (other entries derive offset from previous value's position). There is an additional memcpy cost to place the value data after the key data. - **Block iteration**: Iteration now needs to know if we are at a restart point. This will rely on `cur_entry_idx_`, which was previously only used for per-kv checksum purposes. In this new format, we also need to know the block_restart_interval, which was previously also only calculated for per-kv checksums. - **Table properties**: Store `data_block_restart_interval`, `index_block_restart_interval`, and `separate_key_value_in_data_block` in table properties Pull Request resolved: https://github.com/facebook/rocksdb/pull/14287 Test Plan: - Extended block_test, table_test, compaction_test to contain new separated_kv param - Added new parameter to crash test --- ## Benchmark ### Varying Value Size **Write:** `db_bench --num=1000000 --separate_key_value_in_data_block=<bool> --value_size=<X> --benchmarks=fillrandom,compact` **Read:** `db_bench --db=$DB --use_existing_db --benchmarks=readrandom,readseq` | Value Size | FillRandom (ops/s) | | Compact (s) | | ReadRandom (ops/s) | | ReadSeq (ops/s) | | SST Size (MB) | | |----------:|-------------------:|-----:|----------:|-----:|-------------------:|-----:|----------------:|-----:|-------------:|-----:| | | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | | 1 | 253,280 | 264,977 | 0.516 | 0.497 (**-3.7%**) | 596,328 | 586,625 (**-1.6%**) | 5,904,681 | 6,069,581 (**+2.8%**) | 5.1 | 4.2 (**-18.6%**) | | 16 | 235,757 | 242,367 | 0.572 | 0.533 (**-6.8%**) | 570,751 | 572,815 (**+0.4%**) | 5,371,138 | 5,604,908 (**+4.4%**) | 14.7 | 13.4 
(**-8.5%**) | | 100 | 264,299 | 265,427 | 0.461 | 0.454 (**-1.5%**) | 323,696 | 332,790 (**+2.8%**) | 4,239,725 | 4,232,416 (**-0.2%**) | 38.8 | 37.6 (**-3.2%**) | | 1,000 | 238,992 | 242,764 | 2.349 | 2.329 (**-0.9%**) | 244,608 | 261,403 (**+6.9%**) | 1,285,394 | 1,265,868 (**-1.5%**) | 342.1 | 342.0 (**-0.0%**) | ### Varying Block Restart Interval **Write:** `db_bench --num=1000000 --separate_key_value_in_data_block=<bool> --block_restart_interval=<X> --benchmarks=fillrandom,compact` **Read:** `db_bench --db=$DB --use_existing_db --benchmarks=readrandom,readseq` | BRI | FillRandom (ops/s) | | Compact (s) | | ReadRandom (ops/s) | | ReadSeq (ops/s) | | SST Size (MB) | | |----:|-------------------:|-----:|----------:|-----:|-------------------:|-----:|----------------:|-----:|-------------:|-----:| | | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | | 1 | 251,654 | 263,707 | 0.453 | 0.485 (**+7.1%**) | 334,653 | 328,708 (**-1.8%**) | 4,194,342 | 3,954,291 (**-5.7%**) | 40.6 | 42.4 (**+4.5%**) | | 4 | 253,797 | 252,394 | 0.476 | 0.476 (**+0.0%**) | 332,719 | 341,676 (**+2.7%**) | 4,135,691 | 4,051,151 (**-2.0%**) | 39.2 | 39.3 (**+0.3%**) | | 8 | 260,143 | 262,273 | 0.496 | 0.460 (**-7.3%**) | 330,859 | 337,567 (**+2.0%**) | 4,144,081 | 4,187,389 (**+1.0%**) | 38.9 | 38.1 (**-2.1%**) | | 16 | 252,875 | 263,176 | 0.464 | 0.455 (**-1.9%**) | 323,783 | 335,418 (**+3.6%**) | 4,127,310 | 4,217,028 (**+2.2%**) | 38.8 | 37.6 (**-3.2%**) | | 32 | 260,224 | 269,422 | 0.464 | 0.451 (**-2.8%**) | 304,001 | 314,989 (**+3.6%**) | 4,310,162 | 4,247,248 (**-1.5%**) | 38.8 | 37.3 (**-3.8%**) | ### Varying Compression **Write:** `db_bench --num=1000000 --separate_key_value_in_data_block=<bool> --compression_type=<X> [--compression_level=<N>] --benchmarks=fillrandom,compact` **Read:** `db_bench --db=$DB --use_existing_db --benchmarks=readrandom,readseq` | Compression | FillRandom (ops/s) | | Compact (s) | | ReadRandom (ops/s) | | 
ReadSeq (ops/s) | | SST Size (MB) | | |------------|-------------------:|-----:|----------:|-----:|-------------------:|-----:|----------------:|-----:|-------------:|-----:| | | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | | None | 252,494 | 260,552 | 0.413 | 0.419 (**+1.5%**) | 356,290 | 371,535 (**+4.3%**) | 4,479,261 | 4,507,133 (**+0.6%**) | 73.5 | 73.6 (**+0.2%**) | | LZ4 | 246,010 | 256,360 | 0.477 | 0.455 (**-4.6%**) | 342,497 | 345,882 (**+1.0%**) | 4,400,570 | 4,268,102 (**-3.0%**) | 38.3 | 37.6 (**-2.0%**) | | ZSTD (L3) | 254,748 | 258,556 | 1.067 | 1.055 (**-1.1%**) | 176,724 | 177,566 (**+0.5%**) | 2,736,841 | 2,717,739 (**-0.7%**) | 32.9 | 31.3 (**-4.7%**) | | ZSTD (L6) | 256,459 | 259,388 | 1.556 | 1.462 (**-6.0%**) | 177,390 | 176,691 (**-0.4%**) | 2,754,336 | 2,688,682 (**-2.4%**) | 32.8 | 31.1 (**-5.1%**) | ### Varying Block Size **Write:** `db_bench --num=1000000 --separate_key_value_in_data_block=<bool> --block_size=<X> --benchmarks=fillrandom,compact` **Read:** `db_bench --db=$DB --use_existing_db --benchmarks=readrandom,readseq` | Block Size | FillRandom (ops/s) | | Compact (s) | | ReadRandom (ops/s) | | ReadSeq (ops/s) | | SST Size (MB) | | |-----------:|-------------------:|-----:|----------:|-----:|-------------------:|-----:|----------------:|-----:|-------------:|-----:| | | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | | 4 KB | 263,203 | 260,362 | 0.469 | 0.461 (**-1.7%**) | 324,178 | 332,249 (**+2.5%**) | 4,231,537 | 4,217,763 (**-0.3%**) | 38.8 | 37.6 (**-3.1%**) | | 16 KB | 252,742 | 263,161 | 0.426 | 0.428 (**+0.5%**) | 227,805 | 222,873 (**-2.2%**) | 5,146,997 | 5,081,080 (**-1.3%**) | 38.1 | 36.7 (**-3.6%**) | | 64 KB | 257,490 | 260,225 | 0.423 | 0.414 (**-2.1%**) | 86,807 | 91,586 (**+5.5%**) | 5,380,403 | 5,372,372 (**-0.1%**) | 36.3 | 35.0 (**-3.5%**) | ### Varying Key Size **Write:** `db_bench --num=1000000 
--separate_key_value_in_data_block=<bool> --min_key_size=10 --max_key_size=100 --benchmarks=fillrandom,compact` **Read:** `db_bench --db=$DB --use_existing_db --benchmarks=readrandom,readseq` | Key Size | FillRandom (ops/s) | | Compact (s) | | ReadRandom (ops/s) | | ReadSeq (ops/s) | | SST Size (MB) | | |---------:|-------------------:|-----:|----------:|-----:|-------------------:|-----:|----------------:|-----:|-------------:|-----:| | | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | baseline | sep_kv | | 10–100 | 243,740 | 255,183 | 0.618 | 0.622 (**+0.6%**) | 284,304 | 307,569 (**+8.2%**) | 3,738,921 | 3,686,676 (**-1.4%**) | 41.5 | 41.2 (**-0.8%**) | ### CPU Profile Notes - No compression: DataBlock::SeekForGet uses less cpu (13.2% vs 13.9%) - https://fburl.com/strobelight/6mwwebft with separated KV - https://fburl.com/strobelight/m9m798ka without - ZSTD compression: rocksdb::DecompressSerializedBlock uses more CPU (45.8% vs 44.9%), while DataBlock::SeekForGet uses less cpu (5.09% vs 6.52%) - https://fburl.com/strobelight/3x5nw1k4 with separated KV - https://fburl.com/strobelight/e7809046 without --- Reviewed By: xingbowang, pdillinger Differential Revision: D92103024 Pulled By: joshkang97 fbshipit-source-id: 47cfeb656ff3c20d34975f0b6c4c0462935a83dc
373 lines
12 KiB
C++
373 lines
12 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
//
|
|
// Encoding independent of machine byte order:
|
|
// * Fixed-length numbers are encoded with least-significant byte first
|
|
// (little endian, native order on Intel and others)
|
|
// * In addition we support variable length "varint" encoding
|
|
// * Strings are encoded prefixed by their length in varint format
|
|
//
|
|
// Some related functions are provided in coding_lean.h
|
|
|
|
#pragma once
|
|
#include <algorithm>
|
|
#include <string>
|
|
#include <type_traits>
|
|
|
|
#include "port/port.h"
|
|
#include "rocksdb/slice.h"
|
|
#include "util/cast_util.h"
|
|
#include "util/coding_lean.h"
|
|
|
|
// Some processors do not allow unaligned access to memory
|
|
#if defined(__sparc)
|
|
#define PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED
|
|
#endif
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// The maximum length of a varint in bytes for 64-bit.
|
|
const uint32_t kMaxVarint64Length = 10;
|
|
|
|
// Standard Put... routines append to a string
|
|
void PutFixed16(std::string* dst, uint16_t value);
|
|
void PutFixed32(std::string* dst, uint32_t value);
|
|
void PutFixed64(std::string* dst, uint64_t value);
|
|
|
|
template <typename... Args>
|
|
void PutVarint32(std::string* dst, Args... args);
|
|
void PutVarint64(std::string* dst, uint64_t value);
|
|
void PutVarint64Varint64(std::string* dst, uint64_t value1, uint64_t value2);
|
|
void PutVarint32Varint64(std::string* dst, uint32_t value1, uint64_t value2);
|
|
void PutVarint32Varint32Varint64(std::string* dst, uint32_t value1,
|
|
uint32_t value2, uint64_t value3);
|
|
void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
|
|
void PutLengthPrefixedSliceParts(std::string* dst,
|
|
const SliceParts& slice_parts);
|
|
void PutLengthPrefixedSlicePartsWithPadding(std::string* dst,
|
|
const SliceParts& slice_parts,
|
|
size_t pad_sz);
|
|
|
|
// Standard Get... routines parse a value from the beginning of a Slice
|
|
// and advance the slice past the parsed value.
|
|
bool GetFixed64(Slice* input, uint64_t* value);
|
|
bool GetFixed32(Slice* input, uint32_t* value);
|
|
bool GetFixed16(Slice* input, uint16_t* value);
|
|
bool GetVarint32(Slice* input, uint32_t* value);
|
|
bool GetVarint64(Slice* input, uint64_t* value);
|
|
bool GetVarsignedint64(Slice* input, int64_t* value);
|
|
bool GetLengthPrefixedSlice(Slice* input, Slice* result);
|
|
// This function assumes data is well-formed.
|
|
Slice GetLengthPrefixedSlice(const char* data);
|
|
|
|
Slice GetSliceUntil(Slice* slice, char delimiter);
|
|
|
|
// Borrowed from
|
|
// https://github.com/facebook/fbthrift/blob/449a5f77f9f9bae72c9eb5e78093247eef185c04/thrift/lib/cpp/util/VarintUtils-inl.h#L202-L208
|
|
// Maps a signed 64-bit integer onto an unsigned one using ZigZag encoding,
// so that values of small magnitude (positive or negative) become small
// unsigned values: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
constexpr inline uint64_t i64ToZigzag(const int64_t l) {
  // Magnitude bits move up by one to leave room for the sign in bit 0.
  const uint64_t shifted = static_cast<uint64_t>(l) << 1;
  // All ones when l is negative, all zeros otherwise.
  const uint64_t sign_mask = static_cast<uint64_t>(l >> 63);
  return shifted ^ sign_mask;
}
|
|
// Inverse of i64ToZigzag: recovers the signed 64-bit integer from its ZigZag
// encoding (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
//
// Declared constexpr to match i64ToZigzag. All arithmetic is carried out on
// values that fit in int64_t (n >> 1 is at most INT64_MAX, and the mask is
// either 0 or -1), so no out-of-range unsigned-to-signed conversion is needed.
constexpr inline int64_t zigzagToI64(uint64_t n) {
  // Upper 63 bits hold the magnitude information.
  const int64_t magnitude = static_cast<int64_t>(n >> 1);
  // 0 when the sign bit (bit 0) is clear, -1 (all ones) when it is set.
  const int64_t sign_mask = -static_cast<int64_t>(n & 1);
  return magnitude ^ sign_mask;
}
|
|
|
|
// Pointer-based variants of GetVarint... These either store a value
|
|
// in *v and return a pointer just past the parsed value, or return
|
|
// nullptr on error. These routines only look at bytes in the range
|
|
// [p..limit-1]
|
|
const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* v);
|
|
const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* v);
|
|
inline const char* GetVarsignedint64Ptr(const char* p, const char* limit,
|
|
int64_t* value) {
|
|
uint64_t u = 0;
|
|
const char* ret = GetVarint64Ptr(p, limit, &u);
|
|
*value = zigzagToI64(u);
|
|
return ret;
|
|
}
|
|
|
|
// Returns the length of the varint32 or varint64 encoding of "v"
|
|
uint16_t VarintLength(uint64_t v);
|
|
|
|
// Lower-level versions of Put... that write directly into a character buffer
|
|
// and return a pointer just past the last byte written.
|
|
// REQUIRES: dst has enough space for the value being written
|
|
char* EncodeVarint32(char* dst, uint32_t value);
|
|
char* EncodeVarint64(char* dst, uint64_t value);
|
|
|
|
// Internal routine for use by fallback path of GetVarint32Ptr
|
|
const char* GetVarint32PtrFallback(const char* p, const char* limit,
|
|
uint32_t* value);
|
|
inline const char* GetVarint32Ptr(const char* p, const char* limit,
|
|
uint32_t* value) {
|
|
if (p < limit) {
|
|
uint32_t result = *(lossless_cast<const unsigned char*>(p));
|
|
if ((result & 128) == 0) {
|
|
*value = result;
|
|
return p + 1;
|
|
}
|
|
}
|
|
return GetVarint32PtrFallback(p, limit, value);
|
|
}
|
|
|
|
// Pull the last 8 bits and cast it to a character
|
|
inline void PutFixed16(std::string* dst, uint16_t value) {
|
|
if (port::kLittleEndian) {
|
|
dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
|
|
sizeof(value));
|
|
} else {
|
|
char buf[sizeof(value)];
|
|
EncodeFixed16(buf, value);
|
|
dst->append(buf, sizeof(buf));
|
|
}
|
|
}
|
|
|
|
inline void PutFixed32(std::string* dst, uint32_t value) {
|
|
if (port::kLittleEndian) {
|
|
dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
|
|
sizeof(value));
|
|
} else {
|
|
char buf[sizeof(value)];
|
|
EncodeFixed32(buf, value);
|
|
dst->append(buf, sizeof(buf));
|
|
}
|
|
}
|
|
|
|
inline void PutFixed64(std::string* dst, uint64_t value) {
|
|
if (port::kLittleEndian) {
|
|
dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
|
|
sizeof(value));
|
|
} else {
|
|
char buf[sizeof(value)];
|
|
EncodeFixed64(buf, value);
|
|
dst->append(buf, sizeof(buf));
|
|
}
|
|
}
|
|
|
|
template <typename... Args>
|
|
inline void PutVarint32(std::string* dst, Args... args) {
|
|
static_assert((std::is_convertible_v<Args, uint32_t> && ...),
|
|
"All arguments must be convertible to uint32_t");
|
|
constexpr size_t kMaxBytesPerVarint32 = 5;
|
|
char buf[sizeof...(args) * kMaxBytesPerVarint32];
|
|
char* ptr = buf;
|
|
((ptr = EncodeVarint32(ptr, static_cast<uint32_t>(args))), ...);
|
|
dst->append(buf, static_cast<size_t>(ptr - buf));
|
|
}
|
|
|
|
inline char* EncodeVarint64(char* dst, uint64_t v) {
|
|
static const unsigned int B = 128;
|
|
unsigned char* ptr = lossless_cast<unsigned char*>(dst);
|
|
while (v >= B) {
|
|
*(ptr++) = (v & (B - 1)) | B;
|
|
v >>= 7;
|
|
}
|
|
*(ptr++) = static_cast<unsigned char>(v);
|
|
return lossless_cast<char*>(ptr);
|
|
}
|
|
|
|
inline void PutVarint64(std::string* dst, uint64_t v) {
|
|
char buf[kMaxVarint64Length];
|
|
char* ptr = EncodeVarint64(buf, v);
|
|
dst->append(buf, static_cast<size_t>(ptr - buf));
|
|
}
|
|
|
|
inline void PutVarsignedint64(std::string* dst, int64_t v) {
|
|
char buf[kMaxVarint64Length];
|
|
// Using Zigzag format to convert signed to unsigned
|
|
char* ptr = EncodeVarint64(buf, i64ToZigzag(v));
|
|
dst->append(buf, static_cast<size_t>(ptr - buf));
|
|
}
|
|
|
|
inline void PutVarint64Varint64(std::string* dst, uint64_t v1, uint64_t v2) {
|
|
char buf[20];
|
|
char* ptr = EncodeVarint64(buf, v1);
|
|
ptr = EncodeVarint64(ptr, v2);
|
|
dst->append(buf, static_cast<size_t>(ptr - buf));
|
|
}
|
|
|
|
inline void PutVarint32Varint64(std::string* dst, uint32_t v1, uint64_t v2) {
|
|
char buf[15];
|
|
char* ptr = EncodeVarint32(buf, v1);
|
|
ptr = EncodeVarint64(ptr, v2);
|
|
dst->append(buf, static_cast<size_t>(ptr - buf));
|
|
}
|
|
|
|
inline void PutVarint32Varint32Varint64(std::string* dst, uint32_t v1,
|
|
uint32_t v2, uint64_t v3) {
|
|
char buf[20];
|
|
char* ptr = EncodeVarint32(buf, v1);
|
|
ptr = EncodeVarint32(ptr, v2);
|
|
ptr = EncodeVarint64(ptr, v3);
|
|
dst->append(buf, static_cast<size_t>(ptr - buf));
|
|
}
|
|
|
|
// Appends `value` to *dst preceded by its size encoded as a varint32.
// The size is narrowed to 32 bits before encoding.
inline void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
  const size_t payload_len = value.size();
  PutVarint32(dst, static_cast<uint32_t>(payload_len));
  dst->append(value.data(), payload_len);
}
|
|
|
|
inline void PutLengthPrefixedSliceParts(std::string* dst, size_t total_bytes,
|
|
const SliceParts& slice_parts) {
|
|
for (int i = 0; i < slice_parts.num_parts; ++i) {
|
|
total_bytes += slice_parts.parts[i].size();
|
|
}
|
|
PutVarint32(dst, static_cast<uint32_t>(total_bytes));
|
|
for (int i = 0; i < slice_parts.num_parts; ++i) {
|
|
dst->append(slice_parts.parts[i].data(), slice_parts.parts[i].size());
|
|
}
|
|
}
|
|
|
|
inline void PutLengthPrefixedSliceParts(std::string* dst,
|
|
const SliceParts& slice_parts) {
|
|
PutLengthPrefixedSliceParts(dst, /*total_bytes=*/0, slice_parts);
|
|
}
|
|
|
|
inline void PutLengthPrefixedSlicePartsWithPadding(
|
|
std::string* dst, const SliceParts& slice_parts, size_t pad_sz) {
|
|
PutLengthPrefixedSliceParts(dst, /*total_bytes=*/pad_sz, slice_parts);
|
|
dst->append(pad_sz, '\0');
|
|
}
|
|
|
|
// Returns the number of bytes the varint encoding of `v` occupies: one byte
// for values below 128, plus one more for every additional 7-bit group.
inline uint16_t VarintLength(uint64_t v) {
  uint16_t byte_count = 1;
  for (; v >= 128; v >>= 7) {
    ++byte_count;
  }
  return byte_count;
}
|
|
|
|
// Reads a fixed-width 64-bit value from the front of *input and advances
// *input past it. Returns false, leaving *input and *value untouched, when
// fewer than 8 bytes are available.
inline bool GetFixed64(Slice* input, uint64_t* value) {
  constexpr size_t kWidth = sizeof(uint64_t);
  const bool has_enough = input->size() >= kWidth;
  if (has_enough) {
    *value = DecodeFixed64(input->data());
    input->remove_prefix(kWidth);
  }
  return has_enough;
}
|
|
|
|
// Reads a fixed-width 32-bit value from the front of *input and advances
// *input past it. Returns false, leaving *input and *value untouched, when
// fewer than 4 bytes are available.
inline bool GetFixed32(Slice* input, uint32_t* value) {
  constexpr size_t kWidth = sizeof(uint32_t);
  const bool has_enough = input->size() >= kWidth;
  if (has_enough) {
    *value = DecodeFixed32(input->data());
    input->remove_prefix(kWidth);
  }
  return has_enough;
}
|
|
|
|
// Reads a fixed-width 16-bit value from the front of *input and advances
// *input past it. Returns false, leaving *input and *value untouched, when
// fewer than 2 bytes are available.
inline bool GetFixed16(Slice* input, uint16_t* value) {
  constexpr size_t kWidth = sizeof(uint16_t);
  const bool has_enough = input->size() >= kWidth;
  if (has_enough) {
    *value = DecodeFixed16(input->data());
    input->remove_prefix(kWidth);
  }
  return has_enough;
}
|
|
|
|
// Parses a varint32 from the front of *input. On success stores it in
// *value, advances *input past the encoding and returns true. On failure
// returns false and leaves *input unchanged.
inline bool GetVarint32(Slice* input, uint32_t* value) {
  const char* const begin = input->data();
  const char* const end = begin + input->size();
  const char* const next = GetVarint32Ptr(begin, end, value);
  if (next == nullptr) {
    return false;
  }
  *input = Slice(next, static_cast<size_t>(end - next));
  return true;
}
|
|
|
|
// Parses a varint64 from the front of *input. On success stores it in
// *value, advances *input past the encoding and returns true. On failure
// returns false and leaves *input unchanged.
inline bool GetVarint64(Slice* input, uint64_t* value) {
  const char* const begin = input->data();
  const char* const end = begin + input->size();
  const char* const next = GetVarint64Ptr(begin, end, value);
  if (next == nullptr) {
    return false;
  }
  *input = Slice(next, static_cast<size_t>(end - next));
  return true;
}
|
|
|
|
// Parses a ZigZag-encoded signed varint64 from the front of *input. On
// success advances *input past the encoding and returns true; on failure
// returns false and leaves *input unchanged. *value is written by
// GetVarsignedint64Ptr in either case.
inline bool GetVarsignedint64(Slice* input, int64_t* value) {
  const char* const begin = input->data();
  const char* const end = begin + input->size();
  const char* const next = GetVarsignedint64Ptr(begin, end, value);
  if (next == nullptr) {
    return false;
  }
  *input = Slice(next, static_cast<size_t>(end - next));
  return true;
}
|
|
|
|
// Parses a varint32 length followed by that many bytes from the front of
// *input. On success *result refers to the payload bytes (no copy is made)
// and *input is advanced past them. Returns false if the length cannot be
// parsed or if fewer than `len` bytes remain; in the latter case *input has
// already been advanced past the length prefix.
inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
  uint32_t len = 0;
  if (!GetVarint32(input, &len) || input->size() < len) {
    return false;
  }
  *result = Slice(input->data(), len);
  input->remove_prefix(len);
  return true;
}
|
|
|
|
// Returns the slice that follows a varint32 length prefix located at `data`.
// No bounds are known here, so the input is assumed to be well-formed.
inline Slice GetLengthPrefixedSlice(const char* data) {
  // Each varint byte carries 7 payload bits, so a 32-bit length occupies at
  // most 5 bytes; that is used as the parse limit.
  constexpr int kVarint32MaxBytes = 5;
  uint32_t len = 0;
  const char* payload =
      GetVarint32Ptr(data, data + kVarint32MaxBytes /* limit */, &len);
  return Slice(payload, len);
}
|
|
|
|
// Splits off the leading portion of *slice up to (but not including) the
// first occurrence of `delimiter` and returns it. *slice is advanced past
// the returned portion and past the delimiter itself when one was found.
// If no delimiter is present the whole slice is returned and *slice becomes
// empty.
//
// The scan index is a size_t so that it can always reach slice->size();
// a 32-bit index would wrap around on inputs larger than 4 GiB.
inline Slice GetSliceUntil(Slice* slice, char delimiter) {
  const char* const data = slice->data();
  const size_t size = slice->size();

  size_t len = 0;
  while (len < size && data[len] != delimiter) {
    ++len;
  }

  Slice ret(data, len);
  // Also consume the delimiter, but only if the scan stopped on one.
  const size_t consumed = (len < size) ? len + 1 : len;
  slice->remove_prefix(consumed);
  return ret;
}
|
|
|
|
// Stores `value` at `memory`, which may not be suitably aligned for T.
// When PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED is defined the bytes are copied
// with memcpy; otherwise a plain assignment through the pointer is used.
// Under ROCKSDB_UBSAN_RUN the function is annotated so that the sanitizer
// does not report this access.
template <class T>
#ifdef ROCKSDB_UBSAN_RUN
#if defined(__clang__)
__attribute__((__no_sanitize__("alignment")))
#elif defined(__GNUC__)
__attribute__((__no_sanitize_undefined__))
#endif
#endif
inline void PutUnaligned(T* memory, const T& value) {
#if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED)
  const char* src_bytes = reinterpret_cast<const char*>(&value);
  char* dst_bytes = reinterpret_cast<char*>(memory);
  memcpy(dst_bytes, src_bytes, sizeof(T));
#else
  *memory = value;
#endif
}
|
|
|
|
// Loads a T from `memory`, which may not be suitably aligned for T, into
// *value. When PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED is defined the bytes
// are copied with memcpy; otherwise a plain read through the pointer is
// used. Under ROCKSDB_UBSAN_RUN the function is annotated so that the
// sanitizer does not report this access.
template <class T>
#ifdef ROCKSDB_UBSAN_RUN
#if defined(__clang__)
__attribute__((__no_sanitize__("alignment")))
#elif defined(__GNUC__)
__attribute__((__no_sanitize_undefined__))
#endif
#endif
inline void GetUnaligned(const T* memory, T* value) {
#if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED)
  const char* src_bytes = reinterpret_cast<const char*>(memory);
  char* dst_bytes = reinterpret_cast<char*>(value);
  memcpy(dst_bytes, src_bytes, sizeof(T));
#else
  *value = *memory;
#endif
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|