Summary: **Context/Summary:** This PR adds a new option, `track_and_verify_wals`, to detect and handle a WAL hole (new WAL data is present while some older WAL data is missing) as well as a DB opened with no WAL. It addresses https://github.com/facebook/rocksdb/issues/12488. It is intended as a future replacement for `track_and_verify_wals_in_manifest` because of its simplicity, its better handling of WAL holes in `WALRecoveryMode::kPointInTimeRecovery`, and its potential to cover more scenarios for `WALRecoveryMode::kTolerateCorruptedTailRecords/kAbsoluteConsistency` (in future PRs). The verification is done in `LogReader::MaybeVerifyPredecessorWALInfo()` and the tracking is done in `log::Writer::MaybeAddPredecessorWALInfo()`. This PR also groups common utilities in `log::Writer` into the functions `MaybeHandleSeenFileWriterError()` and `MaybeSwitchToNewBlock()` to avoid adding redundant code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13226 Test Plan: - New UT - Integrate into existing UT - Intense rehearsal stress/crash test - db bench - The only potential performance implication is on the write path, since we now keep track of the last seqno recorded in the WAL in `log::Writer`. The benchmark below shows no regression. ``` ./db_bench --benchmarks=fillrandom[-X3] --num=2500000 --db=/dev/shm/db_bench_new --disable_auto_compactions=1 --threads=1 --enable_pipelined_write=0 --disable_wal=0 --track_and_verify_wals=1 Pre fillrandom [AVG 3 runs] : 310517 (± 5641) ops/sec; 34.4 (± 0.6) MB/sec fillrandom [MEDIAN 3 runs] : 308848 ops/sec; 34.2 MB/sec Post fillrandom [AVG 3 runs] : 311469 (± 4096) ops/sec; 34.5 (± 0.5) MB/sec fillrandom [MEDIAN 3 runs] : 311961 ops/sec; 34.5 MB/sec ``` Reviewed By: pdillinger Differential Revision: D67550260 Pulled By: hx235 fbshipit-source-id: 623e29bbe293ef03a45c20c348f84c8cb5bdaf91
64 lines
1.9 KiB
C++
64 lines
1.9 KiB
C++
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// Log format information shared by reader and writer.
// See ../doc/log_format.txt for more detail.
|
|
#pragma once
|
|
|
|
#include <cstdint>
|
|
|
|
#include "rocksdb/rocksdb_namespace.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
namespace log {

// Type tag of a log record; it occupies the one-byte "type" field of the
// record header (see kHeaderSize / kRecyclableHeaderSize below).
//
// Every value is pinned explicitly.
// NOTE(review): these values appear to be part of the persisted log format
// (see ../doc/log_format.txt) -- confirm before renumbering anything.
enum RecordType : uint8_t {
  // Zero is reserved for preallocated files
  kZeroType = 0,
  kFullType = 1,

  // For fragments
  kFirstType = 2,
  kMiddleType = 3,
  kLastType = 4,

  // For recycled log files
  kRecyclableFullType = 5,
  kRecyclableFirstType = 6,
  kRecyclableMiddleType = 7,
  kRecyclableLastType = 8,

  // Compression Type
  kSetCompressionType = 9,

  // For all the values >= 10, the 1 bit indicates whether it's recyclable

  // User-defined timestamp sizes
  kUserDefinedTimestampSizeType = 10,
  kRecyclableUserDefinedTimestampSizeType = 11,

  // For WAL verification
  kPredecessorWALInfoType = 130,
  kRecyclePredecessorWALInfoType = 131,
};

// Unknown type of value with the 8-th bit set will be ignored
constexpr uint8_t kRecordTypeSafeIgnoreMask = 1 << 7;

// Largest record type value declared in RecordType.
constexpr uint8_t kMaxRecordType = kRecyclePredecessorWALInfoType;

// Compile-time checks that pin the bit conventions described above, so that a
// future edit to the enum cannot silently break them.
static_assert((kPredecessorWALInfoType & kRecordTypeSafeIgnoreMask) != 0,
              "kPredecessorWALInfoType must have the safe-ignore bit set");
static_assert((kRecyclePredecessorWALInfoType & kRecordTypeSafeIgnoreMask) !=
                  0,
              "kRecyclePredecessorWALInfoType must have the safe-ignore bit "
              "set");
static_assert((kUserDefinedTimestampSizeType & 1) == 0 &&
                  (kRecyclableUserDefinedTimestampSizeType & 1) == 1,
              "for values >= 10 the lowest bit must mark the recyclable type");
static_assert((kPredecessorWALInfoType & 1) == 0 &&
                  (kRecyclePredecessorWALInfoType & 1) == 1,
              "for values >= 10 the lowest bit must mark the recyclable type");

// Size of a log block: 32768 (32 KiB).
// NOTE(review): presumably in bytes, like the header sizes below -- confirm.
constexpr unsigned int kBlockSize = 32768;

// Header is checksum (4 bytes), length (2 bytes), type (1 byte)
constexpr int kHeaderSize = 4 + 2 + 1;

// Recyclable header is checksum (4 bytes), length (2 bytes), type (1 byte),
// log number (4 bytes).
constexpr int kRecyclableHeaderSize = 4 + 2 + 1 + 4;

}  // namespace log
}  // namespace ROCKSDB_NAMESPACE