Summary:
`DBErrorHandlingFSTest.AtomicFlushNoSpaceError` is flaky due to a seg fault during error recovery:
```
...
frame #5: 0x00007f0b3ea0a9d6 librocksdb.so.9.10`rocksdb::VersionSet::GetObsoleteFiles(std::vector<rocksdb::ObsoleteFileInfo, std::allocator<rocksdb::ObsoleteFileInfo>>*, std::vector<rocksdb::ObsoleteBlobFileInfo, std::allocator<rocksdb::ObsoleteBlobFileInfo>>*, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*, unsigned long) [inlined] std::vector<rocksdb::ObsoleteFileInfo, std::allocator<rocksdb::ObsoleteFileInfo>>::begin(this=<unavailable>) at stl_vector.h:812:16
frame #6: 0x00007f0b3ea0a9d6 librocksdb.so.9.10`rocksdb::VersionSet::GetObsoleteFiles(this=0x0000000000000000, files=size=0, blob_files=size=0, manifest_filenames=size=0, min_pending_output=18446744073709551615) at version_set.cc:7258:18
frame #7: 0x00007f0b3e8ccbc0 librocksdb.so.9.10`rocksdb::DBImpl::FindObsoleteFiles(this=<unavailable>, job_context=<unavailable>, force=<unavailable>, no_full_scan=<unavailable>) at db_impl_files.cc:162:30
frame #8: 0x00007f0b3e85e698 librocksdb.so.9.10`rocksdb::DBImpl::ResumeImpl(this=<unavailable>, context=<unavailable>) at db_impl.cc:434:20
frame #9: 0x00007f0b3e921516 librocksdb.so.9.10`rocksdb::ErrorHandler::RecoverFromBGError(this=<unavailable>, is_manual=<unavailable>) at error_handler.cc:632:46
```
I suspect this is due to the DB being destructed and reopened during recovery. Specifically, the [ClearBGError() call](https://github.com/facebook/rocksdb/blob/c72e79a262/db/db_impl/db_impl.cc#L425) can release and reacquire the mutex, and the DB can be closed during this time. So it's not safe to access DB state after ClearBGError(). There was a similar story in https://github.com/facebook/rocksdb/issues/9496. [Moving the obsolete files logic after ClearBGError()](https://github.com/facebook/rocksdb/pull/11955) probably makes the seg fault more easily triggered.
This PR updates `ClearBGError()` to guarantee that DB close cannot finish until the method has returned and the mutex has been released, so that we can safely access DB state after calling it.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/13234
Test Plan: I could not trigger the seg fault locally, will just monitor future test failures.
Reviewed By: jowlyzhang
Differential Revision: D67476836
Pulled By: cbi42
fbshipit-source-id: dfb3e9ccd4eb3d43fc596ec10e4052861eeec002
165 lines
5.6 KiB
C++
165 lines
5.6 KiB
C++
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
#pragma once
|
|
|
|
#include <sstream>
|
|
|
|
#include "monitoring/instrumented_mutex.h"
|
|
#include "options/db_options.h"
|
|
#include "rocksdb/io_status.h"
|
|
#include "rocksdb/listener.h"
|
|
#include "rocksdb/status.h"
|
|
#include "util/autovector.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class DBImpl;
|
|
|
|
// This structure is used to store the DB recovery context. The context is
|
|
// the information that related to the recover actions. For example, it contains
|
|
// FlushReason, which tells the flush job why this flush is called.
|
|
struct DBRecoverContext {
|
|
FlushReason flush_reason;
|
|
bool flush_after_recovery;
|
|
|
|
DBRecoverContext()
|
|
: flush_reason(FlushReason::kErrorRecovery),
|
|
flush_after_recovery(false) {}
|
|
DBRecoverContext(FlushReason reason)
|
|
: flush_reason(reason), flush_after_recovery(false) {}
|
|
};
|
|
|
|
class ErrorHandler {
|
|
public:
|
|
ErrorHandler(DBImpl* db, const ImmutableDBOptions& db_options,
|
|
InstrumentedMutex* db_mutex)
|
|
: db_(db),
|
|
db_options_(db_options),
|
|
cv_(db_mutex),
|
|
end_recovery_(false),
|
|
recovery_thread_(nullptr),
|
|
db_mutex_(db_mutex),
|
|
auto_recovery_(false),
|
|
recovery_in_prog_(false),
|
|
soft_error_no_bg_work_(false),
|
|
allow_db_shutdown_(true),
|
|
is_db_stopped_(false),
|
|
bg_error_stats_(db_options.statistics) {
|
|
// Clear the checked flag for uninitialized errors
|
|
bg_error_.PermitUncheckedError();
|
|
recovery_error_.PermitUncheckedError();
|
|
}
|
|
|
|
void EnableAutoRecovery() { auto_recovery_ = true; }
|
|
|
|
Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
|
|
Status::Code code, Status::SubCode subcode);
|
|
|
|
void SetBGError(const Status& bg_err, BackgroundErrorReason reason,
|
|
bool wal_related = false);
|
|
|
|
Status GetBGError() const { return bg_error_; }
|
|
|
|
Status GetRecoveryError() const { return recovery_error_; }
|
|
|
|
// REQUIREs: db mutex held
|
|
//
|
|
// Returns non-OK status if encountered error during recovery.
|
|
// Returns OK if bg error is successfully cleared. May releases and
|
|
// re-acquire db mutex to notify listeners. However, DB close (if initiated)
|
|
// will be blocked until db mutex is released after return.
|
|
Status ClearBGError();
|
|
|
|
bool IsDBStopped() { return is_db_stopped_.load(std::memory_order_acquire); }
|
|
|
|
bool IsBGWorkStopped() {
|
|
assert(db_mutex_);
|
|
db_mutex_->AssertHeld();
|
|
return !bg_error_.ok() &&
|
|
(bg_error_.severity() >= Status::Severity::kHardError ||
|
|
!auto_recovery_ || soft_error_no_bg_work_);
|
|
}
|
|
|
|
bool IsSoftErrorNoBGWork() { return soft_error_no_bg_work_; }
|
|
|
|
bool IsRecoveryInProgress() { return recovery_in_prog_; }
|
|
|
|
// REQUIRES: db mutex held
|
|
bool ReadyForShutdown() {
|
|
db_mutex_->AssertHeld();
|
|
return !recovery_in_prog_ && allow_db_shutdown_;
|
|
}
|
|
|
|
Status RecoverFromBGError(bool is_manual = false);
|
|
void CancelErrorRecoveryForShutDown();
|
|
|
|
void EndAutoRecovery();
|
|
|
|
void AddFilesToQuarantine(
|
|
autovector<const autovector<uint64_t>*> files_to_quarantine);
|
|
|
|
const autovector<uint64_t>& GetFilesToQuarantine() const {
|
|
db_mutex_->AssertHeld();
|
|
return files_to_quarantine_;
|
|
}
|
|
|
|
void ClearFilesToQuarantine();
|
|
|
|
private:
|
|
void RecordStats(
|
|
const std::vector<Tickers>& ticker_types,
|
|
const std::vector<std::tuple<Histograms, uint64_t>>& int_histograms);
|
|
|
|
DBImpl* db_;
|
|
const ImmutableDBOptions& db_options_;
|
|
Status bg_error_;
|
|
// A separate Status variable used to record any errors during the
|
|
// recovery process from hard errors
|
|
IOStatus recovery_error_;
|
|
// The condition variable used with db_mutex during auto resume for time
|
|
// wait.
|
|
InstrumentedCondVar cv_;
|
|
bool end_recovery_;
|
|
std::unique_ptr<port::Thread> recovery_thread_;
|
|
|
|
InstrumentedMutex* db_mutex_;
|
|
// A flag indicating whether automatic recovery from errors is enabled. Auto
|
|
// recovery applies for delegating to SstFileManager to handle no space type
|
|
// of errors. This flag doesn't control the auto resume behavior to recover
|
|
// from retryable IO errors.
|
|
bool auto_recovery_;
|
|
bool recovery_in_prog_;
|
|
// A flag to indicate that for the soft error, we should not allow any
|
|
// background work except the work is from recovery.
|
|
bool soft_error_no_bg_work_;
|
|
// Used in ClearBGError() to prevent DB from being closed.
|
|
bool allow_db_shutdown_;
|
|
|
|
// Used to store the context for recover, such as flush reason.
|
|
DBRecoverContext recover_context_;
|
|
std::atomic<bool> is_db_stopped_;
|
|
|
|
// The pointer of DB statistics.
|
|
std::shared_ptr<Statistics> bg_error_stats_;
|
|
|
|
// During recovery from manifest IO errors, files whose VersionEdits entries
|
|
// could be in an ambiguous state are quarantined and file deletion refrain
|
|
// from deleting them. Successful recovery will clear this vector. Files are
|
|
// added to this vector while DB mutex was locked, this data structure is
|
|
// unsorted.
|
|
autovector<uint64_t> files_to_quarantine_;
|
|
|
|
void HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason);
|
|
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
|
|
void RecoverFromNoSpace();
|
|
void StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
|
|
void RecoverFromRetryableBGIOError();
|
|
// First, if it is in recovery and the recovery_error is ok. Set the
|
|
// recovery_error_ to bg_err. Second, if the severity is higher than the
|
|
// current bg_error_, overwrite it.
|
|
void CheckAndSetRecoveryAndBGError(const Status& bg_err);
|
|
};
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|