2019-06-20 07:17:21 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
#include <vector>
|
2019-09-03 20:27:46 +00:00
|
|
|
#include <cstdint>
|
|
|
|
|
2019-06-20 07:17:21 +00:00
|
|
|
/// Forward declaration of Poco::Util::AbstractConfiguration.
/// This header only takes the type by const reference, so the full Poco
/// definition is not needed here.
namespace Poco
|
|
|
|
{
|
|
|
|
namespace Util
|
|
|
|
{
|
|
|
|
class AbstractConfiguration;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-06 17:48:27 +00:00
|
|
|
/// SensitiveDataMasker removes sensitive data from queries using a set of regexp-based rules.
|
|
|
|
|
2020-06-27 19:05:00 +00:00
|
|
|
/// It's used as a singleton via getInstance method
|
2019-09-06 17:48:27 +00:00
|
|
|
|
|
|
|
/// Initially it's empty (nullptr) and after manual initialization
|
|
|
|
/// (one-time, done by setInstance call) it takes the proper value which
|
|
|
|
/// is stored in unique_ptr.
|
|
|
|
|
2020-06-27 19:05:00 +00:00
|
|
|
/// It looks like the singleton is the best option here, as
|
2019-09-06 17:48:27 +00:00
|
|
|
/// two users of that object (OwnSplitChannel & Interpreters/executeQuery)
|
2020-08-08 00:47:03 +00:00
|
|
|
/// can't own/share that Masker properly without synchronization & locks,
|
2019-09-06 17:48:27 +00:00
|
|
|
/// and we can't afford setting global locks for each logged line.
|
|
|
|
|
|
|
|
/// I've considered singleton alternatives, but it's unclear who should own the object,
|
|
|
|
/// and it introduces unnecessary complexity in the implementation (passing references back and forth):
|
|
|
|
///
|
|
|
|
/// context can't own, as Context is destroyed before logger,
|
|
|
|
/// and logger lives longer and logging can still happen after Context destruction.
|
|
|
|
/// resetting masker in the logger at the moment of
|
2022-04-17 23:02:49 +00:00
|
|
|
/// context destruction can't be done without synchronization / locks in a safe manner.
|
2019-09-06 17:48:27 +00:00
|
|
|
///
|
|
|
|
/// logger is Poco-derived and I didn't want to break its interface,
|
|
|
|
/// also logger can be dynamically reconfigured without server restart,
|
|
|
|
/// and it actually recreates OwnSplitChannel when reconfiguration happens,
|
|
|
|
/// so that makes it quite tricky. So it is a bad candidate for owning the masker too.
|
|
|
|
|
2019-06-20 07:17:21 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
/// Holds a list of masking rules and applies them to strings to wipe sensitive data.
/// A single process-wide instance can be installed with setInstance() and
/// retrieved with getInstance(); the class can also be constructed directly.
class SensitiveDataMasker
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
/// A single masking rule. Only declared here; the definition is not part of this header.
class MaskingRule;
|
|
|
|
/// All rules owned by this masker.
std::vector<std::unique_ptr<MaskingRule>> all_masking_rules;
|
2019-09-06 17:48:27 +00:00
|
|
|
/// Storage for the process-wide instance handed over via setInstance().
static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker;
|
2019-06-20 07:17:21 +00:00
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// Constructs a masker from `config`, using `config_prefix` to locate its settings.
/// NOTE(review): presumably the rules are read from the keys under `config_prefix` -
/// confirm against the implementation.
SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
|
|
|
|
/// Declared here and defined out of line; MaskingRule is an incomplete type in this header.
~SensitiveDataMasker();
|
2019-09-03 20:27:46 +00:00
|
|
|
|
|
|
|
|
/// Returns the number of matched rules.
|
|
|
|
/// `data` is taken by non-const reference, so it can be modified in place.
size_t wipeSensitiveData(std::string & data) const;
|
|
|
|
|
2019-09-06 17:48:27 +00:00
|
|
|
/// setInstance is not thread-safe and should be called once in single-thread mode.
|
2019-09-23 16:18:19 +00:00
|
|
|
/// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367
|
2019-09-06 17:48:27 +00:00
|
|
|
/// Takes ownership of `sensitive_data_masker_`.
static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_);
|
|
|
|
/// Returns a non-owning pointer to the process-wide instance.
/// Per the file-level comment the instance is initially empty (nullptr) until
/// setInstance() has been called, so callers should be prepared for nullptr.
static SensitiveDataMasker * getInstance();
|
|
|
|
|
2019-09-03 20:27:46 +00:00
|
|
|
/// Used in tests.
|
2019-06-20 07:17:21 +00:00
|
|
|
/// Adds a rule identified by `name` built from `regexp_string` and `replacement_string`.
void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string);
|
|
|
|
|
|
|
|
/// Only declared in builds where NDEBUG is not defined (i.e. debug builds).
#ifndef NDEBUG
|
|
|
|
void printStats();
|
|
|
|
#endif
|
|
|
|
|
2019-09-03 20:27:46 +00:00
|
|
|
/// NOTE(review): presumably returns the number of rules currently held - confirm against the implementation.
size_t rulesCount() const;
|
2019-06-20 07:17:21 +00:00
|
|
|
};
|
|
|
|
|
2022-05-16 18:59:27 +00:00
|
|
|
}
|