Merge branch 'ClickHouse:master' into hive_partitioning_filtration

This commit is contained in:
Yarik Briukhovetskyi 2024-09-18 11:11:04 +02:00 committed by GitHub
commit 143d9f0201
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
66 changed files with 1334 additions and 1195 deletions

View File

@ -29,6 +29,7 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_RESTORE_TABLE;
extern const int ACCESS_ENTITY_ALREADY_EXISTS;
extern const int LOGICAL_ERROR;
}
@ -175,9 +176,46 @@ namespace
return res;
}
std::unordered_map<UUID, UUID> resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies, const AccessControl & access_control, bool allow_unresolved_dependencies)
/// Checks if new entities (which we're going to restore) already exist,
/// and either skips them or throws an exception depending on the restore settings.
void checkExistingEntities(std::vector<std::pair<UUID, AccessEntityPtr>> & entities,
std::unordered_map<UUID, UUID> & old_to_new_id,
const AccessControl & access_control,
RestoreAccessCreationMode creation_mode)
{
if (creation_mode == RestoreAccessCreationMode::kReplace)
return;
auto should_skip = [&](const std::pair<UUID, AccessEntityPtr> & id_and_entity)
{
const auto & id = id_and_entity.first;
const auto & entity = *id_and_entity.second;
auto existing_id = access_control.find(entity.getType(), entity.getName());
if (!existing_id)
{
return false;
}
else if (creation_mode == RestoreAccessCreationMode::kCreateIfNotExists)
{
old_to_new_id[id] = *existing_id;
return true;
}
else
{
throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "Cannot restore {} because it already exists", entity.formatTypeWithName());
}
};
std::erase_if(entities, should_skip);
}
/// If new entities (which we're going to restore) depend on other entities which are not going to be restored or not present in the backup
/// then we should try to replace those dependencies with already existing entities.
void resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies,
std::unordered_map<UUID, UUID> & old_to_new_ids,
const AccessControl & access_control,
bool allow_unresolved_dependencies)
{
std::unordered_map<UUID, UUID> old_to_new_ids;
for (const auto & [id, name_and_type] : dependencies)
{
std::optional<UUID> new_id;
@ -188,9 +226,9 @@ namespace
if (new_id)
old_to_new_ids.emplace(id, *new_id);
}
return old_to_new_ids;
}
/// Generates random IDs for the new entities.
void generateRandomIDs(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, std::unordered_map<UUID, UUID> & old_to_new_ids)
{
Poco::UUIDGenerator generator;
@ -203,27 +241,12 @@ namespace
}
}
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const std::unordered_map<UUID, UUID> & old_to_new_ids)
/// Updates dependencies of the new entities using a specified map.
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities,
const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
for (auto & entity : entities | boost::adaptors::map_values)
{
bool need_replace = false;
for (const auto & dependency : entity->findDependencies())
{
if (old_to_new_ids.contains(dependency))
{
need_replace = true;
break;
}
}
if (!need_replace)
continue;
auto new_entity = entity->clone();
new_entity->replaceDependencies(old_to_new_ids);
entity = new_entity;
}
IAccessEntity::replaceDependencies(entity, old_to_new_ids);
}
AccessRightsElements getRequiredAccessToRestore(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities)
@ -314,7 +337,9 @@ std::pair<String, BackupEntryPtr> makeBackupEntryForAccess(
AccessRestorerFromBackup::AccessRestorerFromBackup(
const BackupPtr & backup_, const RestoreSettings & restore_settings_)
: backup(backup_), allow_unresolved_access_dependencies(restore_settings_.allow_unresolved_access_dependencies)
: backup(backup_)
, creation_mode(restore_settings_.create_access)
, allow_unresolved_dependencies(restore_settings_.allow_unresolved_access_dependencies)
{
}
@ -362,7 +387,9 @@ std::vector<std::pair<UUID, AccessEntityPtr>> AccessRestorerFromBackup::getAcces
{
auto new_entities = entities;
auto old_to_new_ids = resolveDependencies(dependencies, access_control, allow_unresolved_access_dependencies);
std::unordered_map<UUID, UUID> old_to_new_ids;
checkExistingEntities(new_entities, old_to_new_ids, access_control, creation_mode);
resolveDependencies(dependencies, old_to_new_ids, access_control, allow_unresolved_dependencies);
generateRandomIDs(new_entities, old_to_new_ids);
replaceDependencies(new_entities, old_to_new_ids);

View File

@ -17,6 +17,7 @@ using BackupPtr = std::shared_ptr<const IBackup>;
class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
struct RestoreSettings;
enum class RestoreAccessCreationMode : uint8_t;
/// Makes a backup of access entities of a specified type.
@ -45,7 +46,8 @@ public:
private:
BackupPtr backup;
bool allow_unresolved_access_dependencies = false;
RestoreAccessCreationMode creation_mode;
bool allow_unresolved_dependencies = false;
std::vector<std::pair<UUID, AccessEntityPtr>> entities;
std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;
std::unordered_set<String> data_paths;

View File

@ -544,9 +544,9 @@ scope_guard AccessControl::subscribeForChanges(const std::vector<UUID> & ids, co
return changes_notifier->subscribeForChanges(ids, handler);
}
bool AccessControl::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
bool AccessControl::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
if (MultipleAccessStorage::insertImpl(id, entity, replace_if_exists, throw_if_exists))
if (MultipleAccessStorage::insertImpl(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
{
changes_notifier->sendNotifications();
return true;

View File

@ -243,7 +243,7 @@ private:
class CustomSettingsPrefixes;
class PasswordComplexityRules;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;

View File

@ -1,8 +1,6 @@
#include <Access/DiskAccessStorage.h>
#include <Access/AccessEntityIO.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
@ -418,7 +416,7 @@ void DiskAccessStorage::setAllInMemory(const std::vector<std::pair<UUID, AccessE
/// Insert or update entities.
for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* write_on_disk= */ false);
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* conflicting_id = */ nullptr, /* write_on_disk= */ false);
}
void DiskAccessStorage::removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep)
@ -507,14 +505,14 @@ std::optional<std::pair<String, AccessEntityType>> DiskAccessStorage::readNameWi
}
bool DiskAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool DiskAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, /* write_on_disk = */ true);
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, conflicting_id, /* write_on_disk = */ true);
}
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk)
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id, bool write_on_disk)
{
const String & name = new_entity->getName();
AccessEntityType type = new_entity->getType();
@ -533,10 +531,16 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
if (name_collision && !replace_if_exists)
{
if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name);
}
else
{
if (conflicting_id)
*conflicting_id = id_by_name;
return false;
}
}
auto it_by_id = entries_by_id.find(id);
bool id_collision = (it_by_id != entries_by_id.end());
@ -548,8 +552,12 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
throwIDCollisionCannotInsert(id, type, name, existing_entry.type, existing_entry.name);
}
else
{
if (conflicting_id)
*conflicting_id = id;
return false;
}
}
if (write_on_disk)
scheduleWriteLists(type);
@ -727,25 +735,4 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Couldn't delete {}", file_path);
}
void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
}
}

View File

@ -34,14 +34,13 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void restoreFromBackup(RestorerFromBackup & restorer) override;
private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
@ -55,7 +54,7 @@ private:
void listsWritingThreadFunc() TSA_NO_THREAD_SAFETY_ANALYSIS;
void stopListsWritingThread();
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id, bool write_on_disk) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);

View File

@ -9,4 +9,28 @@ bool IAccessEntity::equal(const IAccessEntity & other) const
return (name == other.name) && (getType() == other.getType());
}
void IAccessEntity::replaceDependencies(std::shared_ptr<const IAccessEntity> & entity, const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
if (old_to_new_ids.empty())
return;
bool need_replace_dependencies = false;
auto dependencies = entity->findDependencies();
for (const auto & dependency : dependencies)
{
if (old_to_new_ids.contains(dependency))
{
need_replace_dependencies = true;
break;
}
}
if (!need_replace_dependencies)
return;
auto new_entity = entity->clone();
new_entity->replaceDependencies(old_to_new_ids);
entity = new_entity;
}
}

View File

@ -50,7 +50,8 @@ struct IAccessEntity
virtual std::vector<UUID> findDependencies() const { return {}; }
/// Replaces dependencies according to a specified map.
virtual void replaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) { doReplaceDependencies(old_to_new_ids); }
static void replaceDependencies(std::shared_ptr<const IAccessEntity> & entity, const std::unordered_map<UUID, UUID> & old_to_new_ids);
/// Whether this access entity should be written to a backup.
virtual bool isBackupAllowed() const { return false; }
@ -66,6 +67,8 @@ protected:
{
return std::make_shared<EntityClassT>(typeid_cast<const EntityClassT &>(*this));
}
virtual void doReplaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
};
using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;

View File

@ -4,6 +4,8 @@
#include <Access/User.h>
#include <Access/AccessBackup.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <Common/callOnce.h>
@ -14,10 +16,11 @@
#include <base/FnTraits.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
namespace DB
{
namespace ErrorCodes
@ -178,20 +181,20 @@ UUID IAccessStorage::insert(const AccessEntityPtr & entity)
return *insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true);
}
std::optional<UUID> IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
std::optional<UUID> IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
auto id = generateRandomID();
if (insert(id, entity, replace_if_exists, throw_if_exists))
if (insert(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
return id;
return std::nullopt;
}
bool IAccessStorage::insert(const DB::UUID & id, const DB::AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
bool IAccessStorage::insert(const DB::UUID & id, const DB::AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
return insertImpl(id, entity, replace_if_exists, throw_if_exists);
return insertImpl(id, entity, replace_if_exists, throw_if_exists, conflicting_id);
}
@ -285,7 +288,7 @@ std::vector<UUID> IAccessStorage::insertOrReplace(const std::vector<AccessEntity
}
bool IAccessStorage::insertImpl(const UUID &, const AccessEntityPtr & entity, bool, bool)
bool IAccessStorage::insertImpl(const UUID &, const AccessEntityPtr & entity, bool, bool, UUID *)
{
if (isReadOnly())
throwReadonlyCannotInsert(entity->getType(), entity->getName());
@ -611,12 +614,51 @@ void IAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, c
}
void IAccessStorage::restoreFromBackup(RestorerFromBackup &)
void IAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "restoreFromBackup() is not implemented in {}", getStorageType());
if (isReplicated() && !acquireReplicatedRestore(restorer))
return;
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, entities_to_restore = std::move(entities), replace_if_exists, throw_if_exists] mutable
{
std::unordered_map<UUID, UUID> new_to_existing_ids;
for (auto & [id, entity] : entities_to_restore)
{
UUID existing_entity_id;
if (!insert(id, entity, replace_if_exists, throw_if_exists, &existing_entity_id))
{
/// Couldn't insert `entity` because there is an existing entity with the same name.
new_to_existing_ids[id] = existing_entity_id;
}
}
if (!new_to_existing_ids.empty())
{
/// If new entities restored from backup have dependencies on other entities from backup which were not restored because they existed,
/// then we should correct those dependencies.
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr
{
auto res = entity;
IAccessEntity::replaceDependencies(res, new_to_existing_ids);
return res;
};
std::vector<UUID> ids;
ids.reserve(entities_to_restore.size());
boost::copy(entities_to_restore | boost::adaptors::map_keys, std::back_inserter(ids));
tryUpdate(ids, update_func);
}
});
}

View File

@ -64,6 +64,9 @@ public:
/// Returns true if this entity is readonly.
virtual bool isReadOnly(const UUID &) const { return isReadOnly(); }
/// Returns true if this storage is replicated.
virtual bool isReplicated() const { return false; }
/// Starts periodic reloading and updating of entities in this storage.
virtual void startPeriodicReloading() {}
@ -153,8 +156,8 @@ public:
/// Inserts an entity to the storage. Returns ID of a new entry in the storage.
/// Throws an exception if the specified name already exists.
UUID insert(const AccessEntityPtr & entity);
std::optional<UUID> insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool insert(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
std::optional<UUID> insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id = nullptr);
bool insert(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id = nullptr);
std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, bool replace_if_exists = false, bool throw_if_exists = true);
std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, const std::vector<UUID> & ids, bool replace_if_exists = false, bool throw_if_exists = true);
@ -218,7 +221,7 @@ protected:
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const = 0;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const = 0;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const;
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
virtual bool removeImpl(const UUID & id, bool throw_if_not_exists);
virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
virtual std::optional<AuthResult> authenticateImpl(
@ -240,6 +243,7 @@ protected:
LoggerPtr getLogger() const;
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_);
virtual bool acquireReplicatedRestore(RestorerFromBackup &) const { return false; }
[[noreturn]] void throwNotFound(const UUID & id) const;
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);

View File

@ -1,7 +1,5 @@
#include <Access/MemoryAccessStorage.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <base/scope_guard.h>
#include <boost/container/flat_set.hpp>
#include <boost/range/adaptor/map.hpp>
@ -63,14 +61,14 @@ AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id, bool throw_if_not
}
bool MemoryAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool MemoryAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, conflicting_id);
}
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
const String & name = new_entity->getName();
AccessEntityType type = new_entity->getType();
@ -86,10 +84,16 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
if (name_collision && !replace_if_exists)
{
if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name);
}
else
{
if (conflicting_id)
*conflicting_id = id_by_name;
return false;
}
}
auto it_by_id = entries_by_id.find(id);
bool id_collision = (it_by_id != entries_by_id.end());
@ -97,10 +101,16 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
{
const auto & existing_entry = it_by_id->second;
if (throw_if_exists)
{
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
}
else
{
if (conflicting_id)
*conflicting_id = id;
return false;
}
}
/// Remove collisions if necessary.
if (name_collision && (id_by_name != id))
@ -270,28 +280,7 @@ void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityP
/// Insert or update entities.
for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false);
}
void MemoryAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* conflicting_id = */ nullptr);
}
}

View File

@ -34,17 +34,16 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void restoreFromBackup(RestorerFromBackup & restorer) override;
private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);

View File

@ -353,7 +353,7 @@ void MultipleAccessStorage::reload(ReloadMode reload_mode)
}
bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
std::shared_ptr<IAccessStorage> storage_for_insertion;
@ -376,7 +376,7 @@ bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr &
getStorageName());
}
if (storage_for_insertion->insert(id, entity, replace_if_exists, throw_if_exists))
if (storage_for_insertion->insert(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
{
std::lock_guard lock{mutex};
ids_cache.set(id, storage_for_insertion);

View File

@ -67,7 +67,7 @@ protected:
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;

View File

@ -24,7 +24,7 @@ std::vector<UUID> Quota::findDependencies() const
return to_roles.findDependencies();
}
void Quota::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void Quota::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
to_roles.replaceDependencies(old_to_new_ids);
}

View File

@ -47,7 +47,7 @@ struct Quota : public IAccessEntity
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; }
};

View File

@ -5,10 +5,9 @@
#include <Access/AccessChangesNotifier.h>
#include <Access/AccessBackup.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/IRestoreCoordination.h>
#include <Backups/RestorerFromBackup.h>
#include <IO/ReadHelpers.h>
#include <Interpreters/Context.h>
#include <Common/ZooKeeper/KeeperException.h>
@ -120,7 +119,7 @@ static void retryOnZooKeeperUserError(size_t attempts, Func && function)
}
}
bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(new_entity->getType());
const String & name = new_entity->getName();
@ -128,7 +127,7 @@ bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr
auto zookeeper = getZooKeeper();
bool ok = false;
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); });
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists, conflicting_id); });
if (!ok)
return false;
@ -143,7 +142,8 @@ bool ReplicatedAccessStorage::insertZooKeeper(
const UUID & id,
const AccessEntityPtr & new_entity,
bool replace_if_exists,
bool throw_if_exists)
bool throw_if_exists,
UUID * conflicting_id)
{
const String & name = new_entity->getName();
const AccessEntityType type = new_entity->getType();
@ -167,16 +167,16 @@ bool ReplicatedAccessStorage::insertZooKeeper(
if (res == Coordination::Error::ZNODEEXISTS)
{
if (!throw_if_exists && !replace_if_exists)
return false; /// Couldn't insert a new entity.
if (throw_if_exists)
if (!replace_if_exists)
{
if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
{
/// Couldn't insert the new entity because there is an existing entity with such UUID.
if (throw_if_exists)
{
/// To fail with a nice error message, we need info about what already exists.
/// This itself could fail if the conflicting uuid disappears in the meantime.
/// If that happens, then we'll just retry from the start.
/// This itself can fail if the conflicting uuid disappears in the meantime.
/// If that happens, then retryOnZooKeeperUserError() will just retry the operation from the start.
String existing_entity_definition = zookeeper->get(entity_path);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
@ -186,9 +186,34 @@ bool ReplicatedAccessStorage::insertZooKeeper(
}
else
{
/// Couldn't insert the new entity because there is an existing entity with such name.
if (conflicting_id)
*conflicting_id = id;
return false;
}
}
else if (responses[1]->error == Coordination::Error::ZNODEEXISTS)
{
/// Couldn't insert the new entity because there is an existing entity with the same name.
if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name);
}
else
{
if (conflicting_id)
{
/// Get UUID of the existing entry with the same name.
/// This itself can fail if the conflicting name disappears in the meantime.
/// If that happens, then retryOnZooKeeperUserError() will just retry the operation from the start.
*conflicting_id = parseUUID(zookeeper->get(name_path));
}
return false;
}
}
else
{
zkutil::KeeperMultiException::check(res, ops, responses);
}
}
assert(replace_if_exists);
@ -693,28 +718,10 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
}
void ReplicatedAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
bool ReplicatedAccessStorage::acquireReplicatedRestore(RestorerFromBackup & restorer) const
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto restore_coordination = restorer.getRestoreCoordination();
if (!restore_coordination->acquireReplicatedAccessStorage(zookeeper_path))
return;
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
return restore_coordination->acquireReplicatedAccessStorage(zookeeper_path);
}
}

View File

@ -26,6 +26,7 @@ public:
void shutdown() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
bool isReplicated() const override { return true; }
void startPeriodicReloading() override { startWatchingThread(); }
void stopPeriodicReloading() override { stopWatchingThread(); }
@ -35,7 +36,6 @@ public:
bool isBackupAllowed() const override { return backup_allowed; }
void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override;
void restoreFromBackup(RestorerFromBackup & restorer) override;
private:
String zookeeper_path;
@ -48,11 +48,11 @@ private:
std::unique_ptr<ThreadFromGlobalPool> watching_thread;
std::shared_ptr<ConcurrentBoundedQueue<UUID>> watched_queue;
bool insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists);
bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -80,6 +80,7 @@ private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
bool acquireReplicatedRestore(RestorerFromBackup & restorer) const override;
mutable std::mutex mutex;
MemoryAccessStorage memory_storage TSA_GUARDED_BY(mutex);

View File

@ -21,7 +21,7 @@ std::vector<UUID> Role::findDependencies() const
return res;
}
void Role::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void Role::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
granted_roles.replaceDependencies(old_to_new_ids);
settings.replaceDependencies(old_to_new_ids);

View File

@ -21,7 +21,7 @@ struct Role : public IAccessEntity
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
};

View File

@ -63,7 +63,7 @@ std::vector<UUID> RowPolicy::findDependencies() const
return to_roles.findDependencies();
}
void RowPolicy::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void RowPolicy::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
to_roles.replaceDependencies(old_to_new_ids);
}

View File

@ -50,7 +50,7 @@ struct RowPolicy : public IAccessEntity
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; }
/// Which roles or users should use this row policy.

View File

@ -21,7 +21,7 @@ std::vector<UUID> SettingsProfile::findDependencies() const
return res;
}
void SettingsProfile::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void SettingsProfile::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
elements.replaceDependencies(old_to_new_ids);
to_roles.replaceDependencies(old_to_new_ids);

View File

@ -22,7 +22,7 @@ struct SettingsProfile : public IAccessEntity
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return elements.isBackupAllowed(); }
};

View File

@ -49,7 +49,7 @@ std::vector<UUID> User::findDependencies() const
return res;
}
void User::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void User::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
default_roles.replaceDependencies(old_to_new_ids);
granted_roles.replaceDependencies(old_to_new_ids);

View File

@ -32,7 +32,7 @@ struct User : public IAccessEntity
void setName(const String & name_) override;
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
};

View File

@ -3,195 +3,27 @@
#include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/IdentifierNode.h>
#include <Analyzer/ListNode.h>
#include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderTreeNode
class FunctionTreeNode : public AbstractFunction
{
public:
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments())
class ArgumentTreeNode : public Argument
{
if (arguments.getNodes().empty())
return;
findFunctionSecretArguments();
public:
explicit ArgumentTreeNode(const IQueryTreeNode * argument_) : argument(argument_) {}
std::unique_ptr<AbstractFunction> getFunction() const override
{
if (const auto * f = argument->as<FunctionNode>())
return std::make_unique<FunctionTreeNode>(*f);
return nullptr;
}
struct Result
{
/// Result constructed by default means no arguments will be hidden.
size_t start = static_cast<size_t>(-1);
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
/// In all known cases secret arguments are consecutive
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
std::vector<std::string> nested_maps;
bool hasSecrets() const
{
return count != 0 || !nested_maps.empty();
}
};
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const FunctionNode & function;
const ListNode & arguments;
FunctionSecretArgumentsFinder::Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= arguments.getNodes().size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findFunctionSecretArguments()
{
const auto & name = function.getFunctionName();
if ((name == "mysql") || (name == "postgresql") || (name == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((name == "s3") || (name == "cosn") || (name == "oss") ||
(name == "deltaLake") || (name == "hudi") || (name == "iceberg"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((name == "remote") || (name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((name == "encrypt") || (name == "decrypt") ||
(name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") ||
(name == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (name == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
const auto & nodes = arguments.getNodes();
size_t count = nodes.size();
while (count > 0)
{
const FunctionNode * f = nodes.at(count - 1)->as<FunctionNode>();
if (!f)
break;
if (f->getFunctionName() == "headers")
result.nested_maps.push_back(f->getFunctionName());
else if (f->getFunctionName() != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= arguments.getNodes().size())
return false;
return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier);
}
static bool tryGetStringFromArgument(const QueryTreeNodePtr argument, String * res, bool allow_identifier = true)
bool isIdentifier() const override { return argument->as<IdentifierNode>(); }
bool tryGetString(String * res, bool allow_identifier) const override
{
if (const auto * literal = argument->as<ConstantNode>())
{
@ -214,159 +46,46 @@ private:
return false;
}
private:
const IQueryTreeNode * argument = nullptr;
};
void findRemoteFunctionSecretArguments()
class ArgumentsTreeNode : public Arguments
{
if (isNamedCollectionName(0))
public:
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
size_t size() const override { return arguments ? arguments->size() : 0; }
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
private:
const QueryTreeNodes * arguments = nullptr;
};
explicit FunctionTreeNode(const FunctionNode & function_) : function(&function_)
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
if (const auto & nodes = function->getArguments().getNodes(); !nodes.empty())
arguments = std::make_unique<ArgumentsTreeNode>(&nodes);
}
String name() const override { return function->getFunctionName(); }
private:
const FunctionNode * function = nullptr;
};
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (arguments.getNodes().size() < 3)
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderTreeNode : public FunctionSecretArgumentsFinder
{
public:
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_)
: FunctionSecretArgumentsFinder(std::make_unique<FunctionTreeNode>(function_))
{
if (!function->hasArguments())
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = arguments.getNodes()[arg_num]->as<FunctionNode>();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
findOrdinaryFunctionSecretArguments();
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (arguments.getNodes().empty())
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = arguments.getNodes().size() - 1;
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments.getNodes().size() <= arg_idx)
return false;
const auto * identifier = arguments.getNodes()[arg_idx]->as<IdentifierNode>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments.getNodes().size(); ++i)
{
const auto & argument = arguments.getNodes()[i];
const auto * equals_func = argument->as<FunctionNode>();
if (!equals_func || (equals_func->getFunctionName() != "equals"))
continue;
const auto * expr_list = equals_func->getArguments().as<ListNode>();
if (!expr_list)
continue;
const auto & equal_args = expr_list->getNodes();
if (equal_args.size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(equal_args[0], &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
};
}

View File

@ -59,35 +59,27 @@ void cckMetadataPathForOrdinary(const ASTCreateQuery & create, const String & me
}
/// validate validates the database engine that's specified in the create query for
/// engine arguments, settings and table overrides.
void validate(const ASTCreateQuery & create_query)
void DatabaseFactory::validate(const ASTCreateQuery & create_query) const
{
auto * storage = create_query.storage;
/// Check engine may have arguments
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
const String & engine_name = storage->engine->name;
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
const EngineFeatures & engine_features = database_engines.at(engine_name).features;
if (storage->engine->arguments && !engine_may_have_arguments)
/// Check engine may have arguments
if (storage->engine->arguments && !engine_features.supports_arguments)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have arguments", engine_name);
/// Check engine may have settings
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
bool has_unexpected_element = storage->engine->parameters || storage->partition_by ||
storage->primary_key || storage->order_by ||
storage->sample_by;
if (has_unexpected_element || (!may_have_settings && storage->settings))
if (has_unexpected_element || (!engine_features.supports_settings && storage->settings))
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST,
"Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name);
/// Check engine with table overrides
static const std::unordered_set<std::string_view> engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"};
if (create_query.table_overrides && !engines_with_table_overrides.contains(engine_name))
if (create_query.table_overrides && !engine_features.supports_table_overrides)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have table overrides", engine_name);
}
@ -121,9 +113,9 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m
return impl;
}
void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn)
void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn, EngineFeatures features)
{
if (!database_engines.emplace(name, std::move(creator_fn)).second)
if (!database_engines.emplace(name, Creator{std::move(creator_fn), features}).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "DatabaseFactory: the database engine name '{}' is not unique", name);
}
@ -154,7 +146,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
.context = context};
// creator_fn creates and returns a DatabasePtr with the supplied arguments
auto creator_fn = database_engines.at(engine_name);
auto creator_fn = database_engines.at(engine_name).creator_fn;
return creator_fn(arguments);
}

View File

@ -43,13 +43,30 @@ public:
ContextPtr & context;
};
DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
struct EngineFeatures
{
bool supports_arguments = false;
bool supports_settings = false;
bool supports_table_overrides = false;
};
using CreatorFn = std::function<DatabasePtr(const Arguments & arguments)>;
using DatabaseEngines = std::unordered_map<std::string, CreatorFn>;
struct Creator
{
CreatorFn creator_fn;
EngineFeatures features;
};
void registerDatabase(const std::string & name, CreatorFn creator_fn);
DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
using DatabaseEngines = std::unordered_map<std::string, Creator>;
void registerDatabase(const std::string & name, CreatorFn creator_fn, EngineFeatures features = EngineFeatures{
.supports_arguments = false,
.supports_settings = false,
.supports_table_overrides = false,
});
const DatabaseEngines & getDatabaseEngines() const { return database_engines; }
@ -65,6 +82,10 @@ private:
DatabaseEngines database_engines;
DatabasePtr getImpl(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
/// validate validates the database engine that's specified in the create query for
/// engine arguments, settings and table overrides.
void validate(const ASTCreateQuery & create_query) const;
};
}

View File

@ -257,6 +257,6 @@ void registerDatabaseFilesystem(DatabaseFactory & factory)
return std::make_shared<DatabaseFilesystem>(args.database_name, init_path, args.context);
};
factory.registerDatabase("Filesystem", create_fn);
factory.registerDatabase("Filesystem", create_fn, {.supports_arguments = true});
}
}

View File

@ -253,7 +253,7 @@ void registerDatabaseHDFS(DatabaseFactory & factory)
return std::make_shared<DatabaseHDFS>(args.database_name, source_url, args.context);
};
factory.registerDatabase("HDFS", create_fn);
factory.registerDatabase("HDFS", create_fn, {.supports_arguments = true});
}
} // DB

View File

@ -398,6 +398,6 @@ void registerDatabaseLazy(DatabaseFactory & factory)
cache_expiration_time_seconds,
args.context);
};
factory.registerDatabase("Lazy", create_fn);
factory.registerDatabase("Lazy", create_fn, {.supports_arguments = true});
}
}

View File

@ -2001,6 +2001,6 @@ void registerDatabaseReplicated(DatabaseFactory & factory)
replica_name,
std::move(database_replicated_settings), args.context);
};
factory.registerDatabase("Replicated", create_fn);
factory.registerDatabase("Replicated", create_fn, {.supports_arguments = true, .supports_settings = true});
}
}

View File

@ -326,7 +326,7 @@ void registerDatabaseS3(DatabaseFactory & factory)
return std::make_shared<DatabaseS3>(args.database_name, config, args.context);
};
factory.registerDatabase("S3", create_fn);
factory.registerDatabase("S3", create_fn, {.supports_arguments = true});
}
}
#endif

View File

@ -290,8 +290,14 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)
binlog_client,
std::move(materialize_mode_settings));
};
factory.registerDatabase("MaterializeMySQL", create_fn);
factory.registerDatabase("MaterializedMySQL", create_fn);
DatabaseFactory::EngineFeatures features{
.supports_arguments = true,
.supports_settings = true,
.supports_table_overrides = true,
};
factory.registerDatabase("MaterializeMySQL", create_fn, features);
factory.registerDatabase("MaterializedMySQL", create_fn, features);
}
}

View File

@ -584,7 +584,7 @@ void registerDatabaseMySQL(DatabaseFactory & factory)
throw Exception(ErrorCodes::CANNOT_CREATE_DATABASE, "Cannot create MySQL database, because {}", exception_message);
}
};
factory.registerDatabase("MySQL", create_fn);
factory.registerDatabase("MySQL", create_fn, {.supports_arguments = true, .supports_settings = true});
}
}

View File

@ -546,7 +546,11 @@ void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory)
args.database_name, configuration.database, connection_info,
std::move(postgresql_replica_settings));
};
factory.registerDatabase("MaterializedPostgreSQL", create_fn);
factory.registerDatabase("MaterializedPostgreSQL", create_fn, {
.supports_arguments = true,
.supports_settings = true,
.supports_table_overrides = true,
});
}
}

View File

@ -558,7 +558,7 @@ void registerDatabasePostgreSQL(DatabaseFactory & factory)
pool,
use_table_cache);
};
factory.registerDatabase("PostgreSQL", create_fn);
factory.registerDatabase("PostgreSQL", create_fn, {.supports_arguments = true});
}
}

View File

@ -220,7 +220,7 @@ void registerDatabaseSQLite(DatabaseFactory & factory)
return std::make_shared<DatabaseSQLite>(args.context, engine_define, args.create_query.attach, database_path);
};
factory.registerDatabase("SQLite", create_fn);
factory.registerDatabase("SQLite", create_fn, {.supports_arguments = true});
}
}

View File

@ -15,7 +15,6 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_COLUMN;
}
@ -38,13 +37,6 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.empty())
throw Exception(
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Function {} needs at least one argument; passed {}.",
getName(),
arguments.size());
DataTypes arguments_types;
for (size_t index = 0; index < arguments.size(); ++index)
{
@ -68,9 +60,16 @@ public:
}
ColumnPtr
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
size_t num_arguments = arguments.size();
if (num_arguments == 0)
{
auto res_col = result_type->createColumn();
res_col->insertDefault();
return ColumnConst::create(std::move(res_col), input_rows_count);
}
Columns holders(num_arguments);
Columns tuple_columns(num_arguments);

View File

@ -718,7 +718,12 @@ FileCache::getOrSet(
}
}
chassert(file_segments_limit ? file_segments.back()->range().left <= result_range.right : file_segments.back()->range().contains(result_range.right));
chassert(file_segments_limit
? file_segments.back()->range().left <= result_range.right
: file_segments.back()->range().contains(result_range.right),
fmt::format("Unexpected state. Back: {}, result range: {}, limit: {}",
file_segments.back()->range().toString(), result_range.toString(), file_segments_limit));
chassert(!file_segments_limit || file_segments.size() <= file_segments_limit);
return std::make_unique<FileSegmentsHolder>(std::move(file_segments));

View File

@ -380,15 +380,16 @@ BlockIO InterpreterDropQuery::executeToDatabase(const ASTDropQuery & query)
BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector<UUID> & uuids_to_wait)
{
if (query.kind != ASTDropQuery::Kind::Detach && query.kind != ASTDropQuery::Kind::Drop && query.kind != ASTDropQuery::Kind::Truncate)
return {};
const auto & database_name = query.getDatabase();
auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, "");
database = tryGetDatabase(database_name, query.if_exists);
if (database)
{
if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop
|| query.kind == ASTDropQuery::Kind::Truncate)
{
if (!database)
return {};
bool drop = query.kind == ASTDropQuery::Kind::Drop;
bool truncate = query.kind == ASTDropQuery::Kind::Truncate;
@ -472,8 +473,6 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
/// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database.
if (!truncate)
DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach());
}
}
return {};
}

View File

@ -1,10 +1,42 @@
#pragma once
#include <vector>
#include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <base/defines.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
class AbstractFunction
{
friend class FunctionSecretArgumentsFinder;
public:
class Argument
{
public:
virtual ~Argument() = default;
virtual std::unique_ptr<AbstractFunction> getFunction() const = 0;
virtual bool isIdentifier() const = 0;
virtual bool tryGetString(String * res, bool allow_identifier) const = 0;
};
class Arguments
{
public:
virtual ~Arguments() = default;
virtual size_t size() const = 0;
virtual std::unique_ptr<Argument> at(size_t n) const = 0;
};
virtual ~AbstractFunction() = default;
virtual String name() const = 0;
bool hasArguments() const { return !!arguments; }
protected:
std::unique_ptr<Arguments> arguments;
};
class FunctionSecretArgumentsFinder
{
public:
@ -23,6 +55,485 @@ public:
return count != 0 || !nested_maps.empty();
}
};
explicit FunctionSecretArgumentsFinder(std::unique_ptr<AbstractFunction> && function_) : function(std::move(function_)) {}
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
protected:
const std::unique_ptr<AbstractFunction> function;
Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= function->arguments->size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findOrdinaryFunctionSecretArguments()
{
if ((function->name() == "mysql") || (function->name() == "postgresql") || (function->name() == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((function->name() == "s3") || (function->name() == "cosn") || (function->name() == "oss") ||
(function->name() == "deltaLake") || (function->name() == "hudi") || (function->name() == "iceberg") ||
(function->name() == "gcs"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function->name() == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function->name() == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function->name() == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function->name() == "remote") || (function->name() == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((function->name() == "encrypt") || (function->name() == "decrypt") ||
(function->name() == "aes_encrypt_mysql") || (function->name() == "aes_decrypt_mysql") ||
(function->name() == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function->name() == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
size_t count = function->arguments->size();
while (count > 0)
{
const auto f = function->arguments->at(count - 1)->getFunction();
if (!f)
break;
if (f->name() == "headers")
result.nested_maps.push_back(f->name());
else if (f->name() != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = function->arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= function->arguments->size())
return false;
return tryGetStringFromArgument(*function->arguments->at(arg_idx), res, allow_identifier);
}
static bool tryGetStringFromArgument(const AbstractFunction::Argument & argument, String * res, bool allow_identifier = true)
{
return argument.tryGetString(res, allow_identifier);
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (function->arguments->size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
auto table_function = function->arguments->at(arg_num)->getFunction();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (function->arguments->size() == 0)
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = function->arguments->size() - 1;
}
void findTableEngineSecretArguments()
{
const String & engine_name = function->name();
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
findExternalDistributedTableEngineSecretArguments();
}
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
void findExternalDistributedTableEngineSecretArguments()
{
if (isNamedCollectionName(1))
{
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 2);
}
else
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
markSecretArgument(5);
}
}
void findS3TableEngineSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
}
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
{
const String & engine_name = function->name();
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL"))
{
/// MySQL('host:port', 'database', 'user', 'password')
/// PostgreSQL('host:port', 'database', 'user', 'password')
findMySQLDatabaseSecretArguments();
}
else if (engine_name == "S3")
{
/// S3('url', 'access_key_id', 'secret_access_key')
findS3DatabaseSecretArguments();
}
}
void findMySQLDatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// MySQL(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// MySQL('host:port', 'database', 'user', 'password')
markSecretArgument(3);
}
}
void findS3DatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'password', ...)
findSecretNamedArgument("secret_access_key", 1);
}
else
{
/// S3('url', 'access_key_id', 'secret_access_key')
markSecretArgument(2);
}
}
void findBackupNameSecretArguments()
{
const String & engine_name = function->name();
if (engine_name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
markSecretArgument(2);
}
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (function->arguments->size() <= arg_idx)
return false;
return function->arguments->at(arg_idx)->isIdentifier();
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < function->arguments->size(); ++i)
{
const auto & argument = function->arguments->at(i);
const auto equals_func = argument->getFunction();
if (!equals_func || (equals_func->name() != "equals"))
continue;
if (!equals_func->arguments || equals_func->arguments->size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(*equals_func->arguments->at(0), &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
};
}

View File

@ -1,35 +1,97 @@
#pragma once
#include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Core/QualifiedTableName.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/KnownObjectNames.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderAST
class FunctionAST : public AbstractFunction
{
public:
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_)
class ArgumentAST : public Argument
{
if (!function.arguments)
public:
explicit ArgumentAST(const IAST * argument_) : argument(argument_) {}
std::unique_ptr<AbstractFunction> getFunction() const override
{
if (const auto * f = argument->as<ASTFunction>())
return std::make_unique<FunctionAST>(*f);
return nullptr;
}
bool isIdentifier() const override { return argument->as<ASTIdentifier>(); }
bool tryGetString(String * res, bool allow_identifier) const override
{
if (const auto * literal = argument->as<ASTLiteral>())
{
if (literal->value.getType() != Field::Types::String)
return false;
if (res)
*res = literal->value.safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument->as<ASTIdentifier>())
{
if (res)
*res = id->name();
return true;
}
}
return false;
}
private:
const IAST * argument = nullptr;
};
class ArgumentsAST : public Arguments
{
public:
explicit ArgumentsAST(const ASTs * arguments_) : arguments(arguments_) {}
size_t size() const override { return arguments ? arguments->size() : 0; }
std::unique_ptr<Argument> at(size_t n) const override
{
return std::make_unique<ArgumentAST>(arguments->at(n).get());
}
private:
const ASTs * arguments = nullptr;
};
explicit FunctionAST(const ASTFunction & function_) : function(&function_)
{
if (!function->arguments)
return;
const auto * expr_list = function.arguments->as<ASTExpressionList>();
const auto * expr_list = function->arguments->as<ASTExpressionList>();
if (!expr_list)
return;
arguments = &expr_list->children;
switch (function.kind)
arguments = std::make_unique<ArgumentsAST>(&expr_list->children);
}
String name() const override { return function->name; }
private:
const ASTFunction * function = nullptr;
};
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderAST : public FunctionSecretArgumentsFinder
{
public:
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_)
: FunctionSecretArgumentsFinder(std::make_unique<FunctionAST>(function_))
{
if (!function->hasArguments())
return;
switch (function_.kind)
{
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
case ASTFunction::Kind::WINDOW_FUNCTION: break;
@ -43,507 +105,7 @@ public:
}
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const ASTFunction & function;
const ASTs * arguments = nullptr;
FunctionSecretArgumentsFinder::Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= arguments->size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findOrdinaryFunctionSecretArguments()
{
if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg") ||
(function.name == "gcs"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function.name == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function.name == "remote") || (function.name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((function.name == "encrypt") || (function.name == "decrypt") ||
(function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") ||
(function.name == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function.name == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
size_t count = arguments->size();
while (count > 0)
{
const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
if (!f)
break;
if (f->name == "headers")
result.nested_maps.push_back(f->name);
else if (f->name != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= arguments->size())
return false;
return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier);
}
static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true)
{
if (const auto * literal = argument.as<ASTLiteral>())
{
if (literal->value.getType() != Field::Types::String)
return false;
if (res)
*res = literal->value.safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument.as<ASTIdentifier>())
{
if (res)
*res = id->name();
return true;
}
}
return false;
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (arguments->size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = (*arguments)[arg_num]->as<ASTFunction>();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (arguments->empty())
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = arguments->size() - 1;
}
void findTableEngineSecretArguments()
{
const String & engine_name = function.name;
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
findExternalDistributedTableEngineSecretArguments();
}
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
void findExternalDistributedTableEngineSecretArguments()
{
if (isNamedCollectionName(1))
{
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 2);
}
else
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
markSecretArgument(5);
}
}
void findS3TableEngineSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
}
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
{
const String & engine_name = function.name;
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL"))
{
/// MySQL('host:port', 'database', 'user', 'password')
/// PostgreSQL('host:port', 'database', 'user', 'password')
findMySQLDatabaseSecretArguments();
}
else if (engine_name == "S3")
{
/// S3('url', 'access_key_id', 'secret_access_key')
findS3DatabaseSecretArguments();
}
}
void findMySQLDatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// MySQL(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// MySQL('host:port', 'database', 'user', 'password')
markSecretArgument(3);
}
}
void findS3DatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'password', ...)
findSecretNamedArgument("secret_access_key", 1);
}
else
{
/// S3('url', 'access_key_id', 'secret_access_key')
markSecretArgument(2);
}
}
void findBackupNameSecretArguments()
{
const String & engine_name = function.name;
if (engine_name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
markSecretArgument(2);
}
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments->size() <= arg_idx)
return false;
const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments->size(); ++i)
{
const auto & argument = (*arguments)[i];
const auto * equals_func = argument->as<ASTFunction>();
if (!equals_func || (equals_func->name != "equals"))
continue;
const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
if (!expr_list)
continue;
const auto & equal_args = expr_list->children;
if (equal_args.size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(*equal_args[0], &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
};
}

View File

@ -1142,6 +1142,16 @@ bool AlterCommands::hasFullTextIndex(const StorageInMemoryMetadata & metadata)
return false;
}
bool AlterCommands::hasLegacyInvertedIndex(const StorageInMemoryMetadata & metadata)
{
for (const auto & index : metadata.secondary_indices)
{
if (index.type == INVERTED_INDEX_NAME)
return true;
}
return false;
}
bool AlterCommands::hasVectorSimilarityIndex(const StorageInMemoryMetadata & metadata)
{
for (const auto & index : metadata.secondary_indices)

View File

@ -235,8 +235,9 @@ public:
/// additional mutation command (MATERIALIZE_TTL) will be returned.
MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters=false) const;
/// Check if commands have any full-text index
/// Check if commands have any full-text index or a (legacy) inverted index
static bool hasFullTextIndex(const StorageInMemoryMetadata & metadata);
static bool hasLegacyInvertedIndex(const StorageInMemoryMetadata & metadata);
/// Check if commands have any vector similarity index
static bool hasVectorSimilarityIndex(const StorageInMemoryMetadata & metadata);

View File

@ -3230,6 +3230,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Experimental full-text index feature is not enabled (turn on setting 'allow_experimental_full_text_index')");
if (AlterCommands::hasLegacyInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Experimental inverted index feature is not enabled (turn on setting 'allow_experimental_inverted_index')");
if (AlterCommands::hasVectorSimilarityIndex(new_metadata) && !settings.allow_experimental_vector_similarity_index)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Experimental vector similarity index is disabled (turn on setting 'allow_experimental_vector_similarity_index')");

View File

@ -361,11 +361,23 @@ std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::
UInt64 total_size_of_moved_conditions = 0;
UInt64 total_number_of_moved_columns = 0;
/// Remember positions of conditions in where_conditions list
/// to keep original order of conditions in prewhere_conditions while moving.
std::unordered_map<const Condition *, size_t> condition_positions;
size_t position= 0;
for (const auto & condition : where_conditions)
condition_positions[&condition] = position++;
/// Move condition and all other conditions depend on the same set of columns.
auto move_condition = [&](Conditions::iterator cond_it)
{
LOG_TRACE(log, "Condition {} moved to PREWHERE", cond_it->node.getColumnName());
prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, cond_it);
/// Keep the original order of conditions in prewhere_conditions.
position = condition_positions[&(*cond_it)];
auto prewhere_it = prewhere_conditions.begin();
while (condition_positions[&(*prewhere_it)] < position && prewhere_it != prewhere_conditions.end())
++prewhere_it;
prewhere_conditions.splice(prewhere_it, where_conditions, cond_it);
total_size_of_moved_conditions += cond_it->columns_size;
total_number_of_moved_columns += cond_it->table_columns.size();
@ -375,7 +387,12 @@ std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::
if (jt->viable && jt->columns_size == cond_it->columns_size && jt->table_columns == cond_it->table_columns)
{
LOG_TRACE(log, "Condition {} moved to PREWHERE", jt->node.getColumnName());
prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, jt++);
/// Keep the original order of conditions in prewhere_conditions.
position = condition_positions[&(*jt)];
prewhere_it = prewhere_conditions.begin();
while (condition_positions[&(*prewhere_it)] < position && prewhere_it != prewhere_conditions.end())
++prewhere_it;
prewhere_conditions.splice(prewhere_it, where_conditions, jt++);
}
else
{

View File

@ -373,18 +373,21 @@ void DefaultCoordinator::initializeReadingState(InitialAllRangesAnnouncement ann
if (state_initialized)
return;
{
/// To speedup search for adjacent parts
Parts known_parts(all_parts_to_read.begin(), all_parts_to_read.end());
for (auto && part : announcement.description)
{
auto intersecting_it = std::find_if(
all_parts_to_read.begin(),
all_parts_to_read.end(),
[&part](const Part & other) { return !other.description.info.isDisjoint(part.info); });
auto intersecting_it = known_parts.lower_bound(Part{.description = part, .replicas = {}});
if (intersecting_it != all_parts_to_read.end())
if (intersecting_it != known_parts.end() && !intersecting_it->description.info.isDisjoint(part.info))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Intersecting parts found in announcement");
known_parts.emplace(Part{.description = part, .replicas = {}});
all_parts_to_read.push_back(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
}
}
std::ranges::sort(
all_parts_to_read, [](const Part & lhs, const Part & rhs) { return BiggerPartsFirst()(lhs.description, rhs.description); });
@ -870,8 +873,7 @@ void InOrderCoordinator<mode>::doHandleInitialAllRangesAnnouncement(InitialAllRa
/// To get rid of duplicates
for (auto && part: announcement.description)
{
auto the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
[&part] (const Part & other) { return other.description.info == part.info; });
auto the_same_it = all_parts_to_read.find(Part{.description = part, .replicas = {}});
/// We have the same part - add the info about presence on the corresponding replica to it
if (the_same_it != all_parts_to_read.end())
@ -883,13 +885,29 @@ void InOrderCoordinator<mode>::doHandleInitialAllRangesAnnouncement(InitialAllRa
if (state_initialized)
continue;
auto covering_or_the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
[&part] (const Part & other) { return other.description.info.contains(part.info) || part.info.contains(other.description.info); });
/// Look for the first part >= current
auto covering_it = all_parts_to_read.lower_bound(Part{.description = part, .replicas = {}});
/// It is covering part or we have covering - skip it
if (covering_or_the_same_it != all_parts_to_read.end())
if (covering_it != all_parts_to_read.end())
{
/// Checks if other part covers this one or this one covers the other
auto is_covered_or_covering = [&part] (const Part & other)
{
return other.description.info.contains(part.info) || part.info.contains(other.description.info);
};
if (is_covered_or_covering(*covering_it))
continue;
/// Also look at the previous part, it could be covering the current one
if (covering_it != all_parts_to_read.begin())
{
--covering_it;
if (is_covered_or_covering(*covering_it))
continue;
}
}
new_rows_to_read += part.rows;
auto [inserted_it, _] = all_parts_to_read.emplace(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
@ -897,6 +915,21 @@ void InOrderCoordinator<mode>::doHandleInitialAllRangesAnnouncement(InitialAllRa
std::sort(ranges.begin(), ranges.end());
}
#ifndef NDEBUG
/// Double check that there are no intersecting parts
{
auto intersecting_part_it = std::adjacent_find(all_parts_to_read.begin(), all_parts_to_read.end(),
[] (const Part & lhs, const Part & rhs)
{
return !lhs.description.info.isDisjoint(rhs.description.info);
});
if (intersecting_part_it != all_parts_to_read.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parts {} and {} intersect",
intersecting_part_it->description.info.getPartNameV1(), std::next(intersecting_part_it)->description.info.getPartNameV1());
}
#endif
state_initialized = true;
// progress_callback is not set when local plan is used for initiator

View File

@ -1,4 +1,5 @@
import json
import logging
import os
import re
import subprocess
@ -6,10 +7,12 @@ import sys
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union, Optional, Sequence
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
import requests
logger = logging.getLogger(__name__)
class Envs:
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
@ -36,6 +39,34 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
os.chdir(oldpwd)
def kill_ci_runner(message: str) -> None:
"""The function to kill the current process with all parents when it's possible.
Works only when run with the set `CI` environment"""
if not os.getenv("CI", ""): # cycle import env_helper
logger.info("Running outside the CI, won't kill the runner")
return
print(f"::error::{message}")
def get_ppid_name(pid: int) -> Tuple[int, str]:
# Avoid using psutil, it's not in stdlib
stats = Path(f"/proc/{pid}/stat").read_text(encoding="utf-8").split()
return int(stats[3]), stats[1]
pid = os.getpid()
pids = {} # type: Dict[str, str]
while pid:
ppid, name = get_ppid_name(pid)
pids[str(pid)] = name
pid = ppid
logger.error(
"Sleeping 5 seconds and killing all possible processes from following:\n %s",
"\n ".join(f"{p}: {n}" for p, n in pids.items()),
)
time.sleep(5)
# The current process will be killed too
subprocess.run(f"kill -9 {' '.join(pids.keys())}", check=False, shell=True)
class GH:
class ActionsNames:
RunConfig = "RunConfig"

View File

@ -19,11 +19,12 @@ from collections import defaultdict
from itertools import chain
from typing import Any, Dict, Optional
from ci_utils import kill_ci_runner
from env_helper import IS_CI
from integration_test_images import IMAGES
from tee_popen import TeePopen
from report import JOB_TIMEOUT_TEST_NAME
from stopwatch import Stopwatch
from tee_popen import TeePopen
MAX_RETRY = 1
NUM_WORKERS = 5
@ -332,7 +333,9 @@ class ClickhouseIntegrationTestsRunner:
except subprocess.CalledProcessError as err:
logging.info("docker-compose pull failed: %s", str(err))
continue
logging.error("Pulling images failed for 5 attempts. Will fail the worker.")
message = "Pulling images failed for 5 attempts. Will fail the worker."
logging.error(message)
kill_ci_runner(message)
# We pass specific retcode to to ci/integration_test_check.py to skip status reporting and restart job
sys.exit(13)

View File

@ -835,7 +835,9 @@ class SettingsRandomizer:
),
"remote_filesystem_read_method": lambda: random.choice(["read", "threadpool"]),
"local_filesystem_read_prefetch": lambda: random.randint(0, 1),
"filesystem_cache_segments_batch_size": lambda: random.choice([0, 3, 10, 50]),
"filesystem_cache_segments_batch_size": lambda: random.choice(
[0, 1, 2, 3, 5, 10, 50, 100]
),
"read_from_filesystem_cache_if_exists_otherwise_bypass_cache": lambda: random.randint(
0, 1
),

View File

@ -1,2 +1,3 @@
[('a','d'),('b','e'),('c','f')]
[('a','d','g'),('b','e','h'),('c','f','i')]
[]

View File

@ -2,7 +2,7 @@ SELECT arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']);
SELECT arrayZip(['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']);
SELECT arrayZip(); -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION }
SELECT arrayZip();
SELECT arrayZip('a', 'b', 'c'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

View File

@ -1,25 +1,25 @@
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
2
Filter column: and(equals(k, 3), notEmpty(v)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
2
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
2

View File

@ -1,16 +1,60 @@
-- Tests that the inverted index can only be supported when allow_experimental_full_text_index = 1.
SET allow_experimental_full_text_index = 0;
-- Tests that CREATE TABLE and ADD INDEX respect settings 'allow_experimental_full_text_index' and `allow_experimental_inverted_index`
DROP TABLE IF EXISTS tab;
CREATE TABLE tab
(
`key` UInt64,
`str` String
)
ENGINE = MergeTree
ORDER BY key;
ALTER TABLE tab ADD INDEX inv_idx(str) TYPE full_text(0); -- { serverError SUPPORT_IS_DISABLED }
-- Test CREATE TABLE + full_text index setting
SET allow_experimental_full_text_index = 0;
CREATE TABLE tab (id UInt32, str String, INDEX idx str TYPE full_text(0)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError SUPPORT_IS_DISABLED }
CREATE TABLE tab (id UInt32, str String, INDEX idx str TYPE inverted(0)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError ILLEGAL_INDEX }
SET allow_experimental_full_text_index = 1;
CREATE TABLE tab (id UInt32, str String, INDEX idx str TYPE full_text(0)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE tab (id UInt32, str String, INDEX idx str TYPE inverted(0)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError ILLEGAL_INDEX }
DROP TABLE tab;
SET allow_experimental_full_text_index = 0; -- reset to default
-- Test CREATE TABLE + inverted index setting
SET allow_experimental_inverted_index = 0;
CREATE TABLE tab (id UInt32, str String, INDEX idx str TYPE full_text(0)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError SUPPORT_IS_DISABLED }
CREATE TABLE tab (id UInt32, str String, INDEX idx str TYPE inverted(0)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError ILLEGAL_INDEX }
SET allow_experimental_inverted_index = 1;
CREATE TABLE tab (id UInt32, str String, INDEX idx str TYPE full_text(0)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError SUPPORT_IS_DISABLED }
CREATE TABLE tab (id UInt32, str String, INDEX idx str TYPE inverted(0)) ENGINE = MergeTree ORDER BY tuple();
DROP TABLE tab;
SET allow_experimental_inverted_index = 0; -- reset to default
-- Test ADD INDEX + full_text index setting
SET allow_experimental_full_text_index = 0;
CREATE TABLE tab (id UInt32, str String) ENGINE = MergeTree ORDER BY tuple();
ALTER TABLE tab ADD INDEX idx1 str TYPE full_text(0); -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE tab ADD INDEX idx2 str TYPE inverted(0); -- { serverError SUPPORT_IS_DISABLED }
DROP TABLE tab;
SET allow_experimental_full_text_index = 1;
CREATE TABLE tab (id UInt32, str String) ENGINE = MergeTree ORDER BY tuple();
ALTER TABLE tab ADD INDEX idx1 str TYPE full_text(0);
ALTER TABLE tab ADD INDEX idx2 str TYPE inverted(0); -- { serverError SUPPORT_IS_DISABLED }
DROP TABLE tab;
SET allow_experimental_full_text_index = 0; -- reset to default
-- Test ADD INDEX + inverted index setting
SET allow_experimental_inverted_index = 0;
CREATE TABLE tab (id UInt32, str String) ENGINE = MergeTree ORDER BY tuple();
ALTER TABLE tab ADD INDEX idx1 str TYPE full_text(0); -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE tab ADD INDEX idx2 str TYPE inverted(0); -- { serverError SUPPORT_IS_DISABLED }
DROP TABLE tab;
SET allow_experimental_inverted_index = 1;
CREATE TABLE tab (id UInt32, str String) ENGINE = MergeTree ORDER BY tuple();
ALTER TABLE tab ADD INDEX idx1 str TYPE full_text(0); -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE tab ADD INDEX idx2 str TYPE inverted(0);
DROP TABLE tab;
SET allow_experimental_inverted_index = 0; -- reset to default

View File

@ -1,15 +1,15 @@
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
1
Prewhere filter
Prewhere filter column: and(like(d, \'%es%\'), less(c, 20), equals(b, \'3\'), equals(a, 3)) (removed)
Prewhere filter column: and(equals(a, 3), equals(b, \'3\'), less(c, 20), like(d, \'%es%\')) (removed)
1
Prewhere filter
Prewhere filter column: and(like(d, \'%es%\'), less(c, 20), greater(c, 0), equals(a, 3)) (removed)
Prewhere filter column: and(equals(a, 3), less(c, 20), greater(c, 0), like(d, \'%es%\')) (removed)
1
Prewhere filter
Prewhere filter column: and(like(d, \'%es%\'), equals(b, \'3\'), less(c, 20)) (removed)
Prewhere filter column: and(equals(b, \'3\'), less(c, 20), like(d, \'%es%\')) (removed)
1
Prewhere filter
Prewhere filter column: and(like(d, \'%es%\'), equals(b, \'3\'), equals(a, 3)) (removed)
Prewhere filter column: and(equals(a, 3), equals(b, \'3\'), like(d, \'%es%\')) (removed)
1

View File

@ -5,8 +5,8 @@ After insert
After merge
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
After truncate, insert, and materialize
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)

View File

@ -1,7 +1,7 @@
After insert
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
After drop statistic
Prewhere info
Prewhere filter
@ -9,12 +9,12 @@ After drop statistic
After add and materialize statistic
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
After merge
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
After rename
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(c, 10_UInt8)) (removed)
Prewhere filter column: and(less(c, 10_UInt8), less(a, 10_UInt8)) (removed)

View File

@ -1,5 +1,6 @@
[('a','d'),('b','e'),('c','f')] Array(Tuple(Nullable(String), Nullable(String)))
[('a','d','g'),('b','e','h'),('c','f','i')]
[]
[('a','d'),('b','e'),('c','f'),(NULL,'g')]
[('a',1),(NULL,2),(NULL,3)]
[('a',1,1.1),('b',2,2.2),('c',NULL,3.3),(NULL,NULL,4.4)]

View File

@ -2,7 +2,7 @@ SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e', 'f']) as x, toTypeName(x);
SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']);
SELECT arrayZipUnaligned(); -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION }
SELECT arrayZipUnaligned();
SELECT arrayZipUnaligned('a', 'b', 'c'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

View File

@ -0,0 +1 @@
1 [0,1] [0,1]

View File

@ -0,0 +1,6 @@
drop table if exists test;
create table test (x UInt32, arr1 Array(UInt32), arr2 Array(UInt32)) engine=MergeTree order by x;
insert into test values (1, [0, 1], [0, 1]), (2, [0], [0, 1]);
select * from test where x == 1 and arrayExists((x1, x2) -> (x1 == x2), arr1, arr2);
drop table test;

View File

@ -0,0 +1,6 @@
Everything dropped
User dropped
Nothing dropped
Nothing dropped, mode=replace
Nothing dropped, mode=create
ACCESS_ENTITY_ALREADY_EXISTS

View File

@ -0,0 +1,77 @@
#!/usr/bin/env bash
# Tags: no-parallel
# Disabled parallel since RESTORE can only restore either all users or no users
# (it can't restore only users added by the current test run),
# so a RESTORE from a parallel test run could recreate our users before we expect that.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
user_a="user_a_${CLICKHOUSE_TEST_UNIQUE_NAME}"
role_b="role_b_${CLICKHOUSE_TEST_UNIQUE_NAME}"
${CLICKHOUSE_CLIENT} -m --query "
CREATE ROLE ${role_b} SETTINGS custom_x=1;
CREATE USER ${user_a} DEFAULT ROLE ${role_b} SETTINGS custom_x=2;
"
backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')"
${CLICKHOUSE_CLIENT} --query "BACKUP TABLE system.users, TABLE system.roles TO ${backup_name} FORMAT Null"
${CLICKHOUSE_CLIENT} --query "RESTORE ALL FROM ${backup_name} FORMAT Null"
do_check()
{
local replacements
replacements="s/${user_a}/user_a/g; s/${role_b}/role_b/g"
local check_info
check_info=$(${CLICKHOUSE_CLIENT} -mq "
SHOW CREATE USER ${user_a};
SHOW GRANTS FOR ${user_a};
SHOW CREATE ROLE ${role_b};
SHOW GRANTS FOR ${role_b};
" | sed "${replacements}")
local expected
expected=$'CREATE USER user_a IDENTIFIED WITH no_password DEFAULT ROLE role_b SETTINGS custom_x = 2\nGRANT role_b TO user_a\nCREATE ROLE role_b SETTINGS custom_x = 1'
if [[ "${check_info}" != "${expected}" ]]; then
echo "Assertion failed:"
echo "\"${check_info}\""
echo "!="
echo "\"${expected}\""
echo "Test database: ${CLICKHOUSE_DATABASE}" >&2
fi
}
echo "Everything dropped"
${CLICKHOUSE_CLIENT} --query "DROP USER ${user_a}"
${CLICKHOUSE_CLIENT} --query "DROP ROLE ${role_b}"
${CLICKHOUSE_CLIENT} --query "RESTORE ALL FROM ${backup_name} FORMAT Null"
do_check
echo "User dropped"
${CLICKHOUSE_CLIENT} --query "DROP USER ${user_a}"
${CLICKHOUSE_CLIENT} --query "RESTORE ALL FROM ${backup_name} FORMAT Null"
do_check
# TODO: Cannot restore a dropped role granted to an existing user. The result after RESTORE ALL below is the following:
# CREATE USER user_a DEFAULT ROLE NONE SETTINGS custom_x = 2; GRANT NONE TO user_a; CREATE ROLE role_b SETTINGS custom_x = 1
# because `role_b` is restored but not granted to existing user `user_a`.
#
# echo "Role dropped"
# ${CLICKHOUSE_CLIENT} --query "DROP ROLE ${role_b}"
# ${CLICKHOUSE_CLIENT} --query "RESTORE ALL FROM ${backup_name} FORMAT Null"
# do_check
echo "Nothing dropped"
${CLICKHOUSE_CLIENT} --query "RESTORE ALL FROM ${backup_name} FORMAT Null"
do_check
echo "Nothing dropped, mode=replace"
${CLICKHOUSE_CLIENT} --query "RESTORE ALL FROM ${backup_name} SETTINGS create_access='replace' FORMAT Null"
do_check
echo "Nothing dropped, mode=create"
${CLICKHOUSE_CLIENT} --query "RESTORE ALL FROM ${backup_name} SETTINGS create_access='create' FORMAT Null" 2>&1 | grep -om1 "ACCESS_ENTITY_ALREADY_EXISTS"
do_check

View File

@ -7,7 +7,7 @@ SET optimize_move_to_prewhere = 1;
SET enable_multiple_prewhere_read_steps = 1;
SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00';
SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00';
SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00' AND URL != '';
SELECT uniq(*) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00' AND EventDate = '2014-03-21';
WITH toTimeZone(EventTime, 'Asia/Dubai') AS xyz SELECT uniq(*) FROM test.hits WHERE xyz >= '2014-03-20 00:00:00' AND xyz < '2014-03-21 00:00:00' AND EventDate = '2014-03-21';