diff --git a/contrib/poco b/contrib/poco
index 2b273bfe9db..6216cc01a10 160000
--- a/contrib/poco
+++ b/contrib/poco
@@ -1 +1 @@
-Subproject commit 2b273bfe9db89429b2040c024484dee0197e48c7
+Subproject commit 6216cc01a107ce149863411ca29013a224f80343
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index fecc1fa7e76..eb4dd9550b4 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -432,6 +432,8 @@ if (USE_JEMALLOC)
     if(NOT MAKE_STATIC_LIBRARIES AND ${JEMALLOC_LIBRARIES} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$")
         # mallctl in dbms/src/Interpreters/AsynchronousMetrics.cpp
+        # Actually we link jemalloc to almost all libraries.
+        # This is just a hotfix for an uninvestigated problem.
         target_link_libraries(clickhouse_interpreters PRIVATE ${JEMALLOC_LIBRARIES})
     endif()
 endif ()
diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt
index d03ff257562..460dfd007d4 100644
--- a/dbms/programs/odbc-bridge/CMakeLists.txt
+++ b/dbms/programs/odbc-bridge/CMakeLists.txt
@@ -30,6 +30,11 @@ if (Poco_Data_FOUND)
     set(CLICKHOUSE_ODBC_BRIDGE_LINK ${CLICKHOUSE_ODBC_BRIDGE_LINK} PRIVATE ${Poco_Data_LIBRARY})
     set(CLICKHOUSE_ODBC_BRIDGE_INCLUDE ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE} SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR})
 endif ()
+if (USE_JEMALLOC)
+    # We need to link jemalloc directly to the odbc-bridge library, because
+    # otherwise it would be built with the default malloc.
+    set(CLICKHOUSE_ODBC_BRIDGE_LINK ${CLICKHOUSE_ODBC_BRIDGE_LINK} PRIVATE ${JEMALLOC_LIBRARIES})
+endif()

 clickhouse_program_add_library(odbc-bridge)
diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp
index cefa3712997..29d186def2d 100644
--- a/dbms/programs/server/HTTPHandler.cpp
+++ b/dbms/programs/server/HTTPHandler.cpp
@@ -34,7 +34,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp
index 76ea69cc737..7103769d54e 100644
--- a/dbms/programs/server/TCPHandler.cpp
+++ b/dbms/programs/server/TCPHandler.cpp
@@ -19,7 +19,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/dbms/src/Access/AccessControlManager.cpp b/dbms/src/Access/AccessControlManager.cpp
new file mode 100644
index 00000000000..1f1a57816a8
--- /dev/null
+++ b/dbms/src/Access/AccessControlManager.cpp
@@ -0,0 +1,52 @@
+#include <Access/AccessControlManager.h>
+#include <Access/MultipleAccessStorage.h>
+#include <Access/MemoryAccessStorage.h>
+#include <Access/UsersConfigAccessStorage.h>
+#include <Access/QuotaContextFactory.h>
+
+
+namespace DB
+{
+namespace
+{
+    std::vector<std::unique_ptr<IAccessStorage>> createStorages()
+    {
+        std::vector<std::unique_ptr<IAccessStorage>> list;
+        list.emplace_back(std::make_unique<MemoryAccessStorage>());
+        list.emplace_back(std::make_unique<UsersConfigAccessStorage>());
+        return list;
+    }
+}
+
+
+AccessControlManager::AccessControlManager()
+    : MultipleAccessStorage(createStorages()),
+      quota_context_factory(std::make_unique<QuotaContextFactory>(*this))
+{
+}
+
+
+AccessControlManager::~AccessControlManager()
+{
+}
+
+
+void AccessControlManager::loadFromConfig(const Poco::Util::AbstractConfiguration & users_config)
+{
+    auto & users_config_access_storage = dynamic_cast<UsersConfigAccessStorage &>(getStorageByIndex(1));
+    users_config_access_storage.loadFromConfig(users_config);
+}
+
+
+std::shared_ptr<QuotaContext> AccessControlManager::createQuotaContext(
+    const String & user_name, const Poco::Net::IPAddress & address, const String & custom_quota_key)
+{
+    return quota_context_factory->createContext(user_name, address, custom_quota_key);
+}
+
+
+std::vector<QuotaUsageInfo> AccessControlManager::getQuotaUsageInfo() const
+{
+    return quota_context_factory->getUsageInfo();
+}
+}
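
For orientation, here is a minimal usage sketch of the manager introduced above. It is not part of the patch; the `users.xml` path and loading via `Poco::Util::XMLConfiguration` are illustrative assumptions.

```cpp
#include <Access/AccessControlManager.h>
#include <Access/QuotaContext.h>
#include <Access/Quota.h>
#include <Poco/AutoPtr.h>
#include <Poco/Net/IPAddress.h>
#include <Poco/Util/XMLConfiguration.h>

int main()
{
    DB::AccessControlManager manager;

    /// Load users/quotas from a config file (path is illustrative).
    Poco::AutoPtr<Poco::Util::XMLConfiguration> users_config{new Poco::Util::XMLConfiguration("users.xml")};
    manager.loadFromConfig(*users_config);

    /// One context per session; the factory picks the quota matching this user.
    auto quota = manager.createQuotaContext("default", Poco::Net::IPAddress{"127.0.0.1"}, /* custom_quota_key = */ "");

    quota->used(DB::Quota::QUERIES, 1);       /// throws QUOTA_EXPIRED when the limit is exceeded
    quota->checkExceeded(DB::Quota::ERRORS);  /// re-check without consuming anything
}
```
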
diff --git a/dbms/src/Access/AccessControlManager.h b/dbms/src/Access/AccessControlManager.h
new file mode 100644
index 00000000000..2133717d676
--- /dev/null
+++ b/dbms/src/Access/AccessControlManager.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <Access/MultipleAccessStorage.h>
+#include <memory>
+#include <vector>
+
+
+namespace Poco
+{
+    namespace Net
+    {
+        class IPAddress;
+    }
+    namespace Util
+    {
+        class AbstractConfiguration;
+    }
+}
+
+namespace DB
+{
+class QuotaContext;
+class QuotaContextFactory;
+struct QuotaUsageInfo;
+
+
+/// Manages access control entities.
+class AccessControlManager : public MultipleAccessStorage
+{
+public:
+    AccessControlManager();
+    ~AccessControlManager();
+
+    void loadFromConfig(const Poco::Util::AbstractConfiguration & users_config);
+
+    std::shared_ptr<QuotaContext>
+    createQuotaContext(const String & user_name, const Poco::Net::IPAddress & address, const String & custom_quota_key);
+
+    std::vector<QuotaUsageInfo> getQuotaUsageInfo() const;
+
+private:
+    std::unique_ptr<QuotaContextFactory> quota_context_factory;
+};
+
+}
diff --git a/dbms/src/Access/IAccessEntity.cpp b/dbms/src/Access/IAccessEntity.cpp
new file mode 100644
index 00000000000..6a2f928ae9e
--- /dev/null
+++ b/dbms/src/Access/IAccessEntity.cpp
@@ -0,0 +1,19 @@
+#include <Access/IAccessEntity.h>
+#include <Access/Quota.h>
+#include <common/demangle.h>
+
+
+namespace DB
+{
+String IAccessEntity::getTypeName(std::type_index type)
+{
+    if (type == typeid(Quota))
+        return "Quota";
+    return demangle(type.name());
+}
+
+bool IAccessEntity::equal(const IAccessEntity & other) const
+{
+    return (full_name == other.full_name) && (getType() == other.getType());
+}
+}
diff --git a/dbms/src/Access/IAccessEntity.h b/dbms/src/Access/IAccessEntity.h
new file mode 100644
index 00000000000..272fde006ac
--- /dev/null
+++ b/dbms/src/Access/IAccessEntity.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <Common/typeid_cast.h>
+#include <memory>
+#include <typeindex>
+
+
+namespace DB
+{
+/// An access entity is a set of data which has a name and a type. Access entities control
+/// something related to access control. Entities can be stored in a file or another storage; see IAccessStorage.
+struct IAccessEntity
+{
+    IAccessEntity() = default;
+    IAccessEntity(const IAccessEntity &) = default;
+    virtual ~IAccessEntity() = default;
+    virtual std::shared_ptr<IAccessEntity> clone() const = 0;
+
+    std::type_index getType() const { return typeid(*this); }
+    static String getTypeName(std::type_index type);
+    String getTypeName() const { return getTypeName(getType()); }
+
+    template <typename EntityType>
+    bool isTypeOf() const { return isTypeOf(typeid(EntityType)); }
+    bool isTypeOf(std::type_index type) const { return type == getType(); }
+
+    virtual void setName(const String & name_) { full_name = name_; }
+    virtual String getName() const { return full_name; }
+    String getFullName() const { return full_name; }
+
+    friend bool operator ==(const IAccessEntity & lhs, const IAccessEntity & rhs) { return lhs.equal(rhs); }
+    friend bool operator !=(const IAccessEntity & lhs, const IAccessEntity & rhs) { return !(lhs == rhs); }
+
+protected:
+    String full_name;
+
+    virtual bool equal(const IAccessEntity & other) const;
+
+    /// Helper function to define clone() in the derived classes.
+    template <typename EntityType>
+    std::shared_ptr<EntityType> cloneImpl() const
+    {
+        return std::make_shared<EntityType>(typeid_cast<const EntityType &>(*this));
+    }
+};
+
+using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
+}
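
A sketch of what a concrete entity looks like against this interface. The type below is hypothetical and for illustration only; `Quota`, defined later in this patch, is the real example.

```cpp
#include <Access/IAccessEntity.h>

namespace DB
{
/// Hypothetical derived entity: the minimum a concrete type needs is
/// payload-aware equality plus clone(), which cloneImpl() provides.
struct ExampleEntity : public IAccessEntity
{
    String comment;   /// extra payload specific to this entity type

    bool equal(const IAccessEntity & other) const override
    {
        if (!IAccessEntity::equal(other))   /// compares full_name and dynamic type
            return false;
        return comment == typeid_cast<const ExampleEntity &>(other).comment;
    }

    std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<ExampleEntity>(); }
};
}
```
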
diff --git a/dbms/src/Access/IAccessStorage.cpp b/dbms/src/Access/IAccessStorage.cpp
new file mode 100644
index 00000000000..4283ec9e6dc
--- /dev/null
+++ b/dbms/src/Access/IAccessStorage.cpp
@@ -0,0 +1,450 @@
+#include <Access/IAccessStorage.h>
+#include <Common/Exception.h>
+#include <Common/quoteString.h>
+#include <IO/WriteHelpers.h>
+#include <Poco/UUIDGenerator.h>
+#include <Poco/Logger.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BAD_CAST;
+    extern const int ACCESS_ENTITY_NOT_FOUND;
+    extern const int ACCESS_ENTITY_ALREADY_EXISTS;
+    extern const int ACCESS_ENTITY_FOUND_DUPLICATES;
+    extern const int ACCESS_ENTITY_STORAGE_READONLY;
+}
+
+
+std::vector<UUID> IAccessStorage::findAll(std::type_index type) const
+{
+    return findAllImpl(type);
+}
+
+
+std::optional<UUID> IAccessStorage::find(std::type_index type, const String & name) const
+{
+    return findImpl(type, name);
+}
+
+
+std::vector<UUID> IAccessStorage::find(std::type_index type, const Strings & names) const
+{
+    std::vector<UUID> ids;
+    ids.reserve(names.size());
+    for (const String & name : names)
+    {
+        auto id = findImpl(type, name);
+        if (id)
+            ids.push_back(*id);
+    }
+    return ids;
+}
+
+
+UUID IAccessStorage::getID(std::type_index type, const String & name) const
+{
+    auto id = findImpl(type, name);
+    if (id)
+        return *id;
+    throwNotFound(type, name);
+}
+
+
+std::vector<UUID> IAccessStorage::getIDs(std::type_index type, const Strings & names) const
+{
+    std::vector<UUID> ids;
+    ids.reserve(names.size());
+    for (const String & name : names)
+        ids.push_back(getID(type, name));
+    return ids;
+}
+
+
+bool IAccessStorage::exists(const UUID & id) const
+{
+    return existsImpl(id);
+}
+
+
+
+AccessEntityPtr IAccessStorage::tryReadBase(const UUID & id) const
+{
+    try
+    {
+        return readImpl(id);
+    }
+    catch (Exception &)
+    {
+        return nullptr;
+    }
+}
+
+
+String IAccessStorage::readName(const UUID & id) const
+{
+    return readNameImpl(id);
+}
+
+
+std::optional<String> IAccessStorage::tryReadName(const UUID & id) const
+{
+    try
+    {
+        return readNameImpl(id);
+    }
+    catch (Exception &)
+    {
+        return {};
+    }
+}
+
+
+UUID IAccessStorage::insert(const AccessEntityPtr & entity)
+{
+    return insertImpl(entity, false);
+}
+
+
+std::vector<UUID> IAccessStorage::insert(const std::vector<AccessEntityPtr> & multiple_entities)
+{
+    std::vector<UUID> ids;
+    ids.reserve(multiple_entities.size());
+    String error_message;
+    for (const auto & entity : multiple_entities)
+    {
+        try
+        {
+            ids.push_back(insertImpl(entity, false));
+        }
+        catch (Exception & e)
+        {
+            if (e.code() != ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS)
+                throw;
+            error_message += (error_message.empty() ? "" : ". ") + e.message();
+        }
+    }
+    if (!error_message.empty())
+        throw Exception(error_message, ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS);
+    return ids;
+}
") + e.message(); + } + } + if (!error_message.empty()) + throw Exception(error_message, ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS); + return ids; +} + + +std::optional IAccessStorage::tryInsert(const AccessEntityPtr & entity) +{ + try + { + return insertImpl(entity, false); + } + catch (Exception &) + { + return {}; + } +} + + +std::vector IAccessStorage::tryInsert(const std::vector & multiple_entities) +{ + std::vector ids; + ids.reserve(multiple_entities.size()); + for (const auto & entity : multiple_entities) + { + try + { + ids.push_back(insertImpl(entity, false)); + } + catch (Exception &) + { + } + } + return ids; +} + + +UUID IAccessStorage::insertOrReplace(const AccessEntityPtr & entity) +{ + return insertImpl(entity, true); +} + + +std::vector IAccessStorage::insertOrReplace(const std::vector & multiple_entities) +{ + std::vector ids; + ids.reserve(multiple_entities.size()); + for (const auto & entity : multiple_entities) + ids.push_back(insertImpl(entity, true)); + return ids; +} + + +void IAccessStorage::remove(const UUID & id) +{ + removeImpl(id); +} + + +void IAccessStorage::remove(const std::vector & ids) +{ + String error_message; + for (const auto & id : ids) + { + try + { + removeImpl(id); + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::ACCESS_ENTITY_NOT_FOUND) + throw; + error_message += (error_message.empty() ? "" : ". ") + e.message(); + } + } + if (!error_message.empty()) + throw Exception(error_message, ErrorCodes::ACCESS_ENTITY_NOT_FOUND); +} + + +bool IAccessStorage::tryRemove(const UUID & id) +{ + try + { + removeImpl(id); + return true; + } + catch (Exception &) + { + return false; + } +} + + +std::vector IAccessStorage::tryRemove(const std::vector & ids) +{ + std::vector removed; + removed.reserve(ids.size()); + for (const auto & id : ids) + { + try + { + removeImpl(id); + removed.push_back(id); + } + catch (Exception &) + { + } + } + return removed; +} + + +void IAccessStorage::update(const UUID & id, const UpdateFunc & update_func) +{ + updateImpl(id, update_func); +} + + +void IAccessStorage::update(const std::vector & ids, const UpdateFunc & update_func) +{ + String error_message; + for (const auto & id : ids) + { + try + { + updateImpl(id, update_func); + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::ACCESS_ENTITY_NOT_FOUND) + throw; + error_message += (error_message.empty() ? "" : ". 
") + e.message(); + } + } + if (!error_message.empty()) + throw Exception(error_message, ErrorCodes::ACCESS_ENTITY_NOT_FOUND); +} + + +bool IAccessStorage::tryUpdate(const UUID & id, const UpdateFunc & update_func) +{ + try + { + updateImpl(id, update_func); + return true; + } + catch (Exception &) + { + return false; + } +} + + +std::vector IAccessStorage::tryUpdate(const std::vector & ids, const UpdateFunc & update_func) +{ + std::vector updated; + updated.reserve(ids.size()); + for (const auto & id : ids) + { + try + { + updateImpl(id, update_func); + updated.push_back(id); + } + catch (Exception &) + { + } + } + return updated; +} + + +IAccessStorage::SubscriptionPtr IAccessStorage::subscribeForChanges(std::type_index type, const OnChangedHandler & handler) const +{ + return subscribeForChangesImpl(type, handler); +} + + +IAccessStorage::SubscriptionPtr IAccessStorage::subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const +{ + return subscribeForChangesImpl(id, handler); +} + + +IAccessStorage::SubscriptionPtr IAccessStorage::subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) const +{ + if (ids.empty()) + return nullptr; + if (ids.size() == 1) + return subscribeForChangesImpl(ids[0], handler); + + std::vector subscriptions; + subscriptions.reserve(ids.size()); + for (const auto & id : ids) + { + auto subscription = subscribeForChangesImpl(id, handler); + if (subscription) + subscriptions.push_back(std::move(subscription)); + } + + class SubscriptionImpl : public Subscription + { + public: + SubscriptionImpl(std::vector subscriptions_) + : subscriptions(std::move(subscriptions_)) {} + private: + std::vector subscriptions; + }; + + return std::make_unique(std::move(subscriptions)); +} + + +bool IAccessStorage::hasSubscription(std::type_index type) const +{ + return hasSubscriptionImpl(type); +} + + +bool IAccessStorage::hasSubscription(const UUID & id) const +{ + return hasSubscriptionImpl(id); +} + + +void IAccessStorage::notify(const Notifications & notifications) +{ + for (const auto & [fn, id, new_entity] : notifications) + fn(id, new_entity); +} + + +UUID IAccessStorage::generateRandomID() +{ + static Poco::UUIDGenerator generator; + UUID id; + generator.createRandom().copyTo(reinterpret_cast(&id)); + return id; +} + + +Poco::Logger * IAccessStorage::getLogger() const +{ + Poco::Logger * ptr = log.load(); + if (!ptr) + log.store(ptr = &Poco::Logger::get("Access(" + storage_name + ")"), std::memory_order_relaxed); + return ptr; +} + + +void IAccessStorage::throwNotFound(const UUID & id) const +{ + throw Exception("ID {" + toString(id) + "} not found in " + getStorageName(), ErrorCodes::ACCESS_ENTITY_NOT_FOUND); +} + + +void IAccessStorage::throwNotFound(std::type_index type, const String & name) const +{ + throw Exception( + getTypeName(type) + " " + backQuote(name) + " not found in " + getStorageName(), ErrorCodes::ACCESS_ENTITY_NOT_FOUND); +} + + +void IAccessStorage::throwBadCast(const UUID & id, std::type_index type, const String & name, std::type_index required_type) const +{ + throw Exception( + "ID {" + toString(id) + "}: " + getTypeName(type) + backQuote(name) + " expected to be of type " + getTypeName(required_type), + ErrorCodes::BAD_CAST); +} + + +void IAccessStorage::throwIDCollisionCannotInsert(const UUID & id, std::type_index type, const String & name, std::type_index existing_type, const String & existing_name) const +{ + throw Exception( + getTypeName(type) + " " + backQuote(name) + ": cannot insert because the ID {" 
+            + toString(id) + "} is already used by "
+            + getTypeName(existing_type) + " " + backQuote(existing_name) + " in " + getStorageName(),
+        ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS);
+}
+
+
+void IAccessStorage::throwNameCollisionCannotInsert(std::type_index type, const String & name) const
+{
+    throw Exception(
+        getTypeName(type) + " " + backQuote(name) + ": cannot insert because " + getTypeName(type) + " " + backQuote(name)
+            + " already exists in " + getStorageName(),
+        ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS);
+}
+
+
+void IAccessStorage::throwNameCollisionCannotRename(std::type_index type, const String & old_name, const String & new_name) const
+{
+    throw Exception(
+        getTypeName(type) + " " + backQuote(old_name) + ": cannot rename to " + backQuote(new_name) + " because " + getTypeName(type) + " "
+            + backQuote(new_name) + " already exists in " + getStorageName(),
+        ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS);
+}
+
+
+void IAccessStorage::throwReadonlyCannotInsert(std::type_index type, const String & name) const
+{
+    throw Exception(
+        "Cannot insert " + getTypeName(type) + " " + backQuote(name) + " to " + getStorageName() + " because this storage is readonly",
+        ErrorCodes::ACCESS_ENTITY_STORAGE_READONLY);
+}
+
+
+void IAccessStorage::throwReadonlyCannotUpdate(std::type_index type, const String & name) const
+{
+    throw Exception(
+        "Cannot update " + getTypeName(type) + " " + backQuote(name) + " in " + getStorageName() + " because this storage is readonly",
+        ErrorCodes::ACCESS_ENTITY_STORAGE_READONLY);
+}
+
+
+void IAccessStorage::throwReadonlyCannotRemove(std::type_index type, const String & name) const
+{
+    throw Exception(
+        "Cannot remove " + getTypeName(type) + " " + backQuote(name) + " from " + getStorageName() + " because this storage is readonly",
+        ErrorCodes::ACCESS_ENTITY_STORAGE_READONLY);
+}
+}
diff --git a/dbms/src/Access/IAccessStorage.h b/dbms/src/Access/IAccessStorage.h
new file mode 100644
index 00000000000..b4153bce87d
--- /dev/null
+++ b/dbms/src/Access/IAccessStorage.h
@@ -0,0 +1,209 @@
+#pragma once
+
+#include <Access/IAccessEntity.h>
+#include <Core/Types.h>
+#include <Core/UUID.h>
+#include <atomic>
+#include <functional>
+#include <optional>
+#include <vector>
+
+
+namespace Poco { class Logger; }
+
+namespace DB
+{
+/// Contains entities, i.e. instances of classes derived from IAccessEntity.
+/// The implementations of this class MUST be thread-safe.
+class IAccessStorage
+{
+public:
+    IAccessStorage(const String & storage_name_) : storage_name(storage_name_) {}
+    virtual ~IAccessStorage() {}
+
+    /// Returns the name of this storage.
+    const String & getStorageName() const { return storage_name; }
+
+    /// Returns the identifiers of all the entities of a specified type contained in the storage.
+    std::vector<UUID> findAll(std::type_index type) const;
+
+    template <typename EntityType>
+    std::vector<UUID> findAll() const { return findAll(typeid(EntityType)); }
+
+    /// Searches for an entity with specified type and name. Returns std::nullopt if not found.
+    std::optional<UUID> find(std::type_index type, const String & name) const;
+
+    template <typename EntityType>
+    std::optional<UUID> find(const String & name) const { return find(typeid(EntityType), name); }
+
+    std::vector<UUID> find(std::type_index type, const Strings & names) const;
+
+    template <typename EntityType>
+    std::vector<UUID> find(const Strings & names) const { return find(typeid(EntityType), names); }
+
+    /// Searches for an entity with specified name and type. Throws an exception if not found.
+    UUID getID(std::type_index type, const String & name) const;
+
+    template <typename EntityType>
+    UUID getID(const String & name) const { return getID(typeid(EntityType), name); }
+
+    std::vector<UUID> getIDs(std::type_index type, const Strings & names) const;
+
+    template <typename EntityType>
+    std::vector<UUID> getIDs(const Strings & names) const { return getIDs(typeid(EntityType), names); }
+
+    /// Returns whether there is an entity with such identifier in the storage.
+    bool exists(const UUID & id) const;
+
+    /// Reads an entity. Throws an exception if not found.
+    template <typename EntityType>
+    std::shared_ptr<const EntityType> read(const UUID & id) const;
+
+    template <typename EntityType>
+    std::shared_ptr<const EntityType> read(const String & name) const;
+
+    /// Reads an entity. Returns nullptr if not found.
+    template <typename EntityType>
+    std::shared_ptr<const EntityType> tryRead(const UUID & id) const;
+
+    template <typename EntityType>
+    std::shared_ptr<const EntityType> tryRead(const String & name) const;
+
+    /// Reads only the name of an entity.
+    String readName(const UUID & id) const;
+    std::optional<String> tryReadName(const UUID & id) const;
+
+    /// Inserts an entity into the storage. Returns the ID of a new entry in the storage.
+    /// Throws an exception if the specified name already exists.
+    UUID insert(const AccessEntityPtr & entity);
+    std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities);
+
+    /// Inserts an entity into the storage. Returns the ID of a new entry in the storage.
+    std::optional<UUID> tryInsert(const AccessEntityPtr & entity);
+    std::vector<UUID> tryInsert(const std::vector<AccessEntityPtr> & multiple_entities);
+
+    /// Inserts an entity into the storage. Returns the ID of a new entry in the storage.
+    /// Replaces an existing entry in the storage if the specified name already exists.
+    UUID insertOrReplace(const AccessEntityPtr & entity);
+    std::vector<UUID> insertOrReplace(const std::vector<AccessEntityPtr> & multiple_entities);
+
+    /// Removes an entity from the storage. Throws an exception if couldn't remove.
+    void remove(const UUID & id);
+    void remove(const std::vector<UUID> & ids);
+
+    /// Removes an entity from the storage. Returns false if couldn't remove.
+    bool tryRemove(const UUID & id);
+
+    /// Removes multiple entities from the storage. Returns the list of successfully dropped.
+    std::vector<UUID> tryRemove(const std::vector<UUID> & ids);
+
+    using UpdateFunc = std::function<AccessEntityPtr(const AccessEntityPtr &)>;
+
+    /// Updates an entity stored in the storage. Throws an exception if couldn't update.
+    void update(const UUID & id, const UpdateFunc & update_func);
+    void update(const std::vector<UUID> & ids, const UpdateFunc & update_func);
+
+    /// Updates an entity stored in the storage. Returns false if couldn't update.
+    bool tryUpdate(const UUID & id, const UpdateFunc & update_func);
+
+    /// Updates multiple entities in the storage. Returns the list of successfully updated.
+    std::vector<UUID> tryUpdate(const std::vector<UUID> & ids, const UpdateFunc & update_func);
+
+    class Subscription
+    {
+    public:
+        virtual ~Subscription() {}
+    };
+
+    using SubscriptionPtr = std::unique_ptr<Subscription>;
+    using OnChangedHandler = std::function<void(const UUID & id, const AccessEntityPtr & new_entity)>;
+
+    /// Subscribes for all changes.
+    /// Can return nullptr if cannot subscribe (identifier not found) or if it doesn't make sense (the storage is read-only).
+    SubscriptionPtr subscribeForChanges(std::type_index type, const OnChangedHandler & handler) const;
+
+    template <typename EntityType>
+    SubscriptionPtr subscribeForChanges(OnChangedHandler handler) const { return subscribeForChanges(typeid(EntityType), handler); }
+ SubscriptionPtr subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const; + SubscriptionPtr subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) const; + + bool hasSubscription(std::type_index type) const; + bool hasSubscription(const UUID & id) const; + +protected: + virtual std::optional findImpl(std::type_index type, const String & name) const = 0; + virtual std::vector findAllImpl(std::type_index type) const = 0; + virtual bool existsImpl(const UUID & id) const = 0; + virtual AccessEntityPtr readImpl(const UUID & id) const = 0; + virtual String readNameImpl(const UUID & id) const = 0; + virtual UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) = 0; + virtual void removeImpl(const UUID & id) = 0; + virtual void updateImpl(const UUID & id, const UpdateFunc & update_func) = 0; + virtual SubscriptionPtr subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const = 0; + virtual SubscriptionPtr subscribeForChangesImpl(std::type_index type, const OnChangedHandler & handler) const = 0; + virtual bool hasSubscriptionImpl(const UUID & id) const = 0; + virtual bool hasSubscriptionImpl(std::type_index type) const = 0; + + static UUID generateRandomID(); + Poco::Logger * getLogger() const; + static String getTypeName(std::type_index type) { return IAccessEntity::getTypeName(type); } + [[noreturn]] void throwNotFound(const UUID & id) const; + [[noreturn]] void throwNotFound(std::type_index type, const String & name) const; + [[noreturn]] void throwBadCast(const UUID & id, std::type_index type, const String & name, std::type_index required_type) const; + [[noreturn]] void throwIDCollisionCannotInsert(const UUID & id, std::type_index type, const String & name, std::type_index existing_type, const String & existing_name) const; + [[noreturn]] void throwNameCollisionCannotInsert(std::type_index type, const String & name) const; + [[noreturn]] void throwNameCollisionCannotRename(std::type_index type, const String & old_name, const String & new_name) const; + [[noreturn]] void throwReadonlyCannotInsert(std::type_index type, const String & name) const; + [[noreturn]] void throwReadonlyCannotUpdate(std::type_index type, const String & name) const; + [[noreturn]] void throwReadonlyCannotRemove(std::type_index type, const String & name) const; + + using Notification = std::tuple; + using Notifications = std::vector; + static void notify(const Notifications & notifications); + +private: + AccessEntityPtr tryReadBase(const UUID & id) const; + + const String storage_name; + mutable std::atomic log = nullptr; +}; + + +template +std::shared_ptr IAccessStorage::read(const UUID & id) const +{ + auto entity = readImpl(id); + auto ptr = typeid_cast>(entity); + if (ptr) + return ptr; + throwBadCast(id, entity->getType(), entity->getFullName(), typeid(EntityType)); +} + + +template +std::shared_ptr IAccessStorage::read(const String & name) const +{ + return read(getID(name)); +} + + +template +std::shared_ptr IAccessStorage::tryRead(const UUID & id) const +{ + auto entity = tryReadBase(id); + if (!entity) + return nullptr; + return typeid_cast>(entity); +} + + +template +std::shared_ptr IAccessStorage::tryRead(const String & name) const +{ + auto id = find(name); + return id ? 
diff --git a/dbms/src/Access/MemoryAccessStorage.cpp b/dbms/src/Access/MemoryAccessStorage.cpp
new file mode 100644
index 00000000000..ed42acca1a7
--- /dev/null
+++ b/dbms/src/Access/MemoryAccessStorage.cpp
@@ -0,0 +1,358 @@
+#include <Access/MemoryAccessStorage.h>
+#include <ext/scope_guard.h>
+#include <unordered_set>
+
+
+namespace DB
+{
+MemoryAccessStorage::MemoryAccessStorage(const String & storage_name_)
+    : IAccessStorage(storage_name_), shared_ptr_to_this{std::make_shared<const MemoryAccessStorage *>(this)}
+{
+}
+
+
+MemoryAccessStorage::~MemoryAccessStorage() {}
+
+
+std::optional<UUID> MemoryAccessStorage::findImpl(std::type_index type, const String & name) const
+{
+    std::lock_guard lock{mutex};
+    auto it = names.find({name, type});
+    if (it == names.end())
+        return {};
+
+    Entry & entry = *(it->second);
+    return entry.id;
+}
+
+
+std::vector<UUID> MemoryAccessStorage::findAllImpl(std::type_index type) const
+{
+    std::lock_guard lock{mutex};
+    std::vector<UUID> result;
+    result.reserve(entries.size());
+    for (const auto & [id, entry] : entries)
+        if (entry.entity->isTypeOf(type))
+            result.emplace_back(id);
+    return result;
+}
+
+
+bool MemoryAccessStorage::existsImpl(const UUID & id) const
+{
+    std::lock_guard lock{mutex};
+    return entries.count(id);
+}
+
+
+AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id) const
+{
+    std::lock_guard lock{mutex};
+    auto it = entries.find(id);
+    if (it == entries.end())
+        throwNotFound(id);
+    const Entry & entry = it->second;
+    return entry.entity;
+}
+
+
+String MemoryAccessStorage::readNameImpl(const UUID & id) const
+{
+    return readImpl(id)->getFullName();
+}
+
+
+UUID MemoryAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists)
+{
+    Notifications notifications;
+    SCOPE_EXIT({ notify(notifications); });
+
+    UUID id = generateRandomID();
+    std::lock_guard lock{mutex};
+    insertNoLock(id, new_entity, replace_if_exists, notifications);   /// insert under the same ID that we return
+    return id;
+}
+
+
+void MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, Notifications & notifications)
+{
+    const String & name = new_entity->getFullName();
+    std::type_index type = new_entity->getType();
+
+    /// Check that we can insert.
+    auto it = entries.find(id);
+    if (it != entries.end())
+    {
+        const auto & existing_entry = it->second;
+        throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getFullName());
+    }
+
+    auto it2 = names.find({name, type});
+    if (it2 != names.end())
+    {
+        const auto & existing_entry = *(it2->second);
+        if (replace_if_exists)
+            removeNoLock(existing_entry.id, notifications);
+        else
+            throwNameCollisionCannotInsert(type, name);
+    }
+
+    /// Do insertion.
+    auto & entry = entries[id];
+    entry.id = id;
+    entry.entity = new_entity;
+    names[std::pair{name, type}] = &entry;
+    prepareNotifications(entry, false, notifications);
+}
+
+
+void MemoryAccessStorage::removeImpl(const UUID & id)
+{
+    Notifications notifications;
+    SCOPE_EXIT({ notify(notifications); });
+
+    std::lock_guard lock{mutex};
+    removeNoLock(id, notifications);
+}
+
+
+void MemoryAccessStorage::removeNoLock(const UUID & id, Notifications & notifications)
+{
+    auto it = entries.find(id);
+    if (it == entries.end())
+        throwNotFound(id);
+
+    Entry & entry = it->second;
+    const String & name = entry.entity->getFullName();
+    std::type_index type = entry.entity->getType();
+
+    prepareNotifications(entry, true, notifications);
+
+    /// Do removing.
+    names.erase({name, type});
+    entries.erase(it);
+}
+
+
+void MemoryAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func)
+{
+    Notifications notifications;
+    SCOPE_EXIT({ notify(notifications); });
+
+    std::lock_guard lock{mutex};
+    updateNoLock(id, update_func, notifications);
+}
+
+
+void MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, Notifications & notifications)
+{
+    auto it = entries.find(id);
+    if (it == entries.end())
+        throwNotFound(id);
+
+    Entry & entry = it->second;
+    auto old_entity = entry.entity;
+    auto new_entity = update_func(old_entity);
+
+    if (*new_entity == *old_entity)
+        return;
+
+    entry.entity = new_entity;
+
+    if (new_entity->getFullName() != old_entity->getFullName())
+    {
+        auto it2 = names.find({new_entity->getFullName(), new_entity->getType()});
+        if (it2 != names.end())
+            throwNameCollisionCannotRename(old_entity->getType(), old_entity->getFullName(), new_entity->getFullName());
+
+        names.erase({old_entity->getFullName(), old_entity->getType()});
+        names[std::pair{new_entity->getFullName(), new_entity->getType()}] = &entry;
+    }
+
+    prepareNotifications(entry, false, notifications);
+}
+
+
+void MemoryAccessStorage::setAll(const std::vector<AccessEntityPtr> & all_entities)
+{
+    std::vector<std::pair<UUID, AccessEntityPtr>> entities_with_ids;
+    entities_with_ids.reserve(all_entities.size());
+    for (const auto & entity : all_entities)
+        entities_with_ids.emplace_back(generateRandomID(), entity);
+    setAll(entities_with_ids);
+}
+
+
+void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities)
+{
+    Notifications notifications;
+    SCOPE_EXIT({ notify(notifications); });
+
+    std::lock_guard lock{mutex};
+    setAllNoLock(all_entities, notifications);
+}
+
+
+void MemoryAccessStorage::setAllNoLock(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities, Notifications & notifications)
+{
+    /// Get list of the currently used IDs. Later we will remove those of them which are not used anymore.
+    std::unordered_set<UUID> not_used_ids;
+    for (const auto & id_and_entry : entries)
+        not_used_ids.emplace(id_and_entry.first);
+
+    /// Remove conflicting entities.
+    for (const auto & [id, entity] : all_entities)
+    {
+        auto it = entries.find(id);
+        if (it != entries.end())
+        {
+            not_used_ids.erase(id);   /// ID is used.
+            Entry & entry = it->second;
+            if (entry.entity->getType() != entity->getType())
+            {
+                removeNoLock(id, notifications);
+                continue;
+            }
+        }
+        auto it2 = names.find({entity->getFullName(), entity->getType()});
+        if (it2 != names.end())
+        {
+            Entry & entry = *(it2->second);
+            if (entry.id != id)
+                removeNoLock(id, notifications);
+        }
+    }
+
+    /// Remove entities which are not used anymore.
+    for (const auto & id : not_used_ids)
+        removeNoLock(id, notifications);
+
+    /// Insert or update entities.
+    for (const auto & [id, entity] : all_entities)
+    {
+        auto it = entries.find(id);
+        if (it != entries.end())
+        {
+            if (*(it->second.entity) != *entity)
+            {
+                const AccessEntityPtr & changed_entity = entity;
+                updateNoLock(id, [&changed_entity](const AccessEntityPtr &) { return changed_entity; }, notifications);
+            }
+        }
+        else
+            insertNoLock(id, entity, false, notifications);
+    }
+}
+
+
+void MemoryAccessStorage::prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const
+{
+    for (const auto & handler : entry.handlers_by_id)
+        notifications.push_back({handler, entry.id, remove ? nullptr : entry.entity});
+
+    auto range = handlers_by_type.equal_range(entry.entity->getType());
+    for (auto it = range.first; it != range.second; ++it)
+        notifications.push_back({it->second, entry.id, remove ? nullptr : entry.entity});
+}
+
+
+IAccessStorage::SubscriptionPtr MemoryAccessStorage::subscribeForChangesImpl(std::type_index type, const OnChangedHandler & handler) const
+{
+    class SubscriptionImpl : public Subscription
+    {
+    public:
+        SubscriptionImpl(
+            const MemoryAccessStorage & storage_,
+            std::type_index type_,
+            const OnChangedHandler & handler_)
+            : storage_weak(storage_.shared_ptr_to_this)
+        {
+            std::lock_guard lock{storage_.mutex};
+            handler_it = storage_.handlers_by_type.emplace(type_, handler_);
+        }
+
+        ~SubscriptionImpl() override
+        {
+            auto storage = storage_weak.lock();
+            if (storage)
+            {
+                std::lock_guard lock{(*storage)->mutex};
+                (*storage)->handlers_by_type.erase(handler_it);
+            }
+        }
+
+    private:
+        std::weak_ptr<const MemoryAccessStorage *> storage_weak;
+        std::unordered_multimap<std::type_index, OnChangedHandler>::iterator handler_it;
+    };
+
+    return std::make_unique<SubscriptionImpl>(*this, type, handler);
+}
+
+
+IAccessStorage::SubscriptionPtr MemoryAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
+{
+    class SubscriptionImpl : public Subscription
+    {
+    public:
+        SubscriptionImpl(
+            const MemoryAccessStorage & storage_,
+            const UUID & id_,
+            const OnChangedHandler & handler_)
+            : storage_weak(storage_.shared_ptr_to_this),
+              id(id_)
+        {
+            std::lock_guard lock{storage_.mutex};
+            auto it = storage_.entries.find(id);
+            if (it == storage_.entries.end())
+            {
+                storage_weak.reset();
+                return;
+            }
+            const Entry & entry = it->second;
+            handler_it = entry.handlers_by_id.insert(entry.handlers_by_id.end(), handler_);
+        }
+
+        ~SubscriptionImpl() override
+        {
+            auto storage = storage_weak.lock();
+            if (storage)
+            {
+                std::lock_guard lock{(*storage)->mutex};
+                auto it = (*storage)->entries.find(id);
+                if (it != (*storage)->entries.end())
+                {
+                    const Entry & entry = it->second;
+                    entry.handlers_by_id.erase(handler_it);
+                }
+            }
+        }
+
+    private:
+        std::weak_ptr<const MemoryAccessStorage *> storage_weak;
+        UUID id;
+        std::list<OnChangedHandler>::iterator handler_it;
+    };
+
+    return std::make_unique<SubscriptionImpl>(*this, id, handler);
+}
+
+
+bool MemoryAccessStorage::hasSubscriptionImpl(const UUID & id) const
+{
+    auto it = entries.find(id);
+    if (it != entries.end())
+    {
+        const Entry & entry = it->second;
+        return !entry.handlers_by_id.empty();
+    }
+    return false;
+}
+
+
+bool MemoryAccessStorage::hasSubscriptionImpl(std::type_index type) const
+{
+    auto range = handlers_by_type.equal_range(type);
+    return range.first != range.second;
+}
+}
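
Subscriptions are RAII handles; destroying the returned pointer unsubscribes. A sketch of listening for quota changes on this storage (illustrative; not part of the patch):

```cpp
#include <Access/MemoryAccessStorage.h>
#include <Access/Quota.h>
#include <iostream>

int main()
{
    DB::MemoryAccessStorage storage;

    /// Notifications are fired outside the storage mutex
    /// (see the SCOPE_EXIT({ notify(...); }) pattern in the mutating methods),
    /// so the handler may safely call back into the storage.
    auto subscription = storage.subscribeForChanges<DB::Quota>(
        [](const DB::UUID &, const DB::AccessEntityPtr & entity)
        {
            if (entity)
                std::cout << "changed/inserted: " << entity->getFullName() << '\n';
            else
                std::cout << "removed\n";   /// nullptr entity means the entry was removed
        });

    auto quota = std::make_shared<DB::Quota>();
    quota->setName("watched");
    auto id = storage.insert(quota);   /// handler prints "changed/inserted: watched"
    storage.tryRemove(id);             /// handler prints "removed"
}
```
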
diff --git a/dbms/src/Access/MemoryAccessStorage.h b/dbms/src/Access/MemoryAccessStorage.h
new file mode 100644
index 00000000000..51c55487ca7
--- /dev/null
+++ b/dbms/src/Access/MemoryAccessStorage.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <Access/IAccessStorage.h>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+
+namespace DB
+{
+/// Implementation of IAccessStorage which keeps all data in memory.
+class MemoryAccessStorage : public IAccessStorage
+{
+public:
+    MemoryAccessStorage(const String & storage_name_ = "memory");
+    ~MemoryAccessStorage() override;
+
+    /// Sets all entities at once.
+    void setAll(const std::vector<AccessEntityPtr> & all_entities);
+    void setAll(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities);
+
+private:
+    std::optional<UUID> findImpl(std::type_index type, const String & name) const override;
+    std::vector<UUID> findAllImpl(std::type_index type) const override;
+    bool existsImpl(const UUID & id) const override;
+    AccessEntityPtr readImpl(const UUID & id) const override;
+    String readNameImpl(const UUID & id) const override;
+    UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
+    void removeImpl(const UUID & id) override;
+    void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
+    SubscriptionPtr subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
+    SubscriptionPtr subscribeForChangesImpl(std::type_index type, const OnChangedHandler & handler) const override;
+    bool hasSubscriptionImpl(const UUID & id) const override;
+    bool hasSubscriptionImpl(std::type_index type) const override;
+
+    struct Entry
+    {
+        UUID id;
+        AccessEntityPtr entity;
+        mutable std::list<OnChangedHandler> handlers_by_id;
+    };
+
+    void insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, Notifications & notifications);
+    void removeNoLock(const UUID & id, Notifications & notifications);
+    void updateNoLock(const UUID & id, const UpdateFunc & update_func, Notifications & notifications);
+    void setAllNoLock(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities, Notifications & notifications);
+    void prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const;
+
+    using NameTypePair = std::pair<String, std::type_index>;
+    struct Hash
+    {
+        size_t operator()(const NameTypePair & key) const
+        {
+            return std::hash<String>{}(key.first) - std::hash<std::type_index>{}(key.second);
+        }
+    };
+
+    mutable std::mutex mutex;
+    std::unordered_map<UUID, Entry> entries;                 /// We want to search entries both by ID...
+    std::unordered_map<NameTypePair, Entry *, Hash> names;   /// ...and by the pair of name and type.
+    mutable std::unordered_multimap<std::type_index, OnChangedHandler> handlers_by_type;
+    std::shared_ptr<const MemoryAccessStorage *> shared_ptr_to_this;   /// We need weak pointers to `this` to implement subscriptions.
+};
+}
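
`setAll()` is the reconciliation entry point a configuration reload would use: entries absent from the new set are removed, changed ones are updated, new ones are inserted, and notifications fire only for real changes. A minimal sketch (illustrative; the parsing step is assumed):

```cpp
#include <Access/MemoryAccessStorage.h>

/// Illustrative reload: hand the full desired entity set to the storage and
/// let setAll() diff it against the current contents.
void reload(DB::MemoryAccessStorage & storage, const std::vector<DB::AccessEntityPtr> & parsed_entities)
{
    /// This overload generates fresh IDs; use the (UUID, entity) pair overload
    /// to keep IDs stable across reloads.
    storage.setAll(parsed_entities);
}
```
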
diff --git a/dbms/src/Access/MultipleAccessStorage.cpp b/dbms/src/Access/MultipleAccessStorage.cpp
new file mode 100644
index 00000000000..f3db0b0fbbc
--- /dev/null
+++ b/dbms/src/Access/MultipleAccessStorage.cpp
@@ -0,0 +1,246 @@
+#include <Access/MultipleAccessStorage.h>
+#include <Common/Exception.h>
+#include <Common/quoteString.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ACCESS_ENTITY_NOT_FOUND;
+    extern const int ACCESS_ENTITY_FOUND_DUPLICATES;
+}
+
+
+namespace
+{
+    template <typename StoragePtrT>
+    String joinStorageNames(const std::vector<StoragePtrT> & storages)
+    {
+        String result;
+        for (const auto & storage : storages)
+        {
+            if (!result.empty())
+                result += ", ";
+            result += storage->getStorageName();
+        }
+        return result;
+    }
+}
+
+
+MultipleAccessStorage::MultipleAccessStorage(
+    std::vector<std::unique_ptr<Storage>> nested_storages_, size_t index_of_nested_storage_for_insertion_)
+    : IAccessStorage(joinStorageNames(nested_storages_))
+    , nested_storages(std::move(nested_storages_))
+    , nested_storage_for_insertion(nested_storages[index_of_nested_storage_for_insertion_].get())
+    , ids_cache(512 /* cache size */)
+{
+}
+
+
+MultipleAccessStorage::~MultipleAccessStorage()
+{
+}
+
+
+std::vector<UUID> MultipleAccessStorage::findMultiple(std::type_index type, const String & name) const
+{
+    std::vector<UUID> ids;
+    for (const auto & nested_storage : nested_storages)
+    {
+        auto id = nested_storage->find(type, name);
+        if (id)
+        {
+            std::lock_guard lock{ids_cache_mutex};
+            ids_cache.set(*id, std::make_shared<Storage *>(nested_storage.get()));
+            ids.push_back(*id);
+        }
+    }
+    return ids;
+}
+
+
+std::optional<UUID> MultipleAccessStorage::findImpl(std::type_index type, const String & name) const
+{
+    auto ids = findMultiple(type, name);
+    if (ids.empty())
+        return {};
+    if (ids.size() == 1)
+        return ids[0];
+
+    std::vector<const Storage *> storages_with_duplicates;
+    for (const auto & id : ids)
+    {
+        auto * storage = findStorage(id);
+        if (storage)
+            storages_with_duplicates.push_back(storage);
+    }
+
+    throw Exception(
+        "Found " + getTypeName(type) + " " + backQuote(name) + " in " + std::to_string(ids.size())
+            + " storages: " + joinStorageNames(storages_with_duplicates),
+        ErrorCodes::ACCESS_ENTITY_FOUND_DUPLICATES);
+}
+
+
+std::vector<UUID> MultipleAccessStorage::findAllImpl(std::type_index type) const
+{
+    std::vector<UUID> all_ids;
+    for (const auto & nested_storage : nested_storages)
+    {
+        auto ids = nested_storage->findAll(type);
+        all_ids.insert(all_ids.end(), std::make_move_iterator(ids.begin()), std::make_move_iterator(ids.end()));
+    }
+    return all_ids;
+}
+
+
+bool MultipleAccessStorage::existsImpl(const UUID & id) const
+{
+    return findStorage(id) != nullptr;
+}
+
+
+IAccessStorage * MultipleAccessStorage::findStorage(const UUID & id)
+{
+    {
+        std::lock_guard lock{ids_cache_mutex};
+        auto from_cache = ids_cache.get(id);
+        if (from_cache)
+        {
+            auto * storage = *from_cache;
+            if (storage->exists(id))
+                return storage;
+        }
+    }
+
+    for (const auto & nested_storage : nested_storages)
+    {
+        if (nested_storage->exists(id))
+        {
+            std::lock_guard lock{ids_cache_mutex};
+            ids_cache.set(id, std::make_shared<Storage *>(nested_storage.get()));
+            return nested_storage.get();
+        }
+    }
+
+    return nullptr;
+}
+
+
+const IAccessStorage * MultipleAccessStorage::findStorage(const UUID & id) const
+{
+    return const_cast<MultipleAccessStorage *>(this)->findStorage(id);
+}
+
+
+IAccessStorage & MultipleAccessStorage::getStorage(const UUID & id)
+{
+    auto * storage = findStorage(id);
+    if (storage)
+        return *storage;
+    throwNotFound(id);
+}
+
+
+const IAccessStorage & MultipleAccessStorage::getStorage(const UUID & id) const
+{
+    return const_cast<MultipleAccessStorage *>(this)->getStorage(id);
+}
+
+
+AccessEntityPtr MultipleAccessStorage::readImpl(const UUID & id) const
+{
+    return getStorage(id).read(id);
+}
+
+
+String MultipleAccessStorage::readNameImpl(const UUID & id) const
+{
+    return getStorage(id).readName(id);
+}
+
+
+UUID MultipleAccessStorage::insertImpl(const AccessEntityPtr & entity, bool replace_if_exists)
+{
+    auto id = replace_if_exists ? nested_storage_for_insertion->insertOrReplace(entity) : nested_storage_for_insertion->insert(entity);
+
+    std::lock_guard lock{ids_cache_mutex};
+    ids_cache.set(id, std::make_shared<Storage *>(nested_storage_for_insertion));
+
+    return id;
+}
+
+
+void MultipleAccessStorage::removeImpl(const UUID & id)
+{
+    getStorage(id).remove(id);
+}
+
+
+void MultipleAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func)
+{
+    getStorage(id).update(id, update_func);
+}
+
+
+IAccessStorage::SubscriptionPtr MultipleAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
+{
+    auto storage = findStorage(id);
+    if (!storage)
+        return nullptr;
+    return storage->subscribeForChanges(id, handler);
+}
+
+
+IAccessStorage::SubscriptionPtr MultipleAccessStorage::subscribeForChangesImpl(std::type_index type, const OnChangedHandler & handler) const
+{
+    std::vector<SubscriptionPtr> subscriptions;
+    for (const auto & nested_storage : nested_storages)
+    {
+        auto subscription = nested_storage->subscribeForChanges(type, handler);
+        if (subscription)
+            subscriptions.emplace_back(std::move(subscription));
+    }
+
+    if (subscriptions.empty())
+        return nullptr;
+
+    if (subscriptions.size() == 1)
+        return std::move(subscriptions[0]);
+
+    class SubscriptionImpl : public Subscription
+    {
+    public:
+        SubscriptionImpl(std::vector<SubscriptionPtr> subscriptions_)
+            : subscriptions(std::move(subscriptions_)) {}
+    private:
+        std::vector<SubscriptionPtr> subscriptions;
+    };
+
+    return std::make_unique<SubscriptionImpl>(std::move(subscriptions));
+}
+
+
+bool MultipleAccessStorage::hasSubscriptionImpl(const UUID & id) const
+{
+    for (const auto & nested_storage : nested_storages)
+    {
+        if (nested_storage->hasSubscription(id))
+            return true;
+    }
+    return false;
+}
+
+
+bool MultipleAccessStorage::hasSubscriptionImpl(std::type_index type) const
+{
+    for (const auto & nested_storage : nested_storages)
+    {
+        if (nested_storage->hasSubscription(type))
+            return true;
+    }
+    return false;
+}
+}
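
A sketch of how the facade resolves entities across nested storages (illustrative; storage names are made up). Inserts always target the storage at the insertion index, while lookups consult the id-to-storage LRU cache before scanning all nested storages:

```cpp
#include <Access/MemoryAccessStorage.h>
#include <Access/MultipleAccessStorage.h>
#include <Access/Quota.h>
#include <cassert>

int main()
{
    std::vector<std::unique_ptr<DB::IAccessStorage>> nested;
    nested.push_back(std::make_unique<DB::MemoryAccessStorage>("mem_a"));
    nested.push_back(std::make_unique<DB::MemoryAccessStorage>("mem_b"));

    /// Index 0 (the default) receives inserts; reads resolve across both.
    DB::MultipleAccessStorage facade(std::move(nested));

    auto quota = std::make_shared<DB::Quota>();
    quota->setName("q");
    auto id = facade.insert(quota);   /// lands in "mem_a"

    assert(facade.findStorage(id)->getStorageName() == "mem_a");
    assert(facade.exists(id));
}
```
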
diff --git a/dbms/src/Access/MultipleAccessStorage.h b/dbms/src/Access/MultipleAccessStorage.h
new file mode 100644
index 00000000000..42e500a1851
--- /dev/null
+++ b/dbms/src/Access/MultipleAccessStorage.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <Access/IAccessStorage.h>
+#include <Common/LRUCache.h>
+#include <mutex>
+
+
+namespace DB
+{
+/// Implementation of IAccessStorage which contains multiple nested storages.
+class MultipleAccessStorage : public IAccessStorage
+{
+public:
+    using Storage = IAccessStorage;
+
+    MultipleAccessStorage(std::vector<std::unique_ptr<Storage>> nested_storages_, size_t index_of_nested_storage_for_insertion_ = 0);
+    ~MultipleAccessStorage() override;
+
+    std::vector<UUID> findMultiple(std::type_index type, const String & name) const;
+
+    template <typename EntityType>
+    std::vector<UUID> findMultiple(const String & name) const { return findMultiple(typeid(EntityType), name); }
+
+    const Storage * findStorage(const UUID & id) const;
+    Storage * findStorage(const UUID & id);
+    const Storage & getStorage(const UUID & id) const;
+    Storage & getStorage(const UUID & id);
+
+    Storage & getStorageByIndex(size_t i) { return *(nested_storages[i]); }
+    const Storage & getStorageByIndex(size_t i) const { return *(nested_storages[i]); }
+
+protected:
+    std::optional<UUID> findImpl(std::type_index type, const String & name) const override;
+    std::vector<UUID> findAllImpl(std::type_index type) const override;
+    bool existsImpl(const UUID & id) const override;
+    AccessEntityPtr readImpl(const UUID & id) const override;
+    String readNameImpl(const UUID & id) const override;
+    UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
+    void removeImpl(const UUID & id) override;
+    void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
+    SubscriptionPtr subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
+    SubscriptionPtr subscribeForChangesImpl(std::type_index type, const OnChangedHandler & handler) const override;
+    bool hasSubscriptionImpl(const UUID & id) const override;
+    bool hasSubscriptionImpl(std::type_index type) const override;
+
+private:
+    std::vector<std::unique_ptr<Storage>> nested_storages;
+    IAccessStorage * nested_storage_for_insertion;
+    mutable LRUCache<UUID, Storage *> ids_cache;
+    mutable std::mutex ids_cache_mutex;
+};
+
+}
diff --git a/dbms/src/Access/Quota.cpp b/dbms/src/Access/Quota.cpp
new file mode 100644
index 00000000000..d178307ca51
--- /dev/null
+++ b/dbms/src/Access/Quota.cpp
@@ -0,0 +1,46 @@
+#include <Access/Quota.h>
+#include <boost/range/algorithm/equal.hpp>
+#include <boost/range/algorithm/fill.hpp>
+
+
+namespace DB
+{
+Quota::Limits::Limits()
+{
+    boost::range::fill(max, 0);
+}
+
+
+bool operator ==(const Quota::Limits & lhs, const Quota::Limits & rhs)
+{
+    return boost::range::equal(lhs.max, rhs.max) && (lhs.duration == rhs.duration)
+        && (lhs.randomize_interval == rhs.randomize_interval);
+}
+
+
+bool Quota::equal(const IAccessEntity & other) const
+{
+    if (!IAccessEntity::equal(other))
+        return false;
+    const auto & other_quota = typeid_cast<const Quota &>(other);
+    return (all_limits == other_quota.all_limits) && (key_type == other_quota.key_type) && (roles == other_quota.roles)
+        && (all_roles == other_quota.all_roles) && (except_roles == other_quota.except_roles);
+}
+
+
+const char * Quota::resourceTypeToColumnName(ResourceType resource_type)
+{
+    switch (resource_type)
+    {
+        case Quota::QUERIES: return "queries";
+        case Quota::ERRORS: return "errors";
+        case Quota::RESULT_ROWS: return "result_rows";
+        case Quota::RESULT_BYTES: return "result_bytes";
+        case Quota::READ_ROWS: return "read_rows";
+        case Quota::READ_BYTES: return "read_bytes";
+        case Quota::EXECUTION_TIME: return "execution_time";
+    }
+    __builtin_unreachable();
+}
+}
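
A small illustration of iterating the resource types the same way the server-side code in this patch does, e.g. to build column names for a system table (sketch, not part of the patch):

```cpp
#include <Access/Quota.h>
#include <ext/range.h>
#include <iostream>

int main()
{
    /// range_with_static_cast enumerates 0..MAX_RESOURCE_TYPE-1 as ResourceType values.
    for (auto resource_type : ext::range_with_static_cast<DB::Quota::ResourceType>(DB::Quota::MAX_RESOURCE_TYPE))
        std::cout << DB::Quota::resourceTypeToColumnName(resource_type) << '\n';
}
```
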
diff --git a/dbms/src/Access/Quota.h b/dbms/src/Access/Quota.h
new file mode 100644
index 00000000000..716bccbe1ff
--- /dev/null
+++ b/dbms/src/Access/Quota.h
@@ -0,0 +1,141 @@
+#pragma once
+
+#include <Access/IAccessEntity.h>
+#include <chrono>
+
+
+namespace DB
+{
+/** Quota for resource consumption over a specific interval.
+  * Used to limit resource usage by a user.
+  * A quota is applied "softly" - it can be slightly exceeded, because it is usually checked only once per block of processed data.
+  * Accumulated values are not persisted and are lost on server restart.
+  * A quota is local to each server, but for distributed queries the accumulated values
+  * for read rows and bytes are collected from all participating servers and accumulated locally.
+  */
+struct Quota : public IAccessEntity
+{
+    enum ResourceType
+    {
+        QUERIES,        /// Number of queries.
+        ERRORS,         /// Number of queries with exceptions.
+        RESULT_ROWS,    /// Number of rows returned as result.
+        RESULT_BYTES,   /// Number of bytes returned as result.
+        READ_ROWS,      /// Number of rows read from tables.
+        READ_BYTES,     /// Number of bytes read from tables.
+        EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
+    };
+    static constexpr size_t MAX_RESOURCE_TYPE = 7;
+
+    using ResourceAmount = UInt64;
+    static constexpr ResourceAmount UNLIMITED = 0;   /// 0 means unlimited.
+
+    /// Amount of resources available to consume for each duration.
+    struct Limits
+    {
+        ResourceAmount max[MAX_RESOURCE_TYPE];
+        std::chrono::seconds duration = std::chrono::seconds::zero();
+
+        /// Intervals can be randomized (to avoid DoS if intervals for many users end at one time).
+        bool randomize_interval = false;
+
+        Limits();
+        friend bool operator ==(const Limits & lhs, const Limits & rhs);
+        friend bool operator !=(const Limits & lhs, const Limits & rhs) { return !(lhs == rhs); }
+    };
+
+    std::vector<Limits> all_limits;
+
+    /// Key to share quota consumption.
+    /// Users with the same key share the same amount of resource.
+    enum class KeyType
+    {
+        NONE,                     /// All users share the same quota.
+        USER_NAME,                /// Connections with the same user name share the same quota.
+        IP_ADDRESS,               /// Connections from the same IP share the same quota.
+        CLIENT_KEY,               /// Client should explicitly supply a key to use.
+        CLIENT_KEY_OR_USER_NAME,  /// Same as CLIENT_KEY, but use USER_NAME if the client doesn't supply a key.
+        CLIENT_KEY_OR_IP_ADDRESS, /// Same as CLIENT_KEY, but use IP_ADDRESS if the client doesn't supply a key.
+    };
+    static constexpr size_t MAX_KEY_TYPE = 6;
+    KeyType key_type = KeyType::NONE;
+
+    /// Which roles or users should use this quota.
+    Strings roles;
+    bool all_roles = false;
+    Strings except_roles;
+
+    bool equal(const IAccessEntity & other) const override;
+    std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<Quota>(); }
+
+    static const char * getNameOfResourceType(ResourceType resource_type);
+    static const char * resourceTypeToKeyword(ResourceType resource_type);
+    static const char * resourceTypeToColumnName(ResourceType resource_type);
+    static const char * getNameOfKeyType(KeyType key_type);
+    static double executionTimeToSeconds(ResourceAmount ns);
+    static ResourceAmount secondsToExecutionTime(double s);
+};
+
+
+inline const char * Quota::getNameOfResourceType(ResourceType resource_type)
+{
+    switch (resource_type)
+    {
+        case Quota::QUERIES: return "queries";
+        case Quota::ERRORS: return "errors";
+        case Quota::RESULT_ROWS: return "result rows";
+        case Quota::RESULT_BYTES: return "result bytes";
+        case Quota::READ_ROWS: return "read rows";
+        case Quota::READ_BYTES: return "read bytes";
+        case Quota::EXECUTION_TIME: return "execution time";
+    }
+    __builtin_unreachable();
+}
+
+
+inline const char * Quota::resourceTypeToKeyword(ResourceType resource_type)
+{
+    switch (resource_type)
+    {
+        case Quota::QUERIES: return "QUERIES";
+        case Quota::ERRORS: return "ERRORS";
+        case Quota::RESULT_ROWS: return "RESULT ROWS";
+        case Quota::RESULT_BYTES: return "RESULT BYTES";
+        case Quota::READ_ROWS: return "READ ROWS";
+        case Quota::READ_BYTES: return "READ BYTES";
+        case Quota::EXECUTION_TIME: return "EXECUTION TIME";
+    }
+    __builtin_unreachable();
+}
+
+
+inline const char * Quota::getNameOfKeyType(KeyType key_type)
+{
+    switch (key_type)
+    {
+        case KeyType::NONE: return "none";
+        case KeyType::USER_NAME: return "user name";
+        case KeyType::IP_ADDRESS: return "ip address";
+        case KeyType::CLIENT_KEY: return "client key";
+        case KeyType::CLIENT_KEY_OR_USER_NAME: return "client key or user name";
+        case KeyType::CLIENT_KEY_OR_IP_ADDRESS: return "client key or ip address";
+    }
+    __builtin_unreachable();
+}
+
+
+inline double Quota::executionTimeToSeconds(ResourceAmount ns)
+{
+    return std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::nanoseconds{ns}).count();
+}
+
+inline Quota::ResourceAmount Quota::secondsToExecutionTime(double s)
+{
+    return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(s)).count();
+}
+
+
+using QuotaPtr = std::shared_ptr<const Quota>;
+}
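
A sketch of populating the struct above: a quota of 1000 queries and 30 minutes of execution time per hour, with one counter set per user name (illustrative; the name and limits are made up):

```cpp
#include <Access/Quota.h>

DB::QuotaPtr makePerUserQuota()
{
    auto quota = std::make_shared<DB::Quota>();
    quota->setName("per_user");
    quota->key_type = DB::Quota::KeyType::USER_NAME;   /// one counter set per user name
    quota->all_roles = true;                           /// applies to everyone not listed in except_roles

    DB::Quota::Limits limits;
    limits.duration = std::chrono::hours(1);
    limits.randomize_interval = true;                  /// staggers interval ends across users
    limits.max[DB::Quota::QUERIES] = 1000;
    limits.max[DB::Quota::EXECUTION_TIME] = DB::Quota::secondsToExecutionTime(1800.0);
    quota->all_limits.push_back(limits);

    return quota;
}
```
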
" + "Name of quota template: " + backQuote(quota_name), + ErrorCodes::QUOTA_EXPIRED); + } + + + static std::chrono::system_clock::time_point getEndOfInterval( + const Interval & interval, std::chrono::system_clock::time_point current_time, bool * counters_were_reset = nullptr) + { + auto & end_of_interval = interval.end_of_interval; + auto end_loaded = end_of_interval.load(); + auto end = std::chrono::system_clock::time_point{end_loaded}; + if (current_time < end) + { + if (counters_were_reset) + *counters_were_reset = false; + return end; + } + + const auto duration = interval.duration; + + do + { + end = end + (current_time - end + duration) / duration * duration; + if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) + { + boost::range::fill(interval.used, 0); + break; + } + end = std::chrono::system_clock::time_point{end_loaded}; + } + while (current_time >= end); + + if (counters_were_reset) + *counters_were_reset = true; + return end; + } + + + static void used( + const String & user_name, + const Intervals & intervals, + ResourceType resource_type, + ResourceAmount amount, + std::chrono::system_clock::time_point current_time, + bool check_exceeded) + { + for (const auto & interval : intervals.intervals) + { + ResourceAmount used = (interval.used[resource_type] += amount); + ResourceAmount max = interval.max[resource_type]; + if (max == Quota::UNLIMITED) + continue; + if (used > max) + { + bool counters_were_reset = false; + auto end_of_interval = getEndOfInterval(interval, current_time, &counters_were_reset); + if (counters_were_reset) + { + used = (interval.used[resource_type] += amount); + if ((used > max) && check_exceeded) + throwQuotaExceed(user_name, intervals.quota_name, resource_type, used, max, interval.duration, end_of_interval); + } + else if (check_exceeded) + throwQuotaExceed(user_name, intervals.quota_name, resource_type, used, max, interval.duration, end_of_interval); + } + } + } + + static void checkExceeded( + const String & user_name, + const Intervals & intervals, + ResourceType resource_type, + std::chrono::system_clock::time_point current_time) + { + for (const auto & interval : intervals.intervals) + { + ResourceAmount used = interval.used[resource_type]; + ResourceAmount max = interval.max[resource_type]; + if (max == Quota::UNLIMITED) + continue; + if (used > max) + { + bool used_counters_reset = false; + std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, &used_counters_reset); + if (!used_counters_reset) + throwQuotaExceed(user_name, intervals.quota_name, resource_type, used, max, interval.duration, end_of_interval); + } + } + } + + static void checkExceeded( + const String & user_name, + const Intervals & intervals, + std::chrono::system_clock::time_point current_time) + { + for (auto resource_type : ext::range_with_static_cast(Quota::MAX_RESOURCE_TYPE)) + checkExceeded(user_name, intervals, resource_type, current_time); + } +}; + + +QuotaContext::Interval & QuotaContext::Interval::operator =(const Interval & src) +{ + randomize_interval = src.randomize_interval; + duration = src.duration; + end_of_interval.store(src.end_of_interval.load()); + for (auto resource_type : ext::range(MAX_RESOURCE_TYPE)) + { + max[resource_type] = src.max[resource_type]; + used[resource_type].store(src.used[resource_type].load()); + } + return *this; +} + + +QuotaUsageInfo QuotaContext::Intervals::getUsageInfo(std::chrono::system_clock::time_point current_time) const +{ + QuotaUsageInfo info; + 
diff --git a/dbms/src/Access/QuotaContext.h b/dbms/src/Access/QuotaContext.h
new file mode 100644
index 00000000000..122d0df6ee7
--- /dev/null
+++ b/dbms/src/Access/QuotaContext.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#include <Access/Quota.h>
+#include <Poco/Net/IPAddress.h>
+#include <ext/shared_ptr_helper.h>
+#include <boost/noncopyable.hpp>
+#include <atomic>
+#include <chrono>
+#include <memory>
+#include <vector>
+
+
+namespace DB
+{
+struct QuotaUsageInfo;
+
+
+/// Instances of `QuotaContext` are used to track resource consumption.
+class QuotaContext : public boost::noncopyable
+{
+public:
+    using ResourceType = Quota::ResourceType;
+    using ResourceAmount = Quota::ResourceAmount;
+
+    /// The default constructor makes an unlimited quota.
+    QuotaContext();
+
+    ~QuotaContext();
+
+    /// Tracks resource consumption. If the quota is exceeded and `check_exceeded == true`, throws an exception.
+    void used(ResourceType resource_type, ResourceAmount amount, bool check_exceeded = true);
+    void used(const std::pair<ResourceType, ResourceAmount> & resource, bool check_exceeded = true);
+    void used(const std::pair<ResourceType, ResourceAmount> & resource1, const std::pair<ResourceType, ResourceAmount> & resource2, bool check_exceeded = true);
+    void used(const std::pair<ResourceType, ResourceAmount> & resource1, const std::pair<ResourceType, ResourceAmount> & resource2, const std::pair<ResourceType, ResourceAmount> & resource3, bool check_exceeded = true);
+    void used(const std::vector<std::pair<ResourceType, ResourceAmount>> & resources, bool check_exceeded = true);
+
+    /// Checks if the quota is exceeded. If so, throws an exception.
+    void checkExceeded();
+    void checkExceeded(ResourceType resource_type);
+
+    /// Returns the information about this quota context.
+    QuotaUsageInfo getUsageInfo() const;
+
+private:
+    friend class QuotaContextFactory;
+    friend struct ext::shared_ptr_helper<QuotaContext>;
+
+    /// Instances of this class are created by QuotaContextFactory.
+    QuotaContext(const String & user_name_, const Poco::Net::IPAddress & address_, const String & client_key_);
+
+    static constexpr size_t MAX_RESOURCE_TYPE = Quota::MAX_RESOURCE_TYPE;
+
+    struct Interval
+    {
+        mutable std::atomic<ResourceAmount> used[MAX_RESOURCE_TYPE];
+        ResourceAmount max[MAX_RESOURCE_TYPE];
+        std::chrono::seconds duration;
+        bool randomize_interval;
+        mutable std::atomic<std::chrono::system_clock::duration> end_of_interval;
+
+        Interval() {}
+        Interval(const Interval & src) { *this = src; }
+        Interval & operator =(const Interval & src);
+    };
+
+    struct Intervals
+    {
+        std::vector<Interval> intervals;
+        UUID quota_id;
+        String quota_name;
+        String quota_key;
+
+        QuotaUsageInfo getUsageInfo(std::chrono::system_clock::time_point current_time) const;
+    };
+
+    struct Impl;
+
+    const String user_name;
+    const Poco::Net::IPAddress address;
+    const String client_key;
+    std::shared_ptr<const Intervals> atomic_intervals;   /// Atomically swapped by the QuotaContextFactory.
+};
+
+using QuotaContextPtr = std::shared_ptr<QuotaContext>;
diff --git a/dbms/src/Access/QuotaContext.h b/dbms/src/Access/QuotaContext.h
new file mode 100644
index 00000000000..122d0df6ee7
--- /dev/null
+++ b/dbms/src/Access/QuotaContext.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+struct QuotaUsageInfo;
+
+
+/// Instances of `QuotaContext` are used to track resource consumption.
+class QuotaContext : public boost::noncopyable
+{
+public:
+    using ResourceType = Quota::ResourceType;
+    using ResourceAmount = Quota::ResourceAmount;
+
+    /// Default constructor makes an unlimited quota.
+    QuotaContext();
+
+    ~QuotaContext();
+
+    /// Tracks resource consumption. If the quota is exceeded and `check_exceeded == true`, throws an exception.
+    void used(ResourceType resource_type, ResourceAmount amount, bool check_exceeded = true);
+    void used(const std::pair<ResourceType, ResourceAmount> & resource, bool check_exceeded = true);
+    void used(const std::pair<ResourceType, ResourceAmount> & resource1, const std::pair<ResourceType, ResourceAmount> & resource2, bool check_exceeded = true);
+    void used(const std::pair<ResourceType, ResourceAmount> & resource1, const std::pair<ResourceType, ResourceAmount> & resource2, const std::pair<ResourceType, ResourceAmount> & resource3, bool check_exceeded = true);
+    void used(const std::vector<std::pair<ResourceType, ResourceAmount>> & resources, bool check_exceeded = true);
+
+    /// Checks if the quota is exceeded. If so, throws an exception.
+    void checkExceeded();
+    void checkExceeded(ResourceType resource_type);
+
+    /// Returns the information about this quota context.
+    QuotaUsageInfo getUsageInfo() const;
+
+private:
+    friend class QuotaContextFactory;
+    friend struct ext::shared_ptr_helper<QuotaContext>;
+
+    /// Instances of this class are created by QuotaContextFactory.
+    QuotaContext(const String & user_name_, const Poco::Net::IPAddress & address_, const String & client_key_);
+
+    static constexpr size_t MAX_RESOURCE_TYPE = Quota::MAX_RESOURCE_TYPE;
+
+    struct Interval
+    {
+        mutable std::atomic<ResourceAmount> used[MAX_RESOURCE_TYPE];
+        ResourceAmount max[MAX_RESOURCE_TYPE];
+        std::chrono::seconds duration;
+        bool randomize_interval;
+        mutable std::atomic<std::chrono::system_clock::duration> end_of_interval;
+
+        Interval() {}
+        Interval(const Interval & src) { *this = src; }
+        Interval & operator =(const Interval & src);
+    };
+
+    struct Intervals
+    {
+        std::vector<Interval> intervals;
+        UUID quota_id;
+        String quota_name;
+        String quota_key;
+
+        QuotaUsageInfo getUsageInfo(std::chrono::system_clock::time_point current_time) const;
+    };
+
+    struct Impl;
+
+    const String user_name;
+    const Poco::Net::IPAddress address;
+    const String client_key;
+    std::shared_ptr<const Intervals> atomic_intervals; /// Atomically replaced by QuotaContextFactory.
+};
+
+using QuotaContextPtr = std::shared_ptr<QuotaContext>;
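
Reviewer note: a minimal sketch of how the new tracking API is meant to be driven (hypothetical caller code, not part of this diff; `access_control_manager`, `rows` and `bytes` stand for objects computed elsewhere):

    // Obtain a per-(user, key) context once per session, then account usage on it.
    std::shared_ptr<QuotaContext> quota = access_control_manager.createQuotaContext(
        "alice", Poco::Net::IPAddress("127.0.0.1"), /* custom_quota_key = */ "");
    quota->used(Quota::QUERIES, 1);                                        // throws if the limit is exceeded
    quota->used({Quota::RESULT_ROWS, rows}, {Quota::RESULT_BYTES, bytes}); // two resources in one call
    quota->checkExceeded(Quota::ERRORS);                                   // re-check without consuming anything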
+
+
+/// The information about a quota context.
+struct QuotaUsageInfo
+{
+    using ResourceType = Quota::ResourceType;
+    using ResourceAmount = Quota::ResourceAmount;
+    static constexpr size_t MAX_RESOURCE_TYPE = Quota::MAX_RESOURCE_TYPE;
+
+    struct Interval
+    {
+        ResourceAmount used[MAX_RESOURCE_TYPE];
+        ResourceAmount max[MAX_RESOURCE_TYPE];
+        std::chrono::seconds duration = std::chrono::seconds::zero();
+        bool randomize_interval = false;
+        std::chrono::system_clock::time_point end_of_interval;
+        Interval();
+    };
+
+    std::vector<Interval> intervals;
+    UUID quota_id;
+    String quota_name;
+    String quota_key;
+    QuotaUsageInfo();
+};
+}
diff --git a/dbms/src/Access/QuotaContextFactory.cpp b/dbms/src/Access/QuotaContextFactory.cpp
new file mode 100644
index 00000000000..c6ecb947102
--- /dev/null
+++ b/dbms/src/Access/QuotaContextFactory.cpp
@@ -0,0 +1,299 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int QUOTA_REQUIRES_CLIENT_KEY;
+}
+
+
+namespace
+{
+    std::chrono::system_clock::duration randomDuration(std::chrono::seconds max)
+    {
+        auto count = std::chrono::duration_cast<std::chrono::system_clock::duration>(max).count();
+        std::uniform_int_distribution<Int64> distribution{0, count - 1};
+        return std::chrono::system_clock::duration(distribution(thread_local_rng));
+    }
+}
+
+
+void QuotaContextFactory::QuotaInfo::setQuota(const QuotaPtr & quota_, const UUID & quota_id_)
+{
+    quota = quota_;
+    quota_id = quota_id_;
+
+    boost::range::copy(quota->roles, std::inserter(roles, roles.end()));
+    all_roles = quota->all_roles;
+    boost::range::copy(quota->except_roles, std::inserter(except_roles, except_roles.end()));
+
+    rebuildAllIntervals();
+}
+
+
+bool QuotaContextFactory::QuotaInfo::canUseWithContext(const QuotaContext & context) const
+{
+    if (roles.count(context.user_name))
+        return true;
+
+    if (all_roles && !except_roles.count(context.user_name))
+        return true;
+
+    return false;
+}
+
+
+String QuotaContextFactory::QuotaInfo::calculateKey(const QuotaContext & context) const
+{
+    using KeyType = Quota::KeyType;
+    switch (quota->key_type)
+    {
+        case KeyType::NONE:
+            return "";
+        case KeyType::USER_NAME:
+            return context.user_name;
+        case KeyType::IP_ADDRESS:
+            return context.address.toString();
+        case KeyType::CLIENT_KEY:
+        {
+            if (!context.client_key.empty())
+                return context.client_key;
+            throw Exception(
+                "Quota " + quota->getName() + " (for user " + context.user_name + ") requires a client supplied key.",
+                ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY);
+        }
+        case KeyType::CLIENT_KEY_OR_USER_NAME:
+        {
+            if (!context.client_key.empty())
+                return context.client_key;
+            return context.user_name;
+        }
+        case KeyType::CLIENT_KEY_OR_IP_ADDRESS:
+        {
+            if (!context.client_key.empty())
+                return context.client_key;
+            return context.address.toString();
+        }
+    }
+    __builtin_unreachable();
+}
+
+
+std::shared_ptr QuotaContextFactory::QuotaInfo::getOrBuildIntervals(const String & key)
+{
+    auto it = key_to_intervals.find(key);
+    if (it != key_to_intervals.end())
+        return it->second;
+    return rebuildIntervals(key);
+}
+
+
+void QuotaContextFactory::QuotaInfo::rebuildAllIntervals()
+{
+    for (const String & key : key_to_intervals | boost::adaptors::map_keys)
+        rebuildIntervals(key);
+}
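
To make the key selection above concrete, here is how calculateKey() resolves for a context `{user_name: "alice", address: 10.0.0.5, client_key: "mobile-v2"}` (illustrative values):

    // KeyType::NONE                     -> ""
    // KeyType::USER_NAME                -> "alice"
    // KeyType::IP_ADDRESS               -> "10.0.0.5"
    // KeyType::CLIENT_KEY               -> "mobile-v2" (throws QUOTA_REQUIRES_CLIENT_KEY if the key is empty)
    // KeyType::CLIENT_KEY_OR_USER_NAME  -> "mobile-v2", falling back to "alice"
    // KeyType::CLIENT_KEY_OR_IP_ADDRESS -> "mobile-v2", falling back to "10.0.0.5"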
+
+
+std::shared_ptr QuotaContextFactory::QuotaInfo::rebuildIntervals(const String & key)
+{
+    auto new_intervals = std::make_shared<Intervals>();
+    new_intervals->quota_name = quota->getName();
+    new_intervals->quota_id = quota_id;
+    new_intervals->quota_key = key;
+    auto & intervals = new_intervals->intervals;
+    intervals.reserve(quota->all_limits.size());
+    constexpr size_t MAX_RESOURCE_TYPE = Quota::MAX_RESOURCE_TYPE;
+    for (const auto & limits : quota->all_limits)
+    {
+        intervals.emplace_back();
+        auto & interval = intervals.back();
+        interval.duration = limits.duration;
+        std::chrono::system_clock::time_point end_of_interval{};
+        interval.randomize_interval = limits.randomize_interval;
+        if (limits.randomize_interval)
+            end_of_interval += randomDuration(limits.duration);
+        interval.end_of_interval = end_of_interval.time_since_epoch();
+        for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
+        {
+            interval.max[resource_type] = limits.max[resource_type];
+            interval.used[resource_type] = 0;
+        }
+    }
+
+    /// Order intervals by duration from largest to smallest,
+    /// so that the largest interval on which the quota was exceeded is reported first.
+    struct GreaterByDuration
+    {
+        bool operator()(const Interval & lhs, const Interval & rhs) const { return lhs.duration > rhs.duration; }
+    };
+    boost::range::stable_sort(intervals, GreaterByDuration{});
+
+    auto it = key_to_intervals.find(key);
+    if (it == key_to_intervals.end())
+    {
+        /// Just put new intervals into the map.
+        key_to_intervals.try_emplace(key, new_intervals);
+    }
+    else
+    {
+        /// We need to keep usage information from the old intervals.
+        const auto & old_intervals = it->second->intervals;
+        for (auto & new_interval : new_intervals->intervals)
+        {
+            /// Check if an interval with the same duration is already in use.
+            auto lower_bound = boost::range::lower_bound(old_intervals, new_interval, GreaterByDuration{});
+            if ((lower_bound == old_intervals.end()) || (lower_bound->duration != new_interval.duration))
+                continue;
+
+            /// Found an interval with the same duration; copy its usage information into the new interval.
+            auto & current_interval = *lower_bound;
+            for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
+            {
+                new_interval.used[resource_type].store(current_interval.used[resource_type].load());
+                new_interval.end_of_interval.store(current_interval.end_of_interval.load());
+            }
+        }
+        it->second = new_intervals;
+    }
+
+    return new_intervals;
+}
+
+
+QuotaContextFactory::QuotaContextFactory(const AccessControlManager & access_control_manager_)
+    : access_control_manager(access_control_manager_)
+{
+}
+
+
+QuotaContextFactory::~QuotaContextFactory()
+{
+}
+
+
+std::shared_ptr<QuotaContext> QuotaContextFactory::createContext(const String & user_name, const Poco::Net::IPAddress & address, const String & client_key)
+{
+    std::lock_guard lock{mutex};
+    ensureAllQuotasRead();
+    auto context = ext::shared_ptr_helper<QuotaContext>::create(user_name, address, client_key);
+    contexts.push_back(context);
+    chooseQuotaForContext(context);
+    return context;
+}
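
Why rebuildIntervals() copies old counters, as a hedged example timeline (values invented for illustration):

    // t0: quota "limited" has one interval {duration: 1h}; a user has consumed 40 queries.
    // t1: users.xml is reloaded with the same 1h interval plus a new 24h interval.
    //     rebuildIntervals() finds the old 1h interval via lower_bound (same duration)
    //     and copies its used[] counters, so the 40 queries are not forgotten;
    //     the new 24h interval starts counting from zero.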
+
+
+void QuotaContextFactory::ensureAllQuotasRead()
+{
+    /// `mutex` is already locked.
+    if (all_quotas_read)
+        return;
+    all_quotas_read = true;
+
+    subscription = access_control_manager.subscribeForChanges<Quota>(
+        [&](const UUID & id, const AccessEntityPtr & entity)
+        {
+            if (entity)
+                quotaAddedOrChanged(id, typeid_cast<QuotaPtr>(entity));
+            else
+                quotaRemoved(id);
+        });
+
+    for (const UUID & quota_id : access_control_manager.findAll<Quota>())
+    {
+        auto quota = access_control_manager.tryRead<Quota>(quota_id);
+        if (quota)
+            all_quotas.emplace(quota_id, QuotaInfo(quota, quota_id));
+    }
+}
+
+
+void QuotaContextFactory::quotaAddedOrChanged(const UUID & quota_id, const std::shared_ptr<const Quota> & new_quota)
+{
+    std::lock_guard lock{mutex};
+    auto it = all_quotas.find(quota_id);
+    if (it == all_quotas.end())
+    {
+        it = all_quotas.emplace(quota_id, QuotaInfo(new_quota, quota_id)).first;
+    }
+    else
+    {
+        if (it->second.quota == new_quota)
+            return;
+    }
+
+    auto & info = it->second;
+    info.setQuota(new_quota, quota_id);
+    chooseQuotaForAllContexts();
+}
+
+
+void QuotaContextFactory::quotaRemoved(const UUID & quota_id)
+{
+    std::lock_guard lock{mutex};
+    all_quotas.erase(quota_id);
+    chooseQuotaForAllContexts();
+}
+
+
+void QuotaContextFactory::chooseQuotaForAllContexts()
+{
+    /// `mutex` is already locked.
+    boost::range::remove_erase_if(
+        contexts,
+        [&](const std::weak_ptr<QuotaContext> & weak)
+        {
+            auto context = weak.lock();
+            if (!context)
+                return true; // remove from the `contexts` list.
+            chooseQuotaForContext(context);
+            return false; // keep in the `contexts` list.
+        });
+}
+
+void QuotaContextFactory::chooseQuotaForContext(const std::shared_ptr<QuotaContext> & context)
+{
+    /// `mutex` is already locked.
+    std::shared_ptr<const Intervals> intervals;
+    for (auto & info : all_quotas | boost::adaptors::map_values)
+    {
+        if (info.canUseWithContext(*context))
+        {
+            String key = info.calculateKey(*context);
+            intervals = info.getOrBuildIntervals(key);
+            break;
+        }
+    }
+
+    if (!intervals)
+        intervals = std::make_shared<Intervals>(); /// No quota == no limits.
+
+    std::atomic_store(&context->atomic_intervals, intervals);
+}
+
+
+std::vector<QuotaUsageInfo> QuotaContextFactory::getUsageInfo() const
+{
+    std::lock_guard lock{mutex};
+    std::vector<QuotaUsageInfo> all_infos;
+    auto current_time = std::chrono::system_clock::now();
+    for (const auto & info : all_quotas | boost::adaptors::map_values)
+    {
+        for (const auto & intervals : info.key_to_intervals | boost::adaptors::map_values)
+            all_infos.push_back(intervals->getUsageInfo(current_time));
+    }
+    return all_infos;
+}
+}
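
A sketch of how the usage snapshot could be consumed, e.g. by a system table or periodic log line (hypothetical glue code; `manager` is an AccessControlManager obtained elsewhere):

    for (const QuotaUsageInfo & info : manager.getQuotaUsageInfo())
        for (const auto & interval : info.intervals)
            LOG_DEBUG(log, info.quota_name << " key=" << info.quota_key
                << " queries=" << interval.used[Quota::QUERIES]
                << "/" << interval.max[Quota::QUERIES]
                << " per " << interval.duration.count() << "s");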
diff --git a/dbms/src/Access/QuotaContextFactory.h b/dbms/src/Access/QuotaContextFactory.h
new file mode 100644
index 00000000000..159ffe1fa09
--- /dev/null
+++ b/dbms/src/Access/QuotaContextFactory.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+class AccessControlManager;
+
+
+/// Stores information about how much of each resource has been consumed and how much is left.
+class QuotaContextFactory
+{
+public:
+    QuotaContextFactory(const AccessControlManager & access_control_manager_);
+    ~QuotaContextFactory();
+
+    QuotaContextPtr createContext(const String & user_name, const Poco::Net::IPAddress & address, const String & client_key);
+    std::vector<QuotaUsageInfo> getUsageInfo() const;
+
+private:
+    using Interval = QuotaContext::Interval;
+    using Intervals = QuotaContext::Intervals;
+
+    struct QuotaInfo
+    {
+        QuotaInfo(const QuotaPtr & quota_, const UUID & quota_id_) { setQuota(quota_, quota_id_); }
+        void setQuota(const QuotaPtr & quota_, const UUID & quota_id_);
+
+        bool canUseWithContext(const QuotaContext & context) const;
+        String calculateKey(const QuotaContext & context) const;
+        std::shared_ptr<const Intervals> getOrBuildIntervals(const String & key);
+        std::shared_ptr<const Intervals> rebuildIntervals(const String & key);
+        void rebuildAllIntervals();
+
+        QuotaPtr quota;
+        UUID quota_id;
+        std::unordered_set<String> roles;
+        bool all_roles = false;
+        std::unordered_set<String> except_roles;
+        std::unordered_map<String, std::shared_ptr<const Intervals>> key_to_intervals;
+    };
+
+    void ensureAllQuotasRead();
+    void quotaAddedOrChanged(const UUID & quota_id, const std::shared_ptr<const Quota> & new_quota);
+    void quotaRemoved(const UUID & quota_id);
+    void chooseQuotaForAllContexts();
+    void chooseQuotaForContext(const std::shared_ptr<QuotaContext> & context);
+
+    const AccessControlManager & access_control_manager;
+    mutable std::mutex mutex;
+    std::unordered_map<UUID, QuotaInfo> all_quotas;
+    bool all_quotas_read = false;
+    IAccessStorage::SubscriptionPtr subscription;
+    std::vector<std::weak_ptr<QuotaContext>> contexts;
+};
+}
diff --git a/dbms/src/Access/UsersConfigAccessStorage.cpp b/dbms/src/Access/UsersConfigAccessStorage.cpp
new file mode 100644
index 00000000000..d417968bb64
--- /dev/null
+++ b/dbms/src/Access/UsersConfigAccessStorage.cpp
@@ -0,0 +1,207 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+namespace
+{
+    char getTypeChar(std::type_index type)
+    {
+        if (type == typeid(Quota))
+            return 'Q';
+        return 0;
+    }
+
+
+    UUID generateID(std::type_index type, const String & name)
+    {
+        Poco::MD5Engine md5;
+        md5.update(name);
+        char type_storage_chars[] = " USRSXML";
+        type_storage_chars[0] = getTypeChar(type);
+        md5.update(type_storage_chars, strlen(type_storage_chars));
+        UUID result;
+        memcpy(&result, md5.digest().data(), md5.digestLength());
+        return result;
+    }
+
+
+    UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getFullName()); }
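
The MD5-based ID above is deterministic: the same (type, name) pair always yields the same UUID, so entities keep their identity across config reloads (illustrative check):

    UUID a = generateID(typeid(Quota), "default");
    UUID b = generateID(typeid(Quota), "default");
    assert(a == b); // stable: derived from the name, not from creation time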
+
+    QuotaPtr parseQuota(const Poco::Util::AbstractConfiguration & config, const String & quota_name, const Strings & user_names)
+    {
+        auto quota = std::make_shared<Quota>();
+        quota->setName(quota_name);
+
+        using KeyType = Quota::KeyType;
+        String quota_config = "quotas." + quota_name;
+        if (config.has(quota_config + ".keyed_by_ip"))
+            quota->key_type = KeyType::IP_ADDRESS;
+        else if (config.has(quota_config + ".keyed"))
+            quota->key_type = KeyType::CLIENT_KEY_OR_USER_NAME;
+        else
+            quota->key_type = KeyType::USER_NAME;
+
+        Poco::Util::AbstractConfiguration::Keys interval_keys;
+        config.keys(quota_config, interval_keys);
+
+        for (const String & interval_key : interval_keys)
+        {
+            if (!startsWith(interval_key, "interval"))
+                continue;
+
+            String interval_config = quota_config + "." + interval_key;
+            std::chrono::seconds duration{config.getInt(interval_config + ".duration", 0)};
+            if (duration.count() <= 0) /// Skip quotas with non-positive duration.
+                continue;
+
+            quota->all_limits.emplace_back();
+            auto & limits = quota->all_limits.back();
+            limits.duration = duration;
+            limits.randomize_interval = config.getBool(interval_config + ".randomize", false);
+
+            using ResourceType = Quota::ResourceType;
+            limits.max[ResourceType::QUERIES] = config.getUInt64(interval_config + ".queries", Quota::UNLIMITED);
+            limits.max[ResourceType::ERRORS] = config.getUInt64(interval_config + ".errors", Quota::UNLIMITED);
+            limits.max[ResourceType::RESULT_ROWS] = config.getUInt64(interval_config + ".result_rows", Quota::UNLIMITED);
+            limits.max[ResourceType::RESULT_BYTES] = config.getUInt64(interval_config + ".result_bytes", Quota::UNLIMITED);
+            limits.max[ResourceType::READ_ROWS] = config.getUInt64(interval_config + ".read_rows", Quota::UNLIMITED);
+            limits.max[ResourceType::READ_BYTES] = config.getUInt64(interval_config + ".read_bytes", Quota::UNLIMITED);
+            limits.max[ResourceType::EXECUTION_TIME] = Quota::secondsToExecutionTime(config.getUInt64(interval_config + ".execution_time", Quota::UNLIMITED));
+        }
+
+        quota->roles = user_names;
+
+        return quota;
+    }
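
For reference, a users.xml fragment that parseQuota() above accepts (element names inferred from the config keys read by the code; values are illustrative):

    <quotas>
        <limited>
            <interval>
                <duration>3600</duration>
                <queries>100</queries>
                <errors>10</errors>
                <execution_time>600</execution_time>
                <randomize>true</randomize>
            </interval>
        </limited>
    </quotas>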
+
+
+    std::vector<QuotaPtr> parseQuotas(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log)
+    {
+        Poco::Util::AbstractConfiguration::Keys user_names;
+        config.keys("users", user_names);
+        std::unordered_map<String, Strings> quota_to_user_names;
+        for (const auto & user_name : user_names)
+        {
+            if (config.has("users." + user_name + ".quota"))
+                quota_to_user_names[config.getString("users." + user_name + ".quota")].push_back(user_name);
+        }
+
+        Poco::Util::AbstractConfiguration::Keys quota_names;
+        config.keys("quotas", quota_names);
+        std::vector<QuotaPtr> quotas;
+        quotas.reserve(quota_names.size());
+        for (const auto & quota_name : quota_names)
+        {
+            try
+            {
+                auto it = quota_to_user_names.find(quota_name);
+                const Strings quota_users = (it != quota_to_user_names.end()) ? std::move(it->second) : Strings{};
+                quotas.push_back(parseQuota(config, quota_name, quota_users));
+            }
+            catch (...)
+            {
+                tryLogCurrentException(log, "Could not parse quota " + backQuote(quota_name));
+            }
+        }
+        return quotas;
+    }
+}
+
+
+UsersConfigAccessStorage::UsersConfigAccessStorage() : IAccessStorage("users.xml")
+{
+}
+
+
+UsersConfigAccessStorage::~UsersConfigAccessStorage() {}
+
+
+void UsersConfigAccessStorage::loadFromConfig(const Poco::Util::AbstractConfiguration & config)
+{
+    std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
+    for (const auto & entity : parseQuotas(config, getLogger()))
+        all_entities.emplace_back(generateID(*entity), entity);
+    memory_storage.setAll(all_entities);
+}
+
+
+std::optional<UUID> UsersConfigAccessStorage::findImpl(std::type_index type, const String & name) const
+{
+    return memory_storage.find(type, name);
+}
+
+
+std::vector<UUID> UsersConfigAccessStorage::findAllImpl(std::type_index type) const
+{
+    return memory_storage.findAll(type);
+}
+
+
+bool UsersConfigAccessStorage::existsImpl(const UUID & id) const
+{
+    return memory_storage.exists(id);
+}
+
+
+AccessEntityPtr UsersConfigAccessStorage::readImpl(const UUID & id) const
+{
+    return memory_storage.read(id);
+}
+
+
+String UsersConfigAccessStorage::readNameImpl(const UUID & id) const
+{
+    return memory_storage.readName(id);
+}
+
+
+UUID UsersConfigAccessStorage::insertImpl(const AccessEntityPtr & entity, bool)
+{
+    throwReadonlyCannotInsert(entity->getType(), entity->getFullName());
+}
+
+
+void UsersConfigAccessStorage::removeImpl(const UUID & id)
+{
+    auto entity = read(id);
+    throwReadonlyCannotRemove(entity->getType(), entity->getFullName());
+}
+
+
+void UsersConfigAccessStorage::updateImpl(const UUID & id, const UpdateFunc &)
+{
+    auto entity = read(id);
+    throwReadonlyCannotUpdate(entity->getType(), entity->getFullName());
+}
+
+
+IAccessStorage::SubscriptionPtr UsersConfigAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
+{
+    return memory_storage.subscribeForChanges(id, handler);
+}
+
+
+IAccessStorage::SubscriptionPtr UsersConfigAccessStorage::subscribeForChangesImpl(std::type_index type, const OnChangedHandler & handler) const
+{
+    return memory_storage.subscribeForChanges(type, handler);
+}
+
+
+bool UsersConfigAccessStorage::hasSubscriptionImpl(const UUID & id) const
+{
+    return memory_storage.hasSubscription(id);
+}
+
+
+bool UsersConfigAccessStorage::hasSubscriptionImpl(std::type_index type) const
+{
+    return memory_storage.hasSubscription(type);
+}
+}
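
In short, this storage is a read-only snapshot: reads are served from `memory_storage`, and any mutation throws. A hedged sketch of the intended behavior (assuming the generic find<>() wrapper declared in IAccessStorage):

    UsersConfigAccessStorage storage;
    storage.loadFromConfig(users_config);       // `users_config` loaded elsewhere
    auto id = storage.find<Quota>("limited");   // ok: lookup by type and name
    // storage.remove(*id);                     // would throw (readonly storage)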
diff --git a/dbms/src/Access/UsersConfigAccessStorage.h b/dbms/src/Access/UsersConfigAccessStorage.h
new file mode 100644
index 00000000000..9b0bf2ed17c
--- /dev/null
+++ b/dbms/src/Access/UsersConfigAccessStorage.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include
+
+
+namespace Poco
+{
+    namespace Util
+    {
+        class AbstractConfiguration;
+    }
+}
+
+
+namespace DB
+{
+/// Implementation of IAccessStorage which loads everything from users.xml periodically.
+class UsersConfigAccessStorage : public IAccessStorage
+{
+public:
+    UsersConfigAccessStorage();
+    ~UsersConfigAccessStorage() override;
+
+    void loadFromConfig(const Poco::Util::AbstractConfiguration & config);
+
+private:
+    std::optional<UUID> findImpl(std::type_index type, const String & name) const override;
+    std::vector<UUID> findAllImpl(std::type_index type) const override;
+    bool existsImpl(const UUID & id) const override;
+    AccessEntityPtr readImpl(const UUID & id) const override;
+    String readNameImpl(const UUID & id) const override;
+    UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
+    void removeImpl(const UUID & id) override;
+    void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
+    SubscriptionPtr subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
+    SubscriptionPtr subscribeForChangesImpl(std::type_index type, const OnChangedHandler & handler) const override;
+    bool hasSubscriptionImpl(const UUID & id) const override;
+    bool hasSubscriptionImpl(std::type_index type) const override;
+
+    MemoryAccessStorage memory_storage;
+};
+}
diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h
index 5c6f7f0fdd5..73fd5a18f0b 100644
--- a/dbms/src/Columns/ColumnDecimal.h
+++ b/dbms/src/Columns/ColumnDecimal.h
@@ -96,6 +96,7 @@ public:
     void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
     void insertData(const char * pos, size_t /*length*/) override;
     void insertDefault() override { data.push_back(T()); }
+    virtual void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
     void insert(const Field & x) override { data.push_back(DB::get<NearestFieldType<T>>(x)); }
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h
index 91f0e92c0a9..a91a82d8524 100644
--- a/dbms/src/Columns/ColumnFixedString.h
+++ b/dbms/src/Columns/ColumnFixedString.h
@@ -92,6 +92,11 @@ public:
         chars.resize_fill(chars.size() + n);
     }
 
+    virtual void insertManyDefaults(size_t length) override
+    {
+        chars.resize_fill(chars.size() + n * length);
+    }
+
     void popBack(size_t elems) override
     {
         chars.resize_assume_reserved(chars.size() - n * elems);
diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h
index 7c686f79767..8f1eced92f9 100644
--- a/dbms/src/Columns/ColumnString.h
+++ b/dbms/src/Columns/ColumnString.h
@@ -205,6 +205,13 @@ public:
         offsets.push_back(offsets.back() + 1);
     }
 
+    virtual void insertManyDefaults(size_t length) override
+    {
+        chars.resize_fill(chars.size() + length);
+        for (size_t i = 0; i < length; ++i)
+            offsets.push_back(offsets.back() + 1);
+    }
+
     int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
     {
         const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h
index 072f9b48960..85ca1839853 100644
--- a/dbms/src/Columns/ColumnVector.h
+++ b/dbms/src/Columns/ColumnVector.h
@@ -144,6 +144,11 @@ public:
         data.push_back(T());
     }
 
+    virtual void insertManyDefaults(size_t length) override
+    {
+        data.resize_fill(data.size() + length, T());
+    }
+
     void popBack(size_t n) override
    {
         data.resize_assume_reserved(data.size() - n);
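
The motivation for the new insertManyDefaults() overrides, as a before/after sketch (hypothetical helper; IColumn is the common base class):

    void padWithDefaults(IColumn & col, size_t length)
    {
        for (size_t i = 0; i < length; ++i)
            col.insertDefault();          // before: one virtual call per row
    }
    // after this diff: col.insertManyDefaults(length) does a single bulk resize_fill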
diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp
index b18fef232ad..d780041f5e2 100644
--- a/dbms/src/Common/ErrorCodes.cpp
+++ b/dbms/src/Common/ErrorCodes.cpp
@@ -466,6 +466,12 @@ namespace ErrorCodes
     extern const int INCORRECT_DICTIONARY_DEFINITION = 489;
     extern const int CANNOT_FORMAT_DATETIME = 490;
     extern const int UNACCEPTABLE_URL = 491;
+    extern const int ACCESS_ENTITY_NOT_FOUND = 492;
+    extern const int ACCESS_ENTITY_ALREADY_EXISTS = 493;
+    extern const int ACCESS_ENTITY_FOUND_DUPLICATES = 494;
+    extern const int ACCESS_ENTITY_STORAGE_READONLY = 495;
+    extern const int QUOTA_REQUIRES_CLIENT_KEY = 496;
+    extern const int NOT_ENOUGH_PRIVILEGES = 497;
 
     extern const int KEEPER_EXCEPTION = 999;
     extern const int POCO_EXCEPTION = 1000;
diff --git a/dbms/src/Common/IntervalKind.cpp b/dbms/src/Common/IntervalKind.cpp
new file mode 100644
index 00000000000..9443844a54b
--- /dev/null
+++ b/dbms/src/Common/IntervalKind.cpp
@@ -0,0 +1,162 @@
+#include
+#include
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+
+const char * IntervalKind::toString() const
+{
+    switch (kind)
+    {
+        case IntervalKind::Second: return "Second";
+        case IntervalKind::Minute: return "Minute";
+        case IntervalKind::Hour: return "Hour";
+        case IntervalKind::Day: return "Day";
+        case IntervalKind::Week: return "Week";
+        case IntervalKind::Month: return "Month";
+        case IntervalKind::Quarter: return "Quarter";
+        case IntervalKind::Year: return "Year";
+    }
+    __builtin_unreachable();
+}
+
+
+Int32 IntervalKind::toAvgSeconds() const
+{
+    switch (kind)
+    {
+        case IntervalKind::Second: return 1;
+        case IntervalKind::Minute: return 60;
+        case IntervalKind::Hour: return 3600;
+        case IntervalKind::Day: return 86400;
+        case IntervalKind::Week: return 604800;
+        case IntervalKind::Month: return 2629746;   /// Exactly 1/12 of a year.
+        case IntervalKind::Quarter: return 7889238; /// Exactly 1/4 of a year.
+        case IntervalKind::Year: return 31556952;   /// The average length of a Gregorian year is 365.2425 days.
+    }
+    __builtin_unreachable();
+}
+
+
+IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds)
+{
+    if (num_seconds)
+    {
+        if (!(num_seconds % 31556952))
+            return IntervalKind::Year;
+        if (!(num_seconds % 7889238))
+            return IntervalKind::Quarter;
+        if (!(num_seconds % 604800))
+            return IntervalKind::Week;
+        if (!(num_seconds % 2629746))
+            return IntervalKind::Month;
+        if (!(num_seconds % 86400))
+            return IntervalKind::Day;
+        if (!(num_seconds % 3600))
+            return IntervalKind::Hour;
+        if (!(num_seconds % 60))
+            return IntervalKind::Minute;
+    }
+    return IntervalKind::Second;
+}
+
+
+const char * IntervalKind::toKeyword() const
+{
+    switch (kind)
+    {
+        case IntervalKind::Second: return "SECOND";
+        case IntervalKind::Minute: return "MINUTE";
+        case IntervalKind::Hour: return "HOUR";
+        case IntervalKind::Day: return "DAY";
+        case IntervalKind::Week: return "WEEK";
+        case IntervalKind::Month: return "MONTH";
+        case IntervalKind::Quarter: return "QUARTER";
+        case IntervalKind::Year: return "YEAR";
+    }
+    __builtin_unreachable();
+}
+
+
+const char * IntervalKind::toDateDiffUnit() const
+{
+    switch (kind)
+    {
+        case IntervalKind::Second:
+            return "second";
+        case IntervalKind::Minute:
+            return "minute";
+        case IntervalKind::Hour:
+            return "hour";
+        case IntervalKind::Day:
+            return "day";
+        case IntervalKind::Week:
+            return "week";
+        case IntervalKind::Month:
+            return "month";
+        case IntervalKind::Quarter:
+            return "quarter";
+        case IntervalKind::Year:
+            return "year";
+    }
+    __builtin_unreachable();
+}
+
+
+const char * IntervalKind::toNameOfFunctionToIntervalDataType() const
+{
+    switch (kind)
+    {
+        case IntervalKind::Second:
+            return "toIntervalSecond";
+        case IntervalKind::Minute:
+            return "toIntervalMinute";
+        case IntervalKind::Hour:
+            return "toIntervalHour";
+        case IntervalKind::Day:
+            return "toIntervalDay";
+        case IntervalKind::Week:
+            return "toIntervalWeek";
+        case IntervalKind::Month:
+            return "toIntervalMonth";
+        case IntervalKind::Quarter:
+            return "toIntervalQuarter";
+        case IntervalKind::Year:
+            return "toIntervalYear";
+    }
+    __builtin_unreachable();
+}
+
+
+const char * IntervalKind::toNameOfFunctionExtractTimePart() const
+{
+    switch (kind)
+    {
+        case IntervalKind::Second:
+            return "toSecond";
+        case IntervalKind::Minute:
+            return "toMinute";
+        case IntervalKind::Hour:
+            return "toHour";
+        case IntervalKind::Day:
+            return "toDayOfMonth";
+        case IntervalKind::Week:
+            // TODO: SELECT toRelativeWeekNum(toDate('2017-06-15')) - toRelativeWeekNum(toStartOfYear(toDate('2017-06-15')))
+            // else if (ParserKeyword("WEEK").ignore(pos, expected))
+            //     function_name = "toRelativeWeekNum";
+            throw Exception("The syntax 'EXTRACT(WEEK FROM date)' is not supported, cannot extract the number of a week", ErrorCodes::SYNTAX_ERROR);
+        case IntervalKind::Month:
+            return "toMonth";
+        case IntervalKind::Quarter:
+            return "toQuarter";
+        case IntervalKind::Year:
+            return "toYear";
+    }
+    __builtin_unreachable();
+}
+}
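
A quick illustration of the fromAvgSeconds() rounding rules implemented above (derived directly from the divisibility checks):

    // fromAvgSeconds(31556952) -> Year
    // fromAvgSeconds(604800)   -> Week    (tested before Month/Day, see the order above)
    // fromAvgSeconds(7200)     -> Hour    (2 hours)
    // fromAvgSeconds(61)       -> Second  (fallback: not a multiple of any larger unit)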
diff --git a/dbms/src/Common/IntervalKind.h b/dbms/src/Common/IntervalKind.h
new file mode 100644
index 00000000000..9b7c4bd504e
--- /dev/null
+++ b/dbms/src/Common/IntervalKind.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include
+
+
+namespace DB
+{
+/// Kind of a temporal interval.
+struct IntervalKind
+{
+    enum Kind
+    {
+        Second,
+        Minute,
+        Hour,
+        Day,
+        Week,
+        Month,
+        Quarter,
+        Year,
+    };
+    Kind kind = Second;
+
+    IntervalKind(Kind kind_ = Second) : kind(kind_) {}
+    operator Kind() const { return kind; }
+
+    const char * toString() const;
+
+    /// Returns the number of seconds in one interval.
+    /// For `Month`, `Quarter` and `Year` the function returns an average number of seconds.
+    Int32 toAvgSeconds() const;
+
+    /// Chooses an interval kind based on a number of seconds.
+    /// For example, `IntervalKind::fromAvgSeconds(3600)` returns `IntervalKind::Hour`.
+    static IntervalKind fromAvgSeconds(Int64 num_seconds);
+
+    /// Returns an uppercased version of what `toString()` returns.
+    const char * toKeyword() const;
+
+    /// Returns the string which can be passed to the `unit` parameter of the dateDiff() function.
+    /// For example, `IntervalKind{IntervalKind::Day}.toDateDiffUnit()` returns "day".
+    const char * toDateDiffUnit() const;
+
+    /// Returns the name of the function converting a number to the interval data type.
+    /// For example, `IntervalKind{IntervalKind::Day}.toNameOfFunctionToIntervalDataType()`
+    /// returns "toIntervalDay".
+    const char * toNameOfFunctionToIntervalDataType() const;
+
+    /// Returns the name of the function extracting a time part from a date or a time.
+    /// For example, `IntervalKind{IntervalKind::Day}.toNameOfFunctionExtractTimePart()`
+    /// returns "toDayOfMonth".
+    const char * toNameOfFunctionExtractTimePart() const;
+};
+}
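
And the helpers side by side for one kind (values follow the switch statements in IntervalKind.cpp above):

    IntervalKind kind = IntervalKind::Quarter;
    kind.toString();                            // "Quarter"
    kind.toKeyword();                           // "QUARTER"
    kind.toDateDiffUnit();                      // "quarter"
    kind.toNameOfFunctionToIntervalDataType();  // "toIntervalQuarter"
    kind.toNameOfFunctionExtractTimePart();     // "toQuarter"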
diff --git a/dbms/src/Common/intExp.h b/dbms/src/Common/intExp.h
index 163d835819f..0212eb4c084 100644
--- a/dbms/src/Common/intExp.h
+++ b/dbms/src/Common/intExp.h
@@ -3,11 +3,18 @@
 
 #include
 #include
-#include
+
+// Also defined in Core/Defines.h
+#if !defined(NO_SANITIZE_UNDEFINED)
+#if defined(__clang__)
+    #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
+#else
+    #define NO_SANITIZE_UNDEFINED
+#endif
+#endif
 
 /// On overflow, the function returns an unspecified value.
-
 inline NO_SANITIZE_UNDEFINED uint64_t intExp2(int x)
 {
     return 1ULL << x;
diff --git a/dbms/src/Common/typeid_cast.h b/dbms/src/Common/typeid_cast.h
index 9285355e788..29ad2e520c0 100644
--- a/dbms/src/Common/typeid_cast.h
+++ b/dbms/src/Common/typeid_cast.h
@@ -3,8 +3,10 @@
 
 #include
 #include
 #include
+#include
 
 #include
+#include
 #include
 #include
 
@@ -27,7 +29,7 @@ std::enable_if_t<std::is_reference_v<To>, To> typeid_cast(From & from)
 {
     try
     {
-        if (typeid(from) == typeid(To))
+        if ((typeid(From) == typeid(To)) || (typeid(from) == typeid(To)))
             return static_cast<To>(from);
     }
     catch (const std::exception & e)
@@ -39,12 +41,13 @@ std::enable_if_t<std::is_reference_v<To>, To> typeid_cast(From & from)
         DB::ErrorCodes::BAD_CAST);
 }
 
+
 template <typename To, typename From>
-To typeid_cast(From * from)
+std::enable_if_t<std::is_pointer_v<To>, To> typeid_cast(From * from)
 {
     try
     {
-        if (typeid(*from) == typeid(std::remove_pointer_t<To>))
+        if ((typeid(From) == typeid(std::remove_pointer_t<To>)) || (typeid(*from) == typeid(std::remove_pointer_t<To>)))
             return static_cast<To>(from);
         else
             return nullptr;
@@ -54,3 +57,20 @@ To typeid_cast(From * from)
         throw DB::Exception(e.what(), DB::ErrorCodes::BAD_CAST);
     }
 }
+
+
+template <typename To, typename From>
+std::enable_if_t<is_shared_ptr_v<To>, To> typeid_cast(const std::shared_ptr<From> & from)
+{
+    try
+    {
+        if ((typeid(From) == typeid(typename To::element_type)) || (typeid(*from) == typeid(typename To::element_type)))
+            return std::static_pointer_cast<typename To::element_type>(from);
+        else
+            return nullptr;
+    }
+    catch (const std::exception & e)
+    {
+        throw DB::Exception(e.what(), DB::ErrorCodes::BAD_CAST);
+    }
+}
diff --git a/dbms/src/Core/DecimalComparison.h b/dbms/src/Core/DecimalComparison.h
index bc676ae86c8..cb332ad3779 100644
--- a/dbms/src/Core/DecimalComparison.h
+++ b/dbms/src/Core/DecimalComparison.h
@@ -88,9 +88,9 @@ public:
         Shift shift;
         if (scale_a < scale_b)
-            shift.a = DataTypeDecimal<B>(maxDecimalPrecision<B>(), scale_b).getScaleMultiplier(scale_b - scale_a);
+            shift.a = B::getScaleMultiplier(scale_b - scale_a);
         if (scale_a > scale_b)
-            shift.b = DataTypeDecimal<A>(maxDecimalPrecision<A>(), scale_a).getScaleMultiplier(scale_a - scale_b);
+            shift.b = A::getScaleMultiplier(scale_a - scale_b);
 
         return applyWithScale(a, b, shift);
     }
diff --git a/dbms/src/Core/Field.cpp b/dbms/src/Core/Field.cpp
index 9d27e33c414..505627aaedb 100644
--- a/dbms/src/Core/Field.cpp
+++ b/dbms/src/Core/Field.cpp
@@ -300,21 +300,6 @@ namespace DB
     }
 
 
-    template <> Decimal32 DecimalField<Decimal32>::getScaleMultiplier() const
-    {
-        return DataTypeDecimal<Decimal32>::getScaleMultiplier(scale);
-    }
-
-    template <> Decimal64 DecimalField<Decimal64>::getScaleMultiplier() const
-    {
-        return DataTypeDecimal<Decimal64>::getScaleMultiplier(scale);
-    }
-
-    template <> Decimal128 DecimalField<Decimal128>::getScaleMultiplier() const
-    {
-        return DataTypeDecimal<Decimal128>::getScaleMultiplier(scale);
-    }
-
     template <typename T>
     static bool decEqual(T x, T y, UInt32 x_scale, UInt32 y_scale)
     {
diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h
index 3d34502c339..885545844f4 100644
--- a/dbms/src/Core/Field.h
+++ b/dbms/src/Core/Field.h
@@ -102,7 +102,7 @@ public:
     operator T() const { return dec; }
     T getValue() const { return dec; }
-    T getScaleMultiplier() const;
+    T getScaleMultiplier() const { return T::getScaleMultiplier(scale); }
     UInt32 getScale() const { return scale; }
 
     template
diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h
index bbc309aff94..511446b442f 100644
--- a/dbms/src/Core/Types.h
+++ b/dbms/src/Core/Types.h
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 
 
 namespace DB
@@ -145,6 +146,8 @@ struct Decimal
     const Decimal & operator /= (const T & x) { value /= x; return *this; }
     const Decimal & operator %= (const T & x) { value %= x; return *this; }
 
+    static T getScaleMultiplier(UInt32 scale);
+
     T value;
 };
 
@@ -170,6 +173,10 @@ template <> struct NativeType<Decimal32> { using Type = Int32; };
 template <> struct NativeType<Decimal64> { using Type = Int64; };
 template <> struct NativeType<Decimal128> { using Type = Int128; };
 
+template <> inline Int32 Decimal32::getScaleMultiplier(UInt32 scale) { return common::exp10_i32(scale); }
+template <> inline Int64 Decimal64::getScaleMultiplier(UInt32 scale) { return common::exp10_i64(scale); }
+template <> inline Int128 Decimal128::getScaleMultiplier(UInt32 scale) { return common::exp10_i128(scale); }
+
 inline const char * getTypeName(TypeIndex idx)
 {
     switch (idx)
diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp
index 2e30749e89f..df81f26f665 100644
--- a/dbms/src/DataStreams/IBlockInputStream.cpp
+++ b/dbms/src/DataStreams/IBlockInputStream.cpp
@@ -2,7 +2,7 @@
 
 #include
 #include
-#include
+#include
 
 #include
 #include
@@ -70,7 +70,7 @@ Block IBlockInputStream::read()
         if (limits.mode == LIMITS_CURRENT && !limits.size_limits.check(info.rows, info.bytes, "result", ErrorCodes::TOO_MANY_ROWS_OR_BYTES))
             limit_exceeded_need_break = true;
 
-        if (quota != nullptr)
+        if (quota)
             checkQuota(res);
     }
     else
@@ -240,12 +240,8 @@ void IBlockInputStream::checkQuota(Block & block)
 
         case LIMITS_CURRENT:
         {
-            time_t current_time = time(nullptr);
-            double total_elapsed = info.total_stopwatch.elapsedSeconds();
-
-            quota->checkAndAddResultRowsBytes(current_time, block.rows(), block.bytes());
-            quota->checkAndAddExecutionTime(current_time, Poco::Timespan((total_elapsed - prev_elapsed) * 1000000.0));
-
+            UInt64 total_elapsed = info.total_stopwatch.elapsedNanoseconds();
+            quota->used({Quota::RESULT_ROWS, block.rows()}, {Quota::RESULT_BYTES, block.bytes()}, {Quota::EXECUTION_TIME, total_elapsed - prev_elapsed});
             prev_elapsed = total_elapsed;
             break;
         }
@@ -291,10 +287,8 @@ void IBlockInputStream::progressImpl(const Progress & value)
 
             limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
 
-            if (quota != nullptr && limits.mode == LIMITS_TOTAL)
-            {
-                quota->checkAndAddReadRowsBytes(time(nullptr), value.read_rows, value.read_bytes);
-            }
+            if (quota && limits.mode == LIMITS_TOTAL)
+                quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes});
         }
     }
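
How a stream is expected to be wired after this change (hypothetical caller; `context.getQuota()` is the accessor used elsewhere in this diff, and execution time is now accounted in nanoseconds as a delta per block):

    std::shared_ptr<QuotaContext> quota = context.getQuota();
    stream->setQuota(quota);   // see the header hunk below: the stream now shares ownership
    // With limits.mode == LIMITS_TOTAL, progressImpl() also meters READ_ROWS / READ_BYTES.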
diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h
index dfa9194a6f9..69aadf44c09 100644
--- a/dbms/src/DataStreams/IBlockInputStream.h
+++ b/dbms/src/DataStreams/IBlockInputStream.h
@@ -23,7 +23,7 @@ namespace ErrorCodes
 }
 
 class ProcessListElement;
-class QuotaForIntervals;
+class QuotaContext;
 class QueryStatus;
 struct SortColumnDescription;
 using SortDescription = std::vector<SortColumnDescription>;
 
@@ -220,9 +220,9 @@ public:
     /** Set the quota. If you set a quota on the amount of raw data,
       * then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits.
       */
-    virtual void setQuota(QuotaForIntervals & quota_)
+    virtual void setQuota(const std::shared_ptr<QuotaContext> & quota_)
     {
-        quota = &quota_;
+        quota = quota_;
     }
 
     /// Enable calculation of minimums and maximums by the result columns.
@@ -273,8 +273,8 @@ private:
 
     LocalLimits limits;
 
-    QuotaForIntervals * quota = nullptr; /// If nullptr - the quota is not used.
-    double prev_elapsed = 0;
+    std::shared_ptr<QuotaContext> quota; /// If nullptr - the quota is not used.
+    UInt64 prev_elapsed = 0;
 
     /// The approximate total number of rows to read. For progress bar.
     size_t total_rows_approx = 0;
diff --git a/dbms/src/DataStreams/ParallelParsingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelParsingBlockInputStream.cpp
index 21233da9327..c894af82580 100644
--- a/dbms/src/DataStreams/ParallelParsingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/ParallelParsingBlockInputStream.cpp
@@ -1,5 +1,4 @@
 #include
-#include "ParallelParsingBlockInputStream.h"
 
 namespace DB
 {
@@ -15,7 +14,7 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction()
         auto & unit = processing_units[current_unit_number];
 
         {
-            std::unique_lock<std::mutex> lock(mutex);
+            std::unique_lock lock(mutex);
             segmentator_condvar.wait(lock, [&]{ return unit.status == READY_TO_INSERT || finished; });
         }
@@ -85,7 +84,7 @@ void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_unit_n
         // except at the end of file. Also see a matching assert in readImpl().
         assert(unit.is_last || unit.block_ext.block.size() > 0);
 
-        std::unique_lock<std::mutex> lock(mutex);
+        std::unique_lock lock(mutex);
         unit.status = READY_TO_READ;
         reader_condvar.notify_all();
     }
@@ -99,7 +98,7 @@ void ParallelParsingBlockInputStream::onBackgroundException()
 {
     tryLogCurrentException(__PRETTY_FUNCTION__);
 
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock(mutex);
     if (!background_exception)
     {
         background_exception = std::current_exception();
@@ -116,7 +115,7 @@ Block ParallelParsingBlockInputStream::readImpl()
     /**
      * Check for background exception and rethrow it before we return.
      */
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock(mutex);
    if (background_exception)
     {
         lock.unlock();
@@ -134,7 +133,7 @@ Block ParallelParsingBlockInputStream::readImpl()
     {
         // We have read out all the Blocks from the previous Processing Unit,
         // wait for the current one to become ready.
-        std::unique_lock<std::mutex> lock(mutex);
+        std::unique_lock lock(mutex);
         reader_condvar.wait(lock, [&](){ return unit.status == READY_TO_READ || finished; });
 
         if (finished)
@@ -190,7 +189,7 @@ Block ParallelParsingBlockInputStream::readImpl()
     else
     {
         // Pass the unit back to the segmentator.
-        std::unique_lock<std::mutex> lock(mutex);
+        std::unique_lock lock(mutex);
         unit.status = READY_TO_INSERT;
         segmentator_condvar.notify_all();
     }
diff --git a/dbms/src/DataStreams/ParallelParsingBlockInputStream.h b/dbms/src/DataStreams/ParallelParsingBlockInputStream.h
index 4b5e091cfc9..8c276f2f7dd 100644
--- a/dbms/src/DataStreams/ParallelParsingBlockInputStream.h
+++ b/dbms/src/DataStreams/ParallelParsingBlockInputStream.h
@@ -227,7 +227,7 @@ private:
         finished = true;
 
         {
-            std::unique_lock<std::mutex> lock(mutex);
+            std::unique_lock lock(mutex);
             segmentator_condvar.notify_all();
             reader_condvar.notify_all();
         }
@@ -255,4 +255,4 @@ private:
     void onBackgroundException();
 };
 
-};
+}
diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp
index 35fdc948239..9ac7d6a3397 100644
--- a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp
@@ -78,7 +78,9 @@ SummingSortedBlockInputStream::SummingSortedBlockInputStream(
         else
         {
             bool is_agg_func = WhichDataType(column.type).isAggregateFunction();
-            if (!column.type->isSummable() && !is_agg_func)
+
+            /// There may be special const columns, for example after PREWHERE sections.
+            if ((!column.type->isSummable() && !is_agg_func) || isColumnConst(*column.column))
             {
                 column_numbers_not_to_aggregate.push_back(i);
                 continue;
             }
@@ -198,6 +200,10 @@ SummingSortedBlockInputStream::SummingSortedBlockInputStream(
 
 void SummingSortedBlockInputStream::insertCurrentRowIfNeeded(MutableColumns & merged_columns)
 {
+    /// We have nothing to aggregate; the row can still be non-zero because of columns_not_to_aggregate.
+    if (columns_to_aggregate.empty())
+        current_row_is_zero = false;
+
     for (auto & desc : columns_to_aggregate)
     {
         // Do not insert if the aggregation state hasn't been created
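
The `current_row_is_zero` tweak above guards a concrete edge case (hypothetical table for illustration): a SummingMergeTree whose non-key columns are all non-summable ends up with an empty `columns_to_aggregate`; previously every merged row then looked "all zero" and could be dropped during merges, so the flag is now cleared explicitly whenever there is nothing to aggregate.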
diff --git a/dbms/src/DataTypes/DataTypeInterval.cpp b/dbms/src/DataTypes/DataTypeInterval.cpp
index c7ee3ede334..57d071a8666 100644
--- a/dbms/src/DataTypes/DataTypeInterval.cpp
+++ b/dbms/src/DataTypes/DataTypeInterval.cpp
@@ -13,14 +13,14 @@ bool DataTypeInterval::equals(const IDataType & rhs) const
 
 void registerDataTypeInterval(DataTypeFactory & factory)
 {
-    factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(DataTypeInterval::Second)); });
-    factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(DataTypeInterval::Minute)); });
-    factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(DataTypeInterval::Hour)); });
-    factory.registerSimpleDataType("IntervalDay", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(DataTypeInterval::Day)); });
-    factory.registerSimpleDataType("IntervalWeek", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(DataTypeInterval::Week)); });
-    factory.registerSimpleDataType("IntervalMonth", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(DataTypeInterval::Month)); });
-    factory.registerSimpleDataType("IntervalQuarter", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(DataTypeInterval::Quarter)); });
-    factory.registerSimpleDataType("IntervalYear", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(DataTypeInterval::Year)); });
+    factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Second)); });
+    factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Minute)); });
+    factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Hour)); });
+    factory.registerSimpleDataType("IntervalDay", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Day)); });
+    factory.registerSimpleDataType("IntervalWeek", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Week)); });
+    factory.registerSimpleDataType("IntervalMonth", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Month)); });
+    factory.registerSimpleDataType("IntervalQuarter", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Quarter)); });
+    factory.registerSimpleDataType("IntervalYear", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Year)); });
 }
 }
diff --git a/dbms/src/DataTypes/DataTypeInterval.h b/dbms/src/DataTypes/DataTypeInterval.h
index fa99ac430b6..111a2489d65 100644
--- a/dbms/src/DataTypes/DataTypeInterval.h
+++ b/dbms/src/DataTypes/DataTypeInterval.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include
 
 
 namespace DB
@@ -16,47 +17,17 @@ namespace DB
   */
 class DataTypeInterval final : public DataTypeNumberBase<Int64>
 {
-public:
-    enum Kind
-    {
-        Second,
-        Minute,
-        Hour,
-        Day,
-        Week,
-        Month,
-        Quarter,
-        Year
-    };
-
 private:
-    Kind kind;
+    IntervalKind kind;
 
 public:
     static constexpr bool is_parametric = true;
 
-    Kind getKind() const { return kind; }
+    IntervalKind getKind() const { return kind; }
 
-    const char * kindToString() const
-    {
-        switch (kind)
-        {
-            case Second: return "Second";
-            case Minute: return "Minute";
-            case Hour: return "Hour";
-            case Day: return "Day";
-            case Week: return "Week";
-            case Month: return "Month";
-            case Quarter: return "Quarter";
-            case Year: return "Year";
-        }
+    DataTypeInterval(IntervalKind kind_) : kind(kind_) {}
 
-        __builtin_unreachable();
-    }
-
-    DataTypeInterval(Kind kind_) : kind(kind_) {}
-
-    std::string doGetName() const override { return std::string("Interval") + kindToString(); }
+    std::string doGetName() const override { return std::string("Interval") + kind.toString(); }
 
     const char * getFamilyName() const override { return "Interval"; }
     TypeIndex getTypeId() const override { return TypeIndex::Interval; }
diff --git a/dbms/src/DataTypes/DataTypesDecimal.cpp b/dbms/src/DataTypes/DataTypesDecimal.cpp
index e8caae63a09..84fc31a5ed7 100644
--- a/dbms/src/DataTypes/DataTypesDecimal.cpp
+++ b/dbms/src/DataTypes/DataTypesDecimal.cpp
@@ -58,7 +58,7 @@ bool DataTypeDecimal<T>::tryReadText(T & x, ReadBuffer & istr, UInt32 precision,
 {
     UInt32 unread_scale = scale;
     bool done = tryReadDecimalText(istr, x, precision, unread_scale);
-    x *= getScaleMultiplier(unread_scale);
+    x *= T::getScaleMultiplier(unread_scale);
     return done;
 }
 
@@ -70,7 +70,7 @@ void DataTypeDecimal<T>::readText(T & x, ReadBuffer & istr, UInt32 precision, UI
     if (csv)
         readCSVDecimalText(istr, x, precision, unread_scale);
     else
         readDecimalText(istr, x, precision, unread_scale);
-    x *= getScaleMultiplier(unread_scale);
+    x *= T::getScaleMultiplier(unread_scale);
 }
 
 template <typename T>
@@ -96,7 +96,7 @@ T DataTypeDecimal<T>::parseFromString(const String & str) const
     T x;
     UInt32 unread_scale = scale;
     readDecimalText(buf, x, precision, unread_scale, true);
-    x *= getScaleMultiplier(unread_scale);
+    x *= T::getScaleMultiplier(unread_scale);
     return x;
 }
 
@@ -271,25 +271,6 @@ void registerDataTypeDecimal(DataTypeFactory & factory)
 }
 
 
-template <>
-Decimal32 DataTypeDecimal<Decimal32>::getScaleMultiplier(UInt32 scale_)
-{
-    return decimalScaleMultiplier<Int32>(scale_);
-}
-
-template <>
-Decimal64 DataTypeDecimal<Decimal64>::getScaleMultiplier(UInt32 scale_)
-{
-    return decimalScaleMultiplier<Int64>(scale_);
-}
-
-template <>
-Decimal128 DataTypeDecimal<Decimal128>::getScaleMultiplier(UInt32 scale_)
-{
-    return decimalScaleMultiplier<Int128>(scale_);
-}
-
-
 /// Explicit template instantiations.
 template class DataTypeDecimal<Decimal32>;
 template class DataTypeDecimal<Decimal64>;
diff --git a/dbms/src/DataTypes/DataTypesDecimal.h b/dbms/src/DataTypes/DataTypesDecimal.h
index e59a2b6e3fd..8de80050bae 100644
--- a/dbms/src/DataTypes/DataTypesDecimal.h
+++ b/dbms/src/DataTypes/DataTypesDecimal.h
@@ -130,7 +130,7 @@ public:
     UInt32 getPrecision() const { return precision; }
     UInt32 getScale() const { return scale; }
-    T getScaleMultiplier() const { return getScaleMultiplier(scale); }
+    T getScaleMultiplier() const { return T::getScaleMultiplier(scale); }
 
     T wholePart(T x) const
     {
@@ -148,7 +148,7 @@ public:
         return x % getScaleMultiplier();
     }
 
-    T maxWholeValue() const { return getScaleMultiplier(maxPrecision() - scale) - T(1); }
+    T maxWholeValue() const { return T::getScaleMultiplier(maxPrecision() - scale) - T(1); }
 
     bool canStoreWhole(T x) const
     {
@@ -165,7 +165,7 @@ public:
         if (getScale() < x.getScale())
             throw Exception("Decimal result's scale is less than argument's one", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
         UInt32 scale_delta = getScale() - x.getScale(); /// scale_delta >= 0
-        return getScaleMultiplier(scale_delta);
+        return T::getScaleMultiplier(scale_delta);
     }
 
     template
@@ -181,7 +181,6 @@ public:
     void readText(T & x, ReadBuffer & istr, bool csv = false) const { readText(x, istr, precision, scale, csv); }
     static void readText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale, bool csv = false);
     static bool tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale);
-    static T getScaleMultiplier(UInt32 scale);
 
 private:
     const UInt32 precision;
@@ -264,12 +263,12 @@ convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_fro
     MaxNativeType converted_value;
     if (scale_to > scale_from)
     {
-        converted_value = DataTypeDecimal<MaxFieldType>::getScaleMultiplier(scale_to - scale_from);
+        converted_value = MaxFieldType::getScaleMultiplier(scale_to - scale_from);
         if (common::mulOverflow(static_cast<MaxNativeType>(value), converted_value, converted_value))
             throw Exception("Decimal convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
     }
     else
-        converted_value = value / DataTypeDecimal<MaxFieldType>::getScaleMultiplier(scale_from - scale_to);
+        converted_value = value / MaxFieldType::getScaleMultiplier(scale_from - scale_to);
 
     if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType))
     {
@@ -289,7 +288,7 @@ convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
     using ToFieldType = typename ToDataType::FieldType;
 
     if constexpr (std::is_floating_point_v<ToFieldType>)
-        return static_cast<ToFieldType>(value) / FromDataType::getScaleMultiplier(scale);
+        return static_cast<ToFieldType>(value) / FromFieldType::getScaleMultiplier(scale);
     else
     {
         FromFieldType converted_value = convertDecimals(value, scale, 0);
@@ -320,14 +319,15 @@ inline std::enable_if_t && IsDataTypeDecimal)
 {
     if (!std::isfinite(value))
         throw Exception("Decimal convert overflow. Cannot convert infinity or NaN to decimal", ErrorCodes::DECIMAL_OVERFLOW);
 
-    auto out = value * ToDataType::getScaleMultiplier(scale);
+    auto out = value * ToFieldType::getScaleMultiplier(scale);
 
     if constexpr (std::is_same_v<ToFieldType, Decimal128>)
     {
         static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64;
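
A worked example of the rescaling path in convertDecimals() above (numbers chosen for illustration):

    // Rescale 1.23, stored as Decimal32(123) at scale 2, to scale 4:
    //   converted_value = 123 * getScaleMultiplier(4 - 2)   // 123 * 100
    //                   = 12300                              // i.e. 1.2300
    // The multiplication is overflow-checked via common::mulOverflow on the widest type.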
diff --git a/dbms/src/Dictionaries/CacheDictionary.inc.h b/dbms/src/Dictionaries/CacheDictionary.inc.h
index c10cde8c4fd..87005ac821f 100644
--- a/dbms/src/Dictionaries/CacheDictionary.inc.h
+++ b/dbms/src/Dictionaries/CacheDictionary.inc.h
@@ -3,8 +3,8 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
 #include
@@ -334,7 +334,7 @@ void CacheDictionary::update(
             backoff_end_time = now + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, error_count));
 
             tryLogException(last_exception, log, "Could not update cache dictionary '" + getName() +
-                "', next update is scheduled at " + DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(backoff_end_time)));
+                "', next update is scheduled at " + ext::to_string(backoff_end_time));
         }
     }
diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp
index 57071b17c28..dfbaef334e0 100644
--- a/dbms/src/Formats/FormatFactory.cpp
+++ b/dbms/src/Formats/FormatFactory.cpp
@@ -281,6 +281,8 @@ void registerInputFormatProcessorTSKV(FormatFactory & factory);
 void registerOutputFormatProcessorTSKV(FormatFactory & factory);
 void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
 void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
+void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
+void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
 void registerInputFormatProcessorParquet(FormatFactory & factory);
 void registerInputFormatProcessorORC(FormatFactory & factory);
 void registerOutputFormatProcessorParquet(FormatFactory & factory);
@@ -336,6 +338,8 @@ FormatFactory::FormatFactory()
     registerOutputFormatProcessorTSKV(*this);
     registerInputFormatProcessorJSONEachRow(*this);
     registerOutputFormatProcessorJSONEachRow(*this);
+    registerInputFormatProcessorJSONCompactEachRow(*this);
+    registerOutputFormatProcessorJSONCompactEachRow(*this);
     registerInputFormatProcessorProtobuf(*this);
     registerOutputFormatProcessorProtobuf(*this);
     registerInputFormatProcessorCapnProto(*this);
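
For reviewers unfamiliar with the newly registered format: JSONCompactEachRow emits one compact JSON array per row, e.g. for a (String, UInt8) block (illustrative output):

    ["hello", 1]
    ["world", 2]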
"add" : "subtract") << interval_data_type->getKind().toString() << 's'; return FunctionFactory::instance().get(function_name.str(), context); } diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 09a23f83414..e0f828c395a 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -735,7 +735,7 @@ struct NameToDecimal128 { static constexpr auto name = "toDecimal128"; }; struct NameToInterval ## INTERVAL_KIND \ { \ static constexpr auto name = "toInterval" #INTERVAL_KIND; \ - static constexpr int kind = DataTypeInterval::INTERVAL_KIND; \ + static constexpr auto kind = IntervalKind::INTERVAL_KIND; \ }; DEFINE_NAME_TO_INTERVAL(Second) @@ -786,7 +786,7 @@ public: if constexpr (std::is_same_v) { - return std::make_shared(DataTypeInterval::Kind(Name::kind)); + return std::make_shared(Name::kind); } else if constexpr (to_decimal) { diff --git a/dbms/src/Functions/currentQuota.cpp b/dbms/src/Functions/currentQuota.cpp new file mode 100644 index 00000000000..fef26f333fc --- /dev/null +++ b/dbms/src/Functions/currentQuota.cpp @@ -0,0 +1,134 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class FunctionCurrentQuota : public IFunction +{ + const String quota_name; + +public: + static constexpr auto name = "currentQuota"; + static FunctionPtr create(const Context & context) + { + return std::make_shared(context.getQuota()->getUsageInfo().quota_name); + } + + explicit FunctionCurrentQuota(const String & quota_name_) : quota_name{quota_name_} + { + } + + String getName() const override + { + return name; + } + size_t getNumberOfArguments() const override + { + return 0; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + bool isDeterministic() const override { return false; } + + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override + { + block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, quota_name); + } +}; + + +class FunctionCurrentQuotaId : public IFunction +{ + const UUID quota_id; + +public: + static constexpr auto name = "currentQuotaID"; + static FunctionPtr create(const Context & context) + { + return std::make_shared(context.getQuota()->getUsageInfo().quota_id); + } + + explicit FunctionCurrentQuotaId(const UUID quota_id_) : quota_id{quota_id_} + { + } + + String getName() const override + { + return name; + } + size_t getNumberOfArguments() const override + { + return 0; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + bool isDeterministic() const override { return false; } + + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override + { + block.getByPosition(result).column = DataTypeUUID().createColumnConst(input_rows_count, quota_id); + } +}; + + +class FunctionCurrentQuotaKey : public IFunction +{ + const String quota_key; + +public: + static constexpr auto name = "currentQuotaKey"; + static FunctionPtr create(const Context & context) + { + return std::make_shared(context.getQuota()->getUsageInfo().quota_key); + } + + explicit FunctionCurrentQuotaKey(const String & quota_key_) : quota_key{quota_key_} + { + } + + String getName() const override + { + return name; + } + size_t getNumberOfArguments() const override + { + return 0; + } + + DataTypePtr 
diff --git a/dbms/src/Functions/currentQuota.cpp b/dbms/src/Functions/currentQuota.cpp
new file mode 100644
index 00000000000..fef26f333fc
--- /dev/null
+++ b/dbms/src/Functions/currentQuota.cpp
@@ -0,0 +1,134 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class FunctionCurrentQuota : public IFunction
+{
+    const String quota_name;
+
+public:
+    static constexpr auto name = "currentQuota";
+    static FunctionPtr create(const Context & context)
+    {
+        return std::make_shared<FunctionCurrentQuota>(context.getQuota()->getUsageInfo().quota_name);
+    }
+
+    explicit FunctionCurrentQuota(const String & quota_name_) : quota_name{quota_name_}
+    {
+    }
+
+    String getName() const override
+    {
+        return name;
+    }
+    size_t getNumberOfArguments() const override
+    {
+        return 0;
+    }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
+    {
+        return std::make_shared<DataTypeString>();
+    }
+
+    bool isDeterministic() const override { return false; }
+
+    void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
+    {
+        block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, quota_name);
+    }
+};
+
+
+class FunctionCurrentQuotaId : public IFunction
+{
+    const UUID quota_id;
+
+public:
+    static constexpr auto name = "currentQuotaID";
+    static FunctionPtr create(const Context & context)
+    {
+        return std::make_shared<FunctionCurrentQuotaId>(context.getQuota()->getUsageInfo().quota_id);
+    }
+
+    explicit FunctionCurrentQuotaId(const UUID quota_id_) : quota_id{quota_id_}
+    {
+    }
+
+    String getName() const override
+    {
+        return name;
+    }
+    size_t getNumberOfArguments() const override
+    {
+        return 0;
+    }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
+    {
+        return std::make_shared<DataTypeUUID>();
+    }
+
+    bool isDeterministic() const override { return false; }
+
+    void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
+    {
+        block.getByPosition(result).column = DataTypeUUID().createColumnConst(input_rows_count, quota_id);
+    }
+};
+
+
+class FunctionCurrentQuotaKey : public IFunction
+{
+    const String quota_key;
+
+public:
+    static constexpr auto name = "currentQuotaKey";
+    static FunctionPtr create(const Context & context)
+    {
+        return std::make_shared<FunctionCurrentQuotaKey>(context.getQuota()->getUsageInfo().quota_key);
+    }
+
+    explicit FunctionCurrentQuotaKey(const String & quota_key_) : quota_key{quota_key_}
+    {
+    }
+
+    String getName() const override
+    {
+        return name;
+    }
+    size_t getNumberOfArguments() const override
+    {
+        return 0;
+    }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
+    {
+        return std::make_shared<DataTypeString>();
+    }
+
+    bool isDeterministic() const override { return false; }
+
+    void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
+    {
+        block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, quota_key);
+    }
+};
+
+
+void registerFunctionCurrentQuota(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionCurrentQuota>();
+    factory.registerFunction<FunctionCurrentQuotaId>();
+    factory.registerFunction<FunctionCurrentQuotaKey>();
+}
+
+}
diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp
index ae75b9c0962..9529cd3a56a 100644
--- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp
+++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp
@@ -7,6 +7,7 @@ class FunctionFactory;
 
 void registerFunctionCurrentDatabase(FunctionFactory &);
 void registerFunctionCurrentUser(FunctionFactory &);
+void registerFunctionCurrentQuota(FunctionFactory &);
 void registerFunctionHostName(FunctionFactory &);
 void registerFunctionFQDN(FunctionFactory &);
 void registerFunctionVisibleWidth(FunctionFactory &);
@@ -62,6 +63,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
 {
     registerFunctionCurrentDatabase(factory);
     registerFunctionCurrentUser(factory);
+    registerFunctionCurrentQuota(factory);
     registerFunctionHostName(factory);
     registerFunctionFQDN(factory);
     registerFunctionVisibleWidth(factory);
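
Once registered, the three nullary functions are visible from SQL (illustrative session; actual values depend on the active quota):

    SELECT currentQuota(), currentQuotaID(), currentQuotaKey();
    -- 'default', <a deterministic UUID>, 'alice'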
Should be an interval of time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - result_type_is_date = (interval_type->getKind() == DataTypeInterval::Year) - || (interval_type->getKind() == DataTypeInterval::Quarter) || (interval_type->getKind() == DataTypeInterval::Month) - || (interval_type->getKind() == DataTypeInterval::Week); + result_type_is_date = (interval_type->getKind() == IntervalKind::Year) + || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month) + || (interval_type->getKind() == IntervalKind::Week); }; auto check_timezone_argument = [&] @@ -177,7 +177,7 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (first_argument_is_date && result_type_is_date) throw Exception( - "The timezone argument of function " + getName() + " with interval type " + interval_type->kindToString() + "The timezone argument of function " + getName() + " with interval type " + interval_type->getKind().toString() + " is allowed only when the 1st argument has the type DateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); }; @@ -269,28 +269,28 @@ private: switch (interval_type->getKind()) { - case DataTypeInterval::Second: - return execute(time_column, num_units, time_zone); - case DataTypeInterval::Minute: - return execute(time_column, num_units, time_zone); - case DataTypeInterval::Hour: - return execute(time_column, num_units, time_zone); - case DataTypeInterval::Day: - return execute(time_column, num_units, time_zone); - case DataTypeInterval::Week: - return execute(time_column, num_units, time_zone); - case DataTypeInterval::Month: - return execute(time_column, num_units, time_zone); - case DataTypeInterval::Quarter: - return execute(time_column, num_units, time_zone); - case DataTypeInterval::Year: - return execute(time_column, num_units, time_zone); + case IntervalKind::Second: + return execute(time_column, num_units, time_zone); + case IntervalKind::Minute: + return execute(time_column, num_units, time_zone); + case IntervalKind::Hour: + return execute(time_column, num_units, time_zone); + case IntervalKind::Day: + return execute(time_column, num_units, time_zone); + case IntervalKind::Week: + return execute(time_column, num_units, time_zone); + case IntervalKind::Month: + return execute(time_column, num_units, time_zone); + case IntervalKind::Quarter: + return execute(time_column, num_units, time_zone); + case IntervalKind::Year: + return execute(time_column, num_units, time_zone); } __builtin_unreachable(); } - template + template ColumnPtr execute(const ColumnVector & time_column, UInt64 num_units, const DateLUTImpl & time_zone) { const auto & time_data = time_column.getData(); diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index 509c37257ad..1f5b6ca7f42 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -764,12 +763,6 @@ inline void writeText(const LocalDateTime & x, WriteBuffer & buf) { writeDateTim inline void writeText(const UUID & x, WriteBuffer & buf) { writeUUIDText(x, buf); } inline void writeText(const UInt128 & x, WriteBuffer & buf) { writeText(UUID(x), buf); } -template inline T decimalScaleMultiplier(UInt32 scale); -template <> inline Int32 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i32(scale); } -template <> inline Int64 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i64(scale); } -template <> inline Int128 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i128(scale); } - - template 
void writeText(Decimal value, UInt32 scale, WriteBuffer & ostr) { @@ -781,7 +774,7 @@ void writeText(Decimal value, UInt32 scale, WriteBuffer & ostr) T whole_part = value; if (scale) - whole_part = value / decimalScaleMultiplier(scale); + whole_part = value / Decimal::getScaleMultiplier(scale); writeIntText(whole_part, ostr); if (scale) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 8dc4e57739f..f303356be34 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -24,9 +25,11 @@ #include #include #include +#include +#include +#include #include #include -#include #include #include #include @@ -37,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -91,6 +93,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int SCALAR_ALREADY_EXISTS; extern const int UNKNOWN_SCALAR; + extern const int NOT_ENOUGH_PRIVILEGES; } @@ -130,8 +133,8 @@ struct ContextShared mutable std::optional external_models_loader; String default_profile_name; /// Default profile name used for default values. String system_profile_name; /// Profile used by system processes + AccessControlManager access_control_manager; std::unique_ptr users_manager; /// Known users. - Quotas quotas; /// Known quotas for resource use. mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. ProcessList process_list; /// Executing queries at the moment. @@ -326,7 +329,7 @@ Context & Context::operator=(const Context &) = default; Context Context::createGlobal() { Context res; - res.quota = std::make_shared(); + res.quota = std::make_shared(); res.shared = std::make_shared(); return res; } @@ -585,12 +588,31 @@ const Poco::Util::AbstractConfiguration & Context::getConfigRef() const return shared->config ? 
*shared->config : Poco::Util::Application::instance().config(); } +AccessControlManager & Context::getAccessControlManager() +{ + auto lock = getLock(); + return shared->access_control_manager; +} + +const AccessControlManager & Context::getAccessControlManager() const +{ + auto lock = getLock(); + return shared->access_control_manager; +} + +void Context::checkQuotaManagementIsAllowed() +{ + if (!is_quota_management_allowed) + throw Exception( + "User " + client_info.current_user + " doesn't have enough privileges to manage quotas", ErrorCodes::NOT_ENOUGH_PRIVILEGES); +} + void Context::setUsersConfig(const ConfigurationPtr & config) { auto lock = getLock(); shared->users_config = config; + shared->access_control_manager.loadFromConfig(*shared->users_config); shared->users_manager->loadFromConfig(*shared->users_config); - shared->quotas.loadFromConfig(*shared->users_config); } ConfigurationPtr Context::getUsersConfig() @@ -631,7 +653,8 @@ void Context::calculateUserSettings() { auto lock = getLock(); - String profile = shared->users_manager->getUser(client_info.current_user)->profile; + auto user = getUser(client_info.current_user); + String profile = user->profile; /// 1) Set default settings (hardcoded values) /// NOTE: we ignore global_context settings (from which it is usually copied) @@ -646,6 +669,10 @@ void Context::calculateUserSettings() /// 3) Apply settings from current user setProfile(profile); + + quota = getAccessControlManager().createQuotaContext( + client_info.current_user, client_info.current_address.host(), client_info.quota_key); + is_quota_management_allowed = user->is_quota_management_allowed; } @@ -678,24 +705,9 @@ void Context::setUser(const String & name, const String & password, const Poco:: client_info.quota_key = quota_key; calculateUserSettings(); - - setQuota(user_props->quota, quota_key, name, address.host()); } -void Context::setQuota(const String & name, const String & quota_key, const String & user_name, const Poco::Net::IPAddress & address) -{ - auto lock = getLock(); - quota = shared->quotas.get(name, quota_key, user_name, address); -} - - -QuotaForIntervals & Context::getQuota() -{ - auto lock = getLock(); - return *quota; -} - void Context::checkDatabaseAccessRights(const std::string & database_name) const { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index e94d8125064..c3671990dc3 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -44,7 +44,7 @@ namespace DB struct ContextShared; class Context; -class QuotaForIntervals; +class QuotaContext; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalModelsLoader; @@ -77,6 +77,7 @@ class ActionLocksManager; using ActionLocksManagerPtr = std::shared_ptr; class ShellCommand; class ICompressionCodec; +class AccessControlManager; class SettingsConstraints; class RemoteHostFilter; @@ -137,7 +138,8 @@ private: InputInitializer input_initializer_callback; InputBlocksReader input_blocks_reader; - std::shared_ptr quota; /// Current quota. By default - empty quota, that have no limits. + std::shared_ptr quota; /// Current quota. By default - an empty quota that has no limits. + bool is_quota_management_allowed = false; /// Whether the current user is allowed to manage quotas via SQL commands. String current_database; Settings settings; /// Setting for query execution.
std::shared_ptr settings_constraints; @@ -201,6 +203,11 @@ public: void setConfig(const ConfigurationPtr & config); const Poco::Util::AbstractConfiguration & getConfigRef() const; + AccessControlManager & getAccessControlManager(); + const AccessControlManager & getAccessControlManager() const; + std::shared_ptr getQuota() const { return quota; } + void checkQuotaManagementIsAllowed(); + /** Take the list of users, quotas and configuration profiles from this config. * The list of users is completely replaced. * The accumulated quota values are not reset if the quota is not deleted. @@ -240,9 +247,6 @@ public: ClientInfo & getClientInfo() { return client_info; } const ClientInfo & getClientInfo() const { return client_info; } - void setQuota(const String & name, const String & quota_key, const String & user_name, const Poco::Net::IPAddress & address); - QuotaForIntervals & getQuota(); - void addDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where); void removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where); Dependencies getDependencies(const String & database_name, const String & table_name) const; @@ -410,7 +414,6 @@ public: const Settings & getSettingsRef() const { return settings; } Settings & getSettingsRef() { return settings; } - void setProgressCallback(ProgressCallback callback); /// Used in InterpreterSelectQuery to pass it to the IBlockInputStream. ProgressCallback getProgressCallback() const; diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 7bc5aedae6d..b81e549a9d2 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -2,13 +2,13 @@ #include #include -#include #include #include #include #include #include #include +#include #include @@ -560,8 +560,8 @@ public: /// The function doesn't touch the objects which were never tried to load. void reloadOutdated() { - /// Iterate through all the objects and find loaded ones which should be checked if they were modified. - std::unordered_map is_modified_map; + /// Iterate through all the objects and find loaded ones which should be checked to see if they need an update. + std::unordered_map should_update_map; { std::lock_guard lock{mutex}; TimePoint now = std::chrono::system_clock::now(); @@ -569,22 +569,26 @@ public: { const auto & info = name_and_info.second; if ((now >= info.next_update_time) && !info.loading() && info.loaded()) - is_modified_map.emplace(info.object, true); + should_update_map.emplace(info.object, info.failedToReload()); } } /// Find out which of the loaded objects were modified. - /// We couldn't perform these checks while we were building `is_modified_map` because + /// We couldn't perform these checks while we were building `should_update_map` because /// the `mutex` should be unlocked while we're calling the function object->isModified() - for (auto & [object, is_modified_flag] : is_modified_map) + for (auto & [object, should_update_flag] : should_update_map) { try { - is_modified_flag = object->isModified(); + /// Maybe already true, if we have an exception + if (!should_update_flag) + should_update_flag = object->isModified(); } catch (...)
{ tryLogCurrentException(log, "Could not check if " + type_name + " '" + object->getName() + "' was modified"); + /// Cannot check isModified, so update anyway + should_update_flag = true; } } @@ -598,19 +602,18 @@ public: { if (info.loaded()) { - auto it = is_modified_map.find(info.object); - if (it == is_modified_map.end()) - continue; /// Object has been just loaded (it wasn't loaded while we were building the map `is_modified_map`), so we don't have to reload it right now. + auto it = should_update_map.find(info.object); + if (it == should_update_map.end()) + continue; /// Object has been just loaded (it wasn't loaded while we were building the map `should_update_map`), so we don't have to reload it right now. - bool is_modified_flag = it->second; - if (!is_modified_flag) + bool should_update_flag = it->second; + if (!should_update_flag) { - /// Object wasn't modified so we only have to set `next_update_time`. info.next_update_time = calculateNextUpdateTime(info.object, info.error_count); continue; } - /// Object was modified and should be reloaded. + /// Object was modified or failed to reload last time, so it should be reloaded. startLoading(name, info); } else if (info.failed()) @@ -633,6 +636,7 @@ private: bool loading() const { return loading_id != 0; } bool wasLoading() const { return loaded() || failed() || loading(); } bool ready() const { return (loaded() || failed()) && !forced_to_reload; } + bool failedToReload() const { return loaded() && exception != nullptr; } Status status() const { @@ -874,8 +878,7 @@ private: { if (next_update_time == TimePoint::max()) return String(); - return ", next update is scheduled at " - + DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(next_update_time)); + return ", next update is scheduled at " + ext::to_string(next_update_time); }; if (previous_version) tryLogException(new_exception, log, "Could not update " + type_name + " '" + name + "'" diff --git a/dbms/src/Interpreters/IInterpreter.h b/dbms/src/Interpreters/IInterpreter.h index e1090061cf3..e2248a7ec7a 100644 --- a/dbms/src/Interpreters/IInterpreter.h +++ b/dbms/src/Interpreters/IInterpreter.h @@ -22,6 +22,9 @@ public: virtual bool canExecuteWithProcessors() const { return false; } + virtual bool ignoreQuota() const { return false; } + virtual bool ignoreLimits() const { return false; } + virtual ~IInterpreter() {} }; diff --git a/dbms/src/Interpreters/InterpreterCreateQuotaQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuotaQuery.cpp new file mode 100644 index 00000000000..0dd81f5cb27 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterCreateQuotaQuery.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +BlockIO InterpreterCreateQuotaQuery::execute() +{ + context.checkQuotaManagementIsAllowed(); + const auto & query = query_ptr->as(); + auto & access_control = context.getAccessControlManager(); + + if (query.alter) + { + auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr + { + auto updated_quota = typeid_cast>(entity->clone()); + updateQuotaFromQuery(*updated_quota, query); + return updated_quota; + }; + if (query.if_exists) + { + if (auto id = access_control.find(query.name)) + access_control.tryUpdate(*id, update_func); + } + else + access_control.update(access_control.getID(query.name), update_func); + } + else + { + auto new_quota = std::make_shared(); + updateQuotaFromQuery(*new_quota, query); + + if (query.if_not_exists) +
access_control.tryInsert(new_quota); + else if (query.or_replace) + access_control.insertOrReplace(new_quota); + else + access_control.insert(new_quota); + } + + return {}; +} + + +void InterpreterCreateQuotaQuery::updateQuotaFromQuery(Quota & quota, const ASTCreateQuotaQuery & query) +{ + if (query.alter) + { + if (!query.new_name.empty()) + quota.setName(query.new_name); + } + else + quota.setName(query.name); + + if (query.key_type) + quota.key_type = *query.key_type; + + auto & quota_all_limits = quota.all_limits; + for (const auto & query_limits : query.all_limits) + { + auto duration = query_limits.duration; + + auto it = boost::range::find_if(quota_all_limits, [&](const Quota::Limits & x) { return x.duration == duration; }); + if (query_limits.unset_tracking) + { + if (it != quota_all_limits.end()) + quota_all_limits.erase(it); + continue; + } + + if (it == quota_all_limits.end()) + { + /// We keep `all_limits` sorted by duration. + it = quota_all_limits.insert( + boost::range::upper_bound( + quota_all_limits, + duration, + [](const std::chrono::seconds & lhs, const Quota::Limits & rhs) { return lhs < rhs.duration; }), + Quota::Limits{}); + it->duration = duration; + } + + auto & quota_limits = *it; + quota_limits.randomize_interval = query_limits.randomize_interval; + for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) + { + if (query_limits.max[resource_type]) + quota_limits.max[resource_type] = *query_limits.max[resource_type]; + } + } + + if (query.roles) + { + const auto & query_roles = *query.roles; + + /// We keep `roles` sorted. + quota.roles = query_roles.roles; + if (query_roles.current_user) + quota.roles.push_back(context.getClientInfo().current_user); + boost::range::sort(quota.roles); + quota.roles.erase(std::unique(quota.roles.begin(), quota.roles.end()), quota.roles.end()); + + quota.all_roles = query_roles.all_roles; + + /// We keep `except_roles` sorted. 
+ quota.except_roles = query_roles.except_roles; + if (query_roles.except_current_user) + quota.except_roles.push_back(context.getClientInfo().current_user); + boost::range::sort(quota.except_roles); + quota.except_roles.erase(std::unique(quota.except_roles.begin(), quota.except_roles.end()), quota.except_roles.end()); + } +} +} diff --git a/dbms/src/Interpreters/InterpreterCreateQuotaQuery.h b/dbms/src/Interpreters/InterpreterCreateQuotaQuery.h new file mode 100644 index 00000000000..bbf91bbe1d3 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterCreateQuotaQuery.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTCreateQuotaQuery; +struct Quota; + + +class InterpreterCreateQuotaQuery : public IInterpreter +{ +public: + InterpreterCreateQuotaQuery(const ASTPtr & query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} + + BlockIO execute() override; + + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + +private: + void updateQuotaFromQuery(Quota & quota, const ASTCreateQuotaQuery & query); + + ASTPtr query_ptr; + Context & context; +}; +} diff --git a/dbms/src/Interpreters/InterpreterDropAccessEntityQuery.cpp b/dbms/src/Interpreters/InterpreterDropAccessEntityQuery.cpp new file mode 100644 index 00000000000..7f18084038c --- /dev/null +++ b/dbms/src/Interpreters/InterpreterDropAccessEntityQuery.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ +BlockIO InterpreterDropAccessEntityQuery::execute() +{ + const auto & query = query_ptr->as(); + auto & access_control = context.getAccessControlManager(); + using Kind = ASTDropAccessEntityQuery::Kind; + + switch (query.kind) + { + case Kind::QUOTA: + { + context.checkQuotaManagementIsAllowed(); + if (query.if_exists) + access_control.tryRemove(access_control.find(query.names)); + else + access_control.remove(access_control.getIDs(query.names)); + return {}; + } + } + + __builtin_unreachable(); +} +} diff --git a/dbms/src/Interpreters/InterpreterDropAccessEntityQuery.h b/dbms/src/Interpreters/InterpreterDropAccessEntityQuery.h new file mode 100644 index 00000000000..2a0e749b265 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterDropAccessEntityQuery.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class InterpreterDropAccessEntityQuery : public IInterpreter +{ +public: + InterpreterDropAccessEntityQuery(const ASTPtr & query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} + + BlockIO execute() override; + +private: + ASTPtr query_ptr; + Context & context; +}; +} diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index d27c9c8baeb..33e9da95dfc 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include #include #include @@ -9,7 +11,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -19,8 +23,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -31,8 +37,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -187,6 +195,22 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } + else if (query->as()) + { + 
return std::make_unique(query, context); + } + else if (query->as()) + { + return std::make_unique(query, context); + } + else if (query->as()) + { + return std::make_unique(query, context); + } else throw Exception("Unknown type of query: " + query->getID(), ErrorCodes::UNKNOWN_TYPE_OF_QUERY); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 0f3d5d82f96..d4565f6894d 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -419,6 +419,17 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// null non-const columns to avoid useless memory allocations. However, a valid block sample /// requires all columns to be of size 0, thus we need to sanitize the block here. sanitizeBlock(result_header); + + /// Remove limits for some tables in the `system` database. + if (storage && (storage->getDatabaseName() == "system")) + { + String table_name = storage->getTableName(); + if ((table_name == "quotas") || (table_name == "quota_usage") || (table_name == "one")) + { + options.ignore_quota = true; + options.ignore_limits = true; + } + } } @@ -1776,14 +1787,14 @@ void InterpreterSelectQuery::executeFetchColumns( limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; } - QuotaForIntervals & quota = context->getQuota(); + auto quota = context->getQuota(); for (auto & stream : streams) { if (!options.ignore_limits) stream->setLimits(limits); - if (options.to_stage == QueryProcessingStage::Complete) + if (!options.ignore_quota && (options.to_stage == QueryProcessingStage::Complete)) stream->setQuota(quota); } @@ -1793,7 +1804,7 @@ void InterpreterSelectQuery::executeFetchColumns( if (!options.ignore_limits) pipe.setLimits(limits); - if (options.to_stage == QueryProcessingStage::Complete) + if (!options.ignore_quota && (options.to_stage == QueryProcessingStage::Complete)) pipe.setQuota(quota); } } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 083a4ebe680..6b95d7aeea7 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -74,6 +74,9 @@ public: QueryPipeline executeWithProcessors() override; bool canExecuteWithProcessors() const override { return true; } + bool ignoreLimits() const override { return options.ignore_limits; } + bool ignoreQuota() const override { return options.ignore_quota; } + Block getSampleBlock(); void ignoreWithTotals(); @@ -260,7 +263,7 @@ private: */ void initSettings(); - const SelectQueryOptions options; + SelectQueryOptions options; ASTPtr query_ptr; std::shared_ptr context; SyntaxAnalyzerResultPtr syntax_analyzer_result; diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 2d7fde72875..e76f2668d3d 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -107,6 +107,19 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( result_header = getCommonHeaderForUnion(headers); } + + /// InterpreterSelectWithUnionQuery ignores limits if all nested interpreters ignore limits. 
+ bool all_nested_ignore_limits = true; + bool all_nested_ignore_quota = true; + for (auto & interpreter : nested_interpreters) + { + if (!interpreter->ignoreLimits()) + all_nested_ignore_limits = false; + if (!interpreter->ignoreQuota()) + all_nested_ignore_quota = false; + } + options.ignore_limits |= all_nested_ignore_limits; + options.ignore_quota |= all_nested_ignore_quota; } diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h index 4e425d260e6..e18627fec2a 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -34,6 +34,9 @@ public: QueryPipeline executeWithProcessors() override; bool canExecuteWithProcessors() const override { return true; } + bool ignoreLimits() const override { return options.ignore_limits; } + bool ignoreQuota() const override { return options.ignore_quota; } + Block getSampleBlock(); static Block getSampleBlock( @@ -45,7 +48,7 @@ public: ASTPtr getQuery() const { return query_ptr; } private: - const SelectQueryOptions options; + SelectQueryOptions options; ASTPtr query_ptr; std::shared_ptr context; diff --git a/dbms/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/dbms/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp new file mode 100644 index 00000000000..d0ef8992691 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +BlockIO InterpreterShowCreateAccessEntityQuery::execute() +{ + BlockIO res; + res.in = executeImpl(); + return res; +} + + +BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() +{ + const auto & show_query = query_ptr->as(); + + /// Build a create query. + ASTPtr create_query = getCreateQuotaQuery(show_query); + + /// Build the result column. + std::stringstream create_query_ss; + formatAST(*create_query, create_query_ss, false, true); + String create_query_str = create_query_ss.str(); + MutableColumnPtr column = ColumnString::create(); + column->insert(create_query_str); + + /// Prepare description of the result column. + std::stringstream desc_ss; + formatAST(show_query, desc_ss, false, true); + String desc = desc_ss.str(); + String prefix = "SHOW "; + if (startsWith(desc, prefix)) + desc = desc.substr(prefix.length()); /// `desc` always starts with "SHOW ", so we can trim this prefix. 
+ + return std::make_shared(Block{{std::move(column), std::make_shared(), desc}}); +} + + +ASTPtr InterpreterShowCreateAccessEntityQuery::getCreateQuotaQuery(const ASTShowCreateAccessEntityQuery & show_query) const +{ + auto & access_control = context.getAccessControlManager(); + + QuotaPtr quota; + if (show_query.current_quota) + quota = access_control.read(context.getQuota()->getUsageInfo().quota_id); + else + quota = access_control.read(show_query.name); + + auto create_query = std::make_shared(); + create_query->name = quota->getName(); + create_query->key_type = quota->key_type; + create_query->all_limits.reserve(quota->all_limits.size()); + + for (const auto & limits : quota->all_limits) + { + ASTCreateQuotaQuery::Limits create_query_limits; + create_query_limits.duration = limits.duration; + create_query_limits.randomize_interval = limits.randomize_interval; + for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) + if (limits.max[resource_type]) + create_query_limits.max[resource_type] = limits.max[resource_type]; + create_query->all_limits.push_back(create_query_limits); + } + + if (!quota->roles.empty() || quota->all_roles) + { + auto create_query_roles = std::make_shared(); + create_query_roles->roles = quota->roles; + create_query_roles->all_roles = quota->all_roles; + create_query_roles->except_roles = quota->except_roles; + create_query->roles = std::move(create_query_roles); + } + + return create_query; +} +} diff --git a/dbms/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h b/dbms/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h new file mode 100644 index 00000000000..94b06dadb19 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class Context; +class ASTShowCreateAccessEntityQuery; + + +/** Returns a single item containing a statement which could be used to create a specified role. + */ +class InterpreterShowCreateAccessEntityQuery : public IInterpreter +{ +public: + InterpreterShowCreateAccessEntityQuery(const ASTPtr & query_ptr_, const Context & context_) + : query_ptr(query_ptr_), context(context_) {} + + BlockIO execute() override; + + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + +private: + ASTPtr query_ptr; + const Context & context; + + BlockInputStreamPtr executeImpl(); + ASTPtr getCreateQuotaQuery(const ASTShowCreateAccessEntityQuery & show_query) const; +}; + + +} diff --git a/dbms/src/Interpreters/InterpreterShowQuotasQuery.cpp b/dbms/src/Interpreters/InterpreterShowQuotasQuery.cpp new file mode 100644 index 00000000000..73653e26781 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterShowQuotasQuery.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +InterpreterShowQuotasQuery::InterpreterShowQuotasQuery(const ASTPtr & query_ptr_, Context & context_) + : query_ptr(query_ptr_), context(context_) +{ +} + + +String InterpreterShowQuotasQuery::getRewrittenQuery() +{ + const auto & query = query_ptr->as(); + + /// Transform the query into some kind of "SELECT from system.quotas" query. + String expr; + String filter; + String table_name; + String order_by; + if (query.usage) + { + expr = "name || ' key=\\'' || key || '\\'' || if(isNull(end_of_interval), '', ' interval=[' || " + "toString(end_of_interval - duration) || ' .. 
' || " + "toString(end_of_interval) || ']'"; + for (auto resource_type : ext::range_with_static_cast(Quota::MAX_RESOURCE_TYPE)) + { + String column_name = Quota::resourceTypeToColumnName(resource_type); + expr += String{" || ' "} + column_name + "=' || toString(" + column_name + ")"; + expr += String{" || if(max_"} + column_name + "=0, '', '/' || toString(max_" + column_name + "))"; + } + expr += ")"; + + if (query.current) + filter = "(id = currentQuotaID()) AND (key = currentQuotaKey())"; + + table_name = "system.quota_usage"; + order_by = "name, key, duration"; + } + else + { + expr = "name"; + table_name = "system.quotas"; + order_by = "name"; + } + + /// Prepare description of the result column. + std::stringstream ss; + formatAST(query, ss, false, true); + String desc = ss.str(); + String prefix = "SHOW "; + if (startsWith(desc, prefix)) + desc = desc.substr(prefix.length()); /// `desc` always starts with "SHOW ", so we can trim this prefix. + + /// Build a new query. + return "SELECT " + expr + " AS " + backQuote(desc) + " FROM " + table_name + (filter.empty() ? "" : (" WHERE " + filter)) + + (order_by.empty() ? "" : (" ORDER BY " + order_by)); +} + + +BlockIO InterpreterShowQuotasQuery::execute() +{ + return executeQuery(getRewrittenQuery(), context, true); +} + +} diff --git a/dbms/src/Interpreters/InterpreterShowQuotasQuery.h b/dbms/src/Interpreters/InterpreterShowQuotasQuery.h new file mode 100644 index 00000000000..ae608e81ce5 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterShowQuotasQuery.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class Context; + +class InterpreterShowQuotasQuery : public IInterpreter +{ +public: + InterpreterShowQuotasQuery(const ASTPtr & query_ptr_, Context & context_); + + BlockIO execute() override; + + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + +private: + ASTPtr query_ptr; + Context & context; + + String getRewrittenQuery(); +}; + +} diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index 664efca90f9..c742ac37a5f 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -121,7 +121,9 @@ void startStopAction(Context & context, ASTSystemQuery & query, StorageActionBlo InterpreterSystemQuery::InterpreterSystemQuery(const ASTPtr & query_ptr_, Context & context_) - : query_ptr(query_ptr_->clone()), context(context_), log(&Poco::Logger::get("InterpreterSystemQuery")) {} + : query_ptr(query_ptr_->clone()), context(context_), log(&Poco::Logger::get("InterpreterSystemQuery")) +{ +} BlockIO InterpreterSystemQuery::execute() diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.h b/dbms/src/Interpreters/InterpreterSystemQuery.h index 31945745c1e..97ff9d348e6 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.h +++ b/dbms/src/Interpreters/InterpreterSystemQuery.h @@ -20,6 +20,9 @@ public: BlockIO execute() override; + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + private: ASTPtr query_ptr; Context & context; diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index fa982a1d5b2..df0afee3eb9 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -193,10 +193,10 @@ static const IColumn * extractAsofColumn(const ColumnRawPtrs & key_columns) return key_columns.back(); } -template +template static KeyGetter 
createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) { - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) + if constexpr (is_asof_join) { auto key_column_copy = key_columns; auto key_size_copy = key_sizes; @@ -360,28 +360,19 @@ void Join::setSampleBlock(const Block & block) namespace { /// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. - template + template struct Inserter { - static void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool); - }; - - template - struct Inserter - { - static ALWAYS_INLINE void insert(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insertOne(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, + Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); if (emplace_result.isInserted() || join.anyTakeLastRow()) new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); } - }; - template - struct Inserter - { - static ALWAYS_INLINE void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insertAll(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); @@ -393,13 +384,9 @@ namespace emplace_result.getMapped().insert({stored_block, i}, pool); } } - }; - template - struct Inserter - { - static ALWAYS_INLINE void insert(Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, - const IColumn * asof_column) + static ALWAYS_INLINE void insertAsof(Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, + const IColumn * asof_column) { auto emplace_result = key_getter.emplaceKey(map, i, pool); typename Map::mapped_type * time_series_map = &emplace_result.getMapped(); @@ -416,21 +403,27 @@ namespace Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { + constexpr bool mapped_one = std::is_same_v || + std::is_same_v; + constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof; + const IColumn * asof_column [[maybe_unused]] = nullptr; - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) + if constexpr (is_asof_join) asof_column = extractAsofColumn(key_columns); - auto key_getter = createKeyGetter(key_columns, key_sizes); + auto key_getter = createKeyGetter(key_columns, key_sizes); for (size_t i = 0; i < rows; ++i) { if (has_null_map && (*null_map)[i]) continue; - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - Inserter::insert(join, map, key_getter, stored_block, i, pool, asof_column); + if constexpr (is_asof_join) + Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, asof_column); + else if constexpr (mapped_one) + Inserter::insertOne(join, map, key_getter, stored_block, i, pool); else - Inserter::insert(join, map, key_getter, stored_block, i, pool); + Inserter::insertAll(join, map, key_getter, stored_block, i, pool); } } @@ -508,7 +501,7 @@ void Join::initRightBlockStructure() JoinCommon::convertColumnsToNullable(saved_block_sample, (isFull(kind) ? 
right_table_keys.columns() : 0)); } -Block * Join::storeRightBlock(const Block & source_block) +Block Join::structureRightBlock(const Block & source_block) const { /// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them. Block block = materializeBlock(source_block); @@ -522,14 +515,11 @@ Block * Join::storeRightBlock(const Block & source_block) structured_block.insert(column); } - blocks.push_back(structured_block); - return &blocks.back(); + return structured_block; } bool Join::addJoinedBlock(const Block & block) { - std::unique_lock lock(rwlock); - if (empty()) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); @@ -541,32 +531,45 @@ bool Join::addJoinedBlock(const Block & block) ConstNullMapPtr null_map{}; ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); - size_t rows = block.rows(); - if (rows) - has_no_rows_in_maps = false; - - Block * stored_block = storeRightBlock(block); - - if (kind != ASTTableJoin::Kind::Cross) - { - joinDispatch(kind, strictness, maps, [&](auto, auto strictness_, auto & map) - { - insertFromBlockImpl(*this, type, map, rows, key_columns, key_sizes, stored_block, null_map, pool); - }); - } - /// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream + UInt8 save_nullmap = 0; if (isRightOrFull(kind) && null_map) { - UInt8 has_null = 0; - for (size_t i = 0; !has_null && i < null_map->size(); ++i) - has_null |= (*null_map)[i]; - - if (has_null) - blocks_nullmaps.emplace_back(stored_block, null_map_holder); + for (size_t i = 0; !save_nullmap && i < null_map->size(); ++i) + save_nullmap |= (*null_map)[i]; } - return table_join->sizeLimits().check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); + Block structured_block = structureRightBlock(block); + size_t total_rows = 0; + size_t total_bytes = 0; + + { + std::unique_lock lock(rwlock); + + blocks.emplace_back(std::move(structured_block)); + Block * stored_block = &blocks.back(); + + size_t rows = block.rows(); + if (rows) + has_no_rows_in_maps = false; + + if (kind != ASTTableJoin::Kind::Cross) + { + joinDispatch(kind, strictness, maps, [&](auto, auto strictness_, auto & map) + { + insertFromBlockImpl(*this, type, map, rows, key_columns, key_sizes, stored_block, null_map, pool); + }); + } + + if (save_nullmap) + blocks_nullmaps.emplace_back(stored_block, null_map_holder); + + /// TODO: Do not calculate them every time + total_rows = getTotalRowCount(); + total_bytes = getTotalByteCount(); + } + + return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } @@ -582,7 +585,15 @@ public: const Block & block_with_columns_to_add, const Block & block, const Block & saved_block_sample, - const ColumnsWithTypeAndName & extras) + const ColumnsWithTypeAndName & extras, + const Join & join_, + const ColumnRawPtrs & key_columns_, + const Sizes & key_sizes_) + : join(join_) + , key_columns(key_columns_) + , key_sizes(key_sizes_) + , rows_to_add(block.rows()) + , need_filter(false) { size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); @@ -613,23 +624,43 @@ public: return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].first, type_name[i].second); } + template void appendFromBlock(const Block & block, size_t row_num) { + if constexpr (has_defaults) + applyLazyDefaults(); + for (size_t j = 0; j < right_indexes.size(); ++j) columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, 
row_num); } - void appendDefaultRow() { - for (size_t j = 0; j < right_indexes.size(); ++j) - columns[j]->insertDefault(); + ++lazy_defaults_count; } + void applyLazyDefaults() + { + if (lazy_defaults_count) + { + for (size_t j = 0; j < right_indexes.size(); ++j) + columns[j]->insertManyDefaults(lazy_defaults_count); + lazy_defaults_count = 0; + } + } + + const Join & join; + const ColumnRawPtrs & key_columns; + const Sizes & key_sizes; + size_t rows_to_add; + std::unique_ptr offsets_to_replicate; + bool need_filter; + private: TypeAndNames type_name; MutableColumns columns; std::vector right_indexes; + size_t lazy_defaults_count = 0; void addColumn(const ColumnWithTypeAndName & src_column) { @@ -639,131 +670,190 @@ private: } }; -template -void addFoundRow(const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset [[maybe_unused]]) +template +void addFoundRowAll(const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset) { - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) - { - added.appendFromBlock(*mapped.block, mapped.row_num); - } + if constexpr (add_missing) + added.applyLazyDefaults(); - if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) + for (auto it = mapped.begin(); it.ok(); ++it) { - for (auto it = mapped.begin(); it.ok(); ++it) - { - added.appendFromBlock(*it->block, it->row_num); - ++current_offset; - } + added.appendFromBlock(*it->block, it->row_num); + ++current_offset; } }; -template +template void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) { - if constexpr (_add_missing) + if constexpr (add_missing) { added.appendDefaultRow(); - ++current_offset; + if constexpr (need_offset) + ++current_offset; } } +template +void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) +{ + if constexpr (need_filter) + filter[pos] = 1; +} + /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
-template -std::unique_ptr NO_INLINE joinRightIndexedColumns( - const Join & join, const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - AddedColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter) +template +NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map [[maybe_unused]]) { - std::unique_ptr offsets_to_replicate; - if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); + constexpr bool is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any; + constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All; + constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof; + constexpr bool is_semi_join = STRICTNESS == ASTTableJoin::Strictness::Semi; + constexpr bool is_anti_join = STRICTNESS == ASTTableJoin::Strictness::Anti; + constexpr bool left = KIND == ASTTableJoin::Kind::Left; + constexpr bool right = KIND == ASTTableJoin::Kind::Right; + constexpr bool full = KIND == ASTTableJoin::Kind::Full; + + constexpr bool add_missing = (left || full) && !is_semi_join; + constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); + + size_t rows = added_columns.rows_to_add; + IColumn::Filter filter; + if constexpr (need_filter) + filter = IColumn::Filter(rows, 0); Arena pool; - const IColumn * asof_column [[maybe_unused]] = nullptr; - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - asof_column = extractAsofColumn(key_columns); - auto key_getter = createKeyGetter(key_columns, key_sizes); + if constexpr (need_replication) + added_columns.offsets_to_replicate = std::make_unique(rows); + const IColumn * asof_column [[maybe_unused]] = nullptr; + if constexpr (is_asof_join) + asof_column = extractAsofColumn(added_columns.key_columns); + + auto key_getter = createKeyGetter(added_columns.key_columns, added_columns.key_sizes); IColumn::Offset current_offset = 0; for (size_t i = 0; i < rows; ++i) { - if (_has_null_map && (*null_map)[i]) + if constexpr (has_null_map) { - addNotFoundRow<_add_missing>(added_columns, current_offset); + if ((*null_map)[i]) + { + addNotFoundRow(added_columns, current_offset); + + if constexpr (need_replication) + (*added_columns.offsets_to_replicate)[i] = current_offset; + continue; + } + } + + auto find_result = key_getter.findKey(map, i, pool); + + if (find_result.isFound()) + { + auto & mapped = find_result.getMapped(); + + if constexpr (is_asof_join) + { + const Join & join = added_columns.join; + if (const RowRef * found = mapped.findAsof(join.getAsofType(), join.getAsofInequality(), asof_column, i)) + { + setUsed(filter, i); + mapped.setUsed(); + added_columns.appendFromBlock(*found->block, found->row_num); + } + else + addNotFoundRow(added_columns, current_offset); + } + else if constexpr (is_all_join) + { + setUsed(filter, i); + mapped.setUsed(); + addFoundRowAll(mapped, added_columns, current_offset); + } + else if constexpr ((is_any_join || is_semi_join) && right) + { + /// Use first appeared left key + it needs left columns replication + if (mapped.setUsedOnce()) + { + setUsed(filter, i); + addFoundRowAll(mapped, added_columns, current_offset); + } + } + else if constexpr (is_any_join && KIND == ASTTableJoin::Kind::Inner) + { + /// Use first appeared left key only + if (mapped.setUsedOnce()) + { + setUsed(filter, i); + added_columns.appendFromBlock(*mapped.block, mapped.row_num); + } + } + else if constexpr
(is_any_join && full) + { + /// TODO + } + else if constexpr (is_anti_join) + { + if constexpr (right) + mapped.setUsed(); + } + else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) + { + setUsed(filter, i); + mapped.setUsed(); + added_columns.appendFromBlock(*mapped.block, mapped.row_num); + } } else { - auto find_result = key_getter.findKey(map, i, pool); - - if (find_result.isFound()) - { - auto & mapped = find_result.getMapped(); - - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - { - if (const RowRef * found = mapped.findAsof(join.getAsofType(), join.getAsofInequality(), asof_column, i)) - { - filter[i] = 1; - mapped.setUsed(); - added_columns.appendFromBlock(*found->block, found->row_num); - } - else - addNotFoundRow<_add_missing>(added_columns, current_offset); - } - else - { - filter[i] = 1; - mapped.setUsed(); - addFoundRow(mapped, added_columns, current_offset); - } - } - else - addNotFoundRow<_add_missing>(added_columns, current_offset); + if constexpr (is_anti_join && left) + setUsed(filter, i); + addNotFoundRow(added_columns, current_offset); } - if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) - (*offsets_to_replicate)[i] = current_offset; + if constexpr (need_replication) + (*added_columns.offsets_to_replicate)[i] = current_offset; } - return offsets_to_replicate; -} - -template -IColumn::Filter joinRightColumns( - const Join & join, const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - AddedColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & offsets_to_replicate) -{ - constexpr bool left_or_full = static_in_v; - - IColumn::Filter filter(rows, 0); - - if (null_map) - offsets_to_replicate = joinRightIndexedColumns( - join, map, rows, key_columns, key_sizes, added_columns, null_map, filter); - else - offsets_to_replicate = joinRightIndexedColumns( - join, map, rows, key_columns, key_sizes, added_columns, null_map, filter); - + added_columns.applyLazyDefaults(); return filter; } +template +IColumn::Filter joinRightColumnsSwitchNullability(const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map) +{ + if (added_columns.need_filter) + { + if (null_map) + return joinRightColumns(map, added_columns, null_map); + else + return joinRightColumns(map, added_columns, nullptr); + } + else + { + if (null_map) + return joinRightColumns(map, added_columns, null_map); + else + return joinRightColumns(map, added_columns, nullptr); + } +} + template -IColumn::Filter switchJoinRightColumns( - Join::Type type, const Join & join, - const Maps & maps_, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - AddedColumns & added_columns, ConstNullMapPtr null_map, - std::unique_ptr & offsets_to_replicate) +IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_columns, Join::Type type, const ConstNullMapPtr & null_map) { switch (type) { #define M(TYPE) \ case Join::Type::TYPE: \ - return joinRightColumns>::Type>(\ - join, *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, offsets_to_replicate); + return joinRightColumnsSwitchNullability>::Type>(\ + *maps_.TYPE, added_columns, null_map);\ + break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -782,6 +872,20 @@ void Join::joinBlockImpl( const Block & block_with_columns_to_add, const Maps & maps_) const { + constexpr bool is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any; + constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All; + constexpr bool is_asof_join = STRICTNESS == 
ASTTableJoin::Strictness::Asof; + constexpr bool is_semi_join = STRICTNESS == ASTTableJoin::Strictness::Semi; + constexpr bool is_anti_join = STRICTNESS == ASTTableJoin::Strictness::Anti; + + constexpr bool left = KIND == ASTTableJoin::Kind::Left; + constexpr bool right = KIND == ASTTableJoin::Kind::Right; + constexpr bool inner = KIND == ASTTableJoin::Kind::Inner; + constexpr bool full = KIND == ASTTableJoin::Kind::Full; + + constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); + constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); + /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; ColumnRawPtrs key_columns = JoinCommon::temporaryMaterializeColumns(block, key_names_left, materialized_columns); @@ -796,8 +900,7 @@ void Join::joinBlockImpl( * Because if they are constants, then in the "not joined" rows, they may have different values * - default values, which can differ from the values of these constants. */ - constexpr bool right_or_full = static_in_v; - if constexpr (right_or_full) + if constexpr (right || full) { materializeBlockInplace(block); @@ -811,25 +914,22 @@ void Join::joinBlockImpl( * For ASOF, the last column is used as the ASOF column */ ColumnsWithTypeAndName extras; - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) + if constexpr (is_asof_join) extras.push_back(right_table_keys.getByName(key_names_right.back())); - AddedColumns added(sample_block_with_columns_to_add, block_with_columns_to_add, block, saved_block_sample, extras); - std::unique_ptr offsets_to_replicate; + AddedColumns added_columns(sample_block_with_columns_to_add, block_with_columns_to_add, block, saved_block_sample, + extras, *this, key_columns, key_sizes); + bool has_required_right_keys = (required_right_keys.columns() != 0); + added_columns.need_filter = need_filter || has_required_right_keys; - IColumn::Filter row_filter = switchJoinRightColumns( - type, *this, maps_, block.rows(), key_columns, key_sizes, added, null_map, offsets_to_replicate); + IColumn::Filter row_filter = switchJoinRightColumns(maps_, added_columns, type, null_map); - for (size_t i = 0; i < added.size(); ++i) - block.insert(added.moveColumn(i)); - - /// Filter & insert missing rows - constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All; - constexpr bool inner_or_right = static_in_v; + for (size_t i = 0; i < added_columns.size(); ++i) + block.insert(added_columns.moveColumn(i)); std::vector right_keys_to_replicate [[maybe_unused]]; - if constexpr (!is_all_join && inner_or_right) + if constexpr (need_filter) { /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. 
for (size_t i = 0; i < existing_columns; ++i) @@ -846,7 +946,7 @@ void Join::joinBlockImpl( block.insert(correctNullability({col.column, col.type, right_key.name}, is_nullable)); } } - else + else if (has_required_right_keys) { /// Some trash to represent IColumn::Filter as ColumnUInt8 needed for ColumnNullable::applyNullMap() auto null_map_filter_ptr = ColumnUInt8::create(); @@ -866,15 +966,14 @@ void Join::joinBlockImpl( ColumnPtr thin_column = filterWithBlanks(col.column, filter); block.insert(correctNullability({thin_column, col.type, right_key.name}, is_nullable, null_map_filter)); - if constexpr (is_all_join) + if constexpr (need_replication) right_keys_to_replicate.push_back(block.getPositionByName(right_key.name)); } } - if constexpr (is_all_join) + if constexpr (need_replication) { - if (!offsets_to_replicate) - throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); + std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; /// If ALL ... JOIN - we replicate all the columns except the new ones. for (size_t i = 0; i < existing_columns; ++i) @@ -964,7 +1063,7 @@ DataTypePtr Join::joinGetReturnType(const String & column_name) const template void Join::joinGetImpl(Block & block, const String & column_name, const Maps & maps_) const { - joinBlockImpl( + joinBlockImpl( block, {block.getByPosition(0).name}, {sample_block_with_columns_to_add.getByName(column_name)}, maps_); } @@ -981,9 +1080,10 @@ void Join::joinGet(Block & block, const String & column_name) const checkTypeOfKey(block, right_table_keys); - if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::Any) + if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) && + kind == ASTTableJoin::Kind::Left) { - joinGetImpl(block, column_name, std::get(maps)); + joinGetImpl(block, column_name, std::get(maps)); } else throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::LOGICAL_ERROR); @@ -1017,50 +1117,44 @@ void Join::joinTotals(Block & block) const } -template -struct AdderNonJoined; - template -struct AdderNonJoined +struct AdderNonJoined { static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) { - for (size_t j = 0; j < columns_right.size(); ++j) + constexpr bool mapped_asof = std::is_same_v; + constexpr bool mapped_one = std::is_same_v || std::is_same_v; + + if constexpr (mapped_asof) { - const auto & mapped_column = mapped.block->getByPosition(j).column; - columns_right[j]->insertFrom(*mapped_column, mapped.row_num); + /// Do nothing } - - ++rows_added; - } -}; - -template -struct AdderNonJoined -{ - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) - { - for (auto it = mapped.begin(); it.ok(); ++it) + else if constexpr (mapped_one) { for (size_t j = 0; j < columns_right.size(); ++j) { - const auto & mapped_column = it->block->getByPosition(j).column; - columns_right[j]->insertFrom(*mapped_column, it->row_num); + const auto & mapped_column = mapped.block->getByPosition(j).column; + columns_right[j]->insertFrom(*mapped_column, mapped.row_num); } ++rows_added; } + else + { + for (auto it = mapped.begin(); it.ok(); ++it) + { + for (size_t j = 0; j < columns_right.size(); ++j) + { + const auto & mapped_column = it->block->getByPosition(j).column; + columns_right[j]->insertFrom(*mapped_column, it->row_num); + } + + ++rows_added; + } + } } }; -template -struct AdderNonJoined -{ - static void add(const Mapped & /*mapped*/, 
size_t & /*rows_added*/, MutableColumns & /*columns_right*/) - { - // If we have a leftover match in the right hand side, not required to join because we are only support asof left/inner - } -}; /// Stream from not joined earlier rows of the right table. class NonJoinedBlockInputStream : public IBlockInputStream @@ -1269,10 +1363,11 @@ private: for (; it != end; ++it) { const Mapped & mapped = it->getMapped(); + if (mapped.getUsed()) continue; - AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); + AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); if (rows_added >= max_block_size) { @@ -1312,6 +1407,10 @@ private: BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const { + if (table_join->strictness() == ASTTableJoin::Strictness::Asof || + table_join->strictness() == ASTTableJoin::Strictness::Semi) + return {}; + if (isRightOrFull(table_join->kind())) return std::make_shared(*this, result_sample_block, max_block_size); return {}; diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index eec52788ae0..ff46380db13 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -44,6 +44,16 @@ struct WithFlags : T mutable std::atomic used {}; void setUsed() const { used.store(true, std::memory_order_relaxed); } /// Could be set simultaneously from different threads. bool getUsed() const { return used; } + + bool setUsedOnce() const + { + /// Fast check to prevent heavy CAS with seq_cst order + if (used.load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return used.compare_exchange_strong(expected, true); + } }; template @@ -54,13 +64,14 @@ struct WithFlags : T void setUsed() const {} bool getUsed() const { return true; } + bool setUsedOnce() const { return true; } }; -using MappedAny = WithFlags; -using MappedAll = WithFlags; -using MappedAnyFull = WithFlags; -using MappedAllFull = WithFlags; -using MappedAsof = WithFlags; +using MappedOne = WithFlags; +using MappedAll = WithFlags; +using MappedOneFlagged = WithFlags; +using MappedAllFlagged = WithFlags; +using MappedAsof = WithFlags; } @@ -68,11 +79,23 @@ using MappedAsof = WithFlags; * It is just a hash table: keys -> rows of joined ("right") table. * Additionally, CROSS JOIN is supported: instead of hash table, it use just set of blocks without keys. * - * JOIN-s could be of nine types: ANY/ALL × LEFT/INNER/RIGHT/FULL, and also CROSS. + * JOIN-s could be of these types: + * - ALL × LEFT/INNER/RIGHT/FULL + * - ANY × LEFT/INNER/RIGHT + * - SEMI/ANTI x LEFT/RIGHT + * - ASOF x LEFT/INNER + * - CROSS * - * If ANY is specified - then select only one row from the "right" table, (first encountered row), even if there was more matching rows. - * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. - * ANY is more efficient. + * ALL means usual JOIN, when rows are multiplied by number of matching rows from the "right" table. + * ANY uses one row per unique key from the right table. For LEFT JOIN it would be any row (with needed joined key) from the right table, + * for RIGHT JOIN it would be any row from the left table, and for INNER it would be any row from the right and any row from the left. + * SEMI JOIN filters the left table by keys that are present in the right table for LEFT JOIN, and filters the right table by keys from the left table + * for RIGHT JOIN. In other words, SEMI JOIN returns only rows whose joining keys are present in the other table.
+ * ANTI JOIN is the same as SEMI JOIN, but returns rows whose join keys are NOT present in the other table. + * SEMI/ANTI JOINs allow getting values from both tables. From the filtering table they take any row with the same join key; ANTI JOIN + * returns defaults for the other table's columns. + * ASOF JOIN is a non-equi join. For one key column it finds the nearest value to join according to the join inequality. + * It's expected that ANY|SEMI LEFT JOIN is more efficient than the ALL one. * * If INNER is specified - leave only rows that have matching rows from "right" table. * If LEFT is specified - in case when there is no matching row in "right" table, fill it with default values instead. @@ -264,13 +287,13 @@ public: } }; - using MapsAny = MapsTemplate; + using MapsOne = MapsTemplate; using MapsAll = MapsTemplate; - using MapsAnyFull = MapsTemplate; - using MapsAllFull = MapsTemplate; + using MapsOneFlagged = MapsTemplate; + using MapsAllFlagged = MapsTemplate; using MapsAsof = MapsTemplate; - using MapsVariant = std::variant; + using MapsVariant = std::variant; private: friend class NonJoinedBlockInputStream; @@ -341,8 +364,8 @@ private: */ void setSampleBlock(const Block & block); - /// Modify (structure) and save right block, @returns pointer to saved block - Block * storeRightBlock(const Block & stored_block); + /// Modify (structure) right block to save it in the block list + Block structureRightBlock(const Block & stored_block) const; void initRightBlockStructure(); void initRequiredRightKeys(); diff --git a/dbms/src/Interpreters/Quota.cpp b/dbms/src/Interpreters/Quota.cpp deleted file mode 100644 index 5123f4fd3e8..00000000000 --- a/dbms/src/Interpreters/Quota.cpp +++ /dev/null @@ -1,345 +0,0 @@ -#include - -#include - -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int QUOTA_EXPIRED; - extern const int QUOTA_DOESNT_ALLOW_KEYS; - extern const int UNKNOWN_QUOTA; -} - - -template -void QuotaValues::initFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) -{ - queries = config.getUInt64(config_elem + ".queries", 0); - errors = config.getUInt64(config_elem + ".errors", 0); - result_rows = config.getUInt64(config_elem + ".result_rows", 0); - result_bytes = config.getUInt64(config_elem + ".result_bytes", 0); - read_rows = config.getUInt64(config_elem + ".read_rows", 0); - read_bytes = config.getUInt64(config_elem + ".read_bytes", 0); - execution_time_usec = config.getUInt64(config_elem + ".execution_time", 0) * 1000000ULL; -} - -template void QuotaValues::initFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); -template void QuotaValues>::initFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); - - -void QuotaForInterval::initFromConfig( - const String & config_elem, time_t duration_, bool randomize_, time_t offset_, const Poco::Util::AbstractConfiguration & config) -{ - rounded_time.store(0, std::memory_order_relaxed); - duration = duration_; - randomize = randomize_; - offset = offset_; - max.initFromConfig(config_elem, config); -} - -void QuotaForInterval::checkExceeded(time_t current_time, const String & quota_name, const String & user_name) -{ - updateTime(current_time); - check(max.queries, used.queries, quota_name, user_name, "Queries"); - check(max.errors, used.errors, quota_name, user_name, "Errors"); - check(max.result_rows, used.result_rows, quota_name, user_name, "Total result rows"); - check(max.result_bytes,
used.result_bytes, quota_name, user_name, "Total result bytes"); - check(max.read_rows, used.read_rows, quota_name, user_name, "Total rows read"); - check(max.read_bytes, used.read_bytes, quota_name, user_name, "Total bytes read"); - check(max.execution_time_usec / 1000000, used.execution_time_usec / 1000000, quota_name, user_name, "Total execution time"); -} - -String QuotaForInterval::toString() const -{ - std::stringstream res; - - auto loaded_rounded_time = rounded_time.load(std::memory_order_relaxed); - - res << std::fixed << std::setprecision(3) - << "Interval: " << LocalDateTime(loaded_rounded_time) << " - " << LocalDateTime(loaded_rounded_time + duration) << ".\n" - << "Queries: " << used.queries << ".\n" - << "Errors: " << used.errors << ".\n" - << "Result rows: " << used.result_rows << ".\n" - << "Result bytes: " << used.result_bytes << ".\n" - << "Read rows: " << used.read_rows << ".\n" - << "Read bytes: " << used.read_bytes << ".\n" - << "Execution time: " << used.execution_time_usec / 1000000.0 << " sec.\n"; - - return res.str(); -} - -void QuotaForInterval::addQuery() noexcept -{ - ++used.queries; -} - -void QuotaForInterval::addError() noexcept -{ - ++used.errors; -} - -void QuotaForInterval::checkAndAddResultRowsBytes(time_t current_time, const String & quota_name, const String & user_name, size_t rows, size_t bytes) -{ - used.result_rows += rows; - used.result_bytes += bytes; - checkExceeded(current_time, quota_name, user_name); -} - -void QuotaForInterval::checkAndAddReadRowsBytes(time_t current_time, const String & quota_name, const String & user_name, size_t rows, size_t bytes) -{ - used.read_rows += rows; - used.read_bytes += bytes; - checkExceeded(current_time, quota_name, user_name); -} - -void QuotaForInterval::checkAndAddExecutionTime(time_t current_time, const String & quota_name, const String & user_name, Poco::Timespan amount) -{ - /// Information about internals of Poco::Timespan used. - used.execution_time_usec += amount.totalMicroseconds(); - checkExceeded(current_time, quota_name, user_name); -} - -void QuotaForInterval::updateTime(time_t current_time) -{ - /** If current time is greater than end of interval, - * then clear accumulated quota values and switch to next interval [rounded_time, rounded_time + duration). - */ - - auto loaded_rounded_time = rounded_time.load(std::memory_order_acquire); - while (true) - { - if (current_time < loaded_rounded_time + static_cast(duration)) - break; - - time_t new_rounded_time = (current_time - offset) / duration * duration + offset; - if (rounded_time.compare_exchange_strong(loaded_rounded_time, new_rounded_time)) - { - used.clear(); - break; - } - } -} - -void QuotaForInterval::check( - size_t max_amount, size_t used_amount, - const String & quota_name, const String & user_name, const char * resource_name) -{ - if (max_amount && used_amount > max_amount) - { - std::stringstream message; - message << "Quota for user '" << user_name << "' for "; - - if (duration == 3600) - message << "1 hour"; - else if (duration == 60) - message << "1 minute"; - else if (duration % 3600 == 0) - message << (duration / 3600) << " hours"; - else if (duration % 60 == 0) - message << (duration / 60) << " minutes"; - else - message << duration << " seconds"; - - message << " has been exceeded. " - << resource_name << ": " << used_amount << ", max: " << max_amount << ". " - << "Interval will end at " << LocalDateTime(rounded_time.load(std::memory_order_relaxed) + duration) << ". 
" - << "Name of quota template: '" << quota_name << "'."; - - throw Exception(message.str(), ErrorCodes::QUOTA_EXPIRED); - } -} - - -void QuotaForIntervals::initFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, pcg64 & rng) -{ - Poco::Util::AbstractConfiguration::Keys config_keys; - config.keys(config_elem, config_keys); - - for (Poco::Util::AbstractConfiguration::Keys::const_iterator it = config_keys.begin(); it != config_keys.end(); ++it) - { - if (!startsWith(*it, "interval")) - continue; - - String interval_config_elem = config_elem + "." + *it; - time_t duration = config.getInt(interval_config_elem + ".duration", 0); - time_t offset = 0; - - if (!duration) /// Skip quotas with zero duration - continue; - - bool randomize = config.getBool(interval_config_elem + ".randomize", false); - if (randomize) - offset = std::uniform_int_distribution(0, duration - 1)(rng); - - cont[duration].initFromConfig(interval_config_elem, duration, randomize, offset, config); - } -} - -void QuotaForIntervals::setMax(const QuotaForIntervals & quota) -{ - for (Container::iterator it = cont.begin(); it != cont.end();) - { - if (quota.cont.count(it->first)) - ++it; - else - cont.erase(it++); - } - - for (auto & x : quota.cont) - { - if (!cont.count(x.first)) - cont.emplace(x.first, x.second); - else - cont[x.first].max = x.second.max; - } -} - -void QuotaForIntervals::checkExceeded(time_t current_time) -{ - for (Container::reverse_iterator it = cont.rbegin(); it != cont.rend(); ++it) - it->second.checkExceeded(current_time, quota_name, user_name); -} - -void QuotaForIntervals::addQuery() noexcept -{ - for (Container::reverse_iterator it = cont.rbegin(); it != cont.rend(); ++it) - it->second.addQuery(); -} - -void QuotaForIntervals::addError() noexcept -{ - for (Container::reverse_iterator it = cont.rbegin(); it != cont.rend(); ++it) - it->second.addError(); -} - -void QuotaForIntervals::checkAndAddResultRowsBytes(time_t current_time, size_t rows, size_t bytes) -{ - for (Container::reverse_iterator it = cont.rbegin(); it != cont.rend(); ++it) - it->second.checkAndAddResultRowsBytes(current_time, quota_name, user_name, rows, bytes); -} - -void QuotaForIntervals::checkAndAddReadRowsBytes(time_t current_time, size_t rows, size_t bytes) -{ - for (Container::reverse_iterator it = cont.rbegin(); it != cont.rend(); ++it) - it->second.checkAndAddReadRowsBytes(current_time, quota_name, user_name, rows, bytes); -} - -void QuotaForIntervals::checkAndAddExecutionTime(time_t current_time, Poco::Timespan amount) -{ - for (Container::reverse_iterator it = cont.rbegin(); it != cont.rend(); ++it) - it->second.checkAndAddExecutionTime(current_time, quota_name, user_name, amount); -} - -String QuotaForIntervals::toString() const -{ - std::stringstream res; - - for (Container::const_reverse_iterator it = cont.rbegin(); it != cont.rend(); ++it) - res << std::endl << it->second.toString(); - - return res.str(); -} - - -void Quota::loadFromConfig(const String & config_elem, const String & name_, const Poco::Util::AbstractConfiguration & config, pcg64 & rng) -{ - name = name_; - - bool new_keyed_by_ip = config.has(config_elem + ".keyed_by_ip"); - bool new_is_keyed = new_keyed_by_ip || config.has(config_elem + ".keyed"); - - if (new_is_keyed != is_keyed || new_keyed_by_ip != keyed_by_ip) - { - keyed_by_ip = new_keyed_by_ip; - is_keyed = new_is_keyed; - /// Meaning of keys has been changed. Throw away accumulated values. 
- quota_for_keys.clear(); - } - - ignore_key_if_not_keyed = config.has(config_elem + ".ignore_key_if_not_keyed"); - - QuotaForIntervals new_max(name, {}); - new_max.initFromConfig(config_elem, config, rng); - if (!new_max.hasEqualConfiguration(max)) - { - max = new_max; - for (auto & quota : quota_for_keys) - quota.second->setMax(max); - } -} - -QuotaForIntervalsPtr Quota::get(const String & quota_key, const String & user_name, const Poco::Net::IPAddress & ip) -{ - if (!quota_key.empty() && !ignore_key_if_not_keyed && (!is_keyed || keyed_by_ip)) - throw Exception("Quota " + name + " (for user " + user_name + ") doesn't allow client supplied keys.", - ErrorCodes::QUOTA_DOESNT_ALLOW_KEYS); - - /** Quota is calculated separately: - * - for each IP-address, if 'keyed_by_ip'; - * - otherwise for each 'quota_key', if present; - * - otherwise for each 'user_name'. - */ - - UInt64 quota_key_hashed = sipHash64( - keyed_by_ip - ? ip.toString() - : (!quota_key.empty() - ? quota_key - : user_name)); - - std::lock_guard lock(mutex); - - Container::iterator it = quota_for_keys.find(quota_key_hashed); - if (quota_for_keys.end() == it) - it = quota_for_keys.emplace(quota_key_hashed, std::make_shared(max, user_name)).first; - - return it->second; -} - - -void Quotas::loadFromConfig(const Poco::Util::AbstractConfiguration & config) -{ - pcg64 rng; - - Poco::Util::AbstractConfiguration::Keys config_keys; - config.keys("quotas", config_keys); - - /// Remove keys, that now absent in config. - std::set keys_set(config_keys.begin(), config_keys.end()); - for (Container::iterator it = cont.begin(); it != cont.end();) - { - if (keys_set.count(it->first)) - ++it; - else - cont.erase(it++); - } - - for (Poco::Util::AbstractConfiguration::Keys::const_iterator it = config_keys.begin(); it != config_keys.end(); ++it) - { - if (!cont.count(*it)) - cont.try_emplace(*it); - cont[*it].loadFromConfig("quotas." + *it, *it, config, rng); - } -} - -QuotaForIntervalsPtr Quotas::get(const String & name, const String & quota_key, const String & user_name, const Poco::Net::IPAddress & ip) -{ - Container::iterator it = cont.find(name); - if (cont.end() == it) - throw Exception("Unknown quota " + name, ErrorCodes::UNKNOWN_QUOTA); - - return it->second.get(quota_key, user_name, ip); -} - -} diff --git a/dbms/src/Interpreters/Quota.h b/dbms/src/Interpreters/Quota.h deleted file mode 100644 index c1fb3f143fb..00000000000 --- a/dbms/src/Interpreters/Quota.h +++ /dev/null @@ -1,263 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include - -#include -#include - -#include - -#include -#include -#include - - -namespace DB -{ - -/** Quota for resources consumption for specific interval. - * Used to limit resource usage by user. - * Quota is applied "softly" - could be slightly exceed, because it is checked usually only on each block of processed data. - * Accumulated values are not persisted and are lost on server restart. - * Quota is local to server, - * but for distributed queries, accumulated values for read rows and bytes - * are collected from all participating servers and accumulated locally. - */ - -/// Used both for maximum allowed values and for counters of current accumulated values. -template /// either size_t or std::atomic -struct QuotaValues -{ - /// Zero values (for maximums) means no limit. - Counter queries; /// Number of queries. - Counter errors; /// Number of queries with exceptions. - Counter result_rows; /// Number of rows returned as result. 
- Counter result_bytes; /// Number of bytes returned as result. - Counter read_rows; /// Number of rows read from tables. - Counter read_bytes; /// Number of bytes read from tables. - Counter execution_time_usec; /// Total amount of query execution time in microseconds. - - QuotaValues() - { - clear(); - } - - QuotaValues(const QuotaValues & rhs) - { - tuple() = rhs.tuple(); - } - - QuotaValues & operator=(const QuotaValues & rhs) - { - tuple() = rhs.tuple(); - return *this; - } - - void clear() - { - tuple() = std::make_tuple(0, 0, 0, 0, 0, 0, 0); - } - - void initFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); - - bool operator== (const QuotaValues & rhs) const - { - return tuple() == rhs.tuple(); - } - -private: - auto tuple() - { - return std::forward_as_tuple(queries, errors, result_rows, result_bytes, read_rows, read_bytes, execution_time_usec); - } - - auto tuple() const - { - return std::make_tuple(queries, errors, result_rows, result_bytes, read_rows, read_bytes, execution_time_usec); - } -}; - -template <> -inline auto QuotaValues>::tuple() const -{ - return std::make_tuple( - queries.load(std::memory_order_relaxed), - errors.load(std::memory_order_relaxed), - result_rows.load(std::memory_order_relaxed), - result_bytes.load(std::memory_order_relaxed), - read_rows.load(std::memory_order_relaxed), - read_bytes.load(std::memory_order_relaxed), - execution_time_usec.load(std::memory_order_relaxed)); -} - - -/// Time, rounded down to start of interval; limits for that interval and accumulated values. -struct QuotaForInterval -{ - std::atomic rounded_time {0}; - size_t duration = 0; - bool randomize = false; - time_t offset = 0; /// Offset of interval for randomization (to avoid DoS if intervals for many users end at one time). - QuotaValues max; - QuotaValues> used; - - QuotaForInterval() = default; - QuotaForInterval(time_t duration_) : duration(duration_) {} - - void initFromConfig(const String & config_elem, time_t duration_, bool randomize_, time_t offset_, const Poco::Util::AbstractConfiguration & config); - - /// Increase current value. - void addQuery() noexcept; - void addError() noexcept; - - /// Check if quota is already exceeded. If that, throw an exception. - void checkExceeded(time_t current_time, const String & quota_name, const String & user_name); - - /// Check corresponding value. If exceeded, throw an exception. Otherwise, increase that value. - void checkAndAddResultRowsBytes(time_t current_time, const String & quota_name, const String & user_name, size_t rows, size_t bytes); - void checkAndAddReadRowsBytes(time_t current_time, const String & quota_name, const String & user_name, size_t rows, size_t bytes); - void checkAndAddExecutionTime(time_t current_time, const String & quota_name, const String & user_name, Poco::Timespan amount); - - /// Get a text, describing what quota is exceeded. - String toString() const; - - /// Only compare configuration, not accumulated (used) values or random offsets. 
- bool operator== (const QuotaForInterval & rhs) const - { - return randomize == rhs.randomize - && duration == rhs.duration - && max == rhs.max; - } - - QuotaForInterval & operator= (const QuotaForInterval & rhs) - { - rounded_time.store(rhs.rounded_time.load(std::memory_order_relaxed)); - duration = rhs.duration; - randomize = rhs.randomize; - offset = rhs.offset; - max = rhs.max; - used = rhs.used; - return *this; - } - - QuotaForInterval(const QuotaForInterval & rhs) - { - *this = rhs; - } - -private: - /// Reset counters of used resources, if interval for quota is expired. - void updateTime(time_t current_time); - void check(size_t max_amount, size_t used_amount, - const String & quota_name, const String & user_name, const char * resource_name); -}; - - -struct Quota; - -/// Length of interval -> quota: maximum allowed and currently accumulated values for that interval (example: 3600 -> values for current hour). -class QuotaForIntervals -{ -private: - /// While checking, will walk through intervals in order of decreasing size - from largest to smallest. - /// To report first about largest interval on what quota was exceeded. - using Container = std::map; - Container cont; - - std::string quota_name; - std::string user_name; /// user name is set only for current counters for user, not for object that contain maximum values (limits). - -public: - QuotaForIntervals(const std::string & quota_name_, const std::string & user_name_) - : quota_name(quota_name_), user_name(user_name_) {} - - QuotaForIntervals(const QuotaForIntervals & other, const std::string & user_name_) - : QuotaForIntervals(other) - { - user_name = user_name_; - } - - QuotaForIntervals() = default; - QuotaForIntervals(const QuotaForIntervals &) = default; - QuotaForIntervals & operator=(const QuotaForIntervals &) = default; - - /// Is there at least one interval for counting quota? - bool empty() const - { - return cont.empty(); - } - - void initFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, pcg64 & rng); - - /// Set maximum values (limits) from passed argument. - /// Remove intervals that does not exist in argument. Add intervals from argument, that we don't have. - void setMax(const QuotaForIntervals & quota); - - void addQuery() noexcept; - void addError() noexcept; - - void checkExceeded(time_t current_time); - - void checkAndAddResultRowsBytes(time_t current_time, size_t rows, size_t bytes); - void checkAndAddReadRowsBytes(time_t current_time, size_t rows, size_t bytes); - void checkAndAddExecutionTime(time_t current_time, Poco::Timespan amount); - - /// Get text, describing what part of quota has been exceeded. - String toString() const; - - bool hasEqualConfiguration(const QuotaForIntervals & rhs) const - { - return cont == rhs.cont && quota_name == rhs.quota_name; - } -}; - -using QuotaForIntervalsPtr = std::shared_ptr; - - -/// Quota key -> quotas (max and current values) for intervals. If quota doesn't have keys, then values stored at key 0. -struct Quota -{ - using Container = std::unordered_map; - - String name; - - /// Maximum values from config. - QuotaForIntervals max; - /// Maximum and accumulated values for different keys. - /// For all keys, maximum values are the same and taken from 'max'. - Container quota_for_keys; - std::mutex mutex; - - bool is_keyed = false; - - /// If the quota is not keyed, but the user passed some key, ignore it instead of throwing exception. 
- /// For transitional periods, when you want to enable quota keys - /// - first, enable passing keys from your application, then make quota keyed in ClickHouse users config. - bool ignore_key_if_not_keyed = false; - - bool keyed_by_ip = false; - - void loadFromConfig(const String & config_elem, const String & name_, const Poco::Util::AbstractConfiguration & config, pcg64 & rng); - QuotaForIntervalsPtr get(const String & quota_key, const String & user_name, const Poco::Net::IPAddress & ip); -}; - - -class Quotas -{ -private: - /// Name of quota -> quota. - using Container = std::unordered_map; - Container cont; - -public: - void loadFromConfig(const Poco::Util::AbstractConfiguration & config); - QuotaForIntervalsPtr get(const String & name, const String & quota_key, - const String & user_name, const Poco::Net::IPAddress & ip); -}; - -} diff --git a/dbms/src/Interpreters/SelectQueryOptions.h b/dbms/src/Interpreters/SelectQueryOptions.h index 4fd94a830b8..a49245f5609 100644 --- a/dbms/src/Interpreters/SelectQueryOptions.h +++ b/dbms/src/Interpreters/SelectQueryOptions.h @@ -24,19 +24,16 @@ struct SelectQueryOptions { QueryProcessingStage::Enum to_stage; size_t subquery_depth; - bool only_analyze; - bool modify_inplace; - bool remove_duplicates; - bool ignore_limits; + bool only_analyze = false; + bool modify_inplace = false; + bool remove_duplicates = false; + bool ignore_quota = false; + bool ignore_limits = false; SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) - : to_stage(stage) - , subquery_depth(depth) - , only_analyze(false) - , modify_inplace(false) - , remove_duplicates(false) - , ignore_limits(false) - {} + : to_stage(stage), subquery_depth(depth) + { + } SelectQueryOptions copy() const { return *this; } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 935ac67808a..0c9355c924e 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -540,7 +540,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const } } -void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, ASTTableJoin & out_table_join) +void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, bool old_any, ASTTableJoin & out_table_join) { const ASTTablesInSelectQueryElement * node = select_query.join(); if (!node) @@ -560,6 +560,9 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul DB::ErrorCodes::EXPECTED_ALL_OR_ANY); } + if (old_any && table_join.strictness == ASTTableJoin::Strictness::Any) + table_join.strictness = ASTTableJoin::Strictness::RightAny; + out_table_join = table_join; } @@ -628,13 +631,8 @@ void checkJoin(const ASTTablesInSelectQueryElement * join) const auto & table_join = join->table_join->as(); if (table_join.strictness == ASTTableJoin::Strictness::Any) - if (table_join.kind != ASTTableJoin::Kind::Left) - throw Exception("Old ANY INNER|RIGHT|FULL JOINs are disabled by default. Their logic would be changed. " - "Old logic is many-to-one for all kinds of ANY JOINs. It's equil to apply distinct for right table keys. " - "Default bahaviour is reserved for many-to-one LEFT JOIN, one-to-many RIGHT JOIN and one-to-one INNER JOIN. " - "It would be equal to apply distinct for keys to right, left and both tables respectively. 
" - "Set any_join_distinct_right_table_keys=1 to enable old bahaviour.", - ErrorCodes::NOT_IMPLEMENTED); + if (table_join.kind == ASTTableJoin::Kind::Full) + throw Exception("ANY FULL JOINs are not implemented.", ErrorCodes::NOT_IMPLEMENTED); } std::vector getAggregates(const ASTPtr & query) @@ -958,7 +956,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Push the predicate expression down to the subqueries. result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); - setJoinStrictness(*select_query, settings.join_default_strictness, result.analyzed_join->table_join); + setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, + result.analyzed_join->table_join); collectJoinedColumns(*result.analyzed_join, *select_query, source_columns_set, result.aliases); } diff --git a/dbms/src/Interpreters/Users.cpp b/dbms/src/Interpreters/Users.cpp index 2ca2873e95b..e66b5119f84 100644 --- a/dbms/src/Interpreters/Users.cpp +++ b/dbms/src/Interpreters/Users.cpp @@ -49,7 +49,6 @@ User::User(const String & name_, const String & config_elem, const Poco::Util::A } profile = config.getString(config_elem + ".profile"); - quota = config.getString(config_elem + ".quota"); /// Fill list of allowed hosts. const auto config_networks = config_elem + ".networks"; @@ -130,7 +129,9 @@ User::User(const String & name_, const String & config_elem, const Poco::Util::A } } } + + if (config.has(config_elem + ".allow_quota_management")) + is_quota_management_allowed = config.getBool(config_elem + ".allow_quota_management"); } - } diff --git a/dbms/src/Interpreters/Users.h b/dbms/src/Interpreters/Users.h index e116772855a..6f9a47c4422 100644 --- a/dbms/src/Interpreters/Users.h +++ b/dbms/src/Interpreters/Users.h @@ -30,7 +30,6 @@ struct User Authentication authentication; String profile; - String quota; AllowedClientHosts allowed_client_hosts; @@ -48,6 +47,8 @@ struct User using DatabaseMap = std::unordered_map; DatabaseMap table_props; + bool is_quota_management_allowed = false; + User(const String & name_, const String & config_elem, const Poco::Util::AbstractConfiguration & config); }; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 41c8e288ffe..2c6bf087f8d 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -24,7 +24,7 @@ #include -#include +#include #include #include #include @@ -150,7 +150,7 @@ static void logException(Context & context, QueryLogElement & elem) static void onExceptionBeforeStart(const String & query_for_logging, Context & context, time_t current_time) { /// Exception before the query execution. - context.getQuota().addError(); + context.getQuota()->used(Quota::ERRORS, 1, /* check_exceeded = */ false); const Settings & settings = context.getSettingsRef(); @@ -271,11 +271,6 @@ static std::tuple executeQueryImpl( /// Check the limits. checkASTSizeLimits(*ast, settings); - QuotaForIntervals & quota = context.getQuota(); - - quota.addQuery(); /// NOTE Seems that when new time interval has come, first query is not accounted in number of queries. - quota.checkExceeded(current_time); - /// Put query to process list. But don't put SHOW PROCESSLIST query itself. 
ProcessList::EntryPtr process_list_entry; if (!internal && !ast->as()) @@ -313,6 +308,21 @@ static std::tuple executeQueryImpl( auto interpreter = InterpreterFactory::get(ast, context, stage); bool use_processors = settings.experimental_use_processors && allow_processors && interpreter->canExecuteWithProcessors(); + QuotaContextPtr quota; + if (!interpreter->ignoreQuota()) + { + quota = context.getQuota(); + quota->used(Quota::QUERIES, 1); + quota->checkExceeded(Quota::ERRORS); + } + + IBlockInputStream::LocalLimits limits; + if (!interpreter->ignoreLimits()) + { + limits.mode = IBlockInputStream::LIMITS_CURRENT; + limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode); + } + if (use_processors) pipeline = interpreter->executeWithProcessors(); else @@ -339,17 +349,12 @@ static std::tuple executeQueryImpl( /// Hold element of process list till end of query execution. res.process_list_entry = process_list_entry; - IBlockInputStream::LocalLimits limits; - limits.mode = IBlockInputStream::LIMITS_CURRENT; - limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode); - if (use_processors) { - pipeline.setProgressCallback(context.getProgressCallback()); - pipeline.setProcessListElement(context.getProcessListElement()); - /// Limits on the result, the quota on the result, and also callback for progress. /// Limits apply only to the final result. + pipeline.setProgressCallback(context.getProgressCallback()); + pipeline.setProcessListElement(context.getProcessListElement()); if (stage == QueryProcessingStage::Complete) { pipeline.resize(1); @@ -363,17 +368,18 @@ static std::tuple executeQueryImpl( } else { + /// Limits on the result, the quota on the result, and also callback for progress. + /// Limits apply only to the final result. if (res.in) { res.in->setProgressCallback(context.getProgressCallback()); res.in->setProcessListElement(context.getProcessListElement()); - - /// Limits on the result, the quota on the result, and also callback for progress. - /// Limits apply only to the final result. 
if (stage == QueryProcessingStage::Complete) { - res.in->setLimits(limits); - res.in->setQuota(quota); + if (!interpreter->ignoreQuota()) + res.in->setQuota(quota); + if (!interpreter->ignoreLimits()) + res.in->setLimits(limits); } } @@ -484,7 +490,7 @@ static std::tuple executeQueryImpl( auto exception_callback = [elem, &context, log_queries] () mutable { - context.getQuota().addError(); + context.getQuota()->used(Quota::ERRORS, 1, /* check_exceeded = */ false); elem.type = QueryLogElement::EXCEPTION_WHILE_PROCESSING; diff --git a/dbms/src/Interpreters/joinDispatch.h b/dbms/src/Interpreters/joinDispatch.h index 438641590d2..840b9b91a66 100644 --- a/dbms/src/Interpreters/joinDispatch.h +++ b/dbms/src/Interpreters/joinDispatch.h @@ -12,57 +12,50 @@ namespace DB { -template -struct MapGetterImpl; +template +struct MapGetter; -template <> -struct MapGetterImpl -{ - using Map = Join::MapsAny; -}; +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsOneFlagged; }; +template <> struct MapGetter { using Map = Join::MapsOneFlagged; }; -template <> -struct MapGetterImpl -{ - using Map = Join::MapsAnyFull; -}; +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsOneFlagged; }; +template <> struct MapGetter { using Map = Join::MapsAllFlagged; }; +template <> struct MapGetter { using Map = Join::MapsAllFlagged; }; -template <> -struct MapGetterImpl -{ - using Map = Join::MapsAll; -}; +template <> struct MapGetter { using Map = Join::MapsAll; }; +template <> struct MapGetter { using Map = Join::MapsAll; }; +template <> struct MapGetter { using Map = Join::MapsAllFlagged; }; +template <> struct MapGetter { using Map = Join::MapsAllFlagged; }; -template <> -struct MapGetterImpl -{ - using Map = Join::MapsAllFull; -}; +/// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for template instantiation. +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsAllFlagged; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; -template -struct MapGetterImpl +/// Only ANTI LEFT and ANTI RIGHT are valid. INNER and FULL are here for template instantiation.
+template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsAllFlagged; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; + +template +struct MapGetter { using Map = Join::MapsAsof; }; -template -struct KindTrait -{ - // Affects the Adder trait so that when the right part is empty, adding a default value on the left - static constexpr bool fill_left = static_in_v; - // Affects the Map trait so that a `used` flag is attached to map slots in order to - // generate default values on the right when the left part is empty - static constexpr bool fill_right = static_in_v; -}; - -template -using Map = typename MapGetterImpl::fill_right, strictness>::Map; - -static constexpr std::array STRICTNESSES = { +static constexpr std::array STRICTNESSES = { + ASTTableJoin::Strictness::RightAny, ASTTableJoin::Strictness::Any, ASTTableJoin::Strictness::All, - ASTTableJoin::Strictness::Asof + ASTTableJoin::Strictness::Asof, + ASTTableJoin::Strictness::Semi, + ASTTableJoin::Strictness::Anti, }; static constexpr std::array KINDS = { @@ -81,7 +74,7 @@ inline bool joinDispatchInit(ASTTableJoin::Kind kind, ASTTableJoin::Strictness s constexpr auto j = ij % STRICTNESSES.size(); if (kind == KINDS[i] && strictness == STRICTNESSES[j]) { - maps = Map(); + maps = typename MapGetter::Map(); return true; } return false; @@ -103,7 +96,7 @@ inline bool joinDispatch(ASTTableJoin::Kind kind, ASTTableJoin::Strictness stric func( std::integral_constant(), std::integral_constant(), - std::get>(maps)); + std::get::Map>(maps)); return true; } return false; diff --git a/dbms/src/Parsers/ASTCreateQuotaQuery.cpp b/dbms/src/Parsers/ASTCreateQuotaQuery.cpp new file mode 100644 index 00000000000..2814515d61f --- /dev/null +++ b/dbms/src/Parsers/ASTCreateQuotaQuery.cpp @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + using KeyType = Quota::KeyType; + using ResourceType = Quota::ResourceType; + using ResourceAmount = Quota::ResourceAmount; + + + void formatKeyType(const KeyType & key_type, const IAST::FormatSettings & settings) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " KEYED BY " << (settings.hilite ? IAST::hilite_none : "") << "'" + << Quota::getNameOfKeyType(key_type) << "'"; + } + + + void formatRenameTo(const String & new_name, const IAST::FormatSettings & settings) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " RENAME TO " << (settings.hilite ? IAST::hilite_none : "") + << backQuote(new_name); + } + + + void formatLimit(ResourceType resource_type, ResourceAmount max, const IAST::FormatSettings & settings) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX " << Quota::resourceTypeToKeyword(resource_type) + << (settings.hilite ? IAST::hilite_none : ""); + + settings.ostr << (settings.hilite ? IAST::hilite_operator : "") << " = " << (settings.hilite ? IAST::hilite_none : ""); + + if (max == Quota::UNLIMITED) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ANY" << (settings.hilite ? 
IAST::hilite_none : ""); + else if (resource_type == Quota::EXECUTION_TIME) + settings.ostr << Quota::executionTimeToSeconds(max); + else + settings.ostr << max; + } + + + void formatLimits(const ASTCreateQuotaQuery::Limits & limits, const IAST::FormatSettings & settings) + { + auto interval_kind = IntervalKind::fromAvgSeconds(limits.duration.count()); + Int64 num_intervals = limits.duration.count() / interval_kind.toAvgSeconds(); + + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") + << " FOR" + << (limits.randomize_interval ? " RANDOMIZED" : "") + << " INTERVAL " + << (settings.hilite ? IAST::hilite_none : "") + << num_intervals << " " + << (settings.hilite ? IAST::hilite_keyword : "") + << interval_kind.toKeyword() + << (settings.hilite ? IAST::hilite_none : ""); + + if (limits.unset_tracking) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " UNSET TRACKING" << (settings.hilite ? IAST::hilite_none : ""); + } + else + { + bool limit_found = false; + for (auto resource_type : ext::range_with_static_cast(Quota::MAX_RESOURCE_TYPE)) + { + if (limits.max[resource_type]) + { + if (limit_found) + settings.ostr << ","; + limit_found = true; + formatLimit(resource_type, *limits.max[resource_type], settings); + } + } + if (!limit_found) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TRACKING" << (settings.hilite ? IAST::hilite_none : ""); + } + } + + void formatAllLimits(const std::vector & all_limits, const IAST::FormatSettings & settings) + { + bool need_comma = false; + for (auto & limits : all_limits) + { + if (need_comma) + settings.ostr << ","; + need_comma = true; + + formatLimits(limits, settings); + } + } + + void formatRoles(const ASTRoleList & roles, const IAST::FormatSettings & settings) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TO " << (settings.hilite ? IAST::hilite_none : ""); + roles.format(settings); + } +} + + +String ASTCreateQuotaQuery::getID(char) const +{ + return "CreateQuotaQuery"; +} + + +ASTPtr ASTCreateQuotaQuery::clone() const +{ + return std::make_shared(*this); +} + + +void ASTCreateQuotaQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << (alter ? "ALTER QUOTA" : "CREATE QUOTA") + << (settings.hilite ? hilite_none : ""); + + if (if_exists) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " IF EXISTS" << (settings.hilite ? hilite_none : ""); + else if (if_not_exists) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " IF NOT EXISTS" << (settings.hilite ? hilite_none : ""); + else if (or_replace) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " OR REPLACE" << (settings.hilite ? 
hilite_none : ""); + + settings.ostr << " " << backQuoteIfNeed(name); + + if (!new_name.empty()) + formatRenameTo(new_name, settings); + + if (key_type) + formatKeyType(*key_type, settings); + + formatAllLimits(all_limits, settings); + + if (roles) + formatRoles(*roles, settings); +} +} diff --git a/dbms/src/Parsers/ASTCreateQuotaQuery.h b/dbms/src/Parsers/ASTCreateQuotaQuery.h new file mode 100644 index 00000000000..056a445f23b --- /dev/null +++ b/dbms/src/Parsers/ASTCreateQuotaQuery.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTRoleList; + + +/** CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name + * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] + * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} + * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | + * [SET] TRACKING} [,...]] + * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] + * + * ALTER QUOTA [IF EXISTS] name + * [RENAME TO new_name] + * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] + * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} + * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | + * [SET] TRACKING | + * UNSET TRACKING} [,...]] + * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] + */ +class ASTCreateQuotaQuery : public IAST +{ +public: + bool alter = false; + + bool if_exists = false; + bool if_not_exists = false; + bool or_replace = false; + + String name; + String new_name; + + using KeyType = Quota::KeyType; + std::optional key_type; + + using ResourceType = Quota::ResourceType; + using ResourceAmount = Quota::ResourceAmount; + static constexpr size_t MAX_RESOURCE_TYPE = Quota::MAX_RESOURCE_TYPE; + + struct Limits + { + std::optional max[MAX_RESOURCE_TYPE]; + bool unset_tracking = false; + std::chrono::seconds duration = std::chrono::seconds::zero(); + bool randomize_interval = false; + }; + std::vector all_limits; + + std::shared_ptr roles; + + String getID(char) const override; + ASTPtr clone() const override; + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; +} diff --git a/dbms/src/Parsers/ASTDropAccessEntityQuery.cpp b/dbms/src/Parsers/ASTDropAccessEntityQuery.cpp new file mode 100644 index 00000000000..80d69ed5316 --- /dev/null +++ b/dbms/src/Parsers/ASTDropAccessEntityQuery.cpp @@ -0,0 +1,56 @@ +#include +#include + + +namespace DB +{ +namespace +{ + using Kind = ASTDropAccessEntityQuery::Kind; + + const char * kindToKeyword(Kind kind) + { + switch (kind) + { + case Kind::QUOTA: return "QUOTA"; + } + __builtin_unreachable(); + } +} + + +ASTDropAccessEntityQuery::ASTDropAccessEntityQuery(Kind kind_) + : kind(kind_), keyword(kindToKeyword(kind_)) +{ +} + + +String ASTDropAccessEntityQuery::getID(char) const +{ + return String("DROP ") + keyword + " query"; +} + + +ASTPtr ASTDropAccessEntityQuery::clone() const +{ + return std::make_shared(*this); +} + + +void ASTDropAccessEntityQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "DROP " << keyword + << (if_exists ? " IF EXISTS" : "") + << (settings.hilite ? 
hilite_none : ""); + + bool need_comma = false; + for (const auto & name : names) + { + if (need_comma) + settings.ostr << ','; + need_comma = true; + settings.ostr << ' ' << backQuoteIfNeed(name); + } +} +} diff --git a/dbms/src/Parsers/ASTDropAccessEntityQuery.h b/dbms/src/Parsers/ASTDropAccessEntityQuery.h new file mode 100644 index 00000000000..91b76253db4 --- /dev/null +++ b/dbms/src/Parsers/ASTDropAccessEntityQuery.h @@ -0,0 +1,28 @@ +#pragma once + +#include + + +namespace DB +{ + +/** DROP QUOTA [IF EXISTS] name [,...] + */ +class ASTDropAccessEntityQuery : public IAST +{ +public: + enum class Kind + { + QUOTA, + }; + const Kind kind; + const char * const keyword; + bool if_exists = false; + Strings names; + + ASTDropAccessEntityQuery(Kind kind_); + String getID(char) const override; + ASTPtr clone() const override; + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; +} diff --git a/dbms/src/Parsers/ASTRoleList.cpp b/dbms/src/Parsers/ASTRoleList.cpp new file mode 100644 index 00000000000..9e0a4fffc36 --- /dev/null +++ b/dbms/src/Parsers/ASTRoleList.cpp @@ -0,0 +1,56 @@ +#include +#include + + +namespace DB +{ +void ASTRoleList::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + if (empty()) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : ""); + return; + } + + bool need_comma = false; + if (current_user) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : ""); + } + + for (auto & role : roles) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << backQuoteIfNeed(role); + } + + if (all_roles) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ALL" << (settings.hilite ? IAST::hilite_none : ""); + if (except_current_user || !except_roles.empty()) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " EXCEPT " << (settings.hilite ? IAST::hilite_none : ""); + need_comma = false; + + if (except_current_user) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "CURRENT_USER" << (settings.hilite ? IAST::hilite_none : ""); + } + + for (auto & except_role : except_roles) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << backQuoteIfNeed(except_role); + } + } + } +} +} diff --git a/dbms/src/Parsers/ASTRoleList.h b/dbms/src/Parsers/ASTRoleList.h new file mode 100644 index 00000000000..5e8859732c2 --- /dev/null +++ b/dbms/src/Parsers/ASTRoleList.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + + +namespace DB +{ +/// {role|CURRENT_USER} [,...] | NONE | ALL | ALL EXCEPT {role|CURRENT_USER} [,...] 
+class ASTRoleList : public IAST +{ +public: + Strings roles; + bool current_user = false; + bool all_roles = false; + Strings except_roles; + bool except_current_user = false; + + bool empty() const { return roles.empty() && !current_user && !all_roles; } + + String getID(char) const override { return "RoleList"; } + ASTPtr clone() const override { return std::make_shared(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; +} diff --git a/dbms/src/Parsers/ASTShowCreateAccessEntityQuery.cpp b/dbms/src/Parsers/ASTShowCreateAccessEntityQuery.cpp new file mode 100644 index 00000000000..8509a902014 --- /dev/null +++ b/dbms/src/Parsers/ASTShowCreateAccessEntityQuery.cpp @@ -0,0 +1,51 @@ +#include +#include + + +namespace DB +{ +namespace +{ + using Kind = ASTShowCreateAccessEntityQuery::Kind; + + const char * kindToKeyword(Kind kind) + { + switch (kind) + { + case Kind::QUOTA: return "QUOTA"; + } + __builtin_unreachable(); + } +} + + +ASTShowCreateAccessEntityQuery::ASTShowCreateAccessEntityQuery(Kind kind_) + : kind(kind_), keyword(kindToKeyword(kind_)) +{ +} + + +String ASTShowCreateAccessEntityQuery::getID(char) const +{ + return String("SHOW CREATE ") + keyword + " query"; +} + + +ASTPtr ASTShowCreateAccessEntityQuery::clone() const +{ + return std::make_shared(*this); +} + + +void ASTShowCreateAccessEntityQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "SHOW CREATE " << keyword + << (settings.hilite ? hilite_none : ""); + + if (current_quota) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " CURRENT" << (settings.hilite ? hilite_none : ""); + else + settings.ostr << " " << backQuoteIfNeed(name); +} +} diff --git a/dbms/src/Parsers/ASTShowCreateAccessEntityQuery.h b/dbms/src/Parsers/ASTShowCreateAccessEntityQuery.h new file mode 100644 index 00000000000..32c0ace101b --- /dev/null +++ b/dbms/src/Parsers/ASTShowCreateAccessEntityQuery.h @@ -0,0 +1,30 @@ +#pragma once + +#include + + +namespace DB +{ +/** SHOW CREATE QUOTA [name | CURRENT] + */ +class ASTShowCreateAccessEntityQuery : public ASTQueryWithOutput +{ +public: + enum class Kind + { + QUOTA, + }; + const Kind kind; + const char * const keyword; + String name; + bool current_quota = false; + + ASTShowCreateAccessEntityQuery(Kind kind_); + String getID(char) const override; + ASTPtr clone() const override; + +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + +} diff --git a/dbms/src/Parsers/ASTShowQuotasQuery.cpp b/dbms/src/Parsers/ASTShowQuotasQuery.cpp new file mode 100644 index 00000000000..ca7bd5e853f --- /dev/null +++ b/dbms/src/Parsers/ASTShowQuotasQuery.cpp @@ -0,0 +1,35 @@ +#include +#include + + +namespace DB +{ +String ASTShowQuotasQuery::getID(char) const +{ + if (usage) + return "SHOW QUOTA USAGE query"; + else + return "SHOW QUOTAS query"; +} + + +ASTPtr ASTShowQuotasQuery::clone() const +{ + return std::make_shared(*this); +} + + +void ASTShowQuotasQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : ""); + + if (usage && current) + settings.ostr << "SHOW QUOTA USAGE"; + else if (usage) + settings.ostr << "SHOW QUOTA USAGE ALL"; + else + settings.ostr << "SHOW QUOTAS"; + + settings.ostr << (settings.hilite ? 
hilite_none : ""); +} +} diff --git a/dbms/src/Parsers/ASTShowQuotasQuery.h b/dbms/src/Parsers/ASTShowQuotasQuery.h new file mode 100644 index 00000000000..27a08a99a54 --- /dev/null +++ b/dbms/src/Parsers/ASTShowQuotasQuery.h @@ -0,0 +1,24 @@ +#pragma once + +#include + + +namespace DB +{ +/** SHOW QUOTAS + * SHOW QUOTA USAGE [CURRENT | ALL] + */ +class ASTShowQuotasQuery : public ASTQueryWithOutput +{ +public: + bool usage = false; + bool current = false; + + String getID(char) const override; + ASTPtr clone() const override; + +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + +} diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index 47be2008284..18750d5ccd3 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -140,6 +140,7 @@ void ASTTableJoin::formatImplBeforeTable(const FormatSettings & settings, Format { case Strictness::Unspecified: break; + case Strictness::RightAny: case Strictness::Any: settings.ostr << "ANY "; break; @@ -149,6 +150,12 @@ void ASTTableJoin::formatImplBeforeTable(const FormatSettings & settings, Format case Strictness::Asof: settings.ostr << "ASOF "; break; + case Strictness::Semi: + settings.ostr << "SEMI "; + break; + case Strictness::Anti: + settings.ostr << "ANTI "; + break; } } diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.h b/dbms/src/Parsers/ASTTablesInSelectQuery.h index 9691dee96fa..01c6914b46c 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.h +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.h @@ -25,7 +25,7 @@ namespace DB * SAMPLE 1000000 * * Table expressions may be combined with JOINs of following kinds: - * [GLOBAL] [ANY|ALL|] INNER|LEFT|RIGHT|FULL [OUTER] JOIN table_expr + * [GLOBAL] [ANY|ALL|ASOF|SEMI] [INNER|LEFT|RIGHT|FULL] [OUTER] JOIN table_expr * CROSS JOIN * , (comma) * @@ -74,9 +74,12 @@ struct ASTTableJoin : public IAST enum class Strictness { Unspecified, - Any, /// If there are many suitable rows to join, use any from them (also known as unique JOIN). + RightAny, /// Old ANY JOIN. If there are many suitable rows in right table, use any from them to join. + Any, /// Semi Join with any value from filtering table. For LEFT JOIN with Any and RightAny are the same. All, /// If there are many suitable rows to join, use all of them and replicate rows of "left" table (usual semantic of JOIN). Asof, /// For the last JOIN column, pick the latest value + Semi, /// LEFT or RIGHT. SEMI LEFT JOIN filters left table by values exists in right table. SEMI RIGHT - otherwise. + Anti, /// LEFT or RIGHT. Same as SEMI JOIN but filter values that are NOT exists in other table. }; /// Join method. 
@@ -165,5 +168,4 @@ struct ASTTablesInSelectQuery : public IAST void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; - } diff --git a/dbms/src/Parsers/CommonParsers.h b/dbms/src/Parsers/CommonParsers.h index 2eafc1c1853..85b5217b617 100644 --- a/dbms/src/Parsers/CommonParsers.h +++ b/dbms/src/Parsers/CommonParsers.h @@ -46,99 +46,6 @@ protected: } }; -class ParserInterval: public IParserBase -{ -public: - enum class IntervalKind - { - Incorrect, - Second, - Minute, - Hour, - Day, - Week, - Month, - Quarter, - Year - }; - - IntervalKind interval_kind; - - ParserInterval() : interval_kind(IntervalKind::Incorrect) {} - - const char * getToIntervalKindFunctionName() - { - switch (interval_kind) - { - case ParserInterval::IntervalKind::Second: - return "toIntervalSecond"; - case ParserInterval::IntervalKind::Minute: - return "toIntervalMinute"; - case ParserInterval::IntervalKind::Hour: - return "toIntervalHour"; - case ParserInterval::IntervalKind::Day: - return "toIntervalDay"; - case ParserInterval::IntervalKind::Week: - return "toIntervalWeek"; - case ParserInterval::IntervalKind::Month: - return "toIntervalMonth"; - case ParserInterval::IntervalKind::Quarter: - return "toIntervalQuarter"; - case ParserInterval::IntervalKind::Year: - return "toIntervalYear"; - default: - return nullptr; - } - } - -protected: - const char * getName() const override { return "interval"; } - - bool parseImpl(Pos & pos, ASTPtr & /*node*/, Expected & expected) override - { - if (ParserKeyword("SECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected) - || ParserKeyword("SS").ignore(pos, expected) || ParserKeyword("S").ignore(pos, expected)) - interval_kind = IntervalKind::Second; - else if ( - ParserKeyword("MINUTE").ignore(pos, expected) || ParserKeyword("SQL_TSI_MINUTE").ignore(pos, expected) - || ParserKeyword("MI").ignore(pos, expected) || ParserKeyword("N").ignore(pos, expected)) - interval_kind = IntervalKind::Minute; - else if ( - ParserKeyword("HOUR").ignore(pos, expected) || ParserKeyword("SQL_TSI_HOUR").ignore(pos, expected) - || ParserKeyword("HH").ignore(pos, expected)) - interval_kind = IntervalKind::Hour; - else if ( - ParserKeyword("DAY").ignore(pos, expected) || ParserKeyword("SQL_TSI_DAY").ignore(pos, expected) - || ParserKeyword("DD").ignore(pos, expected) || ParserKeyword("D").ignore(pos, expected)) - interval_kind = IntervalKind::Day; - else if ( - ParserKeyword("WEEK").ignore(pos, expected) || ParserKeyword("SQL_TSI_WEEK").ignore(pos, expected) - || ParserKeyword("WK").ignore(pos, expected) || ParserKeyword("WW").ignore(pos, expected)) - interval_kind = IntervalKind::Week; - else if ( - ParserKeyword("MONTH").ignore(pos, expected) || ParserKeyword("SQL_TSI_MONTH").ignore(pos, expected) - || ParserKeyword("MM").ignore(pos, expected) || ParserKeyword("M").ignore(pos, expected)) - interval_kind = IntervalKind::Month; - else if ( - ParserKeyword("QUARTER").ignore(pos, expected) || ParserKeyword("SQL_TSI_QUARTER").ignore(pos, expected) - || ParserKeyword("QQ").ignore(pos, expected) || ParserKeyword("Q").ignore(pos, expected)) - interval_kind = IntervalKind::Quarter; - else if ( - ParserKeyword("YEAR").ignore(pos, expected) || ParserKeyword("SQL_TSI_YEAR").ignore(pos, expected) - || ParserKeyword("YYYY").ignore(pos, expected) || ParserKeyword("YY").ignore(pos, expected)) - interval_kind = IntervalKind::Year; - else - interval_kind = IntervalKind::Incorrect; - - if (interval_kind == IntervalKind::Incorrect) 
- { - expected.add(pos, "YEAR, QUARTER, MONTH, WEEK, DAY, HOUR, MINUTE or SECOND"); - return false; - } - /// one of ParserKeyword already made ++pos - return true; - } -}; // Parser always returns true and do nothing. class ParserNothing : public IParserBase diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index c26f9363797..1f1ba4edee7 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include @@ -690,44 +690,11 @@ bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp ++pos; ASTPtr expr; - const char * function_name = nullptr; - ParserInterval interval_parser; - if (!interval_parser.ignore(pos, expected)) + IntervalKind interval_kind; + if (!parseIntervalKind(pos, expected, interval_kind)) return false; - switch (interval_parser.interval_kind) - { - case ParserInterval::IntervalKind::Second: - function_name = "toSecond"; - break; - case ParserInterval::IntervalKind::Minute: - function_name = "toMinute"; - break; - case ParserInterval::IntervalKind::Hour: - function_name = "toHour"; - break; - case ParserInterval::IntervalKind::Day: - function_name = "toDayOfMonth"; - break; - case ParserInterval::IntervalKind::Week: - // TODO: SELECT toRelativeWeekNum(toDate('2017-06-15')) - toRelativeWeekNum(toStartOfYear(toDate('2017-06-15'))) - // else if (ParserKeyword("WEEK").ignore(pos, expected)) - // function_name = "toRelativeWeekNum"; - return false; - case ParserInterval::IntervalKind::Month: - function_name = "toMonth"; - break; - case ParserInterval::IntervalKind::Quarter: - function_name = "toQuarter"; - break; - case ParserInterval::IntervalKind::Year: - function_name = "toYear"; - break; - default: - return false; - } - ParserKeyword s_from("FROM"); if (!s_from.ignore(pos, expected)) return false; @@ -742,7 +709,7 @@ bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp auto function = std::make_shared(); auto exp_list = std::make_shared(); - function->name = function_name; //"toYear"; + function->name = interval_kind.toNameOfFunctionExtractTimePart(); function->arguments = exp_list; function->children.push_back(exp_list); exp_list->children.push_back(expr); @@ -770,8 +737,8 @@ bool ParserDateAddExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return false; ++pos; - ParserInterval interval_parser; - if (interval_parser.ignore(pos, expected)) + IntervalKind interval_kind; + if (parseIntervalKind(pos, expected, interval_kind)) { /// function(unit, offset, timestamp) if (pos->type != TokenType::Comma) @@ -804,20 +771,18 @@ bool ParserDateAddExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp if (!ParserExpression().parse(pos, offset_node, expected)) return false; - interval_parser.ignore(pos, expected); - + if (!parseIntervalKind(pos, expected, interval_kind)) + return false; } if (pos->type != TokenType::ClosingRoundBracket) return false; ++pos; - const char * interval_function_name = interval_parser.getToIntervalKindFunctionName(); - auto interval_expr_list_args = std::make_shared(); interval_expr_list_args->children = {offset_node}; auto interval_func_node = std::make_shared(); - interval_func_node->name = interval_function_name; + interval_func_node->name = interval_kind.toNameOfFunctionToIntervalDataType(); interval_func_node->arguments = std::move(interval_expr_list_args); 
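The rewritten EXTRACT/dateAdd/dateDiff parsers above all resolve their time unit through IntervalKind; illustrative queries they are meant to accept (hypothetical dates):

SELECT EXTRACT(MONTH FROM toDate('2019-09-16'));                   -- rewritten to toMonth(...) via toNameOfFunctionExtractTimePart()
SELECT dateDiff(DAY, toDate('2019-09-01'), toDate('2019-09-16'));  -- unit string 'day' comes from toDateDiffUnit()
SELECT dateAdd(WEEK, 2, toDate('2019-09-16'));                     -- offset is wrapped into toIntervalWeek(2) via toNameOfFunctionToIntervalDataType()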
interval_func_node->children.push_back(interval_func_node->arguments); @@ -836,7 +801,6 @@ bool ParserDateAddExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp bool ParserDateDiffExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - const char * interval_name = nullptr; ASTPtr left_node; ASTPtr right_node; @@ -848,40 +812,10 @@ bool ParserDateDiffExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & ex return false; ++pos; - ParserInterval interval_parser; - if (!interval_parser.ignore(pos, expected)) + IntervalKind interval_kind; + if (!parseIntervalKind(pos, expected, interval_kind)) return false; - switch (interval_parser.interval_kind) - { - case ParserInterval::IntervalKind::Second: - interval_name = "second"; - break; - case ParserInterval::IntervalKind::Minute: - interval_name = "minute"; - break; - case ParserInterval::IntervalKind::Hour: - interval_name = "hour"; - break; - case ParserInterval::IntervalKind::Day: - interval_name = "day"; - break; - case ParserInterval::IntervalKind::Week: - interval_name = "week"; - break; - case ParserInterval::IntervalKind::Month: - interval_name = "month"; - break; - case ParserInterval::IntervalKind::Quarter: - interval_name = "quarter"; - break; - case ParserInterval::IntervalKind::Year: - interval_name = "year"; - break; - default: - return false; - } - if (pos->type != TokenType::Comma) return false; ++pos; @@ -901,7 +835,7 @@ bool ParserDateDiffExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & ex ++pos; auto expr_list_args = std::make_shared(); - expr_list_args->children = {std::make_shared(interval_name), left_node, right_node}; + expr_list_args->children = {std::make_shared(interval_kind.toDateDiffUnit()), left_node, right_node}; auto func_node = std::make_shared(); func_node->name = "dateDiff"; @@ -1134,11 +1068,14 @@ const char * ParserAlias::restricted_keywords[] = "INNER", "FULL", "CROSS", - "ASOF", "JOIN", "GLOBAL", "ANY", "ALL", + "ASOF", + "SEMI", + "ANTI", + "ONLY", /// YQL synonym for ANTI "ON", "USING", "PREWHERE", diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp index 6d33368d88b..060d1e89f02 100644 --- a/dbms/src/Parsers/ExpressionListParsers.cpp +++ b/dbms/src/Parsers/ExpressionListParsers.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -604,13 +604,10 @@ bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expec if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected)) return false; - - ParserInterval interval_parser; - if (!interval_parser.ignore(pos, expected)) + IntervalKind interval_kind; + if (!parseIntervalKind(pos, expected, interval_kind)) return false; - const char * function_name = interval_parser.getToIntervalKindFunctionName(); - /// the function corresponding to the operator auto function = std::make_shared(); @@ -618,7 +615,7 @@ bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expec auto exp_list = std::make_shared(); /// the first argument of the function is the previous element, the second is the next one - function->name = function_name; + function->name = interval_kind.toNameOfFunctionToIntervalDataType(); function->arguments = exp_list; function->children.push_back(exp_list); diff --git a/dbms/src/Parsers/IParserBase.cpp b/dbms/src/Parsers/IParserBase.cpp index 64162a595c9..e4caffa992e 100644 --- a/dbms/src/Parsers/IParserBase.cpp +++ b/dbms/src/Parsers/IParserBase.cpp @@ -12,20 +12,15 @@ namespace 
ErrorCodes bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; expected.add(pos, getName()); - pos.increaseDepth(); - bool res = parseImpl(pos, node, expected); - pos.decreaseDepth(); - - if (!res) + return wrapParseImpl(pos, IncreaseDepthTag{}, [&] { - node = nullptr; - pos = begin; - } - - return res; + bool res = parseImpl(pos, node, expected); + if (!res) + node = nullptr; + return res; + }); } } diff --git a/dbms/src/Parsers/IParserBase.h b/dbms/src/Parsers/IParserBase.h index 67b222b1b71..95951d5acb8 100644 --- a/dbms/src/Parsers/IParserBase.h +++ b/dbms/src/Parsers/IParserBase.h @@ -11,6 +11,30 @@ namespace DB class IParserBase : public IParser { public: + template + static bool wrapParseImpl(Pos & pos, const F & func) + { + Pos begin = pos; + bool res = func(); + if (!res) + pos = begin; + return res; + } + + struct IncreaseDepthTag {}; + + template + static bool wrapParseImpl(Pos & pos, IncreaseDepthTag, const F & func) + { + Pos begin = pos; + pos.increaseDepth(); + bool res = func(); + pos.decreaseDepth(); + if (!res) + pos = begin; + return res; + } + bool parse(Pos & pos, ASTPtr & node, Expected & expected); protected: diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index b0ca9c399ec..a014b861e77 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -849,12 +849,12 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E return false; } - if (s_if_not_exists.ignore(pos, expected)) - if_not_exists = true; - if (!s_dictionary.ignore(pos, expected)) return false; + if (s_if_not_exists.ignore(pos, expected)) + if_not_exists = true; + if (!name_p.parse(pos, name, expected)) return false; diff --git a/dbms/src/Parsers/ParserCreateQuotaQuery.cpp b/dbms/src/Parsers/ParserCreateQuotaQuery.cpp new file mode 100644 index 00000000000..cc5fa4bfbcc --- /dev/null +++ b/dbms/src/Parsers/ParserCreateQuotaQuery.cpp @@ -0,0 +1,261 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + + +namespace +{ + using KeyType = Quota::KeyType; + using ResourceType = Quota::ResourceType; + using ResourceAmount = Quota::ResourceAmount; + + bool parseRenameTo(IParserBase::Pos & pos, Expected & expected, String & new_name, bool alter) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!new_name.empty() || !alter) + return false; + + if (!ParserKeyword{"RENAME TO"}.ignore(pos, expected)) + return false; + + return parseIdentifierOrStringLiteral(pos, expected, new_name); + }); + } + + bool parseKeyType(IParserBase::Pos & pos, Expected & expected, std::optional & key_type) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (key_type) + return false; + + if (!ParserKeyword{"KEYED BY"}.ignore(pos, expected)) + return false; + + ASTPtr key_type_ast; + if (!ParserStringLiteral().parse(pos, key_type_ast, expected)) + return false; + + const String & key_type_str = key_type_ast->as().value.safeGet(); + for (auto kt : ext::range_with_static_cast(Quota::MAX_KEY_TYPE)) + if (boost::iequals(Quota::getNameOfKeyType(kt), key_type_str)) + { + key_type = kt; + return true; + } + + String all_key_types_str; + for (auto kt : ext::range_with_static_cast(Quota::MAX_KEY_TYPE)) + all_key_types_str += String(all_key_types_str.empty() ? 
"" : ", ") + "'" + Quota::getNameOfKeyType(kt) + "'"; + String msg = "Quota cannot be keyed by '" + key_type_str + "'. Expected one of these literals: " + all_key_types_str; + throw Exception(msg, ErrorCodes::SYNTAX_ERROR); + }); + } + + bool parseLimit(IParserBase::Pos & pos, Expected & expected, ResourceType & resource_type, ResourceAmount & max) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!ParserKeyword{"MAX"}.ignore(pos, expected)) + return false; + + bool resource_type_set = false; + for (auto rt : ext::range_with_static_cast(Quota::MAX_RESOURCE_TYPE)) + { + if (ParserKeyword{Quota::resourceTypeToKeyword(rt)}.ignore(pos, expected)) + { + resource_type = rt; + resource_type_set = true; + break; + } + } + if (!resource_type_set) + return false; + + if (!ParserToken{TokenType::Equals}.ignore(pos, expected)) + return false; + + ASTPtr max_ast; + if (ParserNumber{}.parse(pos, max_ast, expected)) + { + const Field & max_field = max_ast->as().value; + if (resource_type == Quota::EXECUTION_TIME) + max = Quota::secondsToExecutionTime(applyVisitor(FieldVisitorConvertToNumber(), max_field)); + else + max = applyVisitor(FieldVisitorConvertToNumber(), max_field); + } + else if (ParserKeyword{"ANY"}.ignore(pos, expected)) + { + max = Quota::UNLIMITED; + } + else + return false; + + return true; + }); + } + + bool parseCommaAndLimit(IParserBase::Pos & pos, Expected & expected, ResourceType & resource_type, ResourceAmount & max) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) + return false; + + return parseLimit(pos, expected, resource_type, max); + }); + } + + bool parseLimits(IParserBase::Pos & pos, Expected & expected, ASTCreateQuotaQuery::Limits & limits, bool alter) + { + return IParserBase::wrapParseImpl(pos, [&] + { + ASTCreateQuotaQuery::Limits new_limits; + if (!ParserKeyword{"FOR"}.ignore(pos, expected)) + return false; + + new_limits.randomize_interval = ParserKeyword{"RANDOMIZED"}.ignore(pos, expected); + + if (!ParserKeyword{"INTERVAL"}.ignore(pos, expected)) + return false; + + ASTPtr num_intervals_ast; + if (!ParserNumber{}.parse(pos, num_intervals_ast, expected)) + return false; + + double num_intervals = applyVisitor(FieldVisitorConvertToNumber(), num_intervals_ast->as().value); + + IntervalKind interval_kind; + if (!parseIntervalKind(pos, expected, interval_kind)) + return false; + + new_limits.duration = std::chrono::seconds(static_cast(num_intervals * interval_kind.toAvgSeconds())); + + if (alter && ParserKeyword{"UNSET TRACKING"}.ignore(pos, expected)) + { + new_limits.unset_tracking = true; + } + else if (ParserKeyword{"SET TRACKING"}.ignore(pos, expected) || ParserKeyword{"TRACKING"}.ignore(pos, expected)) + { + } + else + { + ParserKeyword{"SET"}.ignore(pos, expected); + ResourceType resource_type; + ResourceAmount max; + if (!parseLimit(pos, expected, resource_type, max)) + return false; + + new_limits.max[resource_type] = max; + while (parseCommaAndLimit(pos, expected, resource_type, max)) + new_limits.max[resource_type] = max; + } + + limits = new_limits; + return true; + }); + } + + bool parseAllLimits(IParserBase::Pos & pos, Expected & expected, std::vector & all_limits, bool alter) + { + return IParserBase::wrapParseImpl(pos, [&] + { + do + { + ASTCreateQuotaQuery::Limits limits; + if (!parseLimits(pos, expected, limits, alter)) + return false; + all_limits.push_back(limits); + } + while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + return true; + }); + } + + bool 
parseRoles(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & roles) + { + return IParserBase::wrapParseImpl(pos, [&] + { + ASTPtr node; + if (roles || !ParserKeyword{"TO"}.ignore(pos, expected) || !ParserRoleList{}.parse(pos, node, expected)) + return false; + + roles = std::static_pointer_cast(node); + return true; + }); + } +} + + +bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + bool alter; + if (ParserKeyword{"CREATE QUOTA"}.ignore(pos, expected)) + alter = false; + else if (ParserKeyword{"ALTER QUOTA"}.ignore(pos, expected)) + alter = true; + else + return false; + + bool if_exists = false; + bool if_not_exists = false; + bool or_replace = false; + if (alter) + { + if (ParserKeyword{"IF EXISTS"}.ignore(pos, expected)) + if_exists = true; + } + else + { + if (ParserKeyword{"IF NOT EXISTS"}.ignore(pos, expected)) + if_not_exists = true; + else if (ParserKeyword{"OR REPLACE"}.ignore(pos, expected)) + or_replace = true; + } + + String name; + if (!parseIdentifierOrStringLiteral(pos, expected, name)) + return false; + + String new_name; + std::optional key_type; + std::vector all_limits; + std::shared_ptr roles; + + while (parseRenameTo(pos, expected, new_name, alter) || parseKeyType(pos, expected, key_type) + || parseAllLimits(pos, expected, all_limits, alter) || parseRoles(pos, expected, roles)) + ; + + auto query = std::make_shared(); + node = query; + + query->alter = alter; + query->if_exists = if_exists; + query->if_not_exists = if_not_exists; + query->or_replace = or_replace; + query->name = std::move(name); + query->new_name = std::move(new_name); + query->key_type = key_type; + query->all_limits = std::move(all_limits); + query->roles = std::move(roles); + + return true; +} +} diff --git a/dbms/src/Parsers/ParserCreateQuotaQuery.h b/dbms/src/Parsers/ParserCreateQuotaQuery.h new file mode 100644 index 00000000000..aef33f72e67 --- /dev/null +++ b/dbms/src/Parsers/ParserCreateQuotaQuery.h @@ -0,0 +1,31 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses queries like + * CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name + * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] + * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} + * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | + * [SET] TRACKING} [,...]] + * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] + * + * ALTER QUOTA [IF EXISTS] name + * [RENAME TO new_name] + * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] + * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} + * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | + * [SET] TRACKING | + * UNSET TRACKING} [,...]] + * [TO {role [,...] 
| ALL | ALL EXCEPT role [,...]}] + */ +class ParserCreateQuotaQuery : public IParserBase +{ +protected: + const char * getName() const override { return "CREATE QUOTA or ALTER QUOTA query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/dbms/src/Parsers/ParserDropAccessEntityQuery.cpp b/dbms/src/Parsers/ParserDropAccessEntityQuery.cpp new file mode 100644 index 00000000000..c6d5ff889fc --- /dev/null +++ b/dbms/src/Parsers/ParserDropAccessEntityQuery.cpp @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ +bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword{"DROP"}.ignore(pos, expected)) + return false; + + using Kind = ASTDropAccessEntityQuery::Kind; + Kind kind; + if (ParserKeyword{"QUOTA"}.ignore(pos, expected)) + kind = Kind::QUOTA; + else + return false; + + bool if_exists = false; + if (ParserKeyword{"IF EXISTS"}.ignore(pos, expected)) + if_exists = true; + + Strings names; + do + { + String name; + if (!parseIdentifierOrStringLiteral(pos, expected, name)) + return false; + + names.push_back(std::move(name)); + } + while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + + auto query = std::make_shared(kind); + node = query; + + query->if_exists = if_exists; + query->names = std::move(names); + + return true; +} +} diff --git a/dbms/src/Parsers/ParserDropAccessEntityQuery.h b/dbms/src/Parsers/ParserDropAccessEntityQuery.h new file mode 100644 index 00000000000..f479e0d0add --- /dev/null +++ b/dbms/src/Parsers/ParserDropAccessEntityQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses queries like + * DROP QUOTA [IF EXISTS] name [,...] + */ +class ParserDropAccessEntityQuery : public IParserBase +{ +protected: + const char * getName() const override { return "DROP QUOTA query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/dbms/src/Parsers/ParserQuery.cpp b/dbms/src/Parsers/ParserQuery.cpp index b7bdd517a43..a3bb652032e 100644 --- a/dbms/src/Parsers/ParserQuery.cpp +++ b/dbms/src/Parsers/ParserQuery.cpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include namespace DB @@ -22,12 +24,16 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserUseQuery use_p; ParserSetQuery set_p; ParserSystemQuery system_p; + ParserCreateQuotaQuery create_quota_p; + ParserDropAccessEntityQuery drop_access_entity_p; bool res = query_with_output_p.parse(pos, node, expected) || insert_p.parse(pos, node, expected) || use_p.parse(pos, node, expected) || set_p.parse(pos, node, expected) - || system_p.parse(pos, node, expected); + || system_p.parse(pos, node, expected) + || create_quota_p.parse(pos, node, expected) + || drop_access_entity_p.parse(pos, node, expected); return res; } diff --git a/dbms/src/Parsers/ParserQueryWithOutput.cpp b/dbms/src/Parsers/ParserQueryWithOutput.cpp index 1c44c639848..d08ae984c90 100644 --- a/dbms/src/Parsers/ParserQueryWithOutput.cpp +++ b/dbms/src/Parsers/ParserQueryWithOutput.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include namespace DB @@ -34,6 +36,8 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserOptimizeQuery optimize_p; ParserKillQueryQuery kill_query_p; ParserWatchQuery watch_p; + ParserShowCreateAccessEntityQuery show_create_access_entity_p; + ParserShowQuotasQuery show_quotas_p; ASTPtr query; @@ -49,6 +53,7 @@ bool 
ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec analyze_syntax = true; bool parsed = select_p.parse(pos, query, expected) + || show_create_access_entity_p.parse(pos, query, expected) /// should be before `show_tables_p` || show_tables_p.parse(pos, query, expected) || table_p.parse(pos, query, expected) || describe_table_p.parse(pos, query, expected) @@ -60,7 +65,8 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || check_p.parse(pos, query, expected) || kill_query_p.parse(pos, query, expected) || optimize_p.parse(pos, query, expected) - || watch_p.parse(pos, query, expected); + || watch_p.parse(pos, query, expected) + || show_quotas_p.parse(pos, query, expected); if (!parsed) return false; diff --git a/dbms/src/Parsers/ParserRoleList.cpp b/dbms/src/Parsers/ParserRoleList.cpp new file mode 100644 index 00000000000..ac8914de776 --- /dev/null +++ b/dbms/src/Parsers/ParserRoleList.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +bool ParserRoleList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + Strings roles; + bool current_user = false; + bool all_roles = false; + Strings except_roles; + bool except_current_user = false; + + bool except_mode = false; + while (true) + { + if (ParserKeyword{"NONE"}.ignore(pos, expected)) + { + } + else if (ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) || + ParserKeyword{"currentUser"}.ignore(pos, expected)) + { + if (ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected)) + { + if (!ParserToken{TokenType::ClosingRoundBracket}.ignore(pos, expected)) + return false; + } + if (except_mode && !current_user) + except_current_user = true; + else + current_user = true; + } + else if (ParserKeyword{"ALL"}.ignore(pos, expected)) + { + all_roles = true; + if (ParserKeyword{"EXCEPT"}.ignore(pos, expected)) + { + except_mode = true; + continue; + } + } + else + { + String name; + if (!parseIdentifierOrStringLiteral(pos, expected, name)) + return false; + if (except_mode && (boost::range::find(roles, name) == roles.end())) + except_roles.push_back(name); + else + roles.push_back(name); + } + + if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) + break; + } + + if (all_roles) + { + current_user = false; + roles.clear(); + } + + auto result = std::make_shared(); + result->roles = std::move(roles); + result->current_user = current_user; + result->all_roles = all_roles; + result->except_roles = std::move(except_roles); + result->except_current_user = except_current_user; + node = result; + return true; +} + +} diff --git a/dbms/src/Parsers/ParserRoleList.h b/dbms/src/Parsers/ParserRoleList.h new file mode 100644 index 00000000000..eca205a748c --- /dev/null +++ b/dbms/src/Parsers/ParserRoleList.h @@ -0,0 +1,18 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses a string like this: + * {role|CURRENT_USER} [,...] | NONE | ALL | ALL EXCEPT {role|CURRENT_USER} [,...] 
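+ * e.g. "user1, user2", "CURRENT_USER", or "ALL EXCEPT user3" (role names here are illustrative).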
+ */ +class ParserRoleList : public IParserBase +{ +protected: + const char * getName() const override { return "RoleList"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/dbms/src/Parsers/ParserShowCreateAccessEntityQuery.cpp b/dbms/src/Parsers/ParserShowCreateAccessEntityQuery.cpp new file mode 100644 index 00000000000..661330ffd0b --- /dev/null +++ b/dbms/src/Parsers/ParserShowCreateAccessEntityQuery.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include + + +namespace DB +{ +bool ParserShowCreateAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword{"SHOW CREATE"}.ignore(pos, expected)) + return false; + + using Kind = ASTShowCreateAccessEntityQuery::Kind; + Kind kind; + if (ParserKeyword{"QUOTA"}.ignore(pos, expected)) + kind = Kind::QUOTA; + else + return false; + + String name; + bool current_quota = false; + + if ((kind == Kind::QUOTA) && ParserKeyword{"CURRENT"}.ignore(pos, expected)) + { + /// SHOW CREATE QUOTA CURRENT + current_quota = true; + } + else if (parseIdentifierOrStringLiteral(pos, expected, name)) + { + /// SHOW CREATE QUOTA name + } + else + { + /// SHOW CREATE QUOTA + current_quota = true; + } + + auto query = std::make_shared(kind); + node = query; + + query->name = std::move(name); + query->current_quota = current_quota; + + return true; +} +} diff --git a/dbms/src/Parsers/ParserShowCreateAccessEntityQuery.h b/dbms/src/Parsers/ParserShowCreateAccessEntityQuery.h new file mode 100644 index 00000000000..4572b54de27 --- /dev/null +++ b/dbms/src/Parsers/ParserShowCreateAccessEntityQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses queries like + * SHOW CREATE QUOTA [name | CURRENT] + */ +class ParserShowCreateAccessEntityQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW CREATE QUOTA query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/dbms/src/Parsers/ParserShowQuotasQuery.cpp b/dbms/src/Parsers/ParserShowQuotasQuery.cpp new file mode 100644 index 00000000000..69cbd352969 --- /dev/null +++ b/dbms/src/Parsers/ParserShowQuotasQuery.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +bool ParserShowQuotasQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + bool usage; + bool current; + if (ParserKeyword{"SHOW QUOTAS"}.ignore(pos, expected)) + { + usage = false; + current = false; + } + else if (ParserKeyword{"SHOW QUOTA USAGE"}.ignore(pos, expected)) + { + usage = true; + if (ParserKeyword{"ALL"}.ignore(pos, expected)) + { + current = false; + } + else + { + ParserKeyword{"CURRENT"}.ignore(pos, expected); + current = true; + } + } + else + return false; + + auto query = std::make_shared(); + query->usage = usage; + query->current = current; + node = query; + return true; +} +} diff --git a/dbms/src/Parsers/ParserShowQuotasQuery.h b/dbms/src/Parsers/ParserShowQuotasQuery.h new file mode 100644 index 00000000000..5b00b525f98 --- /dev/null +++ b/dbms/src/Parsers/ParserShowQuotasQuery.h @@ -0,0 +1,18 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses queries like + * SHOW QUOTAS + * SHOW QUOTA USAGE [CURRENT | ALL] + */ +class ParserShowQuotasQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW QUOTA query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git 
a/dbms/src/Parsers/ParserTablesInSelectQuery.cpp b/dbms/src/Parsers/ParserTablesInSelectQuery.cpp index 6b970b0565f..7e84925b203 100644 --- a/dbms/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ParserTablesInSelectQuery.cpp @@ -137,6 +137,10 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec table_join->strictness = ASTTableJoin::Strictness::All; else if (ParserKeyword("ASOF").ignore(pos)) table_join->strictness = ASTTableJoin::Strictness::Asof; + else if (ParserKeyword("SEMI").ignore(pos)) + table_join->strictness = ASTTableJoin::Strictness::Semi; + else if (ParserKeyword("ANTI").ignore(pos) || ParserKeyword("ONLY").ignore(pos)) + table_join->strictness = ASTTableJoin::Strictness::Anti; else table_join->strictness = ASTTableJoin::Strictness::Unspecified; @@ -153,13 +157,21 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec else { /// Use INNER by default as in another DBMS. - table_join->kind = ASTTableJoin::Kind::Inner; + if (table_join->strictness == ASTTableJoin::Strictness::Semi || + table_join->strictness == ASTTableJoin::Strictness::Anti) + table_join->kind = ASTTableJoin::Kind::Left; + else + table_join->kind = ASTTableJoin::Kind::Inner; } if (table_join->strictness != ASTTableJoin::Strictness::Unspecified && table_join->kind == ASTTableJoin::Kind::Cross) throw Exception("You must not specify ANY or ALL for CROSS JOIN.", ErrorCodes::SYNTAX_ERROR); + if ((table_join->strictness == ASTTableJoin::Strictness::Semi || table_join->strictness == ASTTableJoin::Strictness::Anti) && + (table_join->kind != ASTTableJoin::Kind::Left && table_join->kind != ASTTableJoin::Kind::Right)) + throw Exception("SEMI|ANTI JOIN should be LEFT or RIGHT.", ErrorCodes::SYNTAX_ERROR); + /// Optional OUTER keyword for outer joins. 
if (table_join->kind == ASTTableJoin::Kind::Left || table_join->kind == ASTTableJoin::Kind::Right diff --git a/dbms/src/Parsers/parseIntervalKind.cpp b/dbms/src/Parsers/parseIntervalKind.cpp new file mode 100644 index 00000000000..7d36133e81c --- /dev/null +++ b/dbms/src/Parsers/parseIntervalKind.cpp @@ -0,0 +1,68 @@ +#include +#include +#include + + +namespace DB +{ +bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & result) +{ + if (ParserKeyword("SECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected) + || ParserKeyword("SS").ignore(pos, expected) || ParserKeyword("S").ignore(pos, expected)) + { + result = IntervalKind::Second; + return true; + } + + if (ParserKeyword("MINUTE").ignore(pos, expected) || ParserKeyword("SQL_TSI_MINUTE").ignore(pos, expected) + || ParserKeyword("MI").ignore(pos, expected) || ParserKeyword("N").ignore(pos, expected)) + { + result = IntervalKind::Minute; + return true; + } + + if (ParserKeyword("HOUR").ignore(pos, expected) || ParserKeyword("SQL_TSI_HOUR").ignore(pos, expected) + || ParserKeyword("HH").ignore(pos, expected)) + { + result = IntervalKind::Hour; + return true; + } + + if (ParserKeyword("DAY").ignore(pos, expected) || ParserKeyword("SQL_TSI_DAY").ignore(pos, expected) + || ParserKeyword("DD").ignore(pos, expected) || ParserKeyword("D").ignore(pos, expected)) + { + result = IntervalKind::Day; + return true; + } + + if (ParserKeyword("WEEK").ignore(pos, expected) || ParserKeyword("SQL_TSI_WEEK").ignore(pos, expected) + || ParserKeyword("WK").ignore(pos, expected) || ParserKeyword("WW").ignore(pos, expected)) + { + result = IntervalKind::Week; + return true; + } + + if (ParserKeyword("MONTH").ignore(pos, expected) || ParserKeyword("SQL_TSI_MONTH").ignore(pos, expected) + || ParserKeyword("MM").ignore(pos, expected) || ParserKeyword("M").ignore(pos, expected)) + { + result = IntervalKind::Month; + return true; + } + + if (ParserKeyword("QUARTER").ignore(pos, expected) || ParserKeyword("SQL_TSI_QUARTER").ignore(pos, expected) + || ParserKeyword("QQ").ignore(pos, expected) || ParserKeyword("Q").ignore(pos, expected)) + { + result = IntervalKind::Quarter; + return true; + } + + if (ParserKeyword("YEAR").ignore(pos, expected) || ParserKeyword("SQL_TSI_YEAR").ignore(pos, expected) + || ParserKeyword("YYYY").ignore(pos, expected) || ParserKeyword("YY").ignore(pos, expected)) + { + result = IntervalKind::Year; + return true; + } + + return false; +} +} diff --git a/dbms/src/Parsers/parseIntervalKind.h b/dbms/src/Parsers/parseIntervalKind.h new file mode 100644 index 00000000000..59f2824dfe2 --- /dev/null +++ b/dbms/src/Parsers/parseIntervalKind.h @@ -0,0 +1,11 @@ +#pragma once + +#include +#include + + +namespace DB +{ +/// Parses an interval kind. 
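+/// e.g. "WEEK", "SQL_TSI_WEEK", "WK" and "WW" all parse to IntervalKind::Week; see parseIntervalKind.cpp for the full keyword table.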
+bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & result); +} diff --git a/dbms/src/Processors/Executors/PipelineExecutor.cpp b/dbms/src/Processors/Executors/PipelineExecutor.cpp index 8892418d0dc..9013b83486a 100644 --- a/dbms/src/Processors/Executors/PipelineExecutor.cpp +++ b/dbms/src/Processors/Executors/PipelineExecutor.cpp @@ -7,7 +7,6 @@ #include #include -#include #include #include #include @@ -52,26 +51,25 @@ bool PipelineExecutor::addEdges(UInt64 node) const IProcessor * cur = graph[node].processor; - auto add_edge = [&](auto & from_port, const IProcessor * to_proc, Edges & edges) + auto add_edge = [&](auto & from_port, const IProcessor * to_proc, Edges & edges, + bool is_backward, UInt64 input_port_number, UInt64 output_port_number, + std::vector * update_list) { auto it = processors_map.find(to_proc); if (it == processors_map.end()) throwUnknownProcessor(to_proc, cur, true); UInt64 proc_num = it->second; - Edge * edge_ptr = nullptr; for (auto & edge : edges) - if (edge.to == proc_num) - edge_ptr = &edge; - - if (!edge_ptr) { - edge_ptr = &edges.emplace_back(); - edge_ptr->to = proc_num; + if (edge.to == proc_num) + throw Exception("Multiple edges are not allowed between the same processors.", ErrorCodes::LOGICAL_ERROR); } - from_port.setVersion(&edge_ptr->version); + auto & edge = edges.emplace_back(proc_num, is_backward, input_port_number, output_port_number, update_list); + + from_port.setUpdateInfo(&edge.update_info); }; bool was_edge_added = false; @@ -83,10 +81,11 @@ bool PipelineExecutor::addEdges(UInt64 node) { was_edge_added = true; - for (auto it = std::next(inputs.begin(), from_input); it != inputs.end(); ++it) + for (auto it = std::next(inputs.begin(), from_input); it != inputs.end(); ++it, ++from_input) { const IProcessor * proc = &it->getOutputPort().getProcessor(); - add_edge(*it, proc, graph[node].backEdges); + auto output_port_number = proc->getOutputPortNumber(&it->getOutputPort()); + add_edge(*it, proc, graph[node].backEdges, true, from_input, output_port_number, &graph[node].post_updated_input_ports); } } @@ -97,10 +96,11 @@ bool PipelineExecutor::addEdges(UInt64 node) { was_edge_added = true; - for (auto it = std::next(outputs.begin(), from_output); it != outputs.end(); ++it) + for (auto it = std::next(outputs.begin(), from_output); it != outputs.end(); ++it, ++from_output) { const IProcessor * proc = &it->getInputPort().getProcessor(); - add_edge(*it, proc, graph[node].directEdges); + auto input_port_number = proc->getInputPortNumber(&it->getInputPort()); + add_edge(*it, proc, graph[node].directEdges, false, input_port_number, from_output, &graph[node].post_updated_output_ports); } } @@ -131,6 +131,7 @@ void PipelineExecutor::addChildlessProcessorsToStack(Stack & stack) if (graph[proc].directEdges.empty()) { stack.push(proc); + /// do not lock the mutex, as this function is executed in a single thread graph[proc].status = ExecStatus::Preparing; } } @@ -195,9 +196,20 @@ void PipelineExecutor::expandPipeline(Stack & stack, UInt64 pid) UInt64 num_processors = processors.size(); for (UInt64 node = 0; node < num_processors; ++node) { + size_t num_direct_edges = graph[node].directEdges.size(); + size_t num_back_edges = graph[node].backEdges.size(); + if (addEdges(node)) { - if (graph[node].status == ExecStatus::Idle || graph[node].status == ExecStatus::New) + std::lock_guard guard(graph[node].status_mutex); + + for (; num_back_edges < graph[node].backEdges.size(); ++num_back_edges) + graph[node].updated_input_ports.emplace_back(num_back_edges);
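+ /// (only the ports that gained edges during expandPipeline() are marked as updated: input ports above, output ports below)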
+ + for (; num_direct_edges < graph[node].directEdges.size(); ++num_direct_edges) + graph[node].updated_output_ports.emplace_back(num_direct_edges); + + if (graph[node].status == ExecStatus::Idle) { graph[node].status = ExecStatus::Preparing; stack.push(node); @@ -212,34 +224,26 @@ bool PipelineExecutor::tryAddProcessorToStackIfUpdated(Edge & edge, Stack & stac auto & node = graph[edge.to]; - ExecStatus status = node.status.load(); + std::lock_guard guard(node.status_mutex); - /// Don't add processor if nothing was read from port. - if (status != ExecStatus::New && edge.version == edge.prev_version) - return false; + ExecStatus status = node.status; if (status == ExecStatus::Finished) return false; - /// Signal that node need to be prepared. - node.need_to_be_prepared = true; - edge.prev_version = edge.version; + if (edge.backward) + node.updated_output_ports.push_back(edge.output_port_number); + else + node.updated_input_ports.push_back(edge.input_port_number); - /// Try to get ownership for node. - - /// Assume that current status is New or Idle. Otherwise, can't prepare node. - if (status != ExecStatus::New) - status = ExecStatus::Idle; - - /// Statuses but New and Idle are not interesting because they own node. - /// Prepare will be called in owning thread before changing status. - while (!node.status.compare_exchange_weak(status, ExecStatus::Preparing)) - if (!(status == ExecStatus::New || status == ExecStatus::Idle) || !node.need_to_be_prepared) - return false; - - stack.push(edge.to); - return true; + if (status == ExecStatus::Idle) + { + node.status = ExecStatus::Preparing; + stack.push(edge.to); + return true; + } + return false; } bool PipelineExecutor::prepareProcessor(UInt64 pid, Stack & children, Stack & parents, size_t thread_number, bool async) @@ -247,105 +251,117 @@ bool PipelineExecutor::prepareProcessor(UInt64 pid, Stack & children, Stack & pa /// In this method we have ownership on node. auto & node = graph[pid]; + bool need_traverse = false; + bool need_expand_pipeline = false; + + std::vector updated_back_edges; + std::vector updated_direct_edges; + { /// Stopwatch watch; - /// Disable flag before prepare call. Otherwise, we can skip prepare request. - /// Prepare can be called more times than needed, but it's ok. - node.need_to_be_prepared = false; + std::lock_guard guard(node.status_mutex); - auto status = node.processor->prepare(); + auto status = node.processor->prepare(node.updated_input_ports, node.updated_output_ports); + node.updated_input_ports.clear(); + node.updated_output_ports.clear(); /// node.execution_state->preparation_time_ns += watch.elapsed(); node.last_processor_status = status; - } - auto add_neighbours_to_prepare_queue = [&] () - { - for (auto & edge : node.backEdges) - tryAddProcessorToStackIfUpdated(edge, parents); - - for (auto & edge : node.directEdges) - tryAddProcessorToStackIfUpdated(edge, children); - }; - - auto try_release_ownership = [&] () - { - /// This function can be called after expand pipeline, where node from outer scope is not longer valid. 
- auto & node_ = graph[pid]; - ExecStatus expected = ExecStatus::Idle; - node_.status = ExecStatus::Idle; - - if (node_.need_to_be_prepared) + switch (node.last_processor_status) { - while (!node_.status.compare_exchange_weak(expected, ExecStatus::Preparing)) - if (!(expected == ExecStatus::Idle) || !node_.need_to_be_prepared) - return; - - children.push(pid); - } - }; - - switch (node.last_processor_status) - { - case IProcessor::Status::NeedData: - case IProcessor::Status::PortFull: - { - add_neighbours_to_prepare_queue(); - try_release_ownership(); - - break; - } - case IProcessor::Status::Finished: - { - add_neighbours_to_prepare_queue(); - node.status = ExecStatus::Finished; - break; - } - case IProcessor::Status::Ready: - { - node.status = ExecStatus::Executing; - return true; - } - case IProcessor::Status::Async: - { - throw Exception("Async is temporary not supported.", ErrorCodes::LOGICAL_ERROR); + case IProcessor::Status::NeedData: + case IProcessor::Status::PortFull: + { + need_traverse = true; + node.status = ExecStatus::Idle; + break; + } + case IProcessor::Status::Finished: + { + need_traverse = true; + node.status = ExecStatus::Finished; + break; + } + case IProcessor::Status::Ready: + { + node.status = ExecStatus::Executing; + return true; + } + case IProcessor::Status::Async: + { + throw Exception("Async is temporarily not supported.", ErrorCodes::LOGICAL_ERROR); // node.status = ExecStatus::Executing; // addAsyncJob(pid); // break; - } - case IProcessor::Status::Wait: - { - if (!async) - throw Exception("Processor returned status Wait before Async.", ErrorCodes::LOGICAL_ERROR); - break; - } - case IProcessor::Status::ExpandPipeline: - { - executor_contexts[thread_number]->task_list.emplace_back( - node.execution_state.get(), - &parents - ); - - ExpandPipelineTask * desired = &executor_contexts[thread_number]->task_list.back(); - ExpandPipelineTask * expected = nullptr; - - while (!expand_pipeline_task.compare_exchange_strong(expected, desired)) + } + case IProcessor::Status::Wait: { - doExpandPipeline(expected, true); - expected = nullptr; + if (!async) + throw Exception("Processor returned status Wait before Async.", ErrorCodes::LOGICAL_ERROR); + break; + } + case IProcessor::Status::ExpandPipeline: + { + need_expand_pipeline = true; + break; + } + } + + if (need_traverse) + { + for (auto & edge_id : node.post_updated_input_ports) + { + auto edge = static_cast(edge_id); + updated_back_edges.emplace_back(edge); + edge->update_info.trigger(); } - doExpandPipeline(desired, true); + for (auto & edge_id : node.post_updated_output_ports) + { + auto edge = static_cast(edge_id); + updated_direct_edges.emplace_back(edge); + edge->update_info.trigger(); + } - /// node is not longer valid after pipeline was expanded - graph[pid].need_to_be_prepared = true; - try_release_ownership(); - break; + node.post_updated_input_ports.clear(); + node.post_updated_output_ports.clear(); } } + if (need_traverse) + { + for (auto & edge : updated_back_edges) + tryAddProcessorToStackIfUpdated(*edge, parents); + + for (auto & edge : updated_direct_edges) + tryAddProcessorToStackIfUpdated(*edge, children); + } + + if (need_expand_pipeline) + { + executor_contexts[thread_number]->task_list.emplace_back( + node.execution_state.get(), + &parents + ); + + ExpandPipelineTask * desired = &executor_contexts[thread_number]->task_list.back(); + ExpandPipelineTask * expected = nullptr; + + while (!expand_pipeline_task.compare_exchange_strong(expected, desired)) + { + doExpandPipeline(expected, true); + expected =
nullptr; + } + + doExpandPipeline(desired, true); + + /// Add itself back to be prepared again. + children.push(pid); + } + return false; } @@ -427,7 +443,7 @@ void PipelineExecutor::execute(size_t num_threads) bool all_processors_finished = true; for (auto & node : graph) - if (node.status != ExecStatus::Finished) + if (node.status != ExecStatus::Finished) /// Single thread, do not hold mutex all_processors_finished = false; if (!all_processors_finished) diff --git a/dbms/src/Processors/Executors/PipelineExecutor.h b/dbms/src/Processors/Executors/PipelineExecutor.h index b5e3c7a0e1e..aded3de3008 100644 --- a/dbms/src/Processors/Executors/PipelineExecutor.h +++ b/dbms/src/Processors/Executors/PipelineExecutor.h @@ -43,12 +43,23 @@ private: struct Edge { + Edge(UInt64 to_, bool backward_, + UInt64 input_port_number_, UInt64 output_port_number_, std::vector * update_list) + : to(to_), backward(backward_) + , input_port_number(input_port_number_), output_port_number(output_port_number_) + { + update_info.update_list = update_list; + update_info.id = this; + } + UInt64 to = std::numeric_limits::max(); + bool backward; + UInt64 input_port_number; + UInt64 output_port_number; /// Edge version is increased when port's state is changed (e.g. when data is pushed). See Port.h for details. /// To compare version with prev_version we can decide if neighbour processor need to be prepared. - UInt64 version = 0; - UInt64 prev_version = 0; + Port::UpdateInfo update_info; }; /// Use std::list because new ports can be added to processor during execution. @@ -58,7 +69,6 @@ private: /// Can be owning or not. Owning means that executor who set this status can change node's data and nobody else can. enum class ExecStatus { - New, /// prepare wasn't called yet. Initial state. Non-owning. Idle, /// prepare returned NeedData or PortFull. Non-owning. Preparing, /// some executor is preparing processor, or processor is in task_queue. Owning. Executing, /// prepare returned Ready and task is executing. Owning. @@ -87,17 +97,22 @@ private: Edges directEdges; Edges backEdges; - std::atomic status; - /// This flag can be set by any executor. - /// When enabled, any executor can try to atomically set Preparing state to status. - std::atomic_bool need_to_be_prepared; + ExecStatus status; + std::mutex status_mutex; + + std::vector post_updated_input_ports; + std::vector post_updated_output_ports; + /// Last state for profiling. 
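+ /// (set by the most recent prepare() call for this node)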
IProcessor::Status last_processor_status = IProcessor::Status::NeedData; std::unique_ptr execution_state; + IProcessor::PortNumbers updated_input_ports; + IProcessor::PortNumbers updated_output_ports; + Node(IProcessor * processor_, UInt64 processor_id) - : processor(processor_), status(ExecStatus::New), need_to_be_prepared(false) + : processor(processor_), status(ExecStatus::Idle) { execution_state = std::make_unique(); execution_state->processor = processor; @@ -105,8 +120,8 @@ private: } Node(Node && other) noexcept - : processor(other.processor), status(other.status.load()) - , need_to_be_prepared(other.need_to_be_prepared.load()), execution_state(std::move(other.execution_state)) + : processor(other.processor), status(other.status) + , execution_state(std::move(other.execution_state)) { } }; diff --git a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index 5d632bdcef5..0522e7a5323 100644 --- a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -198,7 +198,7 @@ void TreeExecutorBlockInputStream::setLimits(const IBlockInputStream::LocalLimit source->setLimits(limits_); } -void TreeExecutorBlockInputStream::setQuota(QuotaForIntervals & quota_) +void TreeExecutorBlockInputStream::setQuota(const std::shared_ptr & quota_) { for (auto & source : sources_with_progress) source->setQuota(quota_); diff --git a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h index da1d60dd972..176fbd06af8 100644 --- a/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h @@ -31,7 +31,7 @@ public: void setProgressCallback(const ProgressCallback & callback) final; void setProcessListElement(QueryStatus * elem) final; void setLimits(const LocalLimits & limits_) final; - void setQuota(QuotaForIntervals & quota_) final; + void setQuota(const std::shared_ptr & quota_) final; void addTotalRowsApprox(size_t value) final; protected: diff --git a/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp new file mode 100644 index 00000000000..d4530e7b09d --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -0,0 +1,238 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int CANNOT_READ_ALL_DATA; + extern const int LOGICAL_ERROR; +} + + +JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat(ReadBuffer & in_, + const Block & header_, + Params params_, + const FormatSettings & format_settings_, + bool with_names_) + : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), with_names(with_names_) +{ + /// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it. 
+ skipBOMIfExists(in); + auto & sample = getPort().getHeader(); + size_t num_columns = sample.columns(); + + data_types.resize(num_columns); + column_indexes_by_names.reserve(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + const auto & column_info = sample.getByPosition(i); + + data_types[i] = column_info.type; + column_indexes_by_names.emplace(column_info.name, i); + } +} + +void JSONCompactEachRowRowInputFormat::readPrefix() +{ + if (with_names) + { + size_t num_columns = getPort().getHeader().columns(); + read_columns.assign(num_columns, false); + + assertChar('[', in); + do + { + skipWhitespaceIfAny(in); + String column_name; + readJSONString(column_name, in); + addInputColumn(column_name); + skipWhitespaceIfAny(in); + } + while (checkChar(',', in)); + assertChar(']', in); + skipEndOfLine(); + + /// Type checking + assertChar('[', in); + for (size_t i = 0; i < column_indexes_for_input_fields.size(); ++i) + { + skipWhitespaceIfAny(in); + String data_type; + readJSONString(data_type, in); + + if (column_indexes_for_input_fields[i] && + data_types[*column_indexes_for_input_fields[i]]->getName() != data_type) + { + throw Exception( + "Type of '" + getPort().getHeader().getByPosition(*column_indexes_for_input_fields[i]).name + + "' must be " + data_types[*column_indexes_for_input_fields[i]]->getName() + + ", not " + data_type, + ErrorCodes::INCORRECT_DATA + ); + } + + if (i != column_indexes_for_input_fields.size() - 1) + assertChar(',', in); + skipWhitespaceIfAny(in); + } + assertChar(']', in); + } + else + { + size_t num_columns = getPort().getHeader().columns(); + read_columns.assign(num_columns, true); + column_indexes_for_input_fields.resize(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + column_indexes_for_input_fields[i] = i; + } + } + + for (size_t i = 0; i < read_columns.size(); ++i) + { + if (!read_columns[i]) + { + not_seen_columns.emplace_back(i); + } + } +} + +void JSONCompactEachRowRowInputFormat::addInputColumn(const String & column_name) +{ + names_of_columns.emplace_back(column_name); + + const auto column_it = column_indexes_by_names.find(column_name); + if (column_it == column_indexes_by_names.end()) + { + if (format_settings.skip_unknown_fields) + { + column_indexes_for_input_fields.push_back(std::nullopt); + return; + } + + throw Exception( + "Unknown field found in JSONCompactEachRow header: '" + column_name + "' " + + "at position " + std::to_string(column_indexes_for_input_fields.size()) + + "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", + ErrorCodes::INCORRECT_DATA + ); + } + + const auto column_index = column_it->second; + + if (read_columns[column_index]) + throw Exception("Duplicate field found while parsing JSONCompactEachRow header: " + column_name, ErrorCodes::INCORRECT_DATA); + + read_columns[column_index] = true; + column_indexes_for_input_fields.emplace_back(column_index); +} + +bool JSONCompactEachRowRowInputFormat::readRow(DB::MutableColumns &columns, DB::RowReadExtension &ext) +{ + skipEndOfLine(); + + if (in.eof()) + return false; + + size_t num_columns = columns.size(); + + read_columns.assign(num_columns, false); + + assertChar('[', in); + for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) + { + const auto & table_column = column_indexes_for_input_fields[file_column]; + if (table_column) + { + readField(*table_column, columns); + } + else + { + skipJSONField(in, StringRef(names_of_columns[file_column])); + } + + 
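+ /// After each value: skip whitespace, then expect ',' between fields (the closing ']' is checked after the loop).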
skipWhitespaceIfAny(in); + if (in.eof()) + throw Exception("Unexpected end of stream while parsing JSONCompactEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); + if (file_column + 1 != column_indexes_for_input_fields.size()) + { + assertChar(',', in); + skipWhitespaceIfAny(in); + } + } + assertChar(']', in); + + for (size_t i = 0; i < not_seen_columns.size(); ++i) + { + columns[not_seen_columns[i]]->insertDefault(); + } + + ext.read_columns = read_columns; + return true; +} + +void JSONCompactEachRowRowInputFormat::skipEndOfLine() +{ + skipWhitespaceIfAny(in); + if (!in.eof() && (*in.position() == ',' || *in.position() == ';')) + ++in.position(); + + skipWhitespaceIfAny(in); +} + +void JSONCompactEachRowRowInputFormat::readField(size_t index, MutableColumns & columns) +{ + try + { + read_columns[index] = true; + const auto & type = data_types[index]; + if (format_settings.null_as_default && !type->isNullable()) + read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type); + else + type->deserializeAsTextJSON(*columns[index], in, format_settings); + } + catch (Exception & e) + { + e.addMessage("(while reading the value of key " + getPort().getHeader().getByPosition(index).name + ")"); + throw; + } +} + +void JSONCompactEachRowRowInputFormat::syncAfterError() +{ + skipToUnescapedNextLineOrEOF(in); +} + +void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory) +{ + factory.registerInputFormatProcessor("JSONCompactEachRow", []( + ReadBuffer & buf, + const Block & sample, + const Context &, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, std::move(params), settings, false); + }); + + factory.registerInputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", []( + ReadBuffer & buf, + const Block & sample, + const Context &, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, std::move(params), settings, true); + }); +} + +} diff --git a/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h new file mode 100644 index 00000000000..e633475d0f4 --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class ReadBuffer; + +/** A stream for reading data in JSONCompactEachRow and JSONCompactEachRowWithNamesAndTypes formats +*/ +class JSONCompactEachRowRowInputFormat : public IRowInputFormat +{ +public: + JSONCompactEachRowRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_, bool with_names_); + + String getName() const override { return "JSONCompactEachRowRowInputFormat"; } + + + void readPrefix() override; + bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + bool allowSyncAfterError() const override { return true; } + void syncAfterError() override; + + +private: + void addInputColumn(const String & column_name); + void skipEndOfLine(); + void readField(size_t index, MutableColumns & columns); + + const FormatSettings format_settings; + + using IndexesMap = std::unordered_map; + IndexesMap column_indexes_by_names; + + using OptionalIndexes = std::vector>; + OptionalIndexes column_indexes_for_input_fields; + + DataTypes data_types; + std::vector read_columns; + std::vector not_seen_columns; + + /// This is for the
correct exceptions in skipping unknown fields. + std::vector names_of_columns; + + bool with_names; +}; + +} diff --git a/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp new file mode 100644 index 00000000000..433cc4515ae --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -0,0 +1,116 @@ +#include +#include +#include +#include + + +namespace DB +{ + + +JSONCompactEachRowRowOutputFormat::JSONCompactEachRowRowOutputFormat(WriteBuffer & out_, + const Block & header_, + FormatFactory::WriteCallback callback, + const FormatSettings & settings_, + bool with_names_) + : IRowOutputFormat(header_, out_, callback), settings(settings_), with_names(with_names_) +{ + auto & sample = getPort(PortKind::Main).getHeader(); + NamesAndTypesList columns(sample.getNamesAndTypesList()); + fields.assign(columns.begin(), columns.end()); +} + + +void JSONCompactEachRowRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num) +{ + type.serializeAsTextJSON(column, row_num, out, settings); +} + + +void JSONCompactEachRowRowOutputFormat::writeFieldDelimiter() +{ + writeCString(", ", out); +} + + +void JSONCompactEachRowRowOutputFormat::writeRowStartDelimiter() +{ + writeChar('[', out); +} + + +void JSONCompactEachRowRowOutputFormat::writeRowEndDelimiter() +{ + writeCString("]\n", out); +} + +void JSONCompactEachRowRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) +{ + writeChar('\n', out); + size_t num_columns = columns.size(); + writeChar('[', out); + for (size_t i = 0; i < num_columns; ++i) + { + if (i != 0) + JSONCompactEachRowRowOutputFormat::writeFieldDelimiter(); + + JSONCompactEachRowRowOutputFormat::writeField(*columns[i], *types[i], row_num); + } + writeCString("]\n", out); +} + +void JSONCompactEachRowRowOutputFormat::writePrefix() +{ + if (with_names) + { + writeChar('[', out); + for (size_t i = 0; i < fields.size(); ++i) + { + writeChar('\"', out); + writeString(fields[i].name, out); + writeChar('\"', out); + if (i != fields.size() - 1) + writeCString(", ", out); + } + writeCString("]\n[", out); + for (size_t i = 0; i < fields.size(); ++i) + { + writeJSONString(fields[i].type->getName(), out, settings); + if (i != fields.size() - 1) + writeCString(", ", out); + } + writeCString("]\n", out); + } +} + +void JSONCompactEachRowRowOutputFormat::consumeTotals(DB::Chunk chunk) +{ + if (with_names) + IRowOutputFormat::consumeTotals(std::move(chunk)); +} + +void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor("JSONCompactEachRow", []( + WriteBuffer & buf, + const Block & sample, + const Context &, + FormatFactory::WriteCallback callback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, callback, format_settings, false); + }); + + factory.registerOutputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", []( + WriteBuffer &buf, + const Block &sample, + const Context &, + FormatFactory::WriteCallback callback, + const FormatSettings &format_settings) + { + return std::make_shared(buf, sample, callback, format_settings, true); + }); +} + + +} diff --git a/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h new file mode 100644 index 00000000000..a7857a82d2d --- /dev/null +++ 
b/dbms/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +/** The stream for outputting data in JSONCompactEachRow format: one JSON array per line. + * Does not validate UTF-8. + */ +class JSONCompactEachRowRowOutputFormat : public IRowOutputFormat +{ +public: + JSONCompactEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_, bool with_names); + + String getName() const override { return "JSONCompactEachRowRowOutputFormat"; } + + void writePrefix() override; + + void writeBeforeTotals() override {} + void writeTotals(const Columns & columns, size_t row_num) override; + void writeAfterTotals() override {} + + void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; + void writeFieldDelimiter() override; + void writeRowStartDelimiter() override; + void writeRowEndDelimiter() override; + +protected: + void consumeTotals(Chunk) override; + /// No extremes. + void consumeExtremes(Chunk) override {} + +private: + FormatSettings settings; + + NamesAndTypes fields; + + bool with_names; +}; +} diff --git a/dbms/src/Processors/IProcessor.h b/dbms/src/Processors/IProcessor.h index ed59f4e591d..852bde2d467 100644 --- a/dbms/src/Processors/IProcessor.h +++ b/dbms/src/Processors/IProcessor.h @@ -171,7 +171,15 @@ public: * - method 'prepare' cannot be executed in parallel even for different objects, * if they are connected (including indirectly) to each other by their ports; */ - virtual Status prepare() = 0; + virtual Status prepare() + { + throw Exception("Method 'prepare' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED); + } + + using PortNumbers = std::vector; + + /// Optimization for prepare() in case we know which ports were updated. + virtual Status prepare(const PortNumbers & /*updated_input_ports*/, const PortNumbers & /*updated_output_ports*/) { return prepare(); } /** You may call this method if 'prepare' returned Ready. * This method cannot access any ports. It should use only data that was prepared by 'prepare' method. @@ -183,11 +191,6 @@ public: throw Exception("Method 'work' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED); } - virtual void work(size_t /*thread_num*/) - { - work(); - } - /** You may call this method if 'prepare' returned Async. * This method cannot access any ports. It should use only data that was prepared by 'prepare' method.
* @@ -226,6 +229,34 @@ public: auto & getInputs() { return inputs; } auto & getOutputs() { return outputs; } + UInt64 getInputPortNumber(const InputPort * input_port) const + { + UInt64 number = 0; + for (auto & port : inputs) + { + if (&port == input_port) + return number; + + ++number; + } + + throw Exception("Can't find input port for " + getName() + " processor", ErrorCodes::LOGICAL_ERROR); + } + + UInt64 getOutputPortNumber(const OutputPort * output_port) const + { + UInt64 number = 0; + for (auto & port : outputs) + { + if (&port == output_port) + return number; + + ++number; + } + + throw Exception("Can't find output port for " + getName() + " processor", ErrorCodes::LOGICAL_ERROR); + } + const auto & getInputs() const { return inputs; } const auto & getOutputs() const { return outputs; } diff --git a/dbms/src/Processors/Pipe.cpp b/dbms/src/Processors/Pipe.cpp index b31cfd58848..17b44a48ea1 100644 --- a/dbms/src/Processors/Pipe.cpp +++ b/dbms/src/Processors/Pipe.cpp @@ -97,7 +97,7 @@ void Pipe::setLimits(const ISourceWithProgress::LocalLimits & limits) } } -void Pipe::setQuota(QuotaForIntervals & quota) +void Pipe::setQuota(const std::shared_ptr & quota) { for (auto & processor : processors) { diff --git a/dbms/src/Processors/Pipe.h b/dbms/src/Processors/Pipe.h index 72cb90c4b9e..d734c89f485 100644 --- a/dbms/src/Processors/Pipe.h +++ b/dbms/src/Processors/Pipe.h @@ -8,8 +8,6 @@ namespace DB class Pipe; using Pipes = std::vector; -class QuotaForIntervals; - /// Pipe is a set of processors which represents the part of pipeline with single output. /// All processors in pipe are connected. All ports are connected except the output one. class Pipe @@ -39,7 +37,7 @@ public: /// Specify quotas and limits for every ISourceWithProgress. void setLimits(const SourceWithProgress::LocalLimits & limits); - void setQuota(QuotaForIntervals & quota); + void setQuota(const std::shared_ptr & quota); /// Set information about preferred executor number for sources. void pinSources(size_t executor_number); diff --git a/dbms/src/Processors/Port.h b/dbms/src/Processors/Port.h index 37d1ea9bd46..ff5d1d8dee0 100644 --- a/dbms/src/Processors/Port.h +++ b/dbms/src/Processors/Port.h @@ -28,6 +28,25 @@ class Port friend void connect(OutputPort &, InputPort &); friend class IProcessor; +public: + struct UpdateInfo + { + std::vector * update_list = nullptr; + void * id = nullptr; + UInt64 version = 0; + UInt64 prev_version = 0; + + void inline ALWAYS_INLINE update() + { + if (version == prev_version && update_list) + update_list->push_back(id); + + ++version; + } + + void inline ALWAYS_INLINE trigger() { prev_version = version; } + }; + protected: /// Shared state of two connected ports. class State @@ -182,12 +201,17 @@ protected: IProcessor * processor = nullptr; + /// If update_info was set, update() will be called for it when the port's state has changed.
+ UpdateInfo * update_info = nullptr; + public: using Data = State::Data; Port(Block header_) : header(std::move(header_)) {} Port(Block header_, IProcessor * processor_) : header(std::move(header_)), processor(processor_) {} + void setUpdateInfo(UpdateInfo * info) { update_info = info; } + const Block & getHeader() const { return header; } bool ALWAYS_INLINE isConnected() const { return state != nullptr; } @@ -216,6 +240,13 @@ public: throw Exception("Port does not belong to Processor", ErrorCodes::LOGICAL_ERROR); return *processor; } + +protected: + void inline ALWAYS_INLINE updateVersion() + { + if (likely(update_info)) + update_info->update(); + } }; /// Invariants: @@ -230,20 +261,14 @@ class InputPort : public Port private: OutputPort * output_port = nullptr; - /// If version was set, it will be increased on each pull. - UInt64 * version = nullptr; - mutable bool is_finished = false; public: using Port::Port; - void setVersion(UInt64 * value) { version = value; } - Data ALWAYS_INLINE pullData() { - if (version) - ++(*version); + updateVersion(); assumeConnected(); @@ -296,8 +321,8 @@ public: { assumeConnected(); - if ((state->setFlags(State::IS_NEEDED, State::IS_NEEDED) & State::IS_NEEDED) == 0 && version) - ++(*version); + if ((state->setFlags(State::IS_NEEDED, State::IS_NEEDED) & State::IS_NEEDED) == 0) + updateVersion(); } void ALWAYS_INLINE setNotNeeded() @@ -310,8 +335,8 @@ public: { assumeConnected(); - if ((state->setFlags(State::IS_FINISHED, State::IS_FINISHED) & State::IS_FINISHED) == 0 && version) - ++(*version); + if ((state->setFlags(State::IS_FINISHED, State::IS_FINISHED) & State::IS_FINISHED) == 0) + updateVersion(); is_finished = true; } @@ -353,14 +378,9 @@ class OutputPort : public Port private: InputPort * input_port = nullptr; - /// If version was set, it will be increased on each push. 
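The UpdateInfo struct added above replaces the old raw version counters: every state change bumps version, and only the first bump after trigger() appends the port's id to a shared update list, so the executor learns about each changed port exactly once per scheduling round. A self-contained sketch of that dedup-by-version idea (ToyUpdateInfo and the port ids are stand-ins, not the real types):

```cpp
// Stand-in sketch of the version/update-list dedup used by Port::UpdateInfo.
#include <cstdint>
#include <iostream>
#include <vector>

struct ToyUpdateInfo
{
    std::vector<void *> * update_list = nullptr;
    void * id = nullptr;        // identifies the port to the executor
    uint64_t version = 0;
    uint64_t prev_version = 0;

    // Called on every port state change (push, pull, finish, setNeeded).
    void update()
    {
        if (version == prev_version && update_list)
            update_list->push_back(id);   // first change since last trigger()
        ++version;                        // later changes only bump the version
    }

    // Called by the executor once it has consumed the pending notification.
    void trigger() { prev_version = version; }
};

int main()
{
    std::vector<void *> updated;
    int port_a = 0, port_b = 1;
    ToyUpdateInfo a{&updated, &port_a}, b{&updated, &port_b};

    a.update(); a.update(); b.update();   // 'a' changes twice but is listed once
    std::cout << updated.size() << '\n';  // 2 entries: &port_a, &port_b

    a.trigger(); b.trigger(); updated.clear();
    a.update();                           // listed again after trigger()
    std::cout << updated.size() << '\n';  // 1
}
```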
- UInt64 * version = nullptr; - public: using Port::Port; - void setVersion(UInt64 * value) { version = value; } - void ALWAYS_INLINE push(Chunk chunk) { pushData({.chunk = std::move(chunk), .exception = {}}); @@ -385,8 +405,7 @@ public: throw Exception(msg, ErrorCodes::LOGICAL_ERROR); } - if (version) - ++(*version); + updateVersion(); assumeConnected(); @@ -401,8 +420,8 @@ public: auto flags = state->setFlags(State::IS_FINISHED, State::IS_FINISHED); - if (version && (flags & State::IS_FINISHED) == 0) - ++(*version); + if ((flags & State::IS_FINISHED) == 0) + updateVersion(); } bool ALWAYS_INLINE isNeeded() const diff --git a/dbms/src/Processors/ResizeProcessor.cpp b/dbms/src/Processors/ResizeProcessor.cpp index b3cb3a1735d..59d1f0db75e 100644 --- a/dbms/src/Processors/ResizeProcessor.cpp +++ b/dbms/src/Processors/ResizeProcessor.cpp @@ -153,5 +153,109 @@ ResizeProcessor::Status ResizeProcessor::prepare() return get_status_if_no_inputs(); } +IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) +{ + if (!initialized) + { + initialized = true; + + for (auto & input : inputs) + { + input.setNeeded(); + input_ports.push_back({.port = &input, .status = InputStatus::NotActive}); + } + + for (auto & output : outputs) + output_ports.push_back({.port = &output, .status = OutputStatus::NotActive}); + } + + for (auto & output_number : updated_outputs) + { + auto & output = output_ports[output_number]; + if (output.port->isFinished()) + { + if (output.status != OutputStatus::Finished) + { + ++num_finished_outputs; + output.status = OutputStatus::Finished; + } + + continue; + } + + if (output.port->canPush()) + { + if (output.status != OutputStatus::NeedData) + { + output.status = OutputStatus::NeedData; + waiting_outputs.push(output_number); + } + } + } + + if (num_finished_outputs == outputs.size()) + { + for (auto & input : inputs) + input.close(); + + return Status::Finished; + } + + for (auto & input_number : updated_inputs) + { + auto & input = input_ports[input_number]; + if (input.port->isFinished()) + { + if (input.status != InputStatus::Finished) + { + input.status = InputStatus::Finished; + ++num_finished_inputs; + } + continue; + } + + if (input.port->hasData()) + { + if (input.status != InputStatus::HasData) + { + input.status = InputStatus::HasData; + inputs_with_data.push(input_number); + } + } + } + + while (!waiting_outputs.empty() && !inputs_with_data.empty()) + { + auto & waiting_output = output_ports[waiting_outputs.front()]; + waiting_outputs.pop(); + + auto & input_with_data = input_ports[inputs_with_data.front()]; + inputs_with_data.pop(); + + waiting_output.port->pushData(input_with_data.port->pullData()); + input_with_data.status = InputStatus::NotActive; + waiting_output.status = OutputStatus::NotActive; + + if (input_with_data.port->isFinished()) + { + input_with_data.status = InputStatus::Finished; + ++num_finished_inputs; + } + } + + if (num_finished_inputs == inputs.size()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + if (!waiting_outputs.empty()) + return Status::NeedData; + + return Status::PortFull; +} + } diff --git a/dbms/src/Processors/ResizeProcessor.h b/dbms/src/Processors/ResizeProcessor.h index 67574c384a1..3a9c906ecbd 100644 --- a/dbms/src/Processors/ResizeProcessor.h +++ b/dbms/src/Processors/ResizeProcessor.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -31,10 +32,46 @@ public: String getName() const override { return "Resize"; } 
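The event-driven prepare() above keeps two queues, outputs waiting for data and inputs that have data, and pairs them off until one side runs dry; only the ports named in updated_inputs/updated_outputs are re-examined. The pairing step, reduced to a self-contained sketch (plain size_t values stand in for port numbers):

```cpp
// Self-contained sketch of the waiting-output / input-with-data pairing done in
// ResizeProcessor::prepare above. Plain size_t values stand in for port numbers.
#include <deque>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>

int main()
{
    // Filled while scanning updated_outputs / updated_inputs, as in the hunk above.
    std::queue<size_t> waiting_outputs(std::deque<size_t>{0, 2, 3});
    std::queue<size_t> inputs_with_data(std::deque<size_t>{1, 4});

    // Pair them off until one side runs dry; in the real code each pair becomes
    // waiting_output.port->pushData(input_with_data.port->pullData()).
    std::vector<std::pair<size_t, size_t>> transfers;
    while (!waiting_outputs.empty() && !inputs_with_data.empty())
    {
        size_t out = waiting_outputs.front(); waiting_outputs.pop();
        size_t in = inputs_with_data.front(); inputs_with_data.pop();
        transfers.emplace_back(in, out);
    }

    for (const auto & [in, out] : transfers)
        std::cout << "input " << in << " -> output " << out << '\n';

    // Leftover waiting outputs mean the processor still needs data (NeedData);
    // otherwise it reports PortFull, exactly like the return statements above.
    std::cout << (waiting_outputs.empty() ? "PortFull" : "NeedData") << '\n';
}
```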
Status prepare() override; + Status prepare(const PortNumbers &, const PortNumbers &) override; private: InputPorts::iterator current_input; OutputPorts::iterator current_output; + + size_t num_finished_inputs = 0; + size_t num_finished_outputs = 0; + std::queue waiting_outputs; + std::queue inputs_with_data; + bool initialized = false; + + enum class OutputStatus + { + NotActive, + NeedData, + Finished, + }; + + enum class InputStatus + { + NotActive, + HasData, + Finished, + }; + + struct InputPortWithStatus + { + InputPort * port; + InputStatus status; + }; + + struct OutputPortWithStatus + { + OutputPort * port; + OutputStatus status; + }; + + std::vector input_ports; + std::vector output_ports; }; } diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.h b/dbms/src/Processors/Sources/SourceFromInputStream.h index 888439f15d5..8e750a33faf 100644 --- a/dbms/src/Processors/Sources/SourceFromInputStream.h +++ b/dbms/src/Processors/Sources/SourceFromInputStream.h @@ -25,7 +25,7 @@ public: /// Implementation for methods from ISourceWithProgress. void setLimits(const LocalLimits & limits_) final { stream->setLimits(limits_); } - void setQuota(QuotaForIntervals & quota_) final { stream->setQuota(quota_); } + void setQuota(const std::shared_ptr & quota_) final { stream->setQuota(quota_); } void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); } void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); } void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); } diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp index 21f9d5ca9bb..fac2a53ea54 100644 --- a/dbms/src/Processors/Sources/SourceWithProgress.cpp +++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include namespace DB { @@ -72,10 +72,8 @@ void SourceWithProgress::progress(const Progress & value) /// It is here for compatibility with IBlockInputStream. limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); - if (quota != nullptr && limits.mode == LimitsMode::LIMITS_TOTAL) - { - quota->checkAndAddReadRowsBytes(time(nullptr), value.read_rows, value.read_bytes); - } + if (quota && limits.mode == LimitsMode::LIMITS_TOTAL) + quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes}); } } diff --git a/dbms/src/Processors/Sources/SourceWithProgress.h b/dbms/src/Processors/Sources/SourceWithProgress.h index 833e5eccb6f..59e8c6afa20 100644 --- a/dbms/src/Processors/Sources/SourceWithProgress.h +++ b/dbms/src/Processors/Sources/SourceWithProgress.h @@ -21,7 +21,7 @@ public: /// Set the quota. If you set a quota on the amount of raw data, /// then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits. - virtual void setQuota(QuotaForIntervals & quota_) = 0; + virtual void setQuota(const std::shared_ptr & quota_) = 0; /// Set the pointer to the process list item. /// General information about the resources spent on the request will be written into it.
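The quota->used({Quota::READ_ROWS, ...}, {Quota::READ_BYTES, ...}) call above replaces checkAndAddReadRowsBytes: the new interface takes (resource, amount) pairs, accumulates them into per-interval counters and throws once a limit is exceeded. A hypothetical sketch of that accounting shape (ToyQuota is illustrative, not the real QuotaContext interface; the assumption that a zero limit means "track only" matches the tracking.xml test config further down):

```cpp
// Hypothetical sketch of resource accounting in the style of quota->used({...}, {...}).
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <utility>

enum ResourceType { QUERIES, ERRORS, RESULT_ROWS, RESULT_BYTES, READ_ROWS, READ_BYTES, EXECUTION_TIME, MAX_RESOURCE_TYPE };

class ToyQuota
{
public:
    void setLimit(ResourceType t, uint64_t max) { limits[t] = max; }

    // Variadic used(): each argument is a {resource, amount} pair.
    template <typename... Pairs>
    void used(Pairs... pairs) { (add(pairs.first, pairs.second), ...); }

private:
    void add(ResourceType t, uint64_t amount)
    {
        counters[t] += amount;
        if (limits[t] && counters[t] > limits[t])   // assumption: 0 means "not limited, only tracked"
            throw std::runtime_error("Quota has been exceeded");
    }

    uint64_t counters[MAX_RESOURCE_TYPE] = {};
    uint64_t limits[MAX_RESOURCE_TYPE] = {};
};

int main()
{
    ToyQuota quota;
    quota.setLimit(READ_ROWS, 1000);
    quota.used(std::pair{READ_ROWS, uint64_t{50}}, std::pair{READ_BYTES, uint64_t{200}});
    std::cout << "ok\n";
    try
    {
        quota.used(std::pair{READ_ROWS, uint64_t{1000}});  // 1050 > 1000 -> throws
    }
    catch (const std::exception & e) { std::cout << e.what() << '\n'; }
}
```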
@@ -49,7 +49,7 @@ public: using LimitsMode = IBlockInputStream::LimitsMode; void setLimits(const LocalLimits & limits_) final { limits = limits_; } - void setQuota(QuotaForIntervals & quota_) final { quota = &quota_; } + void setQuota(const std::shared_ptr & quota_) final { quota = quota_; } void setProcessListElement(QueryStatus * elem) final { process_list_elem = elem; } void setProgressCallback(const ProgressCallback & callback) final { progress_callback = callback; } void addTotalRowsApprox(size_t value) final { total_rows_approx += value; } @@ -60,7 +60,7 @@ protected: private: LocalLimits limits; - QuotaForIntervals * quota = nullptr; + std::shared_ptr quota; ProgressCallback progress_callback; QueryStatus * process_list_elem = nullptr; diff --git a/dbms/src/Processors/Transforms/FilterTransform.cpp b/dbms/src/Processors/Transforms/FilterTransform.cpp index 058df590f0c..9cad9f85f92 100644 --- a/dbms/src/Processors/Transforms/FilterTransform.cpp +++ b/dbms/src/Processors/Transforms/FilterTransform.cpp @@ -65,14 +65,23 @@ FilterTransform::FilterTransform( IProcessor::Status FilterTransform::prepare() { if (constant_filter_description.always_false + /// Optimization for `WHERE column in (empty set)`. + /// The result will not change after the set is created, so we can skip this check. + /// It is implemented in prepare() to stop the pipeline before reading from the input port. + || (!are_prepared_sets_initialized && expression->checkColumnIsAlwaysFalse(filter_column_name))) { input.close(); output.finish(); return Status::Finished; } - return ISimpleTransform::prepare(); + auto status = ISimpleTransform::prepare(); + + /// Until prepared sets are initialized, the output port will be unneeded, and prepare will return PortFull. + if (status != IProcessor::Status::PortFull) + are_prepared_sets_initialized = true; + + return status; } diff --git a/dbms/src/Processors/Transforms/FilterTransform.h b/dbms/src/Processors/Transforms/FilterTransform.h index 127eb5a8039..1652473aa3c 100644 --- a/dbms/src/Processors/Transforms/FilterTransform.h +++ b/dbms/src/Processors/Transforms/FilterTransform.h @@ -36,6 +36,8 @@ private: /// Header after expression, but before removing filter column.
Block transformed_header; + bool are_prepared_sets_initialized = false; + void removeFilterIfNeed(Chunk & chunk); }; diff --git a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp index 4947d11974b..1f621439048 100644 --- a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp +++ b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { @@ -73,7 +73,7 @@ void LimitsCheckingTransform::transform(Chunk & chunk) !limits.size_limits.check(info.rows, info.bytes, "result", ErrorCodes::TOO_MANY_ROWS_OR_BYTES)) stopReading(); - if (quota != nullptr) + if (quota) checkQuota(chunk); } } @@ -100,12 +100,8 @@ void LimitsCheckingTransform::checkQuota(Chunk & chunk) case LimitsMode::LIMITS_CURRENT: { - time_t current_time = time(nullptr); - double total_elapsed = info.total_stopwatch.elapsedSeconds(); - - quota->checkAndAddResultRowsBytes(current_time, chunk.getNumRows(), chunk.bytes()); - quota->checkAndAddExecutionTime(current_time, Poco::Timespan((total_elapsed - prev_elapsed) * 1000000.0)); - + UInt64 total_elapsed = info.total_stopwatch.elapsedNanoseconds(); + quota->used({Quota::RESULT_ROWS, chunk.getNumRows()}, {Quota::RESULT_BYTES, chunk.bytes()}, {Quota::EXECUTION_TIME, total_elapsed - prev_elapsed}); prev_elapsed = total_elapsed; break; } diff --git a/dbms/src/Processors/Transforms/LimitsCheckingTransform.h b/dbms/src/Processors/Transforms/LimitsCheckingTransform.h index 53116446a75..9410301030a 100644 --- a/dbms/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/dbms/src/Processors/Transforms/LimitsCheckingTransform.h @@ -36,7 +36,7 @@ public: String getName() const override { return "LimitsCheckingTransform"; } - void setQuota(QuotaForIntervals & quota_) { quota = &quota_; } + void setQuota(const std::shared_ptr & quota_) { quota = quota_; } protected: void transform(Chunk & chunk) override; @@ -44,8 +44,8 @@ private: LocalLimits limits; - QuotaForIntervals * quota = nullptr; - double prev_elapsed = 0; + std::shared_ptr quota; + UInt64 prev_elapsed = 0; ProcessorProfileInfo info; diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.h b/dbms/src/Processors/Transforms/MergingSortedTransform.h index 0991835bfaf..b32dd076c5f 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.h +++ b/dbms/src/Processors/Transforms/MergingSortedTransform.h @@ -59,8 +59,11 @@ protected: auto num_rows = chunk.getNumRows(); columns = chunk.mutateColumns(); if (limit_rows && num_rows > limit_rows) + { + num_rows = limit_rows; for (auto & column : columns) - column = (*column->cut(0, limit_rows)->convertToFullColumnIfConst()).mutate(); + column = (*column->cut(0, num_rows)->convertToFullColumnIfConst()).mutate(); + } total_merged_rows += num_rows; merged_rows = num_rows; diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp index 5a6f13b0567..21147417824 100644 --- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -40,6 +42,7 @@ namespace ErrorCodes extern const int CHECKSUM_DOESNT_MATCH; extern const int TOO_LARGE_SIZE_COMPRESSED; extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int CORRUPTED_DATA; } @@ -60,6 +63,19 @@ namespace return pools; } + + void assertChecksum(CityHash_v1_0_2::uint128 expected,
CityHash_v1_0_2::uint128 calculated) + { + if (expected != calculated) + { + String message = "Checksum of extra info doesn't match: corrupted data." + " Reference: " + getHexUIntLowercase(expected.first) + getHexUIntLowercase(expected.second) + + ". Actual: " + getHexUIntLowercase(calculated.first) + getHexUIntLowercase(calculated.second) + + "."; + throw Exception(message, ErrorCodes::CHECKSUM_DOESNT_MATCH); + } + } + } @@ -277,13 +293,21 @@ void StorageDistributedDirectoryMonitor::readQueryAndSettings( if (query_size == DBMS_DISTRIBUTED_SIGNATURE_EXTRA_INFO) { + UInt64 initiator_revision; + CityHash_v1_0_2::uint128 expected; + CityHash_v1_0_2::uint128 calculated; + /// Read extra information. String extra_info_as_string; readStringBinary(extra_info_as_string, in); - readVarUInt(query_size, in); - ReadBufferFromString extra_info(extra_info_as_string); + /// To avoid out-of-bounds reads; other cases will be checked in the read*() helpers. + if (extra_info_as_string.size() < sizeof(expected)) + throw Exception("Not enough data", ErrorCodes::CORRUPTED_DATA); + + StringRef extra_info_ref(extra_info_as_string.data(), extra_info_as_string.size() - sizeof(expected)); + ReadBufferFromMemory extra_info(extra_info_ref.data, extra_info_ref.size); + ReadBuffer checksum(extra_info_as_string.data(), sizeof(expected), extra_info_ref.size); - UInt64 initiator_revision; readVarUInt(initiator_revision, extra_info); if (ClickHouseRevision::get() < initiator_revision) { @@ -293,13 +317,29 @@ void StorageDistributedDirectoryMonitor::readQueryAndSettings( << "It may lack support for new features."); } + /// Extra checksum (over all data except the checksum itself) + readPODBinary(expected, checksum); + calculated = CityHash_v1_0_2::CityHash128(extra_info_ref.data, extra_info_ref.size); + assertChecksum(expected, calculated); + insert_settings.deserialize(extra_info); + /// Read query + readStringBinary(insert_query, in); + + /// Query checksum + readPODBinary(expected, extra_info); + calculated = CityHash_v1_0_2::CityHash128(insert_query.data(), insert_query.size()); + assertChecksum(expected, calculated); + /// Add handling new data here, for example: /// if (initiator_revision >= DBMS_MIN_REVISION_WITH_MY_NEW_DATA) /// readVarUInt(my_new_data, extra_info); + + return; } - else if (query_size == DBMS_DISTRIBUTED_SIGNATURE_SETTINGS_OLD_FORMAT) + + if (query_size == DBMS_DISTRIBUTED_SIGNATURE_SETTINGS_OLD_FORMAT) { insert_settings.deserialize(in, SettingsBinaryFormat::OLD); readVarUInt(query_size, in); diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 61cb10cc38e..181909cff7a 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -595,9 +595,14 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std:: writeVarUInt(ClickHouseRevision::get(), extra_info); context.getSettingsRef().serialize(extra_info); + writePODBinary(CityHash_v1_0_2::CityHash128(query_string.data(), query_string.size()), extra_info); + /// Add new fields here, for example: /// writeVarUInt(my_new_data, extra_info); + const auto & extra_info_ref = extra_info.stringRef(); + writePODBinary(CityHash_v1_0_2::CityHash128(extra_info_ref.data, extra_info_ref.size), extra_info); + writeVarUInt(DBMS_DISTRIBUTED_SIGNATURE_EXTRA_INFO, out); writeStringBinary(extra_info.str(), out); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranularity.h
b/dbms/src/Storages/MergeTree/MergeTreeIndexGranularity.h index ff391be596c..0c76f74c3df 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexGranularity.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranularity.h @@ -43,6 +43,9 @@ public: /// Total rows size_t getTotalRows() const; + /// Total number of marks, not including the final mark if it exists + size_t getMarksCountWithoutFinal() const { return getMarksCount() - hasFinalMark(); } + /// Rows after mark to next mark inline size_t getMarkRows(size_t mark_index) const { diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index 092cc78e313..a2d6a836d6f 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -143,12 +143,14 @@ public: + toString(compressed_hashing_buf.count()) + " (compressed), " + toString(uncompressed_hashing_buf.count()) + " (uncompressed)", ErrorCodes::CORRUPTED_DATA); + /// Maybe we have a final mark. if (index_granularity.hasFinalMark()) { auto final_mark_rows = readMarkFromFile().second; if (final_mark_rows != 0) throw Exception("Incorrect final mark at the end of " + mrk_file_path + " expected 0 rows, got " + toString(final_mark_rows), ErrorCodes::CORRUPTED_DATA); } + if (!mrk_hashing_buf.eof()) throw Exception("EOF expected in " + mrk_file_path + " file" + " at position " @@ -379,7 +381,8 @@ MergeTreeData::DataPart::Checksums checkDataPart( size_t read_size = tmp_column->size(); column_size += read_size; - if (read_size < rows_after_mark || mark_num == adaptive_index_granularity.getMarksCount()) + /// We have already checked all marks except the final one (it will be checked in assertEnd()). + if (mark_num == adaptive_index_granularity.getMarksCountWithoutFinal()) break; else if (marks_eof) throw Exception("Unexpected end of mrk file while reading column " + name_type.name, ErrorCodes::CORRUPTED_DATA); diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 33127f97874..12444867b6b 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -99,7 +99,7 @@ void registerStorageJoin(StorageFactory & factory) const String strictness_str = Poco::toLower(*opt_strictness_id); ASTTableJoin::Strictness strictness; if (strictness_str == "any") - strictness = ASTTableJoin::Strictness::Any; + strictness = ASTTableJoin::Strictness::RightAny; else if (strictness_str == "all") strictness = ASTTableJoin::Strictness::All; else @@ -329,7 +329,7 @@ private: for (; it != end; ++it) { - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) + if constexpr (STRICTNESS == ASTTableJoin::Strictness::RightAny) { for (size_t j = 0; j < columns.size(); ++j) if (j == key_pos) @@ -338,9 +338,16 @@ private: columns[j]->insertFrom(*it->getMapped().block->getByPosition(column_indices[j]).column.get(), it->getMapped().row_num); ++rows_added; } - else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) + else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) { - throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("New ANY join storage is not implemented yet (set any_join_distinct_right_table_keys=1 to use old one)", + ErrorCodes::NOT_IMPLEMENTED); + } + else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof || + STRICTNESS == ASTTableJoin::Strictness::Semi || + STRICTNESS == ASTTableJoin::Strictness::Anti) + { + throw Exception("ASOF|SEMI|ANTI join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED);
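Stepping back to the DirectoryMonitor and DistributedBlockOutputStream hunks above: the writer appends a CityHash128 of the query string and then a checksum over the whole extra-info blob (everything except the trailing checksum itself), and the reader recomputes both before trusting the data. The same framing pattern in a runnable sketch, with FNV-1a as a stand-in for CityHash128 to keep it dependency-free:

```cpp
// Sketch of the "payload + trailing checksum" framing used for the distributed
// extra info. FNV-1a is a stand-in for CityHash128 to keep the example self-contained.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>

static uint64_t fnv1a(const char * data, size_t size)
{
    uint64_t h = 1469598103934665603ULL;
    for (size_t i = 0; i < size; ++i)
        h = (h ^ static_cast<unsigned char>(data[i])) * 1099511628211ULL;
    return h;
}

// Writer: serialize the payload, then append a checksum of everything before it.
static std::string writeWithChecksum(const std::string & payload)
{
    std::string blob = payload;
    uint64_t sum = fnv1a(blob.data(), blob.size());
    blob.append(reinterpret_cast<const char *>(&sum), sizeof(sum));
    return blob;
}

// Reader: split off the trailing checksum and verify before using the payload.
static std::string readWithChecksum(const std::string & blob)
{
    if (blob.size() < sizeof(uint64_t))
        throw std::runtime_error("Not enough data");   // same guard as in the hunk above

    size_t payload_size = blob.size() - sizeof(uint64_t);
    uint64_t expected;
    std::memcpy(&expected, blob.data() + payload_size, sizeof(expected));

    uint64_t calculated = fnv1a(blob.data(), payload_size);
    if (expected != calculated)
        throw std::runtime_error("Checksum doesn't match: corrupted data");

    return blob.substr(0, payload_size);
}

int main()
{
    std::string blob = writeWithChecksum("INSERT INTO t VALUES");
    std::cout << readWithChecksum(blob) << '\n';

    blob[0] ^= 1;                                      // simulate corruption
    try { readWithChecksum(blob); }
    catch (const std::exception & e) { std::cout << e.what() << '\n'; }
}
```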
} else for (auto ref_it = it->getMapped().begin(); ref_it.ok(); ++ref_it) diff --git a/dbms/src/Storages/System/StorageSystemQuotaUsage.cpp b/dbms/src/Storages/System/StorageSystemQuotaUsage.cpp new file mode 100644 index 00000000000..8835e77eeb5 --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +NamesAndTypesList StorageSystemQuotaUsage::getNamesAndTypes() +{ + NamesAndTypesList names_and_types{ + {"name", std::make_shared()}, + {"id", std::make_shared()}, + {"key", std::make_shared()}, + {"duration", std::make_shared(std::make_shared())}, + {"end_of_interval", std::make_shared(std::make_shared())}}; + + for (auto resource_type : ext::range_with_static_cast(Quota::MAX_RESOURCE_TYPE)) + { + DataTypePtr data_type; + if (resource_type == Quota::EXECUTION_TIME) + data_type = std::make_shared(); + else + data_type = std::make_shared(); + + String column_name = Quota::resourceTypeToColumnName(resource_type); + names_and_types.push_back({column_name, std::make_shared(data_type)}); + names_and_types.push_back({String("max_") + column_name, std::make_shared(data_type)}); + } + return names_and_types; +} + + +void StorageSystemQuotaUsage::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const +{ + const auto & access_control = context.getAccessControlManager(); + for (const auto & info : access_control.getQuotaUsageInfo()) + { + for (const auto & interval : info.intervals) + { + size_t i = 0; + res_columns[i++]->insert(info.quota_name); + res_columns[i++]->insert(info.quota_id); + res_columns[i++]->insert(info.quota_key); + res_columns[i++]->insert(std::chrono::seconds{interval.duration}.count()); + res_columns[i++]->insert(std::chrono::system_clock::to_time_t(interval.end_of_interval)); + for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) + { + if (resource_type == Quota::EXECUTION_TIME) + { + res_columns[i++]->insert(Quota::executionTimeToSeconds(interval.used[resource_type])); + res_columns[i++]->insert(Quota::executionTimeToSeconds(interval.max[resource_type])); + } + else + { + res_columns[i++]->insert(interval.used[resource_type]); + res_columns[i++]->insert(interval.max[resource_type]); + } + } + } + + if (info.intervals.empty()) + { + size_t i = 0; + res_columns[i++]->insert(info.quota_name); + res_columns[i++]->insert(info.quota_id); + res_columns[i++]->insert(info.quota_key); + for (size_t j = 0; j != Quota::MAX_RESOURCE_TYPE * 2 + 2; ++j) + res_columns[i++]->insertDefault(); + } + } +} +} diff --git a/dbms/src/Storages/System/StorageSystemQuotaUsage.h b/dbms/src/Storages/System/StorageSystemQuotaUsage.h new file mode 100644 index 00000000000..f2151b27612 --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemQuotaUsage.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class Context; + + +/** Implements the `quota_usage` system tables, which allows you to get information about + * how the quotas are used by all users. 
+ */ +class StorageSystemQuotaUsage : public ext::shared_ptr_helper, public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemQuotaUsage"; } + static NamesAndTypesList getNamesAndTypes(); + +protected: + friend struct ext::shared_ptr_helper; + using IStorageSystemOneBlock::IStorageSystemOneBlock; + void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const override; +}; + +} diff --git a/dbms/src/Storages/System/StorageSystemQuotas.cpp b/dbms/src/Storages/System/StorageSystemQuotas.cpp new file mode 100644 index 00000000000..b82e348c86d --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemQuotas.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + DataTypeEnum8::Values getKeyTypeEnumValues() + { + DataTypeEnum8::Values enum_values; + for (auto key_type : ext::range_with_static_cast(Quota::MAX_KEY_TYPE)) + enum_values.push_back({Quota::getNameOfKeyType(key_type), static_cast(key_type)}); + return enum_values; + } +} + + +NamesAndTypesList StorageSystemQuotas::getNamesAndTypes() +{ + NamesAndTypesList names_and_types{ + {"name", std::make_shared()}, + {"id", std::make_shared()}, + {"source", std::make_shared()}, + {"key_type", std::make_shared(getKeyTypeEnumValues())}, + {"roles", std::make_shared(std::make_shared())}, + {"all_roles", std::make_shared()}, + {"except_roles", std::make_shared(std::make_shared())}, + {"intervals.duration", std::make_shared(std::make_shared())}, + {"intervals.randomize_interval", std::make_shared(std::make_shared())}}; + + for (auto resource_type : ext::range_with_static_cast(Quota::MAX_RESOURCE_TYPE)) + { + DataTypePtr data_type; + if (resource_type == Quota::EXECUTION_TIME) + data_type = std::make_shared(); + else + data_type = std::make_shared(); + + String column_name = String("intervals.max_") + Quota::resourceTypeToColumnName(resource_type); + names_and_types.push_back({column_name, std::make_shared(data_type)}); + } + return names_and_types; +} + + +void StorageSystemQuotas::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const +{ + size_t i = 0; + auto & name_column = *res_columns[i++]; + auto & id_column = *res_columns[i++]; + auto & storage_name_column = *res_columns[i++]; + auto & key_type_column = *res_columns[i++]; + auto & roles_data = assert_cast(*res_columns[i]).getData(); + auto & roles_offsets = assert_cast(*res_columns[i++]).getOffsets(); + auto & all_roles_column = *res_columns[i++]; + auto & except_roles_data = assert_cast(*res_columns[i]).getData(); + auto & except_roles_offsets = assert_cast(*res_columns[i++]).getOffsets(); + auto & durations_data = assert_cast(*res_columns[i]).getData(); + auto & durations_offsets = assert_cast(*res_columns[i++]).getOffsets(); + auto & randomize_intervals_data = assert_cast(*res_columns[i]).getData(); + auto & randomize_intervals_offsets = assert_cast(*res_columns[i++]).getOffsets(); + IColumn * limits_data[Quota::MAX_RESOURCE_TYPE]; + ColumnArray::Offsets * limits_offsets[Quota::MAX_RESOURCE_TYPE]; + for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) + { + limits_data[resource_type] = &assert_cast(*res_columns[i]).getData(); + limits_offsets[resource_type] = &assert_cast(*res_columns[i++]).getOffsets(); + } + + const auto & access_control = context.getAccessControlManager(); + for (const auto & id : access_control.findAll()) + { + auto quota = 
access_control.tryRead(id); + if (!quota) + continue; + const auto * storage = access_control.findStorage(id); + String storage_name = storage ? storage->getStorageName() : ""; + + name_column.insert(quota->getName()); + id_column.insert(id); + storage_name_column.insert(storage_name); + key_type_column.insert(static_cast(quota->key_type)); + + for (const auto & role : quota->roles) + roles_data.insert(role); + roles_offsets.push_back(roles_data.size()); + + all_roles_column.insert(static_cast(quota->all_roles)); + + for (const auto & except_role : quota->except_roles) + except_roles_data.insert(except_role); + except_roles_offsets.push_back(except_roles_data.size()); + + for (const auto & limits : quota->all_limits) + { + durations_data.insert(std::chrono::seconds{limits.duration}.count()); + randomize_intervals_data.insert(static_cast(limits.randomize_interval)); + for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) + { + if (resource_type == Quota::EXECUTION_TIME) + limits_data[resource_type]->insert(Quota::executionTimeToSeconds(limits.max[resource_type])); + else + limits_data[resource_type]->insert(limits.max[resource_type]); + } + } + + durations_offsets.push_back(durations_data.size()); + randomize_intervals_offsets.push_back(randomize_intervals_data.size()); + for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) + limits_offsets[resource_type]->push_back(limits_data[resource_type]->size()); + } +} +} diff --git a/dbms/src/Storages/System/StorageSystemQuotas.h b/dbms/src/Storages/System/StorageSystemQuotas.h new file mode 100644 index 00000000000..0f54f193654 --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemQuotas.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class Context; + + +/** Implements the `quotas` system tables, which allows you to get information about quotas. 
+ */ +class StorageSystemQuotas : public ext::shared_ptr_helper, public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemQuotas"; } + static NamesAndTypesList getNamesAndTypes(); + +protected: + friend struct ext::shared_ptr_helper; + using IStorageSystemOneBlock::IStorageSystemOneBlock; + void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const override; +}; + +} diff --git a/dbms/src/Storages/System/attachSystemTables.cpp b/dbms/src/Storages/System/attachSystemTables.cpp index 528bdd06a21..2b8e630cbed 100644 --- a/dbms/src/Storages/System/attachSystemTables.cpp +++ b/dbms/src/Storages/System/attachSystemTables.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -52,6 +54,8 @@ void attachSystemTablesLocal(IDatabase & system_database) system_database.attachTable("functions", StorageSystemFunctions::create("functions")); system_database.attachTable("events", StorageSystemEvents::create("events")); system_database.attachTable("settings", StorageSystemSettings::create("settings")); + system_database.attachTable("quotas", StorageSystemQuotas::create("quotas")); + system_database.attachTable("quota_usage", StorageSystemQuotaUsage::create("quota_usage")); system_database.attachTable("merge_tree_settings", SystemMergeTreeSettings::create("merge_tree_settings")); system_database.attachTable("build_options", StorageSystemBuildOptions::create("build_options")); system_database.attachTable("formats", StorageSystemFormats::create("formats")); diff --git a/dbms/tests/integration/test_quota/__init__.py b/dbms/tests/integration/test_quota/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_quota/configs/users.d/quota.xml b/dbms/tests/integration/test_quota/configs/users.d/quota.xml new file mode 120000 index 00000000000..9b12dec9c53 --- /dev/null +++ b/dbms/tests/integration/test_quota/configs/users.d/quota.xml @@ -0,0 +1 @@ +../../normal_limits.xml \ No newline at end of file diff --git a/dbms/tests/integration/test_quota/configs/users.xml b/dbms/tests/integration/test_quota/configs/users.xml new file mode 100644 index 00000000000..15a5364449b --- /dev/null +++ b/dbms/tests/integration/test_quota/configs/users.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + ::/0 + + default + myQuota + true + + + diff --git a/dbms/tests/integration/test_quota/no_quotas.xml b/dbms/tests/integration/test_quota/no_quotas.xml new file mode 100644 index 00000000000..9aba4ac0914 --- /dev/null +++ b/dbms/tests/integration/test_quota/no_quotas.xml @@ -0,0 +1,3 @@ + + + diff --git a/dbms/tests/integration/test_quota/normal_limits.xml b/dbms/tests/integration/test_quota/normal_limits.xml new file mode 100644 index 00000000000..b7c3a67b5cc --- /dev/null +++ b/dbms/tests/integration/test_quota/normal_limits.xml @@ -0,0 +1,17 @@ + + + + + + + 31556952 + + + 1000 + 0 + 1000 + 0 + + + + diff --git a/dbms/tests/integration/test_quota/simpliest.xml b/dbms/tests/integration/test_quota/simpliest.xml new file mode 100644 index 00000000000..6d51d68d8d9 --- /dev/null +++ b/dbms/tests/integration/test_quota/simpliest.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/dbms/tests/integration/test_quota/test.py b/dbms/tests/integration/test_quota/test.py new file mode 100644 index 00000000000..e7caaf5cd06 --- /dev/null +++ b/dbms/tests/integration/test_quota/test.py @@ -0,0 +1,251 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools 
import assert_eq_with_retry +import os +import re +import time + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', + config_dir="configs") + +query_from_system_quotas = "SELECT * FROM system.quotas ORDER BY name"; + +query_from_system_quota_usage = "SELECT id, key, duration, "\ + "queries, errors, result_rows, result_bytes, read_rows, read_bytes "\ + "FROM system.quota_usage ORDER BY id, key, duration"; + +def system_quotas(): + return instance.query(query_from_system_quotas).rstrip('\n') + +def system_quota_usage(): + return instance.query(query_from_system_quota_usage).rstrip('\n') + + +def copy_quota_xml(local_file_name, reload_immediately = True): + script_dir = os.path.dirname(os.path.realpath(__file__)) + instance.copy_file_to_container(os.path.join(script_dir, local_file_name), '/etc/clickhouse-server/users.d/quota.xml') + if reload_immediately: + instance.query("SYSTEM RELOAD CONFIG") + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + + instance.query("CREATE TABLE test_table(x UInt32) ENGINE = MergeTree ORDER BY tuple()") + instance.query("INSERT INTO test_table SELECT number FROM numbers(50)") + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def reset_quotas_and_usage_info(): + try: + yield + finally: + instance.query("DROP QUOTA IF EXISTS qA, qB") + copy_quota_xml('simpliest.xml') # To reset usage info. + copy_quota_xml('normal_limits.xml') + + +def test_quota_from_users_xml(): + assert instance.query("SELECT currentQuota()") == "myQuota\n" + assert instance.query("SELECT currentQuotaID()") == "e651da9c-a748-8703-061a-7e5e5096dae7\n" + assert instance.query("SELECT currentQuotaKey()") == "default\n" + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t0\t0\t0\t0\t0\t0" + + instance.query("SELECT * from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t1\t0\t50\t200\t50\t200" + + instance.query("SELECT COUNT() from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t2\t0\t51\t208\t50\t200" + + +def test_simpliest_quota(): + # Simpliest quota doesn't even track usage. + copy_quota_xml('simpliest.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[]\t[]\t[]\t[]\t[]\t[]\t[]\t[]\t[]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N" + + instance.query("SELECT * from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N" + + +def test_tracking_quota(): + # Now we're tracking usage. 
+ copy_quota_xml('tracking.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[0]\t[0]\t[0]\t[0]\t[0]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t0\t0\t0\t0\t0\t0" + + instance.query("SELECT * from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t1\t0\t50\t200\t50\t200" + + instance.query("SELECT COUNT() from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t2\t0\t51\t208\t50\t200" + + +def test_exceed_quota(): + # Change quota, now the limits are tiny so we will exceed the quota. + copy_quota_xml('tiny_limits.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1]\t[1]\t[1]\t[0]\t[1]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t0\t0\t0\t0\t0\t0" + + assert re.search("Quota.*has\ been\ exceeded", instance.query_and_get_error("SELECT * from test_table")) + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t1\t1\t0\t0\t50\t0" + + # Change quota, now the limits are enough to execute queries. + copy_quota_xml('normal_limits.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t1\t1\t0\t0\t50\t0" + + instance.query("SELECT * from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t2\t1\t50\t200\t100\t200" + + +def test_add_remove_interval(): + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t0\t0\t0\t0\t0\t0" + + # Add interval. + copy_quota_xml('two_intervals.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952,63113904]\t[0,1]\t[1000,0]\t[0,0]\t[0,0]\t[0,30000]\t[1000,0]\t[0,20000]\t[0,120]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t0\t0\t0\t0\t0\t0\n"\ + "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t63113904\t0\t0\t0\t0\t0\t0" + + instance.query("SELECT * from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t1\t0\t50\t200\t50\t200\n"\ + "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t63113904\t1\t0\t50\t200\t50\t200" + + # Remove interval. + copy_quota_xml('normal_limits.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t1\t0\t50\t200\t50\t200" + + instance.query("SELECT * from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t2\t0\t100\t400\t100\t400" + + # Remove all intervals. 
+ copy_quota_xml('simpliest.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[]\t[]\t[]\t[]\t[]\t[]\t[]\t[]\t[]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N" + + instance.query("SELECT * from test_table") + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N" + + # Add one interval back. + copy_quota_xml('normal_limits.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t0\t0\t0\t0\t0\t0" + + +def test_add_remove_quota(): + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t0\t0\t0\t0\t0\t0" + + # Add quota. + copy_quota_xml('two_quotas.xml') + assert system_quotas() ==\ + "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]\n"\ + "myQuota2\t4590510c-4d13-bf21-ec8a-c2187b092e73\tusers.xml\tclient key or user name\t[]\t0\t[]\t[3600,2629746]\t[1,0]\t[0,0]\t[0,0]\t[4000,0]\t[400000,0]\t[4000,0]\t[400000,0]\t[60,1800]" + + # Drop quota. + copy_quota_xml('normal_limits.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + + # Drop all quotas. + copy_quota_xml('no_quotas.xml') + assert system_quotas() == "" + assert system_quota_usage() == "" + + # Add one quota back. + copy_quota_xml('normal_limits.xml') + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + assert system_quota_usage() == "e651da9c-a748-8703-061a-7e5e5096dae7\tdefault\t31556952\t0\t0\t0\t0\t0\t0" + + +def test_reload_users_xml_by_timer(): + assert system_quotas() == "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1000]\t[0]\t[0]\t[0]\t[1000]\t[0]\t[0]" + + time.sleep(1) # The modification time of the 'quota.xml' file should be different, + # because config files are reloaded by the timer only when the modification time is changed.
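The time.sleep(1) above exists because the server's config reloader runs on a timer and skips files whose modification time is unchanged; sleeping one second guarantees a fresh mtime. A simplified sketch of that polling scheme (illustrative only, not the actual ConfigReloader code):

```cpp
// Sketch of mtime-based config reloading: a periodic timer reloads the file
// only when its modification time differs from the last one seen.
#include <chrono>
#include <filesystem>
#include <iostream>
#include <thread>

namespace fs = std::filesystem;

void watchConfig(const fs::path & path, int iterations)
{
    fs::file_time_type last_mtime{};
    for (int i = 0; i < iterations; ++i)
    {
        std::error_code ec;
        auto mtime = fs::last_write_time(path, ec);
        if (!ec && mtime != last_mtime)
        {
            last_mtime = mtime;
            std::cout << "reloading " << path << '\n';  // parse and apply the config here
        }
        std::this_thread::sleep_for(std::chrono::seconds(1));
    }
}

int main()
{
    watchConfig("users.d/quota.xml", 3);  // hypothetical path, for illustration
}
```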
+ copy_quota_xml('tiny_limits.xml', reload_immediately=False) + assert_eq_with_retry(instance, query_from_system_quotas, "myQuota\te651da9c-a748-8703-061a-7e5e5096dae7\tusers.xml\tuser name\t['default']\t0\t[]\t[31556952]\t[0]\t[1]\t[1]\t[1]\t[0]\t[1]\t[0]\t[0]") + + +def test_dcl_introspection(): + assert instance.query("SHOW QUOTAS") == "myQuota\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000 TO default\n" + expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=0/1000 errors=0 result_rows=0 result_bytes=0 read_rows=0/1000 read_bytes=0 execution_time=0" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE CURRENT")) + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE ALL")) + + instance.query("SELECT * from test_table") + expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=1/1000 errors=0 result_rows=50 result_bytes=200 read_rows=50/1000 read_bytes=200 execution_time=.*" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + # Add interval. + copy_quota_xml('two_intervals.xml') + assert instance.query("SHOW QUOTAS") == "myQuota\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000, FOR RANDOMIZED INTERVAL 2 YEAR MAX RESULT BYTES = 30000, MAX READ BYTES = 20000, MAX EXECUTION TIME = 120 TO default\n" + expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=1/1000 errors=0 result_rows=50 result_bytes=200 read_rows=50/1000 read_bytes=200 execution_time=.*\n"\ + "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0/30000 read_rows=0 read_bytes=0/20000 execution_time=0/120" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + # Drop interval, add quota. 
+ copy_quota_xml('two_quotas.xml') + assert instance.query("SHOW QUOTAS") == "myQuota\nmyQuota2\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000 TO default\n" + assert instance.query("SHOW CREATE QUOTA myQuota2") == "CREATE QUOTA myQuota2 KEYED BY \\'client key or user name\\' FOR RANDOMIZED INTERVAL 1 HOUR MAX RESULT ROWS = 4000, MAX RESULT BYTES = 400000, MAX READ ROWS = 4000, MAX READ BYTES = 400000, MAX EXECUTION TIME = 60, FOR INTERVAL 1 MONTH MAX EXECUTION TIME = 1800\n" + expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=1/1000 errors=0 result_rows=50 result_bytes=200 read_rows=50/1000 read_bytes=200 execution_time=.*" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + +def test_dcl_management(): + copy_quota_xml('no_quotas.xml') + assert instance.query("SHOW QUOTAS") == "" + assert instance.query("SHOW QUOTA USAGE") == "" + + instance.query("CREATE QUOTA qA FOR INTERVAL 15 MONTH SET MAX QUERIES = 123 TO CURRENT_USER") + assert instance.query("SHOW QUOTAS") == "qA\n" + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 5 QUARTER MAX QUERIES = 123 TO default\n" + expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0/123 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + instance.query("SELECT * from test_table") + expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=1/123 errors=0 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES = 321, MAX ERRORS = 10, FOR INTERVAL 0.5 HOUR MAX EXECUTION TIME = 0.5") + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 30 MINUTE MAX EXECUTION TIME = 0.5, FOR INTERVAL 5 QUARTER MAX QUERIES = 321, MAX ERRORS = 10 TO default\n" + expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*/0.5\n"\ + "qA key=\\\\'\\\\' interval=\[.*\] queries=1/321 errors=0/10 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH UNSET TRACKING, FOR RANDOMIZED INTERVAL 16 MONTH SET TRACKING, FOR INTERVAL 1800 SECOND UNSET TRACKING") + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING TO default\n" + expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + instance.query("SELECT * from test_table") + expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=1 errors=0 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + instance.query("ALTER QUOTA qA RENAME TO qB") + assert instance.query("SHOW CREATE QUOTA qB") == "CREATE QUOTA qB KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING TO default\n" + expected_usage = "qB key=\\\\'\\\\' interval=\[.*\] queries=1 
errors=0 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" + assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) + + instance.query("DROP QUOTA qB") + assert instance.query("SHOW QUOTAS") == "" + assert instance.query("SHOW QUOTA USAGE") == "" + + +def test_users_xml_is_readonly(): + assert re.search("storage is readonly", instance.query_and_get_error("DROP QUOTA myQuota")) diff --git a/dbms/tests/integration/test_quota/tiny_limits.xml b/dbms/tests/integration/test_quota/tiny_limits.xml new file mode 100644 index 00000000000..3ab8858738a --- /dev/null +++ b/dbms/tests/integration/test_quota/tiny_limits.xml @@ -0,0 +1,17 @@ + + + + + + + 31556952 + + + 1 + 1 + 1 + 1 + + + + diff --git a/dbms/tests/integration/test_quota/tracking.xml b/dbms/tests/integration/test_quota/tracking.xml new file mode 100644 index 00000000000..47e12bf8005 --- /dev/null +++ b/dbms/tests/integration/test_quota/tracking.xml @@ -0,0 +1,17 @@ + + + + + + + 31556952 + + + 0 + 0 + 0 + 0 + + + + diff --git a/dbms/tests/integration/test_quota/two_intervals.xml b/dbms/tests/integration/test_quota/two_intervals.xml new file mode 100644 index 00000000000..d0de605b895 --- /dev/null +++ b/dbms/tests/integration/test_quota/two_intervals.xml @@ -0,0 +1,20 @@ + + + + + + 31556952 + 1000 + 1000 + + + + true + 63113904 + 20000 + 30000 + 120 + + + + diff --git a/dbms/tests/integration/test_quota/two_quotas.xml b/dbms/tests/integration/test_quota/two_quotas.xml new file mode 100644 index 00000000000..c08cc82aca7 --- /dev/null +++ b/dbms/tests/integration/test_quota/two_quotas.xml @@ -0,0 +1,29 @@ + + + + + + 31556952 + 1000 + 1000 + + + + + + + true + 3600 + 4000 + 4000 + 400000 + 400000 + 60 + + + 2629746 + 1800 + + + + diff --git a/dbms/tests/performance/parse_engine_file.xml b/dbms/tests/performance/parse_engine_file.xml index 6bd4af0b45b..8308d8f049f 100644 --- a/dbms/tests/performance/parse_engine_file.xml +++ b/dbms/tests/performance/parse_engine_file.xml @@ -32,6 +32,8 @@ CSVWithNames Values JSONEachRow + JSONCompactEachRow + JSONCompactEachRowWithNamesAndTypes TSKV RowBinary Native diff --git a/dbms/tests/performance/select_format.xml b/dbms/tests/performance/select_format.xml index c5ad1acd396..55ab7b2d458 100644 --- a/dbms/tests/performance/select_format.xml +++ b/dbms/tests/performance/select_format.xml @@ -34,6 +34,7 @@ JSON JSONCompact JSONEachRow + JSONCompactEachRow TSKV Pretty PrettyCompact diff --git a/dbms/tests/queries/0_stateless/00203_full_join.reference b/dbms/tests/queries/0_stateless/00203_full_join.reference index eedd5818063..d97597d17d3 100644 --- a/dbms/tests/queries/0_stateless/00203_full_join.reference +++ b/dbms/tests/queries/0_stateless/00203_full_join.reference @@ -43,3 +43,5 @@ Hello [0,1,2] 5 6 7 ddd 2 3 4 bbb ccc 5 6 7 ddd +2 3 4 bbb ccc +5 6 7 ddd diff --git a/dbms/tests/queries/0_stateless/00203_full_join.sql b/dbms/tests/queries/0_stateless/00203_full_join.sql index 0250aef35eb..9b07e9c84e7 100644 --- a/dbms/tests/queries/0_stateless/00203_full_join.sql +++ b/dbms/tests/queries/0_stateless/00203_full_join.sql @@ -27,7 +27,7 @@ SELECT k1, k2, k3, val_t1, val_t2 FROM t1_00203 ANY RIGHT JOIN t2_00203 USING (k SET any_join_distinct_right_table_keys = 0; SELECT k1, k2, k3, val_t1, val_t2 FROM t1_00203 ANY FULL JOIN t2_00203 USING (k3, k1, k2 AS k2_alias) ORDER BY k1, k2, k3; -- { serverError 48 } -SELECT k1, k2, k3, val_t1, val_t2 FROM t1_00203 ANY RIGHT JOIN t2_00203 USING (k3, k1, k2 AS k2_alias) ORDER BY k1, k2, k3; -- { serverError 48 } +SELECT k1, k2, 
k3, val_t1, val_t2 FROM t1_00203 ANY RIGHT JOIN t2_00203 USING (k3, k1, k2 AS k2_alias) ORDER BY k1, k2, k3; DROP TABLE t1_00203; DROP TABLE t2_00203; diff --git a/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql b/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql index c22b72e4126..ce52c652df0 100644 --- a/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql +++ b/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql @@ -20,7 +20,7 @@ FROM learnerHash, passed - eventTime AS diff FROM statements - GLOBAL ANY INNER JOIN + GLOBAL SEMI LEFT JOIN ( SELECT learnerHash, diff --git a/dbms/tests/queries/0_stateless/00679_replace_asterisk.sql b/dbms/tests/queries/0_stateless/00679_replace_asterisk.sql index 27ff799be62..19aa939b132 100644 --- a/dbms/tests/queries/0_stateless/00679_replace_asterisk.sql +++ b/dbms/tests/queries/0_stateless/00679_replace_asterisk.sql @@ -1,7 +1,6 @@ -set any_join_distinct_right_table_keys = 1; SET joined_subquery_requires_alias = 0; SELECT * FROM (SELECT 1 AS id, 2 AS value); -SELECT * FROM (SELECT 1 AS id, 2 AS value, 3 AS A) ANY INNER JOIN (SELECT 1 AS id, 4 AS values, 5 AS D) USING id; -SELECT *, d.* FROM ( SELECT 1 AS id, 2 AS value ) ANY INNER JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id; -SELECT *, d.*, d.values FROM ( SELECT 1 AS id, 2 AS value ) ANY INNER JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id; +SELECT * FROM (SELECT 1 AS id, 2 AS value, 3 AS A) SEMI LEFT JOIN (SELECT 1 AS id, 4 AS values, 5 AS D) USING id; +SELECT *, d.* FROM ( SELECT 1 AS id, 2 AS value ) SEMI LEFT JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id; +SELECT *, d.*, d.values FROM ( SELECT 1 AS id, 2 AS value ) SEMI LEFT JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id; diff --git a/dbms/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql b/dbms/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql index abf2903d3ea..aa386829276 100644 --- a/dbms/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql @@ -12,8 +12,7 @@ SYSTEM SYNC REPLICA byte_identical_r2; ALTER TABLE byte_identical_r1 ADD COLUMN y DEFAULT rand(); OPTIMIZE TABLE byte_identical_r1 PARTITION tuple() FINAL; -SET any_join_distinct_right_table_keys = 1; -SELECT x, t1.y - t2.y FROM byte_identical_r1 t1 ANY INNER JOIN byte_identical_r2 t2 USING x ORDER BY x; +SELECT x, t1.y - t2.y FROM byte_identical_r1 t1 SEMI LEFT JOIN byte_identical_r2 t2 USING x ORDER BY x; DROP TABLE byte_identical_r1; DROP TABLE byte_identical_r2; diff --git a/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.reference b/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.reference index 074ee47e294..afbe9855519 100644 --- a/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.reference +++ b/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.reference @@ -6,3 +6,6 @@ 1 x x 1 x x 1 x x +1 x x +1 x x +1 x x diff --git a/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.sql b/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.sql index cf1c0bfe1f7..14a7424e634 100644 --- a/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.sql +++ b/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.sql @@ -12,5 +12,7 @@ 
SELECT * FROM (SELECT 1 AS a, 'x' AS b) any full join (SELECT 1 as a, 'y' as b) SELECT * FROM (SELECT 1 AS a, 'x' AS b) any right join (SELECT 1 as a, 'y' as b) using a; SET any_join_distinct_right_table_keys = 0; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) any join (SELECT 1 as a, 'y' as b) using a; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) left join (SELECT 1 as a, 'y' as b) using a; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) any right join (SELECT 1 as a, 'y' as b) using a; SELECT * FROM (SELECT 1 AS a, 'x' AS b) any full join (SELECT 1 as a, 'y' as b) using a; -- { serverError 48 } -SELECT * FROM (SELECT 1 AS a, 'x' AS b) any right join (SELECT 1 as a, 'y' as b) using a; -- { serverError 48 } diff --git a/dbms/tests/queries/0_stateless/00859_distinct_with_join.sql b/dbms/tests/queries/0_stateless/00859_distinct_with_join.sql index 23c41549502..4fb6f4ec046 100644 --- a/dbms/tests/queries/0_stateless/00859_distinct_with_join.sql +++ b/dbms/tests/queries/0_stateless/00859_distinct_with_join.sql @@ -1,5 +1,3 @@ -set any_join_distinct_right_table_keys = 1; - drop table if exists fooL; drop table if exists fooR; create table fooL (a Int32, v String) engine = Memory; @@ -9,7 +7,7 @@ insert into fooL select number, 'L' || toString(number) from numbers(2); insert into fooL select number, 'LL' || toString(number) from numbers(2); insert into fooR select number, 'R' || toString(number) from numbers(2); -select distinct a from fooL any join fooR using(a) order by a; +select distinct a from fooL semi left join fooR using(a) order by a; drop table fooL; drop table fooR; diff --git a/dbms/tests/queries/0_stateless/00956_join_use_nulls_with_array_column.sql b/dbms/tests/queries/0_stateless/00956_join_use_nulls_with_array_column.sql index 244e04a564a..f70bccd68fd 100644 --- a/dbms/tests/queries/0_stateless/00956_join_use_nulls_with_array_column.sql +++ b/dbms/tests/queries/0_stateless/00956_join_use_nulls_with_array_column.sql @@ -1,4 +1,3 @@ -SET any_join_distinct_right_table_keys = 1; SET join_use_nulls = 1; -SELECT number FROM system.numbers ANY INNER JOIN (SELECT number, ['test'] FROM system.numbers LIMIT 1) js2 USING (number) LIMIT 1; +SELECT number FROM system.numbers SEMI LEFT JOIN (SELECT number, ['test'] FROM system.numbers LIMIT 1) js2 USING (number) LIMIT 1; SELECT number FROM system.numbers ANY LEFT JOIN (SELECT number, ['test'] FROM system.numbers LIMIT 1) js2 USING (number) LIMIT 1; diff --git a/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql b/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql index 9a754d94323..7ecffd8653c 100644 --- a/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql +++ b/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql @@ -1,5 +1,5 @@ drop table if exists tab; create table tab (x UInt64) engine = MergeTree order by tuple(); -insert into tab select number as n from numbers(20) any inner join (select number * 10 as n from numbers(2)) using(n) settings any_join_distinct_right_table_keys = 1, max_block_size = 5; +insert into tab select number as n from numbers(20) semi left join (select number * 10 as n from numbers(2)) using(n) settings max_block_size = 5; select * from tab order by x; diff --git a/dbms/tests/queries/0_stateless/01009_insert_select_nicelulu.sql b/dbms/tests/queries/0_stateless/01009_insert_select_nicelulu.sql index 90a902c352d..3fe7ec04e85 100644 --- a/dbms/tests/queries/0_stateless/01009_insert_select_nicelulu.sql +++ b/dbms/tests/queries/0_stateless/01009_insert_select_nicelulu.sql @@ -1,4 +1,3 
@@ -Set any_join_distinct_right_table_keys=1; DROP TABLE IF EXISTS test_insert_t1; DROP TABLE IF EXISTS test_insert_t2; DROP TABLE IF EXISTS test_insert_t3; @@ -15,7 +14,7 @@ INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numb INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numbers WHERE number >=700000 limit 200; INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numbers WHERE number >=900000 limit 200; -INSERT INTO test_insert_t3 SELECT '2019-09-01', uid, name, city FROM ( SELECT dt, uid, name, city FROM test_insert_t1 WHERE dt = '2019-09-01') t1 GLOBAL ANY INNER JOIN (SELECT uid FROM test_insert_t2 WHERE dt = '2019-09-01') t2 ON t1.uid=t2.uid; +INSERT INTO test_insert_t3 SELECT '2019-09-01', uid, name, city FROM ( SELECT dt, uid, name, city FROM test_insert_t1 WHERE dt = '2019-09-01') t1 GLOBAL SEMI LEFT JOIN (SELECT uid FROM test_insert_t2 WHERE dt = '2019-09-01') t2 ON t1.uid=t2.uid; SELECT count(*) FROM test_insert_t3; diff --git a/dbms/tests/queries/0_stateless/01030_incorrect_count_summing_merge_tree.reference b/dbms/tests/queries/0_stateless/01030_incorrect_count_summing_merge_tree.reference new file mode 100644 index 00000000000..b12baf4709e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01030_incorrect_count_summing_merge_tree.reference @@ -0,0 +1,80 @@ +-- SummingMergeTree with Nullable column without duplicates. +2018-02-01 00:00:00 1 +2018-02-02 00:00:00 2 +-- 2 2 +2 +2 +-- 2 2 +2 +2 +-- 2 2 +2 +2 +-- 2 2 2 2 +2 +2 +2 +2 +-- 2 2 +2 +2 +-- SummingMergeTree with Nullable column with duplicates +2018-02-01 00:00:00 4 +2018-02-02 00:00:00 6 +-- 4 2 +4 +2 +-- 4 2 +4 +2 +-- 4 2 +4 +2 +-- 2 2 2 2 +2 +2 +2 +2 +-- 2 2 +2 +2 +-- SummingMergeTree without Nullable column without duplicates. +2018-02-01 00:00:00 1 +2018-02-02 00:00:00 2 +-- 2 2 +2 +2 +-- 2 2 +2 +2 +-- 2 2 +2 +2 +-- 2 2 2 2 +2 +2 +2 +2 +-- 2 2 +2 +2 +-- SummingMergeTree without Nullable column with duplicates. 
+2018-02-01 00:00:00 4 +2018-02-02 00:00:00 6 +-- 4 2 +4 +2 +-- 4 2 +4 +2 +-- 4 2 +4 +2 +-- 2 2 2 2 +2 +2 +2 +2 +-- 2 2 +2 +2 diff --git a/dbms/tests/queries/0_stateless/01030_incorrect_count_summing_merge_tree.sql b/dbms/tests/queries/0_stateless/01030_incorrect_count_summing_merge_tree.sql new file mode 100644 index 00000000000..a9f7bf7ecd7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01030_incorrect_count_summing_merge_tree.sql @@ -0,0 +1,123 @@ +select '-- SummingMergeTree with Nullable column without duplicates.'; + +drop table if exists tst; +create table tst (timestamp DateTime, val Nullable(Int8)) engine SummingMergeTree partition by toYYYYMM(timestamp) ORDER by (timestamp); +insert into tst values ('2018-02-01 00:00:00', 1), ('2018-02-02 00:00:00', 2); + +select * from tst final; + +select '-- 2 2'; +select count() from tst; +select count() from tst final; + +select '-- 2 2'; +select count() from tst where timestamp is not null; +select count() from tst final where timestamp is not null; + +select '-- 2 2'; +select count() from tst where val is not null; +select count() from tst final where val is not null; + +select '-- 2 2 2 2'; +select count() from tst final where timestamp>0; +select count() from tst final prewhere timestamp > 0; +select count() from tst final where timestamp > '2017-01-01 00:00:00'; +select count() from tst final prewhere timestamp > '2017-01-01 00:00:00'; + +select '-- 2 2'; +select count() from tst final where val>0; +select count() from tst final prewhere val>0; + +select '-- SummingMergeTree with Nullable column with duplicates'; + +drop table if exists tst; +create table tst (timestamp DateTime, val Nullable(Int8)) engine SummingMergeTree partition by toYYYYMM(timestamp) ORDER by (timestamp); +insert into tst values ('2018-02-01 00:00:00', 1), ('2018-02-02 00:00:00', 2), ('2018-02-01 00:00:00', 3), ('2018-02-02 00:00:00', 4); + +select * from tst final; + +select '-- 4 2'; +select count() from tst; +select count() from tst final; + +select '-- 4 2'; +select count() from tst where timestamp is not null; +select count() from tst final where timestamp is not null; + +select '-- 4 2'; +select count() from tst where val is not null; +select count() from tst final where val is not null; + +select '-- 2 2 2 2'; +select count() from tst final where timestamp>0; +select count() from tst final prewhere timestamp > 0; +select count() from tst final where timestamp > '2017-01-01 00:00:00'; +select count() from tst final prewhere timestamp > '2017-01-01 00:00:00'; + +select '-- 2 2'; +select count() from tst final where val>0; +select count() from tst final prewhere val>0; + +select '-- SummingMergeTree without Nullable column without duplicates.'; + +drop table if exists tst; +create table tst (timestamp DateTime, val Int8) engine SummingMergeTree partition by toYYYYMM(timestamp) ORDER by (timestamp); +insert into tst values ('2018-02-01 00:00:00', 1), ('2018-02-02 00:00:00', 2); + +select * from tst final; + +select '-- 2 2'; +select count() from tst; +select count() from tst final; + +select '-- 2 2 '; +select count() from tst where timestamp is not null; +select count() from tst final where timestamp is not null; + +select '-- 2 2'; +select count() from tst where val is not null; +select count() from tst final where val is not null; + +select '-- 2 2 2 2'; +select count() from tst final where timestamp>0; +select count() from tst final prewhere timestamp > 0; +select count() from tst final where timestamp > '2017-01-01 00:00:00'; +select count() from tst 
final prewhere timestamp > '2017-01-01 00:00:00'; + +select '-- 2 2'; +select count() from tst final where val>0; +select count() from tst final prewhere val>0; + +drop table tst; + +select '-- SummingMergeTree without Nullable column with duplicates.'; + +drop table if exists tst; +create table tst (timestamp DateTime, val Int8) engine SummingMergeTree partition by toYYYYMM(timestamp) ORDER by (timestamp); +insert into tst values ('2018-02-01 00:00:00', 1), ('2018-02-02 00:00:00', 2), ('2018-02-01 00:00:00', 3), ('2018-02-02 00:00:00', 4); + +select * from tst final; + +select '-- 4 2'; +select count() from tst; +select count() from tst final; + +select '-- 4 2'; +select count() from tst where timestamp is not null; +select count() from tst final where timestamp is not null; + +select '-- 4 2'; +select count() from tst where val is not null; +select count() from tst final where val is not null; + +select '-- 2 2 2 2'; +select count() from tst final where timestamp>0; +select count() from tst final prewhere timestamp > 0; +select count() from tst final where timestamp > '2017-01-01 00:00:00'; +select count() from tst final prewhere timestamp > '2017-01-01 00:00:00'; + +select '-- 2 2'; +select count() from tst final where val>0; +select count() from tst final prewhere val>0; + +drop table tst; diff --git a/dbms/tests/queries/0_stateless/01031_new_any_join.reference b/dbms/tests/queries/0_stateless/01031_new_any_join.reference new file mode 100644 index 00000000000..1fd9a5352e3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01031_new_any_join.reference @@ -0,0 +1,32 @@ +any left +0 a1 0 +1 a2 0 +2 a3 2 b1 +3 a4 0 +4 a5 4 b3 +any left (rev) +0 5 b6 +2 a3 2 b1 +2 a3 2 b2 +4 a5 4 b3 +4 a5 4 b4 +4 a5 4 b5 +any inner +2 a3 2 b1 +4 a5 4 b3 +any inner (rev) +2 a3 2 b1 +4 a5 4 b3 +any right +0 5 b6 +2 a3 2 b1 +2 a3 2 b2 +4 a5 4 b3 +4 a5 4 b4 +4 a5 4 b5 +any right (rev) +0 a1 0 +1 a2 0 +2 a3 2 b1 +3 a4 0 +4 a5 4 b3 diff --git a/dbms/tests/queries/0_stateless/01031_new_any_join.sql b/dbms/tests/queries/0_stateless/01031_new_any_join.sql new file mode 100644 index 00000000000..de86d8eebc5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01031_new_any_join.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (x UInt32, s String) engine = Memory; +CREATE TABLE t2 (x UInt32, s String) engine = Memory; + +INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5'); +INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6'); + +SET join_use_nulls = 0; +SET any_join_distinct_right_table_keys = 0; + +SELECT 'any left'; +SELECT t1.*, t2.* FROM t1 ANY LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'any left (rev)'; +SELECT t1.*, t2.* FROM t2 ANY LEFT JOIN t1 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'any inner'; +SELECT t1.*, t2.* FROM t1 ANY INNER JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'any inner (rev)'; +SELECT t1.*, t2.* FROM t2 ANY INNER JOIN t1 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'any right'; +SELECT t1.*, t2.* FROM t1 ANY RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'any right (rev)'; +SELECT t1.*, t2.* FROM t2 ANY RIGHT JOIN t1 USING(x) ORDER BY t1.x, t2.x; + +DROP TABLE t1; +DROP TABLE t2; diff --git a/dbms/tests/queries/0_stateless/01031_semi_anti_join.reference b/dbms/tests/queries/0_stateless/01031_semi_anti_join.reference new file mode 100644 index 00000000000..782147f1f6f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01031_semi_anti_join.reference @@ -0,0 +1,16 @@ 
+semi left +2 a3 2 b1 +2 a6 2 b1 +4 a5 4 b3 +semi right +2 a3 2 b1 +2 a3 2 b2 +4 a5 4 b3 +4 a5 4 b4 +4 a5 4 b5 +anti left +0 a1 0 +1 a2 1 +3 a4 3 +anti right +0 5 b6 diff --git a/dbms/tests/queries/0_stateless/01031_semi_anti_join.sql b/dbms/tests/queries/0_stateless/01031_semi_anti_join.sql new file mode 100644 index 00000000000..19ea219563a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01031_semi_anti_join.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (x UInt32, s String) engine = Memory; +CREATE TABLE t2 (x UInt32, s String) engine = Memory; + +INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5'), (2, 'a6'); +INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6'); + +SET join_use_nulls = 0; + +SELECT 'semi left'; +SELECT t1.*, t2.* FROM t1 SEMI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'semi right'; +SELECT t1.*, t2.* FROM t1 SEMI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'anti left'; +SELECT t1.*, t2.* FROM t1 ANTI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'anti right'; +SELECT t1.*, t2.* FROM t1 ANTI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +DROP TABLE t1; +DROP TABLE t2; diff --git a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference index b69b141bbe4..07c56f08482 100644 --- a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference +++ b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference @@ -1,2 +1,3 @@ INITIALIZING DICTIONARY +1 1 10 diff --git a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql index 0497349f86f..8b16c401afe 100644 --- a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql +++ b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql @@ -36,6 +36,8 @@ LAYOUT(FLAT()); SELECT 'INITIALIZING DICTIONARY'; +SELECT dictGetUInt8('ordinary_db.dict1', 'second_column', toUInt64(100500)); + SELECT lifetime_min, lifetime_max FROM system.dictionaries WHERE name = 'dict1'; DROP DICTIONARY IF EXISTS ordinary_db.dict1; diff --git a/dbms/tests/queries/0_stateless/01033_quota_dcl.reference b/dbms/tests/queries/0_stateless/01033_quota_dcl.reference new file mode 100644 index 00000000000..7f92f992dd5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01033_quota_dcl.reference @@ -0,0 +1,2 @@ +default +CREATE QUOTA default KEYED BY \'user name\' FOR INTERVAL 1 HOUR TRACKING TO default, readonly diff --git a/dbms/tests/queries/0_stateless/01033_quota_dcl.sql b/dbms/tests/queries/0_stateless/01033_quota_dcl.sql new file mode 100644 index 00000000000..a1c7f1fc204 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01033_quota_dcl.sql @@ -0,0 +1,3 @@ +SHOW QUOTAS; +SHOW CREATE QUOTA default; +CREATE QUOTA q1; -- { serverError 497 } diff --git a/dbms/tests/queries/0_stateless/01034_JSONCompactEachRow.reference b/dbms/tests/queries/0_stateless/01034_JSONCompactEachRow.reference new file mode 100644 index 00000000000..6ec53e11fc9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01034_JSONCompactEachRow.reference @@ -0,0 +1,47 @@ +1 +[1, "a"] +[2, "b"] +[3, "c"] +2 +["a", "1"] +["b", "1"] +["c", "1"] +3 +["value", "name"] +["UInt8", "String"] +[1, "a"] +[2, "b"] +[3, "c"] +4 +["name", "c"] +["String", "UInt64"] +["a", "1"] +["b", "1"] +["c", "1"] + +["", "3"] +5 +["first", 1, 2, 0] +["second", 2, 0, 6] +6 +["first", 1, 2, 8] 
+["second", 2, 32, 6] +7 +[16, [15,16,0], ["first","second","third"]] +8 +["first", 1, 2, 0] +["second", 2, 0, 6] +9 +["first", 1, 2, 8] +["second", 2, 32, 6] +10 +["first", 1, 16, 8] +["second", 2, 32, 8] +11 +["v1", "v2", "v3", "v4"] +["String", "UInt8", "UInt16", "UInt8"] +["", 2, 3, 1] +12 +["v1", "n.id", "n.name"] +["UInt8", "Array(UInt8)", "Array(String)"] +[16, [15,16,0], ["first","second","third"]] diff --git a/dbms/tests/queries/0_stateless/01034_JSONCompactEachRow.sql b/dbms/tests/queries/0_stateless/01034_JSONCompactEachRow.sql new file mode 100644 index 00000000000..46a0e90e69d --- /dev/null +++ b/dbms/tests/queries/0_stateless/01034_JSONCompactEachRow.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table_2; +SELECT 1; +/* Check JSONCompactEachRow Output */ +CREATE TABLE test_table (value UInt8, name String) ENGINE = MergeTree() ORDER BY value; +INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c'); +SELECT * FROM test_table FORMAT JSONCompactEachRow; +SELECT 2; +/* Check Totals */ +SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactEachRow; +SELECT 3; +/* Check JSONCompactEachRowWithNamesAndTypes Output */ +SELECT * FROM test_table FORMAT JSONCompactEachRowWithNamesAndTypes; +SELECT 4; +/* Check Totals */ +SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactEachRowWithNamesAndTypes; +DROP TABLE IF EXISTS test_table; +SELECT 5; +/* Check JSONCompactEachRow Input */ +CREATE TABLE test_table (v1 String, v2 UInt8, v3 DEFAULT v2 * 16, v4 UInt8 DEFAULT 8) ENGINE = MergeTree() ORDER BY v2; +INSERT INTO test_table FORMAT JSONCompactEachRow ["first", 1, "2", null] ["second", 2, null, 6]; +SELECT * FROM test_table FORMAT JSONCompactEachRow; +TRUNCATE TABLE test_table; +SELECT 6; +/* Check input_format_null_as_default = 1 */ +SET input_format_null_as_default = 1; +INSERT INTO test_table FORMAT JSONCompactEachRow ["first", 1, "2", null] ["second", 2, null, 6]; +SELECT * FROM test_table FORMAT JSONCompactEachRow; +TRUNCATE TABLE test_table; +SELECT 7; +/* Check Nested */ +CREATE TABLE test_table_2 (v1 UInt8, n Nested(id UInt8, name String)) ENGINE = MergeTree() ORDER BY v1; +INSERT INTO test_table_2 FORMAT JSONCompactEachRow [16, [15, 16, null], ["first", "second", "third"]]; +SELECT * FROM test_table_2 FORMAT JSONCompactEachRow; +TRUNCATE TABLE test_table_2; +SELECT 8; +/* Check JSONCompactEachRowWithNamesAndTypes Output */ +SET input_format_null_as_default = 0; +INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", 1, "2", null]["second", 2, null, 6]; +SELECT * FROM test_table FORMAT JSONCompactEachRow; +TRUNCATE TABLE test_table; +SELECT 9; +/* Check input_format_null_as_default = 1 */ +SET input_format_null_as_default = 1; +INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", 1, "2", null] ["second", 2, null, 6]; +SELECT * FROM test_table FORMAT JSONCompactEachRow; +SELECT 10; +/* Check Header */ +TRUNCATE TABLE test_table; +SET input_format_skip_unknown_fields = 1; +INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "invalid_column"]["String", "UInt8", "UInt8"]["first", 1, 32]["second", 2, "64"]; +SELECT * FROM test_table FORMAT JSONCompactEachRow; +SELECT 11; +TRUNCATE TABLE test_table; +INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v4", 
"v2", "v3"]["UInt8", "UInt8", "UInt16"][1, 2, 3] +SELECT * FROM test_table FORMAT JSONCompactEachRowWithNamesAndTypes; +SELECT 12; +/* Check Nested */ +INSERT INTO test_table_2 FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "n.id", "n.name"]["UInt8", "Array(UInt8)", "Array(String)"][16, [15, 16, null], ["first", "second", "third"]]; +SELECT * FROM test_table_2 FORMAT JSONCompactEachRowWithNamesAndTypes; + +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table_2; diff --git a/dbms/tests/queries/0_stateless/01040_dictionary_invalidate_query_failover.reference b/dbms/tests/queries/0_stateless/01040_dictionary_invalidate_query_failover.reference new file mode 100644 index 00000000000..1fca8dab675 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01040_dictionary_invalidate_query_failover.reference @@ -0,0 +1,5 @@ +122 + +Table dictdb.dict_invalidate doesn\'t exist. + +133 diff --git a/dbms/tests/queries/0_stateless/01040_dictionary_invalidate_query_failover.sh b/dbms/tests/queries/0_stateless/01040_dictionary_invalidate_query_failover.sh new file mode 100755 index 00000000000..ef5d3053f9a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01040_dictionary_invalidate_query_failover.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + + +$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb" + +$CLICKHOUSE_CLIENT --query "CREATE DATABASE dictdb Engine = Ordinary" + +$CLICKHOUSE_CLIENT --query " +CREATE TABLE dictdb.dict_invalidate +ENGINE = Memory AS +SELECT + 122 as dummy, + toDateTime('2019-10-29 18:51:35') AS last_time +FROM system.one" + + +$CLICKHOUSE_CLIENT --query " +CREATE DICTIONARY dictdb.invalidate +( + dummy UInt64, + two UInt8 EXPRESSION dummy +) +PRIMARY KEY dummy +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_invalidate' DB 'dictdb' INVALIDATE_QUERY 'select max(last_time) from dictdb.dict_invalidate')) +LIFETIME(MIN 0 MAX 1) +LAYOUT(FLAT())" + +$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb.invalidate', 'two', toUInt64(122))" + +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" + +# Bad solution, but it's quite complicated to detect, that invalidte_query stopped updates. +# In worst case we don't check anything, but fortunately it doesn't lead to false negatives. +sleep 5 + +$CLICKHOUSE_CLIENT --query "DROP TABLE dictdb.dict_invalidate" + +function check_exception_detected() +{ + + query_result=`$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1` + + while [ -z "$query_result" ] + do + query_result=`$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1` + sleep 0.1 + done +} + + +export -f check_exception_detected; +timeout 10 bash -c check_exception_detected 2> /dev/null + +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1 | grep -Eo "Table dictdb.dict_invalidate .* exist." 
+ +$CLICKHOUSE_CLIENT --query " +CREATE TABLE dictdb.dict_invalidate +ENGINE = Memory AS +SELECT + 133 as dummy, + toDateTime('2019-10-29 18:51:35') AS last_time +FROM system.one" + +function check_exception_fixed() +{ + query_result=`$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1` + + while [ "$query_result" ] + do + query_result=`$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1` + sleep 0.1 + done +} + +export -f check_exception_fixed; +timeout 10 bash -c check_exception_fixed 2> /dev/null + +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb' AND name = 'invalidate'" 2>&1 +$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb.invalidate', 'two', toUInt64(133))" + +$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb" diff --git a/dbms/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference b/dbms/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference new file mode 100644 index 00000000000..5565ed6787f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference @@ -0,0 +1,4 @@ +0 +1 +0 +1 diff --git a/dbms/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql b/dbms/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql new file mode 100644 index 00000000000..ffc33ce6949 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql @@ -0,0 +1,9 @@ +SET distributed_directory_monitor_batch_inserts=1; +SET distributed_directory_monitor_sleep_time_ms=10; +SET distributed_directory_monitor_max_sleep_time_ms=100; + +CREATE TABLE test (key UInt64) ENGINE=TinyLog(); +CREATE TABLE dist_test AS test Engine=Distributed(test_cluster_two_shards, currentDatabase(), test, key); +INSERT INTO dist_test SELECT toUInt64(number) FROM numbers(2); +SYSTEM FLUSH DISTRIBUTED dist_test; +SELECT * FROM dist_test; diff --git a/dbms/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.reference b/dbms/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.reference new file mode 100644 index 00000000000..15eecd22cf1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.reference @@ -0,0 +1,2 @@ +1.1 +1.1 diff --git a/dbms/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql b/dbms/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql new file mode 100644 index 00000000000..5002b7a59ab --- /dev/null +++ b/dbms/tests/queries/0_stateless/01041_create_dictionary_if_not_exists.sql @@ -0,0 +1,40 @@ +DROP DATABASE IF EXISTS dictdb; + +CREATE DATABASE dictdb ENGINE = Ordinary; + +CREATE TABLE dictdb.table_for_dict +( + key_column UInt64, + value Float64 +) +ENGINE = MergeTree() +ORDER BY key_column; + +INSERT INTO dictdb.table_for_dict VALUES (1, 1.1); + +CREATE DICTIONARY IF NOT EXISTS dictdb.dict_exists +( + key_column UInt64, + value Float64 DEFAULT 77.77 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'dictdb')) +LIFETIME(1) +LAYOUT(FLAT()); + +SELECT dictGetFloat64('dictdb.dict_exists', 'value', toUInt64(1)); + + +CREATE DICTIONARY IF NOT EXISTS dictdb.dict_exists +( + key_column UInt64, + value Float64 DEFAULT 77.77 +) +PRIMARY KEY key_column 
+SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'dictdb')) +LIFETIME(1) +LAYOUT(FLAT()); + +SELECT dictGetFloat64('dictdb.dict_exists', 'value', toUInt64(1)); + +DROP DATABASE IF EXISTS dictdb; diff --git a/dbms/tests/queries/0_stateless/01042_check_query_and_last_granule_size.reference b/dbms/tests/queries/0_stateless/01042_check_query_and_last_granule_size.reference new file mode 100644 index 00000000000..a4fac158712 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01042_check_query_and_last_granule_size.reference @@ -0,0 +1,7 @@ +all_1_1_0 1 +all_1_1_0 1 +all_1_1_0 1 +all_1_1_0 1 +all_1_1_0 1 +all_2_2_0 1 +all_1_2_1 1 diff --git a/dbms/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql b/dbms/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql new file mode 100644 index 00000000000..9777ea1dc45 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql @@ -0,0 +1,38 @@ +SET check_query_single_value_result = 0; +DROP TABLE IF EXISTS check_query_test; + +CREATE TABLE check_query_test (SomeKey UInt64, SomeValue String) ENGINE = MergeTree() ORDER BY SomeKey; + +-- Number of rows in the last granule should be equal to the granularity. +-- Rows in this table are short, so the granularity will be 8192. +INSERT INTO check_query_test SELECT number, toString(number) FROM system.numbers LIMIT 81920; + +CHECK TABLE check_query_test; + +OPTIMIZE TABLE check_query_test; + +CHECK TABLE check_query_test; + +DROP TABLE IF EXISTS check_query_test; + +DROP TABLE IF EXISTS check_query_test_non_adaptive; + +CREATE TABLE check_query_test_non_adaptive (SomeKey UInt64, SomeValue String) ENGINE = MergeTree() ORDER BY SomeKey SETTINGS index_granularity_bytes = 0; + +INSERT INTO check_query_test_non_adaptive SELECT number, toString(number) FROM system.numbers LIMIT 81920; + +CHECK TABLE check_query_test_non_adaptive; + +OPTIMIZE TABLE check_query_test_non_adaptive; + +CHECK TABLE check_query_test_non_adaptive; + +INSERT INTO check_query_test_non_adaptive SELECT number, toString(number) FROM system.numbers LIMIT 77; + +CHECK TABLE check_query_test_non_adaptive; + +OPTIMIZE TABLE check_query_test_non_adaptive; + +CHECK TABLE check_query_test_non_adaptive; + +DROP TABLE IF EXISTS check_query_test_non_adaptive; diff --git a/docs/en/getting_started/example_datasets/metrica.md b/docs/en/getting_started/example_datasets/metrica.md index 19947273338..d89fe54f4eb 100644 --- a/docs/en/getting_started/example_datasets/metrica.md +++ b/docs/en/getting_started/example_datasets/metrica.md @@ -1,51 +1,62 @@ # Anonymized Yandex.Metrica Data -Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. Each of the tables can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as [TSV](https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz) and as [prepared partitions](https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz). +The dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in the [ClickHouse history](../../introduction/history.md) section. + +Either table can be downloaded as a compressed `tsv.xz` file or as prepared partitions.
In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz. ## Obtaining Tables from Prepared Partitions -**Download and import hits:** -```bash -$ curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar -$ tar xvf hits_v1.tar -C /var/lib/clickhouse # path to ClickHouse data directory -$ # check permissions on unpacked data, fix if required -$ sudo service clickhouse-server restart -$ clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1" + +Download and import the hits table: + +``` bash +curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar +tar xvf hits_v1.tar -C /var/lib/clickhouse # path to ClickHouse data directory +# check permissions on unpacked data, fix if required +sudo service clickhouse-server restart +clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1" ``` -**Download and import visits:** -```bash -$ curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar -$ tar xvf visits_v1.tar -C /var/lib/clickhouse # path to ClickHouse data directory -$ # check permissions on unpacked data, fix if required -$ sudo service clickhouse-server restart -$ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" +Download and import the visits table: + +``` bash +curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar +tar xvf visits_v1.tar -C /var/lib/clickhouse # path to ClickHouse data directory +# check permissions on unpacked data, fix if required +sudo service clickhouse-server restart +clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" ``` -## Obtaining Tables from Compressed tsv-file -**Download and import hits from compressed tsv-file** -```bash -$ curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv -$ # now create table -$ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets" -$ clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16),
RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192" -$ # import data -$ cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000 -$ # optionally you can optimize table -$ clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL" -$ clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1" +## Obtaining Tables from Compressed TSV File + +Download and import hits from compressed TSV file: + +``` bash +curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv +# now create table +clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets" +clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, 
Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192" +# import data +cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000 +# optionally you can optimize table +clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL" +clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1" ``` -**Download and import visits from compressed tsv-file** -```bash -$ curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv -$ # now create table -$ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets" -$ clickhouse-client --query "CREATE TABLE datasets.visits_v1 ( CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, 
CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)" -$ # import data -$ cat visits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.visits_v1 FORMAT TSV" --max_insert_block_size=100000 -$ # optionally you can optimize table -$ clickhouse-client --query "OPTIMIZE TABLE datasets.visits_v1 FINAL" -$ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" +Download and import visits from compressed tsv-file: + +``` bash +curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv +# now create table +clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets" +clickhouse-client --query "CREATE TABLE datasets.visits_v1 ( CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload 
UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = 
CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)" +# import data +cat visits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.visits_v1 FORMAT TSV" --max_insert_block_size=100000 +# optionally you can optimize table +clickhouse-client --query "OPTIMIZE TABLE datasets.visits_v1 FINAL" +clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" ``` -## Queries -Examples of queries to these tables (they are named `test.hits` and `test.visits`) can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/dbms/tests/queries/1_stateful) and in some [performance tests](https://github.com/ClickHouse/ClickHouse/tree/master/dbms/tests/performance) of ClickHouse. +## Example Queries + +The [ClickHouse tutorial](../../getting_started/tutorial.md) is based on the Yandex.Metrica dataset, and the recommended way to get started with this dataset is simply to go through the tutorial. + +Additional examples of queries to these tables can be found among [stateful tests](https://github.com/yandex/ClickHouse/tree/master/dbms/tests/queries/1_stateful) of ClickHouse (they are named `test.hits` and `test.visits` there). diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md index ed7335b748b..bfdcb0e108a 100644 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -1,147 +1,8 @@ # Getting Started -## System Requirements - -ClickHouse can run on any Linux, FreeBSD or Mac OS X with x86\_64 CPU architecture. - -Though pre-built binaries are typically compiled to leverage SSE 4.2 instruction set, so unless otherwise stated usage of CPU that supports it becomes an additional system requirement. Here's the command to check if current CPU has support for SSE 4.2: - -``` bash -$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported" -``` - -## Installation - -### From DEB Packages - -Yandex ClickHouse team recommends using official pre-compiled `deb` packages for Debian or Ubuntu. - -To install official packages add the Yandex repository in `/etc/apt/sources.list` or in a separate `/etc/apt/sources.list.d/clickhouse.list` file: - -```bash -$ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ -``` - -If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). - -Then run these commands to actually install packages: - -```bash -$ sudo apt-get install dirmngr # optional -$ sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 # optional -$ sudo apt-get update -$ sudo apt-get install clickhouse-client clickhouse-server -``` - -You can also download and install packages manually from here: https://repo.yandex.ru/clickhouse/deb/stable/main/. - -### From RPM Packages - -Yandex ClickHouse team recommends using official pre-compiled `rpm` packages for CentOS, RedHat and all other rpm-based Linux distributions. - -First you need to add the official repository: - -```bash -$ sudo yum install yum-utils -$ sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG -$ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 -``` - -If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). - -Then run these commands to actually install packages: - -```bash -$ sudo yum install clickhouse-server clickhouse-client -``` - -You can also download and install packages manually from here: https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/.
- -### From Docker Image - -To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside. - -### From Sources - -To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build_osx.md). - -You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs. - -```text -Client: dbms/programs/clickhouse-client -Server: dbms/programs/clickhouse-server -``` - -You'll need to create a data and metadata folders and `chown` them for the desired user. Their paths can be changed in server config (src/dbms/programs/server/config.xml), by default they are: -```text -/opt/clickhouse/data/default/ -/opt/clickhouse/metadata/default/ -``` - -On Gentoo you can just use `emerge clickhouse` to install ClickHouse from sources. - -## Launch - -To start the server as a daemon, run: - -``` bash -$ sudo service clickhouse-server start -``` - -If you don't have `service` command, run as - -``` bash -$ sudo /etc/init.d/clickhouse-server start -``` - - -See the logs in the `/var/log/clickhouse-server/` directory. - -If the server doesn't start, check the configurations in the file `/etc/clickhouse-server/config.xml`. - -You can also manually launch the server from the console: - -``` bash -$ clickhouse-server --config-file=/etc/clickhouse-server/config.xml -``` - -In this case, the log will be printed to the console, which is convenient during development. -If the configuration file is in the current directory, you don't need to specify the `--config-file` parameter. By default, it uses `./config.xml`. - -ClickHouse supports access restriction settings. They are located in the `users.xml` file (next to `config.xml`). -By default, access is allowed from anywhere for the `default` user, without a password. See `user/default/networks`. -For more information, see the section ["Configuration Files"](../operations/configuration_files.md). - -After launching server, you can use the command-line client to connect to it: - -``` bash -$ clickhouse-client -``` - -By default it connects to `localhost:9000` on behalf of the user `default` without a password. It can also be used to connect to a remote server using `--host` argument. - -The terminal must use UTF-8 encoding. -For more information, see the section ["Command-line client"](../interfaces/cli.md). - -Example: -``` bash -$ ./clickhouse-client -ClickHouse client version 0.0.18749. -Connecting to localhost:9000. -Connected to ClickHouse server version 0.0.18749. -``` -```sql -SELECT 1 -``` -```text -┌─1─┐ -│ 1 │ -└───┘ -``` - -**Congratulations, the system works!** - -To continue experimenting, you can download one of test data sets or go through [tutorial](https://clickhouse.yandex/tutorial.html). +If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all you need to go through the [installation process](install.md).
After that you can: +* [Go through a detailed tutorial](tutorial.md) +* [Experiment with example datasets](example_datasets/ontime.md) [Original article](https://clickhouse.yandex/docs/en/getting_started/) diff --git a/docs/en/getting_started/install.md b/docs/en/getting_started/install.md new file mode 100644 index 00000000000..e47500fa22f --- /dev/null +++ b/docs/en/getting_started/install.md @@ -0,0 +1,153 @@ +# Installation + +## System Requirements + +ClickHouse can run on any Linux, FreeBSD or Mac OS X with x86\_64, AArch64 or PowerPC64LE CPU architecture. + +Official pre-built binaries are typically compiled for x86\_64 and leverage the SSE 4.2 instruction set, so unless otherwise stated, usage of a CPU that supports it becomes an additional system requirement. Here's the command to check if the current CPU has support for SSE 4.2: + +``` bash +$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported" +``` + +To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should [build ClickHouse from sources](#from-sources) with proper configuration adjustments. + +## Available Installation Options + +### From DEB Packages + +It is recommended to use official pre-compiled `deb` packages for Debian or Ubuntu. + +To install official packages, add the Yandex repository in `/etc/apt/sources.list` or in a separate `/etc/apt/sources.list.d/clickhouse.list` file: + +``` +deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ +``` + +If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). + +Then run these commands to actually install packages: + +```bash +sudo apt-get install dirmngr # optional +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 # optional +sudo apt-get update +sudo apt-get install clickhouse-client clickhouse-server +``` + +You can also download and install packages manually from here: https://repo.yandex.ru/clickhouse/deb/stable/main/. + +### From RPM Packages + +It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat and all other rpm-based Linux distributions. + +First you need to add the official repository: + +```bash +sudo yum install yum-utils +sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG +sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 +``` + +If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). + +Then run these commands to actually install packages: + +```bash +sudo yum install clickhouse-server clickhouse-client +``` + +You can also download and install packages manually from here: https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/. + +### From Docker Image + +To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside. + +### From Sources + +To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build_osx.md). + +You can compile packages and install them or use programs without installing packages. Also, by building manually you can disable the SSE 4.2 requirement or build for AArch64 CPUs. + +``` +Client: dbms/programs/clickhouse-client +Server: dbms/programs/clickhouse-server +``` + +You'll need to create data and metadata folders and `chown` them for the desired user.
Their paths can be changed in the server config (src/dbms/programs/server/config.xml); by default they are: +``` +/opt/clickhouse/data/default/ +/opt/clickhouse/metadata/default/ +``` + +On Gentoo you can just use `emerge clickhouse` to install ClickHouse from sources. + +## Launch + +To start the server as a daemon, run: + +``` bash +$ sudo service clickhouse-server start +``` + +If you don't have the `service` command, run: + +``` bash +$ sudo /etc/init.d/clickhouse-server start +``` + + +See the logs in the `/var/log/clickhouse-server/` directory. + +If the server doesn't start, check the configuration in the file `/etc/clickhouse-server/config.xml`. + +You can also manually launch the server from the console: + +``` bash +$ clickhouse-server --config-file=/etc/clickhouse-server/config.xml +``` + +In this case, the log will be printed to the console, which is convenient during development. +If the configuration file is in the current directory, you don't need to specify the `--config-file` parameter. By default, it uses `./config.xml`. + +ClickHouse supports access restriction settings. They are located in the `users.xml` file (next to `config.xml`). +By default, access is allowed from anywhere for the `default` user, without a password. See `user/default/networks`. +For more information, see the section ["Configuration Files"](../operations/configuration_files.md). + +After launching the server, you can use the command-line client to connect to it: + +``` bash +$ clickhouse-client +``` + +By default it connects to `localhost:9000` on behalf of the user `default` without a password. It can also be used to connect to a remote server using the `--host` argument. + +The terminal must use UTF-8 encoding. +For more information, see the section ["Command-line client"](../interfaces/cli.md). + +Example: +``` bash +$ ./clickhouse-client +ClickHouse client version 0.0.18749. +Connecting to localhost:9000. +Connected to ClickHouse server version 0.0.18749. + +:) SELECT 1 + +SELECT 1 + +┌─1─┐ +│ 1 │ +└───┘ + +1 rows in set. Elapsed: 0.003 sec. + +:) +``` + +**Congratulations, the system works!** + +To continue experimenting, you can download one of the test data sets or go through the [tutorial](https://clickhouse.yandex/tutorial.html). + + +[Original article](https://clickhouse.yandex/docs/en/getting_started/install/) diff --git a/docs/en/getting_started/tutorial.md b/docs/en/getting_started/tutorial.md new file mode 100644 index 00000000000..acdd9074beb --- /dev/null +++ b/docs/en/getting_started/tutorial.md @@ -0,0 +1,645 @@ +# ClickHouse Tutorial + +## What to Expect from This Tutorial? + +By going through this tutorial, you'll learn how to set up a basic ClickHouse cluster; it'll be small, but fault-tolerant and scalable. We will use one of the example datasets to fill it with data and execute some demo queries. + +## Single Node Setup + +To postpone the complexities of a distributed environment, we'll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](index.md#from-deb-packages) or [rpm](index.md#from-rpm-packages) packages, but there are [alternatives](index.md#from-docker-image) for the operating systems that do not support them.
+ +For example, suppose you have chosen `deb` packages and executed: +``` bash +sudo apt-get install dirmngr +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 + +echo "deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" | sudo tee /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update + +sudo apt-get install -y clickhouse-server clickhouse-client +``` + +Here's what we have in the packages that got installed: + +* `clickhouse-client` package contains the [clickhouse-client](../interfaces/cli.md) application, an interactive ClickHouse console client. +* `clickhouse-common` package contains the ClickHouse executable file. +* `clickhouse-server` package contains configuration files to run ClickHouse as a server. + +Server config files are located in `/etc/clickhouse-server/`. Before going further, please notice the `path` element in `config.xml`. It determines the location for data storage, so it should be located on a volume with large disk capacity; the default value is `/var/lib/clickhouse/`. If you want to adjust the configuration, it's not really handy to edit the `config.xml` file directly, considering it might get rewritten on future package updates. The recommended way to override the config elements is to create [files in the config.d directory](../operations/configuration_files.md) which serve as "patches" to config.xml (a minimal sketch follows the tips below). + +As you might have noticed, `clickhouse-server` is not launched automatically after package installation. It won't be automatically restarted after updates either. The way you start the server depends on your init system, usually it's: + +``` bash +sudo service clickhouse-server start +``` +or + +``` bash +sudo /etc/init.d/clickhouse-server start +``` + +The default location for server logs is `/var/log/clickhouse-server/`. The server will be ready to handle client connections once the `Ready for connections` message is logged. + +Once the `clickhouse-server` is up and running, we can use `clickhouse-client` to connect to the server and run some test queries like `SELECT 'Hello, world!';`. +
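+For instance, here's a minimal smoke test of exactly that (a sketch; it assumes the default passwordless `default` user on `localhost:9000` and the default log file name): + +``` bash +# check that the server has already reported readiness in its log +sudo grep -m1 'Ready for connections' /var/log/clickhouse-server/clickhouse-server.log + +# run the test query in batch mode; single quotes denote a string literal in ClickHouse SQL +clickhouse-client --query "SELECT 'Hello, world!'" +``` +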
Quick tips for clickhouse-client +Interactive mode: +``` bash +clickhouse-client +clickhouse-client --host=... --port=... --user=... --password=... +``` + +Enable multiline queries: +``` bash +clickhouse-client -m +clickhouse-client --multiline +``` + +Run queries in batch mode: +``` bash +clickhouse-client --query='SELECT 1' +echo 'SELECT 1' | clickhouse-client +clickhouse-client <<< 'SELECT 1' +``` + +Insert data from a file in a specified format: +``` bash +clickhouse-client --query='INSERT INTO table VALUES' < data.txt +clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv +``` +
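+To make the configuration-override advice above concrete, here's a minimal sketch of a config.d "patch" (the file name `data_path.xml` and the path `/data/clickhouse/` are hypothetical; `<yandex>` is the root element of `config.xml`): + +``` bash +# write a hypothetical override for the <path> element; config.d files are merged into config.xml +sudo tee /etc/clickhouse-server/config.d/data_path.xml <<'EOF' +<yandex> +    <path>/data/clickhouse/</path> +</yandex> +EOF + +# the merged configuration takes effect after a server restart +sudo service clickhouse-server restart +``` +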
+ +## Import Sample Dataset + +Now it's time to fill our ClickHouse server with some sample data. In this tutorial we'll use the anonymized data of Yandex.Metrica, the first service that ran ClickHouse in production, way before it became open source (more on that in the [history section](../introduction/history.md)). There are [multiple ways to import the Yandex.Metrica dataset](example_datasets/metrica.md), and for the sake of the tutorial we'll go with the most realistic one. + +### Download and Extract Table Data + +``` bash +curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv +curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv +``` + +The extracted files are about 10GB in size. + +### Create Tables + +Tables are logically grouped into "databases". There's a `default` database, but we'll create a new one named `tutorial`: + +``` bash +clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial" +``` + +The syntax for creating tables is way more complicated compared to databases (see the [reference](../query_language/create.md)). In general, the `CREATE TABLE` statement has to specify three key things: + +1. The name of the table to create. +2. The table schema, i.e. the list of columns and their [data types](../data_types/index.md). +3. The [table engine](../operations/table_engines/index.md) and its settings, which determine all the details of how queries to this table will be physically executed. + +Yandex.Metrica is a web analytics service, and the sample dataset doesn't cover its full functionality, so there are only two tables to create: + +* `hits` is a table with each action done by all users on all websites covered by the service. +* `visits` is a table that contains pre-built sessions instead of individual actions.
+ +Let's see and execute the real create table queries for these tables: + +``` sql +CREATE TABLE tutorial.hits_v1 +( + `WatchID` UInt64, + `JavaEnable` UInt8, + `Title` String, + `GoodEvent` Int16, + `EventTime` DateTime, + `EventDate` Date, + `CounterID` UInt32, + `ClientIP` UInt32, + `ClientIP6` FixedString(16), + `RegionID` UInt32, + `UserID` UInt64, + `CounterClass` Int8, + `OS` UInt8, + `UserAgent` UInt8, + `URL` String, + `Referer` String, + `URLDomain` String, + `RefererDomain` String, + `Refresh` UInt8, + `IsRobot` UInt8, + `RefererCategories` Array(UInt16), + `URLCategories` Array(UInt16), + `URLRegions` Array(UInt32), + `RefererRegions` Array(UInt32), + `ResolutionWidth` UInt16, + `ResolutionHeight` UInt16, + `ResolutionDepth` UInt8, + `FlashMajor` UInt8, + `FlashMinor` UInt8, + `FlashMinor2` String, + `NetMajor` UInt8, + `NetMinor` UInt8, + `UserAgentMajor` UInt16, + `UserAgentMinor` FixedString(2), + `CookieEnable` UInt8, + `JavascriptEnable` UInt8, + `IsMobile` UInt8, + `MobilePhone` UInt8, + `MobilePhoneModel` String, + `Params` String, + `IPNetworkID` UInt32, + `TraficSourceID` Int8, + `SearchEngineID` UInt16, + `SearchPhrase` String, + `AdvEngineID` UInt8, + `IsArtifical` UInt8, + `WindowClientWidth` UInt16, + `WindowClientHeight` UInt16, + `ClientTimeZone` Int16, + `ClientEventTime` DateTime, + `SilverlightVersion1` UInt8, + `SilverlightVersion2` UInt8, + `SilverlightVersion3` UInt32, + `SilverlightVersion4` UInt16, + `PageCharset` String, + `CodeVersion` UInt32, + `IsLink` UInt8, + `IsDownload` UInt8, + `IsNotBounce` UInt8, + `FUniqID` UInt64, + `HID` UInt32, + `IsOldCounter` UInt8, + `IsEvent` UInt8, + `IsParameter` UInt8, + `DontCountHits` UInt8, + `WithHash` UInt8, + `HitColor` FixedString(1), + `UTCEventTime` DateTime, + `Age` UInt8, + `Sex` UInt8, + `Income` UInt8, + `Interests` UInt16, + `Robotness` UInt8, + `GeneralInterests` Array(UInt16), + `RemoteIP` UInt32, + `RemoteIP6` FixedString(16), + `WindowName` Int32, + `OpenerName` Int32, + `HistoryLength` Int16, + `BrowserLanguage` FixedString(2), + `BrowserCountry` FixedString(2), + `SocialNetwork` String, + `SocialAction` String, + `HTTPError` UInt16, + `SendTiming` Int32, + `DNSTiming` Int32, + `ConnectTiming` Int32, + `ResponseStartTiming` Int32, + `ResponseEndTiming` Int32, + `FetchTiming` Int32, + `RedirectTiming` Int32, + `DOMInteractiveTiming` Int32, + `DOMContentLoadedTiming` Int32, + `DOMCompleteTiming` Int32, + `LoadEventStartTiming` Int32, + `LoadEventEndTiming` Int32, + `NSToDOMContentLoadedTiming` Int32, + `FirstPaintTiming` Int32, + `RedirectCount` Int8, + `SocialSourceNetworkID` UInt8, + `SocialSourcePage` String, + `ParamPrice` Int64, + `ParamOrderID` String, + `ParamCurrency` FixedString(3), + `ParamCurrencyID` UInt16, + `GoalsReached` Array(UInt32), + `OpenstatServiceName` String, + `OpenstatCampaignID` String, + `OpenstatAdID` String, + `OpenstatSourceID` String, + `UTMSource` String, + `UTMMedium` String, + `UTMCampaign` String, + `UTMContent` String, + `UTMTerm` String, + `FromTag` String, + `HasGCLID` UInt8, + `RefererHash` UInt64, + `URLHash` UInt64, + `CLID` UInt32, + `YCLID` UInt64, + `ShareService` String, + `ShareURL` String, + `ShareTitle` String, + `ParsedParams` Nested( + Key1 String, + Key2 String, + Key3 String, + Key4 String, + Key5 String, + ValueDouble Float64), + `IslandID` FixedString(16), + `RequestNum` UInt32, + `RequestTry` UInt8 +) +ENGINE = MergeTree() +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate, intHash32(UserID)) +SAMPLE BY intHash32(UserID) +SETTINGS 
index_granularity = 8192 +``` + +``` sql +CREATE TABLE tutorial.visits_v1 +( + `CounterID` UInt32, + `StartDate` Date, + `Sign` Int8, + `IsNew` UInt8, + `VisitID` UInt64, + `UserID` UInt64, + `StartTime` DateTime, + `Duration` UInt32, + `UTCStartTime` DateTime, + `PageViews` Int32, + `Hits` Int32, + `IsBounce` UInt8, + `Referer` String, + `StartURL` String, + `RefererDomain` String, + `StartURLDomain` String, + `EndURL` String, + `LinkURL` String, + `IsDownload` UInt8, + `TraficSourceID` Int8, + `SearchEngineID` UInt16, + `SearchPhrase` String, + `AdvEngineID` UInt8, + `PlaceID` Int32, + `RefererCategories` Array(UInt16), + `URLCategories` Array(UInt16), + `URLRegions` Array(UInt32), + `RefererRegions` Array(UInt32), + `IsYandex` UInt8, + `GoalReachesDepth` Int32, + `GoalReachesURL` Int32, + `GoalReachesAny` Int32, + `SocialSourceNetworkID` UInt8, + `SocialSourcePage` String, + `MobilePhoneModel` String, + `ClientEventTime` DateTime, + `RegionID` UInt32, + `ClientIP` UInt32, + `ClientIP6` FixedString(16), + `RemoteIP` UInt32, + `RemoteIP6` FixedString(16), + `IPNetworkID` UInt32, + `SilverlightVersion3` UInt32, + `CodeVersion` UInt32, + `ResolutionWidth` UInt16, + `ResolutionHeight` UInt16, + `UserAgentMajor` UInt16, + `UserAgentMinor` UInt16, + `WindowClientWidth` UInt16, + `WindowClientHeight` UInt16, + `SilverlightVersion2` UInt8, + `SilverlightVersion4` UInt16, + `FlashVersion3` UInt16, + `FlashVersion4` UInt16, + `ClientTimeZone` Int16, + `OS` UInt8, + `UserAgent` UInt8, + `ResolutionDepth` UInt8, + `FlashMajor` UInt8, + `FlashMinor` UInt8, + `NetMajor` UInt8, + `NetMinor` UInt8, + `MobilePhone` UInt8, + `SilverlightVersion1` UInt8, + `Age` UInt8, + `Sex` UInt8, + `Income` UInt8, + `JavaEnable` UInt8, + `CookieEnable` UInt8, + `JavascriptEnable` UInt8, + `IsMobile` UInt8, + `BrowserLanguage` UInt16, + `BrowserCountry` UInt16, + `Interests` UInt16, + `Robotness` UInt8, + `GeneralInterests` Array(UInt16), + `Params` Array(String), + `Goals` Nested( + ID UInt32, + Serial UInt32, + EventTime DateTime, + Price Int64, + OrderID String, + CurrencyID UInt32), + `WatchIDs` Array(UInt64), + `ParamSumPrice` Int64, + `ParamCurrency` FixedString(3), + `ParamCurrencyID` UInt16, + `ClickLogID` UInt64, + `ClickEventID` Int32, + `ClickGoodEvent` Int32, + `ClickEventTime` DateTime, + `ClickPriorityID` Int32, + `ClickPhraseID` Int32, + `ClickPageID` Int32, + `ClickPlaceID` Int32, + `ClickTypeID` Int32, + `ClickResourceID` Int32, + `ClickCost` UInt32, + `ClickClientIP` UInt32, + `ClickDomainID` UInt32, + `ClickURL` String, + `ClickAttempt` UInt8, + `ClickOrderID` UInt32, + `ClickBannerID` UInt32, + `ClickMarketCategoryID` UInt32, + `ClickMarketPP` UInt32, + `ClickMarketCategoryName` String, + `ClickMarketPPName` String, + `ClickAWAPSCampaignName` String, + `ClickPageName` String, + `ClickTargetType` UInt16, + `ClickTargetPhraseID` UInt64, + `ClickContextType` UInt8, + `ClickSelectType` Int8, + `ClickOptions` String, + `ClickGroupBannerID` Int32, + `OpenstatServiceName` String, + `OpenstatCampaignID` String, + `OpenstatAdID` String, + `OpenstatSourceID` String, + `UTMSource` String, + `UTMMedium` String, + `UTMCampaign` String, + `UTMContent` String, + `UTMTerm` String, + `FromTag` String, + `HasGCLID` UInt8, + `FirstVisit` DateTime, + `PredLastVisit` Date, + `LastVisit` Date, + `TotalVisits` UInt32, + `TraficSource` Nested( + ID Int8, + SearchEngineID UInt16, + AdvEngineID UInt8, + PlaceID UInt16, + SocialSourceNetworkID UInt8, + Domain String, + SearchPhrase String, + SocialSourcePage String), + 
`Attendance` FixedString(16), + `CLID` UInt32, + `YCLID` UInt64, + `NormalizedRefererHash` UInt64, + `SearchPhraseHash` UInt64, + `RefererDomainHash` UInt64, + `NormalizedStartURLHash` UInt64, + `StartURLDomainHash` UInt64, + `NormalizedEndURLHash` UInt64, + `TopLevelDomain` UInt64, + `URLScheme` UInt64, + `OpenstatServiceNameHash` UInt64, + `OpenstatCampaignIDHash` UInt64, + `OpenstatAdIDHash` UInt64, + `OpenstatSourceIDHash` UInt64, + `UTMSourceHash` UInt64, + `UTMMediumHash` UInt64, + `UTMCampaignHash` UInt64, + `UTMContentHash` UInt64, + `UTMTermHash` UInt64, + `FromHash` UInt64, + `WebVisorEnabled` UInt8, + `WebVisorActivity` UInt32, + `ParsedParams` Nested( + Key1 String, + Key2 String, + Key3 String, + Key4 String, + Key5 String, + ValueDouble Float64), + `Market` Nested( + Type UInt8, + GoalID UInt32, + OrderID String, + OrderPrice Int64, + PP UInt32, + DirectPlaceID UInt32, + DirectOrderID UInt32, + DirectBannerID UInt32, + GoodID String, + GoodName String, + GoodQuantity Int32, + GoodPrice Int64), + `IslandID` FixedString(16) +) +ENGINE = CollapsingMergeTree(Sign) +PARTITION BY toYYYYMM(StartDate) +ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) +SAMPLE BY intHash32(UserID) +SETTINGS index_granularity = 8192 +``` + +You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want. + +As we can see, `hits_v1` uses the [basic MergeTree engine](../operations/table_engines/mergetree.md), while `visits_v1` uses the [Collapsing](../operations/table_engines/collapsingmergetree.md) variant. + +### Import Data + +Data import into ClickHouse is done via an [INSERT INTO](../query_language/insert_into.md) query, like in many other SQL databases. However, the data is usually provided in one of the [supported formats](../interfaces/formats.md) instead of a `VALUES` clause (which is also supported). + +The files we downloaded earlier are in tab-separated format, so here's how to import them via the console client: + +``` bash +clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv +clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv +``` + +ClickHouse has a lot of [settings to tune](../operations/settings/index.md), and one way to specify them in the console client is via arguments, as we can see with `--max_insert_block_size`. The easiest way to figure out what settings are available, what they mean, and what the defaults are is to query the `system.settings` table: + +``` sql +SELECT name, value, changed, description +FROM system.settings +WHERE name LIKE '%max_insert_b%' +FORMAT TSV + +max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." +``` + +Optionally, you can [OPTIMIZE](../query_language/misc/#misc_operations-optimize) the tables after import. Tables that are configured with a MergeTree-family engine always merge data parts in the background to optimize data storage (or at least check whether it makes sense).
These queries just force the table engine to do the storage optimization right now instead of some time later: +``` bash +clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" +clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL" +``` + +This is an I/O- and CPU-intensive operation, so if the table constantly receives new data, it's better to leave it alone and let merges run in the background. + +Now we can check whether the tables were imported successfully: +``` bash +clickhouse-client --query "SELECT COUNT(*) FROM tutorial.hits_v1" +clickhouse-client --query "SELECT COUNT(*) FROM tutorial.visits_v1" +``` + +## Example Queries + +``` sql +SELECT + StartURL AS URL, + AVG(Duration) AS AvgDuration +FROM tutorial.visits_v1 +WHERE StartDate BETWEEN '2014-03-23' AND '2014-03-30' +GROUP BY URL +ORDER BY AvgDuration DESC +LIMIT 10 +``` + +``` sql +SELECT + sum(Sign) AS visits, + sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits, + (100. * goal_visits) / visits AS goal_percent +FROM tutorial.visits_v1 +WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru') +``` + +## Cluster Deployment + +A ClickHouse cluster is a homogeneous cluster. Steps to set it up: + +1. Install ClickHouse server on all machines of the cluster +2. Set up cluster configs in configuration files +3. Create local tables on each instance +4. Create a [Distributed table](../operations/table_engines/distributed.md) + +A [Distributed table](../operations/table_engines/distributed.md) is actually a kind of "view" over the local tables of a ClickHouse cluster. A SELECT query from a distributed table is executed using the resources of all the cluster's shards. You may specify configs for multiple clusters and create multiple distributed tables providing views to different clusters. + +Example config for a cluster with three shards, one replica each: +``` xml +<remote_servers> +    <perftest_3shards_1replicas> +        <shard> +            <replica> +                <host>example-perftest01j.yandex.ru</host> +                <port>9000</port> +            </replica> +        </shard> +        <shard> +            <replica> +                <host>example-perftest02j.yandex.ru</host> +                <port>9000</port> +            </replica> +        </shard> +        <shard> +            <replica> +                <host>example-perftest03j.yandex.ru</host> +                <port>9000</port> +            </replica> +        </shard> +    </perftest_3shards_1replicas> +</remote_servers> +``` + +For further demonstration, let's create a new local table with exactly the same `CREATE TABLE` query that we used for `hits_v1`, but a different table name: +``` sql +CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ... +``` + +Creating a distributed table providing a view into the local tables of the cluster: +``` sql +CREATE TABLE tutorial.hits_all AS tutorial.hits_local +ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); +``` + +A common practice is to create similar Distributed tables on all machines of the cluster. This allows running distributed queries on any machine of the cluster. There's also an alternative option: create a temporary distributed table for a given SELECT query using the [remote](../query_language/table_functions/remote.md) table function. + +Let's run [INSERT SELECT](../query_language/insert_into.md) into the Distributed table to spread the table over multiple servers. + +``` sql +INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; +``` + +!!! warning "Notice" + This approach is not suitable for sharding of large tables. There's a separate tool, [clickhouse-copier](../operations/utils/clickhouse-copier.md), that can re-shard arbitrarily large tables. + +As you would expect, computationally heavy queries are executed N times faster when launched on 3 servers instead of one. + +In this case, we have used a cluster with 3 shards, each containing a single replica.
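+ +As a quick sanity check that the server actually sees this topology, you can query the `system.clusters` table (a sketch; it assumes the config above has been deployed under the cluster name used in the `Distributed` engine): + +``` bash +clickhouse-client --query "SELECT cluster, shard_num, replica_num, host_name FROM system.clusters WHERE cluster = 'perftest_3shards_1replicas'" +``` + +Each of the three shards should show up as a row with `replica_num` equal to 1.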
+ +To provide resilience in a production environment, we recommend that each shard contain 2-3 replicas distributed between multiple data centers. Note that ClickHouse supports an unlimited number of replicas. + +Example config for a cluster of one shard containing three replicas: +``` xml +<remote_servers> +    ... +    <perftest_1shards_3replicas> +        <shard> +            <replica> +                <host>example-perftest01j.yandex.ru</host> +                <port>9000</port> +            </replica> +            <replica> +                <host>example-perftest02j.yandex.ru</host> +                <port>9000</port> +            </replica> +            <replica> +                <host>example-perftest03j.yandex.ru</host> +                <port>9000</port> +            </replica> +        </shard> +    </perftest_1shards_3replicas> +</remote_servers> +``` + +To enable native replication,
ZooKeeper is required. ClickHouse takes care of data consistency on all replicas and runs the restore procedure after failure + automatically. It's recommended to deploy the ZooKeeper cluster on separate servers. + +ZooKeeper is not a strict requirement: in some simple cases you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended: in this case, ClickHouse won't be able to + guarantee data consistency on all replicas. This remains the responsibility of your application. + +ZooKeeper locations need to be specified in the configuration file: +``` xml +<zookeeper> +    <node> +        <host>zoo01.yandex.ru</host> +        <port>2181</port> +    </node> +    <node> +        <host>zoo02.yandex.ru</host> +        <port>2181</port> +    </node> +    <node> +        <host>zoo03.yandex.ru</host> +        <port>2181</port> +    </node> +</zookeeper> +``` + +We also need to set macros for identifying each shard and replica; they are used on table creation: +``` xml +<macros> +    <shard>01</shard> +    <replica>01</replica> +</macros> +``` + +If there are no replicas at the moment of replicated table creation, a new first replica is instantiated. If there are already live replicas, the new replica clones the data from the existing ones. You have the option to create all replicated tables first and then insert data into them. Another option is to create some replicas and add the others after or during data insertion. + +``` sql +CREATE TABLE tutorial.hits_replica (...) +ENGINE = ReplicatedMergeTree( + '/clickhouse_perftest/tables/{shard}/hits', + '{replica}' +) +... +``` + +Here we use the [ReplicatedMergeTree](../operations/table_engines/replication.md) table engine. In the parameters, we specify the ZooKeeper path containing the shard and replica identifiers. + +``` sql +INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; +``` +Replication operates in multi-master mode. Data can be loaded into any replica, and it will be synced with the other instances automatically. Replication is asynchronous, so at a given moment not all replicas may contain recently inserted data. At least one replica should be up to allow data insertion. Others will sync up the data and repair consistency once they become active again. Please note that this approach allows for the small possibility of losing just-appended data. diff --git a/docs/en/introduction/ya_metrika_task.md b/docs/en/introduction/history.md similarity index 98% rename from docs/en/introduction/ya_metrika_task.md rename to docs/en/introduction/history.md index 41b33eff581..e8f373880f1 100644 --- a/docs/en/introduction/ya_metrika_task.md +++ b/docs/en/introduction/history.md @@ -1,4 +1,4 @@ -# Yandex.Metrica Use Case +# ClickHouse History ClickHouse was originally developed to power [Yandex.Metrica](https://metrica.yandex.com/), [the second largest web analytics platform in the world](http://w3techs.com/technologies/overview/traffic_analysis/all), and continues to be the core component of this system. With more than 13 trillion records in the database and more than 20 billion events daily, ClickHouse allows generating custom reports on the fly directly from non-aggregated data. This article briefly covers the goals of ClickHouse in the early stages of its development. @@ -47,4 +47,4 @@ OLAPServer worked well for non-aggregated data, but it had many restrictions tha To remove the limitations of OLAPServer and solve the problem of working with non-aggregated data for all reports, we developed the ClickHouse DBMS.
-[Original article](https://clickhouse.yandex/docs/en/introduction/ya_metrika_task/) +[Original article](https://clickhouse.yandex/docs/en/introduction/history/) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 9f4275029c2..3011b575600 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1022,6 +1022,53 @@ Lower values mean higher priority. Threads with low `nice` priority values are e Default value: 0. + +## query_profiler_real_time_period_ns {#query_profiler_real_time_period_ns} + +Sets the period for a real clock timer of the query profiler. The real clock timer counts wall-clock time. + +Possible values: + +- Positive integer number, in nanoseconds. + + Recommended values: + + - 10000000 (100 times a second) nanoseconds and less for single queries. + - 1000000000 (once a second) for cluster-wide profiling. + +- 0 for turning off the timer. + +Type: [UInt64](../../data_types/int_uint.md). + +Default value: 1000000000 nanoseconds (once a second). + +**See Also** + +- [system.trace_log](../system_tables.md#system_tables-trace_log) + +## query_profiler_cpu_time_period_ns {#query_profiler_cpu_time_period_ns} + +Sets the period for a CPU clock timer of the query profiler. This timer counts only CPU time. + +Possible values: + +- Positive integer number of nanoseconds. + + Recommended values: + + - 10000000 (100 times a second) nanoseconds and more for single queries. - 1000000000 (once a second) for cluster-wide profiling. + +- 0 for turning off the timer. + +Type: [UInt64](../../data_types/int_uint.md). + +Default value: 1000000000 nanoseconds. + +**See Also** + +- [system.trace_log](../system_tables.md#system_tables-trace_log) + ## allow_introspection_functions {#settings-allow_introspection_functions} Enables of disables [introspections functions](../../query_language/functions/introspection.md) for query profiling. diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 373b87fbf17..77964c7377f 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -551,7 +551,7 @@ You can specify an arbitrary partitioning key for the `system.query_thread_log` Contains stack traces collected by the sampling query profiler. -ClickHouse creates this table when the [trace_log](server_settings/settings.md#server_settings-trace_log) server configuration section is set. Also the `query_profiler_real_time_period_ns` and `query_profiler_cpu_time_period_ns` settings should be set. +ClickHouse creates this table when the [trace_log](server_settings/settings.md#server_settings-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set. To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions. diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md index a45f41a4528..33e5700f355 100644 --- a/docs/en/query_language/functions/string_functions.md +++ b/docs/en/query_language/functions/string_functions.md @@ -217,17 +217,119 @@ Result: └───────────────────────────────────┘ ``` -## trimLeft(s) +## trimLeft {#trimleft} -Returns a string that removes the whitespace characters on left side.
+Removes all consecutive occurrences of common whitespace (ASCII character 32) from the beginning of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). -## trimRight(s) +**Syntax** -Returns a string that removes the whitespace characters on right side. +```sql +trimLeft(string) +``` -## trimBoth(s) +Alias: `ltrim`. -Returns a string that removes the whitespace characters on either side. +**Parameters** + +- `string` — string to trim. [String](../../data_types/string.md). + +**Returned value** + +A string without leading common whitespace. + +Type: `String`. + +**Example** + +Query: + +```sql +SELECT trimLeft(' Hello, world! ') +``` + +Result: + +```text +┌─trimLeft(' Hello, world! ')─┐ +│ Hello, world! │ +└─────────────────────────────────────┘ +``` + +## trimRight {#trimright} + +Removes all consecutive occurrences of common whitespace (ASCII character 32) from the end of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). + +**Syntax** + +```sql +trimRight(string) +``` + +Alias: `rtrim`. + +**Parameters** + +- `string` — string to trim. [String](../../data_types/string.md). + +**Returned value** + +A string without trailing common whitespace. + +Type: `String`. + +**Example** + +Query: + +```sql +SELECT trimRight(' Hello, world! ') +``` + +Result: + +```text +┌─trimRight(' Hello, world! ')─┐ +│ Hello, world! │ +└──────────────────────────────────────┘ +``` + +## trimBoth {#trimboth} + +Removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. It doesn't remove other kinds of whitespace characters (tab, no-break space, etc.). + +**Syntax** + +```sql +trimBoth(string) +``` + +Alias: `trim`. + +**Parameters** + +- `string` — string to trim. [String](../../data_types/string.md). + +**Returned value** + +A string without leading and trailing common whitespace. + +Type: `String`. + +**Example** + +Query: + +```sql +SELECT trimBoth(' Hello, world! ') +``` + +Result: + +```text +┌─trimBoth(' Hello, world! ')─┐ +│ Hello, world! │ +└─────────────────────────────────────┘ +``` ## CRC32(s) diff --git a/docs/fa/getting_started/index.md b/docs/fa/getting_started/index.md index 778393aed91..57496c474e2 100644 --- a/docs/fa/getting_started/index.md +++ b/docs/fa/getting_started/index.md @@ -1,197 +1,11 @@
+# ﻥﺪﺷ ﻉﻭﺮﺷ -# شروع به کار +ﻖﯾﺮﻃ ﺯﺍ ﺪﯾﺎﺑ ﻪﻤﻫ ﺯﺍ ﻝﻭﺍ ، ﺪﯿﻨﮐ ﺱﺎﺴﺣﺍ ﺍﺭ ﻥﺁ ﺩﺮﮑﻠﻤﻋ ﺪﯿﻫﺍﻮﺧ ﯽﻣ ﻭ ﺪﯿﺘﺴﻫ ﺩﺭﺍﻭ ﻩﺯﺎﺗ[ﺐﺼﻧ ﻞﺣﺍﺮﻣ](install.md). +ﺪﯿﻨﮐ ﺏﺎﺨﺘﻧﺍ ﺍﺭ ﺮﯾﺯ ﯼﺎﻫ ﻪﻨﯾﺰﮔ ﺯﺍ ﯽﮑﯾ ﺪﯿﻧﺍﻮﺗ ﯽﻣ ﻥﺁ ﺯﺍ ﺲﭘ: -## نیازمندی های سیستم - -این یک سیستم چند سکویی (Cross-Platform) نمی باشد. این ابزار نیاز به Linux Ubuntu Precise (12.04) یا جدیدتر، با معماری x86\_64 و پشتیبانی از SSE 4.2 می باشد. برای چک کردن SSE 4.2 خروجی دستور زیر را بررسی کنید: +* [ﺪﯿﻨﮐ ﯽﻃ ﺍﺭ ﻞﺼﻔﻣ ﺵﺯﻮﻣﺁ](tutorial.md) +* [ﺪﯿﻨﮐ ﺶﯾﺎﻣﺯﺁ ﻪﻧﻮﻤﻧ ﯼﺎﻫ ﻩﺩﺍﺩ ﺎﺑ](example_datasets/ontime.md) +[ﯽﻠﺻﺍ ﻪﻟﺎﻘﻣ](https://clickhouse.yandex/docs/fa/getting_started/)
- -```bash -grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported" -``` - -
- -پیشنهاد می کنیم از Ubuntu TrustyT، Ubuntu Xenial یا Ubuntu Precise استفاده کنید. ترمینال باید از UTF-8 پشتیبانی کند. (به صورت پیش فرض در Ubuntu پشتیبانی می شود). - -## نصب - -### نصب از طریق پکیج های Debian/Ubuntu - -در فایل `/etc/apt/sources.list` (یا در یک فایل جدا `/etc/apt/sources.list.d/clickhouse.list`)، Repo زیر را اضافه کنید: - -
- -``` -deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ -``` - -
- -اگر شما میخوایید جدیدترین نسخه ی تست را استفاده کنید، 'stable' رو به 'testing' تغییر بدید. - -سپس دستورات زیر را اجرا کنید: - -
- -```bash -sudo apt-get install dirmngr # optional -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 # optional -sudo apt-get update -sudo apt-get install clickhouse-client clickhouse-server -``` - -
- -شما همچنین می توانید از طریق لینک زیر پکیج ClickHouse را به صورت دستی دانلود و نصب کنید: . - -ClickHouse دارای تنظیمات محدودیت دسترسی می باشد. این تنظیمات در فایل 'users.xml' (کنار 'config.xml') می باشد. به صورت پیش فرض دسترسی برای کاربر 'default' از همه جا بدون نیاز به پسورد وجود دارد. 'user/default/networks' را مشاهده کنید. برای اطلاعات بیشتر قسمت "تنظیمات فایل ها" را مشاهده کنید. - - RPM ﯼﺎﻫ ﻪﺘﺴﺑ ﺯﺍ ### - -.ﺪﻨﮐ ﯽﻣ ﻪﯿﺻﻮﺗ ﺲﮐﻮﻨﯿﻟ ﺮﺑ ﯽﻨﺘﺒﻣ rpm ﺮﺑ ﯽﻨﺘﺒﻣ ﯼﺎﻫ ﻊﯾﺯﻮﺗ ﺮﯾﺎﺳ ﻭ CentOS ، RedHat ﯼﺍ - - :ﺪﯿﻨﮐ ﻪﻓﺎﺿﺍ ﺍﺭ ﯽﻤﺳﺭ ﻥﺰﺨﻣ ﺪﯾﺎﺑ ﺍﺪﺘﺑﺍ - -```bash -sudo yum install yum-utils -sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 -``` - -.(ﺩﻮﺷ ﯽﻣ ﻪﯿﺻﻮﺗ ﺎﻤﺷ ﺶﯾﺎﻣﺯﺁ ﯼﺎﻫ ﻂﯿﺤﻣ ﯼﺍﺮﺑ ﻦﯾﺍ) ﺪﯿﻨﮐ ﻦﯾﺰﮕﯾﺎﺟ "ﺖﺴﺗ" ﺎﺑ ﺍﺭ "ﺭﺍﺪﯾﺎﭘ" - - :ﺪﯿﻨﮐ ﺐﺼﻧ ﺍﺭ ﺎﻫ ﻪﺘﺴﺑ ﻊﻗﺍﻭ ﺭﺩ ﺎﺗ ﺪﯿﻨﮐ ﺍﺮﺟﺍ ﺍﺭ ﺕﺍﺭﻮﺘﺳﺩ ﻦﯾﺍ ﺲﭙﺳ - -```bash -sudo yum install clickhouse-server clickhouse-client -``` - -. :ﺪﯿﻨﮐ ﺐﺼﻧ ﻭ ﯼﺮﯿﮔﺭﺎﺑ ﺎﺠﻨ - - Docker Image ﺯﺍ ### - -.ﺪﻨﻨﮐ ﯽﻣ ﻩﺩﺎﻔﺘﺳﺍ ﻞﺧﺍﺩ ﺭﺩ "deb" ﯽﻤﺳﺭ ﯼﺎﻫ ﻪﺘﺴﺑ ﺯﺍ ﺮﯾﻭﺎﺼﺗ ﻦﯾﺍ .ﺪﯿﻨﮐ ﻝﺎﺒﻧﺩ ﺍﺭ (/ht - - -### نصب از طریق Source - -برای Compile، دستورالعمل های فایل build.md را دنبال کنید: - -شما میتوانید پکیج را compile و نصب کنید. شما همچنین می توانید بدون نصب پکیج از برنامه ها استفاده کنید. - -
- -``` -Client: dbms/programs/clickhouse-client -Server: dbms/programs/clickhouse-server -``` - -
- -برای سرور، یک کاتالوگ با دیتا بسازید، مانند - -
- -``` -/opt/clickhouse/data/default/ -/opt/clickhouse/metadata/default/ -``` - -
- -(قابل تنظیم در تنظیمات سرور). 'chown' را برای کاربر دلخواه اجرا کنید. - -به مسیر لاگ ها در تنظیمات سرور توجه کنید (src/dbms/programs/config.xml). - -### روش های دیگر نصب - -Docker image: - -پکیج RPM برای CentOS یا RHEL: - -Gentoo: `emerge clickhouse` - -## راه اندازی - -برای استارت سرور (به صورت daemon)، دستور زیر را اجرا کنید: - -
- -```bash -sudo service clickhouse-server start -``` - -
- -لاگ های دایرکتوری `/var/log/clickhouse-server/` directory. را مشاهده کنید. - -اگر سرور استارت نشد، فایل تنظیمات را بررسی کنید `/etc/clickhouse-server/config.xml.` - -شما همچنین می توانید سرور را از طریق کنسول راه اندازی کنید: - -
- -```bash -clickhouse-server --config-file=/etc/clickhouse-server/config.xml -``` - -
- -در این مورد که مناسب زمان توسعه می باشد، لاگ ها در کنسول پرینت می شوند. اگر فایل تنظیمات در دایرکتوری جاری باشد، نیازی به مشخص کردن '--config-file' نمی باشد. به صورت پیش فرض از './config.xml' استفاده می شود. - -شما می توانید از کلاینت command-line برای اتصال به سرور استفاده کنید: - -
- -```bash -clickhouse-client -``` - -
- -پارامترهای پیش فرض، نشان از اتصال به localhost:9000 از طرف کاربر 'default' بدون پسورد را می دهد. از کلاینت میتوان برای اتصال به یک سرور remote استفاده کرد. مثال: - -
- -```bash -clickhouse-client --host=example.com -``` - -
- -برای اطلاعات بیشتر، بخش "کلاینت Command-line" را مشاهده کنید. - -چک کردن سیستم: - -
- -```bash -milovidov@hostname:~/work/metrica/src/dbms/src/Client$ ./clickhouse-client -ClickHouse client version 0.0.18749. -Connecting to localhost:9000. -Connected to ClickHouse server version 0.0.18749. - -:) SELECT 1 - -SELECT 1 - -┌─1─┐ -│ 1 │ -└───┘ - -1 rows in set. Elapsed: 0.003 sec. - -:) -``` - -
- -**تبریک میگم، سیستم کار می کنه!** - -برای ادامه آزمایشات، شما میتوانید دیتاست های تستی را دریافت و امتحان کنید. - -
-[مقاله اصلی](https://clickhouse.yandex/docs/fa/getting_started/) diff --git a/docs/fa/getting_started/install.md b/docs/fa/getting_started/install.md new file mode 100644 index 00000000000..790c9381007 --- /dev/null +++ b/docs/fa/getting_started/install.md @@ -0,0 +1,199 @@ +
+ +# ﯼﺯﺍﺪﻧﺍ ﻩﺍﺭ ﻭ ﺐﺼﻧ + +## نیازمندی های سیستم + +ClickHouse ﺲﮐﻮﻨﯿﻟ ﻉﻮﻧ ﺮﻫ ﯼﻭﺭ ﺮﺑ ﺪﻧﺍﻮﺗ ﯽﻣ ، FreeBSD ﺎﯾ Mac OS X ﯼﺭﺎﻤﻌﻣ ﺎﺑ CPU x + +:ﺖﺳﺍ ﻩﺪﻣﺁ ، ﺪﻨﮐ ﯽﻣ ﯽﻧﺎﺒﯿﺘﺸﭘ SSE 4.2 ﺯﺍ ﯽﻠﻌﻓ CPU ﺎﯾﺁ ﻪﮑﻨﯾﺍ ﯽﺳﺭﺮﺑ ﯼﺍﺮﺑ ﺭﻮﺘﺳﺩ ﻦﯾﺍ + +
+ +```bash +grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported" +``` + +
+ +ﺪﯾﺎﺑ ، ﺪﻧﺭﺍﺪﻧ PowerPC64LE ﺎﯾ AArch64 ﯼﺭﺎﻤﻌﻣ ﺎﯾ ﺪﻨﻨﮐ ﯽﻤﻧ ﯽﻧﺎﺒﯿﺘﺸﭘ SSE 4.2 ﺯﺍ ﻪﮐ[ClickHouse ﺪﯿﻨﮐ ﺩﺎﺠﯾﺍ ﻊﺑﺎﻨﻣ ﺯﺍ ﺍﺭ](#from-sources) ﺐﺳﺎﻨﻣ ﺕﺎﻤﯿﻈﻨﺗ ﺎﺑ + +##ﺩﻮﺟﻮﻣ ﺐﺼﻧ ﯼﺎﻫ ﻪﻨﯾﺰﮔ + +### نصب از طریق پکیج های Debian/Ubuntu {#from-deb-packages} + +در فایل `/etc/apt/sources.list` (یا در یک فایل جدا `/etc/apt/sources.list.d/clickhouse.list`)، Repo زیر را اضافه کنید: + +
+ +``` +deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ +``` + +
+ +اگر شما میخوایید جدیدترین نسخه ی تست را استفاده کنید، 'stable' رو به 'testing' تغییر بدید. + +سپس دستورات زیر را اجرا کنید: + +
+ +```bash +sudo apt-get install dirmngr # optional +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 # optional +sudo apt-get update +sudo apt-get install clickhouse-client clickhouse-server +``` + +
+ +شما همچنین می توانید از طریق لینک زیر پکیج ClickHouse را به صورت دستی دانلود و نصب کنید: . + +ClickHouse دارای تنظیمات محدودیت دسترسی می باشد. این تنظیمات در فایل 'users.xml' (کنار 'config.xml') می باشد. به صورت پیش فرض دسترسی برای کاربر 'default' از همه جا بدون نیاز به پسورد وجود دارد. 'user/default/networks' را مشاهده کنید. برای اطلاعات بیشتر قسمت "تنظیمات فایل ها" را مشاهده کنید. + +### RPM ﯼﺎﻫ ﻪﺘﺴﺑ ﺯﺍ {#from-rpm-packages} + +.ﺪﻨﮐ ﯽﻣ ﻪﯿﺻﻮﺗ ﺲﮐﻮﻨﯿﻟ ﺮﺑ ﯽﻨﺘﺒﻣ rpm ﺮﺑ ﯽﻨﺘﺒﻣ ﯼﺎﻫ ﻊﯾﺯﻮﺗ ﺮﯾﺎﺳ ﻭ CentOS ، RedHat ﯼﺍ + + :ﺪﯿﻨﮐ ﻪﻓﺎﺿﺍ ﺍﺭ ﯽﻤﺳﺭ ﻥﺰﺨﻣ ﺪﯾﺎﺑ ﺍﺪﺘﺑﺍ + +```bash +sudo yum install yum-utils +sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG +sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 +``` + +.(ﺩﻮﺷ ﯽﻣ ﻪﯿﺻﻮﺗ ﺎﻤﺷ ﺶﯾﺎﻣﺯﺁ ﯼﺎﻫ ﻂﯿﺤﻣ ﯼﺍﺮﺑ ﻦﯾﺍ) ﺪﯿﻨﮐ ﻦﯾﺰﮕﯾﺎﺟ "ﺖﺴﺗ" ﺎﺑ ﺍﺭ "ﺭﺍﺪﯾﺎﭘ" + + :ﺪﯿﻨﮐ ﺐﺼﻧ ﺍﺭ ﺎﻫ ﻪﺘﺴﺑ ﻊﻗﺍﻭ ﺭﺩ ﺎﺗ ﺪﯿﻨﮐ ﺍﺮﺟﺍ ﺍﺭ ﺕﺍﺭﻮﺘﺳﺩ ﻦﯾﺍ ﺲﭙﺳ + +```bash +sudo yum install clickhouse-server clickhouse-client +``` + +. :ﺪﯿﻨﮐ ﺐﺼﻧ ﻭ ﯼﺮﯿﮔﺭﺎﺑ ﺎﺠﻨ + + Docker Image ﺯﺍ ### + +.ﺪﻨﻨﮐ ﯽﻣ ﻩﺩﺎﻔﺘﺳﺍ ﻞﺧﺍﺩ ﺭﺩ "deb" ﯽﻤﺳﺭ ﯼﺎﻫ ﻪﺘﺴﺑ ﺯﺍ ﺮﯾﻭﺎﺼﺗ ﻦﯾﺍ .ﺪﯿﻨﮐ ﻝﺎﺒﻧﺩ ﺍﺭ (/ht + + +### نصب از طریق Source {#from-sources} + +برای Compile، دستورالعمل های فایل build.md را دنبال کنید: + +شما میتوانید پکیج را compile و نصب کنید. شما همچنین می توانید بدون نصب پکیج از برنامه ها استفاده کنید. + +
+ +``` +Client: dbms/programs/clickhouse-client +Server: dbms/programs/clickhouse-server +``` + +
+ +برای سرور، یک کاتالوگ با دیتا بسازید، مانند + +
+ +``` +/opt/clickhouse/data/default/ +/opt/clickhouse/metadata/default/ +``` + +
+ +(قابل تنظیم در تنظیمات سرور). 'chown' را برای کاربر دلخواه اجرا کنید. + +به مسیر لاگ ها در تنظیمات سرور توجه کنید (src/dbms/programs/config.xml). + +### روش های دیگر نصب {#from-docker-image} + +Docker image: + +پکیج RPM برای CentOS یا RHEL: + +Gentoo: `emerge clickhouse` + +## راه اندازی + +برای استارت سرور (به صورت daemon)، دستور زیر را اجرا کنید: + +
+ +```bash +sudo service clickhouse-server start +``` + +
+ +لاگ های دایرکتوری `/var/log/clickhouse-server/` directory. را مشاهده کنید. + +اگر سرور استارت نشد، فایل تنظیمات را بررسی کنید `/etc/clickhouse-server/config.xml.` + +شما همچنین می توانید سرور را از طریق کنسول راه اندازی کنید: + +
+ +```bash +clickhouse-server --config-file=/etc/clickhouse-server/config.xml +``` + +
+ +در این مورد که مناسب زمان توسعه می باشد، لاگ ها در کنسول پرینت می شوند. اگر فایل تنظیمات در دایرکتوری جاری باشد، نیازی به مشخص کردن '--config-file' نمی باشد. به صورت پیش فرض از './config.xml' استفاده می شود. + +شما می توانید از کلاینت command-line برای اتصال به سرور استفاده کنید: + +
+ +```bash +clickhouse-client +``` + +
+ +پارامترهای پیش فرض، نشان از اتصال به localhost:9000 از طرف کاربر 'default' بدون پسورد را می دهد. از کلاینت میتوان برای اتصال به یک سرور remote استفاده کرد. مثال: + +
+ +```bash +clickhouse-client --host=example.com +``` + +
+ +برای اطلاعات بیشتر، بخش "کلاینت Command-line" را مشاهده کنید. + +چک کردن سیستم: + +
+ +```bash +milovidov@hostname:~/work/metrica/src/dbms/src/Client$ ./clickhouse-client +ClickHouse client version 0.0.18749. +Connecting to localhost:9000. +Connected to ClickHouse server version 0.0.18749. + +:) SELECT 1 + +SELECT 1 + +┌─1─┐ +│ 1 │ +└───┘ + +1 rows in set. Elapsed: 0.003 sec. + +:) +``` + +
+ +**تبریک میگم، سیستم کار می کنه!** + +برای ادامه آزمایشات، شما میتوانید دیتاست های تستی را دریافت و امتحان کنید. + +
+[مقاله اصلی](https://clickhouse.yandex/docs/fa/getting_started/install/) diff --git a/docs/fa/getting_started/tutorial.md b/docs/fa/getting_started/tutorial.md new file mode 120000 index 00000000000..8bc40816ab2 --- /dev/null +++ b/docs/fa/getting_started/tutorial.md @@ -0,0 +1 @@ +../../en/getting_started/tutorial.md \ No newline at end of file diff --git a/docs/fa/introduction/ya_metrika_task.md b/docs/fa/introduction/history.md similarity index 99% rename from docs/fa/introduction/ya_metrika_task.md rename to docs/fa/introduction/history.md index 1ea434f248c..abde10aa6f3 100644 --- a/docs/fa/introduction/ya_metrika_task.md +++ b/docs/fa/introduction/history.md @@ -1,6 +1,6 @@
-# Yandex.Metrica use case +# ClickHouse ﻪﭽﺨﯾﺭﺎﺗ ClickHouse در ابتدا برای قدرت به Yandex.Metrica دومین بستر آنالیز وب در دنیا توسعه داده شد، و همچنان جز اصلی آن است. ClickHouse اجازه می دهند که با بیش از 13 تریلیون رکورد در دیتابیس و بیش از 20 میلیارد event در روز، گزارش های مستقیم (On the fly) از داده های non-aggregate تهیه کنیم. این مقاله پیشنیه ی تاریخی در ارتباط با اهداف اصلی ClickHouse قبل از آنکه به یک محصول open source تبدیل شود، می دهد. diff --git a/docs/fa/query_language/functions/introspection.md b/docs/fa/query_language/functions/introspection.md new file mode 120000 index 00000000000..b1a487e9c77 --- /dev/null +++ b/docs/fa/query_language/functions/introspection.md @@ -0,0 +1 @@ +../../../en/query_language/functions/introspection.md \ No newline at end of file diff --git a/docs/ja/changelog.md b/docs/ja/changelog.md new file mode 120000 index 00000000000..699cc9e7b7c --- /dev/null +++ b/docs/ja/changelog.md @@ -0,0 +1 @@ +../../CHANGELOG.md \ No newline at end of file diff --git a/docs/ja/data_types/array.md b/docs/ja/data_types/array.md new file mode 120000 index 00000000000..808c98bf91a --- /dev/null +++ b/docs/ja/data_types/array.md @@ -0,0 +1 @@ +../../en/data_types/array.md \ No newline at end of file diff --git a/docs/ja/data_types/boolean.md b/docs/ja/data_types/boolean.md new file mode 120000 index 00000000000..42e84f1e52a --- /dev/null +++ b/docs/ja/data_types/boolean.md @@ -0,0 +1 @@ +../../en/data_types/boolean.md \ No newline at end of file diff --git a/docs/ja/data_types/date.md b/docs/ja/data_types/date.md new file mode 120000 index 00000000000..d1ebc137e8f --- /dev/null +++ b/docs/ja/data_types/date.md @@ -0,0 +1 @@ +../../en/data_types/date.md \ No newline at end of file diff --git a/docs/ja/data_types/datetime.md b/docs/ja/data_types/datetime.md new file mode 120000 index 00000000000..2eb9f44e6eb --- /dev/null +++ b/docs/ja/data_types/datetime.md @@ -0,0 +1 @@ +../../en/data_types/datetime.md \ No newline at end of file diff --git a/docs/ja/data_types/decimal.md b/docs/ja/data_types/decimal.md new file mode 120000 index 00000000000..ccea440adfa --- /dev/null +++ b/docs/ja/data_types/decimal.md @@ -0,0 +1 @@ +../../en/data_types/decimal.md \ No newline at end of file diff --git a/docs/ja/data_types/domains/ipv4.md b/docs/ja/data_types/domains/ipv4.md new file mode 120000 index 00000000000..eb4cc7d57b5 --- /dev/null +++ b/docs/ja/data_types/domains/ipv4.md @@ -0,0 +1 @@ +../../../en/data_types/domains/ipv4.md \ No newline at end of file diff --git a/docs/ja/data_types/domains/ipv6.md b/docs/ja/data_types/domains/ipv6.md new file mode 120000 index 00000000000..cca37a22458 --- /dev/null +++ b/docs/ja/data_types/domains/ipv6.md @@ -0,0 +1 @@ +../../../en/data_types/domains/ipv6.md \ No newline at end of file diff --git a/docs/ja/data_types/domains/overview.md b/docs/ja/data_types/domains/overview.md new file mode 120000 index 00000000000..13465d655ee --- /dev/null +++ b/docs/ja/data_types/domains/overview.md @@ -0,0 +1 @@ +../../../en/data_types/domains/overview.md \ No newline at end of file diff --git a/docs/ja/data_types/enum.md b/docs/ja/data_types/enum.md new file mode 120000 index 00000000000..23ebe64773e --- /dev/null +++ b/docs/ja/data_types/enum.md @@ -0,0 +1 @@ +../../en/data_types/enum.md \ No newline at end of file diff --git a/docs/ja/data_types/fixedstring.md b/docs/ja/data_types/fixedstring.md new file mode 120000 index 00000000000..53092fcb884 --- /dev/null +++ b/docs/ja/data_types/fixedstring.md @@ -0,0 +1 @@ +../../en/data_types/fixedstring.md \ No 
newline at end of file diff --git a/docs/ja/data_types/float.md b/docs/ja/data_types/float.md new file mode 120000 index 00000000000..d2ae6bd11de --- /dev/null +++ b/docs/ja/data_types/float.md @@ -0,0 +1 @@ +../../en/data_types/float.md \ No newline at end of file diff --git a/docs/ja/data_types/index.md b/docs/ja/data_types/index.md new file mode 120000 index 00000000000..c9f29d637f3 --- /dev/null +++ b/docs/ja/data_types/index.md @@ -0,0 +1 @@ +../../en/data_types/index.md \ No newline at end of file diff --git a/docs/ja/data_types/int_uint.md b/docs/ja/data_types/int_uint.md new file mode 120000 index 00000000000..3a913c9328e --- /dev/null +++ b/docs/ja/data_types/int_uint.md @@ -0,0 +1 @@ +../../en/data_types/int_uint.md \ No newline at end of file diff --git a/docs/ja/data_types/nested_data_structures/aggregatefunction.md b/docs/ja/data_types/nested_data_structures/aggregatefunction.md new file mode 120000 index 00000000000..36544324d2b --- /dev/null +++ b/docs/ja/data_types/nested_data_structures/aggregatefunction.md @@ -0,0 +1 @@ +../../../en/data_types/nested_data_structures/aggregatefunction.md \ No newline at end of file diff --git a/docs/ja/data_types/nested_data_structures/index.md b/docs/ja/data_types/nested_data_structures/index.md new file mode 120000 index 00000000000..a5659a9c5cd --- /dev/null +++ b/docs/ja/data_types/nested_data_structures/index.md @@ -0,0 +1 @@ +../../../en/data_types/nested_data_structures/index.md \ No newline at end of file diff --git a/docs/ja/data_types/nested_data_structures/nested.md b/docs/ja/data_types/nested_data_structures/nested.md new file mode 120000 index 00000000000..653a1ce31c3 --- /dev/null +++ b/docs/ja/data_types/nested_data_structures/nested.md @@ -0,0 +1 @@ +../../../en/data_types/nested_data_structures/nested.md \ No newline at end of file diff --git a/docs/ja/data_types/nullable.md b/docs/ja/data_types/nullable.md new file mode 120000 index 00000000000..0233f91d954 --- /dev/null +++ b/docs/ja/data_types/nullable.md @@ -0,0 +1 @@ +../../en/data_types/nullable.md \ No newline at end of file diff --git a/docs/ja/data_types/special_data_types/expression.md b/docs/ja/data_types/special_data_types/expression.md new file mode 120000 index 00000000000..4cec632b416 --- /dev/null +++ b/docs/ja/data_types/special_data_types/expression.md @@ -0,0 +1 @@ +../../../en/data_types/special_data_types/expression.md \ No newline at end of file diff --git a/docs/ja/data_types/special_data_types/index.md b/docs/ja/data_types/special_data_types/index.md new file mode 120000 index 00000000000..f3ca4a47f98 --- /dev/null +++ b/docs/ja/data_types/special_data_types/index.md @@ -0,0 +1 @@ +../../../en/data_types/special_data_types/index.md \ No newline at end of file diff --git a/docs/ja/data_types/special_data_types/interval.md b/docs/ja/data_types/special_data_types/interval.md new file mode 120000 index 00000000000..6829f5ced00 --- /dev/null +++ b/docs/ja/data_types/special_data_types/interval.md @@ -0,0 +1 @@ +../../../en/data_types/special_data_types/interval.md \ No newline at end of file diff --git a/docs/ja/data_types/special_data_types/nothing.md b/docs/ja/data_types/special_data_types/nothing.md new file mode 120000 index 00000000000..197a752ce9c --- /dev/null +++ b/docs/ja/data_types/special_data_types/nothing.md @@ -0,0 +1 @@ +../../../en/data_types/special_data_types/nothing.md \ No newline at end of file diff --git a/docs/ja/data_types/special_data_types/set.md b/docs/ja/data_types/special_data_types/set.md new file mode 120000 index 
00000000000..5beb14114d3 --- /dev/null +++ b/docs/ja/data_types/special_data_types/set.md @@ -0,0 +1 @@ +../../../en/data_types/special_data_types/set.md \ No newline at end of file diff --git a/docs/ja/data_types/string.md b/docs/ja/data_types/string.md new file mode 120000 index 00000000000..7bdd739398f --- /dev/null +++ b/docs/ja/data_types/string.md @@ -0,0 +1 @@ +../../en/data_types/string.md \ No newline at end of file diff --git a/docs/ja/data_types/tuple.md b/docs/ja/data_types/tuple.md new file mode 120000 index 00000000000..d30a8463aeb --- /dev/null +++ b/docs/ja/data_types/tuple.md @@ -0,0 +1 @@ +../../en/data_types/tuple.md \ No newline at end of file diff --git a/docs/ja/data_types/uuid.md b/docs/ja/data_types/uuid.md new file mode 120000 index 00000000000..aba05e889ac --- /dev/null +++ b/docs/ja/data_types/uuid.md @@ -0,0 +1 @@ +../../en/data_types/uuid.md \ No newline at end of file diff --git a/docs/ja/database_engines/index.md b/docs/ja/database_engines/index.md new file mode 120000 index 00000000000..bbdb762a4ad --- /dev/null +++ b/docs/ja/database_engines/index.md @@ -0,0 +1 @@ +../../en/database_engines/index.md \ No newline at end of file diff --git a/docs/ja/database_engines/lazy.md b/docs/ja/database_engines/lazy.md new file mode 120000 index 00000000000..66830dcdb2f --- /dev/null +++ b/docs/ja/database_engines/lazy.md @@ -0,0 +1 @@ +../../en/database_engines/lazy.md \ No newline at end of file diff --git a/docs/ja/database_engines/mysql.md b/docs/ja/database_engines/mysql.md new file mode 120000 index 00000000000..51ac4126e2d --- /dev/null +++ b/docs/ja/database_engines/mysql.md @@ -0,0 +1 @@ +../../en/database_engines/mysql.md \ No newline at end of file diff --git a/docs/ja/development/architecture.md b/docs/ja/development/architecture.md new file mode 120000 index 00000000000..abda4dd48a8 --- /dev/null +++ b/docs/ja/development/architecture.md @@ -0,0 +1 @@ +../../en/development/architecture.md \ No newline at end of file diff --git a/docs/ja/development/build.md b/docs/ja/development/build.md new file mode 120000 index 00000000000..480dbc2e9f5 --- /dev/null +++ b/docs/ja/development/build.md @@ -0,0 +1 @@ +../../en/development/build.md \ No newline at end of file diff --git a/docs/ja/development/build_cross_arm.md b/docs/ja/development/build_cross_arm.md new file mode 120000 index 00000000000..983a9872dc1 --- /dev/null +++ b/docs/ja/development/build_cross_arm.md @@ -0,0 +1 @@ +../../en/development/build_cross_arm.md \ No newline at end of file diff --git a/docs/ja/development/build_cross_osx.md b/docs/ja/development/build_cross_osx.md new file mode 120000 index 00000000000..72e64e8631f --- /dev/null +++ b/docs/ja/development/build_cross_osx.md @@ -0,0 +1 @@ +../../en/development/build_cross_osx.md \ No newline at end of file diff --git a/docs/ja/development/build_osx.md b/docs/ja/development/build_osx.md new file mode 120000 index 00000000000..f9adaf24584 --- /dev/null +++ b/docs/ja/development/build_osx.md @@ -0,0 +1 @@ +../../en/development/build_osx.md \ No newline at end of file diff --git a/docs/ja/development/contrib.md b/docs/ja/development/contrib.md new file mode 120000 index 00000000000..4749f95f9ef --- /dev/null +++ b/docs/ja/development/contrib.md @@ -0,0 +1 @@ +../../en/development/contrib.md \ No newline at end of file diff --git a/docs/ja/development/developer_instruction.md b/docs/ja/development/developer_instruction.md new file mode 120000 index 00000000000..bdfa9047aa2 --- /dev/null +++ b/docs/ja/development/developer_instruction.md @@ -0,0 +1 
@@ +../../en/development/developer_instruction.md \ No newline at end of file diff --git a/docs/ja/development/index.md b/docs/ja/development/index.md new file mode 120000 index 00000000000..1e2ad97dcc5 --- /dev/null +++ b/docs/ja/development/index.md @@ -0,0 +1 @@ +../../en/development/index.md \ No newline at end of file diff --git a/docs/ja/development/style.md b/docs/ja/development/style.md new file mode 120000 index 00000000000..c1bbf11f421 --- /dev/null +++ b/docs/ja/development/style.md @@ -0,0 +1 @@ +../../en/development/style.md \ No newline at end of file diff --git a/docs/ja/development/tests.md b/docs/ja/development/tests.md new file mode 120000 index 00000000000..c03d36c3916 --- /dev/null +++ b/docs/ja/development/tests.md @@ -0,0 +1 @@ +../../en/development/tests.md \ No newline at end of file diff --git a/docs/ja/faq/general.md b/docs/ja/faq/general.md new file mode 120000 index 00000000000..bc267395b1b --- /dev/null +++ b/docs/ja/faq/general.md @@ -0,0 +1 @@ +../../en/faq/general.md \ No newline at end of file diff --git a/docs/ja/getting_started/example_datasets/amplab_benchmark.md b/docs/ja/getting_started/example_datasets/amplab_benchmark.md new file mode 120000 index 00000000000..78c93906bb0 --- /dev/null +++ b/docs/ja/getting_started/example_datasets/amplab_benchmark.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/amplab_benchmark.md \ No newline at end of file diff --git a/docs/ja/getting_started/example_datasets/criteo.md b/docs/ja/getting_started/example_datasets/criteo.md new file mode 120000 index 00000000000..507dc68cd62 --- /dev/null +++ b/docs/ja/getting_started/example_datasets/criteo.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/criteo.md \ No newline at end of file diff --git a/docs/ja/getting_started/example_datasets/metrica.md b/docs/ja/getting_started/example_datasets/metrica.md new file mode 120000 index 00000000000..984023973eb --- /dev/null +++ b/docs/ja/getting_started/example_datasets/metrica.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/metrica.md \ No newline at end of file diff --git a/docs/ja/getting_started/example_datasets/nyc_taxi.md b/docs/ja/getting_started/example_datasets/nyc_taxi.md new file mode 120000 index 00000000000..c47fc83a293 --- /dev/null +++ b/docs/ja/getting_started/example_datasets/nyc_taxi.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/nyc_taxi.md \ No newline at end of file diff --git a/docs/ja/getting_started/example_datasets/ontime.md b/docs/ja/getting_started/example_datasets/ontime.md new file mode 120000 index 00000000000..87cfbb8be91 --- /dev/null +++ b/docs/ja/getting_started/example_datasets/ontime.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/ontime.md \ No newline at end of file diff --git a/docs/ja/getting_started/example_datasets/star_schema.md b/docs/ja/getting_started/example_datasets/star_schema.md new file mode 120000 index 00000000000..1c26392dd23 --- /dev/null +++ b/docs/ja/getting_started/example_datasets/star_schema.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/star_schema.md \ No newline at end of file diff --git a/docs/ja/getting_started/example_datasets/wikistat.md b/docs/ja/getting_started/example_datasets/wikistat.md new file mode 120000 index 00000000000..bf6e780fb27 --- /dev/null +++ b/docs/ja/getting_started/example_datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/wikistat.md \ No newline at end of file diff --git a/docs/ja/getting_started/index.md 
b/docs/ja/getting_started/index.md new file mode 120000 index 00000000000..1acedb0f03e --- /dev/null +++ b/docs/ja/getting_started/index.md @@ -0,0 +1 @@ +../../en/getting_started/index.md \ No newline at end of file diff --git a/docs/ja/getting_started/install.md b/docs/ja/getting_started/install.md new file mode 120000 index 00000000000..60aa3fb93a4 --- /dev/null +++ b/docs/ja/getting_started/install.md @@ -0,0 +1 @@ +../../en/getting_started/install.md \ No newline at end of file diff --git a/docs/ja/getting_started/tutorial.md b/docs/ja/getting_started/tutorial.md new file mode 120000 index 00000000000..8bc40816ab2 --- /dev/null +++ b/docs/ja/getting_started/tutorial.md @@ -0,0 +1 @@ +../../en/getting_started/tutorial.md \ No newline at end of file diff --git a/docs/ja/guides/apply_catboost_model.md b/docs/ja/guides/apply_catboost_model.md new file mode 120000 index 00000000000..dd36e885974 --- /dev/null +++ b/docs/ja/guides/apply_catboost_model.md @@ -0,0 +1 @@ +../../en/guides/apply_catboost_model.md \ No newline at end of file diff --git a/docs/ja/guides/index.md b/docs/ja/guides/index.md new file mode 120000 index 00000000000..162dcbc3b8f --- /dev/null +++ b/docs/ja/guides/index.md @@ -0,0 +1 @@ +../../en/guides/index.md \ No newline at end of file diff --git a/docs/ja/images/column_oriented.gif b/docs/ja/images/column_oriented.gif new file mode 100644 index 00000000000..15f4b12e697 Binary files /dev/null and b/docs/ja/images/column_oriented.gif differ diff --git a/docs/ja/images/logo.svg b/docs/ja/images/logo.svg new file mode 100644 index 00000000000..70662da887e --- /dev/null +++ b/docs/ja/images/logo.svg @@ -0,0 +1,12 @@ + + + + + + + + + diff --git a/docs/ja/images/row_oriented.gif b/docs/ja/images/row_oriented.gif new file mode 100644 index 00000000000..53daa20f322 Binary files /dev/null and b/docs/ja/images/row_oriented.gif differ diff --git a/docs/ja/index.md b/docs/ja/index.md deleted file mode 100644 index 6dea5f6570b..00000000000 --- a/docs/ja/index.md +++ /dev/null @@ -1,142 +0,0 @@ -# ClickHouseとは? - -ClickHouseは、クエリのオンライン分析処理(OLAP)用の列指向のデータベース管理システム(DBMS)です。 - -「通常の」行指向のDBMSでは、データは次の順序で保存されます。 - -| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime | -| ------ | ------------------- | ---------- | ------------------ | --------- | ------------------- | -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | ... | ... | ... | ... | ... | - -つまり、行に関連するすべての値は物理的に隣り合わせに格納されます。 - -行指向のDBMSの例:MySQL, Postgres および MS SQL Server -{: .grey } - -列指向のDBMSでは、データは次のように保存されます: - -| Row: | #0 | #1 | #2 | #N | -| ----------- | ------------------- | ------------------- | ------------------- | ------------------- | -| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | -| JavaEnable: | 1 | 0 | 1 | ... | -| Title: | Investor Relations | Contact us | Mission | ... | -| GoodEvent: | 1 | 1 | 1 | ... | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... 
-
-These examples only show the order that data is arranged in: the values from different columns are stored separately, and data from the same column is stored together.
-
-Examples of column-oriented DBMSs: Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, and kdb+.
-{: .grey }
-
-Different orders for storing data are better suited to different scenarios. The data access scenario refers to what queries are made, how often, and in what proportion; how much data of each kind is read per query (rows, columns, and bytes); the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used and how isolated they are; the requirements for data replication and logical consistency; and the latency and throughput requirements for each type of query, and so on.
-
-The higher the load on the system, the more important it is to customize the setup to match the requirements of the usage scenario, and the more fine-grained this customization becomes. No system is equally well suited to significantly different scenarios. If a system is adaptable to a wide set of scenarios, then under high load it will either handle all of them equally poorly, or it will work well for only one or a few of the possible scenarios.
-
-## Key properties of the OLAP scenario
-
-- The vast majority of requests are for read access.
-- Data is updated in fairly large batches (> 1000 rows) rather than by single rows, or it is not updated at all.
-- Data is added to the DB but is not modified.
-- Reads extract a rather large number of rows from the DB, but only a small subset of the columns.
-- Tables are "wide", meaning they contain a large number of columns.
-- Queries are relatively rare (usually hundreds of queries per server per second, or fewer).
-- For simple queries, latencies of around 50 ms are acceptable.
-- Column values are fairly small: numbers and short strings (for example, 60 bytes per URL).
-- High throughput is required when processing a single query (up to billions of rows per second per server).
-- Transactions are not necessary.
-- The requirements for data consistency are low.
-- There is one large table per query; all tables except one are small.
-- A query result is significantly smaller than the source data. In other words, data is filtered or aggregated, so the result fits in a single server's RAM.
-
-It is easy to see that the OLAP scenario is very different from other popular scenarios (such as OLTP or Key-Value access). So it doesn't make sense to try to use an OLTP or Key-Value DB for processing analytical queries if you want decent performance. For example, if you try to use MongoDB or Redis for analytics, you will get very poor performance compared to OLAP databases.
-
-## Why column-oriented databases work better in the OLAP scenario
-
-Column-oriented databases are better suited to OLAP scenarios: they are at least 100 times faster at processing most queries. The reasons are explained in detail below, but the gist is easy to demonstrate visually:
-
-**Row-oriented DBMS**
-
-![Row-oriented](images/row_oriented.gif#)
-
-**Column-oriented DBMS**
-
-![Column-oriented](images/column_oriented.gif#)
-
-See the difference?
-
-### Input/output
-
-1. For an analytical query, only a small number of table columns need to be read. In a column-oriented database, you can read just the data you need. For example, if you need 5 columns out of 100, you can expect a 20-fold reduction in I/O.
-2. Since data is read in packets, it is easier to compress. Data in columns is also easier to compress. This further reduces the I/O volume.
-3. Due to the reduced I/O, more data fits in the system cache.
-
-For example, the query "count the number of records for each advertising platform" requires reading one "advertising platform ID" column, which takes up 1 byte uncompressed. If most of the traffic was not from advertising platforms, you can expect at least 10-fold compression of this column. With a fast compression algorithm, data can be decompressed at a rate of at least several gigabytes of uncompressed data per second. In other words, this query can be processed at a speed of roughly several billion rows per second on a single server. This speed is actually achieved in practice.
-Example:
-```
-$ clickhouse-client
-ClickHouse client version 0.0.52053.
-Connecting to localhost:9000.
-Connected to ClickHouse server version 0.0.52053.
-
-:) SELECT CounterID, count() FROM hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20
-
-SELECT
-CounterID,
-count()
-FROM hits
-GROUP BY CounterID
-ORDER BY count() DESC
-LIMIT 20
-
-┌─CounterID─┬──count()─┐
-│ 114208 │ 56057344 │
-│ 115080 │ 51619590 │
-│ 3228 │ 44658301 │
-│ 38230 │ 42045932 │
-│ 145263 │ 42042158 │
-│ 91244 │ 38297270 │
-│ 154139 │ 26647572 │
-│ 150748 │ 24112755 │
-│ 242232 │ 21302571 │
-│ 338158 │ 13507087 │
-│ 62180 │ 12229491 │
-│ 82264 │ 12187441 │
-│ 232261 │ 12148031 │
-│ 146272 │ 11438516 │
-│ 168777 │ 11403636 │
-│ 4120072 │ 11227824 │
-│ 10938808 │ 10519739 │
-│ 74088 │ 9047015 │
-│ 115079 │ 8837972 │
-│ 337234 │ 8205961 │
-└───────────┴──────────┘
-
-20 rows in set. Elapsed: 0.153 sec. Processed 1.00 billion rows, 4.00 GB (6.53 billion rows/s., 26.10 GB/s.)
-
-:)
-```
-
-### CPU
-
-Since executing a query requires processing a large number of rows, it is more efficient to dispatch all operations for entire vectors instead of for separate rows, or to implement the query engine so that there is almost no dispatching cost. If you don't do this, with any half-decent disk subsystem the query interpreter inevitably stalls the CPU. It makes sense both to store data in columns and, where possible, to process it by columns.
-
-There are two ways to do this:
-
-1. A vector engine. All operations are written for vectors rather than for separate values. This means operations don't need to be invoked very often, and the dispatching cost is negligible. The operation code contains an optimized internal loop.
-
-2. Code generation. The code generated for the query has all the indirect calls in it.
-
-This is not done in "normal" databases, because it doesn't make sense when running simple queries. However, there are exceptions. For example, MemSQL uses code generation to reduce latency when processing SQL queries. (For comparison, analytical DBMSs require optimization of throughput, not latency.)
-
-For CPU efficiency, the query language must be declarative (SQL or MDX), or at least vector-based (J, K). The query should only contain implicit loops, which allows for optimization.
-
-[Original article](https://clickhouse.yandex/docs/en/) diff --git a/docs/ja/index.md b/docs/ja/index.md new file mode 120000 index 00000000000..facf2d71348 --- /dev/null +++ b/docs/ja/index.md @@ -0,0 +1 @@ +../en/index.md \ No newline at end of file diff --git a/docs/ja/interfaces/cli.md b/docs/ja/interfaces/cli.md new file mode 120000 index 00000000000..04588066828 --- /dev/null +++ b/docs/ja/interfaces/cli.md @@ -0,0 +1 @@ +../../en/interfaces/cli.md \ No newline at end of file diff --git a/docs/ja/interfaces/cpp.md b/docs/ja/interfaces/cpp.md new file mode 120000 index 00000000000..581e50e774d --- /dev/null +++ b/docs/ja/interfaces/cpp.md @@ -0,0 +1 @@ +../../en/interfaces/cpp.md \ No newline at end of file diff --git a/docs/ja/interfaces/formats.md b/docs/ja/interfaces/formats.md new file mode 120000 index 00000000000..41a65ebe579 --- /dev/null +++ b/docs/ja/interfaces/formats.md @@ -0,0 +1 @@ +../../en/interfaces/formats.md \ No newline at end of file diff --git a/docs/ja/interfaces/http.md b/docs/ja/interfaces/http.md new file mode 120000 index 00000000000..fb293841d8b --- /dev/null +++ b/docs/ja/interfaces/http.md @@ -0,0 +1 @@ +../../en/interfaces/http.md \ No newline at end of file diff --git a/docs/ja/interfaces/index.md b/docs/ja/interfaces/index.md new file mode 120000 index 00000000000..61537763cac --- /dev/null +++ b/docs/ja/interfaces/index.md @@ -0,0 +1 @@ +../../en/interfaces/index.md \ No newline at end of file diff --git a/docs/ja/interfaces/jdbc.md b/docs/ja/interfaces/jdbc.md new file mode 120000 index 00000000000..27dfe0cfa5a --- /dev/null +++ b/docs/ja/interfaces/jdbc.md @@ -0,0 +1 @@ +../../en/interfaces/jdbc.md \ No newline at end of file diff --git a/docs/ja/interfaces/odbc.md b/docs/ja/interfaces/odbc.md new file mode 120000 index 00000000000..5ff7610e061 --- /dev/null +++ b/docs/ja/interfaces/odbc.md @@ -0,0 +1 @@ +../../en/interfaces/odbc.md \ No newline at end of file diff --git a/docs/ja/interfaces/tcp.md b/docs/ja/interfaces/tcp.md new file mode 120000 index 00000000000..a0529a856e4 --- /dev/null +++ b/docs/ja/interfaces/tcp.md @@ -0,0 +1 @@ +../../en/interfaces/tcp.md \ No newline at end of file diff --git a/docs/ja/interfaces/third-party/client_libraries.md b/docs/ja/interfaces/third-party/client_libraries.md new file mode 120000 index 00000000000..5320bbe1e16 --- /dev/null +++ b/docs/ja/interfaces/third-party/client_libraries.md @@ -0,0 +1 @@ +../../../en/interfaces/third-party/client_libraries.md \ No newline at end of file diff --git a/docs/ja/interfaces/third-party/gui.md b/docs/ja/interfaces/third-party/gui.md new file mode 120000 index 00000000000..ef7bc904197 --- /dev/null +++ b/docs/ja/interfaces/third-party/gui.md @@ -0,0 +1 @@ +../../../en/interfaces/third-party/gui.md \ No newline at end of file diff --git a/docs/ja/interfaces/third-party/integrations.md
b/docs/ja/interfaces/third-party/integrations.md new file mode 120000 index 00000000000..9cd0a21e676 --- /dev/null +++ b/docs/ja/interfaces/third-party/integrations.md @@ -0,0 +1 @@ +../../../en/interfaces/third-party/integrations.md \ No newline at end of file diff --git a/docs/ja/interfaces/third-party/proxy.md b/docs/ja/interfaces/third-party/proxy.md new file mode 120000 index 00000000000..877f1b51dab --- /dev/null +++ b/docs/ja/interfaces/third-party/proxy.md @@ -0,0 +1 @@ +../../../en/interfaces/third-party/proxy.md \ No newline at end of file diff --git a/docs/ja/introduction/distinctive_features.md b/docs/ja/introduction/distinctive_features.md new file mode 120000 index 00000000000..9cf00a2a00f --- /dev/null +++ b/docs/ja/introduction/distinctive_features.md @@ -0,0 +1 @@ +../../en/introduction/distinctive_features.md \ No newline at end of file diff --git a/docs/ja/introduction/features_considered_disadvantages.md b/docs/ja/introduction/features_considered_disadvantages.md new file mode 120000 index 00000000000..45d3cdf563a --- /dev/null +++ b/docs/ja/introduction/features_considered_disadvantages.md @@ -0,0 +1 @@ +../../en/introduction/features_considered_disadvantages.md \ No newline at end of file diff --git a/docs/ja/introduction/history.md b/docs/ja/introduction/history.md new file mode 120000 index 00000000000..7004e990a59 --- /dev/null +++ b/docs/ja/introduction/history.md @@ -0,0 +1 @@ +../../en/introduction/history.md \ No newline at end of file diff --git a/docs/ja/introduction/performance.md b/docs/ja/introduction/performance.md new file mode 120000 index 00000000000..cb2912bcb81 --- /dev/null +++ b/docs/ja/introduction/performance.md @@ -0,0 +1 @@ +../../en/introduction/performance.md \ No newline at end of file diff --git a/docs/ja/operations/access_rights.md b/docs/ja/operations/access_rights.md new file mode 120000 index 00000000000..73463029569 --- /dev/null +++ b/docs/ja/operations/access_rights.md @@ -0,0 +1 @@ +../../en/operations/access_rights.md \ No newline at end of file diff --git a/docs/ja/operations/backup.md b/docs/ja/operations/backup.md new file mode 120000 index 00000000000..1003fb30e61 --- /dev/null +++ b/docs/ja/operations/backup.md @@ -0,0 +1 @@ +../../en/operations/backup.md \ No newline at end of file diff --git a/docs/ja/operations/configuration_files.md b/docs/ja/operations/configuration_files.md new file mode 120000 index 00000000000..a2d73dbaa25 --- /dev/null +++ b/docs/ja/operations/configuration_files.md @@ -0,0 +1 @@ +../../en/operations/configuration_files.md \ No newline at end of file diff --git a/docs/ja/operations/index.md b/docs/ja/operations/index.md new file mode 120000 index 00000000000..ce854687b86 --- /dev/null +++ b/docs/ja/operations/index.md @@ -0,0 +1 @@ +../../en/operations/index.md \ No newline at end of file diff --git a/docs/ja/operations/monitoring.md b/docs/ja/operations/monitoring.md new file mode 120000 index 00000000000..515ae8b4fff --- /dev/null +++ b/docs/ja/operations/monitoring.md @@ -0,0 +1 @@ +../../en/operations/monitoring.md \ No newline at end of file diff --git a/docs/ja/operations/quotas.md b/docs/ja/operations/quotas.md new file mode 120000 index 00000000000..1c52cdf1e91 --- /dev/null +++ b/docs/ja/operations/quotas.md @@ -0,0 +1 @@ +../../en/operations/quotas.md \ No newline at end of file diff --git a/docs/ja/operations/requirements.md b/docs/ja/operations/requirements.md new file mode 120000 index 00000000000..a71283af25c --- /dev/null +++ b/docs/ja/operations/requirements.md @@ -0,0 +1 @@ 
+../../en/operations/requirements.md \ No newline at end of file diff --git a/docs/ja/operations/server_settings/index.md b/docs/ja/operations/server_settings/index.md new file mode 120000 index 00000000000..1d1a0585a42 --- /dev/null +++ b/docs/ja/operations/server_settings/index.md @@ -0,0 +1 @@ +../../../en/operations/server_settings/index.md \ No newline at end of file diff --git a/docs/ja/operations/server_settings/settings.md b/docs/ja/operations/server_settings/settings.md new file mode 120000 index 00000000000..19cd2e82ce7 --- /dev/null +++ b/docs/ja/operations/server_settings/settings.md @@ -0,0 +1 @@ +../../../en/operations/server_settings/settings.md \ No newline at end of file diff --git a/docs/ja/operations/settings/constraints_on_settings.md b/docs/ja/operations/settings/constraints_on_settings.md new file mode 120000 index 00000000000..4dacf908662 --- /dev/null +++ b/docs/ja/operations/settings/constraints_on_settings.md @@ -0,0 +1 @@ +../../../en/operations/settings/constraints_on_settings.md \ No newline at end of file diff --git a/docs/ja/operations/settings/index.md b/docs/ja/operations/settings/index.md new file mode 120000 index 00000000000..fc3968d1f1e --- /dev/null +++ b/docs/ja/operations/settings/index.md @@ -0,0 +1 @@ +../../../en/operations/settings/index.md \ No newline at end of file diff --git a/docs/ja/operations/settings/permissions_for_queries.md b/docs/ja/operations/settings/permissions_for_queries.md new file mode 120000 index 00000000000..ce8473bf01c --- /dev/null +++ b/docs/ja/operations/settings/permissions_for_queries.md @@ -0,0 +1 @@ +../../../en/operations/settings/permissions_for_queries.md \ No newline at end of file diff --git a/docs/ja/operations/settings/query_complexity.md b/docs/ja/operations/settings/query_complexity.md new file mode 120000 index 00000000000..9a9c6d975a9 --- /dev/null +++ b/docs/ja/operations/settings/query_complexity.md @@ -0,0 +1 @@ +../../../en/operations/settings/query_complexity.md \ No newline at end of file diff --git a/docs/ja/operations/settings/settings.md b/docs/ja/operations/settings/settings.md new file mode 120000 index 00000000000..0c8df3cfc90 --- /dev/null +++ b/docs/ja/operations/settings/settings.md @@ -0,0 +1 @@ +../../../en/operations/settings/settings.md \ No newline at end of file diff --git a/docs/ja/operations/settings/settings_profiles.md b/docs/ja/operations/settings/settings_profiles.md new file mode 120000 index 00000000000..35d9747ad56 --- /dev/null +++ b/docs/ja/operations/settings/settings_profiles.md @@ -0,0 +1 @@ +../../../en/operations/settings/settings_profiles.md \ No newline at end of file diff --git a/docs/ja/operations/settings/settings_users.md b/docs/ja/operations/settings/settings_users.md new file mode 120000 index 00000000000..3a6a7cf6948 --- /dev/null +++ b/docs/ja/operations/settings/settings_users.md @@ -0,0 +1 @@ +../../../en/operations/settings/settings_users.md \ No newline at end of file diff --git a/docs/ja/operations/system_tables.md b/docs/ja/operations/system_tables.md new file mode 120000 index 00000000000..c5701190dca --- /dev/null +++ b/docs/ja/operations/system_tables.md @@ -0,0 +1 @@ +../../en/operations/system_tables.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/aggregatingmergetree.md b/docs/ja/operations/table_engines/aggregatingmergetree.md new file mode 120000 index 00000000000..907a073e0c8 --- /dev/null +++ b/docs/ja/operations/table_engines/aggregatingmergetree.md @@ -0,0 +1 @@ 
+../../../en/operations/table_engines/aggregatingmergetree.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/buffer.md b/docs/ja/operations/table_engines/buffer.md new file mode 120000 index 00000000000..0a3c372fa67 --- /dev/null +++ b/docs/ja/operations/table_engines/buffer.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/buffer.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/collapsingmergetree.md b/docs/ja/operations/table_engines/collapsingmergetree.md new file mode 120000 index 00000000000..ef5cebb48d8 --- /dev/null +++ b/docs/ja/operations/table_engines/collapsingmergetree.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/collapsingmergetree.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/custom_partitioning_key.md b/docs/ja/operations/table_engines/custom_partitioning_key.md new file mode 120000 index 00000000000..a9d18cacb25 --- /dev/null +++ b/docs/ja/operations/table_engines/custom_partitioning_key.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/custom_partitioning_key.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/dictionary.md b/docs/ja/operations/table_engines/dictionary.md new file mode 120000 index 00000000000..2a95f4a669b --- /dev/null +++ b/docs/ja/operations/table_engines/dictionary.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/dictionary.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/distributed.md b/docs/ja/operations/table_engines/distributed.md new file mode 120000 index 00000000000..46994303c35 --- /dev/null +++ b/docs/ja/operations/table_engines/distributed.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/distributed.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/external_data.md b/docs/ja/operations/table_engines/external_data.md new file mode 120000 index 00000000000..27a7b6acec2 --- /dev/null +++ b/docs/ja/operations/table_engines/external_data.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/external_data.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/file.md b/docs/ja/operations/table_engines/file.md new file mode 120000 index 00000000000..27dffc8d78f --- /dev/null +++ b/docs/ja/operations/table_engines/file.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/file.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/graphitemergetree.md b/docs/ja/operations/table_engines/graphitemergetree.md new file mode 120000 index 00000000000..654425d050a --- /dev/null +++ b/docs/ja/operations/table_engines/graphitemergetree.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/graphitemergetree.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/hdfs.md b/docs/ja/operations/table_engines/hdfs.md new file mode 120000 index 00000000000..d4dbfa46e68 --- /dev/null +++ b/docs/ja/operations/table_engines/hdfs.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/hdfs.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/index.md b/docs/ja/operations/table_engines/index.md new file mode 120000 index 00000000000..994dff9b516 --- /dev/null +++ b/docs/ja/operations/table_engines/index.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/index.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/jdbc.md b/docs/ja/operations/table_engines/jdbc.md new file mode 120000 index 00000000000..5165d704b9a --- /dev/null +++ 
b/docs/ja/operations/table_engines/jdbc.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/jdbc.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/join.md b/docs/ja/operations/table_engines/join.md new file mode 120000 index 00000000000..0914ab950ed --- /dev/null +++ b/docs/ja/operations/table_engines/join.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/join.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/kafka.md b/docs/ja/operations/table_engines/kafka.md new file mode 120000 index 00000000000..cb7bd5dd0f8 --- /dev/null +++ b/docs/ja/operations/table_engines/kafka.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/kafka.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/log.md b/docs/ja/operations/table_engines/log.md new file mode 120000 index 00000000000..2c39ba68522 --- /dev/null +++ b/docs/ja/operations/table_engines/log.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/log.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/log_family.md b/docs/ja/operations/table_engines/log_family.md new file mode 120000 index 00000000000..8c5b5f0365b --- /dev/null +++ b/docs/ja/operations/table_engines/log_family.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/log_family.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/materializedview.md b/docs/ja/operations/table_engines/materializedview.md new file mode 120000 index 00000000000..e3b5deb73dc --- /dev/null +++ b/docs/ja/operations/table_engines/materializedview.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/materializedview.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/memory.md b/docs/ja/operations/table_engines/memory.md new file mode 120000 index 00000000000..eee940c7bd3 --- /dev/null +++ b/docs/ja/operations/table_engines/memory.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/memory.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/merge.md b/docs/ja/operations/table_engines/merge.md new file mode 120000 index 00000000000..9e17d9bb939 --- /dev/null +++ b/docs/ja/operations/table_engines/merge.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/merge.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/mergetree.md b/docs/ja/operations/table_engines/mergetree.md new file mode 120000 index 00000000000..cc6ac1e5297 --- /dev/null +++ b/docs/ja/operations/table_engines/mergetree.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/mergetree.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/mysql.md b/docs/ja/operations/table_engines/mysql.md new file mode 120000 index 00000000000..e4c268658cf --- /dev/null +++ b/docs/ja/operations/table_engines/mysql.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/mysql.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/null.md b/docs/ja/operations/table_engines/null.md new file mode 120000 index 00000000000..c7d9264571e --- /dev/null +++ b/docs/ja/operations/table_engines/null.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/null.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/odbc.md b/docs/ja/operations/table_engines/odbc.md new file mode 120000 index 00000000000..06091fd5377 --- /dev/null +++ b/docs/ja/operations/table_engines/odbc.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/odbc.md \ No newline at end of file diff --git 
a/docs/ja/operations/table_engines/replacingmergetree.md b/docs/ja/operations/table_engines/replacingmergetree.md new file mode 120000 index 00000000000..63ff25a4dd6 --- /dev/null +++ b/docs/ja/operations/table_engines/replacingmergetree.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/replacingmergetree.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/replication.md b/docs/ja/operations/table_engines/replication.md new file mode 120000 index 00000000000..b4b22ac708b --- /dev/null +++ b/docs/ja/operations/table_engines/replication.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/replication.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/set.md b/docs/ja/operations/table_engines/set.md new file mode 120000 index 00000000000..d37e659badd --- /dev/null +++ b/docs/ja/operations/table_engines/set.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/set.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/stripelog.md b/docs/ja/operations/table_engines/stripelog.md new file mode 120000 index 00000000000..f6521a41e3e --- /dev/null +++ b/docs/ja/operations/table_engines/stripelog.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/stripelog.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/summingmergetree.md b/docs/ja/operations/table_engines/summingmergetree.md new file mode 120000 index 00000000000..2b67e953d8a --- /dev/null +++ b/docs/ja/operations/table_engines/summingmergetree.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/summingmergetree.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/tinylog.md b/docs/ja/operations/table_engines/tinylog.md new file mode 120000 index 00000000000..bda90c7d5ce --- /dev/null +++ b/docs/ja/operations/table_engines/tinylog.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/tinylog.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/url.md b/docs/ja/operations/table_engines/url.md new file mode 120000 index 00000000000..d0de71dcf40 --- /dev/null +++ b/docs/ja/operations/table_engines/url.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/url.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/versionedcollapsingmergetree.md b/docs/ja/operations/table_engines/versionedcollapsingmergetree.md new file mode 120000 index 00000000000..5843fba70b8 --- /dev/null +++ b/docs/ja/operations/table_engines/versionedcollapsingmergetree.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/versionedcollapsingmergetree.md \ No newline at end of file diff --git a/docs/ja/operations/table_engines/view.md b/docs/ja/operations/table_engines/view.md new file mode 120000 index 00000000000..3f2164181a7 --- /dev/null +++ b/docs/ja/operations/table_engines/view.md @@ -0,0 +1 @@ +../../../en/operations/table_engines/view.md \ No newline at end of file diff --git a/docs/ja/operations/tips.md b/docs/ja/operations/tips.md new file mode 120000 index 00000000000..9b3413bdbc3 --- /dev/null +++ b/docs/ja/operations/tips.md @@ -0,0 +1 @@ +../../en/operations/tips.md \ No newline at end of file diff --git a/docs/ja/operations/troubleshooting.md b/docs/ja/operations/troubleshooting.md new file mode 120000 index 00000000000..84f0ff34f41 --- /dev/null +++ b/docs/ja/operations/troubleshooting.md @@ -0,0 +1 @@ +../../en/operations/troubleshooting.md \ No newline at end of file diff --git a/docs/ja/operations/update.md b/docs/ja/operations/update.md new file mode 120000 index 
00000000000..88a092c0dff --- /dev/null +++ b/docs/ja/operations/update.md @@ -0,0 +1 @@ +../../en/operations/update.md \ No newline at end of file diff --git a/docs/ja/operations/utils/clickhouse-copier.md b/docs/ja/operations/utils/clickhouse-copier.md new file mode 120000 index 00000000000..c9e89e33c7b --- /dev/null +++ b/docs/ja/operations/utils/clickhouse-copier.md @@ -0,0 +1 @@ +../../../en/operations/utils/clickhouse-copier.md \ No newline at end of file diff --git a/docs/ja/operations/utils/clickhouse-local.md b/docs/ja/operations/utils/clickhouse-local.md new file mode 120000 index 00000000000..032aaaa2b84 --- /dev/null +++ b/docs/ja/operations/utils/clickhouse-local.md @@ -0,0 +1 @@ +../../../en/operations/utils/clickhouse-local.md \ No newline at end of file diff --git a/docs/ja/operations/utils/index.md b/docs/ja/operations/utils/index.md new file mode 120000 index 00000000000..dd089d1ef4b --- /dev/null +++ b/docs/ja/operations/utils/index.md @@ -0,0 +1 @@ +../../../en/operations/utils/index.md \ No newline at end of file diff --git a/docs/ja/query_language/agg_functions/combinators.md b/docs/ja/query_language/agg_functions/combinators.md new file mode 120000 index 00000000000..2b914cebd15 --- /dev/null +++ b/docs/ja/query_language/agg_functions/combinators.md @@ -0,0 +1 @@ +../../../en/query_language/agg_functions/combinators.md \ No newline at end of file diff --git a/docs/ja/query_language/agg_functions/index.md b/docs/ja/query_language/agg_functions/index.md new file mode 120000 index 00000000000..2fcf67abdeb --- /dev/null +++ b/docs/ja/query_language/agg_functions/index.md @@ -0,0 +1 @@ +../../../en/query_language/agg_functions/index.md \ No newline at end of file diff --git a/docs/ja/query_language/agg_functions/parametric_functions.md b/docs/ja/query_language/agg_functions/parametric_functions.md new file mode 120000 index 00000000000..fd3ffafcc5b --- /dev/null +++ b/docs/ja/query_language/agg_functions/parametric_functions.md @@ -0,0 +1 @@ +../../../en/query_language/agg_functions/parametric_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/agg_functions/reference.md b/docs/ja/query_language/agg_functions/reference.md new file mode 120000 index 00000000000..c5651cb0793 --- /dev/null +++ b/docs/ja/query_language/agg_functions/reference.md @@ -0,0 +1 @@ +../../../en/query_language/agg_functions/reference.md \ No newline at end of file diff --git a/docs/ja/query_language/alter.md b/docs/ja/query_language/alter.md new file mode 120000 index 00000000000..44f4ecf9737 --- /dev/null +++ b/docs/ja/query_language/alter.md @@ -0,0 +1 @@ +../../en/query_language/alter.md \ No newline at end of file diff --git a/docs/ja/query_language/create.md b/docs/ja/query_language/create.md new file mode 120000 index 00000000000..a13304d176e --- /dev/null +++ b/docs/ja/query_language/create.md @@ -0,0 +1 @@ +../../en/query_language/create.md \ No newline at end of file diff --git a/docs/ja/query_language/dicts/external_dicts.md b/docs/ja/query_language/dicts/external_dicts.md new file mode 120000 index 00000000000..491b94bffe6 --- /dev/null +++ b/docs/ja/query_language/dicts/external_dicts.md @@ -0,0 +1 @@ +../../../en/query_language/dicts/external_dicts.md \ No newline at end of file diff --git a/docs/ja/query_language/dicts/external_dicts_dict.md b/docs/ja/query_language/dicts/external_dicts_dict.md new file mode 120000 index 00000000000..e27820fee60 --- /dev/null +++ b/docs/ja/query_language/dicts/external_dicts_dict.md @@ -0,0 +1 @@ 
+../../../en/query_language/dicts/external_dicts_dict.md \ No newline at end of file diff --git a/docs/ja/query_language/dicts/external_dicts_dict_layout.md b/docs/ja/query_language/dicts/external_dicts_dict_layout.md new file mode 120000 index 00000000000..e391c5be723 --- /dev/null +++ b/docs/ja/query_language/dicts/external_dicts_dict_layout.md @@ -0,0 +1 @@ +../../../en/query_language/dicts/external_dicts_dict_layout.md \ No newline at end of file diff --git a/docs/ja/query_language/dicts/external_dicts_dict_lifetime.md b/docs/ja/query_language/dicts/external_dicts_dict_lifetime.md new file mode 120000 index 00000000000..03b53c09077 --- /dev/null +++ b/docs/ja/query_language/dicts/external_dicts_dict_lifetime.md @@ -0,0 +1 @@ +../../../en/query_language/dicts/external_dicts_dict_lifetime.md \ No newline at end of file diff --git a/docs/ja/query_language/dicts/external_dicts_dict_sources.md b/docs/ja/query_language/dicts/external_dicts_dict_sources.md new file mode 120000 index 00000000000..d4f4bf8ef3e --- /dev/null +++ b/docs/ja/query_language/dicts/external_dicts_dict_sources.md @@ -0,0 +1 @@ +../../../en/query_language/dicts/external_dicts_dict_sources.md \ No newline at end of file diff --git a/docs/ja/query_language/dicts/external_dicts_dict_structure.md b/docs/ja/query_language/dicts/external_dicts_dict_structure.md new file mode 120000 index 00000000000..69ff759caea --- /dev/null +++ b/docs/ja/query_language/dicts/external_dicts_dict_structure.md @@ -0,0 +1 @@ +../../../en/query_language/dicts/external_dicts_dict_structure.md \ No newline at end of file diff --git a/docs/ja/query_language/dicts/index.md b/docs/ja/query_language/dicts/index.md new file mode 120000 index 00000000000..fdc188ca2a2 --- /dev/null +++ b/docs/ja/query_language/dicts/index.md @@ -0,0 +1 @@ +../../../en/query_language/dicts/index.md \ No newline at end of file diff --git a/docs/ja/query_language/dicts/internal_dicts.md b/docs/ja/query_language/dicts/internal_dicts.md new file mode 120000 index 00000000000..3f9408dcd45 --- /dev/null +++ b/docs/ja/query_language/dicts/internal_dicts.md @@ -0,0 +1 @@ +../../../en/query_language/dicts/internal_dicts.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/arithmetic_functions.md b/docs/ja/query_language/functions/arithmetic_functions.md new file mode 120000 index 00000000000..c22acb8c7f5 --- /dev/null +++ b/docs/ja/query_language/functions/arithmetic_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/arithmetic_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/array_functions.md b/docs/ja/query_language/functions/array_functions.md new file mode 120000 index 00000000000..268b2295a97 --- /dev/null +++ b/docs/ja/query_language/functions/array_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/array_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/array_join.md b/docs/ja/query_language/functions/array_join.md new file mode 120000 index 00000000000..b100dac784d --- /dev/null +++ b/docs/ja/query_language/functions/array_join.md @@ -0,0 +1 @@ +../../../en/query_language/functions/array_join.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/bit_functions.md b/docs/ja/query_language/functions/bit_functions.md new file mode 120000 index 00000000000..b5cccd0c56c --- /dev/null +++ b/docs/ja/query_language/functions/bit_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/bit_functions.md \ No newline at end of 
file diff --git a/docs/ja/query_language/functions/bitmap_functions.md b/docs/ja/query_language/functions/bitmap_functions.md new file mode 120000 index 00000000000..0a31d3d71d8 --- /dev/null +++ b/docs/ja/query_language/functions/bitmap_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/bitmap_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/comparison_functions.md b/docs/ja/query_language/functions/comparison_functions.md new file mode 120000 index 00000000000..417c589867c --- /dev/null +++ b/docs/ja/query_language/functions/comparison_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/comparison_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/conditional_functions.md b/docs/ja/query_language/functions/conditional_functions.md new file mode 120000 index 00000000000..ad0d775dbb5 --- /dev/null +++ b/docs/ja/query_language/functions/conditional_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/conditional_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/date_time_functions.md b/docs/ja/query_language/functions/date_time_functions.md new file mode 120000 index 00000000000..d11b9b8bb6b --- /dev/null +++ b/docs/ja/query_language/functions/date_time_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/date_time_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/encoding_functions.md b/docs/ja/query_language/functions/encoding_functions.md new file mode 120000 index 00000000000..b2e6be1405b --- /dev/null +++ b/docs/ja/query_language/functions/encoding_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/encoding_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/ext_dict_functions.md b/docs/ja/query_language/functions/ext_dict_functions.md new file mode 120000 index 00000000000..6318f900e4b --- /dev/null +++ b/docs/ja/query_language/functions/ext_dict_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/ext_dict_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/functions_for_nulls.md b/docs/ja/query_language/functions/functions_for_nulls.md new file mode 120000 index 00000000000..fa57e10ad15 --- /dev/null +++ b/docs/ja/query_language/functions/functions_for_nulls.md @@ -0,0 +1 @@ +../../../en/query_language/functions/functions_for_nulls.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/geo.md b/docs/ja/query_language/functions/geo.md new file mode 120000 index 00000000000..86fa3a85d34 --- /dev/null +++ b/docs/ja/query_language/functions/geo.md @@ -0,0 +1 @@ +../../../en/query_language/functions/geo.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/hash_functions.md b/docs/ja/query_language/functions/hash_functions.md new file mode 120000 index 00000000000..90de8ba97e7 --- /dev/null +++ b/docs/ja/query_language/functions/hash_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/hash_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/higher_order_functions.md b/docs/ja/query_language/functions/higher_order_functions.md new file mode 120000 index 00000000000..077feba2a3e --- /dev/null +++ b/docs/ja/query_language/functions/higher_order_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/higher_order_functions.md \ No newline at end of file diff --git 
a/docs/ja/query_language/functions/in_functions.md b/docs/ja/query_language/functions/in_functions.md new file mode 120000 index 00000000000..3ae5f24dbca --- /dev/null +++ b/docs/ja/query_language/functions/in_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/in_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/index.md b/docs/ja/query_language/functions/index.md new file mode 120000 index 00000000000..a4e9d619cc0 --- /dev/null +++ b/docs/ja/query_language/functions/index.md @@ -0,0 +1 @@ +../../../en/query_language/functions/index.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/introspection.md b/docs/ja/query_language/functions/introspection.md new file mode 120000 index 00000000000..b1a487e9c77 --- /dev/null +++ b/docs/ja/query_language/functions/introspection.md @@ -0,0 +1 @@ +../../../en/query_language/functions/introspection.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/ip_address_functions.md b/docs/ja/query_language/functions/ip_address_functions.md new file mode 120000 index 00000000000..b58175a7cdf --- /dev/null +++ b/docs/ja/query_language/functions/ip_address_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/ip_address_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/json_functions.md b/docs/ja/query_language/functions/json_functions.md new file mode 120000 index 00000000000..1b37184e006 --- /dev/null +++ b/docs/ja/query_language/functions/json_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/json_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/logical_functions.md b/docs/ja/query_language/functions/logical_functions.md new file mode 120000 index 00000000000..32015440e09 --- /dev/null +++ b/docs/ja/query_language/functions/logical_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/logical_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/machine_learning_functions.md b/docs/ja/query_language/functions/machine_learning_functions.md new file mode 120000 index 00000000000..4509602717e --- /dev/null +++ b/docs/ja/query_language/functions/machine_learning_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/machine_learning_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/math_functions.md b/docs/ja/query_language/functions/math_functions.md new file mode 120000 index 00000000000..e01674eca4d --- /dev/null +++ b/docs/ja/query_language/functions/math_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/math_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/other_functions.md b/docs/ja/query_language/functions/other_functions.md new file mode 120000 index 00000000000..65164784ced --- /dev/null +++ b/docs/ja/query_language/functions/other_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/other_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/random_functions.md b/docs/ja/query_language/functions/random_functions.md new file mode 120000 index 00000000000..b873e0c86ac --- /dev/null +++ b/docs/ja/query_language/functions/random_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/random_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/rounding_functions.md b/docs/ja/query_language/functions/rounding_functions.md new file 
mode 120000 index 00000000000..e1217e3b25a --- /dev/null +++ b/docs/ja/query_language/functions/rounding_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/rounding_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/splitting_merging_functions.md b/docs/ja/query_language/functions/splitting_merging_functions.md new file mode 120000 index 00000000000..5f8771abdec --- /dev/null +++ b/docs/ja/query_language/functions/splitting_merging_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/splitting_merging_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/string_functions.md b/docs/ja/query_language/functions/string_functions.md new file mode 120000 index 00000000000..cc4104aaf53 --- /dev/null +++ b/docs/ja/query_language/functions/string_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/string_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/string_replace_functions.md b/docs/ja/query_language/functions/string_replace_functions.md new file mode 120000 index 00000000000..4ec963ffd0f --- /dev/null +++ b/docs/ja/query_language/functions/string_replace_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/string_replace_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/string_search_functions.md b/docs/ja/query_language/functions/string_search_functions.md new file mode 120000 index 00000000000..0a2c7f4c4f1 --- /dev/null +++ b/docs/ja/query_language/functions/string_search_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/string_search_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/type_conversion_functions.md b/docs/ja/query_language/functions/type_conversion_functions.md new file mode 120000 index 00000000000..fcf51570d15 --- /dev/null +++ b/docs/ja/query_language/functions/type_conversion_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/type_conversion_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/url_functions.md b/docs/ja/query_language/functions/url_functions.md new file mode 120000 index 00000000000..529e4ffdd53 --- /dev/null +++ b/docs/ja/query_language/functions/url_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/url_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/uuid_functions.md b/docs/ja/query_language/functions/uuid_functions.md new file mode 120000 index 00000000000..95e3ded0477 --- /dev/null +++ b/docs/ja/query_language/functions/uuid_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/uuid_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/functions/ym_dict_functions.md b/docs/ja/query_language/functions/ym_dict_functions.md new file mode 120000 index 00000000000..ec5ddc84479 --- /dev/null +++ b/docs/ja/query_language/functions/ym_dict_functions.md @@ -0,0 +1 @@ +../../../en/query_language/functions/ym_dict_functions.md \ No newline at end of file diff --git a/docs/ja/query_language/index.md b/docs/ja/query_language/index.md new file mode 120000 index 00000000000..44dfff9bb18 --- /dev/null +++ b/docs/ja/query_language/index.md @@ -0,0 +1 @@ +../../en/query_language/index.md \ No newline at end of file diff --git a/docs/ja/query_language/insert_into.md b/docs/ja/query_language/insert_into.md new file mode 120000 index 00000000000..29b47662b0d --- /dev/null +++ 
b/docs/ja/query_language/insert_into.md @@ -0,0 +1 @@ +../../en/query_language/insert_into.md \ No newline at end of file diff --git a/docs/ja/query_language/misc.md b/docs/ja/query_language/misc.md new file mode 120000 index 00000000000..3bd814f3568 --- /dev/null +++ b/docs/ja/query_language/misc.md @@ -0,0 +1 @@ +../../en/query_language/misc.md \ No newline at end of file diff --git a/docs/ja/query_language/operators.md b/docs/ja/query_language/operators.md new file mode 120000 index 00000000000..f94df928a82 --- /dev/null +++ b/docs/ja/query_language/operators.md @@ -0,0 +1 @@ +../../en/query_language/operators.md \ No newline at end of file diff --git a/docs/ja/query_language/select.md b/docs/ja/query_language/select.md new file mode 120000 index 00000000000..c8ec8369383 --- /dev/null +++ b/docs/ja/query_language/select.md @@ -0,0 +1 @@ +../../en/query_language/select.md \ No newline at end of file diff --git a/docs/ja/query_language/show.md b/docs/ja/query_language/show.md new file mode 120000 index 00000000000..4c2f4cf2c4f --- /dev/null +++ b/docs/ja/query_language/show.md @@ -0,0 +1 @@ +../../en/query_language/show.md \ No newline at end of file diff --git a/docs/ja/query_language/syntax.md b/docs/ja/query_language/syntax.md new file mode 120000 index 00000000000..5307fd51ae8 --- /dev/null +++ b/docs/ja/query_language/syntax.md @@ -0,0 +1 @@ +../../en/query_language/syntax.md \ No newline at end of file diff --git a/docs/ja/query_language/system.md b/docs/ja/query_language/system.md new file mode 120000 index 00000000000..6061858c3f2 --- /dev/null +++ b/docs/ja/query_language/system.md @@ -0,0 +1 @@ +../../en/query_language/system.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/file.md b/docs/ja/query_language/table_functions/file.md new file mode 120000 index 00000000000..a514547109a --- /dev/null +++ b/docs/ja/query_language/table_functions/file.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/file.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/hdfs.md b/docs/ja/query_language/table_functions/hdfs.md new file mode 120000 index 00000000000..2616e737eb6 --- /dev/null +++ b/docs/ja/query_language/table_functions/hdfs.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/hdfs.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/index.md b/docs/ja/query_language/table_functions/index.md new file mode 120000 index 00000000000..89b22522859 --- /dev/null +++ b/docs/ja/query_language/table_functions/index.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/index.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/input.md b/docs/ja/query_language/table_functions/input.md new file mode 120000 index 00000000000..f23cc8ee673 --- /dev/null +++ b/docs/ja/query_language/table_functions/input.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/input.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/jdbc.md b/docs/ja/query_language/table_functions/jdbc.md new file mode 120000 index 00000000000..73bec80ca58 --- /dev/null +++ b/docs/ja/query_language/table_functions/jdbc.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/jdbc.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/merge.md b/docs/ja/query_language/table_functions/merge.md new file mode 120000 index 00000000000..383f6c88331 --- /dev/null +++ b/docs/ja/query_language/table_functions/merge.md @@ -0,0 
+1 @@ +../../../en/query_language/table_functions/merge.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/mysql.md b/docs/ja/query_language/table_functions/mysql.md new file mode 120000 index 00000000000..75c032cc63f --- /dev/null +++ b/docs/ja/query_language/table_functions/mysql.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/mysql.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/numbers.md b/docs/ja/query_language/table_functions/numbers.md new file mode 120000 index 00000000000..a679b915669 --- /dev/null +++ b/docs/ja/query_language/table_functions/numbers.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/numbers.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/odbc.md b/docs/ja/query_language/table_functions/odbc.md new file mode 120000 index 00000000000..7620f920494 --- /dev/null +++ b/docs/ja/query_language/table_functions/odbc.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/odbc.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/remote.md b/docs/ja/query_language/table_functions/remote.md new file mode 120000 index 00000000000..b157c4076d3 --- /dev/null +++ b/docs/ja/query_language/table_functions/remote.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/remote.md \ No newline at end of file diff --git a/docs/ja/query_language/table_functions/url.md b/docs/ja/query_language/table_functions/url.md new file mode 120000 index 00000000000..038e08f7ba9 --- /dev/null +++ b/docs/ja/query_language/table_functions/url.md @@ -0,0 +1 @@ +../../../en/query_language/table_functions/url.md \ No newline at end of file diff --git a/docs/ja/roadmap.md b/docs/ja/roadmap.md new file mode 120000 index 00000000000..24df86352b3 --- /dev/null +++ b/docs/ja/roadmap.md @@ -0,0 +1 @@ +../en/roadmap.md \ No newline at end of file diff --git a/docs/ja/security_changelog.md b/docs/ja/security_changelog.md new file mode 120000 index 00000000000..101a4f4e48c --- /dev/null +++ b/docs/ja/security_changelog.md @@ -0,0 +1 @@ +../en/security_changelog.md \ No newline at end of file diff --git a/docs/redirects.txt b/docs/redirects.txt index 0ff077b660c..b38f6d242f2 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -1,3 +1,4 @@ +introduction/ya_metrika_task.md introduction/history.md system_tables.md operations/system_tables.md system_tables/system.asynchronous_metrics.md operations/system_tables.md system_tables/system.clusters.md operations/system_tables.md diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 801a89af49e..85171cd93d7 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -1074,6 +1074,8 @@ Hold. Полезно для заказчиков внутри Яндекса, н
### 21.5. Parallelizing INSERT in INSERT SELECT when necessary.
+[Vxider](https://github.com/Vxider), ICT
+
### 21.6. Reducing the number of threads for SELECT in the case of a trivial INSERT SELECT.
### 21.7. Query result cache.
diff --git a/docs/ru/getting_started/index.md b/docs/ru/getting_started/index.md index c03ac58f24b..a8d0fbaa5b1 100644 --- a/docs/ru/getting_started/index.md +++ b/docs/ru/getting_started/index.md @@ -1,138 +1,10 @@ # Getting Started
-## System Requirements
+If you are new to ClickHouse and want a hands-on sense of its performance, the first thing to do is to go through the [installation process](install.md).
-ClickHouse can run on any Linux, FreeBSD, or Mac OS X with the x86\_64 CPU architecture.
+After that, you can choose one of the following options:
-Note that the pre-built releases are usually compiled using the SSE 4.2 instruction set, which adds a CPU that supports it to the list of system requirements. The command to check whether the current CPU supports SSE 4.2:
-
-```bash
-$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
-```
-
-## Installation
-
-### From DEB Packages
-
-Yandex recommends using the official pre-compiled `deb` packages for Debian or Ubuntu.
-
-To install the official packages, add the Yandex repository to `/etc/apt/sources.list` or to a separate `/etc/apt/sources.list.d/clickhouse.list` file:
-
-```bash
-$ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/
-```
-
-If you want to use the most recent test version, replace `stable` with `testing` (this is not recommended for production environments).
-
-Then, to actually install the packages, run:
-
-```bash
-$ sudo apt-get install dirmngr # optional
-$ sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 # optional
-$ sudo apt-get update
-$ sudo apt-get install clickhouse-client clickhouse-server
-```
-
-These packages can also be downloaded and installed manually from here: .
-
-### From RPM Packages
-
-The ClickHouse team at Yandex recommends using the official pre-compiled `rpm` packages for CentOS, RedHat, and all other rpm-based Linux distributions.
-
-First, add the official repository:
-```bash
-$ sudo yum install yum-utils
-$ sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG
-$ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64
-```
-
-To use the most recent versions, replace `stable` with `testing` (this is recommended for testing environments).
-
-Then run these commands to actually install the packages:
-
-```bash
-$ sudo yum install clickhouse-server clickhouse-client
-```
-
-You can also install the packages manually by downloading them from here: .
-
-### From a Docker Image
-
-To run ClickHouse in Docker, follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). The images use the official `deb` packages internally.
-
-### From Source Code
-
-To compile ClickHouse manually, follow the guide for [Linux](../development/build.md) or [Mac OS X](../development/build_osx.md).
-
-You can compile packages and install them, or use the programs without installing packages. A manual build also lets you disable the SSE 4.2 requirement or build for AArch64 CPUs.
-
-```text
-Client: dbms/programs/clickhouse-client
-Server: dbms/programs/clickhouse-server
-```
-
-For a manually built server to work, you need to create directories for data and metadata and `chown` them for the desired user. The paths to these directories can be changed in the server config (src/dbms/programs/server/config.xml); by default the following are used:
-
-```text
-/opt/clickhouse/data/default/
-/opt/clickhouse/metadata/default/
-```
-
-On Gentoo, you can simply use `emerge clickhouse` to install ClickHouse from source.
-
-## Launch
-
-To start the server as a daemon, run:
-
-```bash
-$ sudo service clickhouse-server start
-```
-
-See the logs in the `/var/log/clickhouse-server/` directory.
-
-If the server does not start, check the configuration in the `/etc/clickhouse-server/config.xml` file.
-
-You can also launch the server manually from the console:
-
-```bash
-$ clickhouse-server --config-file=/etc/clickhouse-server/config.xml
-```
-
-In this case, the log is printed to the console, which is convenient during development.
-If the configuration file is in the current directory, the `--config-file` parameter is not required; by default, the `./config.xml` file is used.
-
-After starting the server, you can connect to it with the command-line client:
-
-```bash
-$ clickhouse-client
-```
-
-By default, it connects to localhost:9000 as the `default` user without a password. The client can also be used to connect to a remote server with the `--host` argument.
-
-The terminal must use UTF-8 encoding.
-
-More detailed information about the client is available in the ["Command-line client"](../interfaces/cli.md) section.
-
-An example of checking that the system works:
-
-```bash
-$ ./clickhouse-client
-ClickHouse client version 0.0.18749.
-Connecting to localhost:9000.
-Connected to ClickHouse server version 0.0.18749.
-```
-```sql
-SELECT 1
-```
-```text
-┌─1─┐
-│ 1 │
-└───┘
-```
-
-**Congratulations, the system works!**
-
-For further experiments, you can try loading one of the test data sets or go through the [step-by-step tutorial for beginners](https://clickhouse.yandex/tutorial.html).
+* [Go through the detailed tutorial for beginners](tutorial.md)
+* [Experiment with the example datasets](example_datasets/ontime.md)

[Original article](https://clickhouse.yandex/docs/ru/getting_started/) diff --git a/docs/ru/getting_started/install.md b/docs/ru/getting_started/install.md new file mode 100644 index 00000000000..29ccd2b14f4 --- /dev/null +++ b/docs/ru/getting_started/install.md @@ -0,0 +1,144 @@
+# Installation
+
+## System Requirements
+
+ClickHouse can run on any Linux, FreeBSD, or Mac OS X operating system with the x86\_64, AArch64, or PowerPC64LE CPU architecture.
+
+Pre-built packages are compiled for x86\_64 and use the SSE 4.2 instruction set, so unless stated otherwise, support for it in the CPU you use becomes an additional system requirement. Here is the command to check whether the current CPU supports SSE 4.2:
+
+``` bash
+$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
+```
+
+To run ClickHouse on processors that do not support SSE 4.2, or that have the AArch64 or PowerPC64LE architecture, you need to [build ClickHouse from source](#from-sources) yourself with the appropriate configuration settings.
+
+## Available Installation Options
+
+### From DEB Packages {#from-deb-packages}
+
+Yandex recommends using the official pre-compiled `deb` packages for Debian or Ubuntu.
+
+To install the official packages, add the Yandex repository to `/etc/apt/sources.list` or to a separate `/etc/apt/sources.list.d/clickhouse.list` file:
+
+```
+deb http://repo.yandex.ru/clickhouse/deb/stable/ main/
+```
+
+If you want to use the most recent test version, replace `stable` with `testing` (this is not recommended for production environments).
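+
+For instance, the `testing` entry would look like this (an illustrative sketch, not part of the original text; it is simply the line above with `stable` swapped out):
+
+```
+deb http://repo.yandex.ru/clickhouse/deb/testing/ main/
+```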
+
+Then, to actually install the packages, run:
+
+```bash
+sudo apt-get install dirmngr # optional
+sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 # optional
+sudo apt-get update
+sudo apt-get install clickhouse-client clickhouse-server
+```
+
+These packages can also be downloaded and installed manually from here: .
+
+### From RPM Packages {#from-rpm-packages}
+
+The ClickHouse team at Yandex recommends using the official pre-compiled `rpm` packages for CentOS, RedHat, and all other rpm-based Linux distributions.
+
+First, add the official repository:
+```bash
+sudo yum install yum-utils
+sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG
+sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64
+```
+
+To use the most recent versions, replace `stable` with `testing` (this is recommended for testing environments).
+
+Then run these commands to actually install the packages:
+
+```bash
+sudo yum install clickhouse-server clickhouse-client
+```
+
+You can also install the packages manually by downloading them from here: .
+
+### From a Docker Image {#from-docker-image}
+
+To run ClickHouse in Docker, follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). The images use the official `deb` packages internally.
+
+### From Source Code {#from-sources}
+
+To compile ClickHouse manually, follow the guide for [Linux](../development/build.md) or [Mac OS X](../development/build_osx.md).
+
+You can compile packages and install them, or use the programs without installing packages. A manual build also lets you disable the SSE 4.2 requirement or build for AArch64 CPUs.
+
+```
+Client: dbms/programs/clickhouse-client
+Server: dbms/programs/clickhouse-server
+```
+
+For a manually built server to work, you need to create directories for data and metadata and `chown` them for the desired user. The paths to these directories can be changed in the server config (src/dbms/programs/server/config.xml); by default the following are used:
+
+```
+/opt/clickhouse/data/default/
+/opt/clickhouse/metadata/default/
+```
+
+On Gentoo, you can simply use `emerge clickhouse` to install ClickHouse from source.
+
+## Launch
+
+To start the server as a daemon, run:
+
+``` bash
+$ sudo service clickhouse-server start
+```
+
+See the logs in the `/var/log/clickhouse-server/` directory.
+
+If the server does not start, check the configuration in the `/etc/clickhouse-server/config.xml` file.
+
+You can also launch the server manually from the console:
+
+``` bash
+$ clickhouse-server --config-file=/etc/clickhouse-server/config.xml
+```
+
+In this case, the log is printed to the console, which is convenient during development.
+If the configuration file is in the current directory, the `--config-file` parameter is not required; by default, the `./config.xml` file is used.
+
+After starting the server, you can connect to it with the command-line client:
+
+``` bash
+$ clickhouse-client
+```
+
+By default, it connects to localhost:9000 as the `default` user without a password. The client can also be used to connect to a remote server with the `--host` argument.
+
+The terminal must use UTF-8 encoding.
+
+Более подробная информация о клиенте располагается в разделе [«Клиент командной строки»](../interfaces/cli.md).
+
+Пример проверки работоспособности системы:
+
+``` bash
+$ ./clickhouse-client
+ClickHouse client version 0.0.18749.
+Connecting to localhost:9000.
+Connected to ClickHouse server version 0.0.18749.
+
+:) SELECT 1
+
+SELECT 1
+
+┌─1─┐
+│ 1 │
+└───┘
+
+1 rows in set. Elapsed: 0.003 sec.
+
+:)
+```
+
+**Поздравляем, система работает!**
+
+Для дальнейших экспериментов можно попробовать загрузить один из тестовых наборов данных или пройти [пошаговое руководство для начинающих](https://clickhouse.yandex/tutorial.html).
+
+[Оригинальная статья](https://clickhouse.yandex/docs/ru/getting_started/install/)

diff --git a/docs/ru/getting_started/tutorial.md b/docs/ru/getting_started/tutorial.md
new file mode 120000
index 00000000000..8bc40816ab2
--- /dev/null
+++ b/docs/ru/getting_started/tutorial.md
@@ -0,0 +1 @@
+../../en/getting_started/tutorial.md
\ No newline at end of file

diff --git a/docs/ru/introduction/ya_metrika_task.md b/docs/ru/introduction/history.md
similarity index 99%
rename from docs/ru/introduction/ya_metrika_task.md
rename to docs/ru/introduction/history.md
index c7e22346ae5..c0035b51f82 100644
--- a/docs/ru/introduction/ya_metrika_task.md
+++ b/docs/ru/introduction/history.md
@@ -1,4 +1,4 @@
-# Постановка задачи в Яндекс.Метрике
+# История ClickHouse

 ClickHouse изначально разрабатывался для обеспечения работы [Яндекс.Метрики](https://metrika.yandex.ru/), [второй крупнейшей в мире](http://w3techs.com/technologies/overview/traffic_analysis/all) платформы для веб-аналитики, и продолжает быть её ключевым компонентом. При более 13 триллионах записей в базе данных и более 20 миллиардах событий в сутки, ClickHouse позволяет генерировать индивидуально настроенные отчёты на лету напрямую из неагрегированных данных. Данная статья вкратце демонстрирует, какие цели исторически стояли перед ClickHouse на ранних этапах его развития.

diff --git a/docs/ru/query_language/functions/string_functions.md b/docs/ru/query_language/functions/string_functions.md
index 5e5a270f51b..2169cb794e0 100644
--- a/docs/ru/query_language/functions/string_functions.md
+++ b/docs/ru/query_language/functions/string_functions.md
@@ -189,6 +189,120 @@ SELECT startsWith('Hello, world!', 'He');
 └───────────────────────────────────┘
 ```

+## trimLeft {#trimleft}
+
+Удаляет все последовательные вхождения обычных пробелов (символ ASCII 32) с левого конца строки. Не удаляет другие виды пробельных символов (табуляция, неразрывный пробел и т. д.).
+
+**Синтаксис**
+
+```sql
+trimLeft(string)
+```
+
+Алиас: `ltrim`.
+
+**Параметры**
+
+- `string` — строка для обрезки. [String](../../data_types/string.md).
+
+**Возвращаемое значение**
+
+Исходную строку без обычных пробелов слева.
+
+Тип: `String`.
+
+**Пример**
+
+Запрос:
+
+```sql
+SELECT trimLeft('     Hello, world!     ')
+```
+
+Ответ:
+
+```text
+┌─trimLeft('     Hello, world!     ')─┐
+│ Hello, world!                       │
+└─────────────────────────────────────┘
+```
+
+## trimRight {#trimright}
+
+Удаляет все последовательные вхождения обычных пробелов (символ ASCII 32) с правого конца строки. Не удаляет другие виды пробельных символов (табуляция, неразрывный пробел и т. д.).
+
+**Синтаксис**
+
+```sql
+trimRight(string)
+```
+
+Алиас: `rtrim`.
+
+**Параметры**
+
+- `string` — строка для обрезки. [String](../../data_types/string.md).
+
+**Возвращаемое значение**
+
+Исходную строку без обычных пробелов справа.
+
+Тип: `String`.
+
+**Пример**
+
+Запрос:
+
+```sql
+SELECT trimRight('     Hello, world!     ')
+```
+
+Ответ:
+
+```text
+┌─trimRight('     Hello, world!     ')─┐
+│      Hello, world!                   │
+└──────────────────────────────────────┘
+```
+
+## trimBoth {#trimboth}
+
+Удаляет все последовательные вхождения обычных пробелов (символ ASCII 32) с обоих концов строки. Не удаляет другие виды пробельных символов (табуляция, неразрывный пробел и т. д.).
+
+**Синтаксис**
+
+```sql
+trimBoth(string)
+```
+
+Алиас: `trim`.
+
+**Параметры**
+
+- `string` — строка для обрезки. [String](../../data_types/string.md).
+
+**Возвращаемое значение**
+
+Исходную строку без обычных пробелов с обоих концов строки.
+
+Тип: `String`.
+
+**Пример**
+
+Запрос:
+
+```sql
+SELECT trimBoth('     Hello, world!     ')
+```
+
+Ответ:
+
+```text
+┌─trimBoth('     Hello, world!     ')─┐
+│ Hello, world!                       │
+└─────────────────────────────────────┘
+```
+
 ## CRC32(s)

 Возвращает чексумму CRC32 данной строки, используя многочлен CRC-32-IEEE 802.3 и начальное значение `0xffffffff` (т.к. используется реализация из zlib).

diff --git a/docs/toc_en.yml b/docs/toc_en.yml
index 5ed9265b3bc..8a2b32b240a 100644
--- a/docs/toc_en.yml
+++ b/docs/toc_en.yml
@@ -5,10 +5,12 @@ nav:
     - 'Distinctive Features of ClickHouse': 'introduction/distinctive_features.md'
     - 'ClickHouse Features that Can Be Considered Disadvantages': 'introduction/features_considered_disadvantages.md'
     - 'Performance': 'introduction/performance.md'
-    - 'The Yandex.Metrica Task': 'introduction/ya_metrika_task.md'
+    - 'History': 'introduction/history.md'

 - 'Getting Started':
-    - 'Deploying and Running': 'getting_started/index.md'
+    - 'hidden': 'getting_started/index.md'
+    - 'Installation': 'getting_started/install.md'
+    - 'Tutorial': 'getting_started/tutorial.md'
     - 'Example Datasets':
         - 'OnTime': 'getting_started/example_datasets/ontime.md'
         - 'New York Taxi Data': 'getting_started/example_datasets/nyc_taxi.md'

diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml
index 207034a8718..c5a2a7fd80b 100644
--- a/docs/toc_fa.yml
+++ b/docs/toc_fa.yml
@@ -1,15 +1,17 @@
 nav:
-- 'Introduction':
+- 'ﯽﻓﺮﻌﻣ':
     - 'ClickHouse چیست؟': 'index.md'
     - ' ویژگی های برجسته ClickHouse': 'introduction/distinctive_features.md'
     - ' ویژگی های از ClickHouse که می تواند معایبی باشد': 'introduction/features_considered_disadvantages.md'
-    - 'Performance': 'introduction/performance.md'
-    - 'The Yandex.Metrica task': 'introduction/ya_metrika_task.md'
+    - 'ﯽﯾﺍﺭﺎﮐ': 'introduction/performance.md'
+    - 'ﺦﯾﺭﺎﺗ': 'introduction/history.md'

 - 'Getting started':
-    - ' شروع به کار': 'getting_started/index.md'
-    - 'Example datasets':
+    - 'hidden': 'getting_started/index.md'
+    - 'ﯼﺯﺍﺪﻧﺍ ﻩﺍﺭ ﻭ ﺐﺼﻧ': 'getting_started/install.md'
+    - 'ﺵﺯﻮﻣﺁ': 'getting_started/tutorial.md'
+    - 'ﻪﻧﻮﻤﻧ ﯼﺎﻫ ﻩﺩﺍﺩ ﻪﻋﻮﻤﺠﻣ':
         - 'OnTime': 'getting_started/example_datasets/ontime.md'
         - ' داده های تاکسی New York': 'getting_started/example_datasets/nyc_taxi.md'
         - ' بنچمارک AMPLab Big Data': 'getting_started/example_datasets/amplab_benchmark.md'
@@ -18,7 +20,7 @@ nav:
     - ' بنچمارک Star Schema': 'getting_started/example_datasets/star_schema.md'
     - 'Yandex.Metrica Data': 'getting_started/example_datasets/metrica.md'

-- 'Interfaces':
+- 'ﻂﺑﺍﺭ':
     - 'Interface ها': 'interfaces/index.md'
     - ' کلاینت Command-line': 'interfaces/cli.md'
     - 'Native interface (TCP)': 'interfaces/tcp.md'
@@ -33,7 +35,7 @@ nav:
     - 'رابط های بصری': 'interfaces/third-party/gui.md'
     - 'پروکسی': 'interfaces/third-party/proxy.md'

-- 'Data types':
+- 'ﻩﺩﺍﺩ ﻉﺍﻮﻧﺍ':
     - 'Introduction': 'data_types/index.md'
     - 'UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64': 'data_types/int_uint.md'
     - 'Float32, Float64': 'data_types/float.md'
@@ -147,6 +149,7 @@ nav: - 'Working with geographical coordinates': 'query_language/functions/geo.md' - 'Working with Nullable arguments': 'query_language/functions/functions_for_nulls.md' - 'Machine Learning Functions': 'query_language/functions/machine_learning_functions.md' + - 'Introspection': 'query_language/functions/introspection.md' - 'Other': 'query_language/functions/other_functions.md' - 'Aggregate Functions': - 'Introduction': 'query_language/agg_functions/index.md' diff --git a/docs/toc_ja.yml b/docs/toc_ja.yml index 3c4a5506a06..8a2b32b240a 100644 --- a/docs/toc_ja.yml +++ b/docs/toc_ja.yml @@ -5,10 +5,12 @@ nav: - 'Distinctive Features of ClickHouse': 'introduction/distinctive_features.md' - 'ClickHouse Features that Can Be Considered Disadvantages': 'introduction/features_considered_disadvantages.md' - 'Performance': 'introduction/performance.md' - - 'The Yandex.Metrica Task': 'introduction/ya_metrika_task.md' + - 'History': 'introduction/history.md' - 'Getting Started': - - 'Deploying and Running': 'getting_started/index.md' + - 'hidden': 'getting_started/index.md' + - 'Installation': 'getting_started/install.md' + - 'Tutorial': 'getting_started/tutorial.md' - 'Example Datasets': - 'OnTime': 'getting_started/example_datasets/ontime.md' - 'New York Taxi Data': 'getting_started/example_datasets/nyc_taxi.md' @@ -119,6 +121,7 @@ nav: - 'Working with geographical coordinates': 'query_language/functions/geo.md' - 'Working with Nullable arguments': 'query_language/functions/functions_for_nulls.md' - 'Machine Learning Functions': 'query_language/functions/machine_learning_functions.md' + - 'Introspection': 'query_language/functions/introspection.md' - 'Other': 'query_language/functions/other_functions.md' - 'Aggregate Functions': - 'Introduction': 'query_language/agg_functions/index.md' @@ -215,8 +218,9 @@ nav: - 'Overview of ClickHouse Architecture': 'development/architecture.md' - 'How to Build ClickHouse on Linux': 'development/build.md' - 'How to Build ClickHouse on Mac OS X': 'development/build_osx.md' - - 'How to Build ClickHouse on Linux for Mac OS X': 'development/build_cross.md' - - 'How to Write C++ code': 'development/style.md' + - 'How to Build ClickHouse on Linux for Mac OS X': 'development/build_cross_osx.md' + - 'How to Build ClickHouse on Linux for AARCH64 (ARM64)': 'development/build_cross_arm.md' + - 'How to Write C++ Code': 'development/style.md' - 'How to Run ClickHouse Tests': 'development/tests.md' - 'The Beginner ClickHouse Developer Instruction': 'development/developer_instruction.md' - 'Third-Party Libraries Used': 'development/contrib.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 5c71894c8f6..9e4b948cc02 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -5,11 +5,13 @@ nav: - 'Отличительные возможности ClickHouse': 'introduction/distinctive_features.md' - 'Особенности ClickHouse, которые могут считаться недостатками': 'introduction/features_considered_disadvantages.md' - 'Производительность': 'introduction/performance.md' - - 'Постановка задачи в Яндекс.Метрике': 'introduction/ya_metrika_task.md' + - 'История': 'introduction/history.md' - 'Информационная поддержка': 'introduction/info.md' - 'Начало работы': - - 'Установка и запуск': 'getting_started/index.md' + - 'hidden': 'getting_started/index.md' + - 'Установка': 'getting_started/install.md' + - 'Руководство для начинающих': 'getting_started/tutorial.md' - 'Тестовые наборы данных': - 'OnTime': 'getting_started/example_datasets/ontime.md' - 'Данные о такси в Нью-Йорке': 
'getting_started/example_datasets/nyc_taxi.md' @@ -214,7 +216,7 @@ nav: - 'Обзор архитектуры ClickHouse': 'development/architecture.md' - 'Как собрать ClickHouse на Linux': 'development/build.md' - 'Как собрать ClickHouse на Mac OS X': 'development/build_osx.md' - - 'Как собрать ClickHouse на Linux для Mac OS X': 'development/build_cross.md' + - 'Как собрать ClickHouse на Linux для Mac OS X': 'development/build_cross_osx.md' - 'Как писать код на C++': 'development/style.md' - 'Как запустить тесты': 'development/tests.md' - 'Инструкция для начинающего разработчика ClickHouse': 'development/developer_instruction.md' diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index c8be2ad7606..7395dcfe145 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -5,10 +5,12 @@ nav: - 'ClickHouse的独特功能': 'introduction/distinctive_features.md' - 'ClickHouse功能可被视为缺点': 'introduction/features_considered_disadvantages.md' - '性能': 'introduction/performance.md' - - 'Yandex.Metrica使用案例': 'introduction/ya_metrika_task.md' + - '历史': 'introduction/history.md' - '入门指南': - - '部署运行': 'getting_started/index.md' + - 'hidden': 'getting_started/index.md' + - '安装': 'getting_started/install.md' + - '教程': 'getting_started/tutorial.md' - '示例数据集': - '航班飞行数据': 'getting_started/example_datasets/ontime.md' - '纽约市出租车数据': 'getting_started/example_datasets/nyc_taxi.md' @@ -16,6 +18,7 @@ nav: - '维基访问数据': 'getting_started/example_datasets/wikistat.md' - 'Criteo TB级别点击日志': 'getting_started/example_datasets/criteo.md' - 'Star Schema基准测试': 'getting_started/example_datasets/star_schema.md' + - 'Yandex.Metrica': 'getting_started/example_datasets/metrica.md' - '客户端': - '介绍': 'interfaces/index.md' @@ -145,6 +148,7 @@ nav: - 'GEO函数': 'query_language/functions/geo.md' - 'Nullable处理函数': 'query_language/functions/functions_for_nulls.md' - '机器学习函数': 'query_language/functions/machine_learning_functions.md' + - 'Introspection': 'query_language/functions/introspection.md' - '其他函数': 'query_language/functions/other_functions.md' - '聚合函数': - '介绍': 'query_language/agg_functions/index.md' @@ -212,7 +216,7 @@ nav: - 'ClickHouse架构概述': 'development/architecture.md' - '如何在Linux中编译ClickHouse': 'development/build.md' - '如何在Mac OS X中编译ClickHouse': 'development/build_osx.md' - - '如何在Linux中编译Mac OS X ClickHouse': 'development/build_cross.md' + - '如何在Linux中编译Mac OS X ClickHouse': 'development/build_cross_osx.md' - '如何编写C++代码': 'development/style.md' - '如何运行ClickHouse测试': 'development/tests.md' - '开发者指南': 'development/developer_instruction.md' diff --git a/docs/tools/make_links.sh b/docs/tools/make_links.sh index 084f8b9d97b..04c51424ec8 100755 --- a/docs/tools/make_links.sh +++ b/docs/tools/make_links.sh @@ -11,7 +11,7 @@ function do_make_links() for lang in "${langs[@]}" do # replacing "/./" with / - dst_file="../${lang}/${src_file}" + dst_file="../${lang}${src_file}" dst_file="${dst_file/\/\.\//\/}" mkdir -p $(dirname "${dst_file}") diff --git a/docs/tools/mkdocs-material-theme/assets/flags/ja.svg b/docs/tools/mkdocs-material-theme/assets/flags/ja.svg index 177d0e78819..a666c272523 100644 --- a/docs/tools/mkdocs-material-theme/assets/flags/ja.svg +++ b/docs/tools/mkdocs-material-theme/assets/flags/ja.svg @@ -1,5 +1,8 @@ - - - - + + + + + + + diff --git a/docs/tools/mkdocs-material-theme/partials/language/ja.html b/docs/tools/mkdocs-material-theme/partials/language/ja.html index 09c3b291fed..47341ab06ee 100644 --- a/docs/tools/mkdocs-material-theme/partials/language/ja.html +++ b/docs/tools/mkdocs-material-theme/partials/language/ja.html @@ -7,6 +7,12 @@ "footer.next": 
"次", "meta.comments": "コメント", "meta.source": "ソース", + "nav.latest": "master", + "nav.multi_page": "マルチページ版", + "nav.pdf": "PDF版", + "nav.release": "リリース", + "nav.single_page": "シングルページ版", + "nav.source": "ClickHouseソースコード", "search.placeholder": "検索", "search.result.placeholder": "検索キーワードを入力してください", "search.result.none": "何も見つかりませんでした", diff --git a/docs/zh/getting_started/example_datasets/metrica.md b/docs/zh/getting_started/example_datasets/metrica.md new file mode 120000 index 00000000000..984023973eb --- /dev/null +++ b/docs/zh/getting_started/example_datasets/metrica.md @@ -0,0 +1 @@ +../../../en/getting_started/example_datasets/metrica.md \ No newline at end of file diff --git a/docs/zh/getting_started/index.md b/docs/zh/getting_started/index.md index b1c94600da0..c73181a6068 100644 --- a/docs/zh/getting_started/index.md +++ b/docs/zh/getting_started/index.md @@ -1,154 +1,10 @@ -# 入门指南 +# 入门 -## 系统要求 +如果您是ClickHouse的新手,并希望亲身体验它的性能,首先您需要通过 [安装过程](install.md). -如果从官方仓库安装,需要确保您使用的是x86\_64处理器构架的Linux并且支持SSE 4.2指令集 +之后,您可以选择以下选项之一: -检查是否支持SSE 4.2: +* [通过详细的教程](tutorial.md) +* [试验示例数据集](example_datasets/ontime.md) -```bash -grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported" -``` - -我们推荐使用Ubuntu或者Debian。终端必须使用UTF-8编码。 - -基于rpm的系统,你可以使用第三方的安装包:https://packagecloud.io/altinity/clickhouse 或者直接安装debian安装包。 - -ClickHouse还可以在FreeBSD与Mac OS X上工作。同时它可以在不支持SSE 4.2的x86\_64构架和AArch64 CPUs上编译。 - -## 安装 - -### 为Debian/Ubuntu安装 - -在`/etc/apt/sources.list` (或创建`/etc/apt/sources.list.d/clickhouse.list`文件)中添加仓库: - -```bash -$ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ -``` - -如果你想使用最新的测试版本,请使用'testing'替换'stable'。 - -然后运行: - -```bash -$ sudo apt-get install dirmngr # optional -$ sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 # optional -$ sudo apt-get update -$ sudo apt-get install clickhouse-client clickhouse-server -``` - -你也可以从这里手动下载安装包:。 - -ClickHouse包含访问控制配置,它们位于`users.xml`文件中(与'config.xml'同目录)。 -默认情况下,允许从任何地方使用默认的‘default’用户无密码的访问ClickHouse。参考‘user/default/networks’。 -有关更多信息,请参考"Configuration files"部分。 - -###为CentOS/RedHat安装 - -Yandex ClickHouse团队建议使用官方预编译的`rpm`软件包,用于CentOS,RedHat和所有其他基于rpm的Linux发行版。 - -首先,您需要添加官方存储库: - -```bash -$ sudo yum install yum-utils -$ sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG -$ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 -``` - -如果您想使用最新版本,请将`stable`替换为`testing`(建议您在测试环境中使用)。 - -然后运行这些命令以实际安装包: - -```bash -$ sudo yum install clickhouse-server clickhouse-client -``` - -您也可以从此处手动下载和安装软件包:。 - -###使用Docker安装 - -要在Docker中运行ClickHouse,请遵循[Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。这些镜像使用官方的`deb`包构建。 - -### 使用源码安装 - -具体编译方式可以参考build.md。 - -你可以编译并安装它们。 -你也可以直接使用而不进行安装。 - -```text -Client: dbms/programs/clickhouse-client -Server: dbms/programs/clickhouse-server -``` - -在服务器中为数据创建如下目录: - -```text -/opt/clickhouse/data/default/ -/opt/clickhouse/metadata/default/ -``` - -(它们可以在server config中配置。) -为需要的用户运行‘chown’ - -日志的路径可以在server config (src/dbms/programs/server/config.xml)中配置。 - -## 启动 - -可以运行如下命令在后台启动服务: - -```bash -sudo service clickhouse-server start -``` - -可以在`/var/log/clickhouse-server/`目录中查看日志。 - -如果服务没有启动,请检查配置文件 `/etc/clickhouse-server/config.xml`。 - -你也可以在控制台中直接启动服务: - -```bash -clickhouse-server --config-file=/etc/clickhouse-server/config.xml -``` - -在这种情况下,日志将被打印到控制台中,这在开发过程中很方便。 -如果配置文件在当前目录中,你可以不指定‘--config-file’参数。它默认使用‘./config.xml’。 - -你可以使用命令行客户端连接到服务: - -```bash -clickhouse-client -``` - 
-默认情况下它使用‘default’用户无密码的与localhost:9000服务建立连接。
-客户端也可以用于连接远程服务,例如:
-
-```bash
-clickhouse-client --host=example.com
-```
-
-有关更多信息,请参考"Command-line client"部分。
-
-检查系统是否工作:
-
-```bash
-milovidov@hostname:~/work/metrica/src/dbms/src/Client$ ./clickhouse-client
-ClickHouse client version 0.0.18749.
-Connecting to localhost:9000.
-Connected to ClickHouse server version 0.0.18749.
-```
-```sql
-:) SELECT 1
-```
-```text
-┌─1─┐
-│ 1 │
-└───┘
-```
-
-**恭喜,系统已经工作了!**
-
-为了继续进行实验,你可以尝试下载测试数据集。
-
-
-[Original article](https://clickhouse.yandex/docs/en/getting_started/)
+[来源文章](https://clickhouse.yandex/docs/zh/getting_started/)

diff --git a/docs/zh/getting_started/install.md b/docs/zh/getting_started/install.md
new file mode 100644
index 00000000000..aa3cb816218
--- /dev/null
+++ b/docs/zh/getting_started/install.md
@@ -0,0 +1,152 @@
+## 系统要求
+
+ClickHouse可以在任何具有x86\_64,AArch64或PowerPC64LE CPU架构的Linux,FreeBSD或Mac OS X上运行。
+
+虽然预构建的二进制文件通常是为x86\_64编译并利用SSE 4.2指令集,但除非另有说明,否则使用支持它的CPU将成为额外的系统要求。这是检查当前CPU是否支持SSE 4.2的命令:
+
+``` bash
+$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
+```
+
+要在不支持SSE 4.2或具有AArch64或PowerPC64LE体系结构的处理器上运行ClickHouse,您应该[从源代码构建ClickHouse](#from-sources)并进行适当的配置调整。
+
+## 可用的安装选项
+
+### 为Debian/Ubuntu安装 {#from-deb-packages}
+
+在`/etc/apt/sources.list` (或创建`/etc/apt/sources.list.d/clickhouse.list`文件)中添加仓库:
+
+```text
+deb http://repo.yandex.ru/clickhouse/deb/stable/ main/
+```
+
+如果你想使用最新的测试版本,请使用'testing'替换'stable'。
+
+然后运行:
+
+```bash
+sudo apt-get install dirmngr    # optional
+sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4    # optional
+sudo apt-get update
+sudo apt-get install clickhouse-client clickhouse-server
+```
+
+你也可以从这里手动下载安装包:。
+
+ClickHouse包含访问控制配置,它们位于`users.xml`文件中(与'config.xml'同目录)。
+默认情况下,允许从任何地方使用默认的‘default’用户无密码的访问ClickHouse。参考‘user/default/networks’。
+有关更多信息,请参考"Configuration files"部分。
+
+### 来自RPM包 {#from-rpm-packages}
+
+Yandex ClickHouse团队建议使用官方预编译的`rpm`软件包,用于CentOS,RedHat和所有其他基于rpm的Linux发行版。
+
+首先,您需要添加官方存储库:
+
+```bash
+sudo yum install yum-utils
+sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG
+sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64
+```
+
+如果您想使用最新版本,请将`stable`替换为`testing`(建议您在测试环境中使用)。
+
+然后运行这些命令以实际安装包:
+
+```bash
+sudo yum install clickhouse-server clickhouse-client
+```
+
+您也可以从此处手动下载和安装软件包:。
+
+### 来自Docker {#from-docker-image}
+
+要在Docker中运行ClickHouse,请遵循[Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。这些镜像使用官方的`deb`包构建。
+
+### 使用源码安装 {#from-sources}
+
+具体编译方式可以参考build.md。
+
+你可以编译并安装它们。
+你也可以直接使用而不进行安装。
+
+```text
+Client: dbms/programs/clickhouse-client
+Server: dbms/programs/clickhouse-server
+```
+
+在服务器中为数据创建如下目录:
+
+```text
+/opt/clickhouse/data/default/
+/opt/clickhouse/metadata/default/
+```
+
+(它们可以在server config中配置。)
+为需要的用户运行'chown'。
+
+日志的路径可以在server config (src/dbms/programs/server/config.xml)中配置。
+
+## 启动
+
+可以运行如下命令在后台启动服务:
+
+```bash
+sudo service clickhouse-server start
+```
+
+可以在`/var/log/clickhouse-server/`目录中查看日志。
+
+如果服务没有启动,请检查配置文件 `/etc/clickhouse-server/config.xml`。
+
+你也可以在控制台中直接启动服务:
+
+```bash
+clickhouse-server --config-file=/etc/clickhouse-server/config.xml
+```
+
+在这种情况下,日志将被打印到控制台中,这在开发过程中很方便。
+如果配置文件在当前目录中,你可以不指定'--config-file'参数。它默认使用'./config.xml'。
+
+你可以使用命令行客户端连接到服务:
+
+```bash
+clickhouse-client
+```
+
+默认情况下它使用‘default’用户无密码的与localhost:9000服务建立连接。
+客户端也可以用于连接远程服务,例如:
+
+```bash
+clickhouse-client --host=example.com
+```
+
+有关更多信息,请参考"Command-line client"部分。
+
+检查系统是否工作: + +```bash +milovidov@hostname:~/work/metrica/src/dbms/src/Client$ ./clickhouse-client +ClickHouse client version 0.0.18749. +Connecting to localhost:9000. +Connected to ClickHouse server version 0.0.18749. + +:) SELECT 1 + +SELECT 1 + +┌─1─┐ +│ 1 │ +└───┘ + +1 rows in set. Elapsed: 0.003 sec. + +:) +``` + +**恭喜,系统已经工作了!** + +为了继续进行实验,你可以尝试下载测试数据集。 + + +[Original article](https://clickhouse.yandex/docs/en/getting_started/install/) diff --git a/docs/zh/getting_started/tutorial.md b/docs/zh/getting_started/tutorial.md new file mode 120000 index 00000000000..8bc40816ab2 --- /dev/null +++ b/docs/zh/getting_started/tutorial.md @@ -0,0 +1 @@ +../../en/getting_started/tutorial.md \ No newline at end of file diff --git a/docs/zh/introduction/ya_metrika_task.md b/docs/zh/introduction/history.md similarity index 99% rename from docs/zh/introduction/ya_metrika_task.md rename to docs/zh/introduction/history.md index da4b18826e0..86fe02f84d5 100644 --- a/docs/zh/introduction/ya_metrika_task.md +++ b/docs/zh/introduction/history.md @@ -1,4 +1,4 @@ -# Yandex.Metrica的使用案例 +# ClickHouse历史 ClickHouse最初是为 [Yandex.Metrica](https://metrica.yandex.com/) [世界第二大Web分析平台](http://w3techs.com/technologies/overview/traffic_analysis/all) 而开发的。多年来一直作为该系统的核心组件被该系统持续使用着。目前为止,该系统在ClickHouse中有超过13万亿条记录,并且每天超过200多亿个事件被处理。它允许直接从原始数据中动态查询并生成报告。本文简要介绍了ClickHouse在其早期发展阶段的目标。 diff --git a/docs/zh/query_language/functions/higher_order_functions.md b/docs/zh/query_language/functions/higher_order_functions.md index e64db0bc8d3..39c6770e5b8 100644 --- a/docs/zh/query_language/functions/higher_order_functions.md +++ b/docs/zh/query_language/functions/higher_order_functions.md @@ -12,7 +12,7 @@ 除了'arrayMap'和'arrayFilter'以外的所有其他函数,都可以省略第一个参数(lambda函数)。在这种情况下,默认返回数组元素本身。 -### arrayMap(func, arr1, ...) +### arrayMap(func, arr1, ...) 
{#higher_order_functions-array-map}

 将arr 将从'func'函数的原始应用程序获得的数组返回到'arr'数组中的每个元素。

diff --git a/docs/zh/query_language/functions/introspection.md b/docs/zh/query_language/functions/introspection.md
new file mode 120000
index 00000000000..b1a487e9c77
--- /dev/null
+++ b/docs/zh/query_language/functions/introspection.md
@@ -0,0 +1 @@
+../../../en/query_language/functions/introspection.md
\ No newline at end of file

diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt
index 357e457b240..3e58cba0164 100644
--- a/libs/libcommon/CMakeLists.txt
+++ b/libs/libcommon/CMakeLists.txt
@@ -53,6 +53,7 @@ add_library (common
     include/common/phdr_cache.h

     include/ext/bit_cast.h
+    include/ext/chrono_io.h
     include/ext/collection_cast.h
    include/ext/enumerate.h
     include/ext/function_traits.h

diff --git a/libs/libcommon/include/ext/chrono_io.h b/libs/libcommon/include/ext/chrono_io.h
new file mode 100644
index 00000000000..8fa448b9e6a
--- /dev/null
+++ b/libs/libcommon/include/ext/chrono_io.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <chrono>
+#include <string>
+#include <common/DateLUT.h>
+
+
+namespace ext
+{
+    template <typename Clock, typename Duration = typename Clock::duration>
+    std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
+    {
+        return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp));
+    }
+
+    template <typename Rep, typename Period = std::ratio<1>>
+    std::string to_string(const std::chrono::duration<Rep, Period> & dur)
+    {
+        auto seconds_as_int = std::chrono::duration_cast<std::chrono::seconds>(dur);
+        if (seconds_as_int == dur)
+            return std::to_string(seconds_as_int.count()) + "s";
+        auto seconds_as_double = std::chrono::duration_cast<std::chrono::duration<double>>(dur);
+        return std::to_string(seconds_as_double.count()) + "s";
+    }
+
+    template <typename Clock, typename Duration = typename Clock::duration>
+    std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
+    {
+        return o << to_string(tp);
+    }
+
+    template <typename Rep, typename Period = std::ratio<1>>
+    std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Period> & dur)
+    {
+        return o << to_string(dur);
+    }
+}

diff --git a/libs/libcommon/include/ext/range.h b/libs/libcommon/include/ext/range.h
index 61b644c2ce5..c379d453f7b 100644
--- a/libs/libcommon/include/ext/range.h
+++ b/libs/libcommon/include/ext/range.h
@@ -1,46 +1,42 @@
 #pragma once

 #include <type_traits>
-#include
-#include
-#include
+#include <boost/range/counting_range.hpp>
+#include <boost/range/adaptor/transformed.hpp>

-/** Numeric range iterator, used to represent a half-closed interval [begin, end).
-  * In conjunction with std::reverse_iterator allows for forward and backward iteration
-  * over corresponding interval.
-  */
 namespace ext
 {
-    template <typename T>
-    using range_iterator = boost::counting_iterator<T>;
-
-    /** Range-based for loop adapter for (reverse_)range_iterator.
-      * By and large should be in conjunction with ext::range and ext::reverse_range.
-      */
-    template <typename T>
-    struct range_wrapper
+    /// For loop adaptor which is used to iterate through a half-closed interval [begin, end).
+    template <typename BeginType, typename EndType>
+    inline auto range(BeginType begin, EndType end)
     {
-        using value_type = typename std::remove_reference<T>::type;
-        using iterator = range_iterator<value_type>;
+        using CommonType = typename std::common_type<BeginType, EndType>::type;
+        return boost::counting_range<CommonType>(begin, end);
+    }

-        value_type begin_;
-        value_type end_;
-
-        iterator begin() const { return iterator(begin_); }
-        iterator end() const { return iterator(end_); }
-    };
-
-    /** Constructs range_wrapper for forward-iteration over [begin, end) in range-based for loop.
-      * Usage example:
-      *     for (const auto i : ext::range(0, 4)) print(i);
-      * Output:
-      *     0 1 2 3
-      */
-    template <typename T1, typename T2>
-    inline range_wrapper<typename std::common_type<T1, T2>::type> range(T1 begin, T2 end)
+    template <typename Type>
+    inline auto range(Type end)
     {
-        using common_type = typename std::common_type<T1, T2>::type;
-        return { static_cast<common_type>(begin), static_cast<common_type>(end) };
+        return range(static_cast<Type>(0), end);
+    }
+
+    /// The same as range(), but every value is casted statically to a specified `ValueType`.
+    /// This is useful to iterate through all constants of a enum.
+    template <typename ValueType, typename BeginType, typename EndType>
+    inline auto range_with_static_cast(BeginType begin, EndType end)
+    {
+        using CommonType = typename std::common_type<BeginType, EndType>::type;
+        if constexpr (std::is_same_v<ValueType, CommonType>)
+            return boost::counting_range<CommonType>(begin, end);
+        else
+            return boost::counting_range<CommonType>(begin, end)
+                | boost::adaptors::transformed([](CommonType x) -> ValueType { return static_cast<ValueType>(x); });
+    }
+
+    template <typename ValueType, typename EndType>
+    inline auto range_with_static_cast(EndType end)
+    {
+        return range_with_static_cast<ValueType>(static_cast<EndType>(0), end);
     }
 }

diff --git a/libs/libcommon/include/ext/shared_ptr_helper.h b/libs/libcommon/include/ext/shared_ptr_helper.h
index ca7219e6261..df132382fa6 100644
--- a/libs/libcommon/include/ext/shared_ptr_helper.h
+++ b/libs/libcommon/include/ext/shared_ptr_helper.h
@@ -20,4 +20,20 @@ struct shared_ptr_helper
     }
 };

+
+template <typename T>
+struct is_shared_ptr
+{
+    static constexpr bool value = false;
+};
+
+
+template <typename T>
+struct is_shared_ptr<std::shared_ptr<T>>
+{
+    static constexpr bool value = true;
+};
+
+template <typename T>
+inline constexpr bool is_shared_ptr_v = is_shared_ptr<T>::value;
 }

diff --git a/website/nginx/default.conf b/website/nginx/default.conf
index 98edad41055..fc029323fe2 100644
--- a/website/nginx/default.conf
+++ b/website/nginx/default.conf
@@ -14,6 +14,8 @@ server {
     }

     rewrite ^/docs/$ https://clickhouse.yandex/docs/en/ permanent;
+    rewrite ^/tutorial.html$ https://clickhouse.yandex/docs/en/getting_started/tutorial/ permanent;
+    rewrite ^/presentations/(.*)$ https://clickhouse.github.io/clickhouse-presentations/$1 permanent;
     rewrite ^/reference_en.html$ https://clickhouse.yandex/docs/en/single/ permanent;
     rewrite ^/reference_ru.html$ https://clickhouse.yandex/docs/ru/single/ permanent;
     rewrite ^/presentations/(.*)$ https://clickhouse.github.io/clickhouse-presentations/$1 permanent;

diff --git a/website/tutorial.html b/website/tutorial.html
deleted file mode 100644
index 52216f61dc8..00000000000
--- a/website/tutorial.html
+++ /dev/null
@@ -1,649 +0,0 @@
-
-ClickHouse Quick Start Guide
-
-
-ClickHouse Tutorial
-

Let's get started with a sample dataset from open sources. We will use USA civil flights data from 1987 to 2015. - It's hard to call this sample big data (it contains 166 million rows, 63 GB of uncompressed data) but this - allows us to quickly get to work. The dataset is available for download here. - Also you may download it from the original data source as described here.

- -

First we will deploy ClickHouse to a single server. After that we will also review the process of deployment to - a cluster with support for sharding and replication.

- -

On Ubuntu and Debian Linux, ClickHouse can be installed from packages. - For other Linux distributions you can compile - ClickHouse from sources and then install it.

- -

The clickhouse-client package contains the clickhouse-client application, an - interactive ClickHouse client. clickhouse-common contains the clickhouse-server binary file. The clickhouse-server - package contains config files for the clickhouse-server.

- -

Server config files are located in /etc/clickhouse-server/. Before getting to work, please notice the path - element in the config. Path determines the location for data storage. It's not really handy to directly - edit the config.xml file considering package updates. The recommended way is to override the config elements in - files of the config.d directory. - Also you may want to set up access - rights at the start.

- -

clickhouse-server won't be launched automatically after package installation. It won't be automatically - restarted after updates either. Start the server with: -

sudo service clickhouse-server start
- The default location for server logs is /var/log/clickhouse-server/. - The server is ready to handle client connections once the "Ready for connections" message has been logged.

- -

Use clickhouse-client to connect to the server.

- -
Tips for clickhouse-client -
- Interactive mode: -
-clickhouse-client
-clickhouse-client --host=... --port=... --user=... --password=...
-
- Enable multiline queries: -
-clickhouse-client -m
-clickhouse-client --multiline
-
- Run queries in batch-mode: -
-clickhouse-client --query='SELECT 1'
-echo 'SELECT 1' | clickhouse-client
-
- Insert data from a file in a specified format: -
-clickhouse-client --query='INSERT INTO table VALUES' < data.txt
-clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
-
-
-
- -

Create table for sample dataset

-
Create table query -
-
-$ clickhouse-client --multiline
-ClickHouse client version 0.0.53720.
-Connecting to localhost:9000.
-Connected to ClickHouse server version 0.0.53720.
-
-:) CREATE TABLE ontime
-(
-    Year UInt16,
-    Quarter UInt8,
-    Month UInt8,
-    DayofMonth UInt8,
-    DayOfWeek UInt8,
-    FlightDate Date,
-    UniqueCarrier FixedString(7),
-    AirlineID Int32,
-    Carrier FixedString(2),
-    TailNum String,
-    FlightNum String,
-    OriginAirportID Int32,
-    OriginAirportSeqID Int32,
-    OriginCityMarketID Int32,
-    Origin FixedString(5),
-    OriginCityName String,
-    OriginState FixedString(2),
-    OriginStateFips String,
-    OriginStateName String,
-    OriginWac Int32,
-    DestAirportID Int32,
-    DestAirportSeqID Int32,
-    DestCityMarketID Int32,
-    Dest FixedString(5),
-    DestCityName String,
-    DestState FixedString(2),
-    DestStateFips String,
-    DestStateName String,
-    DestWac Int32,
-    CRSDepTime Int32,
-    DepTime Int32,
-    DepDelay Int32,
-    DepDelayMinutes Int32,
-    DepDel15 Int32,
-    DepartureDelayGroups String,
-    DepTimeBlk String,
-    TaxiOut Int32,
-    WheelsOff Int32,
-    WheelsOn Int32,
-    TaxiIn Int32,
-    CRSArrTime Int32,
-    ArrTime Int32,
-    ArrDelay Int32,
-    ArrDelayMinutes Int32,
-    ArrDel15 Int32,
-    ArrivalDelayGroups Int32,
-    ArrTimeBlk String,
-    Cancelled UInt8,
-    CancellationCode FixedString(1),
-    Diverted UInt8,
-    CRSElapsedTime Int32,
-    ActualElapsedTime Int32,
-    AirTime Int32,
-    Flights Int32,
-    Distance Int32,
-    DistanceGroup UInt8,
-    CarrierDelay Int32,
-    WeatherDelay Int32,
-    NASDelay Int32,
-    SecurityDelay Int32,
-    LateAircraftDelay Int32,
-    FirstDepTime String,
-    TotalAddGTime String,
-    LongestAddGTime String,
-    DivAirportLandings String,
-    DivReachedDest String,
-    DivActualElapsedTime String,
-    DivArrDelay String,
-    DivDistance String,
-    Div1Airport String,
-    Div1AirportID Int32,
-    Div1AirportSeqID Int32,
-    Div1WheelsOn String,
-    Div1TotalGTime String,
-    Div1LongestGTime String,
-    Div1WheelsOff String,
-    Div1TailNum String,
-    Div2Airport String,
-    Div2AirportID Int32,
-    Div2AirportSeqID Int32,
-    Div2WheelsOn String,
-    Div2TotalGTime String,
-    Div2LongestGTime String,
-    Div2WheelsOff String,
-    Div2TailNum String,
-    Div3Airport String,
-    Div3AirportID Int32,
-    Div3AirportSeqID Int32,
-    Div3WheelsOn String,
-    Div3TotalGTime String,
-    Div3LongestGTime String,
-    Div3WheelsOff String,
-    Div3TailNum String,
-    Div4Airport String,
-    Div4AirportID Int32,
-    Div4AirportSeqID Int32,
-    Div4WheelsOn String,
-    Div4TotalGTime String,
-    Div4LongestGTime String,
-    Div4WheelsOff String,
-    Div4TailNum String,
-    Div5Airport String,
-    Div5AirportID Int32,
-    Div5AirportSeqID Int32,
-    Div5WheelsOn String,
-    Div5TotalGTime String,
-    Div5LongestGTime String,
-    Div5WheelsOff String,
-    Div5TailNum String
-)
-ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192);
-
-
-
- -

Now we have a table of the MergeTree type. - The MergeTree table type is recommended for production use. A table of this kind has a primary key used for - incremental sorting of table data, which allows fast execution of queries on ranges of the primary key.

- - -
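For example, since the primary key of the table created above is (Year, FlightDate), a filter over those leading key columns lets ClickHouse read only the matching ranges of data. A minimal sketch against the table we just created:

```sql
-- Touches only the parts of the data covered by the (Year, FlightDate) key range.
SELECT count()
FROM ontime
WHERE Year = 2015 AND FlightDate BETWEEN '2015-06-01' AND '2015-06-30'
```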

Note - We store ad network banner impression logs in ClickHouse. Each table entry looks like: - [Advertiser ID, Impression ID, attribute1, attribute2, …]. - Let's assume that our aim is to provide a set of reports for each advertiser. A common and frequently demanded query - would be to count impressions for a specific Advertiser ID. This means that the table's primary key should start with - Advertiser ID. In this case ClickHouse needs to read a smaller amount of data to perform the query for a - given Advertiser ID. -

- -
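A minimal sketch of such a table (the impressions table and its columns are hypothetical, used only to illustrate putting Advertiser ID first in the primary key):

```sql
-- Hypothetical schema: the primary key starts with AdvertiserID, so a
-- report for one advertiser reads only that advertiser's key range.
CREATE TABLE impressions
(
    EventDate Date,
    AdvertiserID UInt32,
    ImpressionID UInt64,
    Attribute1 String,
    Attribute2 String
)
ENGINE = MergeTree(EventDate, (AdvertiserID, EventDate), 8192);
```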

Load data

-
xz -v -c -d < ontime.csv.xz | clickhouse-client --query="INSERT INTO ontime FORMAT CSV"
-

The ClickHouse INSERT query allows loading data in any supported - format. Data load requires just O(1) RAM consumption. An INSERT query can receive any data volume as input. - It's strongly recommended to insert data in blocks that are not too small. Notice that an insert of blocks with size up to max_insert_block_size (= 1 048 576 - rows by default) is an atomic operation: the data block will be inserted completely or not inserted at all. In case - of a disconnect during the insert operation you may not know if the block was inserted successfully. To achieve - exactly-once semantics ClickHouse supports idempotency for replicated tables. This means - that you may retry an insert of the same data block (possibly on a different replica) and this block will be - inserted just once. In this guide we will load data from our localhost, so we don't need to worry about data - block generation and exactly-once semantics.

- -
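You can check the block-size threshold mentioned above on a running server through the system.settings table (the default value may differ between versions):

```sql
-- Shows the effective max_insert_block_size for the current session.
SELECT name, value
FROM system.settings
WHERE name = 'max_insert_block_size'
```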

An INSERT query into tables of MergeTree type is non-blocking (and so is a SELECT query). You can execute SELECT - queries right after or even during an insert operation.

- -
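For example, while the load above is still running, you can watch the row count grow from another client session:

```sql
-- Safe to run concurrently with the INSERT; it sees the blocks
-- that have already been committed.
SELECT count() FROM ontime
```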

Our sample dataset is a bit suboptimal, for two reasons.

- -

The first is that the String data type is used in cases where an Enum or a numeric type would fit better.

- -

When the set of possible values is fixed and known to be small (e.g. OS name, browser - vendors etc.), it's recommended to use Enums or numbers to improve performance. - When the set of possible values is not limited (search query, URL, etc.), just go ahead with String.

- -
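As an illustration (a hypothetical table, not part of this dataset), a small fixed set of values could be declared as an Enum8 instead of a String:

```sql
-- Hypothetical example: Enum8 stores one byte per value and compares cheaply.
CREATE TABLE visits
(
    EventDate Date,
    Browser Enum8('Other' = 0, 'Chrome' = 1, 'Firefox' = 2, 'Safari' = 3)
)
ENGINE = MergeTree(EventDate, (EventDate, Browser), 8192);
```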

The second is that the dataset contains redundant fields like Year, Quarter, Month, DayofMonth, DayOfWeek. In fact a - single FlightDate would be enough. Most likely they have been added to improve performance for other DBMSes - whose DateTime handling functions may not be efficient.

- -

ClickHouse functions - for operating with DateTime fields are well-optimized, so such redundancy is not required. In any case, many - columns are not a reason to worry: ClickHouse is a column-oriented - DBMS. This allows you to have as many fields as you need. Hundreds of columns in a table are fine for - ClickHouse.

- -
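For instance, all the redundant calendar columns can be derived from FlightDate on the fly:

```sql
-- Each value is computed directly from the Date column.
SELECT
    toYear(FlightDate) AS year,
    toQuarter(FlightDate) AS quarter,
    toMonth(FlightDate) AS month,
    toDayOfWeek(FlightDate) AS day_of_week
FROM ontime
LIMIT 1
```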

Querying the sample dataset

- -

Here are some examples of queries on our test data.

- -
    -
  • the most popular destinations in 2015;
    -
    -SELECT
    -    OriginCityName,
    -    DestCityName,
    -    count(*) AS flights,
    -    bar(flights, 0, 20000, 40)
    -FROM ontime WHERE Year = 2015 GROUP BY OriginCityName, DestCityName ORDER BY flights DESC LIMIT 20
    -
    - -
    -SELECT
    -    OriginCityName < DestCityName ? OriginCityName : DestCityName AS a,
    -    OriginCityName < DestCityName ? DestCityName : OriginCityName AS b,
    -    count(*) AS flights,
    -    bar(flights, 0, 40000, 40)
    -FROM ontime WHERE Year = 2015 GROUP BY a, b ORDER BY flights DESC LIMIT 20
    -
    -
  • the most popular cities of departure;
    -
    -SELECT OriginCityName, count(*) AS flights
    -FROM ontime GROUP BY OriginCityName ORDER BY flights DESC LIMIT 20
    -
    -
  • cities of departure which offer maximum variety of destinations;
    -
    -SELECT OriginCityName, uniq(Dest) AS u
    -FROM ontime GROUP BY OriginCityName ORDER BY u DESC LIMIT 20
    -
    -
  • flight delay dependence on the day of week;
    -
    -SELECT DayOfWeek, count() AS c, avg(DepDelay >  60) AS delays
    -FROM ontime GROUP BY DayOfWeek ORDER BY DayOfWeek
    -
    -
  • cities of departure with most frequent delays for 1 hour or longer;
    -
    -SELECT OriginCityName, count() AS c, avg(DepDelay >  60) AS delays
    -FROM ontime
    -GROUP BY OriginCityName
    -HAVING c >  100000
    -ORDER BY delays DESC
    -LIMIT 20
    -
    -
  • flights of maximum duration;
    -
    -SELECT OriginCityName, DestCityName, count(*) AS flights, avg(AirTime) AS duration
    -FROM ontime
    -GROUP BY OriginCityName, DestCityName
    -ORDER BY duration DESC
    -LIMIT 20
    -
    -
  • distribution of arrival time delays split by airlines;
    -
    -SELECT Carrier, count() AS c, round(quantileTDigest(0.99)(DepDelay), 2) AS q
    -FROM ontime GROUP BY Carrier ORDER BY q DESC
    -
    -
  • airlines that stopped operating flights;
    -
    -SELECT Carrier, min(Year), max(Year), count()
    -FROM ontime GROUP BY Carrier HAVING max(Year) < 2015 ORDER BY count() DESC
    -
    -
  • most trending destination cities in 2015;
    -
    -SELECT
    -    DestCityName,
    -    sum(Year = 2014) AS c2014,
    -    sum(Year = 2015) AS c2015,
    -    c2015 / c2014 AS diff
    -FROM ontime
    -WHERE Year IN (2014, 2015)
    -GROUP BY DestCityName
    -HAVING c2014 >  10000 AND c2015 >  1000 AND diff >  1
    -ORDER BY diff DESC
    -
    -
  • destination cities with maximum popularity-season dependency.
    -
    -SELECT
    -    DestCityName,
    -    any(total),
    -    avg(abs(monthly * 12 - total) / total) AS avg_month_diff
    -FROM
    -(
    -    SELECT DestCityName, count() AS total
    -    FROM ontime GROUP BY DestCityName HAVING total > 100000
    -)
    -ALL INNER JOIN
    -(
    -    SELECT DestCityName, Month, count() AS monthly
    -    FROM ontime GROUP BY DestCityName, Month HAVING monthly > 10000
    -)
    -USING DestCityName
    -GROUP BY DestCityName
    -ORDER BY avg_month_diff DESC
    -LIMIT 20
    -
-

ClickHouse deployment to a cluster

-

A ClickHouse cluster is a homogeneous cluster. Steps to set it up: -

    -
  1. Install ClickHouse server on all machines of the cluster
  2. Set up cluster configs in the configuration file
  3. Create local tables on each instance
  4. Create a Distributed table
-

- -

A Distributed table is actually a kind of - "view" over the local tables of a ClickHouse cluster. A SELECT query from a distributed table executes using - the resources of all the cluster's shards. You may specify configs for multiple clusters and create multiple - Distributed tables providing views to different clusters.

- -
Config for a cluster of three shards. Each shard stores data on a single - replica -
-
-<remote_servers>
-    <perftest_3shards_1replicas>
-        <shard>
-            <replica>
-                <host>example-perftest01j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <replica>
-                <host>example-perftest02j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <replica>
-                <host>example-perftest03j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-    </perftest_3shards_1replicas>
-</remote_servers>
-
-
-
- Creating a local table: -
CREATE TABLE ontime_local (...) ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192);
- Creating a distributed table providing a view into local tables of the cluster: -
CREATE TABLE ontime_all AS ontime_local
-    ENGINE = Distributed(perftest_3shards_1replicas, default, ontime_local, rand());
- -

You can create a Distributed table on all machines in the cluster. This allows running distributed queries on - any machine of the cluster. Besides the Distributed table, you can also use the *remote* table function.

- -
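A quick sketch of the remote table function, reusing one of the shard hostnames from the config above:

```sql
-- Queries the local table on one remote server directly,
-- without defining a Distributed table first.
SELECT count()
FROM remote('example-perftest01j.yandex.ru', default.ontime_local)
```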

Let's run INSERT SELECT into the Distributed table - to spread the data over multiple servers.

- -
INSERT INTO ontime_all SELECT * FROM ontime;
- -

It's worth noting that the approach given above wouldn't be suitable for sharding large - tables.

- -

As you could expect, heavy queries are executed N times faster when launched on 3 servers instead of one.

-
See here -
- - -

You may have noticed that the quantile calculations are slightly different. This happens due to the t-digest - algorithm implementation, which is non-deterministic: it depends on the order of data processing.

-
-
- -

In this case, we have used a cluster with 3 shards, each containing a single replica.

- -

To provide resilience in a production environment, we recommend that each shard contain 2-3 replicas - distributed between multiple data centers. Note that ClickHouse supports an unlimited number of replicas.

- -
Config for a cluster of one shard containing three replicas -
-
-<remote_servers>
-    ...
-    <perftest_1shards_3replicas>
-        <shard>
-            <replica>
-                <host>example-perftest01j.yandex.ru</host>
-                <port>9000</port>
-             </replica>
-             <replica>
-                <host>example-perftest02j.yandex.ru</host>
-                <port>9000</port>
-             </replica>
-             <replica>
-                <host>example-perftest03j.yandex.ru</host>
-                <port>9000</port>
-             </replica>
-        </shard>
-    </perftest_1shards_3replicas>
-</remote_servers>
-
-
-
- -

To enable replication, ZooKeeper is required. - ClickHouse will take care of data consistency on all replicas and run the restore procedure after a failure - automatically. It's recommended to deploy the ZooKeeper cluster on separate servers.

- -

ZooKeeper is not a strict requirement: in some simple cases you can duplicate the data by writing it into all the - replicas from your application code. This approach is not recommended, because in this case ClickHouse is not able to - guarantee data consistency on all replicas. This remains the responsibility of your application.

- -
Set ZooKeeper locations in the configuration file -
-
-<zookeeper>
-    <node>
-        <host>zoo01.yandex.ru</host>
-        <port>2181</port>
-    </node>
-    <node>
-        <host>zoo02.yandex.ru</host>
-        <port>2181</port>
-    </node>
-    <node>
-        <host>zoo03.yandex.ru</host>
-        <port>2181</port>
-    </node>
-</zookeeper>
-
-
-
- -

We also need to set macros identifying the shard and replica; they will be used on table creation

-
-<macros>
-    <shard>01</shard>
-    <replica>01</replica>
-</macros>
-
-

If there are no replicas at the moment of replicated table creation, a new first replica will be instantiated. - If there are already live replicas, the new replica will clone the data from the existing ones. You have an option to - create all replicated tables first and then insert data into them. Another option is to create some replicas and add - the others after or during data insertion.

- -
-CREATE TABLE ontime_replica (...)
-ENGINE = ReplicatedMergeTree(
-    '/clickhouse_perftest/tables/{shard}/ontime',
-    '{replica}',
-    FlightDate,
-    (Year, FlightDate),
-    8192);
-
-

Here we use the ReplicatedMergeTree - table type. In the parameters we specify the ZooKeeper path containing the shard and replica identifiers.

- -
INSERT INTO ontime_replica SELECT * FROM ontime;
-

Replication operates in multi-master mode. Data can be loaded into any replica, and it will be synced with other - instances automatically. Replication is asynchronous, so at a given moment not all replicas may contain - recently inserted data. At least one replica should be up to allow data insertion. The others will sync up the data and - repair consistency once they become active again. Please notice that such a scheme allows for the possibility - of losing just-appended data.

- -

- ClickHouse source code is published under the Apache 2.0 License. The software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied.
